From bd2940861c282dfc39309bca94378c820ab7df7e Mon Sep 17 00:00:00 2001
From: Ian Lewis
Date: Tue, 15 Jan 2019 16:41:01 +0900
Subject: End to end tests refs #3 (#10)

* Separate docs for containerd 1.1 and 1.2

The configuration for the untrusted workload annotation and runtime class
are different enough that it makes sense to separate the docs.

Commands in docs are taken from scripts in the docs/scripts directory.
These scripts can be used later for integration & doc tests (#3).

The docs can be updated using the embedmd tool:
https://github.com/campoy/embedmd

* Add basic e2e tests refs #3

Added end-to-end tests based on the quickstart workflows for containerd 1.1
and containerd 1.2+.
---
 test/e2e/containerd-install.sh         | 38 ++++++++++++++++++++++++++++++++++
 test/e2e/crictl-install.sh             | 17 +++++++++++++++
 test/e2e/run-container.sh              | 30 ++++++++++++++++++++++++++++++
 test/e2e/runsc-install.sh              |  8 ++++++++
 test/e2e/runtime-handler/install.sh    | 24 ++++++++++++++++++++
 test/e2e/runtime-handler/test.sh       | 33 +++++++++++++++++++++++++++++
 test/e2e/runtime-handler/usage.sh      | 30 ++++++++++++++++++++++++++++++
 test/e2e/shim-install.sh               | 30 ++++++++++++++++++++++++++++++
 test/e2e/untrusted-workload/install.sh | 24 ++++++++++++++++++++
 test/e2e/untrusted-workload/test.sh    | 33 +++++++++++++++++++++++++++++
 test/e2e/untrusted-workload/usage.sh   | 33 +++++++++++++++++++++++++++++
 test/e2e/validate.sh                   | 17 +++++++++++++++
 12 files changed, 317 insertions(+)
 create mode 100755 test/e2e/containerd-install.sh
 create mode 100755 test/e2e/crictl-install.sh
 create mode 100755 test/e2e/run-container.sh
 create mode 100755 test/e2e/runsc-install.sh
 create mode 100755 test/e2e/runtime-handler/install.sh
 create mode 100755 test/e2e/runtime-handler/test.sh
 create mode 100755 test/e2e/runtime-handler/usage.sh
 create mode 100755 test/e2e/shim-install.sh
 create mode 100755 test/e2e/untrusted-workload/install.sh
 create mode 100755 test/e2e/untrusted-workload/test.sh
 create mode 100755 test/e2e/untrusted-workload/usage.sh
 create mode 100755 test/e2e/validate.sh
(limited to 'test')

diff --git a/test/e2e/containerd-install.sh b/test/e2e/containerd-install.sh
new file mode 100755
index 000000000..154f7d7a5
--- /dev/null
+++ b/test/e2e/containerd-install.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# A script to install containerd and CNI plugins for e2e testing
+
+wget -q --https-only \
+  https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz \
+  https://github.com/containernetworking/plugins/releases/download/v0.6.0/cni-plugins-amd64-v0.6.0.tgz
+
+sudo mkdir -p /etc/containerd /etc/cni/net.d /opt/cni/bin
+sudo tar -xvf cni-plugins-amd64-v0.6.0.tgz -C /opt/cni/bin/
+sudo tar -xvf containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz -C /
+
+cat < /tmp/containerd-cri.log &
+
diff --git a/test/e2e/crictl-install.sh b/test/e2e/crictl-install.sh
new file mode 100755
index 000000000..1d63c889b
--- /dev/null
+++ b/test/e2e/crictl-install.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# A sample script for installing crictl.
+ +set -ex + +{ # Step 1: Download crictl +wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz +tar xf crictl-v1.13.0-linux-amd64.tar.gz +sudo mv crictl /usr/local/bin +} + +{ # Step 2: Configure crictl +cat < /tmp/containerd-cri.log & +} diff --git a/test/e2e/runtime-handler/test.sh b/test/e2e/runtime-handler/test.sh new file mode 100755 index 000000000..99f3565b6 --- /dev/null +++ b/test/e2e/runtime-handler/test.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime +# handler. This should work on containerd 1.2+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/runtime-handler/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage +. ./test/e2e/runtime-handler/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh diff --git a/test/e2e/runtime-handler/usage.sh b/test/e2e/runtime-handler/usage.sh new file mode 100755 index 000000000..1f8a09757 --- /dev/null +++ b/test/e2e/runtime-handler/usage.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# A sample script for testing the gvisor-containerd-shim # using untrusted +# workload extension. + +set -ex + +{ # Step 1: Pull the nginx image +sudo crictl pull nginx +} + +{ # Step 2: Create sandbox.json +cat < /tmp/containerd-cri.log & +} diff --git a/test/e2e/untrusted-workload/test.sh b/test/e2e/untrusted-workload/test.sh new file mode 100755 index 000000000..6e312cf6d --- /dev/null +++ b/test/e2e/untrusted-workload/test.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test using the +# untrusted workload extension. This should work on containerd 1.1+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/untrusted-workload/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage +. ./test/e2e/untrusted-workload/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh diff --git a/test/e2e/untrusted-workload/usage.sh b/test/e2e/untrusted-workload/usage.sh new file mode 100755 index 000000000..db8206964 --- /dev/null +++ b/test/e2e/untrusted-workload/usage.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# A sample script for testing the gvisor-containerd-shim # using untrusted +# workload extension. + +set -ex + +{ # Step 1: Pull the nginx image +sudo crictl pull nginx +} + +{ # Step 2: Create sandbox.json +cat < Date: Tue, 15 Jan 2019 04:54:36 -0800 Subject: Use cni v0.7.0. (#11) Use cni v0.7.0 in the integration test. 
Signed-off-by: Lantao Liu --- test/e2e/containerd-install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/e2e/containerd-install.sh b/test/e2e/containerd-install.sh index 154f7d7a5..538b0ca2b 100755 --- a/test/e2e/containerd-install.sh +++ b/test/e2e/containerd-install.sh @@ -4,10 +4,10 @@ wget -q --https-only \ https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz \ - https://github.com/containernetworking/plugins/releases/download/v0.6.0/cni-plugins-amd64-v0.6.0.tgz + https://github.com/containernetworking/plugins/releases/download/v0.7.0/cni-plugins-amd64-v0.7.0.tgz sudo mkdir -p /etc/containerd /etc/cni/net.d /opt/cni/bin -sudo tar -xvf cni-plugins-amd64-v0.6.0.tgz -C /opt/cni/bin/ +sudo tar -xvf cni-plugins-amd64-v0.7.0.tgz -C /opt/cni/bin/ sudo tar -xvf containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz -C / cat < Date: Tue, 29 Jan 2019 18:51:18 -0800 Subject: Add containerd shim v2 support. (#13) * Update vendors Signed-off-by: Lantao Liu * Add containerd shim v2 support. Signed-off-by: Lantao Liu * Add test and doc for containerd-shim-runsc-v1. Signed-off-by: Lantao Liu * Address comments. --- .travis.yml | 1 + Makefile | 6 + README.md | 1 + cmd/containerd-shim-runsc-v1/main.go | 24 + cmd/gvisor-containerd-shim/main.go | 2 +- docs/README.md | 1 + docs/runtime-handler-quickstart.md | 20 +- docs/runtime-handler-shim-v2-quickstart.md | 187 + docs/untrusted-workload-quickstart.md | 20 +- pkg/proc/deleted_state.go | 53 - pkg/proc/exec.go | 254 - pkg/proc/exec_state.go | 187 - pkg/proc/init.go | 416 -- pkg/proc/init_state.go | 224 - pkg/proc/io.go | 146 - pkg/proc/process.go | 39 - pkg/proc/types.go | 62 - pkg/proc/utils.go | 94 - pkg/shim/platform.go | 112 - pkg/shim/service.go | 595 -- pkg/v1/proc/deleted_state.go | 53 + pkg/v1/proc/exec.go | 249 + pkg/v1/proc/exec_state.go | 187 + pkg/v1/proc/init.go | 413 ++ pkg/v1/proc/init_state.go | 224 + pkg/v1/proc/io.go | 146 + pkg/v1/proc/process.go | 39 + pkg/v1/proc/types.go | 72 + pkg/v1/proc/utils.go | 94 + pkg/v1/shim/platform.go | 112 + pkg/v1/shim/service.go | 573 ++ pkg/v1/utils/utils.go | 50 + pkg/v2/options/options.go | 34 + pkg/v2/service.go | 718 +++ pkg/v2/service_linux.go | 111 + test/e2e/runtime-handler-shim-v2/install.sh | 21 + test/e2e/runtime-handler-shim-v2/test.sh | 34 + test/e2e/runtime-handler-shim-v2/validate.sh | 7 + test/e2e/runtime-handler/usage.sh | 4 +- test/e2e/shim-install.sh | 16 +- vendor.conf | 8 +- vendor/github.com/BurntSushi/toml/COPYING | 27 +- vendor/github.com/BurntSushi/toml/lex.go | 2 +- vendor/github.com/containerd/cgroups/LICENSE | 201 + vendor/github.com/containerd/cgroups/README.md | 112 + vendor/github.com/containerd/cgroups/blkio.go | 326 ++ vendor/github.com/containerd/cgroups/cgroup.go | 445 ++ vendor/github.com/containerd/cgroups/control.go | 77 + vendor/github.com/containerd/cgroups/cpu.go | 129 + vendor/github.com/containerd/cgroups/cpuacct.go | 121 + vendor/github.com/containerd/cgroups/cpuset.go | 159 + vendor/github.com/containerd/cgroups/devices.go | 90 + vendor/github.com/containerd/cgroups/errors.go | 47 + vendor/github.com/containerd/cgroups/freezer.go | 82 + vendor/github.com/containerd/cgroups/hierarchy.go | 20 + vendor/github.com/containerd/cgroups/hugetlb.go | 109 + vendor/github.com/containerd/cgroups/memory.go | 325 ++ vendor/github.com/containerd/cgroups/metrics.pb.go | 4288 +++++++++++++++ vendor/github.com/containerd/cgroups/metrics.proto | 123 + 
vendor/github.com/containerd/cgroups/named.go | 39 + vendor/github.com/containerd/cgroups/net_cls.go | 58 + vendor/github.com/containerd/cgroups/net_prio.go | 66 + vendor/github.com/containerd/cgroups/paths.go | 104 + vendor/github.com/containerd/cgroups/perf_event.go | 37 + vendor/github.com/containerd/cgroups/pids.go | 85 + vendor/github.com/containerd/cgroups/rdma.go | 153 + vendor/github.com/containerd/cgroups/state.go | 28 + vendor/github.com/containerd/cgroups/subsystem.go | 112 + vendor/github.com/containerd/cgroups/systemd.go | 131 + vendor/github.com/containerd/cgroups/ticks.go | 26 + vendor/github.com/containerd/cgroups/utils.go | 297 + vendor/github.com/containerd/cgroups/v1.go | 81 + .../containerd/containerd/runtime/v2/README.md | 173 + .../containerd/runtime/v2/shim/reaper_unix.go | 110 + .../containerd/containerd/runtime/v2/shim/shim.go | 269 + .../containerd/runtime/v2/shim/shim_darwin.go | 29 + .../containerd/runtime/v2/shim/shim_linux.go | 30 + .../containerd/runtime/v2/shim/shim_unix.go | 117 + .../containerd/runtime/v2/shim/shim_windows.go | 307 ++ .../containerd/containerd/runtime/v2/shim/util.go | 128 + .../containerd/runtime/v2/shim/util_unix.go | 70 + .../containerd/runtime/v2/shim/util_windows.go | 90 + .../containerd/containerd/runtime/v2/task/doc.go | 17 + .../containerd/runtime/v2/task/shim.pb.go | 5693 ++++++++++++++++++++ .../containerd/runtime/v2/task/shim.proto | 184 + vendor/github.com/containerd/cri/README.md | 6 +- .../cri/pkg/api/runtimeoptions/v1/api.pb.go | 394 ++ .../cri/pkg/api/runtimeoptions/v1/api.proto | 22 + vendor/github.com/containerd/cri/vendor.conf | 30 +- vendor/github.com/coreos/go-systemd/LICENSE | 191 + vendor/github.com/coreos/go-systemd/README.md | 54 + vendor/github.com/coreos/go-systemd/dbus/dbus.go | 213 + .../github.com/coreos/go-systemd/dbus/methods.go | 565 ++ .../coreos/go-systemd/dbus/properties.go | 237 + vendor/github.com/coreos/go-systemd/dbus/set.go | 47 + .../coreos/go-systemd/dbus/subscription.go | 250 + .../coreos/go-systemd/dbus/subscription_set.go | 57 + vendor/github.com/docker/go-units/LICENSE | 191 + vendor/github.com/docker/go-units/README.md | 16 + vendor/github.com/docker/go-units/duration.go | 33 + vendor/github.com/docker/go-units/size.go | 96 + vendor/github.com/docker/go-units/ulimit.go | 118 + vendor/github.com/godbus/dbus/LICENSE | 25 + vendor/github.com/godbus/dbus/README.markdown | 41 + vendor/github.com/godbus/dbus/auth.go | 253 + vendor/github.com/godbus/dbus/auth_external.go | 26 + vendor/github.com/godbus/dbus/auth_sha1.go | 102 + vendor/github.com/godbus/dbus/call.go | 36 + vendor/github.com/godbus/dbus/conn.go | 625 +++ vendor/github.com/godbus/dbus/conn_darwin.go | 21 + vendor/github.com/godbus/dbus/conn_other.go | 27 + vendor/github.com/godbus/dbus/dbus.go | 258 + vendor/github.com/godbus/dbus/decoder.go | 228 + vendor/github.com/godbus/dbus/doc.go | 63 + vendor/github.com/godbus/dbus/encoder.go | 208 + vendor/github.com/godbus/dbus/export.go | 411 ++ vendor/github.com/godbus/dbus/homedir.go | 28 + vendor/github.com/godbus/dbus/homedir_dynamic.go | 15 + vendor/github.com/godbus/dbus/homedir_static.go | 45 + vendor/github.com/godbus/dbus/message.go | 346 ++ vendor/github.com/godbus/dbus/object.go | 126 + vendor/github.com/godbus/dbus/sig.go | 257 + vendor/github.com/godbus/dbus/transport_darwin.go | 6 + vendor/github.com/godbus/dbus/transport_generic.go | 35 + vendor/github.com/godbus/dbus/transport_unix.go | 196 + .../godbus/dbus/transport_unixcred_dragonfly.go | 95 + 
.../godbus/dbus/transport_unixcred_linux.go | 25 + vendor/github.com/godbus/dbus/variant.go | 139 + vendor/github.com/godbus/dbus/variant_lexer.go | 284 + vendor/github.com/godbus/dbus/variant_parser.go | 817 +++ vendor/github.com/gogo/protobuf/gogoproto/doc.go | 169 + .../github.com/gogo/protobuf/gogoproto/gogo.pb.go | 804 +++ .../github.com/gogo/protobuf/gogoproto/gogo.proto | 133 + .../github.com/gogo/protobuf/gogoproto/helper.go | 357 ++ .../protoc-gen-gogo/descriptor/descriptor.go | 118 + .../protoc-gen-gogo/descriptor/descriptor.pb.go | 2280 ++++++++ .../descriptor/descriptor_gostring.gen.go | 772 +++ .../protobuf/protoc-gen-gogo/descriptor/helper.go | 390 ++ 138 files changed, 30748 insertions(+), 2259 deletions(-) create mode 100644 cmd/containerd-shim-runsc-v1/main.go create mode 100644 docs/runtime-handler-shim-v2-quickstart.md delete mode 100644 pkg/proc/deleted_state.go delete mode 100644 pkg/proc/exec.go delete mode 100644 pkg/proc/exec_state.go delete mode 100644 pkg/proc/init.go delete mode 100644 pkg/proc/init_state.go delete mode 100644 pkg/proc/io.go delete mode 100644 pkg/proc/process.go delete mode 100644 pkg/proc/types.go delete mode 100644 pkg/proc/utils.go delete mode 100644 pkg/shim/platform.go delete mode 100644 pkg/shim/service.go create mode 100644 pkg/v1/proc/deleted_state.go create mode 100644 pkg/v1/proc/exec.go create mode 100644 pkg/v1/proc/exec_state.go create mode 100644 pkg/v1/proc/init.go create mode 100644 pkg/v1/proc/init_state.go create mode 100644 pkg/v1/proc/io.go create mode 100644 pkg/v1/proc/process.go create mode 100644 pkg/v1/proc/types.go create mode 100644 pkg/v1/proc/utils.go create mode 100644 pkg/v1/shim/platform.go create mode 100644 pkg/v1/shim/service.go create mode 100644 pkg/v1/utils/utils.go create mode 100644 pkg/v2/options/options.go create mode 100644 pkg/v2/service.go create mode 100644 pkg/v2/service_linux.go create mode 100755 test/e2e/runtime-handler-shim-v2/install.sh create mode 100755 test/e2e/runtime-handler-shim-v2/test.sh create mode 100755 test/e2e/runtime-handler-shim-v2/validate.sh create mode 100644 vendor/github.com/containerd/cgroups/LICENSE create mode 100644 vendor/github.com/containerd/cgroups/README.md create mode 100644 vendor/github.com/containerd/cgroups/blkio.go create mode 100644 vendor/github.com/containerd/cgroups/cgroup.go create mode 100644 vendor/github.com/containerd/cgroups/control.go create mode 100644 vendor/github.com/containerd/cgroups/cpu.go create mode 100644 vendor/github.com/containerd/cgroups/cpuacct.go create mode 100644 vendor/github.com/containerd/cgroups/cpuset.go create mode 100644 vendor/github.com/containerd/cgroups/devices.go create mode 100644 vendor/github.com/containerd/cgroups/errors.go create mode 100644 vendor/github.com/containerd/cgroups/freezer.go create mode 100644 vendor/github.com/containerd/cgroups/hierarchy.go create mode 100644 vendor/github.com/containerd/cgroups/hugetlb.go create mode 100644 vendor/github.com/containerd/cgroups/memory.go create mode 100644 vendor/github.com/containerd/cgroups/metrics.pb.go create mode 100644 vendor/github.com/containerd/cgroups/metrics.proto create mode 100644 vendor/github.com/containerd/cgroups/named.go create mode 100644 vendor/github.com/containerd/cgroups/net_cls.go create mode 100644 vendor/github.com/containerd/cgroups/net_prio.go create mode 100644 vendor/github.com/containerd/cgroups/paths.go create mode 100644 vendor/github.com/containerd/cgroups/perf_event.go create mode 100644 vendor/github.com/containerd/cgroups/pids.go 
create mode 100644 vendor/github.com/containerd/cgroups/rdma.go create mode 100644 vendor/github.com/containerd/cgroups/state.go create mode 100644 vendor/github.com/containerd/cgroups/subsystem.go create mode 100644 vendor/github.com/containerd/cgroups/systemd.go create mode 100644 vendor/github.com/containerd/cgroups/ticks.go create mode 100644 vendor/github.com/containerd/cgroups/utils.go create mode 100644 vendor/github.com/containerd/cgroups/v1.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/README.md create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/reaper_unix.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/shim.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/shim_darwin.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/shim_linux.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/shim_unix.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/shim_windows.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/util.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/util_unix.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/shim/util_windows.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/task/doc.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/task/shim.pb.go create mode 100644 vendor/github.com/containerd/containerd/runtime/v2/task/shim.proto create mode 100644 vendor/github.com/containerd/cri/pkg/api/runtimeoptions/v1/api.pb.go create mode 100644 vendor/github.com/containerd/cri/pkg/api/runtimeoptions/v1/api.proto create mode 100644 vendor/github.com/coreos/go-systemd/LICENSE create mode 100644 vendor/github.com/coreos/go-systemd/README.md create mode 100644 vendor/github.com/coreos/go-systemd/dbus/dbus.go create mode 100644 vendor/github.com/coreos/go-systemd/dbus/methods.go create mode 100644 vendor/github.com/coreos/go-systemd/dbus/properties.go create mode 100644 vendor/github.com/coreos/go-systemd/dbus/set.go create mode 100644 vendor/github.com/coreos/go-systemd/dbus/subscription.go create mode 100644 vendor/github.com/coreos/go-systemd/dbus/subscription_set.go create mode 100644 vendor/github.com/docker/go-units/LICENSE create mode 100644 vendor/github.com/docker/go-units/README.md create mode 100644 vendor/github.com/docker/go-units/duration.go create mode 100644 vendor/github.com/docker/go-units/size.go create mode 100644 vendor/github.com/docker/go-units/ulimit.go create mode 100644 vendor/github.com/godbus/dbus/LICENSE create mode 100644 vendor/github.com/godbus/dbus/README.markdown create mode 100644 vendor/github.com/godbus/dbus/auth.go create mode 100644 vendor/github.com/godbus/dbus/auth_external.go create mode 100644 vendor/github.com/godbus/dbus/auth_sha1.go create mode 100644 vendor/github.com/godbus/dbus/call.go create mode 100644 vendor/github.com/godbus/dbus/conn.go create mode 100644 vendor/github.com/godbus/dbus/conn_darwin.go create mode 100644 vendor/github.com/godbus/dbus/conn_other.go create mode 100644 vendor/github.com/godbus/dbus/dbus.go create mode 100644 vendor/github.com/godbus/dbus/decoder.go create mode 100644 vendor/github.com/godbus/dbus/doc.go create mode 100644 vendor/github.com/godbus/dbus/encoder.go create mode 100644 vendor/github.com/godbus/dbus/export.go create mode 100644 vendor/github.com/godbus/dbus/homedir.go create mode 
100644 vendor/github.com/godbus/dbus/homedir_dynamic.go create mode 100644 vendor/github.com/godbus/dbus/homedir_static.go create mode 100644 vendor/github.com/godbus/dbus/message.go create mode 100644 vendor/github.com/godbus/dbus/object.go create mode 100644 vendor/github.com/godbus/dbus/sig.go create mode 100644 vendor/github.com/godbus/dbus/transport_darwin.go create mode 100644 vendor/github.com/godbus/dbus/transport_generic.go create mode 100644 vendor/github.com/godbus/dbus/transport_unix.go create mode 100644 vendor/github.com/godbus/dbus/transport_unixcred_dragonfly.go create mode 100644 vendor/github.com/godbus/dbus/transport_unixcred_linux.go create mode 100644 vendor/github.com/godbus/dbus/variant.go create mode 100644 vendor/github.com/godbus/dbus/variant_lexer.go create mode 100644 vendor/github.com/godbus/dbus/variant_parser.go create mode 100644 vendor/github.com/gogo/protobuf/gogoproto/doc.go create mode 100644 vendor/github.com/gogo/protobuf/gogoproto/gogo.pb.go create mode 100644 vendor/github.com/gogo/protobuf/gogoproto/gogo.proto create mode 100644 vendor/github.com/gogo/protobuf/gogoproto/helper.go create mode 100644 vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor.go create mode 100644 vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor.pb.go create mode 100644 vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor_gostring.gen.go create mode 100644 vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/helper.go (limited to 'test') diff --git a/.travis.yml b/.travis.yml index 4aaccf523..57bb27901 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,7 @@ env: - CONTAINERD_VERSION=1.1.5 RUNSC_VERSION=2018-12-07 TEST=untrusted-workload - CONTAINERD_VERSION=1.2.1 RUNSC_VERSION=2018-12-07 TEST=untrusted-workload - CONTAINERD_VERSION=1.2.1 RUNSC_VERSION=2018-12-07 TEST=runtime-handler + - CONTAINERD_VERSION=1.2.1 RUNSC_VERSION=2018-12-07 TEST=runtime-handler-shim-v2 go_import_path: github.com/google/gvisor-containerd-shim diff --git a/Makefile b/Makefile index 89e705ddf..0404fc0ee 100644 --- a/Makefile +++ b/Makefile @@ -7,16 +7,22 @@ SOURCES=$(shell find cmd/ pkg/ vendor/ -name '*.go') DEPLOY_PATH=cri-containerd-staging/gvisor-containerd-shim VERSION=$(shell git rev-parse HEAD) +all: bin/gvisor-containerd-shim bin/containerd-shim-runsc-v1 + bin/gvisor-containerd-shim: $(SOURCES) CGO_ENABLED=0 go build ${GO_BUILD_FLAGS} -o bin/gvisor-containerd-shim ${SHIM_GO_LDFLAGS} ${GO_TAGS} ./cmd/gvisor-containerd-shim +bin/containerd-shim-runsc-v1: $(SOURCES) + CGO_ENABLED=0 go build ${GO_BUILD_FLAGS} -o bin/containerd-shim-runsc-v1 ${SHIM_GO_LDFLAGS} ${GO_TAGS} ./cmd/containerd-shim-runsc-v1 install: bin/gvisor-containerd-shim mkdir -p $(DESTDIR)/bin install bin/gvisor-containerd-shim $(DESTDIR)/bin + install bin/containerd-shim-runsc-v1 $(DESTDIR)/bin uninstall: rm -f $(DESTDIR)/bin/gvisor-containerd-shim + rm -f $(DESTDIR)/bin/containerd-shim-runsc-v1 clean: rm -rf bin/* diff --git a/README.md b/README.md index cabd715d7..26efdbc2a 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ gvisor-containerd-shim is a containerd shim for [gVisor](https://github.com/goog - [Untrusted Workload Quick Start (containerd >=1.1)](docs/untrusted-workload-quickstart.md) - [Runtime Handler Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md) +- [Runtime Handler Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md) # Contributing diff --git a/cmd/containerd-shim-runsc-v1/main.go 
b/cmd/containerd-shim-runsc-v1/main.go new file mode 100644 index 000000000..ec73edbd2 --- /dev/null +++ b/cmd/containerd-shim-runsc-v1/main.go @@ -0,0 +1,24 @@ +/* + Copyright The containerd Authors. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import ( + "github.com/containerd/containerd/runtime/v2/shim" + + runsc "github.com/google/gvisor-containerd-shim/pkg/v2" +) + +func main() { + shim.Run("io.containerd.runsc.v1", runsc.New) +} diff --git a/cmd/gvisor-containerd-shim/main.go b/cmd/gvisor-containerd-shim/main.go index cdea2d7b1..ea26aa105 100644 --- a/cmd/gvisor-containerd-shim/main.go +++ b/cmd/gvisor-containerd-shim/main.go @@ -48,7 +48,7 @@ import ( "golang.org/x/sys/unix" runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" - "github.com/google/gvisor-containerd-shim/pkg/shim" + "github.com/google/gvisor-containerd-shim/pkg/v1/shim" ) var ( diff --git a/docs/README.md b/docs/README.md index a0915e32c..eca857423 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,3 +4,4 @@ Everything you need to know about gvisor-containerd-shim - [Untrusted Workload Quick Start (containerd >=1.1)](untrusted-workload-quickstart.md) - [Runtime Handler Quick Start (containerd >=1.2)](runtime-handler-quickstart.md) +- [Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md) diff --git a/docs/runtime-handler-quickstart.md b/docs/runtime-handler-quickstart.md index d97b99034..d8fea65fd 100644 --- a/docs/runtime-handler-quickstart.md +++ b/docs/runtime-handler-quickstart.md @@ -16,30 +16,22 @@ later. 1. Download the latest release of the `gvisor-containerd-shim`. See the [releases page](https://github.com/google/gvisor-containerd-shim/releases) -[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1/ /^}/) +[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(release\)/ /^}/) ```shell -{ # Step 1: Download gvisor-containerd-shim +{ # Step 1(release): Install gvisor-containerd-shim LATEST_RELEASE=$(wget -qO - https://api.github.com/repos/google/gvisor-containerd-shim/releases | grep -oP '(?<="browser_download_url": ")https://[^"]*' | head -1) wget -O gvisor-containerd-shim chmod +x gvisor-containerd-shim +sudo mv gvisor-containerd-shim /usr/local/bin/gvisor-containerd-shim } ``` -2. Copy the binary to the desired directory: - -[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 2/ /^}/) -```shell -{ # Step 2: Copy the binary to the desired directory -sudo mv gvisor-containerd-shim-* /usr/local/bin/gvisor-containerd-shim -} -``` - -3. Create the configuration for the gvisor shim in +2. 
Create the configuration for the gvisor shim in `/etc/containerd/gvisor-containerd-shim.yaml`: -[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 3/ /^}/) +[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 2/ /^}/) ```shell -{ # Step 3: Create the gvisor-containerd-shim.yaml +{ # Step 2: Create the gvisor-containerd-shim.yaml cat < +[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(dev\)/ /^}/) +```shell +{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1 + make + sudo make install +} +``` + +### Configure containerd + +1. Update `/etc/containerd/config.toml`. Make sure `containerd-shim-runsc-v1` is + in `${PATH}`. + +[embedmd]:# (../test/e2e/runtime-handler-shim-v2/install.sh shell /{ # Step 1/ /^}/) +```shell +{ # Step 1: Create containerd config.toml +cat < /tmp/containerd-cri.log & +} diff --git a/test/e2e/runtime-handler-shim-v2/test.sh b/test/e2e/runtime-handler-shim-v2/test.sh new file mode 100755 index 000000000..e33655ec1 --- /dev/null +++ b/test/e2e/runtime-handler-shim-v2/test.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime +# handler. This should work on containerd 1.2+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/runtime-handler-shim-v2/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage (the same with runtime-handler) +. ./test/e2e/runtime-handler/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh +. ./test/e2e/runtime-handler-shim-v2/validate.sh diff --git a/test/e2e/runtime-handler-shim-v2/validate.sh b/test/e2e/runtime-handler-shim-v2/validate.sh new file mode 100755 index 000000000..b74a059ef --- /dev/null +++ b/test/e2e/runtime-handler-shim-v2/validate.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# A sample script to validating the running containerd-shim-runsc-v1. + +set -ex + +ps aux | grep [c]ontainerd-shim-runsc-v1 diff --git a/test/e2e/runtime-handler/usage.sh b/test/e2e/runtime-handler/usage.sh index 1f8a09757..350c720c2 100755 --- a/test/e2e/runtime-handler/usage.sh +++ b/test/e2e/runtime-handler/usage.sh @@ -1,7 +1,7 @@ #!/bin/bash -# A sample script for testing the gvisor-containerd-shim # using untrusted -# workload extension. +# A sample script for testing the gvisor-containerd-shim +# using runtime handler. 
set -ex diff --git a/test/e2e/shim-install.sh b/test/e2e/shim-install.sh index 93587ea50..c0252bdd4 100755 --- a/test/e2e/shim-install.sh +++ b/test/e2e/shim-install.sh @@ -5,23 +5,21 @@ set -ex # Build gvisor-containerd-shim -if [ "${INSTALL_LATEST}" === "1" ]; then -{ # Step 1: Download gvisor-containerd-shim +if [ "${INSTALL_LATEST}" == "1" ]; then +{ # Step 1(release): Install gvisor-containerd-shim LATEST_RELEASE=$(wget -qO - https://api.github.com/repos/google/gvisor-containerd-shim/releases | grep -oP '(?<="browser_download_url": ")https://[^"]*' | head -1) wget -O gvisor-containerd-shim chmod +x gvisor-containerd-shim +sudo mv gvisor-containerd-shim /usr/local/bin/gvisor-containerd-shim } else +{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1 make - mv bin/gvisor-containerd-shim gvisor-containerd-shim-dev -fi - -{ # Step 2: Copy the binary to the desired directory -sudo mv gvisor-containerd-shim-* /usr/local/bin/gvisor-containerd-shim + sudo make install } +fi - -{ # Step 3: Create the gvisor-containerd-shim.yaml +{ # Step 2: Create the gvisor-containerd-shim.yaml cat < -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go index e0a742a88..6dee7fc79 100644 --- a/vendor/github.com/BurntSushi/toml/lex.go +++ b/vendor/github.com/BurntSushi/toml/lex.go @@ -775,7 +775,7 @@ func lexDatetime(lx *lexer) stateFn { return lexDatetime } switch r { - case '-', 'T', ':', '.', 'Z', '+': + case '-', 'T', ':', '.', 'Z': return lexDatetime } diff --git a/vendor/github.com/containerd/cgroups/LICENSE b/vendor/github.com/containerd/cgroups/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/vendor/github.com/containerd/cgroups/README.md b/vendor/github.com/containerd/cgroups/README.md new file mode 100644 index 000000000..69e932a9f --- /dev/null +++ b/vendor/github.com/containerd/cgroups/README.md @@ -0,0 +1,112 @@ +# cgroups + +[![Build Status](https://travis-ci.org/containerd/cgroups.svg?branch=master)](https://travis-ci.org/containerd/cgroups) + +[![codecov](https://codecov.io/gh/containerd/cgroups/branch/master/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups) + +Go package for creating, managing, inspecting, and destroying cgroups. +The resources format for settings on the cgroup uses the OCI runtime-spec found +[here](https://github.com/opencontainers/runtime-spec). + +## Examples + +### Create a new cgroup + +This creates a new cgroup using a static path for all subsystems under `/test`. + +* /sys/fs/cgroup/cpu/test +* /sys/fs/cgroup/memory/test +* etc.... + +It uses a single hierarchy and specifies cpu shares as a resource constraint and +uses the v1 implementation of cgroups. + + +```go +shares := uint64(100) +control, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/test"), &specs.LinuxResources{ + CPU: &specs.CPU{ + Shares: &shares, + }, +}) +defer control.Delete() +``` + +### Create with systemd slice support + + +```go +control, err := cgroups.New(cgroups.Systemd, cgroups.Slice("system.slice", "runc-test"), &specs.LinuxResources{ + CPU: &specs.CPU{ + Shares: &shares, + }, +}) + +``` + +### Load an existing cgroup + +```go +control, err = cgroups.Load(cgroups.V1, cgroups.StaticPath("/test")) +``` + +### Add a process to the cgroup + +```go +if err := control.Add(cgroups.Process{Pid:1234}); err != nil { +} +``` + +### Update the cgroup + +To update the resources applied in the cgroup + +```go +shares = uint64(200) +if err := control.Update(&specs.LinuxResources{ + CPU: &specs.CPU{ + Shares: &shares, + }, +}); err != nil { +} +``` + +### Freeze and Thaw the cgroup + +```go +if err := control.Freeze(); err != nil { +} +if err := control.Thaw(); err != nil { +} +``` + +### List all processes in the cgroup or recursively + +```go +processes, err := control.Processes(cgroups.Devices, recursive) +``` + +### Get Stats on the cgroup + +```go +stats, err := control.Stat() +``` + +By adding `cgroups.IgnoreNotExist` all non-existent files will be ignored, e.g. swap memory stats without swap enabled +```go +stats, err := control.Stat(cgroups.IgnoreNotExist) +``` + +### Move process across cgroups + +This allows you to take processes from one cgroup and move them to another. + +```go +err := control.MoveTo(destination) +``` + +### Create subcgroup + +```go +subCgroup, err := control.New("child", resources) +``` diff --git a/vendor/github.com/containerd/cgroups/blkio.go b/vendor/github.com/containerd/cgroups/blkio.go new file mode 100644 index 000000000..fc1e689cb --- /dev/null +++ b/vendor/github.com/containerd/cgroups/blkio.go @@ -0,0 +1,326 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewBlkio(root string) *blkioController { + return &blkioController{ + root: filepath.Join(root, string(Blkio)), + } +} + +type blkioController struct { + root string +} + +func (b *blkioController) Name() Name { + return Blkio +} + +func (b *blkioController) Path(path string) string { + return filepath.Join(b.root, path) +} + +func (b *blkioController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(b.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.BlockIO == nil { + return nil + } + for _, t := range createBlkioSettings(resources.BlockIO) { + if t.value != nil { + if err := ioutil.WriteFile( + filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", t.name)), + t.format(t.value), + defaultFilePerm, + ); err != nil { + return err + } + } + } + return nil +} + +func (b *blkioController) Update(path string, resources *specs.LinuxResources) error { + return b.Create(path, resources) +} + +func (b *blkioController) Stat(path string, stats *Metrics) error { + stats.Blkio = &BlkIOStat{} + settings := []blkioStatSettings{ + { + name: "throttle.io_serviced", + entry: &stats.Blkio.IoServicedRecursive, + }, + { + name: "throttle.io_service_bytes", + entry: &stats.Blkio.IoServiceBytesRecursive, + }, + } + // Try to read CFQ stats available on all CFQ enabled kernels first + if _, err := os.Lstat(filepath.Join(b.Path(path), fmt.Sprintf("blkio.io_serviced_recursive"))); err == nil { + settings = append(settings, + blkioStatSettings{ + name: "sectors_recursive", + entry: &stats.Blkio.SectorsRecursive, + }, + blkioStatSettings{ + name: "io_service_bytes_recursive", + entry: &stats.Blkio.IoServiceBytesRecursive, + }, + blkioStatSettings{ + name: "io_serviced_recursive", + entry: &stats.Blkio.IoServicedRecursive, + }, + blkioStatSettings{ + name: "io_queued_recursive", + entry: &stats.Blkio.IoQueuedRecursive, + }, + blkioStatSettings{ + name: "io_service_time_recursive", + entry: &stats.Blkio.IoServiceTimeRecursive, + }, + blkioStatSettings{ + name: "io_wait_time_recursive", + entry: &stats.Blkio.IoWaitTimeRecursive, + }, + blkioStatSettings{ + name: "io_merged_recursive", + entry: &stats.Blkio.IoMergedRecursive, + }, + blkioStatSettings{ + name: "time_recursive", + entry: &stats.Blkio.IoTimeRecursive, + }, + ) + } + f, err := os.Open("/proc/diskstats") + if err != nil { + return err + } + defer f.Close() + + devices, err := getDevices(f) + if err != nil { + return err + } + + for _, t := range settings { + if err := b.readEntry(devices, path, t.name, t.entry); err != nil { + return err + } + } + return nil +} + +func (b *blkioController) readEntry(devices map[deviceKey]string, path, name string, entry *[]*BlkIOEntry) error { + f, err := os.Open(filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", name))) + if err != nil { + return err + } + defer f.Close() + sc := bufio.NewScanner(f) + for sc.Scan() { + if err := sc.Err(); err != nil { + return err + } + // format: dev type amount + fields := strings.FieldsFunc(sc.Text(), splitBlkIOStatLine) + if len(fields) < 3 { + if len(fields) == 2 && fields[0] == "Total" { + // skip total line + continue + } else { + return fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) + } + } + major, err := strconv.ParseUint(fields[0], 10, 64) + if err != nil { + return err + } + minor, err := 
strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return err + } + op := "" + valueField := 2 + if len(fields) == 4 { + op = fields[2] + valueField = 3 + } + v, err := strconv.ParseUint(fields[valueField], 10, 64) + if err != nil { + return err + } + *entry = append(*entry, &BlkIOEntry{ + Device: devices[deviceKey{major, minor}], + Major: major, + Minor: minor, + Op: op, + Value: v, + }) + } + return nil +} + +func createBlkioSettings(blkio *specs.LinuxBlockIO) []blkioSettings { + settings := []blkioSettings{ + { + name: "weight", + value: blkio.Weight, + format: uintf, + }, + { + name: "leaf_weight", + value: blkio.LeafWeight, + format: uintf, + }, + } + for _, wd := range blkio.WeightDevice { + settings = append(settings, + blkioSettings{ + name: "weight_device", + value: wd, + format: weightdev, + }, + blkioSettings{ + name: "leaf_weight_device", + value: wd, + format: weightleafdev, + }) + } + for _, t := range []struct { + name string + list []specs.LinuxThrottleDevice + }{ + { + name: "throttle.read_bps_device", + list: blkio.ThrottleReadBpsDevice, + }, + { + name: "throttle.read_iops_device", + list: blkio.ThrottleReadIOPSDevice, + }, + { + name: "throttle.write_bps_device", + list: blkio.ThrottleWriteBpsDevice, + }, + { + name: "throttle.write_iops_device", + list: blkio.ThrottleWriteIOPSDevice, + }, + } { + for _, td := range t.list { + settings = append(settings, blkioSettings{ + name: t.name, + value: td, + format: throttleddev, + }) + } + } + return settings +} + +type blkioSettings struct { + name string + value interface{} + format func(v interface{}) []byte +} + +type blkioStatSettings struct { + name string + entry *[]*BlkIOEntry +} + +func uintf(v interface{}) []byte { + return []byte(strconv.FormatUint(uint64(*v.(*uint16)), 10)) +} + +func weightdev(v interface{}) []byte { + wd := v.(specs.LinuxWeightDevice) + return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)) +} + +func weightleafdev(v interface{}) []byte { + wd := v.(specs.LinuxWeightDevice) + return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)) +} + +func throttleddev(v interface{}) []byte { + td := v.(specs.LinuxThrottleDevice) + return []byte(fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)) +} + +func splitBlkIOStatLine(r rune) bool { + return r == ' ' || r == ':' +} + +type deviceKey struct { + major, minor uint64 +} + +// getDevices makes a best effort attempt to read all the devices into a map +// keyed by major and minor number. Since devices may be mapped multiple times, +// we err on taking the first occurrence. 
+func getDevices(r io.Reader) (map[deviceKey]string, error) { + + var ( + s = bufio.NewScanner(r) + devices = make(map[deviceKey]string) + ) + for s.Scan() { + fields := strings.Fields(s.Text()) + major, err := strconv.Atoi(fields[0]) + if err != nil { + return nil, err + } + minor, err := strconv.Atoi(fields[1]) + if err != nil { + return nil, err + } + key := deviceKey{ + major: uint64(major), + minor: uint64(minor), + } + if _, ok := devices[key]; ok { + continue + } + devices[key] = filepath.Join("/dev", fields[2]) + } + return devices, s.Err() +} + +func major(devNumber uint64) uint64 { + return (devNumber >> 8) & 0xfff +} + +func minor(devNumber uint64) uint64 { + return (devNumber & 0xff) | ((devNumber >> 12) & 0xfff00) +} diff --git a/vendor/github.com/containerd/cgroups/cgroup.go b/vendor/github.com/containerd/cgroups/cgroup.go new file mode 100644 index 000000000..7959feb49 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/cgroup.go @@ -0,0 +1,445 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +// New returns a new control via the cgroup cgroups interface +func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources) (Cgroup, error) { + subsystems, err := hierarchy() + if err != nil { + return nil, err + } + for _, s := range subsystems { + if err := initializeSubsystem(s, path, resources); err != nil { + return nil, err + } + } + return &cgroup{ + path: path, + subsystems: subsystems, + }, nil +} + +// Load will load an existing cgroup and allow it to be controlled +func Load(hierarchy Hierarchy, path Path) (Cgroup, error) { + subsystems, err := hierarchy() + if err != nil { + return nil, err + } + // check the the subsystems still exist + for _, s := range pathers(subsystems) { + p, err := path(s.Name()) + if err != nil { + if os.IsNotExist(errors.Cause(err)) { + return nil, ErrCgroupDeleted + } + return nil, err + } + if _, err := os.Lstat(s.Path(p)); err != nil { + if os.IsNotExist(err) { + return nil, ErrCgroupDeleted + } + return nil, err + } + } + return &cgroup{ + path: path, + subsystems: subsystems, + }, nil +} + +type cgroup struct { + path Path + + subsystems []Subsystem + mu sync.Mutex + err error +} + +// New returns a new sub cgroup +func (c *cgroup) New(name string, resources *specs.LinuxResources) (Cgroup, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + path := subPath(c.path, name) + for _, s := range c.subsystems { + if err := initializeSubsystem(s, path, resources); err != nil { + return nil, err + } + } + return &cgroup{ + path: path, + subsystems: c.subsystems, + }, nil +} + +// Subsystems returns all the subsystems that are currently being +// consumed by the group +func (c *cgroup) Subsystems() []Subsystem { + return c.subsystems +} + +// Add moves the provided 
process into the new cgroup +func (c *cgroup) Add(process Process) error { + if process.Pid <= 0 { + return ErrInvalidPid + } + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + return c.add(process) +} + +func (c *cgroup) add(process Process) error { + for _, s := range pathers(c.subsystems) { + p, err := c.path(s.Name()) + if err != nil { + return err + } + if err := ioutil.WriteFile( + filepath.Join(s.Path(p), cgroupProcs), + []byte(strconv.Itoa(process.Pid)), + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +// AddTask moves the provided tasks (threads) into the new cgroup +func (c *cgroup) AddTask(process Process) error { + if process.Pid <= 0 { + return ErrInvalidPid + } + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + return c.addTask(process) +} + +func (c *cgroup) addTask(process Process) error { + for _, s := range pathers(c.subsystems) { + p, err := c.path(s.Name()) + if err != nil { + return err + } + if err := ioutil.WriteFile( + filepath.Join(s.Path(p), cgroupTasks), + []byte(strconv.Itoa(process.Pid)), + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +// Delete will remove the control group from each of the subsystems registered +func (c *cgroup) Delete() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + var errors []string + for _, s := range c.subsystems { + if d, ok := s.(deleter); ok { + sp, err := c.path(s.Name()) + if err != nil { + return err + } + if err := d.Delete(sp); err != nil { + errors = append(errors, string(s.Name())) + } + continue + } + if p, ok := s.(pather); ok { + sp, err := c.path(s.Name()) + if err != nil { + return err + } + path := p.Path(sp) + if err := remove(path); err != nil { + errors = append(errors, path) + } + } + } + if len(errors) > 0 { + return fmt.Errorf("cgroups: unable to remove paths %s", strings.Join(errors, ", ")) + } + c.err = ErrCgroupDeleted + return nil +} + +// Stat returns the current metrics for the cgroup +func (c *cgroup) Stat(handlers ...ErrorHandler) (*Metrics, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + if len(handlers) == 0 { + handlers = append(handlers, errPassthrough) + } + var ( + stats = &Metrics{ + CPU: &CPUStat{ + Throttling: &Throttle{}, + Usage: &CPUUsage{}, + }, + } + wg = &sync.WaitGroup{} + errs = make(chan error, len(c.subsystems)) + ) + for _, s := range c.subsystems { + if ss, ok := s.(stater); ok { + sp, err := c.path(s.Name()) + if err != nil { + return nil, err + } + wg.Add(1) + go func() { + defer wg.Done() + if err := ss.Stat(sp, stats); err != nil { + for _, eh := range handlers { + if herr := eh(err); herr != nil { + errs <- herr + } + } + } + }() + } + } + wg.Wait() + close(errs) + for err := range errs { + return nil, err + } + return stats, nil +} + +// Update updates the cgroup with the new resource values provided +// +// Be prepared to handle EBUSY when trying to update a cgroup with +// live processes and other operations like Stats being performed at the +// same time +func (c *cgroup) Update(resources *specs.LinuxResources) error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + for _, s := range c.subsystems { + if u, ok := s.(updater); ok { + sp, err := c.path(s.Name()) + if err != nil { + return err + } + if err := u.Update(sp, resources); err != nil { + return err + } + } + } + return nil +} + +// Processes returns the processes running inside the cgroup along +// with 
the subsystem used, pid, and path +func (c *cgroup) Processes(subsystem Name, recursive bool) ([]Process, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + return c.processes(subsystem, recursive) +} + +func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) { + s := c.getSubsystem(subsystem) + sp, err := c.path(subsystem) + if err != nil { + return nil, err + } + path := s.(pather).Path(sp) + var processes []Process + err = filepath.Walk(path, func(p string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !recursive && info.IsDir() { + if p == path { + return nil + } + return filepath.SkipDir + } + dir, name := filepath.Split(p) + if name != cgroupProcs { + return nil + } + procs, err := readPids(dir, subsystem) + if err != nil { + return err + } + processes = append(processes, procs...) + return nil + }) + return processes, err +} + +// Freeze freezes the entire cgroup and all the processes inside it +func (c *cgroup) Freeze() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + s := c.getSubsystem(Freezer) + if s == nil { + return ErrFreezerNotSupported + } + sp, err := c.path(Freezer) + if err != nil { + return err + } + return s.(*freezerController).Freeze(sp) +} + +// Thaw thaws out the cgroup and all the processes inside it +func (c *cgroup) Thaw() error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + s := c.getSubsystem(Freezer) + if s == nil { + return ErrFreezerNotSupported + } + sp, err := c.path(Freezer) + if err != nil { + return err + } + return s.(*freezerController).Thaw(sp) +} + +// OOMEventFD returns the memory cgroup's out of memory event fd that triggers +// when processes inside the cgroup receive an oom event. Returns +// ErrMemoryNotSupported if memory cgroups is not supported. 
+func (c *cgroup) OOMEventFD() (uintptr, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return 0, c.err + } + s := c.getSubsystem(Memory) + if s == nil { + return 0, ErrMemoryNotSupported + } + sp, err := c.path(Memory) + if err != nil { + return 0, err + } + return s.(*memoryController).OOMEventFD(sp) +} + +// State returns the state of the cgroup and its processes +func (c *cgroup) State() State { + c.mu.Lock() + defer c.mu.Unlock() + c.checkExists() + if c.err != nil && c.err == ErrCgroupDeleted { + return Deleted + } + s := c.getSubsystem(Freezer) + if s == nil { + return Thawed + } + sp, err := c.path(Freezer) + if err != nil { + return Unknown + } + state, err := s.(*freezerController).state(sp) + if err != nil { + return Unknown + } + return state +} + +// MoveTo does a recursive move subsystem by subsystem of all the processes +// inside the group +func (c *cgroup) MoveTo(destination Cgroup) error { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return c.err + } + for _, s := range c.subsystems { + processes, err := c.processes(s.Name(), true) + if err != nil { + return err + } + for _, p := range processes { + if err := destination.Add(p); err != nil { + return err + } + } + } + return nil +} + +func (c *cgroup) getSubsystem(n Name) Subsystem { + for _, s := range c.subsystems { + if s.Name() == n { + return s + } + } + return nil +} + +func (c *cgroup) checkExists() { + for _, s := range pathers(c.subsystems) { + p, err := c.path(s.Name()) + if err != nil { + return + } + if _, err := os.Lstat(s.Path(p)); err != nil { + if os.IsNotExist(err) { + c.err = ErrCgroupDeleted + return + } + } + } +} diff --git a/vendor/github.com/containerd/cgroups/control.go b/vendor/github.com/containerd/cgroups/control.go new file mode 100644 index 000000000..63e2df93d --- /dev/null +++ b/vendor/github.com/containerd/cgroups/control.go @@ -0,0 +1,77 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "os" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const ( + cgroupProcs = "cgroup.procs" + cgroupTasks = "tasks" + defaultDirPerm = 0755 +) + +// defaultFilePerm is a var so that the test framework can change the filemode +// of all files created when the tests are running. The difference between the +// tests and real world use is that files like "cgroup.procs" will exist when writing +// to a read cgroup filesystem and do not exist prior when running in the tests. 
+// this is set to a non 0 value in the test code +var defaultFilePerm = os.FileMode(0) + +type Process struct { + // Subsystem is the name of the subsystem that the process is in + Subsystem Name + // Pid is the process id of the process + Pid int + // Path is the full path of the subsystem and location that the process is in + Path string +} + +// Cgroup handles interactions with the individual groups to perform +// actions on them as them main interface to this cgroup package +type Cgroup interface { + // New creates a new cgroup under the calling cgroup + New(string, *specs.LinuxResources) (Cgroup, error) + // Add adds a process to the cgroup (cgroup.procs) + Add(Process) error + // AddTask adds a process to the cgroup (tasks) + AddTask(Process) error + // Delete removes the cgroup as a whole + Delete() error + // MoveTo moves all the processes under the calling cgroup to the provided one + // subsystems are moved one at a time + MoveTo(Cgroup) error + // Stat returns the stats for all subsystems in the cgroup + Stat(...ErrorHandler) (*Metrics, error) + // Update updates all the subsystems with the provided resource changes + Update(resources *specs.LinuxResources) error + // Processes returns all the processes in a select subsystem for the cgroup + Processes(Name, bool) ([]Process, error) + // Freeze freezes or pauses all processes inside the cgroup + Freeze() error + // Thaw thaw or resumes all processes inside the cgroup + Thaw() error + // OOMEventFD returns the memory subsystem's event fd for OOM events + OOMEventFD() (uintptr, error) + // State returns the cgroups current state + State() State + // Subsystems returns all the subsystems in the cgroup + Subsystems() []Subsystem +} diff --git a/vendor/github.com/containerd/cgroups/cpu.go b/vendor/github.com/containerd/cgroups/cpu.go new file mode 100644 index 000000000..431cd3e51 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/cpu.go @@ -0,0 +1,129 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package cgroups + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewCpu(root string) *cpuController { + return &cpuController{ + root: filepath.Join(root, string(Cpu)), + } +} + +type cpuController struct { + root string +} + +func (c *cpuController) Name() Name { + return Cpu +} + +func (c *cpuController) Path(path string) string { + return filepath.Join(c.root, path) +} + +func (c *cpuController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(c.Path(path), defaultDirPerm); err != nil { + return err + } + if cpu := resources.CPU; cpu != nil { + for _, t := range []struct { + name string + ivalue *int64 + uvalue *uint64 + }{ + { + name: "rt_period_us", + uvalue: cpu.RealtimePeriod, + }, + { + name: "rt_runtime_us", + ivalue: cpu.RealtimeRuntime, + }, + { + name: "shares", + uvalue: cpu.Shares, + }, + { + name: "cfs_period_us", + uvalue: cpu.Period, + }, + { + name: "cfs_quota_us", + ivalue: cpu.Quota, + }, + } { + var value []byte + if t.uvalue != nil { + value = []byte(strconv.FormatUint(*t.uvalue, 10)) + } else if t.ivalue != nil { + value = []byte(strconv.FormatInt(*t.ivalue, 10)) + } + if value != nil { + if err := ioutil.WriteFile( + filepath.Join(c.Path(path), fmt.Sprintf("cpu.%s", t.name)), + value, + defaultFilePerm, + ); err != nil { + return err + } + } + } + } + return nil +} + +func (c *cpuController) Update(path string, resources *specs.LinuxResources) error { + return c.Create(path, resources) +} + +func (c *cpuController) Stat(path string, stats *Metrics) error { + f, err := os.Open(filepath.Join(c.Path(path), "cpu.stat")) + if err != nil { + return err + } + defer f.Close() + // get or create the cpu field because cpuacct can also set values on this struct + sc := bufio.NewScanner(f) + for sc.Scan() { + if err := sc.Err(); err != nil { + return err + } + key, v, err := parseKV(sc.Text()) + if err != nil { + return err + } + switch key { + case "nr_periods": + stats.CPU.Throttling.Periods = v + case "nr_throttled": + stats.CPU.Throttling.ThrottledPeriods = v + case "throttled_time": + stats.CPU.Throttling.ThrottledTime = v + } + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/cpuacct.go b/vendor/github.com/containerd/cgroups/cpuacct.go new file mode 100644 index 000000000..42a490a87 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/cpuacct.go @@ -0,0 +1,121 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package cgroups + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +const nanosecondsInSecond = 1000000000 + +var clockTicks = getClockTicks() + +func NewCpuacct(root string) *cpuacctController { + return &cpuacctController{ + root: filepath.Join(root, string(Cpuacct)), + } +} + +type cpuacctController struct { + root string +} + +func (c *cpuacctController) Name() Name { + return Cpuacct +} + +func (c *cpuacctController) Path(path string) string { + return filepath.Join(c.root, path) +} + +func (c *cpuacctController) Stat(path string, stats *Metrics) error { + user, kernel, err := c.getUsage(path) + if err != nil { + return err + } + total, err := readUint(filepath.Join(c.Path(path), "cpuacct.usage")) + if err != nil { + return err + } + percpu, err := c.percpuUsage(path) + if err != nil { + return err + } + stats.CPU.Usage.Total = total + stats.CPU.Usage.User = user + stats.CPU.Usage.Kernel = kernel + stats.CPU.Usage.PerCPU = percpu + return nil +} + +func (c *cpuacctController) percpuUsage(path string) ([]uint64, error) { + var usage []uint64 + data, err := ioutil.ReadFile(filepath.Join(c.Path(path), "cpuacct.usage_percpu")) + if err != nil { + return nil, err + } + for _, v := range strings.Fields(string(data)) { + u, err := strconv.ParseUint(v, 10, 64) + if err != nil { + return nil, err + } + usage = append(usage, u) + } + return usage, nil +} + +func (c *cpuacctController) getUsage(path string) (user uint64, kernel uint64, err error) { + statPath := filepath.Join(c.Path(path), "cpuacct.stat") + data, err := ioutil.ReadFile(statPath) + if err != nil { + return 0, 0, err + } + fields := strings.Fields(string(data)) + if len(fields) != 4 { + return 0, 0, fmt.Errorf("%q is expected to have 4 fields", statPath) + } + for _, t := range []struct { + index int + name string + value *uint64 + }{ + { + index: 0, + name: "user", + value: &user, + }, + { + index: 2, + name: "system", + value: &kernel, + }, + } { + if fields[t.index] != t.name { + return 0, 0, fmt.Errorf("expected field %q but found %q in %q", t.name, fields[t.index], statPath) + } + v, err := strconv.ParseUint(fields[t.index+1], 10, 64) + if err != nil { + return 0, 0, err + } + *t.value = v + } + return (user * nanosecondsInSecond) / clockTicks, (kernel * nanosecondsInSecond) / clockTicks, nil +} diff --git a/vendor/github.com/containerd/cgroups/cpuset.go b/vendor/github.com/containerd/cgroups/cpuset.go new file mode 100644 index 000000000..f182aa68c --- /dev/null +++ b/vendor/github.com/containerd/cgroups/cpuset.go @@ -0,0 +1,159 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package cgroups + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewCputset(root string) *cpusetController { + return &cpusetController{ + root: filepath.Join(root, string(Cpuset)), + } +} + +type cpusetController struct { + root string +} + +func (c *cpusetController) Name() Name { + return Cpuset +} + +func (c *cpusetController) Path(path string) string { + return filepath.Join(c.root, path) +} + +func (c *cpusetController) Create(path string, resources *specs.LinuxResources) error { + if err := c.ensureParent(c.Path(path), c.root); err != nil { + return err + } + if err := os.MkdirAll(c.Path(path), defaultDirPerm); err != nil { + return err + } + if err := c.copyIfNeeded(c.Path(path), filepath.Dir(c.Path(path))); err != nil { + return err + } + if resources.CPU != nil { + for _, t := range []struct { + name string + value *string + }{ + { + name: "cpus", + value: &resources.CPU.Cpus, + }, + { + name: "mems", + value: &resources.CPU.Mems, + }, + } { + if t.value != nil { + if err := ioutil.WriteFile( + filepath.Join(c.Path(path), fmt.Sprintf("cpuset.%s", t.name)), + []byte(*t.value), + defaultFilePerm, + ); err != nil { + return err + } + } + } + } + return nil +} + +func (c *cpusetController) Update(path string, resources *specs.LinuxResources) error { + return c.Create(path, resources) +} + +func (c *cpusetController) getValues(path string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(path, "cpuset.cpus")); err != nil && !os.IsNotExist(err) { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(path, "cpuset.mems")); err != nil && !os.IsNotExist(err) { + return + } + return cpus, mems, nil +} + +// ensureParent makes sure that the parent directory of current is created +// and populated with the proper cpus and mems files copied from +// it's parent. +func (c *cpusetController) ensureParent(current, root string) error { + parent := filepath.Dir(current) + if _, err := filepath.Rel(root, parent); err != nil { + return nil + } + // Avoid infinite recursion. 
+ if parent == current { + return fmt.Errorf("cpuset: cgroup parent path outside cgroup root") + } + if cleanPath(parent) != root { + if err := c.ensureParent(parent, root); err != nil { + return err + } + } + if err := os.MkdirAll(current, defaultDirPerm); err != nil { + return err + } + return c.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (c *cpusetController) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + if currentCpus, currentMems, err = c.getValues(current); err != nil { + return err + } + if parentCpus, parentMems, err = c.getValues(parent); err != nil { + return err + } + if isEmpty(currentCpus) { + if err := ioutil.WriteFile( + filepath.Join(current, "cpuset.cpus"), + parentCpus, + defaultFilePerm, + ); err != nil { + return err + } + } + if isEmpty(currentMems) { + if err := ioutil.WriteFile( + filepath.Join(current, "cpuset.mems"), + parentMems, + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +func isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} diff --git a/vendor/github.com/containerd/cgroups/devices.go b/vendor/github.com/containerd/cgroups/devices.go new file mode 100644 index 000000000..f9a118b22 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/devices.go @@ -0,0 +1,90 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package cgroups + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const ( + allowDeviceFile = "devices.allow" + denyDeviceFile = "devices.deny" + wildcard = -1 +) + +func NewDevices(root string) *devicesController { + return &devicesController{ + root: filepath.Join(root, string(Devices)), + } +} + +type devicesController struct { + root string +} + +func (d *devicesController) Name() Name { + return Devices +} + +func (d *devicesController) Path(path string) string { + return filepath.Join(d.root, path) +} + +func (d *devicesController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(d.Path(path), defaultDirPerm); err != nil { + return err + } + for _, device := range resources.Devices { + file := denyDeviceFile + if device.Allow { + file = allowDeviceFile + } + if err := ioutil.WriteFile( + filepath.Join(d.Path(path), file), + []byte(deviceString(device)), + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +func (d *devicesController) Update(path string, resources *specs.LinuxResources) error { + return d.Create(path, resources) +} + +func deviceString(device specs.LinuxDeviceCgroup) string { + return fmt.Sprintf("%s %s:%s %s", + device.Type, + deviceNumber(device.Major), + deviceNumber(device.Minor), + device.Access, + ) +} + +func deviceNumber(number *int64) string { + if number == nil || *number == wildcard { + return "*" + } + return fmt.Sprint(*number) +} diff --git a/vendor/github.com/containerd/cgroups/errors.go b/vendor/github.com/containerd/cgroups/errors.go new file mode 100644 index 000000000..f1ad8315c --- /dev/null +++ b/vendor/github.com/containerd/cgroups/errors.go @@ -0,0 +1,47 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package cgroups + +import ( + "errors" + "os" +) + +var ( + ErrInvalidPid = errors.New("cgroups: pid must be greater than 0") + ErrMountPointNotExist = errors.New("cgroups: cgroup mountpoint does not exist") + ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed") + ErrFreezerNotSupported = errors.New("cgroups: freezer cgroup not supported on this system") + ErrMemoryNotSupported = errors.New("cgroups: memory cgroup not supported on this system") + ErrCgroupDeleted = errors.New("cgroups: cgroup deleted") + ErrNoCgroupMountDestination = errors.New("cgroups: cannot find cgroup mount destination") +) + +// ErrorHandler is a function that handles and acts on errors +type ErrorHandler func(err error) error + +// IgnoreNotExist ignores any errors that are for not existing files +func IgnoreNotExist(err error) error { + if os.IsNotExist(err) { + return nil + } + return err +} + +func errPassthrough(err error) error { + return err +} diff --git a/vendor/github.com/containerd/cgroups/freezer.go b/vendor/github.com/containerd/cgroups/freezer.go new file mode 100644 index 000000000..5e668408a --- /dev/null +++ b/vendor/github.com/containerd/cgroups/freezer.go @@ -0,0 +1,82 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "io/ioutil" + "path/filepath" + "strings" + "time" +) + +func NewFreezer(root string) *freezerController { + return &freezerController{ + root: filepath.Join(root, string(Freezer)), + } +} + +type freezerController struct { + root string +} + +func (f *freezerController) Name() Name { + return Freezer +} + +func (f *freezerController) Path(path string) string { + return filepath.Join(f.root, path) +} + +func (f *freezerController) Freeze(path string) error { + return f.waitState(path, Frozen) +} + +func (f *freezerController) Thaw(path string) error { + return f.waitState(path, Thawed) +} + +func (f *freezerController) changeState(path string, state State) error { + return ioutil.WriteFile( + filepath.Join(f.root, path, "freezer.state"), + []byte(strings.ToUpper(string(state))), + defaultFilePerm, + ) +} + +func (f *freezerController) state(path string) (State, error) { + current, err := ioutil.ReadFile(filepath.Join(f.root, path, "freezer.state")) + if err != nil { + return "", err + } + return State(strings.ToLower(strings.TrimSpace(string(current)))), nil +} + +func (f *freezerController) waitState(path string, state State) error { + for { + if err := f.changeState(path, state); err != nil { + return err + } + current, err := f.state(path) + if err != nil { + return err + } + if current == state { + return nil + } + time.Sleep(1 * time.Millisecond) + } +} diff --git a/vendor/github.com/containerd/cgroups/hierarchy.go b/vendor/github.com/containerd/cgroups/hierarchy.go new file mode 100644 index 000000000..9221bf3f1 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/hierarchy.go @@ -0,0 +1,20 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +// Hierarchy enableds both unified and split hierarchy for cgroups +type Hierarchy func() ([]Subsystem, error) diff --git a/vendor/github.com/containerd/cgroups/hugetlb.go b/vendor/github.com/containerd/cgroups/hugetlb.go new file mode 100644 index 000000000..3718706d7 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/hugetlb.go @@ -0,0 +1,109 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewHugetlb(root string) (*hugetlbController, error) { + sizes, err := hugePageSizes() + if err != nil { + return nil, err + } + + return &hugetlbController{ + root: filepath.Join(root, string(Hugetlb)), + sizes: sizes, + }, nil +} + +type hugetlbController struct { + root string + sizes []string +} + +func (h *hugetlbController) Name() Name { + return Hugetlb +} + +func (h *hugetlbController) Path(path string) string { + return filepath.Join(h.root, path) +} + +func (h *hugetlbController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(h.Path(path), defaultDirPerm); err != nil { + return err + } + for _, limit := range resources.HugepageLimits { + if err := ioutil.WriteFile( + filepath.Join(h.Path(path), strings.Join([]string{"hugetlb", limit.Pagesize, "limit_in_bytes"}, ".")), + []byte(strconv.FormatUint(limit.Limit, 10)), + defaultFilePerm, + ); err != nil { + return err + } + } + return nil +} + +func (h *hugetlbController) Stat(path string, stats *Metrics) error { + for _, size := range h.sizes { + s, err := h.readSizeStat(path, size) + if err != nil { + return err + } + stats.Hugetlb = append(stats.Hugetlb, s) + } + return nil +} + +func (h *hugetlbController) readSizeStat(path, size string) (*HugetlbStat, error) { + s := HugetlbStat{ + Pagesize: size, + } + for _, t := range []struct { + name string + value *uint64 + }{ + { + name: "usage_in_bytes", + value: &s.Usage, + }, + { + name: "max_usage_in_bytes", + value: &s.Max, + }, + { + name: "failcnt", + value: &s.Failcnt, + }, + } { + v, err := readUint(filepath.Join(h.Path(path), strings.Join([]string{"hugetlb", size, t.name}, "."))) + if err != nil { + return nil, err + } + *t.value = v + } + return &s, nil +} diff --git a/vendor/github.com/containerd/cgroups/memory.go b/vendor/github.com/containerd/cgroups/memory.go new file mode 100644 index 
000000000..ce15ca2b9 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/memory.go @@ -0,0 +1,325 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" + + "golang.org/x/sys/unix" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewMemory(root string) *memoryController { + return &memoryController{ + root: filepath.Join(root, string(Memory)), + } +} + +type memoryController struct { + root string +} + +func (m *memoryController) Name() Name { + return Memory +} + +func (m *memoryController) Path(path string) string { + return filepath.Join(m.root, path) +} + +func (m *memoryController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(m.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Memory == nil { + return nil + } + if resources.Memory.Kernel != nil { + // Check if kernel memory is enabled + // We have to limit the kernel memory here as it won't be accounted at all + // until a limit is set on the cgroup and limit cannot be set once the + // cgroup has children, or if there are already tasks in the cgroup. + for _, i := range []int64{1, -1} { + if err := ioutil.WriteFile( + filepath.Join(m.Path(path), "memory.kmem.limit_in_bytes"), + []byte(strconv.FormatInt(i, 10)), + defaultFilePerm, + ); err != nil { + return checkEBUSY(err) + } + } + } + return m.set(path, getMemorySettings(resources)) +} + +func (m *memoryController) Update(path string, resources *specs.LinuxResources) error { + if resources.Memory == nil { + return nil + } + g := func(v *int64) bool { + return v != nil && *v > 0 + } + settings := getMemorySettings(resources) + if g(resources.Memory.Limit) && g(resources.Memory.Swap) { + // if the updated swap value is larger than the current memory limit set the swap changes first + // then set the memory limit as swap must always be larger than the current limit + current, err := readUint(filepath.Join(m.Path(path), "memory.limit_in_bytes")) + if err != nil { + return err + } + if current < uint64(*resources.Memory.Swap) { + settings[0], settings[1] = settings[1], settings[0] + } + } + return m.set(path, settings) +} + +func (m *memoryController) Stat(path string, stats *Metrics) error { + f, err := os.Open(filepath.Join(m.Path(path), "memory.stat")) + if err != nil { + return err + } + defer f.Close() + stats.Memory = &MemoryStat{ + Usage: &MemoryEntry{}, + Swap: &MemoryEntry{}, + Kernel: &MemoryEntry{}, + KernelTCP: &MemoryEntry{}, + } + if err := m.parseStats(f, stats.Memory); err != nil { + return err + } + for _, t := range []struct { + module string + entry *MemoryEntry + }{ + { + module: "", + entry: stats.Memory.Usage, + }, + { + module: "memsw", + entry: stats.Memory.Swap, + }, + { + module: "kmem", + entry: stats.Memory.Kernel, + }, + { + module: "kmem.tcp", + entry: stats.Memory.KernelTCP, + }, + } { + for _, tt := range []struct 
{ + name string + value *uint64 + }{ + { + name: "usage_in_bytes", + value: &t.entry.Usage, + }, + { + name: "max_usage_in_bytes", + value: &t.entry.Max, + }, + { + name: "failcnt", + value: &t.entry.Failcnt, + }, + { + name: "limit_in_bytes", + value: &t.entry.Limit, + }, + } { + parts := []string{"memory"} + if t.module != "" { + parts = append(parts, t.module) + } + parts = append(parts, tt.name) + v, err := readUint(filepath.Join(m.Path(path), strings.Join(parts, "."))) + if err != nil { + return err + } + *tt.value = v + } + } + return nil +} + +func (m *memoryController) OOMEventFD(path string) (uintptr, error) { + root := m.Path(path) + f, err := os.Open(filepath.Join(root, "memory.oom_control")) + if err != nil { + return 0, err + } + defer f.Close() + fd, _, serr := unix.RawSyscall(unix.SYS_EVENTFD2, 0, unix.EFD_CLOEXEC, 0) + if serr != 0 { + return 0, serr + } + if err := writeEventFD(root, f.Fd(), fd); err != nil { + unix.Close(int(fd)) + return 0, err + } + return fd, nil +} + +func writeEventFD(root string, cfd, efd uintptr) error { + f, err := os.OpenFile(filepath.Join(root, "cgroup.event_control"), os.O_WRONLY, 0) + if err != nil { + return err + } + _, err = f.WriteString(fmt.Sprintf("%d %d", efd, cfd)) + f.Close() + return err +} + +func (m *memoryController) parseStats(r io.Reader, stat *MemoryStat) error { + var ( + raw = make(map[string]uint64) + sc = bufio.NewScanner(r) + line int + ) + for sc.Scan() { + if err := sc.Err(); err != nil { + return err + } + key, v, err := parseKV(sc.Text()) + if err != nil { + return fmt.Errorf("%d: %v", line, err) + } + raw[key] = v + line++ + } + stat.Cache = raw["cache"] + stat.RSS = raw["rss"] + stat.RSSHuge = raw["rss_huge"] + stat.MappedFile = raw["mapped_file"] + stat.Dirty = raw["dirty"] + stat.Writeback = raw["writeback"] + stat.PgPgIn = raw["pgpgin"] + stat.PgPgOut = raw["pgpgout"] + stat.PgFault = raw["pgfault"] + stat.PgMajFault = raw["pgmajfault"] + stat.InactiveAnon = raw["inactive_anon"] + stat.ActiveAnon = raw["active_anon"] + stat.InactiveFile = raw["inactive_file"] + stat.ActiveFile = raw["active_file"] + stat.Unevictable = raw["unevictable"] + stat.HierarchicalMemoryLimit = raw["hierarchical_memory_limit"] + stat.HierarchicalSwapLimit = raw["hierarchical_memsw_limit"] + stat.TotalCache = raw["total_cache"] + stat.TotalRSS = raw["total_rss"] + stat.TotalRSSHuge = raw["total_rss_huge"] + stat.TotalMappedFile = raw["total_mapped_file"] + stat.TotalDirty = raw["total_dirty"] + stat.TotalWriteback = raw["total_writeback"] + stat.TotalPgPgIn = raw["total_pgpgin"] + stat.TotalPgPgOut = raw["total_pgpgout"] + stat.TotalPgFault = raw["total_pgfault"] + stat.TotalPgMajFault = raw["total_pgmajfault"] + stat.TotalInactiveAnon = raw["total_inactive_anon"] + stat.TotalActiveAnon = raw["total_active_anon"] + stat.TotalInactiveFile = raw["total_inactive_file"] + stat.TotalActiveFile = raw["total_active_file"] + stat.TotalUnevictable = raw["total_unevictable"] + return nil +} + +func (m *memoryController) set(path string, settings []memorySettings) error { + for _, t := range settings { + if t.value != nil { + if err := ioutil.WriteFile( + filepath.Join(m.Path(path), fmt.Sprintf("memory.%s", t.name)), + []byte(strconv.FormatInt(*t.value, 10)), + defaultFilePerm, + ); err != nil { + return err + } + } + } + return nil +} + +type memorySettings struct { + name string + value *int64 +} + +func getMemorySettings(resources *specs.LinuxResources) []memorySettings { + mem := resources.Memory + var swappiness *int64 + if mem.Swappiness != nil 
{ + v := int64(*mem.Swappiness) + swappiness = &v + } + return []memorySettings{ + { + name: "limit_in_bytes", + value: mem.Limit, + }, + { + name: "memsw.limit_in_bytes", + value: mem.Swap, + }, + { + name: "kmem.limit_in_bytes", + value: mem.Kernel, + }, + { + name: "kmem.tcp.limit_in_bytes", + value: mem.KernelTCP, + }, + { + name: "oom_control", + value: getOomControlValue(mem), + }, + { + name: "swappiness", + value: swappiness, + }, + } +} + +func checkEBUSY(err error) error { + if pathErr, ok := err.(*os.PathError); ok { + if errNo, ok := pathErr.Err.(syscall.Errno); ok { + if errNo == unix.EBUSY { + return fmt.Errorf( + "failed to set memory.kmem.limit_in_bytes, because either tasks have already joined this cgroup or it has children") + } + } + } + return err +} + +func getOomControlValue(mem *specs.LinuxMemory) *int64 { + if mem.DisableOOMKiller != nil && *mem.DisableOOMKiller { + i := int64(1) + return &i + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/metrics.pb.go b/vendor/github.com/containerd/cgroups/metrics.pb.go new file mode 100644 index 000000000..6043a8f7d --- /dev/null +++ b/vendor/github.com/containerd/cgroups/metrics.pb.go @@ -0,0 +1,4288 @@ +// Code generated by protoc-gen-gogo. +// source: github.com/containerd/cgroups/metrics.proto +// DO NOT EDIT! + +/* + Package cgroups is a generated protocol buffer package. + + It is generated from these files: + github.com/containerd/cgroups/metrics.proto + + It has these top-level messages: + Metrics + HugetlbStat + PidsStat + CPUStat + CPUUsage + Throttle + MemoryStat + MemoryEntry + BlkIOStat + BlkIOEntry + RdmaStat + RdmaEntry +*/ +package cgroups + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" +import _ "github.com/gogo/protobuf/gogoproto" + +import strings "strings" +import reflect "reflect" + +import io "io" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +type Metrics struct { + Hugetlb []*HugetlbStat `protobuf:"bytes,1,rep,name=hugetlb" json:"hugetlb,omitempty"` + Pids *PidsStat `protobuf:"bytes,2,opt,name=pids" json:"pids,omitempty"` + CPU *CPUStat `protobuf:"bytes,3,opt,name=cpu" json:"cpu,omitempty"` + Memory *MemoryStat `protobuf:"bytes,4,opt,name=memory" json:"memory,omitempty"` + Blkio *BlkIOStat `protobuf:"bytes,5,opt,name=blkio" json:"blkio,omitempty"` + Rdma *RdmaStat `protobuf:"bytes,6,opt,name=rdma" json:"rdma,omitempty"` +} + +func (m *Metrics) Reset() { *m = Metrics{} } +func (*Metrics) ProtoMessage() {} +func (*Metrics) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{0} } + +type HugetlbStat struct { + Usage uint64 `protobuf:"varint,1,opt,name=usage,proto3" json:"usage,omitempty"` + Max uint64 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"` + Failcnt uint64 `protobuf:"varint,3,opt,name=failcnt,proto3" json:"failcnt,omitempty"` + Pagesize string `protobuf:"bytes,4,opt,name=pagesize,proto3" json:"pagesize,omitempty"` +} + +func (m *HugetlbStat) Reset() { *m = HugetlbStat{} } +func (*HugetlbStat) ProtoMessage() {} +func (*HugetlbStat) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{1} } + +type PidsStat struct { + Current uint64 `protobuf:"varint,1,opt,name=current,proto3" json:"current,omitempty"` + Limit uint64 `protobuf:"varint,2,opt,name=limit,proto3" json:"limit,omitempty"` +} + +func (m *PidsStat) Reset() { *m = PidsStat{} } +func (*PidsStat) ProtoMessage() {} +func (*PidsStat) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{2} } + +type CPUStat struct { + Usage *CPUUsage `protobuf:"bytes,1,opt,name=usage" json:"usage,omitempty"` + Throttling *Throttle `protobuf:"bytes,2,opt,name=throttling" json:"throttling,omitempty"` +} + +func (m *CPUStat) Reset() { *m = CPUStat{} } +func (*CPUStat) ProtoMessage() {} +func (*CPUStat) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{3} } + +type CPUUsage struct { + // values in nanoseconds + Total uint64 `protobuf:"varint,1,opt,name=total,proto3" json:"total,omitempty"` + Kernel uint64 `protobuf:"varint,2,opt,name=kernel,proto3" json:"kernel,omitempty"` + User uint64 `protobuf:"varint,3,opt,name=user,proto3" json:"user,omitempty"` + PerCPU []uint64 `protobuf:"varint,4,rep,packed,name=per_cpu,json=perCpu" json:"per_cpu,omitempty"` +} + +func (m *CPUUsage) Reset() { *m = CPUUsage{} } +func (*CPUUsage) ProtoMessage() {} +func (*CPUUsage) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{4} } + +type Throttle struct { + Periods uint64 `protobuf:"varint,1,opt,name=periods,proto3" json:"periods,omitempty"` + ThrottledPeriods uint64 `protobuf:"varint,2,opt,name=throttled_periods,json=throttledPeriods,proto3" json:"throttled_periods,omitempty"` + ThrottledTime uint64 `protobuf:"varint,3,opt,name=throttled_time,json=throttledTime,proto3" json:"throttled_time,omitempty"` +} + +func (m *Throttle) Reset() { *m = Throttle{} } +func (*Throttle) ProtoMessage() {} +func (*Throttle) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{5} } + +type MemoryStat struct { + Cache uint64 `protobuf:"varint,1,opt,name=cache,proto3" json:"cache,omitempty"` + RSS uint64 `protobuf:"varint,2,opt,name=rss,proto3" json:"rss,omitempty"` + RSSHuge uint64 `protobuf:"varint,3,opt,name=rss_huge,json=rssHuge,proto3" json:"rss_huge,omitempty"` + MappedFile uint64 
`protobuf:"varint,4,opt,name=mapped_file,json=mappedFile,proto3" json:"mapped_file,omitempty"` + Dirty uint64 `protobuf:"varint,5,opt,name=dirty,proto3" json:"dirty,omitempty"` + Writeback uint64 `protobuf:"varint,6,opt,name=writeback,proto3" json:"writeback,omitempty"` + PgPgIn uint64 `protobuf:"varint,7,opt,name=pg_pg_in,json=pgPgIn,proto3" json:"pg_pg_in,omitempty"` + PgPgOut uint64 `protobuf:"varint,8,opt,name=pg_pg_out,json=pgPgOut,proto3" json:"pg_pg_out,omitempty"` + PgFault uint64 `protobuf:"varint,9,opt,name=pg_fault,json=pgFault,proto3" json:"pg_fault,omitempty"` + PgMajFault uint64 `protobuf:"varint,10,opt,name=pg_maj_fault,json=pgMajFault,proto3" json:"pg_maj_fault,omitempty"` + InactiveAnon uint64 `protobuf:"varint,11,opt,name=inactive_anon,json=inactiveAnon,proto3" json:"inactive_anon,omitempty"` + ActiveAnon uint64 `protobuf:"varint,12,opt,name=active_anon,json=activeAnon,proto3" json:"active_anon,omitempty"` + InactiveFile uint64 `protobuf:"varint,13,opt,name=inactive_file,json=inactiveFile,proto3" json:"inactive_file,omitempty"` + ActiveFile uint64 `protobuf:"varint,14,opt,name=active_file,json=activeFile,proto3" json:"active_file,omitempty"` + Unevictable uint64 `protobuf:"varint,15,opt,name=unevictable,proto3" json:"unevictable,omitempty"` + HierarchicalMemoryLimit uint64 `protobuf:"varint,16,opt,name=hierarchical_memory_limit,json=hierarchicalMemoryLimit,proto3" json:"hierarchical_memory_limit,omitempty"` + HierarchicalSwapLimit uint64 `protobuf:"varint,17,opt,name=hierarchical_swap_limit,json=hierarchicalSwapLimit,proto3" json:"hierarchical_swap_limit,omitempty"` + TotalCache uint64 `protobuf:"varint,18,opt,name=total_cache,json=totalCache,proto3" json:"total_cache,omitempty"` + TotalRSS uint64 `protobuf:"varint,19,opt,name=total_rss,json=totalRss,proto3" json:"total_rss,omitempty"` + TotalRSSHuge uint64 `protobuf:"varint,20,opt,name=total_rss_huge,json=totalRssHuge,proto3" json:"total_rss_huge,omitempty"` + TotalMappedFile uint64 `protobuf:"varint,21,opt,name=total_mapped_file,json=totalMappedFile,proto3" json:"total_mapped_file,omitempty"` + TotalDirty uint64 `protobuf:"varint,22,opt,name=total_dirty,json=totalDirty,proto3" json:"total_dirty,omitempty"` + TotalWriteback uint64 `protobuf:"varint,23,opt,name=total_writeback,json=totalWriteback,proto3" json:"total_writeback,omitempty"` + TotalPgPgIn uint64 `protobuf:"varint,24,opt,name=total_pg_pg_in,json=totalPgPgIn,proto3" json:"total_pg_pg_in,omitempty"` + TotalPgPgOut uint64 `protobuf:"varint,25,opt,name=total_pg_pg_out,json=totalPgPgOut,proto3" json:"total_pg_pg_out,omitempty"` + TotalPgFault uint64 `protobuf:"varint,26,opt,name=total_pg_fault,json=totalPgFault,proto3" json:"total_pg_fault,omitempty"` + TotalPgMajFault uint64 `protobuf:"varint,27,opt,name=total_pg_maj_fault,json=totalPgMajFault,proto3" json:"total_pg_maj_fault,omitempty"` + TotalInactiveAnon uint64 `protobuf:"varint,28,opt,name=total_inactive_anon,json=totalInactiveAnon,proto3" json:"total_inactive_anon,omitempty"` + TotalActiveAnon uint64 `protobuf:"varint,29,opt,name=total_active_anon,json=totalActiveAnon,proto3" json:"total_active_anon,omitempty"` + TotalInactiveFile uint64 `protobuf:"varint,30,opt,name=total_inactive_file,json=totalInactiveFile,proto3" json:"total_inactive_file,omitempty"` + TotalActiveFile uint64 `protobuf:"varint,31,opt,name=total_active_file,json=totalActiveFile,proto3" json:"total_active_file,omitempty"` + TotalUnevictable uint64 `protobuf:"varint,32,opt,name=total_unevictable,json=totalUnevictable,proto3" 
json:"total_unevictable,omitempty"` + Usage *MemoryEntry `protobuf:"bytes,33,opt,name=usage" json:"usage,omitempty"` + Swap *MemoryEntry `protobuf:"bytes,34,opt,name=swap" json:"swap,omitempty"` + Kernel *MemoryEntry `protobuf:"bytes,35,opt,name=kernel" json:"kernel,omitempty"` + KernelTCP *MemoryEntry `protobuf:"bytes,36,opt,name=kernel_tcp,json=kernelTcp" json:"kernel_tcp,omitempty"` +} + +func (m *MemoryStat) Reset() { *m = MemoryStat{} } +func (*MemoryStat) ProtoMessage() {} +func (*MemoryStat) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{6} } + +type MemoryEntry struct { + Limit uint64 `protobuf:"varint,1,opt,name=limit,proto3" json:"limit,omitempty"` + Usage uint64 `protobuf:"varint,2,opt,name=usage,proto3" json:"usage,omitempty"` + Max uint64 `protobuf:"varint,3,opt,name=max,proto3" json:"max,omitempty"` + Failcnt uint64 `protobuf:"varint,4,opt,name=failcnt,proto3" json:"failcnt,omitempty"` +} + +func (m *MemoryEntry) Reset() { *m = MemoryEntry{} } +func (*MemoryEntry) ProtoMessage() {} +func (*MemoryEntry) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{7} } + +type BlkIOStat struct { + IoServiceBytesRecursive []*BlkIOEntry `protobuf:"bytes,1,rep,name=io_service_bytes_recursive,json=ioServiceBytesRecursive" json:"io_service_bytes_recursive,omitempty"` + IoServicedRecursive []*BlkIOEntry `protobuf:"bytes,2,rep,name=io_serviced_recursive,json=ioServicedRecursive" json:"io_serviced_recursive,omitempty"` + IoQueuedRecursive []*BlkIOEntry `protobuf:"bytes,3,rep,name=io_queued_recursive,json=ioQueuedRecursive" json:"io_queued_recursive,omitempty"` + IoServiceTimeRecursive []*BlkIOEntry `protobuf:"bytes,4,rep,name=io_service_time_recursive,json=ioServiceTimeRecursive" json:"io_service_time_recursive,omitempty"` + IoWaitTimeRecursive []*BlkIOEntry `protobuf:"bytes,5,rep,name=io_wait_time_recursive,json=ioWaitTimeRecursive" json:"io_wait_time_recursive,omitempty"` + IoMergedRecursive []*BlkIOEntry `protobuf:"bytes,6,rep,name=io_merged_recursive,json=ioMergedRecursive" json:"io_merged_recursive,omitempty"` + IoTimeRecursive []*BlkIOEntry `protobuf:"bytes,7,rep,name=io_time_recursive,json=ioTimeRecursive" json:"io_time_recursive,omitempty"` + SectorsRecursive []*BlkIOEntry `protobuf:"bytes,8,rep,name=sectors_recursive,json=sectorsRecursive" json:"sectors_recursive,omitempty"` +} + +func (m *BlkIOStat) Reset() { *m = BlkIOStat{} } +func (*BlkIOStat) ProtoMessage() {} +func (*BlkIOStat) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{8} } + +type BlkIOEntry struct { + Op string `protobuf:"bytes,1,opt,name=op,proto3" json:"op,omitempty"` + Device string `protobuf:"bytes,2,opt,name=device,proto3" json:"device,omitempty"` + Major uint64 `protobuf:"varint,3,opt,name=major,proto3" json:"major,omitempty"` + Minor uint64 `protobuf:"varint,4,opt,name=minor,proto3" json:"minor,omitempty"` + Value uint64 `protobuf:"varint,5,opt,name=value,proto3" json:"value,omitempty"` +} + +func (m *BlkIOEntry) Reset() { *m = BlkIOEntry{} } +func (*BlkIOEntry) ProtoMessage() {} +func (*BlkIOEntry) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{9} } + +type RdmaStat struct { + Current []*RdmaEntry `protobuf:"bytes,1,rep,name=current" json:"current,omitempty"` + Limit []*RdmaEntry `protobuf:"bytes,2,rep,name=limit" json:"limit,omitempty"` +} + +func (m *RdmaStat) Reset() { *m = RdmaStat{} } +func (*RdmaStat) ProtoMessage() {} +func (*RdmaStat) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{10} } + +type RdmaEntry struct { 
+ Device string `protobuf:"bytes,1,opt,name=device,proto3" json:"device,omitempty"` + HcaHandles uint32 `protobuf:"varint,2,opt,name=hca_handles,json=hcaHandles,proto3" json:"hca_handles,omitempty"` + HcaObjects uint32 `protobuf:"varint,3,opt,name=hca_objects,json=hcaObjects,proto3" json:"hca_objects,omitempty"` +} + +func (m *RdmaEntry) Reset() { *m = RdmaEntry{} } +func (*RdmaEntry) ProtoMessage() {} +func (*RdmaEntry) Descriptor() ([]byte, []int) { return fileDescriptorMetrics, []int{11} } + +func init() { + proto.RegisterType((*Metrics)(nil), "io.containerd.cgroups.v1.Metrics") + proto.RegisterType((*HugetlbStat)(nil), "io.containerd.cgroups.v1.HugetlbStat") + proto.RegisterType((*PidsStat)(nil), "io.containerd.cgroups.v1.PidsStat") + proto.RegisterType((*CPUStat)(nil), "io.containerd.cgroups.v1.CPUStat") + proto.RegisterType((*CPUUsage)(nil), "io.containerd.cgroups.v1.CPUUsage") + proto.RegisterType((*Throttle)(nil), "io.containerd.cgroups.v1.Throttle") + proto.RegisterType((*MemoryStat)(nil), "io.containerd.cgroups.v1.MemoryStat") + proto.RegisterType((*MemoryEntry)(nil), "io.containerd.cgroups.v1.MemoryEntry") + proto.RegisterType((*BlkIOStat)(nil), "io.containerd.cgroups.v1.BlkIOStat") + proto.RegisterType((*BlkIOEntry)(nil), "io.containerd.cgroups.v1.BlkIOEntry") + proto.RegisterType((*RdmaStat)(nil), "io.containerd.cgroups.v1.RdmaStat") + proto.RegisterType((*RdmaEntry)(nil), "io.containerd.cgroups.v1.RdmaEntry") +} +func (m *Metrics) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Metrics) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.Hugetlb) > 0 { + for _, msg := range m.Hugetlb { + dAtA[i] = 0xa + i++ + i = encodeVarintMetrics(dAtA, i, uint64(msg.Size())) + n, err := msg.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n + } + } + if m.Pids != nil { + dAtA[i] = 0x12 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Pids.Size())) + n1, err := m.Pids.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n1 + } + if m.CPU != nil { + dAtA[i] = 0x1a + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.CPU.Size())) + n2, err := m.CPU.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n2 + } + if m.Memory != nil { + dAtA[i] = 0x22 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Memory.Size())) + n3, err := m.Memory.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n3 + } + if m.Blkio != nil { + dAtA[i] = 0x2a + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Blkio.Size())) + n4, err := m.Blkio.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n4 + } + if m.Rdma != nil { + dAtA[i] = 0x32 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Rdma.Size())) + n5, err := m.Rdma.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n5 + } + return i, nil +} + +func (m *HugetlbStat) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *HugetlbStat) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Usage != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Usage)) + } + if m.Max != 0 { + dAtA[i] = 0x10 + i++ + i = encodeVarintMetrics(dAtA, i, uint64(m.Max)) + } + if m.Failcnt != 0 { + dAtA[i] = 0x18 + i++ + i = encodeVarintMetrics(dAtA, i, 
[Generated protobuf marshaling code for the Metrics message family: the Marshal/MarshalTo, Size, String, and Unmarshal methods for the Metrics, HugetlbStat, PidsStat, CPUStat, CPUUsage, Throttle, MemoryStat, MemoryEntry, BlkIOStat, BlkIOEntry, RdmaStat, and RdmaEntry types, together with the encodeVarintMetrics, encodeFixed64Metrics, encodeFixed32Metrics, sovMetrics, sozMetrics, and valueToStringMetrics helpers.]
string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Major", wireType) + } + m.Major = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Major |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Minor", wireType) + } + m.Minor = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Minor |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Value", wireType) + } + m.Value = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Value |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *RdmaStat) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: RdmaStat: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: RdmaStat: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Current", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Current = append(m.Current, &RdmaEntry{}) + if err := m.Current[len(m.Current)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Limit", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Limit = append(m.Limit, &RdmaEntry{}) + if err := m.Limit[len(m.Limit)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := 
skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *RdmaEntry) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: RdmaEntry: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: RdmaEntry: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Device", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetrics + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Device = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field HcaHandles", wireType) + } + m.HcaHandles = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.HcaHandles |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field HcaObjects", wireType) + } + m.HcaObjects = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetrics + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.HcaObjects |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipMetrics(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthMetrics + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} + +func skipMetrics(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := 
dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthMetrics + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowMetrics + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipMetrics(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthMetrics = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowMetrics = fmt.Errorf("proto: integer overflow") +) + +func init() { proto.RegisterFile("github.com/containerd/cgroups/metrics.proto", fileDescriptorMetrics) } + +var fileDescriptorMetrics = []byte{ + // 1325 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x57, 0x4d, 0x6f, 0x1b, 0xb7, + 0x16, 0x8d, 0xac, 0xb1, 0x3e, 0xae, 0x6c, 0xc7, 0xa6, 0x13, 0x67, 0xec, 0x97, 0x27, 0x29, 0xb2, + 0xfd, 0x9e, 0x5b, 0x03, 0x32, 0x9a, 0x02, 0x41, 0x93, 0xa6, 0x28, 0x22, 0xb7, 0x41, 0x83, 0xd6, + 0x88, 0x32, 0xb2, 0x91, 0x76, 0x35, 0x18, 0x8d, 0x98, 0x31, 0xe3, 0xd1, 0x70, 0xc2, 0xe1, 0xc8, + 0x71, 0x57, 0xdd, 0xf5, 0x37, 0xf5, 0x1f, 0x64, 0xd9, 0x4d, 0x81, 0x76, 0x63, 0x34, 0xfa, 0x25, + 0x05, 0x2f, 0xe7, 0x4b, 0x49, 0xdc, 0x40, 0xbb, 0xb9, 0xbc, 0xe7, 0x1c, 0x5e, 0x5e, 0x1e, 0x8a, + 0x14, 0xec, 0x7b, 0x4c, 0x9e, 0xc6, 0xc3, 0xae, 0xcb, 0xc7, 0x07, 0x2e, 0x0f, 0xa4, 0xc3, 0x02, + 0x2a, 0x46, 0x07, 0xae, 0x27, 0x78, 0x1c, 0x46, 0x07, 0x63, 0x2a, 0x05, 0x73, 0xa3, 0x6e, 0x28, + 0xb8, 0xe4, 0xc4, 0x64, 0xbc, 0x9b, 0x83, 0xba, 0x09, 0xa8, 0x3b, 0xf9, 0x6c, 0xeb, 0x86, 0xc7, + 0x3d, 0x8e, 0xa0, 0x03, 0xf5, 0xa5, 0xf1, 0x9d, 0xdf, 0x16, 0xa0, 0x7a, 0xa4, 0x15, 0xc8, 0xd7, + 0x50, 0x3d, 0x8d, 0x3d, 0x2a, 0xfd, 0xa1, 0x59, 0x6a, 0x97, 0xf7, 0x1a, 0x77, 0x77, 0xbb, 0x57, + 0xa9, 0x75, 0xbf, 0xd3, 0xc0, 0x81, 0x74, 0xa4, 0x95, 0xb2, 0xc8, 0x3d, 0x30, 0x42, 0x36, 0x8a, + 0xcc, 0x85, 0x76, 0x69, 0xaf, 0x71, 0xb7, 0x73, 0x35, 0xbb, 0xcf, 0x46, 0x11, 0x52, 0x11, 0x4f, + 0x1e, 0x42, 0xd9, 0x0d, 0x63, 0xb3, 0x8c, 0xb4, 0x3b, 0x57, 0xd3, 0x0e, 0xfb, 0x27, 0x8a, 0xd5, + 0xab, 0x4e, 0x2f, 0x5b, 0xe5, 0xc3, 0xfe, 0x89, 0xa5, 0x68, 0xe4, 0x21, 0x54, 0xc6, 0x74, 0xcc, + 0xc5, 0x85, 0x69, 0xa0, 0xc0, 0xce, 0xd5, 0x02, 0x47, 0x88, 0xc3, 0x99, 0x13, 0x0e, 0xb9, 0x0f, + 0x8b, 0x43, 0xff, 0x8c, 0x71, 0x73, 0x11, 0xc9, 0xdb, 0x57, 0x93, 0x7b, 0xfe, 0xd9, 0x93, 0xa7, + 0xc8, 0xd5, 0x8c, 0xce, 0x19, 0x34, 0x0a, 0x6d, 0x20, 0x37, 0x60, 0x31, 0x8e, 0x1c, 0x8f, 0x9a, + 0xa5, 0x76, 0x69, 0xcf, 0xb0, 0x74, 0x40, 0x56, 0xa1, 0x3c, 0x76, 0x5e, 0x63, 0x4b, 0x0c, 0x4b, + 0x7d, 0x12, 0x13, 0xaa, 0x2f, 0x1c, 0xe6, 0xbb, 0x81, 0xc4, 0x15, 0x1b, 0x56, 0x1a, 0x92, 0x2d, + 0xa8, 0x85, 0x8e, 0x47, 0x23, 0xf6, 0x33, 0xc5, 0xb5, 0xd4, 0xad, 0x2c, 0xee, 0x3c, 0x80, 0x5a, + 0xda, 0x35, 0xa5, 0xe0, 0xc6, 0x42, 0xd0, 0x40, 0x26, 0x73, 0xa5, 0xa1, 0xaa, 0xc1, 0x67, 0x63, + 0x26, 0x93, 0xf9, 0x74, 0xd0, 0xf9, 0xb5, 0x04, 0xd5, 0xa4, 0x77, 0xe4, 0x8b, 0x62, 0x95, 0xff, + 0xba, 0x49, 0x87, 0xfd, 0x93, 0x13, 0x85, 
0x4c, 0x57, 0xd2, 0x03, 0x90, 0xa7, 0x82, 0x4b, 0xe9, + 0xb3, 0xc0, 0xfb, 0xf8, 0x1e, 0x1f, 0x6b, 0x2c, 0xb5, 0x0a, 0xac, 0xce, 0x2b, 0xa8, 0xa5, 0xb2, + 0xaa, 0x56, 0xc9, 0xa5, 0xe3, 0xa7, 0xfd, 0xc2, 0x80, 0x6c, 0x40, 0xe5, 0x8c, 0x8a, 0x80, 0xfa, + 0xc9, 0x12, 0x92, 0x88, 0x10, 0x30, 0xe2, 0x88, 0x8a, 0xa4, 0x65, 0xf8, 0x4d, 0xb6, 0xa1, 0x1a, + 0x52, 0x61, 0x2b, 0xef, 0x18, 0xed, 0xf2, 0x9e, 0xd1, 0x83, 0xe9, 0x65, 0xab, 0xd2, 0xa7, 0x42, + 0x79, 0xa3, 0x12, 0x52, 0x71, 0x18, 0xc6, 0x9d, 0xd7, 0x50, 0x4b, 0x4b, 0x51, 0x8d, 0x0b, 0xa9, + 0x60, 0x7c, 0x14, 0xa5, 0x8d, 0x4b, 0x42, 0xb2, 0x0f, 0x6b, 0x49, 0x99, 0x74, 0x64, 0xa7, 0x18, + 0x5d, 0xc1, 0x6a, 0x96, 0xe8, 0x27, 0xe0, 0x5d, 0x58, 0xc9, 0xc1, 0x92, 0x8d, 0x69, 0x52, 0xd5, + 0x72, 0x36, 0x7a, 0xcc, 0xc6, 0xb4, 0xf3, 0x57, 0x03, 0x20, 0x77, 0x9c, 0x5a, 0xaf, 0xeb, 0xb8, + 0xa7, 0x99, 0x3f, 0x30, 0x20, 0x9b, 0x50, 0x16, 0x51, 0x32, 0x95, 0x36, 0xb6, 0x35, 0x18, 0x58, + 0x6a, 0x8c, 0xfc, 0x0f, 0x6a, 0x22, 0x8a, 0x6c, 0x75, 0xba, 0xf4, 0x04, 0xbd, 0xc6, 0xf4, 0xb2, + 0x55, 0xb5, 0x06, 0x03, 0x65, 0x3b, 0xab, 0x2a, 0xa2, 0x48, 0x7d, 0x90, 0x16, 0x34, 0xc6, 0x4e, + 0x18, 0xd2, 0x91, 0xfd, 0x82, 0xf9, 0xda, 0x39, 0x86, 0x05, 0x7a, 0xe8, 0x31, 0xf3, 0xb1, 0xd3, + 0x23, 0x26, 0xe4, 0x05, 0x7a, 0xdc, 0xb0, 0x74, 0x40, 0x6e, 0x43, 0xfd, 0x5c, 0x30, 0x49, 0x87, + 0x8e, 0x7b, 0x66, 0x56, 0x30, 0x93, 0x0f, 0x10, 0x13, 0x6a, 0xa1, 0x67, 0x87, 0x9e, 0xcd, 0x02, + 0xb3, 0xaa, 0x77, 0x22, 0xf4, 0xfa, 0xde, 0x93, 0x80, 0x6c, 0x41, 0x5d, 0x67, 0x78, 0x2c, 0xcd, + 0x5a, 0xd2, 0x46, 0xaf, 0xef, 0x3d, 0x8d, 0x25, 0xd9, 0x44, 0xd6, 0x0b, 0x27, 0xf6, 0xa5, 0x59, + 0x4f, 0x53, 0x8f, 0x55, 0x48, 0xda, 0xb0, 0x14, 0x7a, 0xf6, 0xd8, 0x79, 0x99, 0xa4, 0x41, 0x97, + 0x19, 0x7a, 0x47, 0xce, 0x4b, 0x8d, 0xd8, 0x86, 0x65, 0x16, 0x38, 0xae, 0x64, 0x13, 0x6a, 0x3b, + 0x01, 0x0f, 0xcc, 0x06, 0x42, 0x96, 0xd2, 0xc1, 0x47, 0x01, 0x0f, 0xd4, 0x62, 0x8b, 0x90, 0x25, + 0xad, 0x52, 0x00, 0x14, 0x55, 0xb0, 0x1f, 0xcb, 0xb3, 0x2a, 0xd8, 0x91, 0x5c, 0x05, 0x21, 0x2b, + 0x45, 0x15, 0x04, 0xb4, 0xa1, 0x11, 0x07, 0x74, 0xc2, 0x5c, 0xe9, 0x0c, 0x7d, 0x6a, 0x5e, 0x47, + 0x40, 0x71, 0x88, 0x3c, 0x80, 0xcd, 0x53, 0x46, 0x85, 0x23, 0xdc, 0x53, 0xe6, 0x3a, 0xbe, 0xad, + 0x7f, 0x4f, 0x6c, 0x7d, 0xfc, 0x56, 0x11, 0x7f, 0xab, 0x08, 0xd0, 0x4e, 0xf8, 0x41, 0xa5, 0xc9, + 0x3d, 0x98, 0x49, 0xd9, 0xd1, 0xb9, 0x13, 0x26, 0xcc, 0x35, 0x64, 0xde, 0x2c, 0xa6, 0x07, 0xe7, + 0x4e, 0xa8, 0x79, 0x2d, 0x68, 0xe0, 0x29, 0xb1, 0xb5, 0x91, 0x88, 0x2e, 0x1b, 0x87, 0x0e, 0xd1, + 0x4d, 0x9f, 0x40, 0x5d, 0x03, 0x94, 0xa7, 0xd6, 0xd1, 0x33, 0x4b, 0xd3, 0xcb, 0x56, 0xed, 0x58, + 0x0d, 0x2a, 0x63, 0xd5, 0x30, 0x6d, 0x45, 0x11, 0xb9, 0x07, 0x2b, 0x19, 0x54, 0x7b, 0xec, 0x06, + 0xe2, 0x57, 0xa7, 0x97, 0xad, 0xa5, 0x14, 0x8f, 0x46, 0x5b, 0x4a, 0x39, 0xe8, 0xb6, 0x4f, 0x61, + 0x4d, 0xf3, 0x8a, 0x9e, 0xbb, 0x89, 0x95, 0x5c, 0xc7, 0xc4, 0x51, 0x6e, 0xbc, 0xac, 0x5e, 0x6d, + 0xbf, 0x8d, 0x42, 0xbd, 0xdf, 0xa0, 0x07, 0xff, 0x0f, 0x9a, 0x63, 0xe7, 0x4e, 0xbc, 0x85, 0x20, + 0x5d, 0xdb, 0xf3, 0xcc, 0x8e, 0xdb, 0x69, 0xb5, 0x99, 0x29, 0x4d, 0xbd, 0x25, 0x38, 0xda, 0xd7, + 0xce, 0xdc, 0x4d, 0xd5, 0x72, 0x7f, 0x6e, 0xea, 0xcd, 0xcf, 0x50, 0xca, 0xa4, 0x3b, 0x05, 0x2d, + 0xed, 0xc5, 0xad, 0x19, 0x94, 0x76, 0xe3, 0x3e, 0x90, 0x0c, 0x95, 0xbb, 0xf6, 0x3f, 0x85, 0x85, + 0xf6, 0x73, 0xeb, 0x76, 0x61, 0x5d, 0x83, 0x67, 0x0d, 0x7c, 0x1b, 0xd1, 0xba, 0x5f, 0x4f, 0x8a, + 0x2e, 0xce, 0x9a, 0x58, 0x44, 0xff, 0xb7, 0xa0, 0xfd, 0x28, 0xc7, 0xbe, 0xaf, 0x8d, 0x2d, 0x6f, + 0x7e, 0x40, 0x1b, 0x9b, 0xfe, 0xae, 0x36, 0xa2, 0x5b, 0xef, 0x69, 
0x23, 0x76, 0x3f, 0xc5, 0x16, + 0xcd, 0xde, 0x4e, 0x7e, 0xf6, 0x54, 0xe2, 0xa4, 0xe0, 0xf8, 0x2f, 0xd3, 0xab, 0xe3, 0x0e, 0xfe, + 0xf6, 0xef, 0x7e, 0xec, 0x9e, 0xfd, 0x36, 0x90, 0xe2, 0x22, 0xbd, 0x3d, 0xee, 0x83, 0xa1, 0x5c, + 0x6e, 0x76, 0xe6, 0xe1, 0x22, 0x85, 0x7c, 0x95, 0x5d, 0x09, 0xdb, 0xf3, 0x90, 0xd3, 0x9b, 0x63, + 0x00, 0xa0, 0xbf, 0x6c, 0xe9, 0x86, 0xe6, 0xce, 0x1c, 0x12, 0xbd, 0xe5, 0xe9, 0x65, 0xab, 0xfe, + 0x3d, 0x92, 0x8f, 0x0f, 0xfb, 0x56, 0x5d, 0xeb, 0x1c, 0xbb, 0x61, 0x87, 0x42, 0xa3, 0x00, 0xcc, + 0xef, 0xdd, 0x52, 0xe1, 0xde, 0xcd, 0x5f, 0x04, 0x0b, 0x1f, 0x78, 0x11, 0x94, 0x3f, 0xf8, 0x22, + 0x30, 0x66, 0x5e, 0x04, 0x9d, 0x3f, 0x16, 0xa1, 0x9e, 0xbd, 0x3b, 0x88, 0x03, 0x5b, 0x8c, 0xdb, + 0x11, 0x15, 0x13, 0xe6, 0x52, 0x7b, 0x78, 0x21, 0x69, 0x64, 0x0b, 0xea, 0xc6, 0x22, 0x62, 0x13, + 0x9a, 0xbc, 0xd9, 0x76, 0x3e, 0xf2, 0x80, 0xd1, 0xbd, 0xb9, 0xc5, 0xf8, 0x40, 0xcb, 0xf4, 0x94, + 0x8a, 0x95, 0x8a, 0x90, 0x1f, 0xe1, 0x66, 0x3e, 0xc5, 0xa8, 0xa0, 0xbe, 0x30, 0x87, 0xfa, 0x7a, + 0xa6, 0x3e, 0xca, 0x95, 0x8f, 0x61, 0x9d, 0x71, 0xfb, 0x55, 0x4c, 0xe3, 0x19, 0xdd, 0xf2, 0x1c, + 0xba, 0x6b, 0x8c, 0x3f, 0x43, 0x7e, 0xae, 0x6a, 0xc3, 0x66, 0xa1, 0x25, 0xea, 0x2e, 0x2e, 0x68, + 0x1b, 0x73, 0x68, 0x6f, 0x64, 0x35, 0xab, 0xbb, 0x3b, 0x9f, 0xe0, 0x27, 0xd8, 0x60, 0xdc, 0x3e, + 0x77, 0x98, 0x7c, 0x57, 0x7d, 0x71, 0xbe, 0x8e, 0x3c, 0x77, 0x98, 0x9c, 0x95, 0xd6, 0x1d, 0x19, + 0x53, 0xe1, 0xcd, 0x74, 0xa4, 0x32, 0x5f, 0x47, 0x8e, 0x90, 0x9f, 0xab, 0xf6, 0x61, 0x8d, 0xf1, + 0x77, 0x6b, 0xad, 0xce, 0xa1, 0x79, 0x9d, 0xf1, 0xd9, 0x3a, 0x9f, 0xc1, 0x5a, 0x44, 0x5d, 0xc9, + 0x45, 0xd1, 0x6d, 0xb5, 0x39, 0x14, 0x57, 0x13, 0x7a, 0x26, 0xd9, 0x99, 0x00, 0xe4, 0x79, 0xb2, + 0x02, 0x0b, 0x3c, 0xc4, 0xa3, 0x53, 0xb7, 0x16, 0x78, 0xa8, 0xde, 0x80, 0x23, 0xf5, 0xb3, 0xa3, + 0x0f, 0x4e, 0xdd, 0x4a, 0x22, 0x75, 0x9e, 0xc6, 0xce, 0x4b, 0x9e, 0x3e, 0x02, 0x75, 0x80, 0xa3, + 0x2c, 0xe0, 0x22, 0x39, 0x3b, 0x3a, 0x50, 0xa3, 0x13, 0xc7, 0x8f, 0x69, 0xfa, 0xe6, 0xc1, 0xa0, + 0x67, 0xbe, 0x79, 0xdb, 0xbc, 0xf6, 0xe7, 0xdb, 0xe6, 0xb5, 0x5f, 0xa6, 0xcd, 0xd2, 0x9b, 0x69, + 0xb3, 0xf4, 0xfb, 0xb4, 0x59, 0xfa, 0x7b, 0xda, 0x2c, 0x0d, 0x2b, 0xf8, 0x7f, 0xe8, 0xf3, 0x7f, + 0x02, 0x00, 0x00, 0xff, 0xff, 0xb2, 0x21, 0x0b, 0xcd, 0x6e, 0x0d, 0x00, 0x00, +} diff --git a/vendor/github.com/containerd/cgroups/metrics.proto b/vendor/github.com/containerd/cgroups/metrics.proto new file mode 100644 index 000000000..642623fce --- /dev/null +++ b/vendor/github.com/containerd/cgroups/metrics.proto @@ -0,0 +1,123 @@ +syntax = "proto3"; + +package io.containerd.cgroups.v1; + +import "gogoproto/gogo.proto"; + +message Metrics { + repeated HugetlbStat hugetlb = 1; + PidsStat pids = 2; + CPUStat cpu = 3 [(gogoproto.customname) = "CPU"]; + MemoryStat memory = 4; + BlkIOStat blkio = 5; + RdmaStat rdma = 6; +} + +message HugetlbStat { + uint64 usage = 1; + uint64 max = 2; + uint64 failcnt = 3; + string pagesize = 4; +} + +message PidsStat { + uint64 current = 1; + uint64 limit = 2; +} + +message CPUStat { + CPUUsage usage = 1; + Throttle throttling = 2; +} + +message CPUUsage { + // values in nanoseconds + uint64 total = 1; + uint64 kernel = 2; + uint64 user = 3; + repeated uint64 per_cpu = 4 [(gogoproto.customname) = "PerCPU"]; + +} + +message Throttle { + uint64 periods = 1; + uint64 throttled_periods = 2; + uint64 throttled_time = 3; +} + +message MemoryStat { + uint64 cache = 1; + uint64 rss = 2 [(gogoproto.customname) = "RSS"]; + uint64 rss_huge = 3 [(gogoproto.customname) = "RSSHuge"]; + uint64 mapped_file = 4; + uint64 dirty = 
5; + uint64 writeback = 6; + uint64 pg_pg_in = 7; + uint64 pg_pg_out = 8; + uint64 pg_fault = 9; + uint64 pg_maj_fault = 10; + uint64 inactive_anon = 11; + uint64 active_anon = 12; + uint64 inactive_file = 13; + uint64 active_file = 14; + uint64 unevictable = 15; + uint64 hierarchical_memory_limit = 16; + uint64 hierarchical_swap_limit = 17; + uint64 total_cache = 18; + uint64 total_rss = 19 [(gogoproto.customname) = "TotalRSS"]; + uint64 total_rss_huge = 20 [(gogoproto.customname) = "TotalRSSHuge"]; + uint64 total_mapped_file = 21; + uint64 total_dirty = 22; + uint64 total_writeback = 23; + uint64 total_pg_pg_in = 24; + uint64 total_pg_pg_out = 25; + uint64 total_pg_fault = 26; + uint64 total_pg_maj_fault = 27; + uint64 total_inactive_anon = 28; + uint64 total_active_anon = 29; + uint64 total_inactive_file = 30; + uint64 total_active_file = 31; + uint64 total_unevictable = 32; + MemoryEntry usage = 33; + MemoryEntry swap = 34; + MemoryEntry kernel = 35; + MemoryEntry kernel_tcp = 36 [(gogoproto.customname) = "KernelTCP"]; + +} + +message MemoryEntry { + uint64 limit = 1; + uint64 usage = 2; + uint64 max = 3; + uint64 failcnt = 4; +} + +message BlkIOStat { + repeated BlkIOEntry io_service_bytes_recursive = 1; + repeated BlkIOEntry io_serviced_recursive = 2; + repeated BlkIOEntry io_queued_recursive = 3; + repeated BlkIOEntry io_service_time_recursive = 4; + repeated BlkIOEntry io_wait_time_recursive = 5; + repeated BlkIOEntry io_merged_recursive = 6; + repeated BlkIOEntry io_time_recursive = 7; + repeated BlkIOEntry sectors_recursive = 8; +} + +message BlkIOEntry { + string op = 1; + string device = 2; + uint64 major = 3; + uint64 minor = 4; + uint64 value = 5; +} + +message RdmaStat { + repeated RdmaEntry current = 1; + repeated RdmaEntry limit = 2; +} + +message RdmaEntry { + string device = 1; + uint32 hca_handles = 2; + uint32 hca_objects = 3; +} diff --git a/vendor/github.com/containerd/cgroups/named.go b/vendor/github.com/containerd/cgroups/named.go new file mode 100644 index 000000000..06b16c3b1 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/named.go @@ -0,0 +1,39 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import "path/filepath" + +func NewNamed(root string, name Name) *namedController { + return &namedController{ + root: root, + name: name, + } +} + +type namedController struct { + root string + name Name +} + +func (n *namedController) Name() Name { + return n.name +} + +func (n *namedController) Path(path string) string { + return filepath.Join(n.root, string(n.name), path) +} diff --git a/vendor/github.com/containerd/cgroups/net_cls.go b/vendor/github.com/containerd/cgroups/net_cls.go new file mode 100644 index 000000000..8f1a2651f --- /dev/null +++ b/vendor/github.com/containerd/cgroups/net_cls.go @@ -0,0 +1,58 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewNetCls(root string) *netclsController { + return &netclsController{ + root: filepath.Join(root, string(NetCLS)), + } +} + +type netclsController struct { + root string +} + +func (n *netclsController) Name() Name { + return NetCLS +} + +func (n *netclsController) Path(path string) string { + return filepath.Join(n.root, path) +} + +func (n *netclsController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(n.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Network != nil && resources.Network.ClassID != nil && *resources.Network.ClassID > 0 { + return ioutil.WriteFile( + filepath.Join(n.Path(path), "net_cls.classid"), + []byte(strconv.FormatUint(uint64(*resources.Network.ClassID), 10)), + defaultFilePerm, + ) + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/net_prio.go b/vendor/github.com/containerd/cgroups/net_prio.go new file mode 100644 index 000000000..c77169215 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/net_prio.go @@ -0,0 +1,66 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewNetPrio(root string) *netprioController { + return &netprioController{ + root: filepath.Join(root, string(NetPrio)), + } +} + +type netprioController struct { + root string +} + +func (n *netprioController) Name() Name { + return NetPrio +} + +func (n *netprioController) Path(path string) string { + return filepath.Join(n.root, path) +} + +func (n *netprioController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(n.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Network != nil { + for _, prio := range resources.Network.Priorities { + if err := ioutil.WriteFile( + filepath.Join(n.Path(path), "net_prio_ifpriomap"), + formatPrio(prio.Name, prio.Priority), + defaultFilePerm, + ); err != nil { + return err + } + } + } + return nil +} + +func formatPrio(name string, prio uint32) []byte { + return []byte(fmt.Sprintf("%s %d", name, prio)) +} diff --git a/vendor/github.com/containerd/cgroups/paths.go b/vendor/github.com/containerd/cgroups/paths.go new file mode 100644 index 000000000..455ce857f --- /dev/null +++ b/vendor/github.com/containerd/cgroups/paths.go @@ -0,0 +1,104 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "fmt" + "path/filepath" + + "github.com/pkg/errors" +) + +type Path func(subsystem Name) (string, error) + +func RootPath(subsysem Name) (string, error) { + return "/", nil +} + +// StaticPath returns a static path to use for all cgroups +func StaticPath(path string) Path { + return func(_ Name) (string, error) { + return path, nil + } +} + +// NestedPath will nest the cgroups based on the calling processes cgroup +// placing its child processes inside its own path +func NestedPath(suffix string) Path { + paths, err := parseCgroupFile("/proc/self/cgroup") + if err != nil { + return errorPath(err) + } + return existingPath(paths, suffix) +} + +// PidPath will return the correct cgroup paths for an existing process running inside a cgroup +// This is commonly used for the Load function to restore an existing container +func PidPath(pid int) Path { + p := fmt.Sprintf("/proc/%d/cgroup", pid) + paths, err := parseCgroupFile(p) + if err != nil { + return errorPath(errors.Wrapf(err, "parse cgroup file %s", p)) + } + return existingPath(paths, "") +} + +func existingPath(paths map[string]string, suffix string) Path { + // localize the paths based on the root mount dest for nested cgroups + for n, p := range paths { + dest, err := getCgroupDestination(string(n)) + if err != nil { + return errorPath(err) + } + rel, err := filepath.Rel(dest, p) + if err != nil { + return errorPath(err) + } + if rel == "." { + rel = dest + } + paths[n] = filepath.Join("/", rel) + } + return func(name Name) (string, error) { + root, ok := paths[string(name)] + if !ok { + if root, ok = paths[fmt.Sprintf("name=%s", name)]; !ok { + return "", fmt.Errorf("unable to find %q in controller set", name) + } + } + if suffix != "" { + return filepath.Join(root, suffix), nil + } + return root, nil + } +} + +func subPath(path Path, subName string) Path { + return func(name Name) (string, error) { + p, err := path(name) + if err != nil { + return "", err + } + return filepath.Join(p, subName), nil + } +} + +func errorPath(err error) Path { + return func(_ Name) (string, error) { + return "", err + } +} diff --git a/vendor/github.com/containerd/cgroups/perf_event.go b/vendor/github.com/containerd/cgroups/perf_event.go new file mode 100644 index 000000000..648786db6 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/perf_event.go @@ -0,0 +1,37 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package cgroups + +import "path/filepath" + +func NewPerfEvent(root string) *PerfEventController { + return &PerfEventController{ + root: filepath.Join(root, string(PerfEvent)), + } +} + +type PerfEventController struct { + root string +} + +func (p *PerfEventController) Name() Name { + return PerfEvent +} + +func (p *PerfEventController) Path(path string) string { + return filepath.Join(p.root, path) +} diff --git a/vendor/github.com/containerd/cgroups/pids.go b/vendor/github.com/containerd/cgroups/pids.go new file mode 100644 index 000000000..a1cfcb88d --- /dev/null +++ b/vendor/github.com/containerd/cgroups/pids.go @@ -0,0 +1,85 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func NewPids(root string) *pidsController { + return &pidsController{ + root: filepath.Join(root, string(Pids)), + } +} + +type pidsController struct { + root string +} + +func (p *pidsController) Name() Name { + return Pids +} + +func (p *pidsController) Path(path string) string { + return filepath.Join(p.root, path) +} + +func (p *pidsController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(p.Path(path), defaultDirPerm); err != nil { + return err + } + if resources.Pids != nil && resources.Pids.Limit > 0 { + return ioutil.WriteFile( + filepath.Join(p.Path(path), "pids.max"), + []byte(strconv.FormatInt(resources.Pids.Limit, 10)), + defaultFilePerm, + ) + } + return nil +} + +func (p *pidsController) Update(path string, resources *specs.LinuxResources) error { + return p.Create(path, resources) +} + +func (p *pidsController) Stat(path string, stats *Metrics) error { + current, err := readUint(filepath.Join(p.Path(path), "pids.current")) + if err != nil { + return err + } + var max uint64 + maxData, err := ioutil.ReadFile(filepath.Join(p.Path(path), "pids.max")) + if err != nil { + return err + } + if maxS := strings.TrimSpace(string(maxData)); maxS != "max" { + if max, err = parseUint(maxS, 10, 64); err != nil { + return err + } + } + stats.Pids = &PidsStat{ + Current: current, + Limit: max, + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/rdma.go b/vendor/github.com/containerd/cgroups/rdma.go new file mode 100644 index 000000000..4f423d33a --- /dev/null +++ b/vendor/github.com/containerd/cgroups/rdma.go @@ -0,0 +1,153 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "io/ioutil" + "math" + "os" + "path/filepath" + "strconv" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +type rdmaController struct { + root string +} + +func (p *rdmaController) Name() Name { + return Rdma +} + +func (p *rdmaController) Path(path string) string { + return filepath.Join(p.root, path) +} + +func NewRdma(root string) *rdmaController { + return &rdmaController{ + root: filepath.Join(root, string(Rdma)), + } +} + +func createCmdString(device string, limits *specs.LinuxRdma) string { + var cmdString string + + cmdString = device + if limits.HcaHandles != nil { + cmdString = cmdString + " " + "hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10) + } + + if limits.HcaObjects != nil { + cmdString = cmdString + " " + "hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10) + } + return cmdString +} + +func (p *rdmaController) Create(path string, resources *specs.LinuxResources) error { + if err := os.MkdirAll(p.Path(path), defaultDirPerm); err != nil { + return err + } + + for device, limit := range resources.Rdma { + if device != "" && (limit.HcaHandles != nil || limit.HcaObjects != nil) { + return ioutil.WriteFile( + filepath.Join(p.Path(path), "rdma.max"), + []byte(createCmdString(device, &limit)), + defaultFilePerm, + ) + } + } + return nil +} + +func (p *rdmaController) Update(path string, resources *specs.LinuxResources) error { + return p.Create(path, resources) +} + +func parseRdmaKV(raw string, entry *RdmaEntry) { + var value uint64 + var err error + + parts := strings.Split(raw, "=") + switch len(parts) { + case 2: + if parts[1] == "max" { + value = math.MaxUint32 + } else { + value, err = parseUint(parts[1], 10, 32) + if err != nil { + return + } + } + if parts[0] == "hca_handle" { + entry.HcaHandles = uint32(value) + } else if parts[0] == "hca_object" { + entry.HcaObjects = uint32(value) + } + } +} + +func toRdmaEntry(strEntries []string) []*RdmaEntry { + var rdmaEntries []*RdmaEntry + for i := range strEntries { + parts := strings.Fields(strEntries[i]) + switch len(parts) { + case 3: + entry := new(RdmaEntry) + entry.Device = parts[0] + parseRdmaKV(parts[1], entry) + parseRdmaKV(parts[2], entry) + + rdmaEntries = append(rdmaEntries, entry) + default: + continue + } + } + return rdmaEntries +} + +func (p *rdmaController) Stat(path string, stats *Metrics) error { + + currentData, err := ioutil.ReadFile(filepath.Join(p.Path(path), "rdma.current")) + if err != nil { + return err + } + currentPerDevices := strings.Split(string(currentData), "\n") + + maxData, err := ioutil.ReadFile(filepath.Join(p.Path(path), "rdma.max")) + if err != nil { + return err + } + maxPerDevices := strings.Split(string(maxData), "\n") + + // If device got removed between reading two files, ignore returning + // stats. + if len(currentPerDevices) != len(maxPerDevices) { + return nil + } + + currentEntries := toRdmaEntry(currentPerDevices) + maxEntries := toRdmaEntry(maxPerDevices) + + stats.Rdma = &RdmaStat{ + Current: currentEntries, + Limit: maxEntries, + } + return nil +} diff --git a/vendor/github.com/containerd/cgroups/state.go b/vendor/github.com/containerd/cgroups/state.go new file mode 100644 index 000000000..cfeabbbc6 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/state.go @@ -0,0 +1,28 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +// State is a type that represents the state of the current cgroup +type State string + +const ( + Unknown State = "" + Thawed State = "thawed" + Frozen State = "frozen" + Freezing State = "freezing" + Deleted State = "deleted" +) diff --git a/vendor/github.com/containerd/cgroups/subsystem.go b/vendor/github.com/containerd/cgroups/subsystem.go new file mode 100644 index 000000000..933a6c38d --- /dev/null +++ b/vendor/github.com/containerd/cgroups/subsystem.go @@ -0,0 +1,112 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "fmt" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// Name is a typed name for a cgroup subsystem +type Name string + +const ( + Devices Name = "devices" + Hugetlb Name = "hugetlb" + Freezer Name = "freezer" + Pids Name = "pids" + NetCLS Name = "net_cls" + NetPrio Name = "net_prio" + PerfEvent Name = "perf_event" + Cpuset Name = "cpuset" + Cpu Name = "cpu" + Cpuacct Name = "cpuacct" + Memory Name = "memory" + Blkio Name = "blkio" + Rdma Name = "rdma" +) + +// Subsystems returns a complete list of the default cgroups +// avaliable on most linux systems +func Subsystems() []Name { + n := []Name{ + Hugetlb, + Freezer, + Pids, + NetCLS, + NetPrio, + PerfEvent, + Cpuset, + Cpu, + Cpuacct, + Memory, + Blkio, + Rdma, + } + if !isUserNS { + n = append(n, Devices) + } + return n +} + +type Subsystem interface { + Name() Name +} + +type pather interface { + Subsystem + Path(path string) string +} + +type creator interface { + Subsystem + Create(path string, resources *specs.LinuxResources) error +} + +type deleter interface { + Subsystem + Delete(path string) error +} + +type stater interface { + Subsystem + Stat(path string, stats *Metrics) error +} + +type updater interface { + Subsystem + Update(path string, resources *specs.LinuxResources) error +} + +// SingleSubsystem returns a single cgroup subsystem within the base Hierarchy +func SingleSubsystem(baseHierarchy Hierarchy, subsystem Name) Hierarchy { + return func() ([]Subsystem, error) { + subsystems, err := baseHierarchy() + if err != nil { + return nil, err + } + for _, s := range subsystems { + if s.Name() == subsystem { + return []Subsystem{ + s, + }, nil + } + } + return nil, fmt.Errorf("unable to find subsystem %s", subsystem) + } +} diff --git a/vendor/github.com/containerd/cgroups/systemd.go b/vendor/github.com/containerd/cgroups/systemd.go new file mode 100644 index 
000000000..8153d744c --- /dev/null +++ b/vendor/github.com/containerd/cgroups/systemd.go @@ -0,0 +1,131 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "fmt" + "path/filepath" + "strings" + "sync" + + systemdDbus "github.com/coreos/go-systemd/dbus" + "github.com/godbus/dbus" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const ( + SystemdDbus Name = "systemd" + defaultSlice = "system.slice" +) + +func Systemd() ([]Subsystem, error) { + root, err := v1MountPoint() + if err != nil { + return nil, err + } + defaultSubsystems, err := defaults(root) + if err != nil { + return nil, err + } + s, err := NewSystemd(root) + if err != nil { + return nil, err + } + // make sure the systemd controller is added first + return append([]Subsystem{s}, defaultSubsystems...), nil +} + +func Slice(slice, name string) Path { + if slice == "" { + slice = defaultSlice + } + return func(subsystem Name) (string, error) { + return filepath.Join(slice, unitName(name)), nil + } +} + +func NewSystemd(root string) (*SystemdController, error) { + return &SystemdController{ + root: root, + }, nil +} + +type SystemdController struct { + mu sync.Mutex + root string +} + +func (s *SystemdController) Name() Name { + return SystemdDbus +} + +func (s *SystemdController) Create(path string, resources *specs.LinuxResources) error { + conn, err := systemdDbus.New() + if err != nil { + return err + } + defer conn.Close() + slice, name := splitName(path) + properties := []systemdDbus.Property{ + systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", name)), + systemdDbus.PropWants(slice), + newProperty("DefaultDependencies", false), + newProperty("Delegate", true), + newProperty("MemoryAccounting", true), + newProperty("CPUAccounting", true), + newProperty("BlockIOAccounting", true), + } + ch := make(chan string) + _, err = conn.StartTransientUnit(name, "replace", properties, ch) + if err != nil { + return err + } + <-ch + return nil +} + +func (s *SystemdController) Delete(path string) error { + conn, err := systemdDbus.New() + if err != nil { + return err + } + defer conn.Close() + _, name := splitName(path) + ch := make(chan string) + _, err = conn.StopUnit(name, "replace", ch) + if err != nil { + return err + } + <-ch + return nil +} + +func newProperty(name string, units interface{}) systemdDbus.Property { + return systemdDbus.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +func unitName(name string) string { + return fmt.Sprintf("%s.slice", name) +} + +func splitName(path string) (slice string, unit string) { + slice, unit = filepath.Split(path) + return strings.TrimSuffix(slice, "/"), unit +} diff --git a/vendor/github.com/containerd/cgroups/ticks.go b/vendor/github.com/containerd/cgroups/ticks.go new file mode 100644 index 000000000..84dc38d0c --- /dev/null +++ b/vendor/github.com/containerd/cgroups/ticks.go @@ -0,0 +1,26 @@ +/* + Copyright The containerd Authors. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +func getClockTicks() uint64 { + // The value comes from `C.sysconf(C._SC_CLK_TCK)`, and + // on Linux it's a constant which is safe to be hard coded, + // so we can avoid using cgo here. + // See https://github.com/containerd/cgroups/pull/12 for + // more details. + return 100 +} diff --git a/vendor/github.com/containerd/cgroups/utils.go b/vendor/github.com/containerd/cgroups/utils.go new file mode 100644 index 000000000..345be4e46 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/utils.go @@ -0,0 +1,297 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + units "github.com/docker/go-units" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +var isUserNS = runningInUserNS() + +// runningInUserNS detects whether we are currently running in a user namespace. +// Copied from github.com/lxc/lxd/shared/util.go +func runningInUserNS() bool { + file, err := os.Open("/proc/self/uid_map") + if err != nil { + // This kernel-provided file only exists if user namespaces are supported + return false + } + defer file.Close() + + buf := bufio.NewReader(file) + l, _, err := buf.ReadLine() + if err != nil { + return false + } + + line := string(l) + var a, b, c int64 + fmt.Sscanf(line, "%d %d %d", &a, &b, &c) + /* + * We assume we are in the initial user namespace if we have a full + * range - 4294967295 uids starting at uid 0. 
+ */ + if a == 0 && b == 0 && c == 4294967295 { + return false + } + return true +} + +// defaults returns all known groups +func defaults(root string) ([]Subsystem, error) { + h, err := NewHugetlb(root) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + s := []Subsystem{ + NewNamed(root, "systemd"), + NewFreezer(root), + NewPids(root), + NewNetCls(root), + NewNetPrio(root), + NewPerfEvent(root), + NewCputset(root), + NewCpu(root), + NewCpuacct(root), + NewMemory(root), + NewBlkio(root), + NewRdma(root), + } + // only add the devices cgroup if we are not in a user namespace + // because modifications are not allowed + if !isUserNS { + s = append(s, NewDevices(root)) + } + // add the hugetlb cgroup if error wasn't due to missing hugetlb + // cgroup support on the host + if err == nil { + s = append(s, h) + } + return s, nil +} + +// remove will remove a cgroup path handling EAGAIN and EBUSY errors and +// retrying the remove after a exp timeout +func remove(path string) error { + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + if err := os.RemoveAll(path); err == nil { + return nil + } + } + return fmt.Errorf("cgroups: unable to remove path %q", path) +} + +// readPids will read all the pids in a cgroup by the provided path +func readPids(path string, subsystem Name) ([]Process, error) { + f, err := os.Open(filepath.Join(path, cgroupProcs)) + if err != nil { + return nil, err + } + defer f.Close() + var ( + out []Process + s = bufio.NewScanner(f) + ) + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, Process{ + Pid: pid, + Subsystem: subsystem, + Path: path, + }) + } + } + return out, nil +} + +func hugePageSizes() ([]string, error) { + var ( + pageSizes []string + sizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"} + ) + files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + if err != nil { + return nil, err + } + for _, st := range files { + nameArray := strings.Split(st.Name(), "-") + pageSize, err := units.RAMInBytes(nameArray[1]) + if err != nil { + return nil, err + } + pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)) + } + return pageSizes, nil +} + +func readUint(path string) (uint64, error) { + v, err := ioutil.ReadFile(path) + if err != nil { + return 0, err + } + return parseUint(strings.TrimSpace(string(v)), 10, 64) +} + +func parseUint(s string, base, bitSize int) (uint64, error) { + v, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. 
Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && + intErr.(*strconv.NumError).Err == strconv.ErrRange && + intValue < 0 { + return 0, nil + } + return 0, err + } + return v, nil +} + +func parseKV(raw string) (string, uint64, error) { + parts := strings.Fields(raw) + switch len(parts) { + case 2: + v, err := parseUint(parts[1], 10, 64) + if err != nil { + return "", 0, err + } + return parts[0], v, nil + default: + return "", 0, ErrInvalidFormat + } +} + +func parseCgroupFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + return parseCgroupFromReader(f) +} + +func parseCgroupFromReader(r io.Reader) (map[string]string, error) { + var ( + cgroups = make(map[string]string) + s = bufio.NewScanner(r) + ) + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + var ( + text = s.Text() + parts = strings.SplitN(text, ":", 3) + ) + if len(parts) < 3 { + return nil, fmt.Errorf("invalid cgroup entry: %q", text) + } + for _, subs := range strings.Split(parts[1], ",") { + if subs != "" { + cgroups[subs] = parts[2] + } + } + } + return cgroups, nil +} + +func getCgroupDestination(subsystem string) (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + fields := strings.Fields(s.Text()) + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[3], nil + } + } + } + return "", ErrNoCgroupMountDestination +} + +func pathers(subystems []Subsystem) []pather { + var out []pather + for _, s := range subystems { + if p, ok := s.(pather); ok { + out = append(out, p) + } + } + return out +} + +func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error { + if c, ok := s.(creator); ok { + p, err := path(s.Name()) + if err != nil { + return err + } + if err := c.Create(p, resources); err != nil { + return err + } + } else if c, ok := s.(pather); ok { + p, err := path(s.Name()) + if err != nil { + return err + } + // do the default create if the group does not have a custom one + if err := os.MkdirAll(c.Path(p), defaultDirPerm); err != nil { + return err + } + } + return nil +} + +func cleanPath(path string) string { + if path == "" { + return "" + } + path = filepath.Clean(path) + if !filepath.IsAbs(path) { + path, _ = filepath.Rel(string(os.PathSeparator), filepath.Clean(string(os.PathSeparator)+path)) + } + return filepath.Clean(path) +} diff --git a/vendor/github.com/containerd/cgroups/v1.go b/vendor/github.com/containerd/cgroups/v1.go new file mode 100644 index 000000000..a076d4692 --- /dev/null +++ b/vendor/github.com/containerd/cgroups/v1.go @@ -0,0 +1,81 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package cgroups + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strings" +) + +// V1 returns all the groups in the default cgroups mountpoint in a single hierarchy +func V1() ([]Subsystem, error) { + root, err := v1MountPoint() + if err != nil { + return nil, err + } + subsystems, err := defaults(root) + if err != nil { + return nil, err + } + var enabled []Subsystem + for _, s := range pathers(subsystems) { + // check and remove the default groups that do not exist + if _, err := os.Lstat(s.Path("/")); err == nil { + enabled = append(enabled, s) + } + } + return enabled, nil +} + +// v1MountPoint returns the mount point where the cgroup +// mountpoints are mounted in a single hiearchy +func v1MountPoint() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + if err := scanner.Err(); err != nil { + return "", err + } + var ( + text = scanner.Text() + fields = strings.Split(text, " ") + // safe as mountinfo encodes mountpoints with spaces as \040. + index = strings.Index(text, " - ") + postSeparatorFields = strings.Fields(text[index+3:]) + numPostFields = len(postSeparatorFields) + ) + // this is an error as we can't detect if the mount is for "cgroup" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + if postSeparatorFields[0] == "cgroup" { + // check that the mount is properly formated. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + return filepath.Dir(fields[4]), nil + } + } + return "", ErrMountPointNotExist +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/README.md b/vendor/github.com/containerd/containerd/runtime/v2/README.md new file mode 100644 index 000000000..481861643 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/README.md @@ -0,0 +1,173 @@ +# Runtime v2 + +Runtime v2 introduces a first class shim API for runtime authors to integrate with containerd. +The shim API is minimal and scoped to the execution lifecycle of a container. + +## Binary Naming + +Users specify the runtime they wish to use when creating a container. +The runtime can also be changed via a container update. + +```bash +> ctr run --runtime io.containerd.runc.v1 +``` + +When a user specifies a runtime name, `io.containerd.runc.v1`, they will specify the name and version of the runtime. +This will be translated by containerd into a binary name for the shim. + +`io.containerd.runc.v1` -> `containerd-shim-runc-v1` + +containerd keeps the `containerd-shim-*` prefix so that users can `ps aux | grep containerd-shim` to see running shims on their system. + +## Shim Authoring + +This section is dedicated to runtime authors wishing to build a shim. +It will detail how the API works and different considerations when building shim. + +### Commands + +Container information is provided to a shim in two ways. +The OCI Runtime Bundle and on the `Create` rpc request. + +#### `start` + +Each shim MUST implement a `start` subcommand. +This command will launch new shims. +The start command MUST accept the following flags: + +* `-namespace` the namespace for the container +* `-id` the id of the container +* `-address` the address of the containerd's main socket +* `-publish-binary` the binary path to publish events back to containerd + +The start command, as well as all binary calls to the shim, has the bundle for the container set as the `cwd`. 
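+
+For illustration only, containerd's launch of the start subcommand could look like the following; the socket path, publish-binary path, and container id are hypothetical placeholders, and the working directory is the container's bundle:
+
+```bash
+# Hypothetical invocation: flags come before the positional action ("start"),
+# and the shim writes the address it will serve on to stdout.
+containerd-shim-runc-v1 \
+    -namespace default \
+    -id example-container \
+    -address /run/containerd/containerd.sock \
+    -publish-binary /usr/local/bin/containerd \
+    start
+```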
+ +The start command MUST return an address to a shim for containerd to issue API requests for container operations. + +The start command can either start a new shim or return an address to an existing shim based on the shim's logic. + +#### `delete` + +Each shim MUST implement a `delete` subcommand. +This command allows containerd to delete any container resources created, mounted, and/or run by a shim when containerd can no longer communicate over rpc. +This happens if a shim is SIGKILL'd with a running container. +These resources will need to be cleaned up when containerd looses the connection to a shim. +This is also used when containerd boots and reconnects to shims. +If a bundle is still on disk but containerd cannot connect to a shim, the delete command is invoked. + +The delete command MUST accept the following flags: + +* `-namespace` the namespace for the container +* `-id` the id of the container +* `-address` the address of the containerd's main socket +* `-publish-binary` the binary path to publish events back to containerd + +The delete command will be executed in the container's bundle as its `cwd`. + +### Host Level Shim Configuration + +containerd does not provide any host level configuration for shims via the API. +If a shim needs configuration from the user with host level information across all instances, a shim specific configuration file can be setup. + +### Container Level Shim Configuration + +On the create request, there is a generic `*protobuf.Any` that allows a user to specify container level configuration for the shim. + +```proto +message CreateTaskRequest { + string id = 1; + ... + google.protobuf.Any options = 10; +} +``` + +A shim author can create their own protobuf message for configuration and clients can import and provide this information is needed. + +### I/O + +I/O for a container is provided by the client to the shim via fifo on Linux, named pipes on Windows, or log files on disk. +The paths to these files are provided on the `Create` rpc for the initial creation and on the `Exec` rpc for additional processes. + +```proto +message CreateTaskRequest { + string id = 1; + bool terminal = 4; + string stdin = 5; + string stdout = 6; + string stderr = 7; +} +``` + +```proto +message ExecProcessRequest { + string id = 1; + string exec_id = 2; + bool terminal = 3; + string stdin = 4; + string stdout = 5; + string stderr = 6; +} +``` + +Containers that are to be launched with an interactive terminal will have the `terminal` field set to `true`, data is still copied over the files(fifos,pipes) in the same way as non interactive containers. + +### Root Filesystems + +The root filesystem for the containers is provided by on the `Create` rpc. +Shims are responsible for managing the lifecycle of the filesystem mount during the lifecycle of a container. + +```proto +message CreateTaskRequest { + string id = 1; + string bundle = 2; + repeated containerd.types.Mount rootfs = 3; + ... +} +``` + +The mount protobuf message is: + +```proto +message Mount { + // Type defines the nature of the mount. + string type = 1; + // Source specifies the name of the mount. Depending on mount type, this + // may be a volume name or a host path, or even ignored. + string source = 2; + // Target path in container + string target = 3; + // Options specifies zero or more fstab style mount options. + repeated string options = 4; +} +``` + +Shims are responsible for mounting the filesystem into the `rootfs/` directory of the bundle. 
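+
+As a non-normative sketch (not part of the shim API), a Linux shim could translate these API mounts and apply them under the bundle with containerd's `mount` package; the helper name and the request/bundle plumbing below are assumptions of this example:
+
+```go
+// Sketch: mount the rootfs received on the Create rpc into <bundle>/rootfs.
+// Assumes github.com/containerd/containerd/mount and github.com/pkg/errors.
+package shimexample
+
+import (
+    "path/filepath"
+
+    "github.com/containerd/containerd/api/types"
+    "github.com/containerd/containerd/mount"
+    "github.com/pkg/errors"
+)
+
+// mountRootfs converts the API mounts from CreateTaskRequest.Rootfs and
+// mounts them onto the bundle's rootfs/ directory.
+func mountRootfs(bundle string, rootfs []*types.Mount) error {
+    target := filepath.Join(bundle, "rootfs")
+    var mounts []mount.Mount
+    for _, m := range rootfs {
+        mounts = append(mounts, mount.Mount{
+            Type:    m.Type,
+            Source:  m.Source,
+            Options: m.Options,
+        })
+    }
+    if err := mount.All(mounts, target); err != nil {
+        return errors.Wrapf(err, "failed to mount rootfs onto %s", target)
+    }
+    return nil
+}
+```
+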
+Shims are also responsible for unmounting of the filesystem. +During a `delete` binary call, the shim MUST ensure that filesystem is also unmounted. +Filesystems are provided by the containerd snapshotters. + +### Events + +The shim MUST publish a `runtime.TaskExitEventTopic` when the container exits. +If the shim collects Out of Memory events, it SHOULD also publish a `runtime.TaskOOMEventTopic`. + +### Other + +#### Unsupported rpcs + +If a shim does not or cannot implement an rpc call, it MUST return a `github.com/containerd/containerd/errdefs.ErrNotImplemented` error. + +#### Debugging and Shim Logs + +A fifo on unix or named pipe on Windows will be provided to the shim. +It can be located inside the `cwd` of the shim named "log". +The shims can use the existing `github.com/containerd/containerd/log` package to log debug messages. +Messages will automatically be output in the containerd's daemon logs with the correct fields and runtime set. + +#### ttrpc + +[ttrpc](https://github.com/containerd/ttrpc) is the only currently supported protocol for shims. +It works with standard protobufs and GRPC services as well as generating clients. +The only difference between grpc and ttrpc is the wire protocol. +ttrpc removes the http stack in order to save memory and binary size to keep shims small. +It is recommended to use ttrpc in your shim but grpc support is also in development. diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/reaper_unix.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/reaper_unix.go new file mode 100644 index 000000000..2937f1a9e --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/reaper_unix.go @@ -0,0 +1,110 @@ +// +build !windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package shim + +import ( + "os/exec" + "sync" + "time" + + "github.com/containerd/containerd/sys" + runc "github.com/containerd/go-runc" + "github.com/pkg/errors" +) + +// ErrNoSuchProcess is returned when the process no longer exists +var ErrNoSuchProcess = errors.New("no such process") + +const bufferSize = 32 + +// Reap should be called when the process receives an SIGCHLD. 
Reap will reap +// all exited processes and close their wait channels +func Reap() error { + now := time.Now() + exits, err := sys.Reap(false) + Default.Lock() + for c := range Default.subscribers { + for _, e := range exits { + c <- runc.Exit{ + Timestamp: now, + Pid: e.Pid, + Status: e.Status, + } + } + + } + Default.Unlock() + return err +} + +// Default is the default monitor initialized for the package +var Default = &Monitor{ + subscribers: make(map[chan runc.Exit]struct{}), +} + +// Monitor monitors the underlying system for process status changes +type Monitor struct { + sync.Mutex + + subscribers map[chan runc.Exit]struct{} +} + +// Start starts the command a registers the process with the reaper +func (m *Monitor) Start(c *exec.Cmd) (chan runc.Exit, error) { + ec := m.Subscribe() + if err := c.Start(); err != nil { + m.Unsubscribe(ec) + return nil, err + } + return ec, nil +} + +// Wait blocks until a process is signal as dead. +// User should rely on the value of the exit status to determine if the +// command was successful or not. +func (m *Monitor) Wait(c *exec.Cmd, ec chan runc.Exit) (int, error) { + for e := range ec { + if e.Pid == c.Process.Pid { + // make sure we flush all IO + c.Wait() + m.Unsubscribe(ec) + return e.Status, nil + } + } + // return no such process if the ec channel is closed and no more exit + // events will be sent + return -1, ErrNoSuchProcess +} + +// Subscribe to process exit changes +func (m *Monitor) Subscribe() chan runc.Exit { + c := make(chan runc.Exit, bufferSize) + m.Lock() + m.subscribers[c] = struct{}{} + m.Unlock() + return c +} + +// Unsubscribe to process exit changes +func (m *Monitor) Unsubscribe(c chan runc.Exit) { + m.Lock() + delete(m.subscribers, c) + close(c) + m.Unlock() +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim.go new file mode 100644 index 000000000..39484c191 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim.go @@ -0,0 +1,269 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package shim + +import ( + "context" + "flag" + "fmt" + "os" + "runtime" + "runtime/debug" + "strings" + "time" + + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/namespaces" + shimapi "github.com/containerd/containerd/runtime/v2/task" + "github.com/containerd/ttrpc" + "github.com/gogo/protobuf/proto" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// Client for a shim server +type Client struct { + service shimapi.TaskService + context context.Context + signals chan os.Signal +} + +// Init func for the creation of a shim server +type Init func(context.Context, string, events.Publisher) (Shim, error) + +// Shim server interface +type Shim interface { + shimapi.TaskService + Cleanup(ctx context.Context) (*shimapi.DeleteResponse, error) + StartShim(ctx context.Context, id, containerdBinary, containerdAddress string) (string, error) +} + +// OptsKey is the context key for the Opts value. +type OptsKey struct{} + +// Opts are context options associated with the shim invocation. +type Opts struct { + BundlePath string + Debug bool +} + +var ( + debugFlag bool + idFlag string + namespaceFlag string + socketFlag string + bundlePath string + addressFlag string + containerdBinaryFlag string + action string +) + +func parseFlags() { + flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs") + flag.StringVar(&namespaceFlag, "namespace", "", "namespace that owns the shim") + flag.StringVar(&idFlag, "id", "", "id of the task") + flag.StringVar(&socketFlag, "socket", "", "abstract socket path to serve") + flag.StringVar(&bundlePath, "bundle", "", "path to the bundle if not workdir") + + flag.StringVar(&addressFlag, "address", "", "grpc address back to main containerd") + flag.StringVar(&containerdBinaryFlag, "publish-binary", "containerd", "path to publish binary (used for publishing events)") + + flag.Parse() + action = flag.Arg(0) +} + +func setRuntime() { + debug.SetGCPercent(40) + go func() { + for range time.Tick(30 * time.Second) { + debug.FreeOSMemory() + } + }() + if os.Getenv("GOMAXPROCS") == "" { + // If GOMAXPROCS hasn't been set, we default to a value of 2 to reduce + // the number of Go stacks present in the shim. 
+ runtime.GOMAXPROCS(2) + } +} + +func setLogger(ctx context.Context, id string) error { + logrus.SetFormatter(&logrus.TextFormatter{ + TimestampFormat: log.RFC3339NanoFixed, + FullTimestamp: true, + }) + if debugFlag { + logrus.SetLevel(logrus.DebugLevel) + } + f, err := openLog(ctx, id) + if err != nil { + return err + } + logrus.SetOutput(f) + return nil +} + +// Run initializes and runs a shim server +func Run(id string, initFunc Init) { + if err := run(id, initFunc); err != nil { + fmt.Fprintf(os.Stderr, "%s: %s\n", id, err) + os.Exit(1) + } +} + +func run(id string, initFunc Init) error { + parseFlags() + setRuntime() + + signals, err := setupSignals() + if err != nil { + return err + } + if err := subreaper(); err != nil { + return err + } + publisher := &remoteEventsPublisher{ + address: addressFlag, + containerdBinaryPath: containerdBinaryFlag, + } + if namespaceFlag == "" { + return fmt.Errorf("shim namespace cannot be empty") + } + ctx := namespaces.WithNamespace(context.Background(), namespaceFlag) + ctx = context.WithValue(ctx, OptsKey{}, Opts{BundlePath: bundlePath, Debug: debugFlag}) + ctx = log.WithLogger(ctx, log.G(ctx).WithField("runtime", id)) + + service, err := initFunc(ctx, idFlag, publisher) + if err != nil { + return err + } + switch action { + case "delete": + logger := logrus.WithFields(logrus.Fields{ + "pid": os.Getpid(), + "namespace": namespaceFlag, + }) + go handleSignals(logger, signals) + response, err := service.Cleanup(ctx) + if err != nil { + return err + } + data, err := proto.Marshal(response) + if err != nil { + return err + } + if _, err := os.Stdout.Write(data); err != nil { + return err + } + return nil + case "start": + address, err := service.StartShim(ctx, idFlag, containerdBinaryFlag, addressFlag) + if err != nil { + return err + } + if _, err := os.Stdout.WriteString(address); err != nil { + return err + } + return nil + default: + if err := setLogger(ctx, idFlag); err != nil { + return err + } + client := NewShimClient(ctx, service, signals) + return client.Serve() + } +} + +// NewShimClient creates a new shim server client +func NewShimClient(ctx context.Context, svc shimapi.TaskService, signals chan os.Signal) *Client { + s := &Client{ + service: svc, + context: ctx, + signals: signals, + } + return s +} + +// Serve the shim server +func (s *Client) Serve() error { + dump := make(chan os.Signal, 32) + setupDumpStacks(dump) + + path, err := os.Getwd() + if err != nil { + return err + } + server, err := newServer() + if err != nil { + return errors.Wrap(err, "failed creating server") + } + + logrus.Debug("registering ttrpc server") + shimapi.RegisterTaskService(server, s.service) + + if err := serve(s.context, server, socketFlag); err != nil { + return err + } + logger := logrus.WithFields(logrus.Fields{ + "pid": os.Getpid(), + "path": path, + "namespace": namespaceFlag, + }) + go func() { + for range dump { + dumpStacks(logger) + } + }() + return handleSignals(logger, s.signals) +} + +// serve serves the ttrpc API over a unix socket at the provided path +// this function does not block +func serve(ctx context.Context, server *ttrpc.Server, path string) error { + l, err := serveListener(path) + if err != nil { + return err + } + go func() { + defer l.Close() + if err := server.Serve(ctx, l); err != nil && + !strings.Contains(err.Error(), "use of closed network connection") { + logrus.WithError(err).Fatal("containerd-shim: ttrpc server failure") + } + }() + return nil +} + +func dumpStacks(logger *logrus.Entry) { + var ( + buf []byte + 
stackSize int + ) + bufferLen := 16384 + for stackSize == len(buf) { + buf = make([]byte, bufferLen) + stackSize = runtime.Stack(buf, true) + bufferLen *= 2 + } + buf = buf[:stackSize] + logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) +} + +type remoteEventsPublisher struct { + address string + containerdBinaryPath string +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_darwin.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_darwin.go new file mode 100644 index 000000000..314b45cb5 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_darwin.go @@ -0,0 +1,29 @@ +// +build darwin + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package shim + +import "github.com/containerd/ttrpc" + +func newServer() (*ttrpc.Server, error) { + return ttrpc.NewServer() +} + +func subreaper() error { + return nil +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_linux.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_linux.go new file mode 100644 index 000000000..7ad2a7262 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_linux.go @@ -0,0 +1,30 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package shim + +import ( + "github.com/containerd/containerd/sys" + "github.com/containerd/ttrpc" +) + +func newServer() (*ttrpc.Server, error) { + return ttrpc.NewServer(ttrpc.WithServerHandshaker(ttrpc.UnixSocketRequireSameUser())) +} + +func subreaper() error { + return sys.SetSubreaper(1) +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_unix.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_unix.go new file mode 100644 index 000000000..6f1ef6e28 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_unix.go @@ -0,0 +1,117 @@ +// +build !windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +package shim + +import ( + "bytes" + "context" + "io" + "net" + "os" + "os/exec" + "os/signal" + "syscall" + + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/fifo" + "github.com/containerd/typeurl" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// setupSignals creates a new signal handler for all signals and sets the shim as a +// sub-reaper so that the container processes are reparented +func setupSignals() (chan os.Signal, error) { + signals := make(chan os.Signal, 32) + signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) + return signals, nil +} + +func setupDumpStacks(dump chan<- os.Signal) { + signal.Notify(dump, syscall.SIGUSR1) +} + +func serveListener(path string) (net.Listener, error) { + var ( + l net.Listener + err error + ) + if path == "" { + l, err = net.FileListener(os.NewFile(3, "socket")) + path = "[inherited from parent]" + } else { + if len(path) > 106 { + return nil, errors.Errorf("%q: unix socket path too long (> 106)", path) + } + l, err = net.Listen("unix", "\x00"+path) + } + if err != nil { + return nil, err + } + logrus.WithField("socket", path).Debug("serving api on abstract socket") + return l, nil +} + +func handleSignals(logger *logrus.Entry, signals chan os.Signal) error { + logger.Info("starting signal loop") + for { + select { + case s := <-signals: + switch s { + case unix.SIGCHLD: + if err := Reap(); err != nil { + logger.WithError(err).Error("reap exit status") + } + case unix.SIGPIPE: + } + } + } +} + +func openLog(ctx context.Context, _ string) (io.Writer, error) { + return fifo.OpenFifo(context.Background(), "log", unix.O_WRONLY, 0700) +} + +func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { + ns, _ := namespaces.Namespace(ctx) + encoded, err := typeurl.MarshalAny(event) + if err != nil { + return err + } + data, err := encoded.Marshal() + if err != nil { + return err + } + cmd := exec.CommandContext(ctx, l.containerdBinaryPath, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) + cmd.Stdin = bytes.NewReader(data) + c, err := Default.Start(cmd) + if err != nil { + return err + } + status, err := Default.Wait(cmd, c) + if err != nil { + return err + } + if status != 0 { + return errors.New("failed to publish event") + } + return nil +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_windows.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_windows.go new file mode 100644 index 000000000..6e5642620 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_windows.go @@ -0,0 +1,307 @@ +// +build windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package shim + +import ( + "bytes" + "context" + "fmt" + "io" + "net" + "os" + "os/exec" + "sync" + "syscall" + "unsafe" + + winio "github.com/Microsoft/go-winio" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/ttrpc" + "github.com/containerd/typeurl" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/windows" +) + +const ( + errorConnectionAborted syscall.Errno = 1236 +) + +// setupSignals creates a new signal handler for all signals +func setupSignals() (chan os.Signal, error) { + signals := make(chan os.Signal, 32) + return signals, nil +} + +func newServer() (*ttrpc.Server, error) { + return ttrpc.NewServer() +} + +func subreaper() error { + return nil +} + +type fakeSignal struct { +} + +func (fs *fakeSignal) String() string { + return "" +} + +func (fs *fakeSignal) Signal() { +} + +func setupDumpStacks(dump chan<- os.Signal) { + // Windows does not support signals like *nix systems. So instead of + // trapping on SIGUSR1 to dump stacks, we wait on a Win32 event to be + // signaled. ACL'd to builtin administrators and local system + event := "Global\\containerd-shim-runhcs-v1-" + fmt.Sprint(os.Getpid()) + ev, _ := windows.UTF16PtrFromString(event) + sd, err := winio.SddlToSecurityDescriptor("D:P(A;;GA;;;BA)(A;;GA;;;SY)") + if err != nil { + logrus.Errorf("failed to get security descriptor for debug stackdump event %s: %s", event, err.Error()) + return + } + var sa windows.SecurityAttributes + sa.Length = uint32(unsafe.Sizeof(sa)) + sa.InheritHandle = 1 + sa.SecurityDescriptor = uintptr(unsafe.Pointer(&sd[0])) + h, err := windows.CreateEvent(&sa, 0, 0, ev) + if h == 0 || err != nil { + logrus.Errorf("failed to create debug stackdump event %s: %s", event, err.Error()) + return + } + go func() { + logrus.Debugf("Stackdump - waiting signal at %s", event) + for { + windows.WaitForSingleObject(h, windows.INFINITE) + dump <- new(fakeSignal) + } + }() +} + +// serve serves the ttrpc API over a unix socket at the provided path +// this function does not block +func serveListener(path string) (net.Listener, error) { + if path == "" { + return nil, errors.New("'socket' must be npipe path") + } + l, err := winio.ListenPipe(path, nil) + if err != nil { + return nil, err + } + logrus.WithField("socket", path).Debug("serving api on npipe socket") + return l, nil +} + +func handleSignals(logger *logrus.Entry, signals chan os.Signal) error { + logger.Info("starting signal loop") + for { + select { + case s := <-signals: + switch s { + case os.Interrupt: + break + } + } + } +} + +var _ = (io.WriterTo)(&blockingBuffer{}) +var _ = (io.Writer)(&blockingBuffer{}) + +// blockingBuffer implements the `io.Writer` and `io.WriterTo` interfaces. Once +// `capacity` is reached the calls to `Write` will block until a successful call +// to `WriterTo` frees up the buffer space. +// +// Note: This has the same threadding semantics as bytes.Buffer with no +// additional locking so multithreading is not supported. 
+type blockingBuffer struct { + c *sync.Cond + + capacity int + + buffer bytes.Buffer +} + +func newBlockingBuffer(capacity int) *blockingBuffer { + return &blockingBuffer{ + c: sync.NewCond(&sync.Mutex{}), + capacity: capacity, + } +} + +func (bb *blockingBuffer) Len() int { + bb.c.L.Lock() + defer bb.c.L.Unlock() + return bb.buffer.Len() +} + +func (bb *blockingBuffer) Write(p []byte) (int, error) { + if len(p) > bb.capacity { + return 0, errors.Errorf("len(p) (%d) too large for capacity (%d)", len(p), bb.capacity) + } + + bb.c.L.Lock() + for bb.buffer.Len()+len(p) > bb.capacity { + bb.c.Wait() + } + defer bb.c.L.Unlock() + return bb.buffer.Write(p) +} + +func (bb *blockingBuffer) WriteTo(w io.Writer) (int64, error) { + bb.c.L.Lock() + defer bb.c.L.Unlock() + defer bb.c.Signal() + return bb.buffer.WriteTo(w) +} + +// deferredShimWriteLogger exists to solve the upstream loggin issue presented +// by using Windows Named Pipes for logging. When containerd restarts it tries +// to reconnect to any shims. This means that the connection to the logger will +// be severed but when containerd starts up it should reconnect and start +// logging again. We abstract all of this logic behind what looks like a simple +// `io.Writer` that can reconnect in the lifetime and buffers logs while +// disconnected. +type deferredShimWriteLogger struct { + mu sync.Mutex + + ctx context.Context + + connected bool + aborted bool + + buffer *blockingBuffer + + l net.Listener + c net.Conn + conerr error +} + +// beginAccept issues an accept to wait for a connection. Once a connection +// occurs drains any outstanding buffer. While draining the buffer any writes +// are blocked. If the buffer fails to fully drain due to a connection drop a +// call to `beginAccept` is re-issued waiting for another connection from +// containerd. +func (dswl *deferredShimWriteLogger) beginAccept() { + dswl.mu.Lock() + if dswl.connected { + return + } + dswl.mu.Unlock() + + c, err := dswl.l.Accept() + if err == errorConnectionAborted { + dswl.mu.Lock() + dswl.aborted = true + dswl.l.Close() + dswl.conerr = errors.New("connection closed") + dswl.mu.Unlock() + return + } + dswl.mu.Lock() + dswl.connected = true + dswl.c = c + + // Drain the buffer + if dswl.buffer.Len() > 0 { + _, err := dswl.buffer.WriteTo(dswl.c) + if err != nil { + // We lost our connection draining the buffer. + dswl.connected = false + dswl.c.Close() + go dswl.beginAccept() + } + } + dswl.mu.Unlock() +} + +func (dswl *deferredShimWriteLogger) Write(p []byte) (int, error) { + dswl.mu.Lock() + defer dswl.mu.Unlock() + + if dswl.aborted { + return 0, dswl.conerr + } + + if dswl.connected { + // We have a connection. beginAccept would have drained the buffer so we just write our data to + // the connection directly. + written, err := dswl.c.Write(p) + if err != nil { + // We lost the connection. + dswl.connected = false + dswl.c.Close() + go dswl.beginAccept() + + // We weren't able to write the full `p` bytes. Buffer the rest + if written != len(p) { + w, err := dswl.buffer.Write(p[written:]) + if err != nil { + // We failed to buffer. Return this error + return written + w, err + } + written += w + } + } + + return written, nil + } + + // We are disconnected. Buffer the contents. + return dswl.buffer.Write(p) +} + +// openLog on Windows acts as the server of the log pipe. This allows the +// containerd daemon to independently restart and reconnect to the logs. 
+func openLog(ctx context.Context, id string) (io.Writer, error) { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + + dswl := &deferredShimWriteLogger{ + ctx: ctx, + buffer: newBlockingBuffer(64 * 1024), // 64KB, + } + l, err := winio.ListenPipe(fmt.Sprintf("\\\\.\\pipe\\containerd-shim-%s-%s-log", ns, id), nil) + if err != nil { + return nil, err + } + dswl.l = l + go dswl.beginAccept() + return dswl, nil +} + +func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { + ns, _ := namespaces.Namespace(ctx) + encoded, err := typeurl.MarshalAny(event) + if err != nil { + return err + } + data, err := encoded.Marshal() + if err != nil { + return err + } + cmd := exec.CommandContext(ctx, l.containerdBinaryPath, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) + cmd.Stdin = bytes.NewReader(data) + return cmd.Run() +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/util.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/util.go new file mode 100644 index 000000000..b7034ce50 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/util.go @@ -0,0 +1,128 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package shim + +import ( + "context" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/containerd/containerd/namespaces" + "github.com/pkg/errors" +) + +const shimBinaryFormat = "containerd-shim-%s-%s" + +// Command returns the shim command with the provided args and configuration +func Command(ctx context.Context, runtime, containerdAddress, path string, cmdArgs ...string) (*exec.Cmd, error) { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + self, err := os.Executable() + if err != nil { + return nil, err + } + args := []string{ + "-namespace", ns, + "-address", containerdAddress, + "-publish-binary", self, + } + args = append(args, cmdArgs...) + name := BinaryName(runtime) + if name == "" { + return nil, fmt.Errorf("invalid runtime name %s, correct runtime name should format like io.containerd.runc.v1", runtime) + } + var cmdPath string + var lerr error + if cmdPath, lerr = exec.LookPath(name); lerr != nil { + if eerr, ok := lerr.(*exec.Error); ok { + if eerr.Err == exec.ErrNotFound { + return nil, errors.Wrapf(os.ErrNotExist, "runtime %q binary not installed %q", runtime, name) + } + } + } + cmdPath, err = filepath.Abs(cmdPath) + if err != nil { + return nil, err + } + cmd := exec.Command(cmdPath, args...) 
+ cmd.Dir = path + cmd.Env = append(os.Environ(), "GOMAXPROCS=2") + cmd.SysProcAttr = getSysProcAttr() + return cmd, nil +} + +// BinaryName returns the shim binary name from the runtime name, +// empty string returns means runtime name is invalid +func BinaryName(runtime string) string { + // runtime name should format like $prefix.name.version + parts := strings.Split(runtime, ".") + if len(parts) < 2 { + return "" + } + + return fmt.Sprintf(shimBinaryFormat, parts[len(parts)-2], parts[len(parts)-1]) +} + +// Connect to the provided address +func Connect(address string, d func(string, time.Duration) (net.Conn, error)) (net.Conn, error) { + return d(address, 100*time.Second) +} + +// WritePidFile writes a pid file atomically +func WritePidFile(path string, pid int) error { + path, err := filepath.Abs(path) + if err != nil { + return err + } + tempPath := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s", filepath.Base(path))) + f, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666) + if err != nil { + return err + } + _, err = fmt.Fprintf(f, "%d", pid) + f.Close() + if err != nil { + return err + } + return os.Rename(tempPath, path) +} + +// WriteAddress writes a address file atomically +func WriteAddress(path, address string) error { + path, err := filepath.Abs(path) + if err != nil { + return err + } + tempPath := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s", filepath.Base(path))) + f, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666) + if err != nil { + return err + } + _, err = f.WriteString(address) + f.Close() + if err != nil { + return err + } + return os.Rename(tempPath, path) +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/util_unix.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/util_unix.go new file mode 100644 index 000000000..262fe2b36 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/util_unix.go @@ -0,0 +1,70 @@ +// +build !windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package shim + +import ( + "context" + "net" + "path/filepath" + "strings" + "syscall" + "time" + + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/sys" + "github.com/pkg/errors" +) + +func getSysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + Setpgid: true, + } +} + +// SetScore sets the oom score for a process +func SetScore(pid int) error { + return sys.SetOOMScore(pid, sys.OOMScoreMaxKillable) +} + +// SocketAddress returns an abstract socket address +func SocketAddress(ctx context.Context, id string) (string, error) { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return "", err + } + return filepath.Join(string(filepath.Separator), "containerd-shim", ns, id, "shim.sock"), nil +} + +// AnonDialer returns a dialer for an abstract socket +func AnonDialer(address string, timeout time.Duration) (net.Conn, error) { + address = strings.TrimPrefix(address, "unix://") + return net.DialTimeout("unix", "\x00"+address, timeout) +} + +// NewSocket returns a new socket +func NewSocket(address string) (*net.UnixListener, error) { + if len(address) > 106 { + return nil, errors.Errorf("%q: unix socket path too long (> 106)", address) + } + l, err := net.Listen("unix", "\x00"+address) + if err != nil { + return nil, errors.Wrapf(err, "failed to listen to abstract unix socket %q", address) + } + return l.(*net.UnixListener), nil +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/util_windows.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/util_windows.go new file mode 100644 index 000000000..986fc754b --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/util_windows.go @@ -0,0 +1,90 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package shim + +import ( + "context" + "fmt" + "net" + "os" + "syscall" + "time" + + winio "github.com/Microsoft/go-winio" + "github.com/containerd/containerd/namespaces" + "github.com/pkg/errors" +) + +func getSysProcAttr() *syscall.SysProcAttr { + return nil +} + +// SetScore sets the oom score for a process +func SetScore(pid int) error { + return nil +} + +// SocketAddress returns a npipe address +func SocketAddress(ctx context.Context, id string) (string, error) { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return "", err + } + return fmt.Sprintf("\\\\.\\pipe\\containerd-shim-%s-%s-pipe", ns, id), nil +} + +// AnonDialer returns a dialer for a npipe +func AnonDialer(address string, timeout time.Duration) (net.Conn, error) { + var c net.Conn + var lastError error + timedOutError := errors.Errorf("timed out waiting for npipe %s", address) + start := time.Now() + for { + remaining := timeout - time.Now().Sub(start) + if remaining <= 0 { + lastError = timedOutError + break + } + c, lastError = winio.DialPipe(address, &remaining) + if lastError == nil { + break + } + if !os.IsNotExist(lastError) { + break + } + // There is nobody serving the pipe. 
We limit the timeout for this case + // to 5 seconds because any shim that would serve this endpoint should + // serve it within 5 seconds. We use the passed in timeout for the + // `DialPipe` timeout if the pipe exists however to give the pipe time + // to `Accept` the connection. + if time.Now().Sub(start) >= 5*time.Second { + lastError = timedOutError + break + } + time.Sleep(10 * time.Millisecond) + } + return c, lastError +} + +// NewSocket returns a new npipe listener +func NewSocket(address string) (net.Listener, error) { + l, err := winio.ListenPipe(address, nil) + if err != nil { + return nil, errors.Wrapf(err, "failed to listen to npipe %s", address) + } + return l, nil +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/task/doc.go b/vendor/github.com/containerd/containerd/runtime/v2/task/doc.go new file mode 100644 index 000000000..f933dd8d4 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/task/doc.go @@ -0,0 +1,17 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package task diff --git a/vendor/github.com/containerd/containerd/runtime/v2/task/shim.pb.go b/vendor/github.com/containerd/containerd/runtime/v2/task/shim.pb.go new file mode 100644 index 000000000..9caefd758 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/task/shim.pb.go @@ -0,0 +1,5693 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: github.com/containerd/containerd/runtime/v2/task/shim.proto + +/* + Package task is a generated protocol buffer package. + + It is generated from these files: + github.com/containerd/containerd/runtime/v2/task/shim.proto + + It has these top-level messages: + CreateTaskRequest + CreateTaskResponse + DeleteRequest + DeleteResponse + ExecProcessRequest + ExecProcessResponse + ResizePtyRequest + StateRequest + StateResponse + KillRequest + CloseIORequest + PidsRequest + PidsResponse + CheckpointTaskRequest + UpdateTaskRequest + StartRequest + StartResponse + WaitRequest + WaitResponse + StatsRequest + StatsResponse + ConnectRequest + ConnectResponse + ShutdownRequest + PauseRequest + ResumeRequest +*/ +package task + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" +import google_protobuf "github.com/gogo/protobuf/types" +import google_protobuf1 "github.com/gogo/protobuf/types" + +// skipping weak import gogoproto "github.com/gogo/protobuf/gogoproto" +import _ "github.com/gogo/protobuf/types" +import containerd_types "github.com/containerd/containerd/api/types" +import containerd_v1_types "github.com/containerd/containerd/api/types/task" + +import time "time" + +import types "github.com/gogo/protobuf/types" + +import strings "strings" +import reflect "reflect" + +import context "context" +import ttrpc "github.com/containerd/ttrpc" + +import io "io" + +// Reference imports to suppress errors if they are not otherwise used. 
+var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf +var _ = time.Kitchen + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +type CreateTaskRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Bundle string `protobuf:"bytes,2,opt,name=bundle,proto3" json:"bundle,omitempty"` + Rootfs []*containerd_types.Mount `protobuf:"bytes,3,rep,name=rootfs" json:"rootfs,omitempty"` + Terminal bool `protobuf:"varint,4,opt,name=terminal,proto3" json:"terminal,omitempty"` + Stdin string `protobuf:"bytes,5,opt,name=stdin,proto3" json:"stdin,omitempty"` + Stdout string `protobuf:"bytes,6,opt,name=stdout,proto3" json:"stdout,omitempty"` + Stderr string `protobuf:"bytes,7,opt,name=stderr,proto3" json:"stderr,omitempty"` + Checkpoint string `protobuf:"bytes,8,opt,name=checkpoint,proto3" json:"checkpoint,omitempty"` + ParentCheckpoint string `protobuf:"bytes,9,opt,name=parent_checkpoint,json=parentCheckpoint,proto3" json:"parent_checkpoint,omitempty"` + Options *google_protobuf.Any `protobuf:"bytes,10,opt,name=options" json:"options,omitempty"` +} + +func (m *CreateTaskRequest) Reset() { *m = CreateTaskRequest{} } +func (*CreateTaskRequest) ProtoMessage() {} +func (*CreateTaskRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{0} } + +type CreateTaskResponse struct { + Pid uint32 `protobuf:"varint,1,opt,name=pid,proto3" json:"pid,omitempty"` +} + +func (m *CreateTaskResponse) Reset() { *m = CreateTaskResponse{} } +func (*CreateTaskResponse) ProtoMessage() {} +func (*CreateTaskResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{1} } + +type DeleteRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + ExecID string `protobuf:"bytes,2,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` +} + +func (m *DeleteRequest) Reset() { *m = DeleteRequest{} } +func (*DeleteRequest) ProtoMessage() {} +func (*DeleteRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{2} } + +type DeleteResponse struct { + Pid uint32 `protobuf:"varint,1,opt,name=pid,proto3" json:"pid,omitempty"` + ExitStatus uint32 `protobuf:"varint,2,opt,name=exit_status,json=exitStatus,proto3" json:"exit_status,omitempty"` + ExitedAt time.Time `protobuf:"bytes,3,opt,name=exited_at,json=exitedAt,stdtime" json:"exited_at"` +} + +func (m *DeleteResponse) Reset() { *m = DeleteResponse{} } +func (*DeleteResponse) ProtoMessage() {} +func (*DeleteResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{3} } + +type ExecProcessRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + ExecID string `protobuf:"bytes,2,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` + Terminal bool `protobuf:"varint,3,opt,name=terminal,proto3" json:"terminal,omitempty"` + Stdin string `protobuf:"bytes,4,opt,name=stdin,proto3" json:"stdin,omitempty"` + Stdout string `protobuf:"bytes,5,opt,name=stdout,proto3" json:"stdout,omitempty"` + Stderr string `protobuf:"bytes,6,opt,name=stderr,proto3" json:"stderr,omitempty"` + Spec *google_protobuf.Any `protobuf:"bytes,7,opt,name=spec" json:"spec,omitempty"` +} + +func (m *ExecProcessRequest) Reset() { *m = ExecProcessRequest{} } +func 
(*ExecProcessRequest) ProtoMessage() {} +func (*ExecProcessRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{4} } + +type ExecProcessResponse struct { +} + +func (m *ExecProcessResponse) Reset() { *m = ExecProcessResponse{} } +func (*ExecProcessResponse) ProtoMessage() {} +func (*ExecProcessResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{5} } + +type ResizePtyRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + ExecID string `protobuf:"bytes,2,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` + Width uint32 `protobuf:"varint,3,opt,name=width,proto3" json:"width,omitempty"` + Height uint32 `protobuf:"varint,4,opt,name=height,proto3" json:"height,omitempty"` +} + +func (m *ResizePtyRequest) Reset() { *m = ResizePtyRequest{} } +func (*ResizePtyRequest) ProtoMessage() {} +func (*ResizePtyRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{6} } + +type StateRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + ExecID string `protobuf:"bytes,2,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` +} + +func (m *StateRequest) Reset() { *m = StateRequest{} } +func (*StateRequest) ProtoMessage() {} +func (*StateRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{7} } + +type StateResponse struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Bundle string `protobuf:"bytes,2,opt,name=bundle,proto3" json:"bundle,omitempty"` + Pid uint32 `protobuf:"varint,3,opt,name=pid,proto3" json:"pid,omitempty"` + Status containerd_v1_types.Status `protobuf:"varint,4,opt,name=status,proto3,enum=containerd.v1.types.Status" json:"status,omitempty"` + Stdin string `protobuf:"bytes,5,opt,name=stdin,proto3" json:"stdin,omitempty"` + Stdout string `protobuf:"bytes,6,opt,name=stdout,proto3" json:"stdout,omitempty"` + Stderr string `protobuf:"bytes,7,opt,name=stderr,proto3" json:"stderr,omitempty"` + Terminal bool `protobuf:"varint,8,opt,name=terminal,proto3" json:"terminal,omitempty"` + ExitStatus uint32 `protobuf:"varint,9,opt,name=exit_status,json=exitStatus,proto3" json:"exit_status,omitempty"` + ExitedAt time.Time `protobuf:"bytes,10,opt,name=exited_at,json=exitedAt,stdtime" json:"exited_at"` +} + +func (m *StateResponse) Reset() { *m = StateResponse{} } +func (*StateResponse) ProtoMessage() {} +func (*StateResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{8} } + +type KillRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + ExecID string `protobuf:"bytes,2,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` + Signal uint32 `protobuf:"varint,3,opt,name=signal,proto3" json:"signal,omitempty"` + All bool `protobuf:"varint,4,opt,name=all,proto3" json:"all,omitempty"` +} + +func (m *KillRequest) Reset() { *m = KillRequest{} } +func (*KillRequest) ProtoMessage() {} +func (*KillRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{9} } + +type CloseIORequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + ExecID string `protobuf:"bytes,2,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` + Stdin bool `protobuf:"varint,3,opt,name=stdin,proto3" json:"stdin,omitempty"` +} + +func (m *CloseIORequest) Reset() { *m = CloseIORequest{} } +func (*CloseIORequest) ProtoMessage() {} +func (*CloseIORequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{10} } + +type 
PidsRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` +} + +func (m *PidsRequest) Reset() { *m = PidsRequest{} } +func (*PidsRequest) ProtoMessage() {} +func (*PidsRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{11} } + +type PidsResponse struct { + Processes []*containerd_v1_types.ProcessInfo `protobuf:"bytes,1,rep,name=processes" json:"processes,omitempty"` +} + +func (m *PidsResponse) Reset() { *m = PidsResponse{} } +func (*PidsResponse) ProtoMessage() {} +func (*PidsResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{12} } + +type CheckpointTaskRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Path string `protobuf:"bytes,2,opt,name=path,proto3" json:"path,omitempty"` + Options *google_protobuf.Any `protobuf:"bytes,3,opt,name=options" json:"options,omitempty"` +} + +func (m *CheckpointTaskRequest) Reset() { *m = CheckpointTaskRequest{} } +func (*CheckpointTaskRequest) ProtoMessage() {} +func (*CheckpointTaskRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{13} } + +type UpdateTaskRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Resources *google_protobuf.Any `protobuf:"bytes,2,opt,name=resources" json:"resources,omitempty"` +} + +func (m *UpdateTaskRequest) Reset() { *m = UpdateTaskRequest{} } +func (*UpdateTaskRequest) ProtoMessage() {} +func (*UpdateTaskRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{14} } + +type StartRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + ExecID string `protobuf:"bytes,2,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` +} + +func (m *StartRequest) Reset() { *m = StartRequest{} } +func (*StartRequest) ProtoMessage() {} +func (*StartRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{15} } + +type StartResponse struct { + Pid uint32 `protobuf:"varint,1,opt,name=pid,proto3" json:"pid,omitempty"` +} + +func (m *StartResponse) Reset() { *m = StartResponse{} } +func (*StartResponse) ProtoMessage() {} +func (*StartResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{16} } + +type WaitRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + ExecID string `protobuf:"bytes,2,opt,name=exec_id,json=execId,proto3" json:"exec_id,omitempty"` +} + +func (m *WaitRequest) Reset() { *m = WaitRequest{} } +func (*WaitRequest) ProtoMessage() {} +func (*WaitRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{17} } + +type WaitResponse struct { + ExitStatus uint32 `protobuf:"varint,1,opt,name=exit_status,json=exitStatus,proto3" json:"exit_status,omitempty"` + ExitedAt time.Time `protobuf:"bytes,2,opt,name=exited_at,json=exitedAt,stdtime" json:"exited_at"` +} + +func (m *WaitResponse) Reset() { *m = WaitResponse{} } +func (*WaitResponse) ProtoMessage() {} +func (*WaitResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{18} } + +type StatsRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` +} + +func (m *StatsRequest) Reset() { *m = StatsRequest{} } +func (*StatsRequest) ProtoMessage() {} +func (*StatsRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{19} } + +type StatsResponse struct { + Stats *google_protobuf.Any `protobuf:"bytes,1,opt,name=stats" json:"stats,omitempty"` +} + +func (m *StatsResponse) Reset() { *m = StatsResponse{} } 
+func (*StatsResponse) ProtoMessage() {} +func (*StatsResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{20} } + +type ConnectRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` +} + +func (m *ConnectRequest) Reset() { *m = ConnectRequest{} } +func (*ConnectRequest) ProtoMessage() {} +func (*ConnectRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{21} } + +type ConnectResponse struct { + ShimPid uint32 `protobuf:"varint,1,opt,name=shim_pid,json=shimPid,proto3" json:"shim_pid,omitempty"` + TaskPid uint32 `protobuf:"varint,2,opt,name=task_pid,json=taskPid,proto3" json:"task_pid,omitempty"` + Version string `protobuf:"bytes,3,opt,name=version,proto3" json:"version,omitempty"` +} + +func (m *ConnectResponse) Reset() { *m = ConnectResponse{} } +func (*ConnectResponse) ProtoMessage() {} +func (*ConnectResponse) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{22} } + +type ShutdownRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Now bool `protobuf:"varint,2,opt,name=now,proto3" json:"now,omitempty"` +} + +func (m *ShutdownRequest) Reset() { *m = ShutdownRequest{} } +func (*ShutdownRequest) ProtoMessage() {} +func (*ShutdownRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{23} } + +type PauseRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` +} + +func (m *PauseRequest) Reset() { *m = PauseRequest{} } +func (*PauseRequest) ProtoMessage() {} +func (*PauseRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{24} } + +type ResumeRequest struct { + ID string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` +} + +func (m *ResumeRequest) Reset() { *m = ResumeRequest{} } +func (*ResumeRequest) ProtoMessage() {} +func (*ResumeRequest) Descriptor() ([]byte, []int) { return fileDescriptorShim, []int{25} } + +func init() { + proto.RegisterType((*CreateTaskRequest)(nil), "containerd.task.v2.CreateTaskRequest") + proto.RegisterType((*CreateTaskResponse)(nil), "containerd.task.v2.CreateTaskResponse") + proto.RegisterType((*DeleteRequest)(nil), "containerd.task.v2.DeleteRequest") + proto.RegisterType((*DeleteResponse)(nil), "containerd.task.v2.DeleteResponse") + proto.RegisterType((*ExecProcessRequest)(nil), "containerd.task.v2.ExecProcessRequest") + proto.RegisterType((*ExecProcessResponse)(nil), "containerd.task.v2.ExecProcessResponse") + proto.RegisterType((*ResizePtyRequest)(nil), "containerd.task.v2.ResizePtyRequest") + proto.RegisterType((*StateRequest)(nil), "containerd.task.v2.StateRequest") + proto.RegisterType((*StateResponse)(nil), "containerd.task.v2.StateResponse") + proto.RegisterType((*KillRequest)(nil), "containerd.task.v2.KillRequest") + proto.RegisterType((*CloseIORequest)(nil), "containerd.task.v2.CloseIORequest") + proto.RegisterType((*PidsRequest)(nil), "containerd.task.v2.PidsRequest") + proto.RegisterType((*PidsResponse)(nil), "containerd.task.v2.PidsResponse") + proto.RegisterType((*CheckpointTaskRequest)(nil), "containerd.task.v2.CheckpointTaskRequest") + proto.RegisterType((*UpdateTaskRequest)(nil), "containerd.task.v2.UpdateTaskRequest") + proto.RegisterType((*StartRequest)(nil), "containerd.task.v2.StartRequest") + proto.RegisterType((*StartResponse)(nil), "containerd.task.v2.StartResponse") + proto.RegisterType((*WaitRequest)(nil), "containerd.task.v2.WaitRequest") + proto.RegisterType((*WaitResponse)(nil), "containerd.task.v2.WaitResponse") + 
proto.RegisterType((*StatsRequest)(nil), "containerd.task.v2.StatsRequest") + proto.RegisterType((*StatsResponse)(nil), "containerd.task.v2.StatsResponse") + proto.RegisterType((*ConnectRequest)(nil), "containerd.task.v2.ConnectRequest") + proto.RegisterType((*ConnectResponse)(nil), "containerd.task.v2.ConnectResponse") + proto.RegisterType((*ShutdownRequest)(nil), "containerd.task.v2.ShutdownRequest") + proto.RegisterType((*PauseRequest)(nil), "containerd.task.v2.PauseRequest") + proto.RegisterType((*ResumeRequest)(nil), "containerd.task.v2.ResumeRequest") +} +func (m *CreateTaskRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *CreateTaskRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.Bundle) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Bundle))) + i += copy(dAtA[i:], m.Bundle) + } + if len(m.Rootfs) > 0 { + for _, msg := range m.Rootfs { + dAtA[i] = 0x1a + i++ + i = encodeVarintShim(dAtA, i, uint64(msg.Size())) + n, err := msg.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n + } + } + if m.Terminal { + dAtA[i] = 0x20 + i++ + if m.Terminal { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + if len(m.Stdin) > 0 { + dAtA[i] = 0x2a + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stdin))) + i += copy(dAtA[i:], m.Stdin) + } + if len(m.Stdout) > 0 { + dAtA[i] = 0x32 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stdout))) + i += copy(dAtA[i:], m.Stdout) + } + if len(m.Stderr) > 0 { + dAtA[i] = 0x3a + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stderr))) + i += copy(dAtA[i:], m.Stderr) + } + if len(m.Checkpoint) > 0 { + dAtA[i] = 0x42 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Checkpoint))) + i += copy(dAtA[i:], m.Checkpoint) + } + if len(m.ParentCheckpoint) > 0 { + dAtA[i] = 0x4a + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ParentCheckpoint))) + i += copy(dAtA[i:], m.ParentCheckpoint) + } + if m.Options != nil { + dAtA[i] = 0x52 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Options.Size())) + n1, err := m.Options.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n1 + } + return i, nil +} + +func (m *CreateTaskResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *CreateTaskResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Pid != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Pid)) + } + return i, nil +} + +func (m *DeleteRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *DeleteRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.ExecID) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ExecID))) + i += copy(dAtA[i:], m.ExecID) + } + return i, nil +} + +func (m *DeleteResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + 
dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *DeleteResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Pid != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Pid)) + } + if m.ExitStatus != 0 { + dAtA[i] = 0x10 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.ExitStatus)) + } + dAtA[i] = 0x1a + i++ + i = encodeVarintShim(dAtA, i, uint64(types.SizeOfStdTime(m.ExitedAt))) + n2, err := types.StdTimeMarshalTo(m.ExitedAt, dAtA[i:]) + if err != nil { + return 0, err + } + i += n2 + return i, nil +} + +func (m *ExecProcessRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ExecProcessRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.ExecID) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ExecID))) + i += copy(dAtA[i:], m.ExecID) + } + if m.Terminal { + dAtA[i] = 0x18 + i++ + if m.Terminal { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + if len(m.Stdin) > 0 { + dAtA[i] = 0x22 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stdin))) + i += copy(dAtA[i:], m.Stdin) + } + if len(m.Stdout) > 0 { + dAtA[i] = 0x2a + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stdout))) + i += copy(dAtA[i:], m.Stdout) + } + if len(m.Stderr) > 0 { + dAtA[i] = 0x32 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stderr))) + i += copy(dAtA[i:], m.Stderr) + } + if m.Spec != nil { + dAtA[i] = 0x3a + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Spec.Size())) + n3, err := m.Spec.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n3 + } + return i, nil +} + +func (m *ExecProcessResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ExecProcessResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + return i, nil +} + +func (m *ResizePtyRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ResizePtyRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.ExecID) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ExecID))) + i += copy(dAtA[i:], m.ExecID) + } + if m.Width != 0 { + dAtA[i] = 0x18 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Width)) + } + if m.Height != 0 { + dAtA[i] = 0x20 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Height)) + } + return i, nil +} + +func (m *StateRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *StateRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if 
len(m.ExecID) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ExecID))) + i += copy(dAtA[i:], m.ExecID) + } + return i, nil +} + +func (m *StateResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *StateResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.Bundle) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Bundle))) + i += copy(dAtA[i:], m.Bundle) + } + if m.Pid != 0 { + dAtA[i] = 0x18 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Pid)) + } + if m.Status != 0 { + dAtA[i] = 0x20 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Status)) + } + if len(m.Stdin) > 0 { + dAtA[i] = 0x2a + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stdin))) + i += copy(dAtA[i:], m.Stdin) + } + if len(m.Stdout) > 0 { + dAtA[i] = 0x32 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stdout))) + i += copy(dAtA[i:], m.Stdout) + } + if len(m.Stderr) > 0 { + dAtA[i] = 0x3a + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Stderr))) + i += copy(dAtA[i:], m.Stderr) + } + if m.Terminal { + dAtA[i] = 0x40 + i++ + if m.Terminal { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + if m.ExitStatus != 0 { + dAtA[i] = 0x48 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.ExitStatus)) + } + dAtA[i] = 0x52 + i++ + i = encodeVarintShim(dAtA, i, uint64(types.SizeOfStdTime(m.ExitedAt))) + n4, err := types.StdTimeMarshalTo(m.ExitedAt, dAtA[i:]) + if err != nil { + return 0, err + } + i += n4 + return i, nil +} + +func (m *KillRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *KillRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.ExecID) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ExecID))) + i += copy(dAtA[i:], m.ExecID) + } + if m.Signal != 0 { + dAtA[i] = 0x18 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Signal)) + } + if m.All { + dAtA[i] = 0x20 + i++ + if m.All { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + return i, nil +} + +func (m *CloseIORequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *CloseIORequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.ExecID) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ExecID))) + i += copy(dAtA[i:], m.ExecID) + } + if m.Stdin { + dAtA[i] = 0x18 + i++ + if m.Stdin { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } + return i, nil +} + +func (m *PidsRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *PidsRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = 
i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + return i, nil +} + +func (m *PidsResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *PidsResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.Processes) > 0 { + for _, msg := range m.Processes { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(msg.Size())) + n, err := msg.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n + } + } + return i, nil +} + +func (m *CheckpointTaskRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *CheckpointTaskRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.Path) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Path))) + i += copy(dAtA[i:], m.Path) + } + if m.Options != nil { + dAtA[i] = 0x1a + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Options.Size())) + n5, err := m.Options.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n5 + } + return i, nil +} + +func (m *UpdateTaskRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *UpdateTaskRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if m.Resources != nil { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Resources.Size())) + n6, err := m.Resources.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n6 + } + return i, nil +} + +func (m *StartRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *StartRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.ExecID) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ExecID))) + i += copy(dAtA[i:], m.ExecID) + } + return i, nil +} + +func (m *StartResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *StartResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Pid != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Pid)) + } + return i, nil +} + +func (m *WaitRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *WaitRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + 
dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if len(m.ExecID) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ExecID))) + i += copy(dAtA[i:], m.ExecID) + } + return i, nil +} + +func (m *WaitResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *WaitResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.ExitStatus != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.ExitStatus)) + } + dAtA[i] = 0x12 + i++ + i = encodeVarintShim(dAtA, i, uint64(types.SizeOfStdTime(m.ExitedAt))) + n7, err := types.StdTimeMarshalTo(m.ExitedAt, dAtA[i:]) + if err != nil { + return 0, err + } + i += n7 + return i, nil +} + +func (m *StatsRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *StatsRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + return i, nil +} + +func (m *StatsResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *StatsResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Stats != nil { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(m.Stats.Size())) + n8, err := m.Stats.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n8 + } + return i, nil +} + +func (m *ConnectRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ConnectRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + return i, nil +} + +func (m *ConnectResponse) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ConnectResponse) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.ShimPid != 0 { + dAtA[i] = 0x8 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.ShimPid)) + } + if m.TaskPid != 0 { + dAtA[i] = 0x10 + i++ + i = encodeVarintShim(dAtA, i, uint64(m.TaskPid)) + } + if len(m.Version) > 0 { + dAtA[i] = 0x1a + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.Version))) + i += copy(dAtA[i:], m.Version) + } + return i, nil +} + +func (m *ShutdownRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ShutdownRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + if m.Now { + dAtA[i] = 0x10 + i++ + if m.Now { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } 
+ return i, nil +} + +func (m *PauseRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *PauseRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + return i, nil +} + +func (m *ResumeRequest) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ResumeRequest) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.ID) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintShim(dAtA, i, uint64(len(m.ID))) + i += copy(dAtA[i:], m.ID) + } + return i, nil +} + +func encodeVarintShim(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} +func (m *CreateTaskRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Bundle) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if len(m.Rootfs) > 0 { + for _, e := range m.Rootfs { + l = e.Size() + n += 1 + l + sovShim(uint64(l)) + } + } + if m.Terminal { + n += 2 + } + l = len(m.Stdin) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Stdout) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Stderr) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Checkpoint) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ParentCheckpoint) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Options != nil { + l = m.Options.Size() + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *CreateTaskResponse) Size() (n int) { + var l int + _ = l + if m.Pid != 0 { + n += 1 + sovShim(uint64(m.Pid)) + } + return n +} + +func (m *DeleteRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ExecID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *DeleteResponse) Size() (n int) { + var l int + _ = l + if m.Pid != 0 { + n += 1 + sovShim(uint64(m.Pid)) + } + if m.ExitStatus != 0 { + n += 1 + sovShim(uint64(m.ExitStatus)) + } + l = types.SizeOfStdTime(m.ExitedAt) + n += 1 + l + sovShim(uint64(l)) + return n +} + +func (m *ExecProcessRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ExecID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Terminal { + n += 2 + } + l = len(m.Stdin) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Stdout) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Stderr) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Spec != nil { + l = m.Spec.Size() + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *ExecProcessResponse) Size() (n int) { + var l int + _ = l + return n +} + +func (m *ResizePtyRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ExecID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Width != 0 { + n += 1 + sovShim(uint64(m.Width)) + } + if m.Height != 0 { + n += 1 + sovShim(uint64(m.Height)) + } + return n +} + +func (m 
*StateRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ExecID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *StateResponse) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Bundle) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Pid != 0 { + n += 1 + sovShim(uint64(m.Pid)) + } + if m.Status != 0 { + n += 1 + sovShim(uint64(m.Status)) + } + l = len(m.Stdin) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Stdout) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Stderr) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Terminal { + n += 2 + } + if m.ExitStatus != 0 { + n += 1 + sovShim(uint64(m.ExitStatus)) + } + l = types.SizeOfStdTime(m.ExitedAt) + n += 1 + l + sovShim(uint64(l)) + return n +} + +func (m *KillRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ExecID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Signal != 0 { + n += 1 + sovShim(uint64(m.Signal)) + } + if m.All { + n += 2 + } + return n +} + +func (m *CloseIORequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ExecID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Stdin { + n += 2 + } + return n +} + +func (m *PidsRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *PidsResponse) Size() (n int) { + var l int + _ = l + if len(m.Processes) > 0 { + for _, e := range m.Processes { + l = e.Size() + n += 1 + l + sovShim(uint64(l)) + } + } + return n +} + +func (m *CheckpointTaskRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.Path) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Options != nil { + l = m.Options.Size() + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *UpdateTaskRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Resources != nil { + l = m.Resources.Size() + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *StartRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ExecID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *StartResponse) Size() (n int) { + var l int + _ = l + if m.Pid != 0 { + n += 1 + sovShim(uint64(m.Pid)) + } + return n +} + +func (m *WaitRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + l = len(m.ExecID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *WaitResponse) Size() (n int) { + var l int + _ = l + if m.ExitStatus != 0 { + n += 1 + sovShim(uint64(m.ExitStatus)) + } + l = types.SizeOfStdTime(m.ExitedAt) + n += 1 + l + sovShim(uint64(l)) + return n +} + +func (m *StatsRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *StatsResponse) Size() (n int) { + var l int + _ = l + if m.Stats != nil { + l = m.Stats.Size() + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *ConnectRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + 
sovShim(uint64(l)) + } + return n +} + +func (m *ConnectResponse) Size() (n int) { + var l int + _ = l + if m.ShimPid != 0 { + n += 1 + sovShim(uint64(m.ShimPid)) + } + if m.TaskPid != 0 { + n += 1 + sovShim(uint64(m.TaskPid)) + } + l = len(m.Version) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *ShutdownRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + if m.Now { + n += 2 + } + return n +} + +func (m *PauseRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func (m *ResumeRequest) Size() (n int) { + var l int + _ = l + l = len(m.ID) + if l > 0 { + n += 1 + l + sovShim(uint64(l)) + } + return n +} + +func sovShim(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozShim(x uint64) (n int) { + return sovShim(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (this *CreateTaskRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&CreateTaskRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `Bundle:` + fmt.Sprintf("%v", this.Bundle) + `,`, + `Rootfs:` + strings.Replace(fmt.Sprintf("%v", this.Rootfs), "Mount", "containerd_types.Mount", 1) + `,`, + `Terminal:` + fmt.Sprintf("%v", this.Terminal) + `,`, + `Stdin:` + fmt.Sprintf("%v", this.Stdin) + `,`, + `Stdout:` + fmt.Sprintf("%v", this.Stdout) + `,`, + `Stderr:` + fmt.Sprintf("%v", this.Stderr) + `,`, + `Checkpoint:` + fmt.Sprintf("%v", this.Checkpoint) + `,`, + `ParentCheckpoint:` + fmt.Sprintf("%v", this.ParentCheckpoint) + `,`, + `Options:` + strings.Replace(fmt.Sprintf("%v", this.Options), "Any", "google_protobuf.Any", 1) + `,`, + `}`, + }, "") + return s +} +func (this *CreateTaskResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&CreateTaskResponse{`, + `Pid:` + fmt.Sprintf("%v", this.Pid) + `,`, + `}`, + }, "") + return s +} +func (this *DeleteRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&DeleteRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, + `}`, + }, "") + return s +} +func (this *DeleteResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&DeleteResponse{`, + `Pid:` + fmt.Sprintf("%v", this.Pid) + `,`, + `ExitStatus:` + fmt.Sprintf("%v", this.ExitStatus) + `,`, + `ExitedAt:` + strings.Replace(strings.Replace(this.ExitedAt.String(), "Timestamp", "google_protobuf3.Timestamp", 1), `&`, ``, 1) + `,`, + `}`, + }, "") + return s +} +func (this *ExecProcessRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&ExecProcessRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, + `Terminal:` + fmt.Sprintf("%v", this.Terminal) + `,`, + `Stdin:` + fmt.Sprintf("%v", this.Stdin) + `,`, + `Stdout:` + fmt.Sprintf("%v", this.Stdout) + `,`, + `Stderr:` + fmt.Sprintf("%v", this.Stderr) + `,`, + `Spec:` + strings.Replace(fmt.Sprintf("%v", this.Spec), "Any", "google_protobuf.Any", 1) + `,`, + `}`, + }, "") + return s +} +func (this *ExecProcessResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&ExecProcessResponse{`, + `}`, + }, "") + return s +} +func (this *ResizePtyRequest) String() string { + if this == nil { + return "nil" + } + s := 
strings.Join([]string{`&ResizePtyRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, + `Width:` + fmt.Sprintf("%v", this.Width) + `,`, + `Height:` + fmt.Sprintf("%v", this.Height) + `,`, + `}`, + }, "") + return s +} +func (this *StateRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&StateRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, + `}`, + }, "") + return s +} +func (this *StateResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&StateResponse{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `Bundle:` + fmt.Sprintf("%v", this.Bundle) + `,`, + `Pid:` + fmt.Sprintf("%v", this.Pid) + `,`, + `Status:` + fmt.Sprintf("%v", this.Status) + `,`, + `Stdin:` + fmt.Sprintf("%v", this.Stdin) + `,`, + `Stdout:` + fmt.Sprintf("%v", this.Stdout) + `,`, + `Stderr:` + fmt.Sprintf("%v", this.Stderr) + `,`, + `Terminal:` + fmt.Sprintf("%v", this.Terminal) + `,`, + `ExitStatus:` + fmt.Sprintf("%v", this.ExitStatus) + `,`, + `ExitedAt:` + strings.Replace(strings.Replace(this.ExitedAt.String(), "Timestamp", "google_protobuf3.Timestamp", 1), `&`, ``, 1) + `,`, + `}`, + }, "") + return s +} +func (this *KillRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&KillRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, + `Signal:` + fmt.Sprintf("%v", this.Signal) + `,`, + `All:` + fmt.Sprintf("%v", this.All) + `,`, + `}`, + }, "") + return s +} +func (this *CloseIORequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&CloseIORequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, + `Stdin:` + fmt.Sprintf("%v", this.Stdin) + `,`, + `}`, + }, "") + return s +} +func (this *PidsRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&PidsRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `}`, + }, "") + return s +} +func (this *PidsResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&PidsResponse{`, + `Processes:` + strings.Replace(fmt.Sprintf("%v", this.Processes), "ProcessInfo", "containerd_v1_types.ProcessInfo", 1) + `,`, + `}`, + }, "") + return s +} +func (this *CheckpointTaskRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&CheckpointTaskRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `Path:` + fmt.Sprintf("%v", this.Path) + `,`, + `Options:` + strings.Replace(fmt.Sprintf("%v", this.Options), "Any", "google_protobuf.Any", 1) + `,`, + `}`, + }, "") + return s +} +func (this *UpdateTaskRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&UpdateTaskRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `Resources:` + strings.Replace(fmt.Sprintf("%v", this.Resources), "Any", "google_protobuf.Any", 1) + `,`, + `}`, + }, "") + return s +} +func (this *StartRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&StartRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, + `}`, + }, "") + return s +} +func (this *StartResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&StartResponse{`, + `Pid:` + 
fmt.Sprintf("%v", this.Pid) + `,`, + `}`, + }, "") + return s +} +func (this *WaitRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&WaitRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `ExecID:` + fmt.Sprintf("%v", this.ExecID) + `,`, + `}`, + }, "") + return s +} +func (this *WaitResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&WaitResponse{`, + `ExitStatus:` + fmt.Sprintf("%v", this.ExitStatus) + `,`, + `ExitedAt:` + strings.Replace(strings.Replace(this.ExitedAt.String(), "Timestamp", "google_protobuf3.Timestamp", 1), `&`, ``, 1) + `,`, + `}`, + }, "") + return s +} +func (this *StatsRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&StatsRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `}`, + }, "") + return s +} +func (this *StatsResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&StatsResponse{`, + `Stats:` + strings.Replace(fmt.Sprintf("%v", this.Stats), "Any", "google_protobuf.Any", 1) + `,`, + `}`, + }, "") + return s +} +func (this *ConnectRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&ConnectRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `}`, + }, "") + return s +} +func (this *ConnectResponse) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&ConnectResponse{`, + `ShimPid:` + fmt.Sprintf("%v", this.ShimPid) + `,`, + `TaskPid:` + fmt.Sprintf("%v", this.TaskPid) + `,`, + `Version:` + fmt.Sprintf("%v", this.Version) + `,`, + `}`, + }, "") + return s +} +func (this *ShutdownRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&ShutdownRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `Now:` + fmt.Sprintf("%v", this.Now) + `,`, + `}`, + }, "") + return s +} +func (this *PauseRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&PauseRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `}`, + }, "") + return s +} +func (this *ResumeRequest) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&ResumeRequest{`, + `ID:` + fmt.Sprintf("%v", this.ID) + `,`, + `}`, + }, "") + return s +} +func valueToStringShim(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("*%v", pv) +} + +type TaskService interface { + State(ctx context.Context, req *StateRequest) (*StateResponse, error) + Create(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error) + Start(ctx context.Context, req *StartRequest) (*StartResponse, error) + Delete(ctx context.Context, req *DeleteRequest) (*DeleteResponse, error) + Pids(ctx context.Context, req *PidsRequest) (*PidsResponse, error) + Pause(ctx context.Context, req *PauseRequest) (*google_protobuf1.Empty, error) + Resume(ctx context.Context, req *ResumeRequest) (*google_protobuf1.Empty, error) + Checkpoint(ctx context.Context, req *CheckpointTaskRequest) (*google_protobuf1.Empty, error) + Kill(ctx context.Context, req *KillRequest) (*google_protobuf1.Empty, error) + Exec(ctx context.Context, req *ExecProcessRequest) (*google_protobuf1.Empty, error) + ResizePty(ctx context.Context, req *ResizePtyRequest) (*google_protobuf1.Empty, error) + CloseIO(ctx context.Context, req *CloseIORequest) (*google_protobuf1.Empty, error) + Update(ctx 
context.Context, req *UpdateTaskRequest) (*google_protobuf1.Empty, error) + Wait(ctx context.Context, req *WaitRequest) (*WaitResponse, error) + Stats(ctx context.Context, req *StatsRequest) (*StatsResponse, error) + Connect(ctx context.Context, req *ConnectRequest) (*ConnectResponse, error) + Shutdown(ctx context.Context, req *ShutdownRequest) (*google_protobuf1.Empty, error) +} + +func RegisterTaskService(srv *ttrpc.Server, svc TaskService) { + srv.Register("containerd.task.v2.Task", map[string]ttrpc.Method{ + "State": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req StateRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.State(ctx, &req) + }, + "Create": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req CreateTaskRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Create(ctx, &req) + }, + "Start": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req StartRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Start(ctx, &req) + }, + "Delete": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req DeleteRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Delete(ctx, &req) + }, + "Pids": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req PidsRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Pids(ctx, &req) + }, + "Pause": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req PauseRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Pause(ctx, &req) + }, + "Resume": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req ResumeRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Resume(ctx, &req) + }, + "Checkpoint": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req CheckpointTaskRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Checkpoint(ctx, &req) + }, + "Kill": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req KillRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Kill(ctx, &req) + }, + "Exec": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req ExecProcessRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Exec(ctx, &req) + }, + "ResizePty": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req ResizePtyRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.ResizePty(ctx, &req) + }, + "CloseIO": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req CloseIORequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.CloseIO(ctx, &req) + }, + "Update": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req UpdateTaskRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Update(ctx, &req) + }, + "Wait": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req WaitRequest + if err := unmarshal(&req); err != nil { + 
return nil, err + } + return svc.Wait(ctx, &req) + }, + "Stats": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req StatsRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Stats(ctx, &req) + }, + "Connect": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req ConnectRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Connect(ctx, &req) + }, + "Shutdown": func(ctx context.Context, unmarshal func(interface{}) error) (interface{}, error) { + var req ShutdownRequest + if err := unmarshal(&req); err != nil { + return nil, err + } + return svc.Shutdown(ctx, &req) + }, + }) +} + +type taskClient struct { + client *ttrpc.Client +} + +func NewTaskClient(client *ttrpc.Client) TaskService { + return &taskClient{ + client: client, + } +} + +func (c *taskClient) State(ctx context.Context, req *StateRequest) (*StateResponse, error) { + var resp StateResponse + if err := c.client.Call(ctx, "containerd.task.v2.Task", "State", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Create(ctx context.Context, req *CreateTaskRequest) (*CreateTaskResponse, error) { + var resp CreateTaskResponse + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Create", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Start(ctx context.Context, req *StartRequest) (*StartResponse, error) { + var resp StartResponse + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Start", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Delete(ctx context.Context, req *DeleteRequest) (*DeleteResponse, error) { + var resp DeleteResponse + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Delete", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Pids(ctx context.Context, req *PidsRequest) (*PidsResponse, error) { + var resp PidsResponse + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Pids", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Pause(ctx context.Context, req *PauseRequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Pause", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Resume(ctx context.Context, req *ResumeRequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Resume", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Checkpoint(ctx context.Context, req *CheckpointTaskRequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Checkpoint", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Kill(ctx context.Context, req *KillRequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Kill", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Exec(ctx context.Context, req *ExecProcessRequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Exec", req, &resp); err 
!= nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) ResizePty(ctx context.Context, req *ResizePtyRequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "ResizePty", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) CloseIO(ctx context.Context, req *CloseIORequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "CloseIO", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Update(ctx context.Context, req *UpdateTaskRequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Update", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Wait(ctx context.Context, req *WaitRequest) (*WaitResponse, error) { + var resp WaitResponse + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Wait", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Stats(ctx context.Context, req *StatsRequest) (*StatsResponse, error) { + var resp StatsResponse + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Stats", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Connect(ctx context.Context, req *ConnectRequest) (*ConnectResponse, error) { + var resp ConnectResponse + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Connect", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +func (c *taskClient) Shutdown(ctx context.Context, req *ShutdownRequest) (*google_protobuf1.Empty, error) { + var resp google_protobuf1.Empty + if err := c.client.Call(ctx, "containerd.task.v2.Task", "Shutdown", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} +func (m *CreateTaskRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: CreateTaskRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: CreateTaskRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Bundle", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return 
io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Bundle = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Rootfs", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Rootfs = append(m.Rootfs, &containerd_types.Mount{}) + if err := m.Rootfs[len(m.Rootfs)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Terminal", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Terminal = bool(v != 0) + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stdin", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stdin = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 6: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stdout", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stdout = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stderr", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stderr = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 8: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Checkpoint", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx 
>= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Checkpoint = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 9: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ParentCheckpoint", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ParentCheckpoint = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 10: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Options", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Options == nil { + m.Options = &google_protobuf.Any{} + } + if err := m.Options.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *CreateTaskResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: CreateTaskResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: CreateTaskResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pid", wireType) + } + m.Pid = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pid |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *DeleteRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := 
uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: DeleteRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: DeleteRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExecID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ExecID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *DeleteResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: DeleteResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: DeleteResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pid", wireType) + } + m.Pid = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pid |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ExitStatus", wireType) + } + m.ExitStatus = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ExitStatus |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 2 { + return 
fmt.Errorf("proto: wrong wireType = %d for field ExitedAt", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := types.StdTimeUnmarshal(&m.ExitedAt, dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ExecProcessRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ExecProcessRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ExecProcessRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExecID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ExecID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Terminal", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Terminal = bool(v != 0) + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stdin", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) 
& 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stdin = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stdout", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stdout = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 6: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stderr", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stderr = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Spec", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Spec == nil { + m.Spec = &google_protobuf.Any{} + } + if err := m.Spec.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ExecProcessResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ExecProcessResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ExecProcessResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m 
*ResizePtyRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ResizePtyRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ResizePtyRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExecID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ExecID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Width", wireType) + } + m.Width = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Width |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Height", wireType) + } + m.Height = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Height |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *StateRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: StateRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return 
fmt.Errorf("proto: StateRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExecID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ExecID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *StateResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: StateResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: StateResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Bundle", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Bundle = 
string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pid", wireType) + } + m.Pid = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pid |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Status", wireType) + } + m.Status = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Status |= (containerd_v1_types.Status(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stdin", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stdin = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 6: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stdout", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stdout = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stderr", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Stderr = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 8: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Terminal", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Terminal = bool(v != 0) + case 9: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ExitStatus", wireType) + } + m.ExitStatus = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ExitStatus |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 10: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExitedAt", wireType) 
+ } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := types.StdTimeUnmarshal(&m.ExitedAt, dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *KillRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: KillRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: KillRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExecID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ExecID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Signal", wireType) + } + m.Signal = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Signal |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field All", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.All = bool(v != 0) + default: + iNdEx = preIndex + skippy, err := 
skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *CloseIORequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: CloseIORequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: CloseIORequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExecID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ExecID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Stdin", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Stdin = bool(v != 0) + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *PidsRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: PidsRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: PidsRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch 
fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *PidsResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: PidsResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: PidsResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Processes", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Processes = append(m.Processes, &containerd_v1_types.ProcessInfo{}) + if err := m.Processes[len(m.Processes)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *CheckpointTaskRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: CheckpointTaskRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: CheckpointTaskRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 
ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Path", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Path = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Options", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Options == nil { + m.Options = &google_protobuf.Any{} + } + if err := m.Options.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *UpdateTaskRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: UpdateTaskRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: UpdateTaskRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Resources", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim 
+ } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Resources == nil { + m.Resources = &google_protobuf.Any{} + } + if err := m.Resources.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *StartRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: StartRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: StartRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExecID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ExecID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *StartResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: StartResponse: wiretype end group for non-group") + } 
+ if fieldNum <= 0 { + return fmt.Errorf("proto: StartResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Pid", wireType) + } + m.Pid = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Pid |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *WaitRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: WaitRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: WaitRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExecID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ExecID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *WaitResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: WaitResponse: wiretype 
end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: WaitResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ExitStatus", wireType) + } + m.ExitStatus = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ExitStatus |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ExitedAt", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := types.StdTimeUnmarshal(&m.ExitedAt, dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *StatsRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: StatsRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: StatsRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *StatsResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 
4 { + return fmt.Errorf("proto: StatsResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: StatsResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Stats", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Stats == nil { + m.Stats = &google_protobuf.Any{} + } + if err := m.Stats.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ConnectRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ConnectRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ConnectRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ConnectResponse) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ConnectResponse: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ConnectResponse: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return 
fmt.Errorf("proto: wrong wireType = %d for field ShimPid", wireType) + } + m.ShimPid = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ShimPid |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field TaskPid", wireType) + } + m.TaskPid = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.TaskPid |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Version", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Version = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ShutdownRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ShutdownRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ShutdownRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Now", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Now = bool(v != 0) + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx 
+= skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *PauseRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: PauseRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: PauseRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ResumeRequest) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ResumeRequest: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ResumeRequest: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowShim + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthShim + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ID = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipShim(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthShim + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipShim(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowShim + } + if iNdEx >= l { + return 0, 
io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowShim + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowShim + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthShim + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowShim + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipShim(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthShim = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowShim = fmt.Errorf("proto: integer overflow") +) + +func init() { + proto.RegisterFile("github.com/containerd/containerd/runtime/v2/task/shim.proto", fileDescriptorShim) +} + +var fileDescriptorShim = []byte{ + // 1240 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x58, 0xdb, 0x6f, 0x1b, 0xc5, + 0x17, 0xee, 0xfa, 0xb2, 0xb6, 0x8f, 0xeb, 0x34, 0x9d, 0x5f, 0x9a, 0xdf, 0xd6, 0x95, 0x6c, 0x77, + 0x4b, 0x83, 0x01, 0xc9, 0x16, 0xae, 0xe0, 0x81, 0x48, 0xa0, 0xdc, 0xa8, 0x4c, 0x0b, 0x89, 0xb6, + 0x45, 0x45, 0xbc, 0x58, 0x1b, 0xef, 0xc4, 0x5e, 0xc5, 0xde, 0x59, 0x76, 0x66, 0x73, 0x41, 0x42, + 0xe2, 0x89, 0x07, 0x9e, 0xf8, 0xb3, 0xf2, 0xc0, 0x03, 0x12, 0x2f, 0xbc, 0x10, 0xa8, 0xff, 0x12, + 0x34, 0x17, 0xc7, 0x6b, 0x67, 0xd7, 0x4e, 0x2a, 0xbf, 0x44, 0x73, 0x76, 0xbe, 0x39, 0x33, 0xe7, + 0xcc, 0x77, 0xbe, 0x33, 0x31, 0x6c, 0xf6, 0x5c, 0xd6, 0x0f, 0x0f, 0x1b, 0x5d, 0x32, 0x6c, 0x76, + 0x89, 0xc7, 0x6c, 0xd7, 0xc3, 0x81, 0x13, 0x1d, 0x06, 0xa1, 0xc7, 0xdc, 0x21, 0x6e, 0x9e, 0xb4, + 0x9a, 0xcc, 0xa6, 0xc7, 0x4d, 0xda, 0x77, 0x87, 0x0d, 0x3f, 0x20, 0x8c, 0x20, 0x34, 0x81, 0x35, + 0xf8, 0x5c, 0xe3, 0xa4, 0x55, 0x7e, 0xd8, 0x23, 0xa4, 0x37, 0xc0, 0x4d, 0x81, 0x38, 0x0c, 0x8f, + 0x9a, 0xb6, 0x77, 0x2e, 0xe1, 0xe5, 0x47, 0xb3, 0x53, 0x78, 0xe8, 0xb3, 0xf1, 0xe4, 0x5a, 0x8f, + 0xf4, 0x88, 0x18, 0x36, 0xf9, 0x48, 0x7d, 0xad, 0xce, 0x2e, 0xe1, 0x47, 0xa1, 0xcc, 0x1e, 0xfa, + 0x0a, 0xf0, 0xe9, 0xc2, 0xf3, 0xdb, 0xbe, 0xdb, 0x64, 0xe7, 0x3e, 0xa6, 0xcd, 0x21, 0x09, 0x3d, + 0xa6, 0xd6, 0x7d, 0x76, 0x8b, 0x75, 0x22, 0x6c, 0x11, 0x9f, 0x58, 0x6b, 0xfe, 0x99, 0x82, 0xfb, + 0x3b, 0x01, 0xb6, 0x19, 0x7e, 0x6d, 0xd3, 0x63, 0x0b, 0xff, 0x10, 0x62, 0xca, 0xd0, 0x3a, 0xa4, + 0x5c, 0xc7, 0xd0, 0x6a, 0x5a, 0xbd, 0xb0, 0xad, 0x8f, 0x2e, 0xab, 0xa9, 0xf6, 0xae, 0x95, 0x72, + 0x1d, 0xb4, 0x0e, 0xfa, 0x61, 0xe8, 0x39, 0x03, 0x6c, 0xa4, 0xf8, 0x9c, 0xa5, 0x2c, 0xd4, 0x04, + 0x3d, 
0x20, 0x84, 0x1d, 0x51, 0x23, 0x5d, 0x4b, 0xd7, 0x8b, 0xad, 0xff, 0x37, 0xa2, 0xd9, 0xe4, + 0x1b, 0x37, 0xbe, 0xe6, 0x07, 0xb6, 0x14, 0x0c, 0x95, 0x21, 0xcf, 0x70, 0x30, 0x74, 0x3d, 0x7b, + 0x60, 0x64, 0x6a, 0x5a, 0x3d, 0x6f, 0x5d, 0xd9, 0x68, 0x0d, 0xb2, 0x94, 0x39, 0xae, 0x67, 0x64, + 0xc5, 0x1e, 0xd2, 0xe0, 0x5b, 0x53, 0xe6, 0x90, 0x90, 0x19, 0xba, 0xdc, 0x5a, 0x5a, 0xea, 0x3b, + 0x0e, 0x02, 0x23, 0x77, 0xf5, 0x1d, 0x07, 0x01, 0xaa, 0x00, 0x74, 0xfb, 0xb8, 0x7b, 0xec, 0x13, + 0xd7, 0x63, 0x46, 0x5e, 0xcc, 0x45, 0xbe, 0xa0, 0x8f, 0xe0, 0xbe, 0x6f, 0x07, 0xd8, 0x63, 0x9d, + 0x08, 0xac, 0x20, 0x60, 0xab, 0x72, 0x62, 0x67, 0x02, 0x6e, 0x40, 0x8e, 0xf8, 0xcc, 0x25, 0x1e, + 0x35, 0xa0, 0xa6, 0xd5, 0x8b, 0xad, 0xb5, 0x86, 0xbc, 0xcc, 0xc6, 0xf8, 0x32, 0x1b, 0x5b, 0xde, + 0xb9, 0x35, 0x06, 0x99, 0x1b, 0x80, 0xa2, 0x49, 0xa5, 0x3e, 0xf1, 0x28, 0x46, 0xab, 0x90, 0xf6, + 0x55, 0x5a, 0x4b, 0x16, 0x1f, 0x9a, 0x2f, 0xa1, 0xb4, 0x8b, 0x07, 0x98, 0xe1, 0x45, 0x89, 0x7f, + 0x02, 0x39, 0x7c, 0x86, 0xbb, 0x1d, 0xd7, 0x91, 0x99, 0xdf, 0x86, 0xd1, 0x65, 0x55, 0xdf, 0x3b, + 0xc3, 0xdd, 0xf6, 0xae, 0xa5, 0xf3, 0xa9, 0xb6, 0x63, 0xfe, 0xa2, 0xc1, 0xca, 0xd8, 0x5d, 0xd2, + 0x96, 0xa8, 0x0a, 0x45, 0x7c, 0xe6, 0xb2, 0x0e, 0x65, 0x36, 0x0b, 0xa9, 0xf0, 0x56, 0xb2, 0x80, + 0x7f, 0x7a, 0x25, 0xbe, 0xa0, 0x2d, 0x28, 0x70, 0x0b, 0x3b, 0x1d, 0x9b, 0x19, 0x69, 0x11, 0x6d, + 0xf9, 0x5a, 0xb4, 0xaf, 0xc7, 0xd4, 0xdd, 0xce, 0x5f, 0x5c, 0x56, 0xef, 0xfc, 0xf6, 0x4f, 0x55, + 0xb3, 0xf2, 0x72, 0xd9, 0x16, 0x33, 0xff, 0xd6, 0x00, 0xf1, 0xb3, 0x1d, 0x04, 0xa4, 0x8b, 0x29, + 0x5d, 0x46, 0x70, 0x53, 0x8c, 0x49, 0x27, 0x31, 0x26, 0x13, 0xcf, 0x98, 0x6c, 0x02, 0x63, 0xf4, + 0x29, 0xc6, 0xd4, 0x21, 0x43, 0x7d, 0xdc, 0x15, 0x3c, 0x4a, 0xba, 0x61, 0x81, 0x30, 0x1f, 0xc0, + 0xff, 0xa6, 0xc2, 0x93, 0xc9, 0x36, 0x7f, 0x82, 0x55, 0x0b, 0x53, 0xf7, 0x47, 0x7c, 0xc0, 0xce, + 0x97, 0x12, 0xf3, 0x1a, 0x64, 0x4f, 0x5d, 0x87, 0xf5, 0x45, 0xc0, 0x25, 0x4b, 0x1a, 0xfc, 0xfc, + 0x7d, 0xec, 0xf6, 0xfa, 0x4c, 0x84, 0x5b, 0xb2, 0x94, 0x65, 0xbe, 0x80, 0xbb, 0xfc, 0x0a, 0x97, + 0xc3, 0xa5, 0xdf, 0x53, 0x50, 0x52, 0xde, 0x14, 0x95, 0x6e, 0xab, 0x09, 0x8a, 0x7a, 0xe9, 0x09, + 0xf5, 0x9e, 0xf1, 0xc4, 0x0b, 0xd6, 0xf1, 0x83, 0xaf, 0xb4, 0x1e, 0x45, 0x55, 0xe2, 0xe4, 0x63, + 0x25, 0x14, 0x92, 0x86, 0x96, 0x82, 0x2e, 0x49, 0x0d, 0xa2, 0xec, 0xc9, 0xcf, 0xb0, 0x67, 0xa6, + 0x22, 0x0a, 0xf3, 0x2b, 0x02, 0xde, 0xa9, 0x22, 0x18, 0x14, 0x5f, 0xb8, 0x83, 0xc1, 0x52, 0x58, + 0xc1, 0x63, 0x74, 0x7b, 0xe3, 0x3a, 0x28, 0x59, 0xca, 0xe2, 0x09, 0xb7, 0x07, 0x63, 0x39, 0xe5, + 0x43, 0xb3, 0x0b, 0x2b, 0x3b, 0x03, 0x42, 0x71, 0x7b, 0x7f, 0x59, 0x74, 0x94, 0x57, 0x21, 0xeb, + 0x4f, 0x1a, 0xe6, 0x53, 0x28, 0x1e, 0xb8, 0xce, 0xa2, 0x22, 0x37, 0xbf, 0x81, 0xbb, 0x12, 0xa6, + 0xe8, 0xf4, 0x39, 0x14, 0x7c, 0x59, 0x3f, 0x98, 0x1a, 0x9a, 0xe8, 0x1a, 0xb5, 0x58, 0x3e, 0xa8, + 0x2a, 0x6b, 0x7b, 0x47, 0xc4, 0x9a, 0x2c, 0x31, 0x29, 0x3c, 0x98, 0x08, 0xf4, 0x4d, 0x7a, 0x17, + 0x82, 0x8c, 0x6f, 0xb3, 0xbe, 0x62, 0xa9, 0x18, 0x47, 0x75, 0x3d, 0x7d, 0x13, 0x5d, 0xef, 0xc0, + 0xfd, 0x6f, 0x7d, 0xe7, 0x86, 0xcd, 0xb2, 0x05, 0x85, 0x00, 0x53, 0x12, 0x06, 0x5d, 0x2c, 0x75, + 0x36, 0xc9, 0xfd, 0x04, 0xa6, 0x6a, 0x38, 0x60, 0x4b, 0xa9, 0xe1, 0xc7, 0xa2, 0x84, 0xb9, 0xb3, + 0xc4, 0x06, 0xf4, 0x15, 0x14, 0xdf, 0xd8, 0xee, 0x72, 0xb6, 0x0b, 0xe0, 0xae, 0xf4, 0xa5, 0x76, + 0x9b, 0xa9, 0x2b, 0x6d, 0x7e, 0x5d, 0xa5, 0xde, 0xa9, 0xae, 0x36, 0xa4, 0xe6, 0x2d, 0x64, 0xdf, + 0xa6, 0x54, 0xb3, 0x09, 0xfd, 0x3e, 0xe4, 0x5c, 0xb6, 0x99, 0x3c, 0x56, 0xd2, 0xc5, 0x48, 0x88, + 0x59, 0x87, 0x95, 0x1d, 0xe2, 
0x79, 0xb8, 0xbb, 0x28, 0x4f, 0xa6, 0x0d, 0xf7, 0xae, 0x90, 0x6a, + 0xa3, 0x87, 0x90, 0xe7, 0xaf, 0xcc, 0xce, 0x24, 0xf1, 0x39, 0x6e, 0x1f, 0xb8, 0x0e, 0x9f, 0xe2, + 0x2f, 0x31, 0x31, 0x25, 0xfb, 0x70, 0x8e, 0xdb, 0x7c, 0xca, 0x80, 0xdc, 0x09, 0x0e, 0xa8, 0x4b, + 0x64, 0xb1, 0x15, 0xac, 0xb1, 0x69, 0x6e, 0xc2, 0xbd, 0x57, 0xfd, 0x90, 0x39, 0xe4, 0xd4, 0x5b, + 0x74, 0x6b, 0xab, 0x90, 0xf6, 0xc8, 0xa9, 0x70, 0x9d, 0xb7, 0xf8, 0x90, 0xa7, 0xeb, 0xc0, 0x0e, + 0xe9, 0xa2, 0x16, 0x61, 0xbe, 0x0f, 0x25, 0x0b, 0xd3, 0x70, 0xb8, 0x08, 0xd8, 0xfa, 0x15, 0x20, + 0xc3, 0x6b, 0x01, 0xbd, 0x84, 0xac, 0x68, 0x17, 0x68, 0xaa, 0x88, 0xd5, 0x43, 0xba, 0x11, 0xed, + 0x4b, 0xe5, 0xc7, 0x73, 0x10, 0x2a, 0x69, 0x6f, 0x40, 0x97, 0xef, 0x27, 0xf4, 0x34, 0x0e, 0x7c, + 0xed, 0xc1, 0x5a, 0xde, 0x58, 0x04, 0x53, 0x8e, 0xe5, 0x31, 0x03, 0x96, 0x78, 0xcc, 0xab, 0xd2, + 0x4b, 0x3c, 0x66, 0xa4, 0x9e, 0xf6, 0x41, 0x97, 0xef, 0x2d, 0x14, 0x0b, 0x9e, 0x7a, 0xda, 0x95, + 0xcd, 0x79, 0x10, 0xe5, 0xb0, 0x0d, 0x19, 0x2e, 0x92, 0xa8, 0x1a, 0x87, 0x8d, 0xa8, 0x6c, 0xb9, + 0x96, 0x0c, 0x50, 0xae, 0xb6, 0x20, 0x2b, 0xae, 0x3a, 0x3e, 0xd2, 0x28, 0x0b, 0xca, 0xeb, 0xd7, + 0xc8, 0xbf, 0xc7, 0xff, 0x99, 0x41, 0x3b, 0xa0, 0x4b, 0x16, 0xc4, 0x87, 0x37, 0xc5, 0x90, 0x44, + 0x27, 0xfb, 0x00, 0x91, 0x87, 0xf4, 0x07, 0xb1, 0xf7, 0x14, 0xa7, 0xe3, 0x89, 0x0e, 0xbf, 0x80, + 0x0c, 0x6f, 0xa5, 0xf1, 0x39, 0x8a, 0x34, 0xd9, 0x44, 0x07, 0x5f, 0x42, 0x86, 0x2b, 0x17, 0x8a, + 0xe5, 0xcc, 0xf5, 0x67, 0x6b, 0xa2, 0x9f, 0x36, 0x14, 0xae, 0x9e, 0x7b, 0xe8, 0xbd, 0x84, 0x0c, + 0x4d, 0xbd, 0x06, 0x13, 0x5d, 0xed, 0x41, 0x4e, 0x35, 0x6a, 0x14, 0x4b, 0x93, 0xe9, 0x2e, 0x9e, + 0xe8, 0xe6, 0x39, 0xe8, 0xb2, 0x3d, 0xc5, 0x97, 0xcd, 0xb5, 0xd6, 0x35, 0x27, 0xb4, 0x0c, 0x97, + 0xf2, 0xf8, 0x1c, 0x47, 0x1a, 0x46, 0x3c, 0x0f, 0xa7, 0xba, 0x80, 0x12, 0x06, 0x9a, 0x2c, 0x0c, + 0x74, 0xa1, 0x30, 0x4c, 0x58, 0x6d, 0x41, 0x4e, 0x09, 0x6c, 0x42, 0xa2, 0xa6, 0x74, 0xba, 0xfc, + 0x64, 0x2e, 0x46, 0xf9, 0x7c, 0x0e, 0xf9, 0xb1, 0xa2, 0xa2, 0xd8, 0x05, 0x33, 0x7a, 0x9b, 0x94, + 0xb5, 0xed, 0xfd, 0x8b, 0xb7, 0x95, 0x3b, 0x7f, 0xbd, 0xad, 0xdc, 0xf9, 0x79, 0x54, 0xd1, 0x2e, + 0x46, 0x15, 0xed, 0x8f, 0x51, 0x45, 0xfb, 0x77, 0x54, 0xd1, 0xbe, 0xff, 0xe4, 0xb6, 0xbf, 0x4c, + 0x6c, 0xf2, 0x3f, 0xdf, 0xa5, 0x0e, 0x75, 0xb1, 0xc5, 0xb3, 0xff, 0x02, 0x00, 0x00, 0xff, 0xff, + 0x11, 0x4c, 0x17, 0x02, 0xdb, 0x10, 0x00, 0x00, +} diff --git a/vendor/github.com/containerd/containerd/runtime/v2/task/shim.proto b/vendor/github.com/containerd/containerd/runtime/v2/task/shim.proto new file mode 100644 index 000000000..5521ff799 --- /dev/null +++ b/vendor/github.com/containerd/containerd/runtime/v2/task/shim.proto @@ -0,0 +1,184 @@ +syntax = "proto3"; + +package containerd.task.v2; + +import "google/protobuf/any.proto"; +import "google/protobuf/empty.proto"; +import weak "gogoproto/gogo.proto"; +import "google/protobuf/timestamp.proto"; +import "github.com/containerd/containerd/api/types/mount.proto"; +import "github.com/containerd/containerd/api/types/task/task.proto"; + +option go_package = "github.com/containerd/containerd/runtime/v2/task;task"; + +// Shim service is launched for each container and is responsible for owning the IO +// for the container and its additional processes. The shim is also the parent of +// each container and allows reattaching to the IO and receiving the exit status +// for the container processes. 
+service Task { + rpc State(StateRequest) returns (StateResponse); + rpc Create(CreateTaskRequest) returns (CreateTaskResponse); + rpc Start(StartRequest) returns (StartResponse); + rpc Delete(DeleteRequest) returns (DeleteResponse); + rpc Pids(PidsRequest) returns (PidsResponse); + rpc Pause(PauseRequest) returns (google.protobuf.Empty); + rpc Resume(ResumeRequest) returns (google.protobuf.Empty); + rpc Checkpoint(CheckpointTaskRequest) returns (google.protobuf.Empty); + rpc Kill(KillRequest) returns (google.protobuf.Empty); + rpc Exec(ExecProcessRequest) returns (google.protobuf.Empty); + rpc ResizePty(ResizePtyRequest) returns (google.protobuf.Empty); + rpc CloseIO(CloseIORequest) returns (google.protobuf.Empty); + rpc Update(UpdateTaskRequest) returns (google.protobuf.Empty); + rpc Wait(WaitRequest) returns (WaitResponse); + rpc Stats(StatsRequest) returns (StatsResponse); + rpc Connect(ConnectRequest) returns (ConnectResponse); + rpc Shutdown(ShutdownRequest) returns (google.protobuf.Empty); +} + +message CreateTaskRequest { + string id = 1; + string bundle = 2; + repeated containerd.types.Mount rootfs = 3; + bool terminal = 4; + string stdin = 5; + string stdout = 6; + string stderr = 7; + string checkpoint = 8; + string parent_checkpoint = 9; + google.protobuf.Any options = 10; +} + +message CreateTaskResponse { + uint32 pid = 1; +} + +message DeleteRequest { + string id = 1; + string exec_id = 2; +} + +message DeleteResponse { + uint32 pid = 1; + uint32 exit_status = 2; + google.protobuf.Timestamp exited_at = 3 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false]; +} + +message ExecProcessRequest { + string id = 1; + string exec_id = 2; + bool terminal = 3; + string stdin = 4; + string stdout = 5; + string stderr = 6; + google.protobuf.Any spec = 7; +} + +message ExecProcessResponse { +} + +message ResizePtyRequest { + string id = 1; + string exec_id = 2; + uint32 width = 3; + uint32 height = 4; +} + +message StateRequest { + string id = 1; + string exec_id = 2; +} + +message StateResponse { + string id = 1; + string bundle = 2; + uint32 pid = 3; + containerd.v1.types.Status status = 4; + string stdin = 5; + string stdout = 6; + string stderr = 7; + bool terminal = 8; + uint32 exit_status = 9; + google.protobuf.Timestamp exited_at = 10 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false]; +} + +message KillRequest { + string id = 1; + string exec_id = 2; + uint32 signal = 3; + bool all = 4; +} + +message CloseIORequest { + string id = 1; + string exec_id = 2; + bool stdin = 3; +} + +message PidsRequest { + string id = 1; +} + +message PidsResponse { + repeated containerd.v1.types.ProcessInfo processes = 1; +} + +message CheckpointTaskRequest { + string id = 1; + string path = 2; + google.protobuf.Any options = 3; +} + +message UpdateTaskRequest { + string id = 1; + google.protobuf.Any resources = 2; +} + +message StartRequest { + string id = 1; + string exec_id = 2; +} + +message StartResponse { + uint32 pid = 1; +} + +message WaitRequest { + string id = 1; + string exec_id = 2; +} + +message WaitResponse { + uint32 exit_status = 1; + google.protobuf.Timestamp exited_at = 2 [(gogoproto.stdtime) = true, (gogoproto.nullable) = false]; +} + +message StatsRequest { + string id = 1; +} + +message StatsResponse { + google.protobuf.Any stats = 1; +} + +message ConnectRequest { + string id = 1; +} + +message ConnectResponse { + uint32 shim_pid = 1; + uint32 task_pid = 2; + string version = 3; +} + +message ShutdownRequest { + string id = 1; + bool now = 2; +} + +message 
PauseRequest { + string id = 1; +} + +message ResumeRequest { + string id = 1; +} diff --git a/vendor/github.com/containerd/cri/README.md b/vendor/github.com/containerd/cri/README.md index a97c2fe3d..e77abb51f 100644 --- a/vendor/github.com/containerd/cri/README.md +++ b/vendor/github.com/containerd/cri/README.md @@ -1,7 +1,7 @@ # cri

*Note: The standalone `cri-containerd` binary is end-of-life. `cri-containerd` is @@ -36,6 +36,7 @@ See [test dashboard](https://k8s-testgrid.appspot.com/sig-node-containerd) | v1.0.0-alpha.x | | 1.7, 1.8 | v1alpha1 | | v1.0.0-beta.x | | 1.9 | v1alpha1 | | End-Of-Life | v1.1 | 1.10+ | v1alpha2 | +| | v1.2 | 1.10+ | v1alpha2 | | | HEAD | 1.10+ | v1alpha2 | ## Production Quality Cluster on GCE @@ -149,7 +150,8 @@ implementation. For sync communication we have a community slack with a #containerd channel that everyone is welcome to join and chat about development. -**Slack:** https://dockr.ly/community +**Slack:** Catch us in the #containerd and #containerd-dev channels on dockercommunity.slack.com. +[Click here for an invite to docker community slack.](https://join.slack.com/t/dockercommunity/shared_invite/enQtNDY4MDc1Mzc0MzIwLTgxZDBlMmM4ZGEyNDc1N2FkMzlhODJkYmE1YTVkYjM1MDE3ZjAwZjBkOGFlOTJkZjRmZGYzNjYyY2M3ZTUxYzQ) ## Other Communications As this project is tightly coupled to CRI and CRI-Tools and they are Kubernetes diff --git a/vendor/github.com/containerd/cri/pkg/api/runtimeoptions/v1/api.pb.go b/vendor/github.com/containerd/cri/pkg/api/runtimeoptions/v1/api.pb.go new file mode 100644 index 000000000..71341ec78 --- /dev/null +++ b/vendor/github.com/containerd/cri/pkg/api/runtimeoptions/v1/api.pb.go @@ -0,0 +1,394 @@ +/* +Copyright 2019 The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: api.proto + +/* + Package cri_runtimeoptions_v1 is a generated protocol buffer package. + + It is generated from these files: + api.proto + + It has these top-level messages: + Options +*/ +package cri_runtimeoptions_v1 + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" +import _ "github.com/gogo/protobuf/gogoproto" + +import strings "strings" +import reflect "reflect" + +import io "io" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +type Options struct { + // TypeUrl specifies the type of the content inside the config file. + TypeUrl string `protobuf:"bytes,1,opt,name=type_url,json=typeUrl,proto3" json:"type_url,omitempty"` + // ConfigPath specifies the filesystem location of the config file + // used by the runtime. 
+ ConfigPath string `protobuf:"bytes,2,opt,name=config_path,json=configPath,proto3" json:"config_path,omitempty"` +} + +func (m *Options) Reset() { *m = Options{} } +func (*Options) ProtoMessage() {} +func (*Options) Descriptor() ([]byte, []int) { return fileDescriptorApi, []int{0} } + +func (m *Options) GetTypeUrl() string { + if m != nil { + return m.TypeUrl + } + return "" +} + +func (m *Options) GetConfigPath() string { + if m != nil { + return m.ConfigPath + } + return "" +} + +func init() { + proto.RegisterType((*Options)(nil), "cri.runtimeoptions.v1.Options") +} +func (m *Options) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Options) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.TypeUrl) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintApi(dAtA, i, uint64(len(m.TypeUrl))) + i += copy(dAtA[i:], m.TypeUrl) + } + if len(m.ConfigPath) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintApi(dAtA, i, uint64(len(m.ConfigPath))) + i += copy(dAtA[i:], m.ConfigPath) + } + return i, nil +} + +func encodeVarintApi(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} +func (m *Options) Size() (n int) { + var l int + _ = l + l = len(m.TypeUrl) + if l > 0 { + n += 1 + l + sovApi(uint64(l)) + } + l = len(m.ConfigPath) + if l > 0 { + n += 1 + l + sovApi(uint64(l)) + } + return n +} + +func sovApi(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozApi(x uint64) (n int) { + return sovApi(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (this *Options) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&Options{`, + `TypeUrl:` + fmt.Sprintf("%v", this.TypeUrl) + `,`, + `ConfigPath:` + fmt.Sprintf("%v", this.ConfigPath) + `,`, + `}`, + }, "") + return s +} +func valueToStringApi(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("*%v", pv) +} +func (m *Options) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowApi + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Options: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Options: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field TypeUrl", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowApi + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthApi + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.TypeUrl = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType 
!= 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ConfigPath", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowApi + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthApi + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ConfigPath = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipApi(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthApi + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipApi(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowApi + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowApi + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowApi + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthApi + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowApi + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipApi(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthApi = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowApi = fmt.Errorf("proto: integer overflow") +) + +func init() { proto.RegisterFile("api.proto", fileDescriptorApi) } + +var fileDescriptorApi = []byte{ + // 183 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x4c, 0x2c, 0xc8, 0xd4, + 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x12, 0x4d, 0x2e, 0xca, 0xd4, 0x2b, 0x2a, 0xcd, 0x2b, 0xc9, + 0xcc, 0x4d, 0xcd, 0x2f, 0x28, 0xc9, 0xcc, 0xcf, 0x2b, 0xd6, 0x2b, 0x33, 0x94, 0xd2, 0x4d, 0xcf, + 0x2c, 0xc9, 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0xcf, 0x4f, 0xcf, 0xd7, 0x07, 0xab, + 0x4e, 0x2a, 0x4d, 0x03, 0xf3, 0xc0, 0x1c, 0x30, 0x0b, 0x62, 0x8a, 0x92, 0x2b, 0x17, 0xbb, 0x3f, + 0x44, 0xb3, 0x90, 0x24, 0x17, 0x47, 0x49, 0x65, 0x41, 0x6a, 0x7c, 0x69, 0x51, 0x8e, 0x04, 
0xa3, + 0x02, 0xa3, 0x06, 0x67, 0x10, 0x3b, 0x88, 0x1f, 0x5a, 0x94, 0x23, 0x24, 0xcf, 0xc5, 0x9d, 0x9c, + 0x9f, 0x97, 0x96, 0x99, 0x1e, 0x5f, 0x90, 0x58, 0x92, 0x21, 0xc1, 0x04, 0x96, 0xe5, 0x82, 0x08, + 0x05, 0x24, 0x96, 0x64, 0x38, 0xc9, 0x9c, 0x78, 0x28, 0xc7, 0x78, 0xe3, 0xa1, 0x1c, 0x43, 0xc3, + 0x23, 0x39, 0xc6, 0x13, 0x8f, 0xe4, 0x18, 0x2f, 0x3c, 0x92, 0x63, 0x7c, 0xf0, 0x48, 0x8e, 0x71, + 0xc2, 0x63, 0x39, 0x86, 0x24, 0x36, 0xb0, 0x5d, 0xc6, 0x80, 0x00, 0x00, 0x00, 0xff, 0xff, 0x07, + 0x00, 0xf2, 0x18, 0xbe, 0x00, 0x00, 0x00, +} diff --git a/vendor/github.com/containerd/cri/pkg/api/runtimeoptions/v1/api.proto b/vendor/github.com/containerd/cri/pkg/api/runtimeoptions/v1/api.proto new file mode 100644 index 000000000..f907d609c --- /dev/null +++ b/vendor/github.com/containerd/cri/pkg/api/runtimeoptions/v1/api.proto @@ -0,0 +1,22 @@ +// To regenerate api.pb.go run `make proto` +syntax = "proto3"; + +package cri.runtimeoptions.v1; + +import "github.com/gogo/protobuf/gogoproto/gogo.proto"; + +option (gogoproto.goproto_stringer_all) = false; +option (gogoproto.stringer_all) = true; +option (gogoproto.goproto_getters_all) = true; +option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; +option (gogoproto.unmarshaler_all) = true; +option (gogoproto.goproto_unrecognized_all) = false; + +message Options { + // TypeUrl specifies the type of the content inside the config file. + string type_url = 1; + // ConfigPath specifies the filesystem location of the config file + // used by the runtime. + string config_path = 2; +} diff --git a/vendor/github.com/containerd/cri/vendor.conf b/vendor/github.com/containerd/cri/vendor.conf index c81758046..f59340b54 100644 --- a/vendor/github.com/containerd/cri/vendor.conf +++ b/vendor/github.com/containerd/cri/vendor.conf @@ -3,7 +3,7 @@ github.com/blang/semver v3.1.0 github.com/BurntSushi/toml a368813c5e648fee92e5f6c30e3944ff9d5e8895 github.com/containerd/cgroups 5e610833b72089b37d0e615de9a92dfc043757c2 github.com/containerd/console c12b1e7919c14469339a5d38f2f8ed9b64a9de23 -github.com/containerd/containerd 15f19d7a67fa322e6de0ef4c6a1bf9da0f056554 +github.com/containerd/containerd 6937c5a3ba8280edff9e9030767e3b0cb742581c github.com/containerd/continuity bd77b46c8352f74eb12c85bdc01f4b90f69d66b4 github.com/containerd/fifo 3d5202aec260678c48179c56f40e6f38a095738c github.com/containerd/go-cni 40bcf8ec8acd7372be1d77031d585d5d8e561c90 @@ -21,11 +21,9 @@ github.com/docker/go-metrics 4ea375f7759c82740c893fc030bc37088d2ec098 github.com/docker/go-units v0.3.1 github.com/docker/spdystream 449fdfce4d962303d702fec724ef0ad181c92528 github.com/emicklei/go-restful v2.2.1 -github.com/ghodss/yaml v1.0.0 github.com/godbus/dbus v3 github.com/gogo/googleapis 08a7655d27152912db7aaf4f983275eaf8d128ef github.com/gogo/protobuf v1.0.0 -github.com/golang/glog 44145f04b68cf362d9c4df2182967c2275eaefed github.com/golang/protobuf v1.1.0 github.com/google/gofuzz 44d81051d367757e1c7c6a5a86423ece9afcf63c github.com/grpc-ecosystem/go-grpc-prometheus v1.1 @@ -33,15 +31,15 @@ github.com/hashicorp/errwrap 7554cd9344cec97297fa6649b055a8c98c2a1e55 github.com/hashicorp/go-multierror ed905158d87462226a13fe39ddf685ea65f1c11f github.com/json-iterator/go 1.1.5 github.com/matttproud/golang_protobuf_extensions v1.0.0 -github.com/Microsoft/go-winio v0.4.10 -github.com/Microsoft/hcsshim v0.7.6 +github.com/Microsoft/go-winio v0.4.11 +github.com/Microsoft/hcsshim v0.8.2 github.com/modern-go/concurrent 1.0.3 github.com/modern-go/reflect2 1.0.1 github.com/opencontainers/go-digest 
c9281466c8b2f606084ac71339773efd177436e7 github.com/opencontainers/image-spec v1.0.1 -github.com/opencontainers/runc 00dc70017d222b178a002ed30e9321b12647af2d +github.com/opencontainers/runc v1.0.0-rc6 github.com/opencontainers/runtime-spec eba862dc2470385a233c7507392675cbeadf7353 -github.com/opencontainers/runtime-tools v0.6.0 +github.com/opencontainers/runtime-tools fb101d5d42ab9c040f7d0a004e78336e5d5cb197 github.com/opencontainers/selinux b6fa367ed7f534f9ba25391cc2d467085dbb445a github.com/pkg/errors v0.8.0 github.com/pmezard/go-difflib v1.0.0 @@ -50,7 +48,7 @@ github.com/prometheus/client_model 99fa1f4be8e564e8a6b613da7fa6f46c9edafc6c github.com/prometheus/common 89604d197083d4781071d3c65855d24ecfb0a563 github.com/prometheus/procfs cb4147076ac75738c9a7d279075a253c0cc5acbd github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0 -github.com/sirupsen/logrus v1.0.0 +github.com/sirupsen/logrus v1.0.3 github.com/stretchr/testify v1.1.4 github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16 github.com/tchap/go-patricia v2.2.6 @@ -69,10 +67,12 @@ golang.org/x/time f51c12702a4d776e4c1fa9b0fabab841babae631 google.golang.org/genproto d80a6e20e776b0b17a324d0ba1ab50a39c8e8944 google.golang.org/grpc v1.12.0 gopkg.in/inf.v0 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4 -gopkg.in/yaml.v2 53feefa2559fb8dfa8d81baad31be332c97d6c77 -k8s.io/api kubernetes-1.12.0 -k8s.io/apimachinery kubernetes-1.12.0 -k8s.io/apiserver kubernetes-1.12.0 -k8s.io/client-go kubernetes-1.12.0 -k8s.io/kubernetes v1.12.0 -k8s.io/utils cd34563cd63c2bd7c6fe88a73c4dcf34ed8a67cb +gopkg.in/yaml.v2 v2.2.1 +k8s.io/api kubernetes-1.13.0 +k8s.io/apimachinery kubernetes-1.13.0 +k8s.io/apiserver kubernetes-1.13.0 +k8s.io/client-go kubernetes-1.13.0 +k8s.io/klog 8139d8cb77af419532b33dfa7dd09fbc5f1d344f +k8s.io/kubernetes v1.13.0 +k8s.io/utils 0d26856f57b32ec3398579285e5c8a2bfe8c5243 +sigs.k8s.io/yaml v1.1.0 diff --git a/vendor/github.com/coreos/go-systemd/LICENSE b/vendor/github.com/coreos/go-systemd/LICENSE new file mode 100644 index 000000000..37ec93a14 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. 
+ +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
+ +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/coreos/go-systemd/README.md b/vendor/github.com/coreos/go-systemd/README.md new file mode 100644 index 000000000..cb87a1124 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/README.md @@ -0,0 +1,54 @@ +# go-systemd + +[![Build Status](https://travis-ci.org/coreos/go-systemd.png?branch=master)](https://travis-ci.org/coreos/go-systemd) +[![godoc](https://godoc.org/github.com/coreos/go-systemd?status.svg)](http://godoc.org/github.com/coreos/go-systemd) + +Go bindings to systemd. 
The project has several packages: + +- `activation` - for writing and using socket activation from Go +- `dbus` - for starting/stopping/inspecting running services and units +- `journal` - for writing to systemd's logging service, journald +- `sdjournal` - for reading from journald by wrapping its C API +- `machine1` - for registering machines/containers with systemd +- `unit` - for (de)serialization and comparison of unit files + +## Socket Activation + +An example HTTP server using socket activation can be quickly set up by following this README on a Linux machine running systemd: + +https://github.com/coreos/go-systemd/tree/master/examples/activation/httpserver + +## Journal + +Using the pure-Go `journal` package you can submit journal entries directly to systemd's journal, taking advantage of features like indexed key/value pairs for each log entry. +The `sdjournal` package provides read access to the journal by wrapping around journald's native C API; consequently it requires cgo and the journal headers to be available. + +## D-Bus + +The `dbus` package connects to the [systemd D-Bus API](http://www.freedesktop.org/wiki/Software/systemd/dbus/) and lets you start, stop and introspect systemd units. The API docs are here: + +http://godoc.org/github.com/coreos/go-systemd/dbus + +### Debugging + +Create `/etc/dbus-1/system-local.conf` that looks like this: + +``` + + + + + + + +``` + +## machined + +The `machine1` package allows interaction with the [systemd machined D-Bus API](http://www.freedesktop.org/wiki/Software/systemd/machined/). + +## Units + +The `unit` package provides various functions for working with [systemd unit files](http://www.freedesktop.org/software/systemd/man/systemd.unit.html). diff --git a/vendor/github.com/coreos/go-systemd/dbus/dbus.go b/vendor/github.com/coreos/go-systemd/dbus/dbus.go new file mode 100644 index 000000000..c1694fb52 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/dbus.go @@ -0,0 +1,213 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Integration with the systemd D-Bus API. See http://www.freedesktop.org/wiki/Software/systemd/dbus/ +package dbus + +import ( + "fmt" + "os" + "strconv" + "strings" + "sync" + + "github.com/godbus/dbus" +) + +const ( + alpha = `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ` + num = `0123456789` + alphanum = alpha + num + signalBuffer = 100 +) + +// needsEscape checks whether a byte in a potential dbus ObjectPath needs to be escaped +func needsEscape(i int, b byte) bool { + // Escape everything that is not a-z-A-Z-0-9 + // Also escape 0-9 if it's the first character + return strings.IndexByte(alphanum, b) == -1 || + (i == 0 && strings.IndexByte(num, b) != -1) +} + +// PathBusEscape sanitizes a constituent string of a dbus ObjectPath using the +// rules that systemd uses for serializing special characters. 
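The go-systemd README above also describes a pure-Go `journal` package for writing structured entries to journald. For orientation only, a minimal sketch of that package; it is not part of the files vendored in this patch, and the field name below is illustrative:

```go
package main

import (
	"fmt"

	"github.com/coreos/go-systemd/journal"
)

func main() {
	// journal.Enabled reports whether the journald socket is reachable.
	if !journal.Enabled() {
		fmt.Println("journald not available; falling back to stdout")
		return
	}
	// Send writes one entry with indexed key/value fields attached.
	if err := journal.Send("hello from go-systemd", journal.PriInfo, map[string]string{
		"EXAMPLE_FIELD": "demo", // illustrative field name
	}); err != nil {
		fmt.Println("journal write failed:", err)
	}
}
```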
+func PathBusEscape(path string) string { + // Special case the empty string + if len(path) == 0 { + return "_" + } + n := []byte{} + for i := 0; i < len(path); i++ { + c := path[i] + if needsEscape(i, c) { + e := fmt.Sprintf("_%x", c) + n = append(n, []byte(e)...) + } else { + n = append(n, c) + } + } + return string(n) +} + +// Conn is a connection to systemd's dbus endpoint. +type Conn struct { + // sysconn/sysobj are only used to call dbus methods + sysconn *dbus.Conn + sysobj dbus.BusObject + + // sigconn/sigobj are only used to receive dbus signals + sigconn *dbus.Conn + sigobj dbus.BusObject + + jobListener struct { + jobs map[dbus.ObjectPath]chan<- string + sync.Mutex + } + subscriber struct { + updateCh chan<- *SubStateUpdate + errCh chan<- error + sync.Mutex + ignore map[dbus.ObjectPath]int64 + cleanIgnore int64 + } +} + +// New establishes a connection to any available bus and authenticates. +// Callers should call Close() when done with the connection. +func New() (*Conn, error) { + conn, err := NewSystemConnection() + if err != nil && os.Geteuid() == 0 { + return NewSystemdConnection() + } + return conn, err +} + +// NewSystemConnection establishes a connection to the system bus and authenticates. +// Callers should call Close() when done with the connection +func NewSystemConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + return dbusAuthHelloConnection(dbus.SystemBusPrivate) + }) +} + +// NewUserConnection establishes a connection to the session bus and +// authenticates. This can be used to connect to systemd user instances. +// Callers should call Close() when done with the connection. +func NewUserConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + return dbusAuthHelloConnection(dbus.SessionBusPrivate) + }) +} + +// NewSystemdConnection establishes a private, direct connection to systemd. +// This can be used for communicating with systemd without a dbus daemon. +// Callers should call Close() when done with the connection. +func NewSystemdConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + // We skip Hello when talking directly to systemd. + return dbusAuthConnection(func() (*dbus.Conn, error) { + return dbus.Dial("unix:path=/run/systemd/private") + }) + }) +} + +// Close closes an established connection +func (c *Conn) Close() { + c.sysconn.Close() + c.sigconn.Close() +} + +// NewConnection establishes a connection to a bus using a caller-supplied function. +// This allows connecting to remote buses through a user-supplied mechanism. +// The supplied function may be called multiple times, and should return independent connections. +// The returned connection must be fully initialised: the org.freedesktop.DBus.Hello call must have succeeded, +// and any authentication should be handled by the function. 
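`PathBusEscape` above applies the escaping rule its comment describes: every byte outside `[a-zA-Z0-9]`, and any leading digit, is rewritten as `_` followed by its hex value. A small, hypothetical usage sketch (the unit name is made up):

```go
package main

import (
	"fmt"

	"github.com/coreos/go-systemd/dbus"
)

func main() {
	// '-' (0x2d) and '.' (0x2e) are escaped; letters and digits pass through.
	fmt.Println(dbus.PathBusEscape("cri-containerd.service"))
	// Output: cri_2dcontainerd_2eservice
}
```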
+func NewConnection(dialBus func() (*dbus.Conn, error)) (*Conn, error) { + sysconn, err := dialBus() + if err != nil { + return nil, err + } + + sigconn, err := dialBus() + if err != nil { + sysconn.Close() + return nil, err + } + + c := &Conn{ + sysconn: sysconn, + sysobj: systemdObject(sysconn), + sigconn: sigconn, + sigobj: systemdObject(sigconn), + } + + c.subscriber.ignore = make(map[dbus.ObjectPath]int64) + c.jobListener.jobs = make(map[dbus.ObjectPath]chan<- string) + + // Setup the listeners on jobs so that we can get completions + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal', interface='org.freedesktop.systemd1.Manager', member='JobRemoved'") + + c.dispatch() + return c, nil +} + +// GetManagerProperty returns the value of a property on the org.freedesktop.systemd1.Manager +// interface. The value is returned in its string representation, as defined at +// https://developer.gnome.org/glib/unstable/gvariant-text.html +func (c *Conn) GetManagerProperty(prop string) (string, error) { + variant, err := c.sysobj.GetProperty("org.freedesktop.systemd1.Manager." + prop) + if err != nil { + return "", err + } + return variant.String(), nil +} + +func dbusAuthConnection(createBus func() (*dbus.Conn, error)) (*dbus.Conn, error) { + conn, err := createBus() + if err != nil { + return nil, err + } + + // Only use EXTERNAL method, and hardcode the uid (not username) + // to avoid a username lookup (which requires a dynamically linked + // libc) + methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(os.Getuid()))} + + err = conn.Auth(methods) + if err != nil { + conn.Close() + return nil, err + } + + return conn, nil +} + +func dbusAuthHelloConnection(createBus func() (*dbus.Conn, error)) (*dbus.Conn, error) { + conn, err := dbusAuthConnection(createBus) + if err != nil { + return nil, err + } + + if err = conn.Hello(); err != nil { + conn.Close() + return nil, err + } + + return conn, nil +} + +func systemdObject(conn *dbus.Conn) dbus.BusObject { + return conn.Object("org.freedesktop.systemd1", dbus.ObjectPath("/org/freedesktop/systemd1")) +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/methods.go b/vendor/github.com/coreos/go-systemd/dbus/methods.go new file mode 100644 index 000000000..ab17f7cc7 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/methods.go @@ -0,0 +1,565 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
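The connection helpers in `dbus.go` above (`New`, `NewSystemConnection`, `NewSystemdConnection`, `GetManagerProperty`) are the entry point for everything defined in `methods.go` below. A minimal sketch, not part of this patch, assuming the caller can reach the system bus:

```go
package main

import (
	"fmt"
	"log"

	"github.com/coreos/go-systemd/dbus"
)

func main() {
	// New tries the system bus first and falls back to systemd's private
	// socket when running as root.
	conn, err := dbus.New()
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Read a property of org.freedesktop.systemd1.Manager; the value is
	// returned in its GVariant text form.
	version, err := conn.GetManagerProperty("Version")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("systemd version:", version)
}
```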
+ +package dbus + +import ( + "errors" + "path" + "strconv" + + "github.com/godbus/dbus" +) + +func (c *Conn) jobComplete(signal *dbus.Signal) { + var id uint32 + var job dbus.ObjectPath + var unit string + var result string + dbus.Store(signal.Body, &id, &job, &unit, &result) + c.jobListener.Lock() + out, ok := c.jobListener.jobs[job] + if ok { + out <- result + delete(c.jobListener.jobs, job) + } + c.jobListener.Unlock() +} + +func (c *Conn) startJob(ch chan<- string, job string, args ...interface{}) (int, error) { + if ch != nil { + c.jobListener.Lock() + defer c.jobListener.Unlock() + } + + var p dbus.ObjectPath + err := c.sysobj.Call(job, 0, args...).Store(&p) + if err != nil { + return 0, err + } + + if ch != nil { + c.jobListener.jobs[p] = ch + } + + // ignore error since 0 is fine if conversion fails + jobID, _ := strconv.Atoi(path.Base(string(p))) + + return jobID, nil +} + +// StartUnit enqueues a start job and depending jobs, if any (unless otherwise +// specified by the mode string). +// +// Takes the unit to activate, plus a mode string. The mode needs to be one of +// replace, fail, isolate, ignore-dependencies, ignore-requirements. If +// "replace" the call will start the unit and its dependencies, possibly +// replacing already queued jobs that conflict with this. If "fail" the call +// will start the unit and its dependencies, but will fail if this would change +// an already queued job. If "isolate" the call will start the unit in question +// and terminate all units that aren't dependencies of it. If +// "ignore-dependencies" it will start a unit but ignore all its dependencies. +// If "ignore-requirements" it will start a unit but only ignore the +// requirement dependencies. It is not recommended to make use of the latter +// two options. +// +// If the provided channel is non-nil, a result string will be sent to it upon +// job completion: one of done, canceled, timeout, failed, dependency, skipped. +// done indicates successful execution of a job. canceled indicates that a job +// has been canceled before it finished execution. timeout indicates that the +// job timeout was reached. failed indicates that the job failed. dependency +// indicates that a job this job has been depending on failed and the job hence +// has been removed too. skipped indicates that a job was skipped because it +// didn't apply to the units current state. +// +// If no error occurs, the ID of the underlying systemd job will be returned. There +// does exist the possibility for no error to be returned, but for the returned job +// ID to be 0. In this case, the actual underlying ID is not 0 and this datapoint +// should not be considered authoritative. +// +// If an error does occur, it will be returned to the user alongside a job ID of 0. +func (c *Conn) StartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.StartUnit", name, mode) +} + +// StopUnit is similar to StartUnit but stops the specified unit rather +// than starting it. +func (c *Conn) StopUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.StopUnit", name, mode) +} + +// ReloadUnit reloads a unit. Reloading is done only if the unit is already running and fails otherwise. +func (c *Conn) ReloadUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadUnit", name, mode) +} + +// RestartUnit restarts a service. 
If a service is restarted that isn't +// running it will be started. +func (c *Conn) RestartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.RestartUnit", name, mode) +} + +// TryRestartUnit is like RestartUnit, except that a service that isn't running +// is not affected by the restart. +func (c *Conn) TryRestartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.TryRestartUnit", name, mode) +} + +// ReloadOrRestart attempts a reload if the unit supports it and use a restart +// otherwise. +func (c *Conn) ReloadOrRestartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadOrRestartUnit", name, mode) +} + +// ReloadOrTryRestart attempts a reload if the unit supports it and use a "Try" +// flavored restart otherwise. +func (c *Conn) ReloadOrTryRestartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadOrTryRestartUnit", name, mode) +} + +// StartTransientUnit() may be used to create and start a transient unit, which +// will be released as soon as it is not running or referenced anymore or the +// system is rebooted. name is the unit name including suffix, and must be +// unique. mode is the same as in StartUnit(), properties contains properties +// of the unit. +func (c *Conn) StartTransientUnit(name string, mode string, properties []Property, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.StartTransientUnit", name, mode, properties, make([]PropertyCollection, 0)) +} + +// KillUnit takes the unit name and a UNIX signal number to send. All of the unit's +// processes are killed. +func (c *Conn) KillUnit(name string, signal int32) { + c.sysobj.Call("org.freedesktop.systemd1.Manager.KillUnit", 0, name, "all", signal).Store() +} + +// ResetFailedUnit resets the "failed" state of a specific unit. +func (c *Conn) ResetFailedUnit(name string) error { + return c.sysobj.Call("org.freedesktop.systemd1.Manager.ResetFailedUnit", 0, name).Store() +} + +// getProperties takes the unit name and returns all of its dbus object properties, for the given dbus interface +func (c *Conn) getProperties(unit string, dbusInterface string) (map[string]interface{}, error) { + var err error + var props map[string]dbus.Variant + + path := unitPath(unit) + if !path.IsValid() { + return nil, errors.New("invalid unit name: " + unit) + } + + obj := c.sysconn.Object("org.freedesktop.systemd1", path) + err = obj.Call("org.freedesktop.DBus.Properties.GetAll", 0, dbusInterface).Store(&props) + if err != nil { + return nil, err + } + + out := make(map[string]interface{}, len(props)) + for k, v := range props { + out[k] = v.Value() + } + + return out, nil +} + +// GetUnitProperties takes the unit name and returns all of its dbus object properties. 
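The job-based helpers above (`StartUnit`, `StopUnit`, `RestartUnit`, and friends) all enqueue a systemd job and optionally report its result over a channel. A minimal sketch, assuming a unit named `nginx.service` is installed on the host (the unit name is an assumption, not taken from this patch):

```go
package main

import (
	"fmt"
	"log"

	"github.com/coreos/go-systemd/dbus"
)

func main() {
	conn, err := dbus.New()
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// The channel receives one of: done, canceled, timeout, failed,
	// dependency, skipped.
	result := make(chan string, 1)
	jobID, err := conn.StartUnit("nginx.service", "replace", result)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("queued job %d, result: %s\n", jobID, <-result)
}
```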
+func (c *Conn) GetUnitProperties(unit string) (map[string]interface{}, error) { + return c.getProperties(unit, "org.freedesktop.systemd1.Unit") +} + +func (c *Conn) getProperty(unit string, dbusInterface string, propertyName string) (*Property, error) { + var err error + var prop dbus.Variant + + path := unitPath(unit) + if !path.IsValid() { + return nil, errors.New("invalid unit name: " + unit) + } + + obj := c.sysconn.Object("org.freedesktop.systemd1", path) + err = obj.Call("org.freedesktop.DBus.Properties.Get", 0, dbusInterface, propertyName).Store(&prop) + if err != nil { + return nil, err + } + + return &Property{Name: propertyName, Value: prop}, nil +} + +func (c *Conn) GetUnitProperty(unit string, propertyName string) (*Property, error) { + return c.getProperty(unit, "org.freedesktop.systemd1.Unit", propertyName) +} + +// GetServiceProperty returns property for given service name and property name +func (c *Conn) GetServiceProperty(service string, propertyName string) (*Property, error) { + return c.getProperty(service, "org.freedesktop.systemd1.Service", propertyName) +} + +// GetUnitTypeProperties returns the extra properties for a unit, specific to the unit type. +// Valid values for unitType: Service, Socket, Target, Device, Mount, Automount, Snapshot, Timer, Swap, Path, Slice, Scope +// return "dbus.Error: Unknown interface" if the unitType is not the correct type of the unit +func (c *Conn) GetUnitTypeProperties(unit string, unitType string) (map[string]interface{}, error) { + return c.getProperties(unit, "org.freedesktop.systemd1."+unitType) +} + +// SetUnitProperties() may be used to modify certain unit properties at runtime. +// Not all properties may be changed at runtime, but many resource management +// settings (primarily those in systemd.cgroup(5)) may. The changes are applied +// instantly, and stored on disk for future boots, unless runtime is true, in which +// case the settings only apply until the next reboot. name is the name of the unit +// to modify. properties are the settings to set, encoded as an array of property +// name and value pairs. +func (c *Conn) SetUnitProperties(name string, runtime bool, properties ...Property) error { + return c.sysobj.Call("org.freedesktop.systemd1.Manager.SetUnitProperties", 0, name, runtime, properties).Store() +} + +func (c *Conn) GetUnitTypeProperty(unit string, unitType string, propertyName string) (*Property, error) { + return c.getProperty(unit, "org.freedesktop.systemd1."+unitType, propertyName) +} + +type UnitStatus struct { + Name string // The primary unit name as string + Description string // The human readable description string + LoadState string // The load state (i.e. whether the unit file has been loaded successfully) + ActiveState string // The active state (i.e. whether the unit is currently started or not) + SubState string // The sub state (a more fine-grained version of the active state that is specific to the unit type, which the active state is not) + Followed string // A unit that is being followed in its state by this unit, if there is any, otherwise the empty string. 
+ Path dbus.ObjectPath // The unit object path + JobId uint32 // If there is a job queued for the job unit the numeric job id, 0 otherwise + JobType string // The job type as string + JobPath dbus.ObjectPath // The job object path +} + +type storeFunc func(retvalues ...interface{}) error + +func (c *Conn) listUnitsInternal(f storeFunc) ([]UnitStatus, error) { + result := make([][]interface{}, 0) + err := f(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + status := make([]UnitStatus, len(result)) + statusInterface := make([]interface{}, len(status)) + for i := range status { + statusInterface[i] = &status[i] + } + + err = dbus.Store(resultInterface, statusInterface...) + if err != nil { + return nil, err + } + + return status, nil +} + +// ListUnits returns an array with all currently loaded units. Note that +// units may be known by multiple names at the same time, and hence there might +// be more unit names loaded than actual units behind them. +func (c *Conn) ListUnits() ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnits", 0).Store) +} + +// ListUnitsFiltered returns an array with units filtered by state. +// It takes a list of units' statuses to filter. +func (c *Conn) ListUnitsFiltered(states []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsFiltered", 0, states).Store) +} + +// ListUnitsByPatterns returns an array with units. +// It takes a list of units' statuses and names to filter. +// Note that units may be known by multiple names at the same time, +// and hence there might be more unit names loaded than actual units behind them. +func (c *Conn) ListUnitsByPatterns(states []string, patterns []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsByPatterns", 0, states, patterns).Store) +} + +// ListUnitsByNames returns an array with units. It takes a list of units' +// names and returns an UnitStatus array. Comparing to ListUnitsByPatterns +// method, this method returns statuses even for inactive or non-existing +// units. Input array should contain exact unit names, but not patterns. +func (c *Conn) ListUnitsByNames(units []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsByNames", 0, units).Store) +} + +type UnitFile struct { + Path string + Type string +} + +func (c *Conn) listUnitFilesInternal(f storeFunc) ([]UnitFile, error) { + result := make([][]interface{}, 0) + err := f(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + files := make([]UnitFile, len(result)) + fileInterface := make([]interface{}, len(files)) + for i := range files { + fileInterface[i] = &files[i] + } + + err = dbus.Store(resultInterface, fileInterface...) + if err != nil { + return nil, err + } + + return files, nil +} + +// ListUnitFiles returns an array of all available units on disk. +func (c *Conn) ListUnitFiles() ([]UnitFile, error) { + return c.listUnitFilesInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitFiles", 0).Store) +} + +// ListUnitFilesByPatterns returns an array of all available units on disk matched the patterns. 
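The `ListUnits*` helpers above return slices of `UnitStatus`. A short sketch of filtering by state and name pattern (the pattern and state are illustrative, not part of this patch):

```go
package main

import (
	"fmt"
	"log"

	"github.com/coreos/go-systemd/dbus"
)

func main() {
	conn, err := dbus.New()
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Only currently active units whose names match *.service.
	units, err := conn.ListUnitsByPatterns([]string{"active"}, []string{"*.service"})
	if err != nil {
		log.Fatal(err)
	}
	for _, u := range units {
		fmt.Printf("%s: %s/%s\n", u.Name, u.ActiveState, u.SubState)
	}
}
```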
+func (c *Conn) ListUnitFilesByPatterns(states []string, patterns []string) ([]UnitFile, error) { + return c.listUnitFilesInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitFilesByPatterns", 0, states, patterns).Store) +} + +type LinkUnitFileChange EnableUnitFileChange + +// LinkUnitFiles() links unit files (that are located outside of the +// usual unit search paths) into the unit search path. +// +// It takes a list of absolute paths to unit files to link and two +// booleans. The first boolean controls whether the unit shall be +// enabled for runtime only (true, /run), or persistently (false, +// /etc). +// The second controls whether symlinks pointing to other units shall +// be replaced if necessary. +// +// This call returns a list of the changes made. The list consists of +// structures with three strings: the type of the change (one of symlink +// or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) LinkUnitFiles(files []string, runtime bool, force bool) ([]LinkUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.LinkUnitFiles", 0, files, runtime, force).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]LinkUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +// EnableUnitFiles() may be used to enable one or more units in the system (by +// creating symlinks to them in /etc or /run). +// +// It takes a list of unit files to enable (either just file names or full +// absolute paths if the unit files are residing outside the usual unit +// search paths), and two booleans: the first controls whether the unit shall +// be enabled for runtime only (true, /run), or persistently (false, /etc). +// The second one controls whether symlinks pointing to other units shall +// be replaced if necessary. +// +// This call returns one boolean and an array with the changes made. The +// boolean signals whether the unit files contained any enablement +// information (i.e. an [Install]) section. The changes list consists of +// structures with three strings: the type of the change (one of symlink +// or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) EnableUnitFiles(files []string, runtime bool, force bool) (bool, []EnableUnitFileChange, error) { + var carries_install_info bool + + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.EnableUnitFiles", 0, files, runtime, force).Store(&carries_install_info, &result) + if err != nil { + return false, nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]EnableUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) 
+ if err != nil { + return false, nil, err + } + + return carries_install_info, changes, nil +} + +type EnableUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// DisableUnitFiles() may be used to disable one or more units in the system (by +// removing symlinks to them from /etc or /run). +// +// It takes a list of unit files to disable (either just file names or full +// absolute paths if the unit files are residing outside the usual unit +// search paths), and one boolean: whether the unit was enabled for runtime +// only (true, /run), or persistently (false, /etc). +// +// This call returns an array with the changes made. The changes list +// consists of structures with three strings: the type of the change (one of +// symlink or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) DisableUnitFiles(files []string, runtime bool) ([]DisableUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.DisableUnitFiles", 0, files, runtime).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]DisableUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +type DisableUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// MaskUnitFiles masks one or more units in the system +// +// It takes three arguments: +// * list of units to mask (either just file names or full +// absolute paths if the unit files are residing outside +// the usual unit search paths) +// * runtime to specify whether the unit was enabled for runtime +// only (true, /run/systemd/..), or persistently (false, /etc/systemd/..) +// * force flag +func (c *Conn) MaskUnitFiles(files []string, runtime bool, force bool) ([]MaskUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.MaskUnitFiles", 0, files, runtime, force).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]MaskUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) 
+ if err != nil { + return nil, err + } + + return changes, nil +} + +type MaskUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// UnmaskUnitFiles unmasks one or more units in the system +// +// It takes two arguments: +// * list of unit files to mask (either just file names or full +// absolute paths if the unit files are residing outside +// the usual unit search paths) +// * runtime to specify whether the unit was enabled for runtime +// only (true, /run/systemd/..), or persistently (false, /etc/systemd/..) +func (c *Conn) UnmaskUnitFiles(files []string, runtime bool) ([]UnmaskUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.UnmaskUnitFiles", 0, files, runtime).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]UnmaskUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +type UnmaskUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// Reload instructs systemd to scan for and reload unit files. This is +// equivalent to a 'systemctl daemon-reload'. +func (c *Conn) Reload() error { + return c.sysobj.Call("org.freedesktop.systemd1.Manager.Reload", 0).Store() +} + +func unitPath(name string) dbus.ObjectPath { + return dbus.ObjectPath("/org/freedesktop/systemd1/unit/" + PathBusEscape(name)) +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/properties.go b/vendor/github.com/coreos/go-systemd/dbus/properties.go new file mode 100644 index 000000000..6c8189587 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/properties.go @@ -0,0 +1,237 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "github.com/godbus/dbus" +) + +// From the systemd docs: +// +// The properties array of StartTransientUnit() may take many of the settings +// that may also be configured in unit files. Not all parameters are currently +// accepted though, but we plan to cover more properties with future release. +// Currently you may set the Description, Slice and all dependency types of +// units, as well as RemainAfterExit, ExecStart for service units, +// TimeoutStopUSec and PIDs for scope units, and CPUAccounting, CPUShares, +// BlockIOAccounting, BlockIOWeight, BlockIOReadBandwidth, +// BlockIOWriteBandwidth, BlockIODeviceWeight, MemoryAccounting, MemoryLimit, +// DevicePolicy, DeviceAllow for services/scopes/slices. 
These fields map +// directly to their counterparts in unit files and as normal D-Bus object +// properties. The exception here is the PIDs field of scope units which is +// used for construction of the scope only and specifies the initial PIDs to +// add to the scope object. + +type Property struct { + Name string + Value dbus.Variant +} + +type PropertyCollection struct { + Name string + Properties []Property +} + +type execStart struct { + Path string // the binary path to execute + Args []string // an array with all arguments to pass to the executed command, starting with argument 0 + UncleanIsFailure bool // a boolean whether it should be considered a failure if the process exits uncleanly +} + +// PropExecStart sets the ExecStart service property. The first argument is a +// slice with the binary path to execute followed by the arguments to pass to +// the executed command. See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecStart= +func PropExecStart(command []string, uncleanIsFailure bool) Property { + execStarts := []execStart{ + execStart{ + Path: command[0], + Args: command, + UncleanIsFailure: uncleanIsFailure, + }, + } + + return Property{ + Name: "ExecStart", + Value: dbus.MakeVariant(execStarts), + } +} + +// PropRemainAfterExit sets the RemainAfterExit service property. See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#RemainAfterExit= +func PropRemainAfterExit(b bool) Property { + return Property{ + Name: "RemainAfterExit", + Value: dbus.MakeVariant(b), + } +} + +// PropType sets the Type service property. See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#Type= +func PropType(t string) Property { + return Property{ + Name: "Type", + Value: dbus.MakeVariant(t), + } +} + +// PropDescription sets the Description unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit#Description= +func PropDescription(desc string) Property { + return Property{ + Name: "Description", + Value: dbus.MakeVariant(desc), + } +} + +func propDependency(name string, units []string) Property { + return Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +// PropRequires sets the Requires unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requires= +func PropRequires(units ...string) Property { + return propDependency("Requires", units) +} + +// PropRequiresOverridable sets the RequiresOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiresOverridable= +func PropRequiresOverridable(units ...string) Property { + return propDependency("RequiresOverridable", units) +} + +// PropRequisite sets the Requisite unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requisite= +func PropRequisite(units ...string) Property { + return propDependency("Requisite", units) +} + +// PropRequisiteOverridable sets the RequisiteOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequisiteOverridable= +func PropRequisiteOverridable(units ...string) Property { + return propDependency("RequisiteOverridable", units) +} + +// PropWants sets the Wants unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Wants= +func PropWants(units ...string) Property { + return propDependency("Wants", units) +} + +// PropBindsTo sets the BindsTo unit property. 
See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#BindsTo= +func PropBindsTo(units ...string) Property { + return propDependency("BindsTo", units) +} + +// PropRequiredBy sets the RequiredBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiredBy= +func PropRequiredBy(units ...string) Property { + return propDependency("RequiredBy", units) +} + +// PropRequiredByOverridable sets the RequiredByOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiredByOverridable= +func PropRequiredByOverridable(units ...string) Property { + return propDependency("RequiredByOverridable", units) +} + +// PropWantedBy sets the WantedBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#WantedBy= +func PropWantedBy(units ...string) Property { + return propDependency("WantedBy", units) +} + +// PropBoundBy sets the BoundBy unit property. See +// http://www.freedesktop.org/software/systemd/main/systemd.unit.html#BoundBy= +func PropBoundBy(units ...string) Property { + return propDependency("BoundBy", units) +} + +// PropConflicts sets the Conflicts unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Conflicts= +func PropConflicts(units ...string) Property { + return propDependency("Conflicts", units) +} + +// PropConflictedBy sets the ConflictedBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#ConflictedBy= +func PropConflictedBy(units ...string) Property { + return propDependency("ConflictedBy", units) +} + +// PropBefore sets the Before unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Before= +func PropBefore(units ...string) Property { + return propDependency("Before", units) +} + +// PropAfter sets the After unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#After= +func PropAfter(units ...string) Property { + return propDependency("After", units) +} + +// PropOnFailure sets the OnFailure unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#OnFailure= +func PropOnFailure(units ...string) Property { + return propDependency("OnFailure", units) +} + +// PropTriggers sets the Triggers unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Triggers= +func PropTriggers(units ...string) Property { + return propDependency("Triggers", units) +} + +// PropTriggeredBy sets the TriggeredBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#TriggeredBy= +func PropTriggeredBy(units ...string) Property { + return propDependency("TriggeredBy", units) +} + +// PropPropagatesReloadTo sets the PropagatesReloadTo unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#PropagatesReloadTo= +func PropPropagatesReloadTo(units ...string) Property { + return propDependency("PropagatesReloadTo", units) +} + +// PropRequiresMountsFor sets the RequiresMountsFor unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiresMountsFor= +func PropRequiresMountsFor(units ...string) Property { + return propDependency("RequiresMountsFor", units) +} + +// PropSlice sets the Slice unit property. 
See +// http://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#Slice= +func PropSlice(slice string) Property { + return Property{ + Name: "Slice", + Value: dbus.MakeVariant(slice), + } +} + +// PropPids sets the PIDs field of scope units used in the initial construction +// of the scope only and specifies the initial PIDs to add to the scope object. +// See https://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/#properties +func PropPids(pids ...uint32) Property { + return Property{ + Name: "PIDs", + Value: dbus.MakeVariant(pids), + } +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/set.go b/vendor/github.com/coreos/go-systemd/dbus/set.go new file mode 100644 index 000000000..f92e6fbed --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/set.go @@ -0,0 +1,47 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +type set struct { + data map[string]bool +} + +func (s *set) Add(value string) { + s.data[value] = true +} + +func (s *set) Remove(value string) { + delete(s.data, value) +} + +func (s *set) Contains(value string) (exists bool) { + _, exists = s.data[value] + return +} + +func (s *set) Length() int { + return len(s.data) +} + +func (s *set) Values() (values []string) { + for val, _ := range s.data { + values = append(values, val) + } + return +} + +func newSet() *set { + return &set{make(map[string]bool)} +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/subscription.go b/vendor/github.com/coreos/go-systemd/dbus/subscription.go new file mode 100644 index 000000000..996451445 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/subscription.go @@ -0,0 +1,250 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "errors" + "time" + + "github.com/godbus/dbus" +) + +const ( + cleanIgnoreInterval = int64(10 * time.Second) + ignoreInterval = int64(30 * time.Millisecond) +) + +// Subscribe sets up this connection to subscribe to all systemd dbus events. +// This is required before calling SubscribeUnits. When the connection closes +// systemd will automatically stop sending signals so there is no need to +// explicitly call Unsubscribe(). 
+func (c *Conn) Subscribe() error { + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal',interface='org.freedesktop.systemd1.Manager',member='UnitNew'") + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal',interface='org.freedesktop.DBus.Properties',member='PropertiesChanged'") + + err := c.sigobj.Call("org.freedesktop.systemd1.Manager.Subscribe", 0).Store() + if err != nil { + return err + } + + return nil +} + +// Unsubscribe this connection from systemd dbus events. +func (c *Conn) Unsubscribe() error { + err := c.sigobj.Call("org.freedesktop.systemd1.Manager.Unsubscribe", 0).Store() + if err != nil { + return err + } + + return nil +} + +func (c *Conn) dispatch() { + ch := make(chan *dbus.Signal, signalBuffer) + + c.sigconn.Signal(ch) + + go func() { + for { + signal, ok := <-ch + if !ok { + return + } + + if signal.Name == "org.freedesktop.systemd1.Manager.JobRemoved" { + c.jobComplete(signal) + } + + if c.subscriber.updateCh == nil { + continue + } + + var unitPath dbus.ObjectPath + switch signal.Name { + case "org.freedesktop.systemd1.Manager.JobRemoved": + unitName := signal.Body[2].(string) + c.sysobj.Call("org.freedesktop.systemd1.Manager.GetUnit", 0, unitName).Store(&unitPath) + case "org.freedesktop.systemd1.Manager.UnitNew": + unitPath = signal.Body[1].(dbus.ObjectPath) + case "org.freedesktop.DBus.Properties.PropertiesChanged": + if signal.Body[0].(string) == "org.freedesktop.systemd1.Unit" { + unitPath = signal.Path + } + } + + if unitPath == dbus.ObjectPath("") { + continue + } + + c.sendSubStateUpdate(unitPath) + } + }() +} + +// Returns two unbuffered channels which will receive all changed units every +// interval. Deleted units are sent as nil. +func (c *Conn) SubscribeUnits(interval time.Duration) (<-chan map[string]*UnitStatus, <-chan error) { + return c.SubscribeUnitsCustom(interval, 0, func(u1, u2 *UnitStatus) bool { return *u1 != *u2 }, nil) +} + +// SubscribeUnitsCustom is like SubscribeUnits but lets you specify the buffer +// size of the channels, the comparison function for detecting changes and a filter +// function for cutting down on the noise that your channel receives. +func (c *Conn) SubscribeUnitsCustom(interval time.Duration, buffer int, isChanged func(*UnitStatus, *UnitStatus) bool, filterUnit func(string) bool) (<-chan map[string]*UnitStatus, <-chan error) { + old := make(map[string]*UnitStatus) + statusChan := make(chan map[string]*UnitStatus, buffer) + errChan := make(chan error, buffer) + + go func() { + for { + timerChan := time.After(interval) + + units, err := c.ListUnits() + if err == nil { + cur := make(map[string]*UnitStatus) + for i := range units { + if filterUnit != nil && filterUnit(units[i].Name) { + continue + } + cur[units[i].Name] = &units[i] + } + + // add all new or changed units + changed := make(map[string]*UnitStatus) + for n, u := range cur { + if oldU, ok := old[n]; !ok || isChanged(oldU, u) { + changed[n] = u + } + delete(old, n) + } + + // add all deleted units + for oldN := range old { + changed[oldN] = nil + } + + old = cur + + if len(changed) != 0 { + statusChan <- changed + } + } else { + errChan <- err + } + + <-timerChan + } + }() + + return statusChan, errChan +} + +type SubStateUpdate struct { + UnitName string + SubState string +} + +// SetSubStateSubscriber writes to updateCh when any unit's substate changes. 
+// Although this writes to updateCh on every state change, the reported state +// may be more recent than the change that generated it (due to an unavoidable +// race in the systemd dbus interface). That is, this method provides a good +// way to keep a current view of all units' states, but is not guaranteed to +// show every state transition they go through. Furthermore, state changes +// will only be written to the channel with non-blocking writes. If updateCh +// is full, it attempts to write an error to errCh; if errCh is full, the error +// passes silently. +func (c *Conn) SetSubStateSubscriber(updateCh chan<- *SubStateUpdate, errCh chan<- error) { + c.subscriber.Lock() + defer c.subscriber.Unlock() + c.subscriber.updateCh = updateCh + c.subscriber.errCh = errCh +} + +func (c *Conn) sendSubStateUpdate(path dbus.ObjectPath) { + c.subscriber.Lock() + defer c.subscriber.Unlock() + + if c.shouldIgnore(path) { + return + } + + info, err := c.GetUnitProperties(string(path)) + if err != nil { + select { + case c.subscriber.errCh <- err: + default: + } + } + + name := info["Id"].(string) + substate := info["SubState"].(string) + + update := &SubStateUpdate{name, substate} + select { + case c.subscriber.updateCh <- update: + default: + select { + case c.subscriber.errCh <- errors.New("update channel full!"): + default: + } + } + + c.updateIgnore(path, info) +} + +// The ignore functions work around a wart in the systemd dbus interface. +// Requesting the properties of an unloaded unit will cause systemd to send a +// pair of UnitNew/UnitRemoved signals. Because we need to get a unit's +// properties on UnitNew (as that's the only indication of a new unit coming up +// for the first time), we would enter an infinite loop if we did not attempt +// to detect and ignore these spurious signals. The signal themselves are +// indistinguishable from relevant ones, so we (somewhat hackishly) ignore an +// unloaded unit's signals for a short time after requesting its properties. +// This means that we will miss e.g. a transient unit being restarted +// *immediately* upon failure and also a transient unit being started +// immediately after requesting its status (with systemctl status, for example, +// because this causes a UnitNew signal to be sent which then causes us to fetch +// the properties). + +func (c *Conn) shouldIgnore(path dbus.ObjectPath) bool { + t, ok := c.subscriber.ignore[path] + return ok && t >= time.Now().UnixNano() +} + +func (c *Conn) updateIgnore(path dbus.ObjectPath, info map[string]interface{}) { + c.cleanIgnore() + + // unit is unloaded - it will trigger bad systemd dbus behavior + if info["LoadState"].(string) == "not-found" { + c.subscriber.ignore[path] = time.Now().UnixNano() + ignoreInterval + } +} + +// without this, ignore would grow unboundedly over time +func (c *Conn) cleanIgnore() { + now := time.Now().UnixNano() + if c.subscriber.cleanIgnore < now { + c.subscriber.cleanIgnore = now + cleanIgnoreInterval + + for p, t := range c.subscriber.ignore { + if t < now { + delete(c.subscriber.ignore, p) + } + } + } +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go b/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go new file mode 100644 index 000000000..5b408d584 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go @@ -0,0 +1,57 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "time" +) + +// SubscriptionSet returns a subscription set which is like conn.Subscribe but +// can filter to only return events for a set of units. +type SubscriptionSet struct { + *set + conn *Conn +} + +func (s *SubscriptionSet) filter(unit string) bool { + return !s.Contains(unit) +} + +// Subscribe starts listening for dbus events for all of the units in the set. +// Returns channels identical to conn.SubscribeUnits. +func (s *SubscriptionSet) Subscribe() (<-chan map[string]*UnitStatus, <-chan error) { + // TODO: Make fully evented by using systemd 209 with properties changed values + return s.conn.SubscribeUnitsCustom(time.Second, 0, + mismatchUnitStatus, + func(unit string) bool { return s.filter(unit) }, + ) +} + +// NewSubscriptionSet returns a new subscription set. +func (conn *Conn) NewSubscriptionSet() *SubscriptionSet { + return &SubscriptionSet{newSet(), conn} +} + +// mismatchUnitStatus returns true if the provided UnitStatus objects +// are not equivalent. false is returned if the objects are equivalent. +// Only the Name, Description and state-related fields are used in +// the comparison. +func mismatchUnitStatus(u1, u2 *UnitStatus) bool { + return u1.Name != u2.Name || + u1.Description != u2.Description || + u1.LoadState != u2.LoadState || + u1.ActiveState != u2.ActiveState || + u1.SubState != u2.SubState +} diff --git a/vendor/github.com/docker/go-units/LICENSE b/vendor/github.com/docker/go-units/LICENSE new file mode 100644 index 000000000..b55b37bc3 --- /dev/null +++ b/vendor/github.com/docker/go-units/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2015 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/docker/go-units/README.md b/vendor/github.com/docker/go-units/README.md new file mode 100644 index 000000000..4f70a4e13 --- /dev/null +++ b/vendor/github.com/docker/go-units/README.md @@ -0,0 +1,16 @@ +[![GoDoc](https://godoc.org/github.com/docker/go-units?status.svg)](https://godoc.org/github.com/docker/go-units) + +# Introduction + +go-units is a library to transform human friendly measurements into machine friendly values. + +## Usage + +See the [docs in godoc](https://godoc.org/github.com/docker/go-units) for examples and documentation. + +## Copyright and license + +Copyright © 2015 Docker, Inc. + +go-units is licensed under the Apache License, Version 2.0. +See [LICENSE](LICENSE) for the full text of the license. diff --git a/vendor/github.com/docker/go-units/duration.go b/vendor/github.com/docker/go-units/duration.go new file mode 100644 index 000000000..c219a8a96 --- /dev/null +++ b/vendor/github.com/docker/go-units/duration.go @@ -0,0 +1,33 @@ +// Package units provides helper function to parse and print size and time units +// in human-readable format. +package units + +import ( + "fmt" + "time" +) + +// HumanDuration returns a human-readable approximation of a duration +// (eg. "About a minute", "4 hours ago", etc.). 
+func HumanDuration(d time.Duration) string { + if seconds := int(d.Seconds()); seconds < 1 { + return "Less than a second" + } else if seconds < 60 { + return fmt.Sprintf("%d seconds", seconds) + } else if minutes := int(d.Minutes()); minutes == 1 { + return "About a minute" + } else if minutes < 60 { + return fmt.Sprintf("%d minutes", minutes) + } else if hours := int(d.Hours()); hours == 1 { + return "About an hour" + } else if hours < 48 { + return fmt.Sprintf("%d hours", hours) + } else if hours < 24*7*2 { + return fmt.Sprintf("%d days", hours/24) + } else if hours < 24*30*3 { + return fmt.Sprintf("%d weeks", hours/24/7) + } else if hours < 24*365*2 { + return fmt.Sprintf("%d months", hours/24/30) + } + return fmt.Sprintf("%d years", int(d.Hours())/24/365) +} diff --git a/vendor/github.com/docker/go-units/size.go b/vendor/github.com/docker/go-units/size.go new file mode 100644 index 000000000..f5b82ea24 --- /dev/null +++ b/vendor/github.com/docker/go-units/size.go @@ -0,0 +1,96 @@ +package units + +import ( + "fmt" + "regexp" + "strconv" + "strings" +) + +// See: http://en.wikipedia.org/wiki/Binary_prefix +const ( + // Decimal + + KB = 1000 + MB = 1000 * KB + GB = 1000 * MB + TB = 1000 * GB + PB = 1000 * TB + + // Binary + + KiB = 1024 + MiB = 1024 * KiB + GiB = 1024 * MiB + TiB = 1024 * GiB + PiB = 1024 * TiB +) + +type unitMap map[string]int64 + +var ( + decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB} + binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB} + sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[bB]?$`) +) + +var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"} +var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} + +// CustomSize returns a human-readable approximation of a size +// using custom format. +func CustomSize(format string, size float64, base float64, _map []string) string { + i := 0 + unitsLimit := len(_map) - 1 + for size >= base && i < unitsLimit { + size = size / base + i++ + } + return fmt.Sprintf(format, size, _map[i]) +} + +// HumanSize returns a human-readable approximation of a size +// capped at 4 valid numbers (eg. "2.746 MB", "796 KB"). +func HumanSize(size float64) string { + return CustomSize("%.4g %s", size, 1000.0, decimapAbbrs) +} + +// BytesSize returns a human-readable size in bytes, kibibytes, +// mebibytes, gibibytes, or tebibytes (eg. "44kiB", "17MiB"). +func BytesSize(size float64) string { + return CustomSize("%.4g %s", size, 1024.0, binaryAbbrs) +} + +// FromHumanSize returns an integer from a human-readable specification of a +// size using SI standard (eg. "44kB", "17MB"). +func FromHumanSize(size string) (int64, error) { + return parseSize(size, decimalMap) +} + +// RAMInBytes parses a human-readable string representing an amount of RAM +// in bytes, kibibytes, mebibytes, gibibytes, or tebibytes and +// returns the number of bytes, or -1 if the string is unparseable. +// Units are case-insensitive, and the 'b' suffix is optional. +func RAMInBytes(size string) (int64, error) { + return parseSize(size, binaryMap) +} + +// Parses the human-readable size string into the amount it represents. 
+func parseSize(sizeStr string, uMap unitMap) (int64, error) { + matches := sizeRegex.FindStringSubmatch(sizeStr) + if len(matches) != 4 { + return -1, fmt.Errorf("invalid size: '%s'", sizeStr) + } + + size, err := strconv.ParseFloat(matches[1], 64) + if err != nil { + return -1, err + } + + unitPrefix := strings.ToLower(matches[3]) + if mul, ok := uMap[unitPrefix]; ok { + size *= float64(mul) + } + + return int64(size), nil +} diff --git a/vendor/github.com/docker/go-units/ulimit.go b/vendor/github.com/docker/go-units/ulimit.go new file mode 100644 index 000000000..5ac7fd825 --- /dev/null +++ b/vendor/github.com/docker/go-units/ulimit.go @@ -0,0 +1,118 @@ +package units + +import ( + "fmt" + "strconv" + "strings" +) + +// Ulimit is a human friendly version of Rlimit. +type Ulimit struct { + Name string + Hard int64 + Soft int64 +} + +// Rlimit specifies the resource limits, such as max open files. +type Rlimit struct { + Type int `json:"type,omitempty"` + Hard uint64 `json:"hard,omitempty"` + Soft uint64 `json:"soft,omitempty"` +} + +const ( + // magic numbers for making the syscall + // some of these are defined in the syscall package, but not all. + // Also since Windows client doesn't get access to the syscall package, need to + // define these here + rlimitAs = 9 + rlimitCore = 4 + rlimitCPU = 0 + rlimitData = 2 + rlimitFsize = 1 + rlimitLocks = 10 + rlimitMemlock = 8 + rlimitMsgqueue = 12 + rlimitNice = 13 + rlimitNofile = 7 + rlimitNproc = 6 + rlimitRss = 5 + rlimitRtprio = 14 + rlimitRttime = 15 + rlimitSigpending = 11 + rlimitStack = 3 +) + +var ulimitNameMapping = map[string]int{ + //"as": rlimitAs, // Disabled since this doesn't seem usable with the way Docker inits a container. + "core": rlimitCore, + "cpu": rlimitCPU, + "data": rlimitData, + "fsize": rlimitFsize, + "locks": rlimitLocks, + "memlock": rlimitMemlock, + "msgqueue": rlimitMsgqueue, + "nice": rlimitNice, + "nofile": rlimitNofile, + "nproc": rlimitNproc, + "rss": rlimitRss, + "rtprio": rlimitRtprio, + "rttime": rlimitRttime, + "sigpending": rlimitSigpending, + "stack": rlimitStack, +} + +// ParseUlimit parses and returns a Ulimit from the specified string. +func ParseUlimit(val string) (*Ulimit, error) { + parts := strings.SplitN(val, "=", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid ulimit argument: %s", val) + } + + if _, exists := ulimitNameMapping[parts[0]]; !exists { + return nil, fmt.Errorf("invalid ulimit type: %s", parts[0]) + } + + var ( + soft int64 + hard = &soft // default to soft in case no hard was set + temp int64 + err error + ) + switch limitVals := strings.Split(parts[1], ":"); len(limitVals) { + case 2: + temp, err = strconv.ParseInt(limitVals[1], 10, 64) + if err != nil { + return nil, err + } + hard = &temp + fallthrough + case 1: + soft, err = strconv.ParseInt(limitVals[0], 10, 64) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("too many limit value arguments - %s, can only have up to two, `soft[:hard]`", parts[1]) + } + + if soft > *hard { + return nil, fmt.Errorf("ulimit soft limit must be less than or equal to hard limit: %d > %d", soft, *hard) + } + + return &Ulimit{Name: parts[0], Soft: soft, Hard: *hard}, nil +} + +// GetRlimit returns the RLimit corresponding to Ulimit. 
+func (u *Ulimit) GetRlimit() (*Rlimit, error) { + t, exists := ulimitNameMapping[u.Name] + if !exists { + return nil, fmt.Errorf("invalid ulimit name %s", u.Name) + } + + return &Rlimit{Type: t, Soft: uint64(u.Soft), Hard: uint64(u.Hard)}, nil +} + +func (u *Ulimit) String() string { + return fmt.Sprintf("%s=%d:%d", u.Name, u.Soft, u.Hard) +} diff --git a/vendor/github.com/godbus/dbus/LICENSE b/vendor/github.com/godbus/dbus/LICENSE new file mode 100644 index 000000000..670d88fca --- /dev/null +++ b/vendor/github.com/godbus/dbus/LICENSE @@ -0,0 +1,25 @@ +Copyright (c) 2013, Georg Reinke (), Google +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/godbus/dbus/README.markdown b/vendor/github.com/godbus/dbus/README.markdown new file mode 100644 index 000000000..0a6e7e5bf --- /dev/null +++ b/vendor/github.com/godbus/dbus/README.markdown @@ -0,0 +1,41 @@ +dbus +---- + +dbus is a simple library that implements native Go client bindings for the +D-Bus message bus system. + +### Features + +* Complete native implementation of the D-Bus message protocol +* Go-like API (channels for signals / asynchronous method calls, Goroutine-safe connections) +* Subpackages that help with the introspection / property interfaces + +### Installation + +This packages requires Go 1.1. If you installed it and set up your GOPATH, just run: + +``` +go get github.com/godbus/dbus +``` + +If you want to use the subpackages, you can install them the same way. + +### Usage + +The complete package documentation and some simple examples are available at +[godoc.org](http://godoc.org/github.com/godbus/dbus). Also, the +[_examples](https://github.com/godbus/dbus/tree/master/_examples) directory +gives a short overview over the basic usage. + +#### Projects using godbus +- [notify](https://github.com/esiqveland/notify) provides desktop notifications over dbus into a library. + +Please note that the API is considered unstable for now and may change without +further notice. + +### License + +go.dbus is available under the Simplified BSD License; see LICENSE for the full +text. + +Nearly all of the credit for this library goes to github.com/guelfey/go.dbus. 
diff --git a/vendor/github.com/godbus/dbus/auth.go b/vendor/github.com/godbus/dbus/auth.go new file mode 100644 index 000000000..98017b693 --- /dev/null +++ b/vendor/github.com/godbus/dbus/auth.go @@ -0,0 +1,253 @@ +package dbus + +import ( + "bufio" + "bytes" + "errors" + "io" + "os" + "strconv" +) + +// AuthStatus represents the Status of an authentication mechanism. +type AuthStatus byte + +const ( + // AuthOk signals that authentication is finished; the next command + // from the server should be an OK. + AuthOk AuthStatus = iota + + // AuthContinue signals that additional data is needed; the next command + // from the server should be a DATA. + AuthContinue + + // AuthError signals an error; the server sent invalid data or some + // other unexpected thing happened and the current authentication + // process should be aborted. + AuthError +) + +type authState byte + +const ( + waitingForData authState = iota + waitingForOk + waitingForReject +) + +// Auth defines the behaviour of an authentication mechanism. +type Auth interface { + // Return the name of the mechnism, the argument to the first AUTH command + // and the next status. + FirstData() (name, resp []byte, status AuthStatus) + + // Process the given DATA command, and return the argument to the DATA + // command and the next status. If len(resp) == 0, no DATA command is sent. + HandleData(data []byte) (resp []byte, status AuthStatus) +} + +// Auth authenticates the connection, trying the given list of authentication +// mechanisms (in that order). If nil is passed, the EXTERNAL and +// DBUS_COOKIE_SHA1 mechanisms are tried for the current user. For private +// connections, this method must be called before sending any messages to the +// bus. Auth must not be called on shared connections. 
+func (conn *Conn) Auth(methods []Auth) error { + if methods == nil { + uid := strconv.Itoa(os.Getuid()) + methods = []Auth{AuthExternal(uid), AuthCookieSha1(uid, getHomeDir())} + } + in := bufio.NewReader(conn.transport) + err := conn.transport.SendNullByte() + if err != nil { + return err + } + err = authWriteLine(conn.transport, []byte("AUTH")) + if err != nil { + return err + } + s, err := authReadLine(in) + if err != nil { + return err + } + if len(s) < 2 || !bytes.Equal(s[0], []byte("REJECTED")) { + return errors.New("dbus: authentication protocol error") + } + s = s[1:] + for _, v := range s { + for _, m := range methods { + if name, data, status := m.FirstData(); bytes.Equal(v, name) { + var ok bool + err = authWriteLine(conn.transport, []byte("AUTH"), []byte(v), data) + if err != nil { + return err + } + switch status { + case AuthOk: + err, ok = conn.tryAuth(m, waitingForOk, in) + case AuthContinue: + err, ok = conn.tryAuth(m, waitingForData, in) + default: + panic("dbus: invalid authentication status") + } + if err != nil { + return err + } + if ok { + if conn.transport.SupportsUnixFDs() { + err = authWriteLine(conn, []byte("NEGOTIATE_UNIX_FD")) + if err != nil { + return err + } + line, err := authReadLine(in) + if err != nil { + return err + } + switch { + case bytes.Equal(line[0], []byte("AGREE_UNIX_FD")): + conn.EnableUnixFDs() + conn.unixFD = true + case bytes.Equal(line[0], []byte("ERROR")): + default: + return errors.New("dbus: authentication protocol error") + } + } + err = authWriteLine(conn.transport, []byte("BEGIN")) + if err != nil { + return err + } + go conn.inWorker() + go conn.outWorker() + return nil + } + } + } + } + return errors.New("dbus: authentication failed") +} + +// tryAuth tries to authenticate with m as the mechanism, using state as the +// initial authState and in for reading input. It returns (nil, true) on +// success, (nil, false) on a REJECTED and (someErr, false) if some other +// error occured. 
+func (conn *Conn) tryAuth(m Auth, state authState, in *bufio.Reader) (error, bool) { + for { + s, err := authReadLine(in) + if err != nil { + return err, false + } + switch { + case state == waitingForData && string(s[0]) == "DATA": + if len(s) != 2 { + err = authWriteLine(conn.transport, []byte("ERROR")) + if err != nil { + return err, false + } + continue + } + data, status := m.HandleData(s[1]) + switch status { + case AuthOk, AuthContinue: + if len(data) != 0 { + err = authWriteLine(conn.transport, []byte("DATA"), data) + if err != nil { + return err, false + } + } + if status == AuthOk { + state = waitingForOk + } + case AuthError: + err = authWriteLine(conn.transport, []byte("ERROR")) + if err != nil { + return err, false + } + } + case state == waitingForData && string(s[0]) == "REJECTED": + return nil, false + case state == waitingForData && string(s[0]) == "ERROR": + err = authWriteLine(conn.transport, []byte("CANCEL")) + if err != nil { + return err, false + } + state = waitingForReject + case state == waitingForData && string(s[0]) == "OK": + if len(s) != 2 { + err = authWriteLine(conn.transport, []byte("CANCEL")) + if err != nil { + return err, false + } + state = waitingForReject + } + conn.uuid = string(s[1]) + return nil, true + case state == waitingForData: + err = authWriteLine(conn.transport, []byte("ERROR")) + if err != nil { + return err, false + } + case state == waitingForOk && string(s[0]) == "OK": + if len(s) != 2 { + err = authWriteLine(conn.transport, []byte("CANCEL")) + if err != nil { + return err, false + } + state = waitingForReject + } + conn.uuid = string(s[1]) + return nil, true + case state == waitingForOk && string(s[0]) == "REJECTED": + return nil, false + case state == waitingForOk && (string(s[0]) == "DATA" || + string(s[0]) == "ERROR"): + + err = authWriteLine(conn.transport, []byte("CANCEL")) + if err != nil { + return err, false + } + state = waitingForReject + case state == waitingForOk: + err = authWriteLine(conn.transport, []byte("ERROR")) + if err != nil { + return err, false + } + case state == waitingForReject && string(s[0]) == "REJECTED": + return nil, false + case state == waitingForReject: + return errors.New("dbus: authentication protocol error"), false + default: + panic("dbus: invalid auth state") + } + } +} + +// authReadLine reads a line and separates it into its fields. +func authReadLine(in *bufio.Reader) ([][]byte, error) { + data, err := in.ReadBytes('\n') + if err != nil { + return nil, err + } + data = bytes.TrimSuffix(data, []byte("\r\n")) + return bytes.Split(data, []byte{' '}), nil +} + +// authWriteLine writes the given line in the authentication protocol format +// (elements of data separated by a " " and terminated by "\r\n"). +func authWriteLine(out io.Writer, data ...[]byte) error { + buf := make([]byte, 0) + for i, v := range data { + buf = append(buf, v...) + if i != len(data)-1 { + buf = append(buf, ' ') + } + } + buf = append(buf, '\r') + buf = append(buf, '\n') + n, err := out.Write(buf) + if err != nil { + return err + } + if n != len(buf) { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/vendor/github.com/godbus/dbus/auth_external.go b/vendor/github.com/godbus/dbus/auth_external.go new file mode 100644 index 000000000..7e376d3ef --- /dev/null +++ b/vendor/github.com/godbus/dbus/auth_external.go @@ -0,0 +1,26 @@ +package dbus + +import ( + "encoding/hex" +) + +// AuthExternal returns an Auth that authenticates as the given user with the +// EXTERNAL mechanism. 
+func AuthExternal(user string) Auth { + return authExternal{user} +} + +// AuthExternal implements the EXTERNAL authentication mechanism. +type authExternal struct { + user string +} + +func (a authExternal) FirstData() ([]byte, []byte, AuthStatus) { + b := make([]byte, 2*len(a.user)) + hex.Encode(b, []byte(a.user)) + return []byte("EXTERNAL"), b, AuthOk +} + +func (a authExternal) HandleData(b []byte) ([]byte, AuthStatus) { + return nil, AuthError +} diff --git a/vendor/github.com/godbus/dbus/auth_sha1.go b/vendor/github.com/godbus/dbus/auth_sha1.go new file mode 100644 index 000000000..df15b4611 --- /dev/null +++ b/vendor/github.com/godbus/dbus/auth_sha1.go @@ -0,0 +1,102 @@ +package dbus + +import ( + "bufio" + "bytes" + "crypto/rand" + "crypto/sha1" + "encoding/hex" + "os" +) + +// AuthCookieSha1 returns an Auth that authenticates as the given user with the +// DBUS_COOKIE_SHA1 mechanism. The home parameter should specify the home +// directory of the user. +func AuthCookieSha1(user, home string) Auth { + return authCookieSha1{user, home} +} + +type authCookieSha1 struct { + user, home string +} + +func (a authCookieSha1) FirstData() ([]byte, []byte, AuthStatus) { + b := make([]byte, 2*len(a.user)) + hex.Encode(b, []byte(a.user)) + return []byte("DBUS_COOKIE_SHA1"), b, AuthContinue +} + +func (a authCookieSha1) HandleData(data []byte) ([]byte, AuthStatus) { + challenge := make([]byte, len(data)/2) + _, err := hex.Decode(challenge, data) + if err != nil { + return nil, AuthError + } + b := bytes.Split(challenge, []byte{' '}) + if len(b) != 3 { + return nil, AuthError + } + context := b[0] + id := b[1] + svchallenge := b[2] + cookie := a.getCookie(context, id) + if cookie == nil { + return nil, AuthError + } + clchallenge := a.generateChallenge() + if clchallenge == nil { + return nil, AuthError + } + hash := sha1.New() + hash.Write(bytes.Join([][]byte{svchallenge, clchallenge, cookie}, []byte{':'})) + hexhash := make([]byte, 2*hash.Size()) + hex.Encode(hexhash, hash.Sum(nil)) + data = append(clchallenge, ' ') + data = append(data, hexhash...) + resp := make([]byte, 2*len(data)) + hex.Encode(resp, data) + return resp, AuthOk +} + +// getCookie searches for the cookie identified by id in context and returns +// the cookie content or nil. (Since HandleData can't return a specific error, +// but only whether an error occured, this function also doesn't bother to +// return an error.) +func (a authCookieSha1) getCookie(context, id []byte) []byte { + file, err := os.Open(a.home + "/.dbus-keyrings/" + string(context)) + if err != nil { + return nil + } + defer file.Close() + rd := bufio.NewReader(file) + for { + line, err := rd.ReadBytes('\n') + if err != nil { + return nil + } + line = line[:len(line)-1] + b := bytes.Split(line, []byte{' '}) + if len(b) != 3 { + return nil + } + if bytes.Equal(b[0], id) { + return b[2] + } + } +} + +// generateChallenge returns a random, hex-encoded challenge, or nil on error +// (see above). +func (a authCookieSha1) generateChallenge() []byte { + b := make([]byte, 16) + n, err := rand.Read(b) + if err != nil { + return nil + } + if n != 16 { + return nil + } + enc := make([]byte, 32) + hex.Encode(enc, b) + return enc +} diff --git a/vendor/github.com/godbus/dbus/call.go b/vendor/github.com/godbus/dbus/call.go new file mode 100644 index 000000000..ba6e73f60 --- /dev/null +++ b/vendor/github.com/godbus/dbus/call.go @@ -0,0 +1,36 @@ +package dbus + +import ( + "errors" +) + +// Call represents a pending or completed method call. 
+type Call struct { + Destination string + Path ObjectPath + Method string + Args []interface{} + + // Strobes when the call is complete. + Done chan *Call + + // After completion, the error status. If this is non-nil, it may be an + // error message from the peer (with Error as its type) or some other error. + Err error + + // Holds the response once the call is done. + Body []interface{} +} + +var errSignature = errors.New("dbus: mismatched signature") + +// Store stores the body of the reply into the provided pointers. It returns +// an error if the signatures of the body and retvalues don't match, or if +// the error status is not nil. +func (c *Call) Store(retvalues ...interface{}) error { + if c.Err != nil { + return c.Err + } + + return Store(c.Body, retvalues...) +} diff --git a/vendor/github.com/godbus/dbus/conn.go b/vendor/github.com/godbus/dbus/conn.go new file mode 100644 index 000000000..a4f539401 --- /dev/null +++ b/vendor/github.com/godbus/dbus/conn.go @@ -0,0 +1,625 @@ +package dbus + +import ( + "errors" + "io" + "os" + "reflect" + "strings" + "sync" +) + +const defaultSystemBusAddress = "unix:path=/var/run/dbus/system_bus_socket" + +var ( + systemBus *Conn + systemBusLck sync.Mutex + sessionBus *Conn + sessionBusLck sync.Mutex +) + +// ErrClosed is the error returned by calls on a closed connection. +var ErrClosed = errors.New("dbus: connection closed by user") + +// Conn represents a connection to a message bus (usually, the system or +// session bus). +// +// Connections are either shared or private. Shared connections +// are shared between calls to the functions that return them. As a result, +// the methods Close, Auth and Hello must not be called on them. +// +// Multiple goroutines may invoke methods on a connection simultaneously. +type Conn struct { + transport + + busObj BusObject + unixFD bool + uuid string + + names []string + namesLck sync.RWMutex + + serialLck sync.Mutex + nextSerial uint32 + serialUsed map[uint32]bool + + calls map[uint32]*Call + callsLck sync.RWMutex + + handlers map[ObjectPath]map[string]exportWithMapping + handlersLck sync.RWMutex + + out chan *Message + closed bool + outLck sync.RWMutex + + signals []chan<- *Signal + signalsLck sync.Mutex + + eavesdropped chan<- *Message + eavesdroppedLck sync.Mutex +} + +// SessionBus returns a shared connection to the session bus, connecting to it +// if not already done. +func SessionBus() (conn *Conn, err error) { + sessionBusLck.Lock() + defer sessionBusLck.Unlock() + if sessionBus != nil { + return sessionBus, nil + } + defer func() { + if conn != nil { + sessionBus = conn + } + }() + conn, err = SessionBusPrivate() + if err != nil { + return + } + if err = conn.Auth(nil); err != nil { + conn.Close() + conn = nil + return + } + if err = conn.Hello(); err != nil { + conn.Close() + conn = nil + } + return +} + +// SessionBusPrivate returns a new private connection to the session bus. +func SessionBusPrivate() (*Conn, error) { + address := os.Getenv("DBUS_SESSION_BUS_ADDRESS") + if address != "" && address != "autolaunch:" { + return Dial(address) + } + + return sessionBusPlatform() +} + +// SystemBus returns a shared connection to the system bus, connecting to it if +// not already done. 
+func SystemBus() (conn *Conn, err error) { + systemBusLck.Lock() + defer systemBusLck.Unlock() + if systemBus != nil { + return systemBus, nil + } + defer func() { + if conn != nil { + systemBus = conn + } + }() + conn, err = SystemBusPrivate() + if err != nil { + return + } + if err = conn.Auth(nil); err != nil { + conn.Close() + conn = nil + return + } + if err = conn.Hello(); err != nil { + conn.Close() + conn = nil + } + return +} + +// SystemBusPrivate returns a new private connection to the system bus. +func SystemBusPrivate() (*Conn, error) { + address := os.Getenv("DBUS_SYSTEM_BUS_ADDRESS") + if address != "" { + return Dial(address) + } + return Dial(defaultSystemBusAddress) +} + +// Dial establishes a new private connection to the message bus specified by address. +func Dial(address string) (*Conn, error) { + tr, err := getTransport(address) + if err != nil { + return nil, err + } + return newConn(tr) +} + +// NewConn creates a new private *Conn from an already established connection. +func NewConn(conn io.ReadWriteCloser) (*Conn, error) { + return newConn(genericTransport{conn}) +} + +// newConn creates a new *Conn from a transport. +func newConn(tr transport) (*Conn, error) { + conn := new(Conn) + conn.transport = tr + conn.calls = make(map[uint32]*Call) + conn.out = make(chan *Message, 10) + conn.handlers = make(map[ObjectPath]map[string]exportWithMapping) + conn.nextSerial = 1 + conn.serialUsed = map[uint32]bool{0: true} + conn.busObj = conn.Object("org.freedesktop.DBus", "/org/freedesktop/DBus") + return conn, nil +} + +// BusObject returns the object owned by the bus daemon which handles +// administrative requests. +func (conn *Conn) BusObject() BusObject { + return conn.busObj +} + +// Close closes the connection. Any blocked operations will return with errors +// and the channels passed to Eavesdrop and Signal are closed. This method must +// not be called on shared connections. +func (conn *Conn) Close() error { + conn.outLck.Lock() + if conn.closed { + // inWorker calls Close on read error, the read error may + // be caused by another caller calling Close to shutdown the + // dbus connection, a double-close scenario we prevent here. + conn.outLck.Unlock() + return nil + } + close(conn.out) + conn.closed = true + conn.outLck.Unlock() + conn.signalsLck.Lock() + for _, ch := range conn.signals { + close(ch) + } + conn.signalsLck.Unlock() + conn.eavesdroppedLck.Lock() + if conn.eavesdropped != nil { + close(conn.eavesdropped) + } + conn.eavesdroppedLck.Unlock() + return conn.transport.Close() +} + +// Eavesdrop causes conn to send all incoming messages to the given channel +// without further processing. Method replies, errors and signals will not be +// sent to the appropiate channels and method calls will not be handled. If nil +// is passed, the normal behaviour is restored. +// +// The caller has to make sure that ch is sufficiently buffered; +// if a message arrives when a write to ch is not possible, the message is +// discarded. +func (conn *Conn) Eavesdrop(ch chan<- *Message) { + conn.eavesdroppedLck.Lock() + conn.eavesdropped = ch + conn.eavesdroppedLck.Unlock() +} + +// getSerial returns an unused serial. +func (conn *Conn) getSerial() uint32 { + conn.serialLck.Lock() + defer conn.serialLck.Unlock() + n := conn.nextSerial + for conn.serialUsed[n] { + n++ + } + conn.serialUsed[n] = true + conn.nextSerial = n + 1 + return n +} + +// Hello sends the initial org.freedesktop.DBus.Hello call. 
This method must be +// called after authentication, but before sending any other messages to the +// bus. Hello must not be called for shared connections. +func (conn *Conn) Hello() error { + var s string + err := conn.busObj.Call("org.freedesktop.DBus.Hello", 0).Store(&s) + if err != nil { + return err + } + conn.namesLck.Lock() + conn.names = make([]string, 1) + conn.names[0] = s + conn.namesLck.Unlock() + return nil +} + +// inWorker runs in an own goroutine, reading incoming messages from the +// transport and dispatching them appropiately. +func (conn *Conn) inWorker() { + for { + msg, err := conn.ReadMessage() + if err == nil { + conn.eavesdroppedLck.Lock() + if conn.eavesdropped != nil { + select { + case conn.eavesdropped <- msg: + default: + } + conn.eavesdroppedLck.Unlock() + continue + } + conn.eavesdroppedLck.Unlock() + dest, _ := msg.Headers[FieldDestination].value.(string) + found := false + if dest == "" { + found = true + } else { + conn.namesLck.RLock() + if len(conn.names) == 0 { + found = true + } + for _, v := range conn.names { + if dest == v { + found = true + break + } + } + conn.namesLck.RUnlock() + } + if !found { + // Eavesdropped a message, but no channel for it is registered. + // Ignore it. + continue + } + switch msg.Type { + case TypeMethodReply, TypeError: + serial := msg.Headers[FieldReplySerial].value.(uint32) + conn.callsLck.Lock() + if c, ok := conn.calls[serial]; ok { + if msg.Type == TypeError { + name, _ := msg.Headers[FieldErrorName].value.(string) + c.Err = Error{name, msg.Body} + } else { + c.Body = msg.Body + } + c.Done <- c + conn.serialLck.Lock() + delete(conn.serialUsed, serial) + conn.serialLck.Unlock() + delete(conn.calls, serial) + } + conn.callsLck.Unlock() + case TypeSignal: + iface := msg.Headers[FieldInterface].value.(string) + member := msg.Headers[FieldMember].value.(string) + // as per http://dbus.freedesktop.org/doc/dbus-specification.html , + // sender is optional for signals. + sender, _ := msg.Headers[FieldSender].value.(string) + if iface == "org.freedesktop.DBus" && sender == "org.freedesktop.DBus" { + if member == "NameLost" { + // If we lost the name on the bus, remove it from our + // tracking list. + name, ok := msg.Body[0].(string) + if !ok { + panic("Unable to read the lost name") + } + conn.namesLck.Lock() + for i, v := range conn.names { + if v == name { + conn.names = append(conn.names[:i], + conn.names[i+1:]...) + } + } + conn.namesLck.Unlock() + } else if member == "NameAcquired" { + // If we acquired the name on the bus, add it to our + // tracking list. + name, ok := msg.Body[0].(string) + if !ok { + panic("Unable to read the acquired name") + } + conn.namesLck.Lock() + conn.names = append(conn.names, name) + conn.namesLck.Unlock() + } + } + signal := &Signal{ + Sender: sender, + Path: msg.Headers[FieldPath].value.(ObjectPath), + Name: iface + "." + member, + Body: msg.Body, + } + conn.signalsLck.Lock() + for _, ch := range conn.signals { + ch <- signal + } + conn.signalsLck.Unlock() + case TypeMethodCall: + go conn.handleCall(msg) + } + } else if _, ok := err.(InvalidMessageError); !ok { + // Some read error occured (usually EOF); we can't really do + // anything but to shut down all stuff and returns errors to all + // pending replies. + conn.Close() + conn.callsLck.RLock() + for _, v := range conn.calls { + v.Err = err + v.Done <- v + } + conn.callsLck.RUnlock() + return + } + // invalid messages are ignored + } +} + +// Names returns the list of all names that are currently owned by this +// connection. 
The slice is always at least one element long, the first element +// being the unique name of the connection. +func (conn *Conn) Names() []string { + conn.namesLck.RLock() + // copy the slice so it can't be modified + s := make([]string, len(conn.names)) + copy(s, conn.names) + conn.namesLck.RUnlock() + return s +} + +// Object returns the object identified by the given destination name and path. +func (conn *Conn) Object(dest string, path ObjectPath) BusObject { + return &Object{conn, dest, path} +} + +// outWorker runs in an own goroutine, encoding and sending messages that are +// sent to conn.out. +func (conn *Conn) outWorker() { + for msg := range conn.out { + err := conn.SendMessage(msg) + conn.callsLck.RLock() + if err != nil { + if c := conn.calls[msg.serial]; c != nil { + c.Err = err + c.Done <- c + } + conn.serialLck.Lock() + delete(conn.serialUsed, msg.serial) + conn.serialLck.Unlock() + } else if msg.Type != TypeMethodCall { + conn.serialLck.Lock() + delete(conn.serialUsed, msg.serial) + conn.serialLck.Unlock() + } + conn.callsLck.RUnlock() + } +} + +// Send sends the given message to the message bus. You usually don't need to +// use this; use the higher-level equivalents (Call / Go, Emit and Export) +// instead. If msg is a method call and NoReplyExpected is not set, a non-nil +// call is returned and the same value is sent to ch (which must be buffered) +// once the call is complete. Otherwise, ch is ignored and a Call structure is +// returned of which only the Err member is valid. +func (conn *Conn) Send(msg *Message, ch chan *Call) *Call { + var call *Call + + msg.serial = conn.getSerial() + if msg.Type == TypeMethodCall && msg.Flags&FlagNoReplyExpected == 0 { + if ch == nil { + ch = make(chan *Call, 5) + } else if cap(ch) == 0 { + panic("dbus: unbuffered channel passed to (*Conn).Send") + } + call = new(Call) + call.Destination, _ = msg.Headers[FieldDestination].value.(string) + call.Path, _ = msg.Headers[FieldPath].value.(ObjectPath) + iface, _ := msg.Headers[FieldInterface].value.(string) + member, _ := msg.Headers[FieldMember].value.(string) + call.Method = iface + "." + member + call.Args = msg.Body + call.Done = ch + conn.callsLck.Lock() + conn.calls[msg.serial] = call + conn.callsLck.Unlock() + conn.outLck.RLock() + if conn.closed { + call.Err = ErrClosed + call.Done <- call + } else { + conn.out <- msg + } + conn.outLck.RUnlock() + } else { + conn.outLck.RLock() + if conn.closed { + call = &Call{Err: ErrClosed} + } else { + conn.out <- msg + call = &Call{Err: nil} + } + conn.outLck.RUnlock() + } + return call +} + +// sendError creates an error message corresponding to the parameters and sends +// it to conn.out. +func (conn *Conn) sendError(e Error, dest string, serial uint32) { + msg := new(Message) + msg.Type = TypeError + msg.serial = conn.getSerial() + msg.Headers = make(map[HeaderField]Variant) + if dest != "" { + msg.Headers[FieldDestination] = MakeVariant(dest) + } + msg.Headers[FieldErrorName] = MakeVariant(e.Name) + msg.Headers[FieldReplySerial] = MakeVariant(serial) + msg.Body = e.Body + if len(e.Body) > 0 { + msg.Headers[FieldSignature] = MakeVariant(SignatureOf(e.Body...)) + } + conn.outLck.RLock() + if !conn.closed { + conn.out <- msg + } + conn.outLck.RUnlock() +} + +// sendReply creates a method reply message corresponding to the parameters and +// sends it to conn.out. 
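Signals dispatched by inWorker above end up on channels registered with (*Conn).Signal, defined a little further down. A hedged sketch of the usual subscription pattern; the AddMatch rule string here is only an example:

    package main

    import (
        "fmt"
        "os"

        "github.com/godbus/dbus"
    )

    func main() {
        conn, err := dbus.SessionBus()
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        // Ask the bus to route NameOwnerChanged signals to this connection.
        call := conn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0,
            "type='signal',interface='org.freedesktop.DBus',member='NameOwnerChanged'")
        if call.Err != nil {
            fmt.Fprintln(os.Stderr, call.Err)
            os.Exit(1)
        }
        ch := make(chan *dbus.Signal, 10)
        conn.Signal(ch)
        for sig := range ch {
            fmt.Println(sig.Name, sig.Body)
        }
    }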
+func (conn *Conn) sendReply(dest string, serial uint32, values ...interface{}) { + msg := new(Message) + msg.Type = TypeMethodReply + msg.serial = conn.getSerial() + msg.Headers = make(map[HeaderField]Variant) + if dest != "" { + msg.Headers[FieldDestination] = MakeVariant(dest) + } + msg.Headers[FieldReplySerial] = MakeVariant(serial) + msg.Body = values + if len(values) > 0 { + msg.Headers[FieldSignature] = MakeVariant(SignatureOf(values...)) + } + conn.outLck.RLock() + if !conn.closed { + conn.out <- msg + } + conn.outLck.RUnlock() +} + +// Signal registers the given channel to be passed all received signal messages. +// The caller has to make sure that ch is sufficiently buffered; if a message +// arrives when a write to c is not possible, it is discarded. +// +// Multiple of these channels can be registered at the same time. Passing a +// channel that already is registered will remove it from the list of the +// registered channels. +// +// These channels are "overwritten" by Eavesdrop; i.e., if there currently is a +// channel for eavesdropped messages, this channel receives all signals, and +// none of the channels passed to Signal will receive any signals. +func (conn *Conn) Signal(ch chan<- *Signal) { + conn.signalsLck.Lock() + conn.signals = append(conn.signals, ch) + conn.signalsLck.Unlock() +} + +// SupportsUnixFDs returns whether the underlying transport supports passing of +// unix file descriptors. If this is false, method calls containing unix file +// descriptors will return an error and emitted signals containing them will +// not be sent. +func (conn *Conn) SupportsUnixFDs() bool { + return conn.unixFD +} + +// Error represents a D-Bus message of type Error. +type Error struct { + Name string + Body []interface{} +} + +func NewError(name string, body []interface{}) *Error { + return &Error{name, body} +} + +func (e Error) Error() string { + if len(e.Body) >= 1 { + s, ok := e.Body[0].(string) + if ok { + return s + } + } + return e.Name +} + +// Signal represents a D-Bus message of type Signal. The name member is given in +// "interface.member" notation, e.g. org.freedesktop.D-Bus.NameLost. +type Signal struct { + Sender string + Path ObjectPath + Name string + Body []interface{} +} + +// transport is a D-Bus transport. +type transport interface { + // Read and Write raw data (for example, for the authentication protocol). + io.ReadWriteCloser + + // Send the initial null byte used for the EXTERNAL mechanism. + SendNullByte() error + + // Returns whether this transport supports passing Unix FDs. + SupportsUnixFDs() bool + + // Signal the transport that Unix FD passing is enabled for this connection. + EnableUnixFDs() + + // Read / send a message, handling things like Unix FDs. 
+ ReadMessage() (*Message, error) + SendMessage(*Message) error +} + +var ( + transports = make(map[string]func(string) (transport, error)) +) + +func getTransport(address string) (transport, error) { + var err error + var t transport + + addresses := strings.Split(address, ";") + for _, v := range addresses { + i := strings.IndexRune(v, ':') + if i == -1 { + err = errors.New("dbus: invalid bus address (no transport)") + continue + } + f := transports[v[:i]] + if f == nil { + err = errors.New("dbus: invalid bus address (invalid or unsupported transport)") + continue + } + t, err = f(v[i+1:]) + if err == nil { + return t, nil + } + } + return nil, err +} + +// dereferenceAll returns a slice that, assuming that vs is a slice of pointers +// of arbitrary types, containes the values that are obtained from dereferencing +// all elements in vs. +func dereferenceAll(vs []interface{}) []interface{} { + for i := range vs { + v := reflect.ValueOf(vs[i]) + v = v.Elem() + vs[i] = v.Interface() + } + return vs +} + +// getKey gets a key from a the list of keys. Returns "" on error / not found... +func getKey(s, key string) string { + i := strings.Index(s, key) + if i == -1 { + return "" + } + if i+len(key)+1 >= len(s) || s[i+len(key)] != '=' { + return "" + } + j := strings.Index(s, ",") + if j == -1 { + j = len(s) + } + return s[i+len(key)+1 : j] +} diff --git a/vendor/github.com/godbus/dbus/conn_darwin.go b/vendor/github.com/godbus/dbus/conn_darwin.go new file mode 100644 index 000000000..b67bb1b81 --- /dev/null +++ b/vendor/github.com/godbus/dbus/conn_darwin.go @@ -0,0 +1,21 @@ +package dbus + +import ( + "errors" + "os/exec" +) + +func sessionBusPlatform() (*Conn, error) { + cmd := exec.Command("launchctl", "getenv", "DBUS_LAUNCHD_SESSION_BUS_SOCKET") + b, err := cmd.CombinedOutput() + + if err != nil { + return nil, err + } + + if len(b) == 0 { + return nil, errors.New("dbus: couldn't determine address of session bus") + } + + return Dial("unix:path=" + string(b[:len(b)-1])) +} diff --git a/vendor/github.com/godbus/dbus/conn_other.go b/vendor/github.com/godbus/dbus/conn_other.go new file mode 100644 index 000000000..f74b8758d --- /dev/null +++ b/vendor/github.com/godbus/dbus/conn_other.go @@ -0,0 +1,27 @@ +// +build !darwin + +package dbus + +import ( + "bytes" + "errors" + "os/exec" +) + +func sessionBusPlatform() (*Conn, error) { + cmd := exec.Command("dbus-launch") + b, err := cmd.CombinedOutput() + + if err != nil { + return nil, err + } + + i := bytes.IndexByte(b, '=') + j := bytes.IndexByte(b, '\n') + + if i == -1 || j == -1 { + return nil, errors.New("dbus: couldn't determine address of session bus") + } + + return Dial(string(b[i+1 : j])) +} diff --git a/vendor/github.com/godbus/dbus/dbus.go b/vendor/github.com/godbus/dbus/dbus.go new file mode 100644 index 000000000..2ce68735c --- /dev/null +++ b/vendor/github.com/godbus/dbus/dbus.go @@ -0,0 +1,258 @@ +package dbus + +import ( + "errors" + "reflect" + "strings" +) + +var ( + byteType = reflect.TypeOf(byte(0)) + boolType = reflect.TypeOf(false) + uint8Type = reflect.TypeOf(uint8(0)) + int16Type = reflect.TypeOf(int16(0)) + uint16Type = reflect.TypeOf(uint16(0)) + int32Type = reflect.TypeOf(int32(0)) + uint32Type = reflect.TypeOf(uint32(0)) + int64Type = reflect.TypeOf(int64(0)) + uint64Type = reflect.TypeOf(uint64(0)) + float64Type = reflect.TypeOf(float64(0)) + stringType = reflect.TypeOf("") + signatureType = reflect.TypeOf(Signature{""}) + objectPathType = reflect.TypeOf(ObjectPath("")) + variantType = 
reflect.TypeOf(Variant{Signature{""}, nil}) + interfacesType = reflect.TypeOf([]interface{}{}) + unixFDType = reflect.TypeOf(UnixFD(0)) + unixFDIndexType = reflect.TypeOf(UnixFDIndex(0)) +) + +// An InvalidTypeError signals that a value which cannot be represented in the +// D-Bus wire format was passed to a function. +type InvalidTypeError struct { + Type reflect.Type +} + +func (e InvalidTypeError) Error() string { + return "dbus: invalid type " + e.Type.String() +} + +// Store copies the values contained in src to dest, which must be a slice of +// pointers. It converts slices of interfaces from src to corresponding structs +// in dest. An error is returned if the lengths of src and dest or the types of +// their elements don't match. +func Store(src []interface{}, dest ...interface{}) error { + if len(src) != len(dest) { + return errors.New("dbus.Store: length mismatch") + } + + for i := range src { + if err := store(src[i], dest[i]); err != nil { + return err + } + } + return nil +} + +func store(src, dest interface{}) error { + if reflect.TypeOf(dest).Elem() == reflect.TypeOf(src) { + reflect.ValueOf(dest).Elem().Set(reflect.ValueOf(src)) + return nil + } else if hasStruct(dest) { + rv := reflect.ValueOf(dest).Elem() + switch rv.Kind() { + case reflect.Struct: + vs, ok := src.([]interface{}) + if !ok { + return errors.New("dbus.Store: type mismatch") + } + t := rv.Type() + ndest := make([]interface{}, 0, rv.NumField()) + for i := 0; i < rv.NumField(); i++ { + field := t.Field(i) + if field.PkgPath == "" && field.Tag.Get("dbus") != "-" { + ndest = append(ndest, rv.Field(i).Addr().Interface()) + } + } + if len(vs) != len(ndest) { + return errors.New("dbus.Store: type mismatch") + } + err := Store(vs, ndest...) + if err != nil { + return errors.New("dbus.Store: type mismatch") + } + case reflect.Slice: + sv := reflect.ValueOf(src) + if sv.Kind() != reflect.Slice { + return errors.New("dbus.Store: type mismatch") + } + rv.Set(reflect.MakeSlice(rv.Type(), sv.Len(), sv.Len())) + for i := 0; i < sv.Len(); i++ { + if err := store(sv.Index(i).Interface(), rv.Index(i).Addr().Interface()); err != nil { + return err + } + } + case reflect.Map: + sv := reflect.ValueOf(src) + if sv.Kind() != reflect.Map { + return errors.New("dbus.Store: type mismatch") + } + keys := sv.MapKeys() + rv.Set(reflect.MakeMap(sv.Type())) + for _, key := range keys { + v := reflect.New(sv.Type().Elem()) + if err := store(v, sv.MapIndex(key).Interface()); err != nil { + return err + } + rv.SetMapIndex(key, v.Elem()) + } + default: + return errors.New("dbus.Store: type mismatch") + } + return nil + } else { + return errors.New("dbus.Store: type mismatch") + } +} + +func hasStruct(v interface{}) bool { + t := reflect.TypeOf(v) + for { + switch t.Kind() { + case reflect.Struct: + return true + case reflect.Slice, reflect.Ptr, reflect.Map: + t = t.Elem() + default: + return false + } + } +} + +// An ObjectPath is an object path as defined by the D-Bus spec. +type ObjectPath string + +// IsValid returns whether the object path is valid. 
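A small illustration of the struct conversion performed by Store and store above: a D-Bus STRUCT arrives as a []interface{} and is copied field by field into a Go struct. The point type is made up for the example:

    package main

    import (
        "fmt"

        "github.com/godbus/dbus"
    )

    // point stands in for any exported struct whose fields match the
    // incoming STRUCT of two INT32 values.
    type point struct {
        X, Y int32
    }

    func main() {
        body := []interface{}{[]interface{}{int32(3), int32(4)}}
        var p point
        if err := dbus.Store(body, &p); err != nil {
            fmt.Println(err)
            return
        }
        fmt.Println(p.X, p.Y) // 3 4
    }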
+func (o ObjectPath) IsValid() bool { + s := string(o) + if len(s) == 0 { + return false + } + if s[0] != '/' { + return false + } + if s[len(s)-1] == '/' && len(s) != 1 { + return false + } + // probably not used, but technically possible + if s == "/" { + return true + } + split := strings.Split(s[1:], "/") + for _, v := range split { + if len(v) == 0 { + return false + } + for _, c := range v { + if !isMemberChar(c) { + return false + } + } + } + return true +} + +// A UnixFD is a Unix file descriptor sent over the wire. See the package-level +// documentation for more information about Unix file descriptor passsing. +type UnixFD int32 + +// A UnixFDIndex is the representation of a Unix file descriptor in a message. +type UnixFDIndex uint32 + +// alignment returns the alignment of values of type t. +func alignment(t reflect.Type) int { + switch t { + case variantType: + return 1 + case objectPathType: + return 4 + case signatureType: + return 1 + case interfacesType: // sometimes used for structs + return 8 + } + switch t.Kind() { + case reflect.Uint8: + return 1 + case reflect.Uint16, reflect.Int16: + return 2 + case reflect.Uint32, reflect.Int32, reflect.String, reflect.Array, reflect.Slice, reflect.Map: + return 4 + case reflect.Uint64, reflect.Int64, reflect.Float64, reflect.Struct: + return 8 + case reflect.Ptr: + return alignment(t.Elem()) + } + return 1 +} + +// isKeyType returns whether t is a valid type for a D-Bus dict. +func isKeyType(t reflect.Type) bool { + switch t.Kind() { + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Int16, reflect.Int32, reflect.Int64, reflect.Float64, + reflect.String: + + return true + } + return false +} + +// isValidInterface returns whether s is a valid name for an interface. +func isValidInterface(s string) bool { + if len(s) == 0 || len(s) > 255 || s[0] == '.' { + return false + } + elem := strings.Split(s, ".") + if len(elem) < 2 { + return false + } + for _, v := range elem { + if len(v) == 0 { + return false + } + if v[0] >= '0' && v[0] <= '9' { + return false + } + for _, c := range v { + if !isMemberChar(c) { + return false + } + } + } + return true +} + +// isValidMember returns whether s is a valid name for a member. +func isValidMember(s string) bool { + if len(s) == 0 || len(s) > 255 { + return false + } + i := strings.Index(s, ".") + if i != -1 { + return false + } + if s[0] >= '0' && s[0] <= '9' { + return false + } + for _, c := range s { + if !isMemberChar(c) { + return false + } + } + return true +} + +func isMemberChar(c rune) bool { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || c == '_' +} diff --git a/vendor/github.com/godbus/dbus/decoder.go b/vendor/github.com/godbus/dbus/decoder.go new file mode 100644 index 000000000..ef50dcab9 --- /dev/null +++ b/vendor/github.com/godbus/dbus/decoder.go @@ -0,0 +1,228 @@ +package dbus + +import ( + "encoding/binary" + "io" + "reflect" +) + +type decoder struct { + in io.Reader + order binary.ByteOrder + pos int +} + +// newDecoder returns a new decoder that reads values from in. The input is +// expected to be in the given byte order. +func newDecoder(in io.Reader, order binary.ByteOrder) *decoder { + dec := new(decoder) + dec.in = in + dec.order = order + return dec +} + +// align aligns the input to the given boundary and panics on error. 
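A few concrete cases for ObjectPath.IsValid as implemented above, with the expected results noted in comments:

    package main

    import (
        "fmt"

        "github.com/godbus/dbus"
    )

    func main() {
        fmt.Println(dbus.ObjectPath("/org/freedesktop/DBus").IsValid()) // true
        fmt.Println(dbus.ObjectPath("/").IsValid())                     // true: the root path
        fmt.Println(dbus.ObjectPath("/trailing/").IsValid())            // false: trailing slash
        fmt.Println(dbus.ObjectPath("relative").IsValid())              // false: must start with '/'
        fmt.Println(dbus.ObjectPath("/with-dash").IsValid())            // false: only [A-Za-z0-9_] allowed
    }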
+func (dec *decoder) align(n int) { + if dec.pos%n != 0 { + newpos := (dec.pos + n - 1) & ^(n - 1) + empty := make([]byte, newpos-dec.pos) + if _, err := io.ReadFull(dec.in, empty); err != nil { + panic(err) + } + dec.pos = newpos + } +} + +// Calls binary.Read(dec.in, dec.order, v) and panics on read errors. +func (dec *decoder) binread(v interface{}) { + if err := binary.Read(dec.in, dec.order, v); err != nil { + panic(err) + } +} + +func (dec *decoder) Decode(sig Signature) (vs []interface{}, err error) { + defer func() { + var ok bool + v := recover() + if err, ok = v.(error); ok { + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = FormatError("unexpected EOF") + } + } + }() + vs = make([]interface{}, 0) + s := sig.str + for s != "" { + err, rem := validSingle(s, 0) + if err != nil { + return nil, err + } + v := dec.decode(s[:len(s)-len(rem)], 0) + vs = append(vs, v) + s = rem + } + return vs, nil +} + +func (dec *decoder) decode(s string, depth int) interface{} { + dec.align(alignment(typeFor(s))) + switch s[0] { + case 'y': + var b [1]byte + if _, err := dec.in.Read(b[:]); err != nil { + panic(err) + } + dec.pos++ + return b[0] + case 'b': + i := dec.decode("u", depth).(uint32) + switch { + case i == 0: + return false + case i == 1: + return true + default: + panic(FormatError("invalid value for boolean")) + } + case 'n': + var i int16 + dec.binread(&i) + dec.pos += 2 + return i + case 'i': + var i int32 + dec.binread(&i) + dec.pos += 4 + return i + case 'x': + var i int64 + dec.binread(&i) + dec.pos += 8 + return i + case 'q': + var i uint16 + dec.binread(&i) + dec.pos += 2 + return i + case 'u': + var i uint32 + dec.binread(&i) + dec.pos += 4 + return i + case 't': + var i uint64 + dec.binread(&i) + dec.pos += 8 + return i + case 'd': + var f float64 + dec.binread(&f) + dec.pos += 8 + return f + case 's': + length := dec.decode("u", depth).(uint32) + b := make([]byte, int(length)+1) + if _, err := io.ReadFull(dec.in, b); err != nil { + panic(err) + } + dec.pos += int(length) + 1 + return string(b[:len(b)-1]) + case 'o': + return ObjectPath(dec.decode("s", depth).(string)) + case 'g': + length := dec.decode("y", depth).(byte) + b := make([]byte, int(length)+1) + if _, err := io.ReadFull(dec.in, b); err != nil { + panic(err) + } + dec.pos += int(length) + 1 + sig, err := ParseSignature(string(b[:len(b)-1])) + if err != nil { + panic(err) + } + return sig + case 'v': + if depth >= 64 { + panic(FormatError("input exceeds container depth limit")) + } + var variant Variant + sig := dec.decode("g", depth).(Signature) + if len(sig.str) == 0 { + panic(FormatError("variant signature is empty")) + } + err, rem := validSingle(sig.str, 0) + if err != nil { + panic(err) + } + if rem != "" { + panic(FormatError("variant signature has multiple types")) + } + variant.sig = sig + variant.value = dec.decode(sig.str, depth+1) + return variant + case 'h': + return UnixFDIndex(dec.decode("u", depth).(uint32)) + case 'a': + if len(s) > 1 && s[1] == '{' { + ksig := s[2:3] + vsig := s[3 : len(s)-1] + v := reflect.MakeMap(reflect.MapOf(typeFor(ksig), typeFor(vsig))) + if depth >= 63 { + panic(FormatError("input exceeds container depth limit")) + } + length := dec.decode("u", depth).(uint32) + // Even for empty maps, the correct padding must be included + dec.align(8) + spos := dec.pos + for dec.pos < spos+int(length) { + dec.align(8) + if !isKeyType(v.Type().Key()) { + panic(InvalidTypeError{v.Type()}) + } + kv := dec.decode(ksig, depth+2) + vv := dec.decode(vsig, depth+2) + 
v.SetMapIndex(reflect.ValueOf(kv), reflect.ValueOf(vv)) + } + return v.Interface() + } + if depth >= 64 { + panic(FormatError("input exceeds container depth limit")) + } + length := dec.decode("u", depth).(uint32) + v := reflect.MakeSlice(reflect.SliceOf(typeFor(s[1:])), 0, int(length)) + // Even for empty arrays, the correct padding must be included + dec.align(alignment(typeFor(s[1:]))) + spos := dec.pos + for dec.pos < spos+int(length) { + ev := dec.decode(s[1:], depth+1) + v = reflect.Append(v, reflect.ValueOf(ev)) + } + return v.Interface() + case '(': + if depth >= 64 { + panic(FormatError("input exceeds container depth limit")) + } + dec.align(8) + v := make([]interface{}, 0) + s = s[1 : len(s)-1] + for s != "" { + err, rem := validSingle(s, 0) + if err != nil { + panic(err) + } + ev := dec.decode(s[:len(s)-len(rem)], depth+1) + v = append(v, ev) + s = rem + } + return v + default: + panic(SignatureError{Sig: s}) + } +} + +// A FormatError is an error in the wire format. +type FormatError string + +func (e FormatError) Error() string { + return "dbus: wire format error: " + string(e) +} diff --git a/vendor/github.com/godbus/dbus/doc.go b/vendor/github.com/godbus/dbus/doc.go new file mode 100644 index 000000000..deff554a3 --- /dev/null +++ b/vendor/github.com/godbus/dbus/doc.go @@ -0,0 +1,63 @@ +/* +Package dbus implements bindings to the D-Bus message bus system. + +To use the message bus API, you first need to connect to a bus (usually the +session or system bus). The acquired connection then can be used to call methods +on remote objects and emit or receive signals. Using the Export method, you can +arrange D-Bus methods calls to be directly translated to method calls on a Go +value. + +Conversion Rules + +For outgoing messages, Go types are automatically converted to the +corresponding D-Bus types. The following types are directly encoded as their +respective D-Bus equivalents: + + Go type | D-Bus type + ------------+----------- + byte | BYTE + bool | BOOLEAN + int16 | INT16 + uint16 | UINT16 + int32 | INT32 + uint32 | UINT32 + int64 | INT64 + uint64 | UINT64 + float64 | DOUBLE + string | STRING + ObjectPath | OBJECT_PATH + Signature | SIGNATURE + Variant | VARIANT + UnixFDIndex | UNIX_FD + +Slices and arrays encode as ARRAYs of their element type. + +Maps encode as DICTs, provided that their key type can be used as a key for +a DICT. + +Structs other than Variant and Signature encode as a STRUCT containing their +exported fields. Fields whose tags contain `dbus:"-"` and unexported fields will +be skipped. + +Pointers encode as the value they're pointed to. + +Trying to encode any other type or a slice, map or struct containing an +unsupported type will result in an InvalidTypeError. + +For incoming messages, the inverse of these rules are used, with the exception +of STRUCTs. Incoming STRUCTS are represented as a slice of empty interfaces +containing the struct fields in the correct order. The Store function can be +used to convert such values to Go structs. + +Unix FD passing + +Handling Unix file descriptors deserves special mention. To use them, you should +first check that they are supported on a connection by calling SupportsUnixFDs. +If it returns true, all method of Connection will translate messages containing +UnixFD's to messages that are accompanied by the given file descriptors with the +UnixFD values being substituted by the correct indices. Similarily, the indices +of incoming messages are automatically resolved. It shouldn't be necessary to use +UnixFDIndex. 
+ +*/ +package dbus diff --git a/vendor/github.com/godbus/dbus/encoder.go b/vendor/github.com/godbus/dbus/encoder.go new file mode 100644 index 000000000..9f0a9e89e --- /dev/null +++ b/vendor/github.com/godbus/dbus/encoder.go @@ -0,0 +1,208 @@ +package dbus + +import ( + "bytes" + "encoding/binary" + "io" + "reflect" +) + +// An encoder encodes values to the D-Bus wire format. +type encoder struct { + out io.Writer + order binary.ByteOrder + pos int +} + +// NewEncoder returns a new encoder that writes to out in the given byte order. +func newEncoder(out io.Writer, order binary.ByteOrder) *encoder { + return newEncoderAtOffset(out, 0, order) +} + +// newEncoderAtOffset returns a new encoder that writes to out in the given +// byte order. Specify the offset to initialize pos for proper alignment +// computation. +func newEncoderAtOffset(out io.Writer, offset int, order binary.ByteOrder) *encoder { + enc := new(encoder) + enc.out = out + enc.order = order + enc.pos = offset + return enc +} + +// Aligns the next output to be on a multiple of n. Panics on write errors. +func (enc *encoder) align(n int) { + pad := enc.padding(0, n) + if pad > 0 { + empty := make([]byte, pad) + if _, err := enc.out.Write(empty); err != nil { + panic(err) + } + enc.pos += pad + } +} + +// pad returns the number of bytes of padding, based on current position and additional offset. +// and alignment. +func (enc *encoder) padding(offset, algn int) int { + abs := enc.pos + offset + if abs%algn != 0 { + newabs := (abs + algn - 1) & ^(algn - 1) + return newabs - abs + } + return 0 +} + +// Calls binary.Write(enc.out, enc.order, v) and panics on write errors. +func (enc *encoder) binwrite(v interface{}) { + if err := binary.Write(enc.out, enc.order, v); err != nil { + panic(err) + } +} + +// Encode encodes the given values to the underyling reader. All written values +// are aligned properly as required by the D-Bus spec. +func (enc *encoder) Encode(vs ...interface{}) (err error) { + defer func() { + err, _ = recover().(error) + }() + for _, v := range vs { + enc.encode(reflect.ValueOf(v), 0) + } + return nil +} + +// encode encodes the given value to the writer and panics on error. depth holds +// the depth of the container nesting. 
+func (enc *encoder) encode(v reflect.Value, depth int) { + enc.align(alignment(v.Type())) + switch v.Kind() { + case reflect.Uint8: + var b [1]byte + b[0] = byte(v.Uint()) + if _, err := enc.out.Write(b[:]); err != nil { + panic(err) + } + enc.pos++ + case reflect.Bool: + if v.Bool() { + enc.encode(reflect.ValueOf(uint32(1)), depth) + } else { + enc.encode(reflect.ValueOf(uint32(0)), depth) + } + case reflect.Int16: + enc.binwrite(int16(v.Int())) + enc.pos += 2 + case reflect.Uint16: + enc.binwrite(uint16(v.Uint())) + enc.pos += 2 + case reflect.Int32: + enc.binwrite(int32(v.Int())) + enc.pos += 4 + case reflect.Uint32: + enc.binwrite(uint32(v.Uint())) + enc.pos += 4 + case reflect.Int64: + enc.binwrite(v.Int()) + enc.pos += 8 + case reflect.Uint64: + enc.binwrite(v.Uint()) + enc.pos += 8 + case reflect.Float64: + enc.binwrite(v.Float()) + enc.pos += 8 + case reflect.String: + enc.encode(reflect.ValueOf(uint32(len(v.String()))), depth) + b := make([]byte, v.Len()+1) + copy(b, v.String()) + b[len(b)-1] = 0 + n, err := enc.out.Write(b) + if err != nil { + panic(err) + } + enc.pos += n + case reflect.Ptr: + enc.encode(v.Elem(), depth) + case reflect.Slice, reflect.Array: + if depth >= 64 { + panic(FormatError("input exceeds container depth limit")) + } + // Lookahead offset: 4 bytes for uint32 length (with alignment), + // plus alignment for elements. + n := enc.padding(0, 4) + 4 + offset := enc.pos + n + enc.padding(n, alignment(v.Type().Elem())) + + var buf bytes.Buffer + bufenc := newEncoderAtOffset(&buf, offset, enc.order) + + for i := 0; i < v.Len(); i++ { + bufenc.encode(v.Index(i), depth+1) + } + enc.encode(reflect.ValueOf(uint32(buf.Len())), depth) + length := buf.Len() + enc.align(alignment(v.Type().Elem())) + if _, err := buf.WriteTo(enc.out); err != nil { + panic(err) + } + enc.pos += length + case reflect.Struct: + if depth >= 64 && v.Type() != signatureType { + panic(FormatError("input exceeds container depth limit")) + } + switch t := v.Type(); t { + case signatureType: + str := v.Field(0) + enc.encode(reflect.ValueOf(byte(str.Len())), depth+1) + b := make([]byte, str.Len()+1) + copy(b, str.String()) + b[len(b)-1] = 0 + n, err := enc.out.Write(b) + if err != nil { + panic(err) + } + enc.pos += n + case variantType: + variant := v.Interface().(Variant) + enc.encode(reflect.ValueOf(variant.sig), depth+1) + enc.encode(reflect.ValueOf(variant.value), depth+1) + default: + for i := 0; i < v.Type().NumField(); i++ { + field := t.Field(i) + if field.PkgPath == "" && field.Tag.Get("dbus") != "-" { + enc.encode(v.Field(i), depth+1) + } + } + } + case reflect.Map: + // Maps are arrays of structures, so they actually increase the depth by + // 2. 
+ if depth >= 63 { + panic(FormatError("input exceeds container depth limit")) + } + if !isKeyType(v.Type().Key()) { + panic(InvalidTypeError{v.Type()}) + } + keys := v.MapKeys() + // Lookahead offset: 4 bytes for uint32 length (with alignment), + // plus 8-byte alignment + n := enc.padding(0, 4) + 4 + offset := enc.pos + n + enc.padding(n, 8) + + var buf bytes.Buffer + bufenc := newEncoderAtOffset(&buf, offset, enc.order) + for _, k := range keys { + bufenc.align(8) + bufenc.encode(k, depth+2) + bufenc.encode(v.MapIndex(k), depth+2) + } + enc.encode(reflect.ValueOf(uint32(buf.Len())), depth) + length := buf.Len() + enc.align(8) + if _, err := buf.WriteTo(enc.out); err != nil { + panic(err) + } + enc.pos += length + default: + panic(InvalidTypeError{v.Type()}) + } +} diff --git a/vendor/github.com/godbus/dbus/export.go b/vendor/github.com/godbus/dbus/export.go new file mode 100644 index 000000000..c6440a741 --- /dev/null +++ b/vendor/github.com/godbus/dbus/export.go @@ -0,0 +1,411 @@ +package dbus + +import ( + "errors" + "fmt" + "reflect" + "strings" +) + +var ( + errmsgInvalidArg = Error{ + "org.freedesktop.DBus.Error.InvalidArgs", + []interface{}{"Invalid type / number of args"}, + } + errmsgNoObject = Error{ + "org.freedesktop.DBus.Error.NoSuchObject", + []interface{}{"No such object"}, + } + errmsgUnknownMethod = Error{ + "org.freedesktop.DBus.Error.UnknownMethod", + []interface{}{"Unknown / invalid method"}, + } +) + +// exportWithMapping represents an exported struct along with a method name +// mapping to allow for exporting lower-case methods, etc. +type exportWithMapping struct { + export interface{} + + // Method name mapping; key -> struct method, value -> dbus method. + mapping map[string]string + + // Whether or not this export is for the entire subtree + includeSubtree bool +} + +// Sender is a type which can be used in exported methods to receive the message +// sender. +type Sender string + +func exportedMethod(export exportWithMapping, name string) reflect.Value { + if export.export == nil { + return reflect.Value{} + } + + // If a mapping was included in the export, check the map to see if we + // should be looking for a different method in the export. + if export.mapping != nil { + for key, value := range export.mapping { + if value == name { + name = key + break + } + + // Catch the case where a method is aliased but the client is calling + // the original, e.g. the "Foo" method was exported mapped to + // "foo," and dbus client called the original "Foo." + if key == name { + return reflect.Value{} + } + } + } + + value := reflect.ValueOf(export.export) + m := value.MethodByName(name) + + // Catch the case of attempting to call an unexported method + method, ok := value.Type().MethodByName(name) + + if !m.IsValid() || !ok || method.PkgPath != "" { + return reflect.Value{} + } + t := m.Type() + if t.NumOut() == 0 || + t.Out(t.NumOut()-1) != reflect.TypeOf(&errmsgInvalidArg) { + + return reflect.Value{} + } + return m +} + +// searchHandlers will look through all registered handlers looking for one +// to handle the given path. If a verbatim one isn't found, it will check for +// a subtree registration for the path as well. 
+func (conn *Conn) searchHandlers(path ObjectPath) (map[string]exportWithMapping, bool) { + conn.handlersLck.RLock() + defer conn.handlersLck.RUnlock() + + handlers, ok := conn.handlers[path] + if ok { + return handlers, ok + } + + // If handlers weren't found for this exact path, look for a matching subtree + // registration + handlers = make(map[string]exportWithMapping) + path = path[:strings.LastIndex(string(path), "/")] + for len(path) > 0 { + var subtreeHandlers map[string]exportWithMapping + subtreeHandlers, ok = conn.handlers[path] + if ok { + for iface, handler := range subtreeHandlers { + // Only include this handler if it registered for the subtree + if handler.includeSubtree { + handlers[iface] = handler + } + } + + break + } + + path = path[:strings.LastIndex(string(path), "/")] + } + + return handlers, ok +} + +// handleCall handles the given method call (i.e. looks if it's one of the +// pre-implemented ones and searches for a corresponding handler if not). +func (conn *Conn) handleCall(msg *Message) { + name := msg.Headers[FieldMember].value.(string) + path := msg.Headers[FieldPath].value.(ObjectPath) + ifaceName, hasIface := msg.Headers[FieldInterface].value.(string) + sender, hasSender := msg.Headers[FieldSender].value.(string) + serial := msg.serial + if ifaceName == "org.freedesktop.DBus.Peer" { + switch name { + case "Ping": + conn.sendReply(sender, serial) + case "GetMachineId": + conn.sendReply(sender, serial, conn.uuid) + default: + conn.sendError(errmsgUnknownMethod, sender, serial) + } + return + } + if len(name) == 0 { + conn.sendError(errmsgUnknownMethod, sender, serial) + } + + // Find the exported handler (if any) for this path + handlers, ok := conn.searchHandlers(path) + if !ok { + conn.sendError(errmsgNoObject, sender, serial) + return + } + + var m reflect.Value + if hasIface { + iface := handlers[ifaceName] + m = exportedMethod(iface, name) + } else { + for _, v := range handlers { + m = exportedMethod(v, name) + if m.IsValid() { + break + } + } + } + + if !m.IsValid() { + conn.sendError(errmsgUnknownMethod, sender, serial) + return + } + + t := m.Type() + vs := msg.Body + pointers := make([]interface{}, t.NumIn()) + decode := make([]interface{}, 0, len(vs)) + for i := 0; i < t.NumIn(); i++ { + tp := t.In(i) + val := reflect.New(tp) + pointers[i] = val.Interface() + if tp == reflect.TypeOf((*Sender)(nil)).Elem() { + val.Elem().SetString(sender) + } else if tp == reflect.TypeOf((*Message)(nil)).Elem() { + val.Elem().Set(reflect.ValueOf(*msg)) + } else { + decode = append(decode, pointers[i]) + } + } + + if len(decode) != len(vs) { + conn.sendError(errmsgInvalidArg, sender, serial) + return + } + + if err := Store(vs, decode...); err != nil { + conn.sendError(errmsgInvalidArg, sender, serial) + return + } + + // Extract parameters + params := make([]reflect.Value, len(pointers)) + for i := 0; i < len(pointers); i++ { + params[i] = reflect.ValueOf(pointers[i]).Elem() + } + + // Call method + ret := m.Call(params) + if em := ret[t.NumOut()-1].Interface().(*Error); em != nil { + conn.sendError(*em, sender, serial) + return + } + + if msg.Flags&FlagNoReplyExpected == 0 { + reply := new(Message) + reply.Type = TypeMethodReply + reply.serial = conn.getSerial() + reply.Headers = make(map[HeaderField]Variant) + if hasSender { + reply.Headers[FieldDestination] = msg.Headers[FieldSender] + } + reply.Headers[FieldReplySerial] = MakeVariant(msg.serial) + reply.Body = make([]interface{}, len(ret)-1) + for i := 0; i < len(ret)-1; i++ { + reply.Body[i] = 
ret[i].Interface() + } + if len(ret) != 1 { + reply.Headers[FieldSignature] = MakeVariant(SignatureOf(reply.Body...)) + } + conn.outLck.RLock() + if !conn.closed { + conn.out <- reply + } + conn.outLck.RUnlock() + } +} + +// Emit emits the given signal on the message bus. The name parameter must be +// formatted as "interface.member", e.g., "org.freedesktop.DBus.NameLost". +func (conn *Conn) Emit(path ObjectPath, name string, values ...interface{}) error { + if !path.IsValid() { + return errors.New("dbus: invalid object path") + } + i := strings.LastIndex(name, ".") + if i == -1 { + return errors.New("dbus: invalid method name") + } + iface := name[:i] + member := name[i+1:] + if !isValidMember(member) { + return errors.New("dbus: invalid method name") + } + if !isValidInterface(iface) { + return errors.New("dbus: invalid interface name") + } + msg := new(Message) + msg.Type = TypeSignal + msg.serial = conn.getSerial() + msg.Headers = make(map[HeaderField]Variant) + msg.Headers[FieldInterface] = MakeVariant(iface) + msg.Headers[FieldMember] = MakeVariant(member) + msg.Headers[FieldPath] = MakeVariant(path) + msg.Body = values + if len(values) > 0 { + msg.Headers[FieldSignature] = MakeVariant(SignatureOf(values...)) + } + conn.outLck.RLock() + defer conn.outLck.RUnlock() + if conn.closed { + return ErrClosed + } + conn.out <- msg + return nil +} + +// Export registers the given value to be exported as an object on the +// message bus. +// +// If a method call on the given path and interface is received, an exported +// method with the same name is called with v as the receiver if the +// parameters match and the last return value is of type *Error. If this +// *Error is not nil, it is sent back to the caller as an error. +// Otherwise, a method reply is sent with the other return values as its body. +// +// Any parameters with the special type Sender are set to the sender of the +// dbus message when the method is called. Parameters of this type do not +// contribute to the dbus signature of the method (i.e. the method is exposed +// as if the parameters of type Sender were not there). +// +// Similarly, any parameters with the type Message are set to the raw message +// received on the bus. Again, parameters of this type do not contribute to the +// dbus signature of the method. +// +// Every method call is executed in a new goroutine, so the method may be called +// in multiple goroutines at once. +// +// Method calls on the interface org.freedesktop.DBus.Peer will be automatically +// handled for every object. +// +// Passing nil as the first parameter will cause conn to cease handling calls on +// the given combination of path and interface. +// +// Export returns an error if path is not a valid path name. +func (conn *Conn) Export(v interface{}, path ObjectPath, iface string) error { + return conn.ExportWithMap(v, nil, path, iface) +} + +// ExportWithMap works exactly like Export but provides the ability to remap +// method names (e.g. export a lower-case method). +// +// The keys in the map are the real method names (exported on the struct), and +// the values are the method names to be exported on DBus. +func (conn *Conn) ExportWithMap(v interface{}, mapping map[string]string, path ObjectPath, iface string) error { + return conn.exportWithMap(v, mapping, path, iface, false) +} + +// ExportSubtree works exactly like Export but registers the given value for +// an entire subtree rather under the root path provided. 
+// +// In order to make this useful, one parameter in each of the value's exported +// methods should be a Message, in which case it will contain the raw message +// (allowing one to get access to the path that caused the method to be called). +// +// Note that more specific export paths take precedence over less specific. For +// example, a method call using the ObjectPath /foo/bar/baz will call a method +// exported on /foo/bar before a method exported on /foo. +func (conn *Conn) ExportSubtree(v interface{}, path ObjectPath, iface string) error { + return conn.ExportSubtreeWithMap(v, nil, path, iface) +} + +// ExportSubtreeWithMap works exactly like ExportSubtree but provides the +// ability to remap method names (e.g. export a lower-case method). +// +// The keys in the map are the real method names (exported on the struct), and +// the values are the method names to be exported on DBus. +func (conn *Conn) ExportSubtreeWithMap(v interface{}, mapping map[string]string, path ObjectPath, iface string) error { + return conn.exportWithMap(v, mapping, path, iface, true) +} + +// exportWithMap is the worker function for all exports/registrations. +func (conn *Conn) exportWithMap(v interface{}, mapping map[string]string, path ObjectPath, iface string, includeSubtree bool) error { + if !path.IsValid() { + return fmt.Errorf(`dbus: Invalid path name: "%s"`, path) + } + + conn.handlersLck.Lock() + defer conn.handlersLck.Unlock() + + // Remove a previous export if the interface is nil + if v == nil { + if _, ok := conn.handlers[path]; ok { + delete(conn.handlers[path], iface) + if len(conn.handlers[path]) == 0 { + delete(conn.handlers, path) + } + } + + return nil + } + + // If this is the first handler for this path, make a new map to hold all + // handlers for this path. + if _, ok := conn.handlers[path]; !ok { + conn.handlers[path] = make(map[string]exportWithMapping) + } + + // Finally, save this handler + conn.handlers[path][iface] = exportWithMapping{export: v, mapping: mapping, includeSubtree: includeSubtree} + + return nil +} + +// ReleaseName calls org.freedesktop.DBus.ReleaseName and awaits a response. +func (conn *Conn) ReleaseName(name string) (ReleaseNameReply, error) { + var r uint32 + err := conn.busObj.Call("org.freedesktop.DBus.ReleaseName", 0, name).Store(&r) + if err != nil { + return 0, err + } + return ReleaseNameReply(r), nil +} + +// RequestName calls org.freedesktop.DBus.RequestName and awaits a response. +func (conn *Conn) RequestName(name string, flags RequestNameFlags) (RequestNameReply, error) { + var r uint32 + err := conn.busObj.Call("org.freedesktop.DBus.RequestName", 0, name, flags).Store(&r) + if err != nil { + return 0, err + } + return RequestNameReply(r), nil +} + +// ReleaseNameReply is the reply to a ReleaseName call. +type ReleaseNameReply uint32 + +const ( + ReleaseNameReplyReleased ReleaseNameReply = 1 + iota + ReleaseNameReplyNonExistent + ReleaseNameReplyNotOwner +) + +// RequestNameFlags represents the possible flags for a RequestName call. +type RequestNameFlags uint32 + +const ( + NameFlagAllowReplacement RequestNameFlags = 1 << iota + NameFlagReplaceExisting + NameFlagDoNotQueue +) + +// RequestNameReply is the reply to a RequestName call. 
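A hedged sketch of the serving side described by the Export and RequestName documentation above; the object path, interface name and the demo type are invented for illustration:

    package main

    import (
        "fmt"
        "os"

        "github.com/godbus/dbus"
    )

    type demo struct{}

    // Ping has the shape Export requires: an exported name and a last
    // return value of type *dbus.Error.
    func (d demo) Ping(s string) (string, *dbus.Error) {
        return "pong: " + s, nil
    }

    func main() {
        conn, err := dbus.SessionBus()
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        if err := conn.Export(demo{}, "/com/example/Demo", "com.example.Demo"); err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        reply, err := conn.RequestName("com.example.Demo", dbus.NameFlagDoNotQueue)
        if err != nil || reply != dbus.RequestNameReplyPrimaryOwner {
            fmt.Fprintln(os.Stderr, "could not claim com.example.Demo")
            os.Exit(1)
        }
        select {} // keep serving method calls
    }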
+type RequestNameReply uint32 + +const ( + RequestNameReplyPrimaryOwner RequestNameReply = 1 + iota + RequestNameReplyInQueue + RequestNameReplyExists + RequestNameReplyAlreadyOwner +) diff --git a/vendor/github.com/godbus/dbus/homedir.go b/vendor/github.com/godbus/dbus/homedir.go new file mode 100644 index 000000000..0b745f931 --- /dev/null +++ b/vendor/github.com/godbus/dbus/homedir.go @@ -0,0 +1,28 @@ +package dbus + +import ( + "os" + "sync" +) + +var ( + homeDir string + homeDirLock sync.Mutex +) + +func getHomeDir() string { + homeDirLock.Lock() + defer homeDirLock.Unlock() + + if homeDir != "" { + return homeDir + } + + homeDir = os.Getenv("HOME") + if homeDir != "" { + return homeDir + } + + homeDir = lookupHomeDir() + return homeDir +} diff --git a/vendor/github.com/godbus/dbus/homedir_dynamic.go b/vendor/github.com/godbus/dbus/homedir_dynamic.go new file mode 100644 index 000000000..2732081e7 --- /dev/null +++ b/vendor/github.com/godbus/dbus/homedir_dynamic.go @@ -0,0 +1,15 @@ +// +build !static_build + +package dbus + +import ( + "os/user" +) + +func lookupHomeDir() string { + u, err := user.Current() + if err != nil { + return "/" + } + return u.HomeDir +} diff --git a/vendor/github.com/godbus/dbus/homedir_static.go b/vendor/github.com/godbus/dbus/homedir_static.go new file mode 100644 index 000000000..b9d9cb552 --- /dev/null +++ b/vendor/github.com/godbus/dbus/homedir_static.go @@ -0,0 +1,45 @@ +// +build static_build + +package dbus + +import ( + "bufio" + "os" + "strconv" + "strings" +) + +func lookupHomeDir() string { + myUid := os.Getuid() + + f, err := os.Open("/etc/passwd") + if err != nil { + return "/" + } + defer f.Close() + + s := bufio.NewScanner(f) + + for s.Scan() { + if err := s.Err(); err != nil { + break + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + parts := strings.Split(line, ":") + + if len(parts) >= 6 { + uid, err := strconv.Atoi(parts[2]) + if err == nil && uid == myUid { + return parts[5] + } + } + } + + // Default to / if we can't get a better value + return "/" +} diff --git a/vendor/github.com/godbus/dbus/message.go b/vendor/github.com/godbus/dbus/message.go new file mode 100644 index 000000000..075d6e38b --- /dev/null +++ b/vendor/github.com/godbus/dbus/message.go @@ -0,0 +1,346 @@ +package dbus + +import ( + "bytes" + "encoding/binary" + "errors" + "io" + "reflect" + "strconv" +) + +const protoVersion byte = 1 + +// Flags represents the possible flags of a D-Bus message. +type Flags byte + +const ( + // FlagNoReplyExpected signals that the message is not expected to generate + // a reply. If this flag is set on outgoing messages, any possible reply + // will be discarded. + FlagNoReplyExpected Flags = 1 << iota + // FlagNoAutoStart signals that the message bus should not automatically + // start an application when handling this message. + FlagNoAutoStart +) + +// Type represents the possible types of a D-Bus message. +type Type byte + +const ( + TypeMethodCall Type = 1 + iota + TypeMethodReply + TypeError + TypeSignal + typeMax +) + +func (t Type) String() string { + switch t { + case TypeMethodCall: + return "method call" + case TypeMethodReply: + return "reply" + case TypeError: + return "error" + case TypeSignal: + return "signal" + } + return "invalid" +} + +// HeaderField represents the possible byte codes for the headers +// of a D-Bus message. 
+type HeaderField byte + +const ( + FieldPath HeaderField = 1 + iota + FieldInterface + FieldMember + FieldErrorName + FieldReplySerial + FieldDestination + FieldSender + FieldSignature + FieldUnixFDs + fieldMax +) + +// An InvalidMessageError describes the reason why a D-Bus message is regarded as +// invalid. +type InvalidMessageError string + +func (e InvalidMessageError) Error() string { + return "dbus: invalid message: " + string(e) +} + +// fieldType are the types of the various header fields. +var fieldTypes = [fieldMax]reflect.Type{ + FieldPath: objectPathType, + FieldInterface: stringType, + FieldMember: stringType, + FieldErrorName: stringType, + FieldReplySerial: uint32Type, + FieldDestination: stringType, + FieldSender: stringType, + FieldSignature: signatureType, + FieldUnixFDs: uint32Type, +} + +// requiredFields lists the header fields that are required by the different +// message types. +var requiredFields = [typeMax][]HeaderField{ + TypeMethodCall: {FieldPath, FieldMember}, + TypeMethodReply: {FieldReplySerial}, + TypeError: {FieldErrorName, FieldReplySerial}, + TypeSignal: {FieldPath, FieldInterface, FieldMember}, +} + +// Message represents a single D-Bus message. +type Message struct { + Type + Flags + Headers map[HeaderField]Variant + Body []interface{} + + serial uint32 +} + +type header struct { + Field byte + Variant +} + +// DecodeMessage tries to decode a single message in the D-Bus wire format +// from the given reader. The byte order is figured out from the first byte. +// The possibly returned error can be an error of the underlying reader, an +// InvalidMessageError or a FormatError. +func DecodeMessage(rd io.Reader) (msg *Message, err error) { + var order binary.ByteOrder + var hlength, length uint32 + var typ, flags, proto byte + var headers []header + + b := make([]byte, 1) + _, err = rd.Read(b) + if err != nil { + return + } + switch b[0] { + case 'l': + order = binary.LittleEndian + case 'B': + order = binary.BigEndian + default: + return nil, InvalidMessageError("invalid byte order") + } + + dec := newDecoder(rd, order) + dec.pos = 1 + + msg = new(Message) + vs, err := dec.Decode(Signature{"yyyuu"}) + if err != nil { + return nil, err + } + if err = Store(vs, &typ, &flags, &proto, &length, &msg.serial); err != nil { + return nil, err + } + msg.Type = Type(typ) + msg.Flags = Flags(flags) + + // get the header length separately because we need it later + b = make([]byte, 4) + _, err = io.ReadFull(rd, b) + if err != nil { + return nil, err + } + binary.Read(bytes.NewBuffer(b), order, &hlength) + if hlength+length+16 > 1<<27 { + return nil, InvalidMessageError("message is too long") + } + dec = newDecoder(io.MultiReader(bytes.NewBuffer(b), rd), order) + dec.pos = 12 + vs, err = dec.Decode(Signature{"a(yv)"}) + if err != nil { + return nil, err + } + if err = Store(vs, &headers); err != nil { + return nil, err + } + + msg.Headers = make(map[HeaderField]Variant) + for _, v := range headers { + msg.Headers[HeaderField(v.Field)] = v.Variant + } + + dec.align(8) + body := make([]byte, int(length)) + if length != 0 { + _, err := io.ReadFull(rd, body) + if err != nil { + return nil, err + } + } + + if err = msg.IsValid(); err != nil { + return nil, err + } + sig, _ := msg.Headers[FieldSignature].value.(Signature) + if sig.str != "" { + buf := bytes.NewBuffer(body) + dec = newDecoder(buf, order) + vs, err := dec.Decode(sig) + if err != nil { + return nil, err + } + msg.Body = vs + } + + return +} + +// EncodeTo encodes and sends a message to the given writer. 
The byte order must +// be either binary.LittleEndian or binary.BigEndian. If the message is not +// valid or an error occurs when writing, an error is returned. +func (msg *Message) EncodeTo(out io.Writer, order binary.ByteOrder) error { + if err := msg.IsValid(); err != nil { + return err + } + var vs [7]interface{} + switch order { + case binary.LittleEndian: + vs[0] = byte('l') + case binary.BigEndian: + vs[0] = byte('B') + default: + return errors.New("dbus: invalid byte order") + } + body := new(bytes.Buffer) + enc := newEncoder(body, order) + if len(msg.Body) != 0 { + enc.Encode(msg.Body...) + } + vs[1] = msg.Type + vs[2] = msg.Flags + vs[3] = protoVersion + vs[4] = uint32(len(body.Bytes())) + vs[5] = msg.serial + headers := make([]header, 0, len(msg.Headers)) + for k, v := range msg.Headers { + headers = append(headers, header{byte(k), v}) + } + vs[6] = headers + var buf bytes.Buffer + enc = newEncoder(&buf, order) + enc.Encode(vs[:]...) + enc.align(8) + body.WriteTo(&buf) + if buf.Len() > 1<<27 { + return InvalidMessageError("message is too long") + } + if _, err := buf.WriteTo(out); err != nil { + return err + } + return nil +} + +// IsValid checks whether msg is a valid message and returns an +// InvalidMessageError if it is not. +func (msg *Message) IsValid() error { + if msg.Flags & ^(FlagNoAutoStart|FlagNoReplyExpected) != 0 { + return InvalidMessageError("invalid flags") + } + if msg.Type == 0 || msg.Type >= typeMax { + return InvalidMessageError("invalid message type") + } + for k, v := range msg.Headers { + if k == 0 || k >= fieldMax { + return InvalidMessageError("invalid header") + } + if reflect.TypeOf(v.value) != fieldTypes[k] { + return InvalidMessageError("invalid type of header field") + } + } + for _, v := range requiredFields[msg.Type] { + if _, ok := msg.Headers[v]; !ok { + return InvalidMessageError("missing required header") + } + } + if path, ok := msg.Headers[FieldPath]; ok { + if !path.value.(ObjectPath).IsValid() { + return InvalidMessageError("invalid path name") + } + } + if iface, ok := msg.Headers[FieldInterface]; ok { + if !isValidInterface(iface.value.(string)) { + return InvalidMessageError("invalid interface name") + } + } + if member, ok := msg.Headers[FieldMember]; ok { + if !isValidMember(member.value.(string)) { + return InvalidMessageError("invalid member name") + } + } + if errname, ok := msg.Headers[FieldErrorName]; ok { + if !isValidInterface(errname.value.(string)) { + return InvalidMessageError("invalid error name") + } + } + if len(msg.Body) != 0 { + if _, ok := msg.Headers[FieldSignature]; !ok { + return InvalidMessageError("missing signature") + } + } + return nil +} + +// Serial returns the message's serial number. The returned value is only valid +// for messages received by eavesdropping. +func (msg *Message) Serial() uint32 { + return msg.serial +} + +// String returns a string representation of a message similar to the format of +// dbus-monitor. 
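Eavesdrop (defined earlier in conn.go) combined with Message.String below gives a crude dbus-monitor style dump. Note that actually seeing other applications' traffic additionally requires suitable eavesdrop match rules or monitoring privileges on the bus, which this sketch does not set up:

    package main

    import (
        "fmt"
        "os"

        "github.com/godbus/dbus"
    )

    func main() {
        conn, err := dbus.SessionBus()
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        ch := make(chan *dbus.Message, 64)
        conn.Eavesdrop(ch)
        for msg := range ch {
            fmt.Println(msg.String())
        }
    }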
+func (msg *Message) String() string { + if err := msg.IsValid(); err != nil { + return "" + } + s := msg.Type.String() + if v, ok := msg.Headers[FieldSender]; ok { + s += " from " + v.value.(string) + } + if v, ok := msg.Headers[FieldDestination]; ok { + s += " to " + v.value.(string) + } + s += " serial " + strconv.FormatUint(uint64(msg.serial), 10) + if v, ok := msg.Headers[FieldReplySerial]; ok { + s += " reply_serial " + strconv.FormatUint(uint64(v.value.(uint32)), 10) + } + if v, ok := msg.Headers[FieldUnixFDs]; ok { + s += " unixfds " + strconv.FormatUint(uint64(v.value.(uint32)), 10) + } + if v, ok := msg.Headers[FieldPath]; ok { + s += " path " + string(v.value.(ObjectPath)) + } + if v, ok := msg.Headers[FieldInterface]; ok { + s += " interface " + v.value.(string) + } + if v, ok := msg.Headers[FieldErrorName]; ok { + s += " error " + v.value.(string) + } + if v, ok := msg.Headers[FieldMember]; ok { + s += " member " + v.value.(string) + } + if len(msg.Body) != 0 { + s += "\n" + } + for i, v := range msg.Body { + s += " " + MakeVariant(v).String() + if i != len(msg.Body)-1 { + s += "\n" + } + } + return s +} diff --git a/vendor/github.com/godbus/dbus/object.go b/vendor/github.com/godbus/dbus/object.go new file mode 100644 index 000000000..7ef45da4c --- /dev/null +++ b/vendor/github.com/godbus/dbus/object.go @@ -0,0 +1,126 @@ +package dbus + +import ( + "errors" + "strings" +) + +// BusObject is the interface of a remote object on which methods can be +// invoked. +type BusObject interface { + Call(method string, flags Flags, args ...interface{}) *Call + Go(method string, flags Flags, ch chan *Call, args ...interface{}) *Call + GetProperty(p string) (Variant, error) + Destination() string + Path() ObjectPath +} + +// Object represents a remote object on which methods can be invoked. +type Object struct { + conn *Conn + dest string + path ObjectPath +} + +// Call calls a method with (*Object).Go and waits for its reply. +func (o *Object) Call(method string, flags Flags, args ...interface{}) *Call { + return <-o.Go(method, flags, make(chan *Call, 1), args...).Done +} + +// Go calls a method with the given arguments asynchronously. It returns a +// Call structure representing this method call. The passed channel will +// return the same value once the call is done. If ch is nil, a new channel +// will be allocated. Otherwise, ch has to be buffered or Go will panic. +// +// If the flags include FlagNoReplyExpected, ch is ignored and a Call structure +// is returned of which only the Err member is valid. +// +// If the method parameter contains a dot ('.'), the part before the last dot +// specifies the interface on which the method is called. 
+func (o *Object) Go(method string, flags Flags, ch chan *Call, args ...interface{}) *Call { + iface := "" + i := strings.LastIndex(method, ".") + if i != -1 { + iface = method[:i] + } + method = method[i+1:] + msg := new(Message) + msg.Type = TypeMethodCall + msg.serial = o.conn.getSerial() + msg.Flags = flags & (FlagNoAutoStart | FlagNoReplyExpected) + msg.Headers = make(map[HeaderField]Variant) + msg.Headers[FieldPath] = MakeVariant(o.path) + msg.Headers[FieldDestination] = MakeVariant(o.dest) + msg.Headers[FieldMember] = MakeVariant(method) + if iface != "" { + msg.Headers[FieldInterface] = MakeVariant(iface) + } + msg.Body = args + if len(args) > 0 { + msg.Headers[FieldSignature] = MakeVariant(SignatureOf(args...)) + } + if msg.Flags&FlagNoReplyExpected == 0 { + if ch == nil { + ch = make(chan *Call, 10) + } else if cap(ch) == 0 { + panic("dbus: unbuffered channel passed to (*Object).Go") + } + call := &Call{ + Destination: o.dest, + Path: o.path, + Method: method, + Args: args, + Done: ch, + } + o.conn.callsLck.Lock() + o.conn.calls[msg.serial] = call + o.conn.callsLck.Unlock() + o.conn.outLck.RLock() + if o.conn.closed { + call.Err = ErrClosed + call.Done <- call + } else { + o.conn.out <- msg + } + o.conn.outLck.RUnlock() + return call + } + o.conn.outLck.RLock() + defer o.conn.outLck.RUnlock() + if o.conn.closed { + return &Call{Err: ErrClosed} + } + o.conn.out <- msg + return &Call{Err: nil} +} + +// GetProperty calls org.freedesktop.DBus.Properties.GetProperty on the given +// object. The property name must be given in interface.member notation. +func (o *Object) GetProperty(p string) (Variant, error) { + idx := strings.LastIndex(p, ".") + if idx == -1 || idx+1 == len(p) { + return Variant{}, errors.New("dbus: invalid property " + p) + } + + iface := p[:idx] + prop := p[idx+1:] + + result := Variant{} + err := o.Call("org.freedesktop.DBus.Properties.Get", 0, iface, prop).Store(&result) + + if err != nil { + return Variant{}, err + } + + return result, nil +} + +// Destination returns the destination that calls on o are sent to. +func (o *Object) Destination() string { + return o.dest +} + +// Path returns the path that calls on o are sent to. +func (o *Object) Path() ObjectPath { + return o.path +} diff --git a/vendor/github.com/godbus/dbus/sig.go b/vendor/github.com/godbus/dbus/sig.go new file mode 100644 index 000000000..f45b53ce1 --- /dev/null +++ b/vendor/github.com/godbus/dbus/sig.go @@ -0,0 +1,257 @@ +package dbus + +import ( + "fmt" + "reflect" + "strings" +) + +var sigToType = map[byte]reflect.Type{ + 'y': byteType, + 'b': boolType, + 'n': int16Type, + 'q': uint16Type, + 'i': int32Type, + 'u': uint32Type, + 'x': int64Type, + 't': uint64Type, + 'd': float64Type, + 's': stringType, + 'g': signatureType, + 'o': objectPathType, + 'v': variantType, + 'h': unixFDIndexType, +} + +// Signature represents a correct type signature as specified by the D-Bus +// specification. The zero value represents the empty signature, "". +type Signature struct { + str string +} + +// SignatureOf returns the concatenation of all the signatures of the given +// values. It panics if one of them is not representable in D-Bus. +func SignatureOf(vs ...interface{}) Signature { + var s string + for _, v := range vs { + s += getSignature(reflect.TypeOf(v)) + } + return Signature{s} +} + +// SignatureOfType returns the signature of the given type. It panics if the +// type is not representable in D-Bus. 
+func SignatureOfType(t reflect.Type) Signature { + return Signature{getSignature(t)} +} + +// getSignature returns the signature of the given type and panics on unknown types. +func getSignature(t reflect.Type) string { + // handle simple types first + switch t.Kind() { + case reflect.Uint8: + return "y" + case reflect.Bool: + return "b" + case reflect.Int16: + return "n" + case reflect.Uint16: + return "q" + case reflect.Int32: + if t == unixFDType { + return "h" + } + return "i" + case reflect.Uint32: + if t == unixFDIndexType { + return "h" + } + return "u" + case reflect.Int64: + return "x" + case reflect.Uint64: + return "t" + case reflect.Float64: + return "d" + case reflect.Ptr: + return getSignature(t.Elem()) + case reflect.String: + if t == objectPathType { + return "o" + } + return "s" + case reflect.Struct: + if t == variantType { + return "v" + } else if t == signatureType { + return "g" + } + var s string + for i := 0; i < t.NumField(); i++ { + field := t.Field(i) + if field.PkgPath == "" && field.Tag.Get("dbus") != "-" { + s += getSignature(t.Field(i).Type) + } + } + return "(" + s + ")" + case reflect.Array, reflect.Slice: + return "a" + getSignature(t.Elem()) + case reflect.Map: + if !isKeyType(t.Key()) { + panic(InvalidTypeError{t}) + } + return "a{" + getSignature(t.Key()) + getSignature(t.Elem()) + "}" + } + panic(InvalidTypeError{t}) +} + +// ParseSignature returns the signature represented by this string, or a +// SignatureError if the string is not a valid signature. +func ParseSignature(s string) (sig Signature, err error) { + if len(s) == 0 { + return + } + if len(s) > 255 { + return Signature{""}, SignatureError{s, "too long"} + } + sig.str = s + for err == nil && len(s) != 0 { + err, s = validSingle(s, 0) + } + if err != nil { + sig = Signature{""} + } + + return +} + +// ParseSignatureMust behaves like ParseSignature, except that it panics if s +// is not valid. +func ParseSignatureMust(s string) Signature { + sig, err := ParseSignature(s) + if err != nil { + panic(err) + } + return sig +} + +// Empty retruns whether the signature is the empty signature. +func (s Signature) Empty() bool { + return s.str == "" +} + +// Single returns whether the signature represents a single, complete type. +func (s Signature) Single() bool { + err, r := validSingle(s.str, 0) + return err != nil && r == "" +} + +// String returns the signature's string representation. +func (s Signature) String() string { + return s.str +} + +// A SignatureError indicates that a signature passed to a function or received +// on a connection is not a valid signature. +type SignatureError struct { + Sig string + Reason string +} + +func (e SignatureError) Error() string { + return fmt.Sprintf("dbus: invalid signature: %q (%s)", e.Sig, e.Reason) +} + +// Try to read a single type from this string. If it was successfull, err is nil +// and rem is the remaining unparsed part. Otherwise, err is a non-nil +// SignatureError and rem is "". depth is the current recursion depth which may +// not be greater than 64 and should be given as 0 on the first call. 
+func validSingle(s string, depth int) (err error, rem string) { + if s == "" { + return SignatureError{Sig: s, Reason: "empty signature"}, "" + } + if depth > 64 { + return SignatureError{Sig: s, Reason: "container nesting too deep"}, "" + } + switch s[0] { + case 'y', 'b', 'n', 'q', 'i', 'u', 'x', 't', 'd', 's', 'g', 'o', 'v', 'h': + return nil, s[1:] + case 'a': + if len(s) > 1 && s[1] == '{' { + i := findMatching(s[1:], '{', '}') + if i == -1 { + return SignatureError{Sig: s, Reason: "unmatched '{'"}, "" + } + i++ + rem = s[i+1:] + s = s[2:i] + if err, _ = validSingle(s[:1], depth+1); err != nil { + return err, "" + } + err, nr := validSingle(s[1:], depth+1) + if err != nil { + return err, "" + } + if nr != "" { + return SignatureError{Sig: s, Reason: "too many types in dict"}, "" + } + return nil, rem + } + return validSingle(s[1:], depth+1) + case '(': + i := findMatching(s, '(', ')') + if i == -1 { + return SignatureError{Sig: s, Reason: "unmatched ')'"}, "" + } + rem = s[i+1:] + s = s[1:i] + for err == nil && s != "" { + err, s = validSingle(s, depth+1) + } + if err != nil { + rem = "" + } + return + } + return SignatureError{Sig: s, Reason: "invalid type character"}, "" +} + +func findMatching(s string, left, right rune) int { + n := 0 + for i, v := range s { + if v == left { + n++ + } else if v == right { + n-- + } + if n == 0 { + return i + } + } + return -1 +} + +// typeFor returns the type of the given signature. It ignores any left over +// characters and panics if s doesn't start with a valid type signature. +func typeFor(s string) (t reflect.Type) { + err, _ := validSingle(s, 0) + if err != nil { + panic(err) + } + + if t, ok := sigToType[s[0]]; ok { + return t + } + switch s[0] { + case 'a': + if s[1] == '{' { + i := strings.LastIndex(s, "}") + t = reflect.MapOf(sigToType[s[2]], typeFor(s[3:i])) + } else { + t = reflect.SliceOf(typeFor(s[1:])) + } + case '(': + t = interfacesType + } + return +} diff --git a/vendor/github.com/godbus/dbus/transport_darwin.go b/vendor/github.com/godbus/dbus/transport_darwin.go new file mode 100644 index 000000000..1bba0d6bf --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_darwin.go @@ -0,0 +1,6 @@ +package dbus + +func (t *unixTransport) SendNullByte() error { + _, err := t.Write([]byte{0}) + return err +} diff --git a/vendor/github.com/godbus/dbus/transport_generic.go b/vendor/github.com/godbus/dbus/transport_generic.go new file mode 100644 index 000000000..46f8f49d6 --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_generic.go @@ -0,0 +1,35 @@ +package dbus + +import ( + "encoding/binary" + "errors" + "io" +) + +type genericTransport struct { + io.ReadWriteCloser +} + +func (t genericTransport) SendNullByte() error { + _, err := t.Write([]byte{0}) + return err +} + +func (t genericTransport) SupportsUnixFDs() bool { + return false +} + +func (t genericTransport) EnableUnixFDs() {} + +func (t genericTransport) ReadMessage() (*Message, error) { + return DecodeMessage(t) +} + +func (t genericTransport) SendMessage(msg *Message) error { + for _, v := range msg.Body { + if _, ok := v.(UnixFD); ok { + return errors.New("dbus: unix fd passing not enabled") + } + } + return msg.EncodeTo(t, binary.LittleEndian) +} diff --git a/vendor/github.com/godbus/dbus/transport_unix.go b/vendor/github.com/godbus/dbus/transport_unix.go new file mode 100644 index 000000000..3fafeabb1 --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_unix.go @@ -0,0 +1,196 @@ +//+build !windows + +package dbus + +import ( + "bytes" + 
"encoding/binary" + "errors" + "io" + "net" + "syscall" +) + +type oobReader struct { + conn *net.UnixConn + oob []byte + buf [4096]byte +} + +func (o *oobReader) Read(b []byte) (n int, err error) { + n, oobn, flags, _, err := o.conn.ReadMsgUnix(b, o.buf[:]) + if err != nil { + return n, err + } + if flags&syscall.MSG_CTRUNC != 0 { + return n, errors.New("dbus: control data truncated (too many fds received)") + } + o.oob = append(o.oob, o.buf[:oobn]...) + return n, nil +} + +type unixTransport struct { + *net.UnixConn + hasUnixFDs bool +} + +func newUnixTransport(keys string) (transport, error) { + var err error + + t := new(unixTransport) + abstract := getKey(keys, "abstract") + path := getKey(keys, "path") + switch { + case abstract == "" && path == "": + return nil, errors.New("dbus: invalid address (neither path nor abstract set)") + case abstract != "" && path == "": + t.UnixConn, err = net.DialUnix("unix", nil, &net.UnixAddr{Name: "@" + abstract, Net: "unix"}) + if err != nil { + return nil, err + } + return t, nil + case abstract == "" && path != "": + t.UnixConn, err = net.DialUnix("unix", nil, &net.UnixAddr{Name: path, Net: "unix"}) + if err != nil { + return nil, err + } + return t, nil + default: + return nil, errors.New("dbus: invalid address (both path and abstract set)") + } +} + +func init() { + transports["unix"] = newUnixTransport +} + +func (t *unixTransport) EnableUnixFDs() { + t.hasUnixFDs = true +} + +func (t *unixTransport) ReadMessage() (*Message, error) { + var ( + blen, hlen uint32 + csheader [16]byte + headers []header + order binary.ByteOrder + unixfds uint32 + ) + // To be sure that all bytes of out-of-band data are read, we use a special + // reader that uses ReadUnix on the underlying connection instead of Read + // and gathers the out-of-band data in a buffer. 
+ rd := &oobReader{conn: t.UnixConn} + // read the first 16 bytes (the part of the header that has a constant size), + // from which we can figure out the length of the rest of the message + if _, err := io.ReadFull(rd, csheader[:]); err != nil { + return nil, err + } + switch csheader[0] { + case 'l': + order = binary.LittleEndian + case 'B': + order = binary.BigEndian + default: + return nil, InvalidMessageError("invalid byte order") + } + // csheader[4:8] -> length of message body, csheader[12:16] -> length of + // header fields (without alignment) + binary.Read(bytes.NewBuffer(csheader[4:8]), order, &blen) + binary.Read(bytes.NewBuffer(csheader[12:]), order, &hlen) + if hlen%8 != 0 { + hlen += 8 - (hlen % 8) + } + + // decode headers and look for unix fds + headerdata := make([]byte, hlen+4) + copy(headerdata, csheader[12:]) + if _, err := io.ReadFull(t, headerdata[4:]); err != nil { + return nil, err + } + dec := newDecoder(bytes.NewBuffer(headerdata), order) + dec.pos = 12 + vs, err := dec.Decode(Signature{"a(yv)"}) + if err != nil { + return nil, err + } + Store(vs, &headers) + for _, v := range headers { + if v.Field == byte(FieldUnixFDs) { + unixfds, _ = v.Variant.value.(uint32) + } + } + all := make([]byte, 16+hlen+blen) + copy(all, csheader[:]) + copy(all[16:], headerdata[4:]) + if _, err := io.ReadFull(rd, all[16+hlen:]); err != nil { + return nil, err + } + if unixfds != 0 { + if !t.hasUnixFDs { + return nil, errors.New("dbus: got unix fds on unsupported transport") + } + // read the fds from the OOB data + scms, err := syscall.ParseSocketControlMessage(rd.oob) + if err != nil { + return nil, err + } + if len(scms) != 1 { + return nil, errors.New("dbus: received more than one socket control message") + } + fds, err := syscall.ParseUnixRights(&scms[0]) + if err != nil { + return nil, err + } + msg, err := DecodeMessage(bytes.NewBuffer(all)) + if err != nil { + return nil, err + } + // substitute the values in the message body (which are indices for the + // array receiver via OOB) with the actual values + for i, v := range msg.Body { + if j, ok := v.(UnixFDIndex); ok { + if uint32(j) >= unixfds { + return nil, InvalidMessageError("invalid index for unix fd") + } + msg.Body[i] = UnixFD(fds[j]) + } + } + return msg, nil + } + return DecodeMessage(bytes.NewBuffer(all)) +} + +func (t *unixTransport) SendMessage(msg *Message) error { + fds := make([]int, 0) + for i, v := range msg.Body { + if fd, ok := v.(UnixFD); ok { + msg.Body[i] = UnixFDIndex(len(fds)) + fds = append(fds, int(fd)) + } + } + if len(fds) != 0 { + if !t.hasUnixFDs { + return errors.New("dbus: unix fd passing not enabled") + } + msg.Headers[FieldUnixFDs] = MakeVariant(uint32(len(fds))) + oob := syscall.UnixRights(fds...) 
+ buf := new(bytes.Buffer) + msg.EncodeTo(buf, binary.LittleEndian) + n, oobn, err := t.UnixConn.WriteMsgUnix(buf.Bytes(), oob, nil) + if err != nil { + return err + } + if n != buf.Len() || oobn != len(oob) { + return io.ErrShortWrite + } + } else { + if err := msg.EncodeTo(t, binary.LittleEndian); err != nil { + return nil + } + } + return nil +} + +func (t *unixTransport) SupportsUnixFDs() bool { + return true +} diff --git a/vendor/github.com/godbus/dbus/transport_unixcred_dragonfly.go b/vendor/github.com/godbus/dbus/transport_unixcred_dragonfly.go new file mode 100644 index 000000000..a8cd39395 --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_unixcred_dragonfly.go @@ -0,0 +1,95 @@ +// The UnixCredentials system call is currently only implemented on Linux +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// https://golang.org/s/go1.4-syscall +// http://code.google.com/p/go/source/browse/unix/sockcmsg_linux.go?repo=sys + +// Local implementation of the UnixCredentials system call for DragonFly BSD + +package dbus + +/* +#include +*/ +import "C" + +import ( + "io" + "os" + "syscall" + "unsafe" +) + +// http://golang.org/src/pkg/syscall/ztypes_linux_amd64.go +// http://golang.org/src/pkg/syscall/ztypes_dragonfly_amd64.go +type Ucred struct { + Pid int32 + Uid uint32 + Gid uint32 +} + +// http://golang.org/src/pkg/syscall/types_linux.go +// http://golang.org/src/pkg/syscall/types_dragonfly.go +// https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/sys/sys/ucred.h +const ( + SizeofUcred = C.sizeof_struct_ucred +) + +// http://golang.org/src/pkg/syscall/sockcmsg_unix.go +func cmsgAlignOf(salen int) int { + // From http://golang.org/src/pkg/syscall/sockcmsg_unix.go + //salign := sizeofPtr + // NOTE: It seems like 64-bit Darwin and DragonFly BSD kernels + // still require 32-bit aligned access to network subsystem. + //if darwin64Bit || dragonfly64Bit { + // salign = 4 + //} + salign := 4 + return (salen + salign - 1) & ^(salign - 1) +} + +// http://golang.org/src/pkg/syscall/sockcmsg_unix.go +func cmsgData(h *syscall.Cmsghdr) unsafe.Pointer { + return unsafe.Pointer(uintptr(unsafe.Pointer(h)) + uintptr(cmsgAlignOf(syscall.SizeofCmsghdr))) +} + +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// UnixCredentials encodes credentials into a socket control message +// for sending to another process. This can be used for +// authentication. +func UnixCredentials(ucred *Ucred) []byte { + b := make([]byte, syscall.CmsgSpace(SizeofUcred)) + h := (*syscall.Cmsghdr)(unsafe.Pointer(&b[0])) + h.Level = syscall.SOL_SOCKET + h.Type = syscall.SCM_CREDS + h.SetLen(syscall.CmsgLen(SizeofUcred)) + *((*Ucred)(cmsgData(h))) = *ucred + return b +} + +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// ParseUnixCredentials decodes a socket control message that contains +// credentials in a Ucred structure. To receive such a message, the +// SO_PASSCRED option must be enabled on the socket. 
+func ParseUnixCredentials(m *syscall.SocketControlMessage) (*Ucred, error) { + if m.Header.Level != syscall.SOL_SOCKET { + return nil, syscall.EINVAL + } + if m.Header.Type != syscall.SCM_CREDS { + return nil, syscall.EINVAL + } + ucred := *(*Ucred)(unsafe.Pointer(&m.Data[0])) + return &ucred, nil +} + +func (t *unixTransport) SendNullByte() error { + ucred := &Ucred{Pid: int32(os.Getpid()), Uid: uint32(os.Getuid()), Gid: uint32(os.Getgid())} + b := UnixCredentials(ucred) + _, oobn, err := t.UnixConn.WriteMsgUnix([]byte{0}, b, nil) + if err != nil { + return err + } + if oobn != len(b) { + return io.ErrShortWrite + } + return nil +} diff --git a/vendor/github.com/godbus/dbus/transport_unixcred_linux.go b/vendor/github.com/godbus/dbus/transport_unixcred_linux.go new file mode 100644 index 000000000..d9dfdf698 --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_unixcred_linux.go @@ -0,0 +1,25 @@ +// The UnixCredentials system call is currently only implemented on Linux +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// https://golang.org/s/go1.4-syscall +// http://code.google.com/p/go/source/browse/unix/sockcmsg_linux.go?repo=sys + +package dbus + +import ( + "io" + "os" + "syscall" +) + +func (t *unixTransport) SendNullByte() error { + ucred := &syscall.Ucred{Pid: int32(os.Getpid()), Uid: uint32(os.Getuid()), Gid: uint32(os.Getgid())} + b := syscall.UnixCredentials(ucred) + _, oobn, err := t.UnixConn.WriteMsgUnix([]byte{0}, b, nil) + if err != nil { + return err + } + if oobn != len(b) { + return io.ErrShortWrite + } + return nil +} diff --git a/vendor/github.com/godbus/dbus/variant.go b/vendor/github.com/godbus/dbus/variant.go new file mode 100644 index 000000000..b7b13ae90 --- /dev/null +++ b/vendor/github.com/godbus/dbus/variant.go @@ -0,0 +1,139 @@ +package dbus + +import ( + "bytes" + "fmt" + "reflect" + "sort" + "strconv" +) + +// Variant represents the D-Bus variant type. +type Variant struct { + sig Signature + value interface{} +} + +// MakeVariant converts the given value to a Variant. It panics if v cannot be +// represented as a D-Bus type. +func MakeVariant(v interface{}) Variant { + return Variant{SignatureOf(v), v} +} + +// ParseVariant parses the given string as a variant as described at +// https://developer.gnome.org/glib/unstable/gvariant-text.html. If sig is not +// empty, it is taken to be the expected signature for the variant. +func ParseVariant(s string, sig Signature) (Variant, error) { + tokens := varLex(s) + p := &varParser{tokens: tokens} + n, err := varMakeNode(p) + if err != nil { + return Variant{}, err + } + if sig.str == "" { + sig, err = varInfer(n) + if err != nil { + return Variant{}, err + } + } + v, err := n.Value(sig) + if err != nil { + return Variant{}, err + } + return MakeVariant(v), nil +} + +// format returns a formatted version of v and whether this string can be parsed +// unambigously. 
+func (v Variant) format() (string, bool) { + switch v.sig.str[0] { + case 'b', 'i': + return fmt.Sprint(v.value), true + case 'n', 'q', 'u', 'x', 't', 'd', 'h': + return fmt.Sprint(v.value), false + case 's': + return strconv.Quote(v.value.(string)), true + case 'o': + return strconv.Quote(string(v.value.(ObjectPath))), false + case 'g': + return strconv.Quote(v.value.(Signature).str), false + case 'v': + s, unamb := v.value.(Variant).format() + if !unamb { + return "<@" + v.value.(Variant).sig.str + " " + s + ">", true + } + return "<" + s + ">", true + case 'y': + return fmt.Sprintf("%#x", v.value.(byte)), false + } + rv := reflect.ValueOf(v.value) + switch rv.Kind() { + case reflect.Slice: + if rv.Len() == 0 { + return "[]", false + } + unamb := true + buf := bytes.NewBuffer([]byte("[")) + for i := 0; i < rv.Len(); i++ { + // TODO: slooow + s, b := MakeVariant(rv.Index(i).Interface()).format() + unamb = unamb && b + buf.WriteString(s) + if i != rv.Len()-1 { + buf.WriteString(", ") + } + } + buf.WriteByte(']') + return buf.String(), unamb + case reflect.Map: + if rv.Len() == 0 { + return "{}", false + } + unamb := true + var buf bytes.Buffer + kvs := make([]string, rv.Len()) + for i, k := range rv.MapKeys() { + s, b := MakeVariant(k.Interface()).format() + unamb = unamb && b + buf.Reset() + buf.WriteString(s) + buf.WriteString(": ") + s, b = MakeVariant(rv.MapIndex(k).Interface()).format() + unamb = unamb && b + buf.WriteString(s) + kvs[i] = buf.String() + } + buf.Reset() + buf.WriteByte('{') + sort.Strings(kvs) + for i, kv := range kvs { + if i > 0 { + buf.WriteString(", ") + } + buf.WriteString(kv) + } + buf.WriteByte('}') + return buf.String(), unamb + } + return `"INVALID"`, true +} + +// Signature returns the D-Bus signature of the underlying value of v. +func (v Variant) Signature() Signature { + return v.sig +} + +// String returns the string representation of the underlying value of v as +// described at https://developer.gnome.org/glib/unstable/gvariant-text.html. +func (v Variant) String() string { + s, unamb := v.format() + if !unamb { + return "@" + v.sig.str + " " + s + } + return s +} + +// Value returns the underlying value of v. +func (v Variant) Value() interface{} { + return v.value +} diff --git a/vendor/github.com/godbus/dbus/variant_lexer.go b/vendor/github.com/godbus/dbus/variant_lexer.go new file mode 100644 index 000000000..332007d6f --- /dev/null +++ b/vendor/github.com/godbus/dbus/variant_lexer.go @@ -0,0 +1,284 @@ +package dbus + +import ( + "fmt" + "strings" + "unicode" + "unicode/utf8" +) + +// Heavily inspired by the lexer from text/template. 
+ +type varToken struct { + typ varTokenType + val string +} + +type varTokenType byte + +const ( + tokEOF varTokenType = iota + tokError + tokNumber + tokString + tokBool + tokArrayStart + tokArrayEnd + tokDictStart + tokDictEnd + tokVariantStart + tokVariantEnd + tokComma + tokColon + tokType + tokByteString +) + +type varLexer struct { + input string + start int + pos int + width int + tokens []varToken +} + +type lexState func(*varLexer) lexState + +func varLex(s string) []varToken { + l := &varLexer{input: s} + l.run() + return l.tokens +} + +func (l *varLexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +func (l *varLexer) backup() { + l.pos -= l.width +} + +func (l *varLexer) emit(t varTokenType) { + l.tokens = append(l.tokens, varToken{t, l.input[l.start:l.pos]}) + l.start = l.pos +} + +func (l *varLexer) errorf(format string, v ...interface{}) lexState { + l.tokens = append(l.tokens, varToken{ + tokError, + fmt.Sprintf(format, v...), + }) + return nil +} + +func (l *varLexer) ignore() { + l.start = l.pos +} + +func (l *varLexer) next() rune { + var r rune + + if l.pos >= len(l.input) { + l.width = 0 + return -1 + } + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} + +func (l *varLexer) run() { + for state := varLexNormal; state != nil; { + state = state(l) + } +} + +func (l *varLexer) peek() rune { + r := l.next() + l.backup() + return r +} + +func varLexNormal(l *varLexer) lexState { + for { + r := l.next() + switch { + case r == -1: + l.emit(tokEOF) + return nil + case r == '[': + l.emit(tokArrayStart) + case r == ']': + l.emit(tokArrayEnd) + case r == '{': + l.emit(tokDictStart) + case r == '}': + l.emit(tokDictEnd) + case r == '<': + l.emit(tokVariantStart) + case r == '>': + l.emit(tokVariantEnd) + case r == ':': + l.emit(tokColon) + case r == ',': + l.emit(tokComma) + case r == '\'' || r == '"': + l.backup() + return varLexString + case r == '@': + l.backup() + return varLexType + case unicode.IsSpace(r): + l.ignore() + case unicode.IsNumber(r) || r == '+' || r == '-': + l.backup() + return varLexNumber + case r == 'b': + pos := l.start + if n := l.peek(); n == '"' || n == '\'' { + return varLexByteString + } + // not a byte string; try to parse it as a type or bool below + l.pos = pos + 1 + l.width = 1 + fallthrough + default: + // either a bool or a type. Try bools first. + l.backup() + if l.pos+4 <= len(l.input) { + if l.input[l.pos:l.pos+4] == "true" { + l.pos += 4 + l.emit(tokBool) + continue + } + } + if l.pos+5 <= len(l.input) { + if l.input[l.pos:l.pos+5] == "false" { + l.pos += 5 + l.emit(tokBool) + continue + } + } + // must be a type. 
+ return varLexType + } + } +} + +var varTypeMap = map[string]string{ + "boolean": "b", + "byte": "y", + "int16": "n", + "uint16": "q", + "int32": "i", + "uint32": "u", + "int64": "x", + "uint64": "t", + "double": "f", + "string": "s", + "objectpath": "o", + "signature": "g", +} + +func varLexByteString(l *varLexer) lexState { + q := l.next() +Loop: + for { + switch l.next() { + case '\\': + if r := l.next(); r != -1 { + break + } + fallthrough + case -1: + return l.errorf("unterminated bytestring") + case q: + break Loop + } + } + l.emit(tokByteString) + return varLexNormal +} + +func varLexNumber(l *varLexer) lexState { + l.accept("+-") + digits := "0123456789" + if l.accept("0") { + if l.accept("x") { + digits = "0123456789abcdefABCDEF" + } else { + digits = "01234567" + } + } + for strings.IndexRune(digits, l.next()) >= 0 { + } + l.backup() + if l.accept(".") { + for strings.IndexRune(digits, l.next()) >= 0 { + } + l.backup() + } + if l.accept("eE") { + l.accept("+-") + for strings.IndexRune("0123456789", l.next()) >= 0 { + } + l.backup() + } + if r := l.peek(); unicode.IsLetter(r) { + l.next() + return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) + } + l.emit(tokNumber) + return varLexNormal +} + +func varLexString(l *varLexer) lexState { + q := l.next() +Loop: + for { + switch l.next() { + case '\\': + if r := l.next(); r != -1 { + break + } + fallthrough + case -1: + return l.errorf("unterminated string") + case q: + break Loop + } + } + l.emit(tokString) + return varLexNormal +} + +func varLexType(l *varLexer) lexState { + at := l.accept("@") + for { + r := l.next() + if r == -1 { + break + } + if unicode.IsSpace(r) { + l.backup() + break + } + } + if at { + if _, err := ParseSignature(l.input[l.start+1 : l.pos]); err != nil { + return l.errorf("%s", err) + } + } else { + if _, ok := varTypeMap[l.input[l.start:l.pos]]; ok { + l.emit(tokType) + return varLexNormal + } + return l.errorf("unrecognized type %q", l.input[l.start:l.pos]) + } + l.emit(tokType) + return varLexNormal +} diff --git a/vendor/github.com/godbus/dbus/variant_parser.go b/vendor/github.com/godbus/dbus/variant_parser.go new file mode 100644 index 000000000..d20f5da6d --- /dev/null +++ b/vendor/github.com/godbus/dbus/variant_parser.go @@ -0,0 +1,817 @@ +package dbus + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" + "unicode/utf8" +) + +type varParser struct { + tokens []varToken + i int +} + +func (p *varParser) backup() { + p.i-- +} + +func (p *varParser) next() varToken { + if p.i < len(p.tokens) { + t := p.tokens[p.i] + p.i++ + return t + } + return varToken{typ: tokEOF} +} + +type varNode interface { + Infer() (Signature, error) + String() string + Sigs() sigSet + Value(Signature) (interface{}, error) +} + +func varMakeNode(p *varParser) (varNode, error) { + var sig Signature + + for { + t := p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + case tokNumber: + return varMakeNumNode(t, sig) + case tokString: + return varMakeStringNode(t, sig) + case tokBool: + if sig.str != "" && sig.str != "b" { + return nil, varTypeError{t.val, sig} + } + b, err := strconv.ParseBool(t.val) + if err != nil { + return nil, err + } + return boolNode(b), nil + case tokArrayStart: + return varMakeArrayNode(p, sig) + case tokVariantStart: + return varMakeVariantNode(p, sig) + case tokDictStart: + return varMakeDictNode(p, sig) + case tokType: + if sig.str != "" { + return nil, errors.New("unexpected type 
annotation") + } + if t.val[0] == '@' { + sig.str = t.val[1:] + } else { + sig.str = varTypeMap[t.val] + } + case tokByteString: + if sig.str != "" && sig.str != "ay" { + return nil, varTypeError{t.val, sig} + } + b, err := varParseByteString(t.val) + if err != nil { + return nil, err + } + return byteStringNode(b), nil + default: + return nil, fmt.Errorf("unexpected %q", t.val) + } + } +} + +type varTypeError struct { + val string + sig Signature +} + +func (e varTypeError) Error() string { + return fmt.Sprintf("dbus: can't parse %q as type %q", e.val, e.sig.str) +} + +type sigSet map[Signature]bool + +func (s sigSet) Empty() bool { + return len(s) == 0 +} + +func (s sigSet) Intersect(s2 sigSet) sigSet { + r := make(sigSet) + for k := range s { + if s2[k] { + r[k] = true + } + } + return r +} + +func (s sigSet) Single() (Signature, bool) { + if len(s) == 1 { + for k := range s { + return k, true + } + } + return Signature{}, false +} + +func (s sigSet) ToArray() sigSet { + r := make(sigSet, len(s)) + for k := range s { + r[Signature{"a" + k.str}] = true + } + return r +} + +type numNode struct { + sig Signature + str string + val interface{} +} + +var numSigSet = sigSet{ + Signature{"y"}: true, + Signature{"n"}: true, + Signature{"q"}: true, + Signature{"i"}: true, + Signature{"u"}: true, + Signature{"x"}: true, + Signature{"t"}: true, + Signature{"d"}: true, +} + +func (n numNode) Infer() (Signature, error) { + if strings.ContainsAny(n.str, ".e") { + return Signature{"d"}, nil + } + return Signature{"i"}, nil +} + +func (n numNode) String() string { + return n.str +} + +func (n numNode) Sigs() sigSet { + if n.sig.str != "" { + return sigSet{n.sig: true} + } + if strings.ContainsAny(n.str, ".e") { + return sigSet{Signature{"d"}: true} + } + return numSigSet +} + +func (n numNode) Value(sig Signature) (interface{}, error) { + if n.sig.str != "" && n.sig != sig { + return nil, varTypeError{n.str, sig} + } + if n.val != nil { + return n.val, nil + } + return varNumAs(n.str, sig) +} + +func varMakeNumNode(tok varToken, sig Signature) (varNode, error) { + if sig.str == "" { + return numNode{str: tok.val}, nil + } + num, err := varNumAs(tok.val, sig) + if err != nil { + return nil, err + } + return numNode{sig: sig, val: num}, nil +} + +func varNumAs(s string, sig Signature) (interface{}, error) { + isUnsigned := false + size := 32 + switch sig.str { + case "n": + size = 16 + case "i": + case "x": + size = 64 + case "y": + size = 8 + isUnsigned = true + case "q": + size = 16 + isUnsigned = true + case "u": + isUnsigned = true + case "t": + size = 64 + isUnsigned = true + case "d": + d, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, err + } + return d, nil + default: + return nil, varTypeError{s, sig} + } + base := 10 + if strings.HasPrefix(s, "0x") { + base = 16 + s = s[2:] + } + if strings.HasPrefix(s, "0") && len(s) != 1 { + base = 8 + s = s[1:] + } + if isUnsigned { + i, err := strconv.ParseUint(s, base, size) + if err != nil { + return nil, err + } + var v interface{} = i + switch sig.str { + case "y": + v = byte(i) + case "q": + v = uint16(i) + case "u": + v = uint32(i) + } + return v, nil + } + i, err := strconv.ParseInt(s, base, size) + if err != nil { + return nil, err + } + var v interface{} = i + switch sig.str { + case "n": + v = int16(i) + case "i": + v = int32(i) + } + return v, nil +} + +type stringNode struct { + sig Signature + str string // parsed + val interface{} // has correct type +} + +var stringSigSet = sigSet{ + Signature{"s"}: true, + Signature{"g"}: 
true, + Signature{"o"}: true, +} + +func (n stringNode) Infer() (Signature, error) { + return Signature{"s"}, nil +} + +func (n stringNode) String() string { + return n.str +} + +func (n stringNode) Sigs() sigSet { + if n.sig.str != "" { + return sigSet{n.sig: true} + } + return stringSigSet +} + +func (n stringNode) Value(sig Signature) (interface{}, error) { + if n.sig.str != "" && n.sig != sig { + return nil, varTypeError{n.str, sig} + } + if n.val != nil { + return n.val, nil + } + switch { + case sig.str == "g": + return Signature{n.str}, nil + case sig.str == "o": + return ObjectPath(n.str), nil + case sig.str == "s": + return n.str, nil + default: + return nil, varTypeError{n.str, sig} + } +} + +func varMakeStringNode(tok varToken, sig Signature) (varNode, error) { + if sig.str != "" && sig.str != "s" && sig.str != "g" && sig.str != "o" { + return nil, fmt.Errorf("invalid type %q for string", sig.str) + } + s, err := varParseString(tok.val) + if err != nil { + return nil, err + } + n := stringNode{str: s} + if sig.str == "" { + return stringNode{str: s}, nil + } + n.sig = sig + switch sig.str { + case "o": + n.val = ObjectPath(s) + case "g": + n.val = Signature{s} + case "s": + n.val = s + } + return n, nil +} + +func varParseString(s string) (string, error) { + // quotes are guaranteed to be there + s = s[1 : len(s)-1] + buf := new(bytes.Buffer) + for len(s) != 0 { + r, size := utf8.DecodeRuneInString(s) + if r == utf8.RuneError && size == 1 { + return "", errors.New("invalid UTF-8") + } + s = s[size:] + if r != '\\' { + buf.WriteRune(r) + continue + } + r, size = utf8.DecodeRuneInString(s) + if r == utf8.RuneError && size == 1 { + return "", errors.New("invalid UTF-8") + } + s = s[size:] + switch r { + case 'a': + buf.WriteRune(0x7) + case 'b': + buf.WriteRune(0x8) + case 'f': + buf.WriteRune(0xc) + case 'n': + buf.WriteRune('\n') + case 'r': + buf.WriteRune('\r') + case 't': + buf.WriteRune('\t') + case '\n': + case 'u': + if len(s) < 4 { + return "", errors.New("short unicode escape") + } + r, err := strconv.ParseUint(s[:4], 16, 32) + if err != nil { + return "", err + } + buf.WriteRune(rune(r)) + s = s[4:] + case 'U': + if len(s) < 8 { + return "", errors.New("short unicode escape") + } + r, err := strconv.ParseUint(s[:8], 16, 32) + if err != nil { + return "", err + } + buf.WriteRune(rune(r)) + s = s[8:] + default: + buf.WriteRune(r) + } + } + return buf.String(), nil +} + +var boolSigSet = sigSet{Signature{"b"}: true} + +type boolNode bool + +func (boolNode) Infer() (Signature, error) { + return Signature{"b"}, nil +} + +func (b boolNode) String() string { + if b { + return "true" + } + return "false" +} + +func (boolNode) Sigs() sigSet { + return boolSigSet +} + +func (b boolNode) Value(sig Signature) (interface{}, error) { + if sig.str != "b" { + return nil, varTypeError{b.String(), sig} + } + return bool(b), nil +} + +type arrayNode struct { + set sigSet + children []varNode + val interface{} +} + +func (n arrayNode) Infer() (Signature, error) { + for _, v := range n.children { + csig, err := varInfer(v) + if err != nil { + continue + } + return Signature{"a" + csig.str}, nil + } + return Signature{}, fmt.Errorf("can't infer type for %q", n.String()) +} + +func (n arrayNode) String() string { + s := "[" + for i, v := range n.children { + s += v.String() + if i != len(n.children)-1 { + s += ", " + } + } + return s + "]" +} + +func (n arrayNode) Sigs() sigSet { + return n.set +} + +func (n arrayNode) Value(sig Signature) (interface{}, error) { + if n.set.Empty() { + // no 
type information whatsoever, so this must be an empty slice + return reflect.MakeSlice(typeFor(sig.str), 0, 0).Interface(), nil + } + if !n.set[sig] { + return nil, varTypeError{n.String(), sig} + } + s := reflect.MakeSlice(typeFor(sig.str), len(n.children), len(n.children)) + for i, v := range n.children { + rv, err := v.Value(Signature{sig.str[1:]}) + if err != nil { + return nil, err + } + s.Index(i).Set(reflect.ValueOf(rv)) + } + return s.Interface(), nil +} + +func varMakeArrayNode(p *varParser, sig Signature) (varNode, error) { + var n arrayNode + if sig.str != "" { + n.set = sigSet{sig: true} + } + if t := p.next(); t.typ == tokArrayEnd { + return n, nil + } else { + p.backup() + } +Loop: + for { + t := p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + } + p.backup() + cn, err := varMakeNode(p) + if err != nil { + return nil, err + } + if cset := cn.Sigs(); !cset.Empty() { + if n.set.Empty() { + n.set = cset.ToArray() + } else { + nset := cset.ToArray().Intersect(n.set) + if nset.Empty() { + return nil, fmt.Errorf("can't parse %q with given type information", cn.String()) + } + n.set = nset + } + } + n.children = append(n.children, cn) + switch t := p.next(); t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + case tokArrayEnd: + break Loop + case tokComma: + continue + default: + return nil, fmt.Errorf("unexpected %q", t.val) + } + } + return n, nil +} + +type variantNode struct { + n varNode +} + +var variantSet = sigSet{ + Signature{"v"}: true, +} + +func (variantNode) Infer() (Signature, error) { + return Signature{"v"}, nil +} + +func (n variantNode) String() string { + return "<" + n.n.String() + ">" +} + +func (variantNode) Sigs() sigSet { + return variantSet +} + +func (n variantNode) Value(sig Signature) (interface{}, error) { + if sig.str != "v" { + return nil, varTypeError{n.String(), sig} + } + sig, err := varInfer(n.n) + if err != nil { + return nil, err + } + v, err := n.n.Value(sig) + if err != nil { + return nil, err + } + return MakeVariant(v), nil +} + +func varMakeVariantNode(p *varParser, sig Signature) (varNode, error) { + n, err := varMakeNode(p) + if err != nil { + return nil, err + } + if t := p.next(); t.typ != tokVariantEnd { + return nil, fmt.Errorf("unexpected %q", t.val) + } + vn := variantNode{n} + if sig.str != "" && sig.str != "v" { + return nil, varTypeError{vn.String(), sig} + } + return variantNode{n}, nil +} + +type dictEntry struct { + key, val varNode +} + +type dictNode struct { + kset, vset sigSet + children []dictEntry + val interface{} +} + +func (n dictNode) Infer() (Signature, error) { + for _, v := range n.children { + ksig, err := varInfer(v.key) + if err != nil { + continue + } + vsig, err := varInfer(v.val) + if err != nil { + continue + } + return Signature{"a{" + ksig.str + vsig.str + "}"}, nil + } + return Signature{}, fmt.Errorf("can't infer type for %q", n.String()) +} + +func (n dictNode) String() string { + s := "{" + for i, v := range n.children { + s += v.key.String() + ": " + v.val.String() + if i != len(n.children)-1 { + s += ", " + } + } + return s + "}" +} + +func (n dictNode) Sigs() sigSet { + r := sigSet{} + for k := range n.kset { + for v := range n.vset { + sig := "a{" + k.str + v.str + "}" + r[Signature{sig}] = true + } + } + return r +} + +func (n dictNode) Value(sig Signature) (interface{}, error) { + set := n.Sigs() + if set.Empty() { + // no type information -> empty dict + return 
reflect.MakeMap(typeFor(sig.str)).Interface(), nil + } + if !set[sig] { + return nil, varTypeError{n.String(), sig} + } + m := reflect.MakeMap(typeFor(sig.str)) + ksig := Signature{sig.str[2:3]} + vsig := Signature{sig.str[3 : len(sig.str)-1]} + for _, v := range n.children { + kv, err := v.key.Value(ksig) + if err != nil { + return nil, err + } + vv, err := v.val.Value(vsig) + if err != nil { + return nil, err + } + m.SetMapIndex(reflect.ValueOf(kv), reflect.ValueOf(vv)) + } + return m.Interface(), nil +} + +func varMakeDictNode(p *varParser, sig Signature) (varNode, error) { + var n dictNode + + if sig.str != "" { + if len(sig.str) < 5 { + return nil, fmt.Errorf("invalid signature %q for dict type", sig) + } + ksig := Signature{string(sig.str[2])} + vsig := Signature{sig.str[3 : len(sig.str)-1]} + n.kset = sigSet{ksig: true} + n.vset = sigSet{vsig: true} + } + if t := p.next(); t.typ == tokDictEnd { + return n, nil + } else { + p.backup() + } +Loop: + for { + t := p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + } + p.backup() + kn, err := varMakeNode(p) + if err != nil { + return nil, err + } + if kset := kn.Sigs(); !kset.Empty() { + if n.kset.Empty() { + n.kset = kset + } else { + n.kset = kset.Intersect(n.kset) + if n.kset.Empty() { + return nil, fmt.Errorf("can't parse %q with given type information", kn.String()) + } + } + } + t = p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + case tokColon: + default: + return nil, fmt.Errorf("unexpected %q", t.val) + } + t = p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + } + p.backup() + vn, err := varMakeNode(p) + if err != nil { + return nil, err + } + if vset := vn.Sigs(); !vset.Empty() { + if n.vset.Empty() { + n.vset = vset + } else { + n.vset = n.vset.Intersect(vset) + if n.vset.Empty() { + return nil, fmt.Errorf("can't parse %q with given type information", vn.String()) + } + } + } + n.children = append(n.children, dictEntry{kn, vn}) + t = p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + case tokDictEnd: + break Loop + case tokComma: + continue + default: + return nil, fmt.Errorf("unexpected %q", t.val) + } + } + return n, nil +} + +type byteStringNode []byte + +var byteStringSet = sigSet{ + Signature{"ay"}: true, +} + +func (byteStringNode) Infer() (Signature, error) { + return Signature{"ay"}, nil +} + +func (b byteStringNode) String() string { + return string(b) +} + +func (b byteStringNode) Sigs() sigSet { + return byteStringSet +} + +func (b byteStringNode) Value(sig Signature) (interface{}, error) { + if sig.str != "ay" { + return nil, varTypeError{b.String(), sig} + } + return []byte(b), nil +} + +func varParseByteString(s string) ([]byte, error) { + // quotes and b at start are guaranteed to be there + b := make([]byte, 0, 1) + s = s[2 : len(s)-1] + for len(s) != 0 { + c := s[0] + s = s[1:] + if c != '\\' { + b = append(b, c) + continue + } + c = s[0] + s = s[1:] + switch c { + case 'a': + b = append(b, 0x7) + case 'b': + b = append(b, 0x8) + case 'f': + b = append(b, 0xc) + case 'n': + b = append(b, '\n') + case 'r': + b = append(b, '\r') + case 't': + b = append(b, '\t') + case 'x': + if len(s) < 2 { + return nil, errors.New("short escape") + } + n, err := strconv.ParseUint(s[:2], 16, 8) + if err != nil { + return nil, err + 
} + b = append(b, byte(n)) + s = s[2:] + case '0': + if len(s) < 3 { + return nil, errors.New("short escape") + } + n, err := strconv.ParseUint(s[:3], 8, 8) + if err != nil { + return nil, err + } + b = append(b, byte(n)) + s = s[3:] + default: + b = append(b, c) + } + } + return append(b, 0), nil +} + +func varInfer(n varNode) (Signature, error) { + if sig, ok := n.Sigs().Single(); ok { + return sig, nil + } + return n.Infer() +} diff --git a/vendor/github.com/gogo/protobuf/gogoproto/doc.go b/vendor/github.com/gogo/protobuf/gogoproto/doc.go new file mode 100644 index 000000000..147b5ecc6 --- /dev/null +++ b/vendor/github.com/gogo/protobuf/gogoproto/doc.go @@ -0,0 +1,169 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. All rights reserved. +// http://github.com/gogo/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* +Package gogoproto provides extensions for protocol buffers to achieve: + + - fast marshalling and unmarshalling. + - peace of mind by optionally generating test and benchmark code. + - more canonical Go structures. + - less typing by optionally generating extra helper code. + - goprotobuf compatibility + +More Canonical Go Structures + +A lot of time working with a goprotobuf struct will lead you to a place where you create another struct that is easier to work with and then have a function to copy the values between the two structs. +You might also find that basic structs that started their life as part of an API need to be sent over the wire. With gob, you could just send it. With goprotobuf, you need to make a parallel struct. +Gogoprotobuf tries to fix these problems with the nullable, embed, customtype and customname field extensions. + + - nullable, if false, a field is generated without a pointer (see warning below). + - embed, if true, the field is generated as an embedded field. + - customtype, It works with the Marshal and Unmarshal methods, to allow you to have your own types in your struct, but marshal to bytes. For example, custom.Uuid or custom.Fixed128 + - customname (beta), Changes the generated fieldname. This is especially useful when generated methods conflict with fieldnames. + - casttype (beta), Changes the generated fieldtype. 
All generated code assumes that this type is castable to the protocol buffer field type. It does not work for structs or enums. + - castkey (beta), Changes the generated fieldtype for a map key. All generated code assumes that this type is castable to the protocol buffer field type. Only supported on maps. + - castvalue (beta), Changes the generated fieldtype for a map value. All generated code assumes that this type is castable to the protocol buffer field type. Only supported on maps. + +Warning about nullable: According to the Protocol Buffer specification, you should be able to tell whether a field is set or unset. With the option nullable=false this feature is lost, since your non-nullable fields will always be set. It can be seen as a layer on top of Protocol Buffers, where before and after marshalling all non-nullable fields are set and they cannot be unset. + +Let us look at: + + github.com/gogo/protobuf/test/example/example.proto + +for a quicker overview. + +The following message: + + package test; + + import "github.com/gogo/protobuf/gogoproto/gogo.proto"; + + message A { + optional string Description = 1 [(gogoproto.nullable) = false]; + optional int64 Number = 2 [(gogoproto.nullable) = false]; + optional bytes Id = 3 [(gogoproto.customtype) = "github.com/gogo/protobuf/test/custom.Uuid", (gogoproto.nullable) = false]; + } + +Will generate a go struct which looks a lot like this: + + type A struct { + Description string + Number int64 + Id github_com_gogo_protobuf_test_custom.Uuid + } + +You will see there are no pointers, since all fields are non-nullable. +You will also see a custom type which marshals to a string. +Be warned it is your responsibility to test your custom types thoroughly. +You should think of every possible empty and nil case for your marshaling, unmarshaling and size methods. + +Next we will embed the message A in message B. + + message B { + optional A A = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; + repeated bytes G = 2 [(gogoproto.customtype) = "github.com/gogo/protobuf/test/custom.Uint128", (gogoproto.nullable) = false]; + } + +See below that A is embedded in B. + + type B struct { + A + G []github_com_gogo_protobuf_test_custom.Uint128 + } + +Also see the repeated custom type. + + type Uint128 [2]uint64 + +Next we will create a custom name for one of our fields. + + message C { + optional int64 size = 1 [(gogoproto.customname) = "MySize"]; + } + +See below that the field's name is MySize and not Size. + + type C struct { + MySize *int64 + } + +The is useful when having a protocol buffer message with a field name which conflicts with a generated method. +As an example, having a field name size and using the sizer plugin to generate a Size method will cause a go compiler error. +Using customname you can fix this error without changing the field name. +This is typically useful when working with a protocol buffer that was designed before these methods and/or the go language were avialable. + +Gogoprotobuf also has some more subtle changes, these could be changed back: + + - the generated package name for imports do not have the extra /filename.pb, + but are actually the imports specified in the .proto file. 
+ +Gogoprotobuf also has lost some features which should be brought back with time: + + - Marshalling and unmarshalling with reflect and without the unsafe package, + this requires work in pointer_reflect.go + +Why does nullable break protocol buffer specifications: + +The protocol buffer specification states, somewhere, that you should be able to tell whether a +field is set or unset. With the option nullable=false this feature is lost, +since your non-nullable fields will always be set. It can be seen as a layer on top of +protocol buffers, where before and after marshalling all non-nullable fields are set +and they cannot be unset. + +Goprotobuf Compatibility: + +Gogoprotobuf is compatible with Goprotobuf, because it is compatible with protocol buffers. +Gogoprotobuf generates the same code as goprotobuf if no extensions are used. +The enumprefix, getters and stringer extensions can be used to remove some of the unnecessary code generated by goprotobuf: + + - gogoproto_import, if false, the generated code imports github.com/golang/protobuf/proto instead of github.com/gogo/protobuf/proto. + - goproto_enum_prefix, if false, generates the enum constant names without the messagetype prefix + - goproto_enum_stringer (experimental), if false, the enum is generated without the default string method, this is useful for rather using enum_stringer, or allowing you to write your own string method. + - goproto_getters, if false, the message is generated without get methods, this is useful when you would rather want to use face + - goproto_stringer, if false, the message is generated without the default string method, this is useful for rather using stringer, or allowing you to write your own string method. + - goproto_extensions_map (beta), if false, the extensions field is generated as type []byte instead of type map[int32]proto.Extension + - goproto_unrecognized (beta), if false, XXX_unrecognized field is not generated. This is useful in conjunction with gogoproto.nullable=false, to generate structures completely devoid of pointers and reduce GC pressure at the cost of losing information about unrecognized fields. + - goproto_registration (beta), if true, the generated files will register all messages and types against both gogo/protobuf and golang/protobuf. This is necessary when using third-party packages which read registrations from golang/protobuf (such as the grpc-gateway). + +Less Typing and Peace of Mind is explained in their specific plugin folders godoc: + + - github.com/gogo/protobuf/plugin/ + +If you do not use any of these extension the code that is generated +will be the same as if goprotobuf has generated it. + +The most complete way to see examples is to look at + + github.com/gogo/protobuf/test/thetest.proto + +Gogoprototest is a seperate project, +because we want to keep gogoprotobuf independant of goprotobuf, +but we still want to test it thoroughly. + +*/ +package gogoproto diff --git a/vendor/github.com/gogo/protobuf/gogoproto/gogo.pb.go b/vendor/github.com/gogo/protobuf/gogoproto/gogo.pb.go new file mode 100644 index 000000000..5765acb15 --- /dev/null +++ b/vendor/github.com/gogo/protobuf/gogoproto/gogo.pb.go @@ -0,0 +1,804 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: gogo.proto + +/* +Package gogoproto is a generated protocol buffer package. 
+ +It is generated from these files: + gogo.proto + +It has these top-level messages: +*/ +package gogoproto + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" +import google_protobuf "github.com/gogo/protobuf/protoc-gen-gogo/descriptor" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +var E_GoprotoEnumPrefix = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.EnumOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 62001, + Name: "gogoproto.goproto_enum_prefix", + Tag: "varint,62001,opt,name=goproto_enum_prefix,json=goprotoEnumPrefix", + Filename: "gogo.proto", +} + +var E_GoprotoEnumStringer = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.EnumOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 62021, + Name: "gogoproto.goproto_enum_stringer", + Tag: "varint,62021,opt,name=goproto_enum_stringer,json=goprotoEnumStringer", + Filename: "gogo.proto", +} + +var E_EnumStringer = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.EnumOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 62022, + Name: "gogoproto.enum_stringer", + Tag: "varint,62022,opt,name=enum_stringer,json=enumStringer", + Filename: "gogo.proto", +} + +var E_EnumCustomname = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.EnumOptions)(nil), + ExtensionType: (*string)(nil), + Field: 62023, + Name: "gogoproto.enum_customname", + Tag: "bytes,62023,opt,name=enum_customname,json=enumCustomname", + Filename: "gogo.proto", +} + +var E_Enumdecl = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.EnumOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 62024, + Name: "gogoproto.enumdecl", + Tag: "varint,62024,opt,name=enumdecl", + Filename: "gogo.proto", +} + +var E_EnumvalueCustomname = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.EnumValueOptions)(nil), + ExtensionType: (*string)(nil), + Field: 66001, + Name: "gogoproto.enumvalue_customname", + Tag: "bytes,66001,opt,name=enumvalue_customname,json=enumvalueCustomname", + Filename: "gogo.proto", +} + +var E_GoprotoGettersAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63001, + Name: "gogoproto.goproto_getters_all", + Tag: "varint,63001,opt,name=goproto_getters_all,json=goprotoGettersAll", + Filename: "gogo.proto", +} + +var E_GoprotoEnumPrefixAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63002, + Name: "gogoproto.goproto_enum_prefix_all", + Tag: "varint,63002,opt,name=goproto_enum_prefix_all,json=goprotoEnumPrefixAll", + Filename: "gogo.proto", +} + +var E_GoprotoStringerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63003, + Name: "gogoproto.goproto_stringer_all", + Tag: "varint,63003,opt,name=goproto_stringer_all,json=goprotoStringerAll", + Filename: "gogo.proto", +} + +var E_VerboseEqualAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63004, + Name: "gogoproto.verbose_equal_all", + Tag: 
"varint,63004,opt,name=verbose_equal_all,json=verboseEqualAll", + Filename: "gogo.proto", +} + +var E_FaceAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63005, + Name: "gogoproto.face_all", + Tag: "varint,63005,opt,name=face_all,json=faceAll", + Filename: "gogo.proto", +} + +var E_GostringAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63006, + Name: "gogoproto.gostring_all", + Tag: "varint,63006,opt,name=gostring_all,json=gostringAll", + Filename: "gogo.proto", +} + +var E_PopulateAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63007, + Name: "gogoproto.populate_all", + Tag: "varint,63007,opt,name=populate_all,json=populateAll", + Filename: "gogo.proto", +} + +var E_StringerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63008, + Name: "gogoproto.stringer_all", + Tag: "varint,63008,opt,name=stringer_all,json=stringerAll", + Filename: "gogo.proto", +} + +var E_OnlyoneAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63009, + Name: "gogoproto.onlyone_all", + Tag: "varint,63009,opt,name=onlyone_all,json=onlyoneAll", + Filename: "gogo.proto", +} + +var E_EqualAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63013, + Name: "gogoproto.equal_all", + Tag: "varint,63013,opt,name=equal_all,json=equalAll", + Filename: "gogo.proto", +} + +var E_DescriptionAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63014, + Name: "gogoproto.description_all", + Tag: "varint,63014,opt,name=description_all,json=descriptionAll", + Filename: "gogo.proto", +} + +var E_TestgenAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63015, + Name: "gogoproto.testgen_all", + Tag: "varint,63015,opt,name=testgen_all,json=testgenAll", + Filename: "gogo.proto", +} + +var E_BenchgenAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63016, + Name: "gogoproto.benchgen_all", + Tag: "varint,63016,opt,name=benchgen_all,json=benchgenAll", + Filename: "gogo.proto", +} + +var E_MarshalerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63017, + Name: "gogoproto.marshaler_all", + Tag: "varint,63017,opt,name=marshaler_all,json=marshalerAll", + Filename: "gogo.proto", +} + +var E_UnmarshalerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63018, + Name: "gogoproto.unmarshaler_all", + Tag: "varint,63018,opt,name=unmarshaler_all,json=unmarshalerAll", + Filename: "gogo.proto", +} + +var E_StableMarshalerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63019, + Name: "gogoproto.stable_marshaler_all", + Tag: "varint,63019,opt,name=stable_marshaler_all,json=stableMarshalerAll", + Filename: "gogo.proto", +} + +var E_SizerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63020, + Name: "gogoproto.sizer_all", + Tag: 
"varint,63020,opt,name=sizer_all,json=sizerAll", + Filename: "gogo.proto", +} + +var E_GoprotoEnumStringerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63021, + Name: "gogoproto.goproto_enum_stringer_all", + Tag: "varint,63021,opt,name=goproto_enum_stringer_all,json=goprotoEnumStringerAll", + Filename: "gogo.proto", +} + +var E_EnumStringerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63022, + Name: "gogoproto.enum_stringer_all", + Tag: "varint,63022,opt,name=enum_stringer_all,json=enumStringerAll", + Filename: "gogo.proto", +} + +var E_UnsafeMarshalerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63023, + Name: "gogoproto.unsafe_marshaler_all", + Tag: "varint,63023,opt,name=unsafe_marshaler_all,json=unsafeMarshalerAll", + Filename: "gogo.proto", +} + +var E_UnsafeUnmarshalerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63024, + Name: "gogoproto.unsafe_unmarshaler_all", + Tag: "varint,63024,opt,name=unsafe_unmarshaler_all,json=unsafeUnmarshalerAll", + Filename: "gogo.proto", +} + +var E_GoprotoExtensionsMapAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63025, + Name: "gogoproto.goproto_extensions_map_all", + Tag: "varint,63025,opt,name=goproto_extensions_map_all,json=goprotoExtensionsMapAll", + Filename: "gogo.proto", +} + +var E_GoprotoUnrecognizedAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63026, + Name: "gogoproto.goproto_unrecognized_all", + Tag: "varint,63026,opt,name=goproto_unrecognized_all,json=goprotoUnrecognizedAll", + Filename: "gogo.proto", +} + +var E_GogoprotoImport = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63027, + Name: "gogoproto.gogoproto_import", + Tag: "varint,63027,opt,name=gogoproto_import,json=gogoprotoImport", + Filename: "gogo.proto", +} + +var E_ProtosizerAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63028, + Name: "gogoproto.protosizer_all", + Tag: "varint,63028,opt,name=protosizer_all,json=protosizerAll", + Filename: "gogo.proto", +} + +var E_CompareAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63029, + Name: "gogoproto.compare_all", + Tag: "varint,63029,opt,name=compare_all,json=compareAll", + Filename: "gogo.proto", +} + +var E_TypedeclAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63030, + Name: "gogoproto.typedecl_all", + Tag: "varint,63030,opt,name=typedecl_all,json=typedeclAll", + Filename: "gogo.proto", +} + +var E_EnumdeclAll = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63031, + Name: "gogoproto.enumdecl_all", + Tag: "varint,63031,opt,name=enumdecl_all,json=enumdeclAll", + Filename: "gogo.proto", +} + +var E_GoprotoRegistration = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FileOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 63032, + Name: "gogoproto.goproto_registration", + Tag: 
"varint,63032,opt,name=goproto_registration,json=goprotoRegistration", + Filename: "gogo.proto", +} + +var E_GoprotoGetters = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64001, + Name: "gogoproto.goproto_getters", + Tag: "varint,64001,opt,name=goproto_getters,json=goprotoGetters", + Filename: "gogo.proto", +} + +var E_GoprotoStringer = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64003, + Name: "gogoproto.goproto_stringer", + Tag: "varint,64003,opt,name=goproto_stringer,json=goprotoStringer", + Filename: "gogo.proto", +} + +var E_VerboseEqual = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64004, + Name: "gogoproto.verbose_equal", + Tag: "varint,64004,opt,name=verbose_equal,json=verboseEqual", + Filename: "gogo.proto", +} + +var E_Face = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64005, + Name: "gogoproto.face", + Tag: "varint,64005,opt,name=face", + Filename: "gogo.proto", +} + +var E_Gostring = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64006, + Name: "gogoproto.gostring", + Tag: "varint,64006,opt,name=gostring", + Filename: "gogo.proto", +} + +var E_Populate = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64007, + Name: "gogoproto.populate", + Tag: "varint,64007,opt,name=populate", + Filename: "gogo.proto", +} + +var E_Stringer = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 67008, + Name: "gogoproto.stringer", + Tag: "varint,67008,opt,name=stringer", + Filename: "gogo.proto", +} + +var E_Onlyone = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64009, + Name: "gogoproto.onlyone", + Tag: "varint,64009,opt,name=onlyone", + Filename: "gogo.proto", +} + +var E_Equal = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64013, + Name: "gogoproto.equal", + Tag: "varint,64013,opt,name=equal", + Filename: "gogo.proto", +} + +var E_Description = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64014, + Name: "gogoproto.description", + Tag: "varint,64014,opt,name=description", + Filename: "gogo.proto", +} + +var E_Testgen = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64015, + Name: "gogoproto.testgen", + Tag: "varint,64015,opt,name=testgen", + Filename: "gogo.proto", +} + +var E_Benchgen = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64016, + Name: "gogoproto.benchgen", + Tag: "varint,64016,opt,name=benchgen", + Filename: "gogo.proto", +} + +var E_Marshaler = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64017, + Name: "gogoproto.marshaler", + Tag: "varint,64017,opt,name=marshaler", + Filename: "gogo.proto", +} + +var E_Unmarshaler = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64018, + Name: 
"gogoproto.unmarshaler", + Tag: "varint,64018,opt,name=unmarshaler", + Filename: "gogo.proto", +} + +var E_StableMarshaler = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64019, + Name: "gogoproto.stable_marshaler", + Tag: "varint,64019,opt,name=stable_marshaler,json=stableMarshaler", + Filename: "gogo.proto", +} + +var E_Sizer = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64020, + Name: "gogoproto.sizer", + Tag: "varint,64020,opt,name=sizer", + Filename: "gogo.proto", +} + +var E_UnsafeMarshaler = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64023, + Name: "gogoproto.unsafe_marshaler", + Tag: "varint,64023,opt,name=unsafe_marshaler,json=unsafeMarshaler", + Filename: "gogo.proto", +} + +var E_UnsafeUnmarshaler = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64024, + Name: "gogoproto.unsafe_unmarshaler", + Tag: "varint,64024,opt,name=unsafe_unmarshaler,json=unsafeUnmarshaler", + Filename: "gogo.proto", +} + +var E_GoprotoExtensionsMap = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64025, + Name: "gogoproto.goproto_extensions_map", + Tag: "varint,64025,opt,name=goproto_extensions_map,json=goprotoExtensionsMap", + Filename: "gogo.proto", +} + +var E_GoprotoUnrecognized = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64026, + Name: "gogoproto.goproto_unrecognized", + Tag: "varint,64026,opt,name=goproto_unrecognized,json=goprotoUnrecognized", + Filename: "gogo.proto", +} + +var E_Protosizer = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64028, + Name: "gogoproto.protosizer", + Tag: "varint,64028,opt,name=protosizer", + Filename: "gogo.proto", +} + +var E_Compare = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64029, + Name: "gogoproto.compare", + Tag: "varint,64029,opt,name=compare", + Filename: "gogo.proto", +} + +var E_Typedecl = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 64030, + Name: "gogoproto.typedecl", + Tag: "varint,64030,opt,name=typedecl", + Filename: "gogo.proto", +} + +var E_Nullable = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 65001, + Name: "gogoproto.nullable", + Tag: "varint,65001,opt,name=nullable", + Filename: "gogo.proto", +} + +var E_Embed = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 65002, + Name: "gogoproto.embed", + Tag: "varint,65002,opt,name=embed", + Filename: "gogo.proto", +} + +var E_Customtype = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*string)(nil), + Field: 65003, + Name: "gogoproto.customtype", + Tag: "bytes,65003,opt,name=customtype", + Filename: "gogo.proto", +} + +var E_Customname = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*string)(nil), + Field: 65004, + Name: "gogoproto.customname", + Tag: "bytes,65004,opt,name=customname", + Filename: "gogo.proto", +} + +var 
E_Jsontag = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*string)(nil), + Field: 65005, + Name: "gogoproto.jsontag", + Tag: "bytes,65005,opt,name=jsontag", + Filename: "gogo.proto", +} + +var E_Moretags = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*string)(nil), + Field: 65006, + Name: "gogoproto.moretags", + Tag: "bytes,65006,opt,name=moretags", + Filename: "gogo.proto", +} + +var E_Casttype = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*string)(nil), + Field: 65007, + Name: "gogoproto.casttype", + Tag: "bytes,65007,opt,name=casttype", + Filename: "gogo.proto", +} + +var E_Castkey = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*string)(nil), + Field: 65008, + Name: "gogoproto.castkey", + Tag: "bytes,65008,opt,name=castkey", + Filename: "gogo.proto", +} + +var E_Castvalue = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*string)(nil), + Field: 65009, + Name: "gogoproto.castvalue", + Tag: "bytes,65009,opt,name=castvalue", + Filename: "gogo.proto", +} + +var E_Stdtime = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 65010, + Name: "gogoproto.stdtime", + Tag: "varint,65010,opt,name=stdtime", + Filename: "gogo.proto", +} + +var E_Stdduration = &proto.ExtensionDesc{ + ExtendedType: (*google_protobuf.FieldOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 65011, + Name: "gogoproto.stdduration", + Tag: "varint,65011,opt,name=stdduration", + Filename: "gogo.proto", +} + +func init() { + proto.RegisterExtension(E_GoprotoEnumPrefix) + proto.RegisterExtension(E_GoprotoEnumStringer) + proto.RegisterExtension(E_EnumStringer) + proto.RegisterExtension(E_EnumCustomname) + proto.RegisterExtension(E_Enumdecl) + proto.RegisterExtension(E_EnumvalueCustomname) + proto.RegisterExtension(E_GoprotoGettersAll) + proto.RegisterExtension(E_GoprotoEnumPrefixAll) + proto.RegisterExtension(E_GoprotoStringerAll) + proto.RegisterExtension(E_VerboseEqualAll) + proto.RegisterExtension(E_FaceAll) + proto.RegisterExtension(E_GostringAll) + proto.RegisterExtension(E_PopulateAll) + proto.RegisterExtension(E_StringerAll) + proto.RegisterExtension(E_OnlyoneAll) + proto.RegisterExtension(E_EqualAll) + proto.RegisterExtension(E_DescriptionAll) + proto.RegisterExtension(E_TestgenAll) + proto.RegisterExtension(E_BenchgenAll) + proto.RegisterExtension(E_MarshalerAll) + proto.RegisterExtension(E_UnmarshalerAll) + proto.RegisterExtension(E_StableMarshalerAll) + proto.RegisterExtension(E_SizerAll) + proto.RegisterExtension(E_GoprotoEnumStringerAll) + proto.RegisterExtension(E_EnumStringerAll) + proto.RegisterExtension(E_UnsafeMarshalerAll) + proto.RegisterExtension(E_UnsafeUnmarshalerAll) + proto.RegisterExtension(E_GoprotoExtensionsMapAll) + proto.RegisterExtension(E_GoprotoUnrecognizedAll) + proto.RegisterExtension(E_GogoprotoImport) + proto.RegisterExtension(E_ProtosizerAll) + proto.RegisterExtension(E_CompareAll) + proto.RegisterExtension(E_TypedeclAll) + proto.RegisterExtension(E_EnumdeclAll) + proto.RegisterExtension(E_GoprotoRegistration) + proto.RegisterExtension(E_GoprotoGetters) + proto.RegisterExtension(E_GoprotoStringer) + proto.RegisterExtension(E_VerboseEqual) + proto.RegisterExtension(E_Face) + proto.RegisterExtension(E_Gostring) + proto.RegisterExtension(E_Populate) + proto.RegisterExtension(E_Stringer) + 
proto.RegisterExtension(E_Onlyone) + proto.RegisterExtension(E_Equal) + proto.RegisterExtension(E_Description) + proto.RegisterExtension(E_Testgen) + proto.RegisterExtension(E_Benchgen) + proto.RegisterExtension(E_Marshaler) + proto.RegisterExtension(E_Unmarshaler) + proto.RegisterExtension(E_StableMarshaler) + proto.RegisterExtension(E_Sizer) + proto.RegisterExtension(E_UnsafeMarshaler) + proto.RegisterExtension(E_UnsafeUnmarshaler) + proto.RegisterExtension(E_GoprotoExtensionsMap) + proto.RegisterExtension(E_GoprotoUnrecognized) + proto.RegisterExtension(E_Protosizer) + proto.RegisterExtension(E_Compare) + proto.RegisterExtension(E_Typedecl) + proto.RegisterExtension(E_Nullable) + proto.RegisterExtension(E_Embed) + proto.RegisterExtension(E_Customtype) + proto.RegisterExtension(E_Customname) + proto.RegisterExtension(E_Jsontag) + proto.RegisterExtension(E_Moretags) + proto.RegisterExtension(E_Casttype) + proto.RegisterExtension(E_Castkey) + proto.RegisterExtension(E_Castvalue) + proto.RegisterExtension(E_Stdtime) + proto.RegisterExtension(E_Stdduration) +} + +func init() { proto.RegisterFile("gogo.proto", fileDescriptorGogo) } + +var fileDescriptorGogo = []byte{ + // 1220 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x98, 0x4b, 0x6f, 0x1c, 0x45, + 0x10, 0x80, 0x85, 0x48, 0x14, 0x6f, 0xd9, 0x8e, 0xf1, 0xda, 0x98, 0x10, 0x81, 0x08, 0x9c, 0x38, + 0xd9, 0xa7, 0x08, 0xa5, 0xad, 0xc8, 0x72, 0x2c, 0xc7, 0x4a, 0x84, 0xc1, 0x98, 0x38, 0xbc, 0x0e, + 0xab, 0xd9, 0xdd, 0xf6, 0x78, 0x60, 0x66, 0x7a, 0x98, 0xe9, 0x89, 0xe2, 0xdc, 0x50, 0x78, 0x08, + 0x21, 0xde, 0x48, 0x90, 0x90, 0x04, 0x38, 0xf0, 0x7e, 0x86, 0xf7, 0x91, 0x0b, 0x8f, 0x2b, 0xff, + 0x81, 0x0b, 0x60, 0xde, 0xbe, 0xf9, 0x82, 0x6a, 0xb6, 0x6a, 0xb6, 0x67, 0xbd, 0x52, 0xf7, 0xde, + 0xc6, 0xeb, 0xfe, 0xbe, 0xad, 0xa9, 0x9a, 0xae, 0xea, 0x59, 0x00, 0x5f, 0xf9, 0x6a, 0x3a, 0x49, + 0x95, 0x56, 0xf5, 0x1a, 0x5e, 0x17, 0x97, 0x07, 0x0f, 0xf9, 0x4a, 0xf9, 0xa1, 0x9c, 0x29, 0xfe, + 0x6a, 0xe6, 0xeb, 0x33, 0x6d, 0x99, 0xb5, 0xd2, 0x20, 0xd1, 0x2a, 0xed, 0x2c, 0x16, 0x77, 0xc1, + 0x04, 0x2d, 0x6e, 0xc8, 0x38, 0x8f, 0x1a, 0x49, 0x2a, 0xd7, 0x83, 0xb3, 0xf5, 0x9b, 0xa6, 0x3b, + 0xe4, 0x34, 0x93, 0xd3, 0x8b, 0x71, 0x1e, 0xdd, 0x9d, 0xe8, 0x40, 0xc5, 0xd9, 0x81, 0xab, 0xbf, + 0x5c, 0x7b, 0xe8, 0x9a, 0xdb, 0x87, 0x56, 0xc7, 0x09, 0xc5, 0xff, 0xad, 0x14, 0xa0, 0x58, 0x85, + 0xeb, 0x2b, 0xbe, 0x4c, 0xa7, 0x41, 0xec, 0xcb, 0xd4, 0x62, 0xfc, 0x9e, 0x8c, 0x13, 0x86, 0xf1, + 0x5e, 0x42, 0xc5, 0x02, 0x8c, 0x0e, 0xe2, 0xfa, 0x81, 0x5c, 0x23, 0xd2, 0x94, 0x2c, 0xc1, 0x58, + 0x21, 0x69, 0xe5, 0x99, 0x56, 0x51, 0xec, 0x45, 0xd2, 0xa2, 0xf9, 0xb1, 0xd0, 0xd4, 0x56, 0xf7, + 0x23, 0xb6, 0x50, 0x52, 0x42, 0xc0, 0x10, 0x7e, 0xd2, 0x96, 0xad, 0xd0, 0x62, 0xf8, 0x89, 0x02, + 0x29, 0xd7, 0x8b, 0xd3, 0x30, 0x89, 0xd7, 0x67, 0xbc, 0x30, 0x97, 0x66, 0x24, 0xb7, 0xf6, 0xf5, + 0x9c, 0xc6, 0x65, 0x2c, 0xfb, 0xf9, 0xfc, 0x9e, 0x22, 0x9c, 0x89, 0x52, 0x60, 0xc4, 0x64, 0x54, + 0xd1, 0x97, 0x5a, 0xcb, 0x34, 0x6b, 0x78, 0x61, 0xbf, 0xf0, 0x8e, 0x07, 0x61, 0x69, 0xbc, 0xb0, + 0x55, 0xad, 0xe2, 0x52, 0x87, 0x9c, 0x0f, 0x43, 0xb1, 0x06, 0x37, 0xf4, 0x79, 0x2a, 0x1c, 0x9c, + 0x17, 0xc9, 0x39, 0xb9, 0xeb, 0xc9, 0x40, 0xed, 0x0a, 0xf0, 0xe7, 0x65, 0x2d, 0x1d, 0x9c, 0xaf, + 0x93, 0xb3, 0x4e, 0x2c, 0x97, 0x14, 0x8d, 0x27, 0x61, 0xfc, 0x8c, 0x4c, 0x9b, 0x2a, 0x93, 0x0d, + 0xf9, 0x68, 0xee, 0x85, 0x0e, 0xba, 0x4b, 0xa4, 0x1b, 0x23, 0x70, 0x11, 0x39, 0x74, 0x1d, 0x81, + 0xa1, 0x75, 0xaf, 0x25, 0x1d, 0x14, 0x97, 0x49, 0xb1, 0x0f, 0xd7, 
0x23, 0x3a, 0x0f, 0x23, 0xbe, + 0xea, 0xdc, 0x92, 0x03, 0x7e, 0x85, 0xf0, 0x61, 0x66, 0x48, 0x91, 0xa8, 0x24, 0x0f, 0x3d, 0xed, + 0x12, 0xc1, 0x1b, 0xac, 0x60, 0x86, 0x14, 0x03, 0xa4, 0xf5, 0x4d, 0x56, 0x64, 0x46, 0x3e, 0xe7, + 0x60, 0x58, 0xc5, 0xe1, 0xa6, 0x8a, 0x5d, 0x82, 0x78, 0x8b, 0x0c, 0x40, 0x08, 0x0a, 0x66, 0xa1, + 0xe6, 0x5a, 0x88, 0xb7, 0xb7, 0x78, 0x7b, 0x70, 0x05, 0x96, 0x60, 0x8c, 0x1b, 0x54, 0xa0, 0x62, + 0x07, 0xc5, 0x3b, 0xa4, 0xd8, 0x6f, 0x60, 0x74, 0x1b, 0x5a, 0x66, 0xda, 0x97, 0x2e, 0x92, 0x77, + 0xf9, 0x36, 0x08, 0xa1, 0x54, 0x36, 0x65, 0xdc, 0xda, 0x70, 0x33, 0xbc, 0xc7, 0xa9, 0x64, 0x06, + 0x15, 0x0b, 0x30, 0x1a, 0x79, 0x69, 0xb6, 0xe1, 0x85, 0x4e, 0xe5, 0x78, 0x9f, 0x1c, 0x23, 0x25, + 0x44, 0x19, 0xc9, 0xe3, 0x41, 0x34, 0x1f, 0x70, 0x46, 0x0c, 0x8c, 0xb6, 0x5e, 0xa6, 0xbd, 0x66, + 0x28, 0x1b, 0x83, 0xd8, 0x3e, 0xe4, 0xad, 0xd7, 0x61, 0x97, 0x4d, 0xe3, 0x2c, 0xd4, 0xb2, 0xe0, + 0x9c, 0x93, 0xe6, 0x23, 0xae, 0x74, 0x01, 0x20, 0xfc, 0x00, 0xdc, 0xd8, 0x77, 0x4c, 0x38, 0xc8, + 0x3e, 0x26, 0xd9, 0x54, 0x9f, 0x51, 0x41, 0x2d, 0x61, 0x50, 0xe5, 0x27, 0xdc, 0x12, 0x64, 0x8f, + 0x6b, 0x05, 0x26, 0xf3, 0x38, 0xf3, 0xd6, 0x07, 0xcb, 0xda, 0xa7, 0x9c, 0xb5, 0x0e, 0x5b, 0xc9, + 0xda, 0x29, 0x98, 0x22, 0xe3, 0x60, 0x75, 0xfd, 0x8c, 0x1b, 0x6b, 0x87, 0x5e, 0xab, 0x56, 0xf7, + 0x21, 0x38, 0x58, 0xa6, 0xf3, 0xac, 0x96, 0x71, 0x86, 0x4c, 0x23, 0xf2, 0x12, 0x07, 0xf3, 0x55, + 0x32, 0x73, 0xc7, 0x5f, 0x2c, 0x05, 0xcb, 0x5e, 0x82, 0xf2, 0xfb, 0xe1, 0x00, 0xcb, 0xf3, 0x38, + 0x95, 0x2d, 0xe5, 0xc7, 0xc1, 0x39, 0xd9, 0x76, 0x50, 0x7f, 0xde, 0x53, 0xaa, 0x35, 0x03, 0x47, + 0xf3, 0x09, 0xb8, 0xae, 0x3c, 0xab, 0x34, 0x82, 0x28, 0x51, 0xa9, 0xb6, 0x18, 0xbf, 0xe0, 0x4a, + 0x95, 0xdc, 0x89, 0x02, 0x13, 0x8b, 0xb0, 0xbf, 0xf8, 0xd3, 0xf5, 0x91, 0xfc, 0x92, 0x44, 0xa3, + 0x5d, 0x8a, 0x1a, 0x47, 0x4b, 0x45, 0x89, 0x97, 0xba, 0xf4, 0xbf, 0xaf, 0xb8, 0x71, 0x10, 0x42, + 0x8d, 0x43, 0x6f, 0x26, 0x12, 0xa7, 0xbd, 0x83, 0xe1, 0x6b, 0x6e, 0x1c, 0xcc, 0x90, 0x82, 0x0f, + 0x0c, 0x0e, 0x8a, 0x6f, 0x58, 0xc1, 0x0c, 0x2a, 0xee, 0xe9, 0x0e, 0xda, 0x54, 0xfa, 0x41, 0xa6, + 0x53, 0x0f, 0x57, 0x5b, 0x54, 0xdf, 0x6e, 0x55, 0x0f, 0x61, 0xab, 0x06, 0x2a, 0x4e, 0xc2, 0x58, + 0xcf, 0x11, 0xa3, 0x7e, 0xcb, 0x2e, 0xdb, 0xb2, 0xcc, 0x32, 0xcf, 0x2f, 0x85, 0x8f, 0x6d, 0x53, + 0x33, 0xaa, 0x9e, 0x30, 0xc4, 0x9d, 0x58, 0xf7, 0xea, 0x39, 0xc0, 0x2e, 0x3b, 0xbf, 0x5d, 0x96, + 0xbe, 0x72, 0x0c, 0x10, 0xc7, 0x61, 0xb4, 0x72, 0x06, 0xb0, 0xab, 0x1e, 0x27, 0xd5, 0x88, 0x79, + 0x04, 0x10, 0x87, 0x61, 0x0f, 0xce, 0x73, 0x3b, 0xfe, 0x04, 0xe1, 0xc5, 0x72, 0x71, 0x14, 0x86, + 0x78, 0x8e, 0xdb, 0xd1, 0x27, 0x09, 0x2d, 0x11, 0xc4, 0x79, 0x86, 0xdb, 0xf1, 0xa7, 0x18, 0x67, + 0x04, 0x71, 0xf7, 0x14, 0x7e, 0xf7, 0xcc, 0x1e, 0xea, 0xc3, 0x9c, 0xbb, 0x59, 0xd8, 0x47, 0xc3, + 0xdb, 0x4e, 0x3f, 0x4d, 0x5f, 0xce, 0x84, 0xb8, 0x03, 0xf6, 0x3a, 0x26, 0xfc, 0x59, 0x42, 0x3b, + 0xeb, 0xc5, 0x02, 0x0c, 0x1b, 0x03, 0xdb, 0x8e, 0x3f, 0x47, 0xb8, 0x49, 0x61, 0xe8, 0x34, 0xb0, + 0xed, 0x82, 0xe7, 0x39, 0x74, 0x22, 0x30, 0x6d, 0x3c, 0xab, 0xed, 0xf4, 0x0b, 0x9c, 0x75, 0x46, + 0xc4, 0x1c, 0xd4, 0xca, 0xfe, 0x6b, 0xe7, 0x5f, 0x24, 0xbe, 0xcb, 0x60, 0x06, 0x8c, 0xfe, 0x6f, + 0x57, 0xbc, 0xc4, 0x19, 0x30, 0x28, 0xdc, 0x46, 0xbd, 0x33, 0xdd, 0x6e, 0x7a, 0x99, 0xb7, 0x51, + 0xcf, 0x48, 0xc7, 0x6a, 0x16, 0x6d, 0xd0, 0xae, 0x78, 0x85, 0xab, 0x59, 0xac, 0xc7, 0x30, 0x7a, + 0x87, 0xa4, 0xdd, 0xf1, 0x2a, 0x87, 0xd1, 0x33, 0x23, 0xc5, 0x0a, 0xd4, 0x77, 0x0f, 0x48, 0xbb, + 0xef, 0x35, 0xf2, 0x8d, 0xef, 0x9a, 0x8f, 0xe2, 0x3e, 0x98, 0xea, 0x3f, 0x1c, 0xed, 0xd6, 
0x0b, + 0xdb, 0x3d, 0xaf, 0x33, 0xe6, 0x6c, 0x14, 0xa7, 0xba, 0x5d, 0xd6, 0x1c, 0x8c, 0x76, 0xed, 0xc5, + 0xed, 0x6a, 0xa3, 0x35, 0xe7, 0xa2, 0x98, 0x07, 0xe8, 0xce, 0x24, 0xbb, 0xeb, 0x12, 0xb9, 0x0c, + 0x08, 0xb7, 0x06, 0x8d, 0x24, 0x3b, 0x7f, 0x99, 0xb7, 0x06, 0x11, 0xb8, 0x35, 0x78, 0x1a, 0xd9, + 0xe9, 0x2b, 0xbc, 0x35, 0x18, 0x11, 0xb3, 0x30, 0x14, 0xe7, 0x61, 0x88, 0xcf, 0x56, 0xfd, 0xe6, + 0x3e, 0xe3, 0x46, 0x86, 0x6d, 0x86, 0x7f, 0xdd, 0x21, 0x98, 0x01, 0x71, 0x18, 0xf6, 0xca, 0xa8, + 0x29, 0xdb, 0x36, 0xf2, 0xb7, 0x1d, 0xee, 0x27, 0xb8, 0x5a, 0xcc, 0x01, 0x74, 0x5e, 0xa6, 0x31, + 0x0a, 0x1b, 0xfb, 0xfb, 0x4e, 0xe7, 0xbd, 0xde, 0x40, 0xba, 0x82, 0xe2, 0x6d, 0xdc, 0x22, 0xd8, + 0xaa, 0x0a, 0x8a, 0x17, 0xf0, 0x23, 0xb0, 0xef, 0xe1, 0x4c, 0xc5, 0xda, 0xf3, 0x6d, 0xf4, 0x1f, + 0x44, 0xf3, 0x7a, 0x4c, 0x58, 0xa4, 0x52, 0xa9, 0x3d, 0x3f, 0xb3, 0xb1, 0x7f, 0x12, 0x5b, 0x02, + 0x08, 0xb7, 0xbc, 0x4c, 0xbb, 0xdc, 0xf7, 0x5f, 0x0c, 0x33, 0x80, 0x41, 0xe3, 0xf5, 0x23, 0x72, + 0xd3, 0xc6, 0xfe, 0xcd, 0x41, 0xd3, 0x7a, 0x71, 0x14, 0x6a, 0x78, 0x59, 0xfc, 0x0e, 0x61, 0x83, + 0xff, 0x21, 0xb8, 0x4b, 0xe0, 0x37, 0x67, 0xba, 0xad, 0x03, 0x7b, 0xb2, 0xff, 0xa5, 0x4a, 0xf3, + 0x7a, 0x31, 0x0f, 0xc3, 0x99, 0x6e, 0xb7, 0x73, 0x3a, 0xd1, 0x58, 0xf0, 0xff, 0x76, 0xca, 0x97, + 0xdc, 0x92, 0x39, 0xb6, 0x08, 0x13, 0x2d, 0x15, 0xf5, 0x82, 0xc7, 0x60, 0x49, 0x2d, 0xa9, 0x95, + 0x62, 0x17, 0x3d, 0x78, 0x9b, 0x1f, 0xe8, 0x8d, 0xbc, 0x39, 0xdd, 0x52, 0xd1, 0x0c, 0x1e, 0x35, + 0xbb, 0xbf, 0xa0, 0x95, 0x07, 0xcf, 0xff, 0x03, 0x00, 0x00, 0xff, 0xff, 0xed, 0x5f, 0x6c, 0x20, + 0x74, 0x13, 0x00, 0x00, +} diff --git a/vendor/github.com/gogo/protobuf/gogoproto/gogo.proto b/vendor/github.com/gogo/protobuf/gogoproto/gogo.proto new file mode 100644 index 000000000..7f0997935 --- /dev/null +++ b/vendor/github.com/gogo/protobuf/gogoproto/gogo.proto @@ -0,0 +1,133 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. All rights reserved. +// http://github.com/gogo/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +syntax = "proto2"; +package gogoproto; + +import "google/protobuf/descriptor.proto"; + +option java_package = "com.google.protobuf"; +option java_outer_classname = "GoGoProtos"; +option go_package = "github.com/gogo/protobuf/gogoproto"; + +extend google.protobuf.EnumOptions { + optional bool goproto_enum_prefix = 62001; + optional bool goproto_enum_stringer = 62021; + optional bool enum_stringer = 62022; + optional string enum_customname = 62023; + optional bool enumdecl = 62024; +} + +extend google.protobuf.EnumValueOptions { + optional string enumvalue_customname = 66001; +} + +extend google.protobuf.FileOptions { + optional bool goproto_getters_all = 63001; + optional bool goproto_enum_prefix_all = 63002; + optional bool goproto_stringer_all = 63003; + optional bool verbose_equal_all = 63004; + optional bool face_all = 63005; + optional bool gostring_all = 63006; + optional bool populate_all = 63007; + optional bool stringer_all = 63008; + optional bool onlyone_all = 63009; + + optional bool equal_all = 63013; + optional bool description_all = 63014; + optional bool testgen_all = 63015; + optional bool benchgen_all = 63016; + optional bool marshaler_all = 63017; + optional bool unmarshaler_all = 63018; + optional bool stable_marshaler_all = 63019; + + optional bool sizer_all = 63020; + + optional bool goproto_enum_stringer_all = 63021; + optional bool enum_stringer_all = 63022; + + optional bool unsafe_marshaler_all = 63023; + optional bool unsafe_unmarshaler_all = 63024; + + optional bool goproto_extensions_map_all = 63025; + optional bool goproto_unrecognized_all = 63026; + optional bool gogoproto_import = 63027; + optional bool protosizer_all = 63028; + optional bool compare_all = 63029; + optional bool typedecl_all = 63030; + optional bool enumdecl_all = 63031; + + optional bool goproto_registration = 63032; +} + +extend google.protobuf.MessageOptions { + optional bool goproto_getters = 64001; + optional bool goproto_stringer = 64003; + optional bool verbose_equal = 64004; + optional bool face = 64005; + optional bool gostring = 64006; + optional bool populate = 64007; + optional bool stringer = 67008; + optional bool onlyone = 64009; + + optional bool equal = 64013; + optional bool description = 64014; + optional bool testgen = 64015; + optional bool benchgen = 64016; + optional bool marshaler = 64017; + optional bool unmarshaler = 64018; + optional bool stable_marshaler = 64019; + + optional bool sizer = 64020; + + optional bool unsafe_marshaler = 64023; + optional bool unsafe_unmarshaler = 64024; + + optional bool goproto_extensions_map = 64025; + optional bool goproto_unrecognized = 64026; + + optional bool protosizer = 64028; + optional bool compare = 64029; + + optional bool typedecl = 64030; +} + +extend google.protobuf.FieldOptions { + optional bool nullable = 65001; + optional bool embed = 65002; + optional string customtype = 65003; + optional string customname = 65004; + optional string jsontag = 65005; + optional string moretags = 65006; + optional string casttype = 65007; + optional string castkey = 65008; + optional string castvalue = 65009; + + optional bool stdtime = 65010; + optional bool stdduration = 65011; +} diff --git a/vendor/github.com/gogo/protobuf/gogoproto/helper.go b/vendor/github.com/gogo/protobuf/gogoproto/helper.go new file mode 100644 index 000000000..6b851c562 --- /dev/null +++ b/vendor/github.com/gogo/protobuf/gogoproto/helper.go @@ -0,0 +1,357 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. 
All rights reserved. +// http://github.com/gogo/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package gogoproto + +import google_protobuf "github.com/gogo/protobuf/protoc-gen-gogo/descriptor" +import proto "github.com/gogo/protobuf/proto" + +func IsEmbed(field *google_protobuf.FieldDescriptorProto) bool { + return proto.GetBoolExtension(field.Options, E_Embed, false) +} + +func IsNullable(field *google_protobuf.FieldDescriptorProto) bool { + return proto.GetBoolExtension(field.Options, E_Nullable, true) +} + +func IsStdTime(field *google_protobuf.FieldDescriptorProto) bool { + return proto.GetBoolExtension(field.Options, E_Stdtime, false) +} + +func IsStdDuration(field *google_protobuf.FieldDescriptorProto) bool { + return proto.GetBoolExtension(field.Options, E_Stdduration, false) +} + +func NeedsNilCheck(proto3 bool, field *google_protobuf.FieldDescriptorProto) bool { + nullable := IsNullable(field) + if field.IsMessage() || IsCustomType(field) { + return nullable + } + if proto3 { + return false + } + return nullable || *field.Type == google_protobuf.FieldDescriptorProto_TYPE_BYTES +} + +func IsCustomType(field *google_protobuf.FieldDescriptorProto) bool { + typ := GetCustomType(field) + if len(typ) > 0 { + return true + } + return false +} + +func IsCastType(field *google_protobuf.FieldDescriptorProto) bool { + typ := GetCastType(field) + if len(typ) > 0 { + return true + } + return false +} + +func IsCastKey(field *google_protobuf.FieldDescriptorProto) bool { + typ := GetCastKey(field) + if len(typ) > 0 { + return true + } + return false +} + +func IsCastValue(field *google_protobuf.FieldDescriptorProto) bool { + typ := GetCastValue(field) + if len(typ) > 0 { + return true + } + return false +} + +func HasEnumDecl(file *google_protobuf.FileDescriptorProto, enum *google_protobuf.EnumDescriptorProto) bool { + return proto.GetBoolExtension(enum.Options, E_Enumdecl, proto.GetBoolExtension(file.Options, E_EnumdeclAll, true)) +} + +func HasTypeDecl(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Typedecl, proto.GetBoolExtension(file.Options, E_TypedeclAll, true)) +} + +func GetCustomType(field *google_protobuf.FieldDescriptorProto) 
string { + if field == nil { + return "" + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_Customtype) + if err == nil && v.(*string) != nil { + return *(v.(*string)) + } + } + return "" +} + +func GetCastType(field *google_protobuf.FieldDescriptorProto) string { + if field == nil { + return "" + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_Casttype) + if err == nil && v.(*string) != nil { + return *(v.(*string)) + } + } + return "" +} + +func GetCastKey(field *google_protobuf.FieldDescriptorProto) string { + if field == nil { + return "" + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_Castkey) + if err == nil && v.(*string) != nil { + return *(v.(*string)) + } + } + return "" +} + +func GetCastValue(field *google_protobuf.FieldDescriptorProto) string { + if field == nil { + return "" + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_Castvalue) + if err == nil && v.(*string) != nil { + return *(v.(*string)) + } + } + return "" +} + +func IsCustomName(field *google_protobuf.FieldDescriptorProto) bool { + name := GetCustomName(field) + if len(name) > 0 { + return true + } + return false +} + +func IsEnumCustomName(field *google_protobuf.EnumDescriptorProto) bool { + name := GetEnumCustomName(field) + if len(name) > 0 { + return true + } + return false +} + +func IsEnumValueCustomName(field *google_protobuf.EnumValueDescriptorProto) bool { + name := GetEnumValueCustomName(field) + if len(name) > 0 { + return true + } + return false +} + +func GetCustomName(field *google_protobuf.FieldDescriptorProto) string { + if field == nil { + return "" + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_Customname) + if err == nil && v.(*string) != nil { + return *(v.(*string)) + } + } + return "" +} + +func GetEnumCustomName(field *google_protobuf.EnumDescriptorProto) string { + if field == nil { + return "" + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_EnumCustomname) + if err == nil && v.(*string) != nil { + return *(v.(*string)) + } + } + return "" +} + +func GetEnumValueCustomName(field *google_protobuf.EnumValueDescriptorProto) string { + if field == nil { + return "" + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_EnumvalueCustomname) + if err == nil && v.(*string) != nil { + return *(v.(*string)) + } + } + return "" +} + +func GetJsonTag(field *google_protobuf.FieldDescriptorProto) *string { + if field == nil { + return nil + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_Jsontag) + if err == nil && v.(*string) != nil { + return (v.(*string)) + } + } + return nil +} + +func GetMoreTags(field *google_protobuf.FieldDescriptorProto) *string { + if field == nil { + return nil + } + if field.Options != nil { + v, err := proto.GetExtension(field.Options, E_Moretags) + if err == nil && v.(*string) != nil { + return (v.(*string)) + } + } + return nil +} + +type EnableFunc func(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool + +func EnabledGoEnumPrefix(file *google_protobuf.FileDescriptorProto, enum *google_protobuf.EnumDescriptorProto) bool { + return proto.GetBoolExtension(enum.Options, E_GoprotoEnumPrefix, proto.GetBoolExtension(file.Options, E_GoprotoEnumPrefixAll, true)) +} + +func EnabledGoStringer(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return 
proto.GetBoolExtension(message.Options, E_GoprotoStringer, proto.GetBoolExtension(file.Options, E_GoprotoStringerAll, true)) +} + +func HasGoGetters(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_GoprotoGetters, proto.GetBoolExtension(file.Options, E_GoprotoGettersAll, true)) +} + +func IsUnion(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Onlyone, proto.GetBoolExtension(file.Options, E_OnlyoneAll, false)) +} + +func HasGoString(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Gostring, proto.GetBoolExtension(file.Options, E_GostringAll, false)) +} + +func HasEqual(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Equal, proto.GetBoolExtension(file.Options, E_EqualAll, false)) +} + +func HasVerboseEqual(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_VerboseEqual, proto.GetBoolExtension(file.Options, E_VerboseEqualAll, false)) +} + +func IsStringer(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Stringer, proto.GetBoolExtension(file.Options, E_StringerAll, false)) +} + +func IsFace(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Face, proto.GetBoolExtension(file.Options, E_FaceAll, false)) +} + +func HasDescription(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Description, proto.GetBoolExtension(file.Options, E_DescriptionAll, false)) +} + +func HasPopulate(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Populate, proto.GetBoolExtension(file.Options, E_PopulateAll, false)) +} + +func HasTestGen(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Testgen, proto.GetBoolExtension(file.Options, E_TestgenAll, false)) +} + +func HasBenchGen(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Benchgen, proto.GetBoolExtension(file.Options, E_BenchgenAll, false)) +} + +func IsMarshaler(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Marshaler, proto.GetBoolExtension(file.Options, E_MarshalerAll, false)) +} + +func IsUnmarshaler(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Unmarshaler, proto.GetBoolExtension(file.Options, E_UnmarshalerAll, false)) +} + +func IsStableMarshaler(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_StableMarshaler, proto.GetBoolExtension(file.Options, E_StableMarshalerAll, false)) +} + +func IsSizer(file *google_protobuf.FileDescriptorProto, message 
*google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Sizer, proto.GetBoolExtension(file.Options, E_SizerAll, false)) +} + +func IsProtoSizer(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Protosizer, proto.GetBoolExtension(file.Options, E_ProtosizerAll, false)) +} + +func IsGoEnumStringer(file *google_protobuf.FileDescriptorProto, enum *google_protobuf.EnumDescriptorProto) bool { + return proto.GetBoolExtension(enum.Options, E_GoprotoEnumStringer, proto.GetBoolExtension(file.Options, E_GoprotoEnumStringerAll, true)) +} + +func IsEnumStringer(file *google_protobuf.FileDescriptorProto, enum *google_protobuf.EnumDescriptorProto) bool { + return proto.GetBoolExtension(enum.Options, E_EnumStringer, proto.GetBoolExtension(file.Options, E_EnumStringerAll, false)) +} + +func IsUnsafeMarshaler(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_UnsafeMarshaler, proto.GetBoolExtension(file.Options, E_UnsafeMarshalerAll, false)) +} + +func IsUnsafeUnmarshaler(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_UnsafeUnmarshaler, proto.GetBoolExtension(file.Options, E_UnsafeUnmarshalerAll, false)) +} + +func HasExtensionsMap(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_GoprotoExtensionsMap, proto.GetBoolExtension(file.Options, E_GoprotoExtensionsMapAll, true)) +} + +func HasUnrecognized(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + if IsProto3(file) { + return false + } + return proto.GetBoolExtension(message.Options, E_GoprotoUnrecognized, proto.GetBoolExtension(file.Options, E_GoprotoUnrecognizedAll, true)) +} + +func IsProto3(file *google_protobuf.FileDescriptorProto) bool { + return file.GetSyntax() == "proto3" +} + +func ImportsGoGoProto(file *google_protobuf.FileDescriptorProto) bool { + return proto.GetBoolExtension(file.Options, E_GogoprotoImport, true) +} + +func HasCompare(file *google_protobuf.FileDescriptorProto, message *google_protobuf.DescriptorProto) bool { + return proto.GetBoolExtension(message.Options, E_Compare, proto.GetBoolExtension(file.Options, E_CompareAll, false)) +} + +func RegistersGolangProto(file *google_protobuf.FileDescriptorProto) bool { + return proto.GetBoolExtension(file.Options, E_GoprotoRegistration, false) +} diff --git a/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor.go b/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor.go new file mode 100644 index 000000000..a85bf1984 --- /dev/null +++ b/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor.go @@ -0,0 +1,118 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2016 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Package descriptor provides functions for obtaining protocol buffer +// descriptors for generated Go types. +// +// These functions cannot go in package proto because they depend on the +// generated protobuf descriptor messages, which themselves depend on proto. +package descriptor + +import ( + "bytes" + "compress/gzip" + "fmt" + "io/ioutil" + + "github.com/gogo/protobuf/proto" +) + +// extractFile extracts a FileDescriptorProto from a gzip'd buffer. +func extractFile(gz []byte) (*FileDescriptorProto, error) { + r, err := gzip.NewReader(bytes.NewReader(gz)) + if err != nil { + return nil, fmt.Errorf("failed to open gzip reader: %v", err) + } + defer r.Close() + + b, err := ioutil.ReadAll(r) + if err != nil { + return nil, fmt.Errorf("failed to uncompress descriptor: %v", err) + } + + fd := new(FileDescriptorProto) + if err := proto.Unmarshal(b, fd); err != nil { + return nil, fmt.Errorf("malformed FileDescriptorProto: %v", err) + } + + return fd, nil +} + +// Message is a proto.Message with a method to return its descriptor. +// +// Message types generated by the protocol compiler always satisfy +// the Message interface. +type Message interface { + proto.Message + Descriptor() ([]byte, []int) +} + +// ForMessage returns a FileDescriptorProto and a DescriptorProto from within it +// describing the given message. +func ForMessage(msg Message) (fd *FileDescriptorProto, md *DescriptorProto) { + gz, path := msg.Descriptor() + fd, err := extractFile(gz) + if err != nil { + panic(fmt.Sprintf("invalid FileDescriptorProto for %T: %v", msg, err)) + } + + md = fd.MessageType[path[0]] + for _, i := range path[1:] { + md = md.NestedType[i] + } + return fd, md +} + +// Is this field a scalar numeric type? 
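As a usage note for ForMessage above, a minimal sketch of how it is typically called from application code (the examplepb import path and MyMessage type are hypothetical placeholders for any message generated by protoc-gen-gogo):

    package main

    import (
        "fmt"

        "github.com/gogo/protobuf/protoc-gen-gogo/descriptor"

        examplepb "example.com/project/examplepb" // hypothetical generated package
    )

    func main() {
        // Descriptor() on a generated message returns the gzipped file
        // descriptor and the index path of the message within it;
        // ForMessage unpacks both.
        fd, md := descriptor.ForMessage(&examplepb.MyMessage{})
        fmt.Println(fd.GetName(), md.GetName())
    }

Because every message generated by the protocol compiler satisfies the Message interface, the same call works for any generated type without additional registration.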
+func (field *FieldDescriptorProto) IsScalar() bool { + if field.Type == nil { + return false + } + switch *field.Type { + case FieldDescriptorProto_TYPE_DOUBLE, + FieldDescriptorProto_TYPE_FLOAT, + FieldDescriptorProto_TYPE_INT64, + FieldDescriptorProto_TYPE_UINT64, + FieldDescriptorProto_TYPE_INT32, + FieldDescriptorProto_TYPE_FIXED64, + FieldDescriptorProto_TYPE_FIXED32, + FieldDescriptorProto_TYPE_BOOL, + FieldDescriptorProto_TYPE_UINT32, + FieldDescriptorProto_TYPE_ENUM, + FieldDescriptorProto_TYPE_SFIXED32, + FieldDescriptorProto_TYPE_SFIXED64, + FieldDescriptorProto_TYPE_SINT32, + FieldDescriptorProto_TYPE_SINT64: + return true + default: + return false + } +} diff --git a/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor.pb.go b/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor.pb.go new file mode 100644 index 000000000..4174cbd9f --- /dev/null +++ b/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor.pb.go @@ -0,0 +1,2280 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: descriptor.proto + +/* +Package descriptor is a generated protocol buffer package. + +It is generated from these files: + descriptor.proto + +It has these top-level messages: + FileDescriptorSet + FileDescriptorProto + DescriptorProto + ExtensionRangeOptions + FieldDescriptorProto + OneofDescriptorProto + EnumDescriptorProto + EnumValueDescriptorProto + ServiceDescriptorProto + MethodDescriptorProto + FileOptions + MessageOptions + FieldOptions + OneofOptions + EnumOptions + EnumValueOptions + ServiceOptions + MethodOptions + UninterpretedOption + SourceCodeInfo + GeneratedCodeInfo +*/ +package descriptor + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +type FieldDescriptorProto_Type int32 + +const ( + // 0 is reserved for errors. + // Order is weird for historical reasons. + FieldDescriptorProto_TYPE_DOUBLE FieldDescriptorProto_Type = 1 + FieldDescriptorProto_TYPE_FLOAT FieldDescriptorProto_Type = 2 + // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT64 if + // negative values are likely. + FieldDescriptorProto_TYPE_INT64 FieldDescriptorProto_Type = 3 + FieldDescriptorProto_TYPE_UINT64 FieldDescriptorProto_Type = 4 + // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT32 if + // negative values are likely. + FieldDescriptorProto_TYPE_INT32 FieldDescriptorProto_Type = 5 + FieldDescriptorProto_TYPE_FIXED64 FieldDescriptorProto_Type = 6 + FieldDescriptorProto_TYPE_FIXED32 FieldDescriptorProto_Type = 7 + FieldDescriptorProto_TYPE_BOOL FieldDescriptorProto_Type = 8 + FieldDescriptorProto_TYPE_STRING FieldDescriptorProto_Type = 9 + // Tag-delimited aggregate. + // Group type is deprecated and not supported in proto3. However, Proto3 + // implementations should still be able to parse the group wire format and + // treat group fields as unknown fields. + FieldDescriptorProto_TYPE_GROUP FieldDescriptorProto_Type = 10 + FieldDescriptorProto_TYPE_MESSAGE FieldDescriptorProto_Type = 11 + // New in version 2. 
+ FieldDescriptorProto_TYPE_BYTES FieldDescriptorProto_Type = 12 + FieldDescriptorProto_TYPE_UINT32 FieldDescriptorProto_Type = 13 + FieldDescriptorProto_TYPE_ENUM FieldDescriptorProto_Type = 14 + FieldDescriptorProto_TYPE_SFIXED32 FieldDescriptorProto_Type = 15 + FieldDescriptorProto_TYPE_SFIXED64 FieldDescriptorProto_Type = 16 + FieldDescriptorProto_TYPE_SINT32 FieldDescriptorProto_Type = 17 + FieldDescriptorProto_TYPE_SINT64 FieldDescriptorProto_Type = 18 +) + +var FieldDescriptorProto_Type_name = map[int32]string{ + 1: "TYPE_DOUBLE", + 2: "TYPE_FLOAT", + 3: "TYPE_INT64", + 4: "TYPE_UINT64", + 5: "TYPE_INT32", + 6: "TYPE_FIXED64", + 7: "TYPE_FIXED32", + 8: "TYPE_BOOL", + 9: "TYPE_STRING", + 10: "TYPE_GROUP", + 11: "TYPE_MESSAGE", + 12: "TYPE_BYTES", + 13: "TYPE_UINT32", + 14: "TYPE_ENUM", + 15: "TYPE_SFIXED32", + 16: "TYPE_SFIXED64", + 17: "TYPE_SINT32", + 18: "TYPE_SINT64", +} +var FieldDescriptorProto_Type_value = map[string]int32{ + "TYPE_DOUBLE": 1, + "TYPE_FLOAT": 2, + "TYPE_INT64": 3, + "TYPE_UINT64": 4, + "TYPE_INT32": 5, + "TYPE_FIXED64": 6, + "TYPE_FIXED32": 7, + "TYPE_BOOL": 8, + "TYPE_STRING": 9, + "TYPE_GROUP": 10, + "TYPE_MESSAGE": 11, + "TYPE_BYTES": 12, + "TYPE_UINT32": 13, + "TYPE_ENUM": 14, + "TYPE_SFIXED32": 15, + "TYPE_SFIXED64": 16, + "TYPE_SINT32": 17, + "TYPE_SINT64": 18, +} + +func (x FieldDescriptorProto_Type) Enum() *FieldDescriptorProto_Type { + p := new(FieldDescriptorProto_Type) + *p = x + return p +} +func (x FieldDescriptorProto_Type) String() string { + return proto.EnumName(FieldDescriptorProto_Type_name, int32(x)) +} +func (x *FieldDescriptorProto_Type) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(FieldDescriptorProto_Type_value, data, "FieldDescriptorProto_Type") + if err != nil { + return err + } + *x = FieldDescriptorProto_Type(value) + return nil +} +func (FieldDescriptorProto_Type) EnumDescriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{4, 0} +} + +type FieldDescriptorProto_Label int32 + +const ( + // 0 is reserved for errors + FieldDescriptorProto_LABEL_OPTIONAL FieldDescriptorProto_Label = 1 + FieldDescriptorProto_LABEL_REQUIRED FieldDescriptorProto_Label = 2 + FieldDescriptorProto_LABEL_REPEATED FieldDescriptorProto_Label = 3 +) + +var FieldDescriptorProto_Label_name = map[int32]string{ + 1: "LABEL_OPTIONAL", + 2: "LABEL_REQUIRED", + 3: "LABEL_REPEATED", +} +var FieldDescriptorProto_Label_value = map[string]int32{ + "LABEL_OPTIONAL": 1, + "LABEL_REQUIRED": 2, + "LABEL_REPEATED": 3, +} + +func (x FieldDescriptorProto_Label) Enum() *FieldDescriptorProto_Label { + p := new(FieldDescriptorProto_Label) + *p = x + return p +} +func (x FieldDescriptorProto_Label) String() string { + return proto.EnumName(FieldDescriptorProto_Label_name, int32(x)) +} +func (x *FieldDescriptorProto_Label) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(FieldDescriptorProto_Label_value, data, "FieldDescriptorProto_Label") + if err != nil { + return err + } + *x = FieldDescriptorProto_Label(value) + return nil +} +func (FieldDescriptorProto_Label) EnumDescriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{4, 1} +} + +// Generated classes can be optimized for speed or code size. +type FileOptions_OptimizeMode int32 + +const ( + FileOptions_SPEED FileOptions_OptimizeMode = 1 + // etc. 
+ FileOptions_CODE_SIZE FileOptions_OptimizeMode = 2 + FileOptions_LITE_RUNTIME FileOptions_OptimizeMode = 3 +) + +var FileOptions_OptimizeMode_name = map[int32]string{ + 1: "SPEED", + 2: "CODE_SIZE", + 3: "LITE_RUNTIME", +} +var FileOptions_OptimizeMode_value = map[string]int32{ + "SPEED": 1, + "CODE_SIZE": 2, + "LITE_RUNTIME": 3, +} + +func (x FileOptions_OptimizeMode) Enum() *FileOptions_OptimizeMode { + p := new(FileOptions_OptimizeMode) + *p = x + return p +} +func (x FileOptions_OptimizeMode) String() string { + return proto.EnumName(FileOptions_OptimizeMode_name, int32(x)) +} +func (x *FileOptions_OptimizeMode) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(FileOptions_OptimizeMode_value, data, "FileOptions_OptimizeMode") + if err != nil { + return err + } + *x = FileOptions_OptimizeMode(value) + return nil +} +func (FileOptions_OptimizeMode) EnumDescriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{10, 0} +} + +type FieldOptions_CType int32 + +const ( + // Default mode. + FieldOptions_STRING FieldOptions_CType = 0 + FieldOptions_CORD FieldOptions_CType = 1 + FieldOptions_STRING_PIECE FieldOptions_CType = 2 +) + +var FieldOptions_CType_name = map[int32]string{ + 0: "STRING", + 1: "CORD", + 2: "STRING_PIECE", +} +var FieldOptions_CType_value = map[string]int32{ + "STRING": 0, + "CORD": 1, + "STRING_PIECE": 2, +} + +func (x FieldOptions_CType) Enum() *FieldOptions_CType { + p := new(FieldOptions_CType) + *p = x + return p +} +func (x FieldOptions_CType) String() string { + return proto.EnumName(FieldOptions_CType_name, int32(x)) +} +func (x *FieldOptions_CType) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(FieldOptions_CType_value, data, "FieldOptions_CType") + if err != nil { + return err + } + *x = FieldOptions_CType(value) + return nil +} +func (FieldOptions_CType) EnumDescriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{12, 0} +} + +type FieldOptions_JSType int32 + +const ( + // Use the default type. + FieldOptions_JS_NORMAL FieldOptions_JSType = 0 + // Use JavaScript strings. + FieldOptions_JS_STRING FieldOptions_JSType = 1 + // Use JavaScript numbers. + FieldOptions_JS_NUMBER FieldOptions_JSType = 2 +) + +var FieldOptions_JSType_name = map[int32]string{ + 0: "JS_NORMAL", + 1: "JS_STRING", + 2: "JS_NUMBER", +} +var FieldOptions_JSType_value = map[string]int32{ + "JS_NORMAL": 0, + "JS_STRING": 1, + "JS_NUMBER": 2, +} + +func (x FieldOptions_JSType) Enum() *FieldOptions_JSType { + p := new(FieldOptions_JSType) + *p = x + return p +} +func (x FieldOptions_JSType) String() string { + return proto.EnumName(FieldOptions_JSType_name, int32(x)) +} +func (x *FieldOptions_JSType) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(FieldOptions_JSType_value, data, "FieldOptions_JSType") + if err != nil { + return err + } + *x = FieldOptions_JSType(value) + return nil +} +func (FieldOptions_JSType) EnumDescriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{12, 1} +} + +// Is this method side-effect-free (or safe in HTTP parlance), or idempotent, +// or neither? HTTP based RPC implementation may choose GET verb for safe +// methods, and PUT verb for idempotent methods instead of the default POST. 
+type MethodOptions_IdempotencyLevel int32 + +const ( + MethodOptions_IDEMPOTENCY_UNKNOWN MethodOptions_IdempotencyLevel = 0 + MethodOptions_NO_SIDE_EFFECTS MethodOptions_IdempotencyLevel = 1 + MethodOptions_IDEMPOTENT MethodOptions_IdempotencyLevel = 2 +) + +var MethodOptions_IdempotencyLevel_name = map[int32]string{ + 0: "IDEMPOTENCY_UNKNOWN", + 1: "NO_SIDE_EFFECTS", + 2: "IDEMPOTENT", +} +var MethodOptions_IdempotencyLevel_value = map[string]int32{ + "IDEMPOTENCY_UNKNOWN": 0, + "NO_SIDE_EFFECTS": 1, + "IDEMPOTENT": 2, +} + +func (x MethodOptions_IdempotencyLevel) Enum() *MethodOptions_IdempotencyLevel { + p := new(MethodOptions_IdempotencyLevel) + *p = x + return p +} +func (x MethodOptions_IdempotencyLevel) String() string { + return proto.EnumName(MethodOptions_IdempotencyLevel_name, int32(x)) +} +func (x *MethodOptions_IdempotencyLevel) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(MethodOptions_IdempotencyLevel_value, data, "MethodOptions_IdempotencyLevel") + if err != nil { + return err + } + *x = MethodOptions_IdempotencyLevel(value) + return nil +} +func (MethodOptions_IdempotencyLevel) EnumDescriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{17, 0} +} + +// The protocol compiler can output a FileDescriptorSet containing the .proto +// files it parses. +type FileDescriptorSet struct { + File []*FileDescriptorProto `protobuf:"bytes,1,rep,name=file" json:"file,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *FileDescriptorSet) Reset() { *m = FileDescriptorSet{} } +func (m *FileDescriptorSet) String() string { return proto.CompactTextString(m) } +func (*FileDescriptorSet) ProtoMessage() {} +func (*FileDescriptorSet) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{0} } + +func (m *FileDescriptorSet) GetFile() []*FileDescriptorProto { + if m != nil { + return m.File + } + return nil +} + +// Describes a complete .proto file. +type FileDescriptorProto struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + Package *string `protobuf:"bytes,2,opt,name=package" json:"package,omitempty"` + // Names of files imported by this file. + Dependency []string `protobuf:"bytes,3,rep,name=dependency" json:"dependency,omitempty"` + // Indexes of the public imported files in the dependency list above. + PublicDependency []int32 `protobuf:"varint,10,rep,name=public_dependency,json=publicDependency" json:"public_dependency,omitempty"` + // Indexes of the weak imported files in the dependency list. + // For Google-internal migration only. Do not use. + WeakDependency []int32 `protobuf:"varint,11,rep,name=weak_dependency,json=weakDependency" json:"weak_dependency,omitempty"` + // All top-level definitions in this file. + MessageType []*DescriptorProto `protobuf:"bytes,4,rep,name=message_type,json=messageType" json:"message_type,omitempty"` + EnumType []*EnumDescriptorProto `protobuf:"bytes,5,rep,name=enum_type,json=enumType" json:"enum_type,omitempty"` + Service []*ServiceDescriptorProto `protobuf:"bytes,6,rep,name=service" json:"service,omitempty"` + Extension []*FieldDescriptorProto `protobuf:"bytes,7,rep,name=extension" json:"extension,omitempty"` + Options *FileOptions `protobuf:"bytes,8,opt,name=options" json:"options,omitempty"` + // This field contains optional information about the original source code. + // You may safely remove this entire field without harming runtime + // functionality of the descriptors -- the information is needed only by + // development tools. 
+ SourceCodeInfo *SourceCodeInfo `protobuf:"bytes,9,opt,name=source_code_info,json=sourceCodeInfo" json:"source_code_info,omitempty"` + // The syntax of the proto file. + // The supported values are "proto2" and "proto3". + Syntax *string `protobuf:"bytes,12,opt,name=syntax" json:"syntax,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *FileDescriptorProto) Reset() { *m = FileDescriptorProto{} } +func (m *FileDescriptorProto) String() string { return proto.CompactTextString(m) } +func (*FileDescriptorProto) ProtoMessage() {} +func (*FileDescriptorProto) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{1} } + +func (m *FileDescriptorProto) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *FileDescriptorProto) GetPackage() string { + if m != nil && m.Package != nil { + return *m.Package + } + return "" +} + +func (m *FileDescriptorProto) GetDependency() []string { + if m != nil { + return m.Dependency + } + return nil +} + +func (m *FileDescriptorProto) GetPublicDependency() []int32 { + if m != nil { + return m.PublicDependency + } + return nil +} + +func (m *FileDescriptorProto) GetWeakDependency() []int32 { + if m != nil { + return m.WeakDependency + } + return nil +} + +func (m *FileDescriptorProto) GetMessageType() []*DescriptorProto { + if m != nil { + return m.MessageType + } + return nil +} + +func (m *FileDescriptorProto) GetEnumType() []*EnumDescriptorProto { + if m != nil { + return m.EnumType + } + return nil +} + +func (m *FileDescriptorProto) GetService() []*ServiceDescriptorProto { + if m != nil { + return m.Service + } + return nil +} + +func (m *FileDescriptorProto) GetExtension() []*FieldDescriptorProto { + if m != nil { + return m.Extension + } + return nil +} + +func (m *FileDescriptorProto) GetOptions() *FileOptions { + if m != nil { + return m.Options + } + return nil +} + +func (m *FileDescriptorProto) GetSourceCodeInfo() *SourceCodeInfo { + if m != nil { + return m.SourceCodeInfo + } + return nil +} + +func (m *FileDescriptorProto) GetSyntax() string { + if m != nil && m.Syntax != nil { + return *m.Syntax + } + return "" +} + +// Describes a message type. +type DescriptorProto struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + Field []*FieldDescriptorProto `protobuf:"bytes,2,rep,name=field" json:"field,omitempty"` + Extension []*FieldDescriptorProto `protobuf:"bytes,6,rep,name=extension" json:"extension,omitempty"` + NestedType []*DescriptorProto `protobuf:"bytes,3,rep,name=nested_type,json=nestedType" json:"nested_type,omitempty"` + EnumType []*EnumDescriptorProto `protobuf:"bytes,4,rep,name=enum_type,json=enumType" json:"enum_type,omitempty"` + ExtensionRange []*DescriptorProto_ExtensionRange `protobuf:"bytes,5,rep,name=extension_range,json=extensionRange" json:"extension_range,omitempty"` + OneofDecl []*OneofDescriptorProto `protobuf:"bytes,8,rep,name=oneof_decl,json=oneofDecl" json:"oneof_decl,omitempty"` + Options *MessageOptions `protobuf:"bytes,7,opt,name=options" json:"options,omitempty"` + ReservedRange []*DescriptorProto_ReservedRange `protobuf:"bytes,9,rep,name=reserved_range,json=reservedRange" json:"reserved_range,omitempty"` + // Reserved field names, which may not be used by fields in the same message. + // A given name may only be reserved once. 
+ ReservedName []string `protobuf:"bytes,10,rep,name=reserved_name,json=reservedName" json:"reserved_name,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *DescriptorProto) Reset() { *m = DescriptorProto{} } +func (m *DescriptorProto) String() string { return proto.CompactTextString(m) } +func (*DescriptorProto) ProtoMessage() {} +func (*DescriptorProto) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{2} } + +func (m *DescriptorProto) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *DescriptorProto) GetField() []*FieldDescriptorProto { + if m != nil { + return m.Field + } + return nil +} + +func (m *DescriptorProto) GetExtension() []*FieldDescriptorProto { + if m != nil { + return m.Extension + } + return nil +} + +func (m *DescriptorProto) GetNestedType() []*DescriptorProto { + if m != nil { + return m.NestedType + } + return nil +} + +func (m *DescriptorProto) GetEnumType() []*EnumDescriptorProto { + if m != nil { + return m.EnumType + } + return nil +} + +func (m *DescriptorProto) GetExtensionRange() []*DescriptorProto_ExtensionRange { + if m != nil { + return m.ExtensionRange + } + return nil +} + +func (m *DescriptorProto) GetOneofDecl() []*OneofDescriptorProto { + if m != nil { + return m.OneofDecl + } + return nil +} + +func (m *DescriptorProto) GetOptions() *MessageOptions { + if m != nil { + return m.Options + } + return nil +} + +func (m *DescriptorProto) GetReservedRange() []*DescriptorProto_ReservedRange { + if m != nil { + return m.ReservedRange + } + return nil +} + +func (m *DescriptorProto) GetReservedName() []string { + if m != nil { + return m.ReservedName + } + return nil +} + +type DescriptorProto_ExtensionRange struct { + Start *int32 `protobuf:"varint,1,opt,name=start" json:"start,omitempty"` + End *int32 `protobuf:"varint,2,opt,name=end" json:"end,omitempty"` + Options *ExtensionRangeOptions `protobuf:"bytes,3,opt,name=options" json:"options,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *DescriptorProto_ExtensionRange) Reset() { *m = DescriptorProto_ExtensionRange{} } +func (m *DescriptorProto_ExtensionRange) String() string { return proto.CompactTextString(m) } +func (*DescriptorProto_ExtensionRange) ProtoMessage() {} +func (*DescriptorProto_ExtensionRange) Descriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{2, 0} +} + +func (m *DescriptorProto_ExtensionRange) GetStart() int32 { + if m != nil && m.Start != nil { + return *m.Start + } + return 0 +} + +func (m *DescriptorProto_ExtensionRange) GetEnd() int32 { + if m != nil && m.End != nil { + return *m.End + } + return 0 +} + +func (m *DescriptorProto_ExtensionRange) GetOptions() *ExtensionRangeOptions { + if m != nil { + return m.Options + } + return nil +} + +// Range of reserved tag numbers. Reserved tag numbers may not be used by +// fields or extension ranges in the same message. Reserved ranges may +// not overlap. 
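The reserved-range message just described (its declaration follows immediately below) is normally attached to a DescriptorProto together with reserved names. Here is a minimal sketch, assuming the usual golang/protobuf import paths; the message name "Example", the 5-9 range, and the name "legacy_id" are made-up placeholders. Note that End is exclusive for message reserved ranges, unlike the enum-level reserved range later in this file:

package main

import (
	"fmt"

	"github.com/golang/protobuf/proto"
	"github.com/golang/protobuf/protoc-gen-go/descriptor"
)

func main() {
	// Placeholder message reserving field numbers 5-9 (End is exclusive)
	// and the field name "legacy_id".
	msg := &descriptor.DescriptorProto{
		Name: proto.String("Example"),
		ReservedRange: []*descriptor.DescriptorProto_ReservedRange{
			{Start: proto.Int32(5), End: proto.Int32(10)},
		},
		ReservedName: []string{"legacy_id"},
	}

	for _, r := range msg.GetReservedRange() {
		fmt.Printf("reserved %d to %d\n", r.GetStart(), r.GetEnd()-1)
	}
	fmt.Println("reserved names:", msg.GetReservedName())
}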
+type DescriptorProto_ReservedRange struct { + Start *int32 `protobuf:"varint,1,opt,name=start" json:"start,omitempty"` + End *int32 `protobuf:"varint,2,opt,name=end" json:"end,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *DescriptorProto_ReservedRange) Reset() { *m = DescriptorProto_ReservedRange{} } +func (m *DescriptorProto_ReservedRange) String() string { return proto.CompactTextString(m) } +func (*DescriptorProto_ReservedRange) ProtoMessage() {} +func (*DescriptorProto_ReservedRange) Descriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{2, 1} +} + +func (m *DescriptorProto_ReservedRange) GetStart() int32 { + if m != nil && m.Start != nil { + return *m.Start + } + return 0 +} + +func (m *DescriptorProto_ReservedRange) GetEnd() int32 { + if m != nil && m.End != nil { + return *m.End + } + return 0 +} + +type ExtensionRangeOptions struct { + // The parser stores options it doesn't recognize here. See above. + UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ExtensionRangeOptions) Reset() { *m = ExtensionRangeOptions{} } +func (m *ExtensionRangeOptions) String() string { return proto.CompactTextString(m) } +func (*ExtensionRangeOptions) ProtoMessage() {} +func (*ExtensionRangeOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{3} } + +var extRange_ExtensionRangeOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*ExtensionRangeOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_ExtensionRangeOptions +} + +func (m *ExtensionRangeOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +// Describes a field within a message. +type FieldDescriptorProto struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + Number *int32 `protobuf:"varint,3,opt,name=number" json:"number,omitempty"` + Label *FieldDescriptorProto_Label `protobuf:"varint,4,opt,name=label,enum=google.protobuf.FieldDescriptorProto_Label" json:"label,omitempty"` + // If type_name is set, this need not be set. If both this and type_name + // are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP. + Type *FieldDescriptorProto_Type `protobuf:"varint,5,opt,name=type,enum=google.protobuf.FieldDescriptorProto_Type" json:"type,omitempty"` + // For message and enum types, this is the name of the type. If the name + // starts with a '.', it is fully-qualified. Otherwise, C++-like scoping + // rules are used to find the type (i.e. first the nested types within this + // message are searched, then within the parent, on up to the root + // namespace). + TypeName *string `protobuf:"bytes,6,opt,name=type_name,json=typeName" json:"type_name,omitempty"` + // For extensions, this is the name of the type being extended. It is + // resolved in the same manner as type_name. + Extendee *string `protobuf:"bytes,2,opt,name=extendee" json:"extendee,omitempty"` + // For numeric types, contains the original text representation of the value. + // For booleans, "true" or "false". + // For strings, contains the default text contents (not escaped in any way). + // For bytes, contains the C escaped value. All bytes >= 128 are escaped. + // TODO(kenton): Base-64 encode? 
+ DefaultValue *string `protobuf:"bytes,7,opt,name=default_value,json=defaultValue" json:"default_value,omitempty"` + // If set, gives the index of a oneof in the containing type's oneof_decl + // list. This field is a member of that oneof. + OneofIndex *int32 `protobuf:"varint,9,opt,name=oneof_index,json=oneofIndex" json:"oneof_index,omitempty"` + // JSON name of this field. The value is set by protocol compiler. If the + // user has set a "json_name" option on this field, that option's value + // will be used. Otherwise, it's deduced from the field's name by converting + // it to camelCase. + JsonName *string `protobuf:"bytes,10,opt,name=json_name,json=jsonName" json:"json_name,omitempty"` + Options *FieldOptions `protobuf:"bytes,8,opt,name=options" json:"options,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *FieldDescriptorProto) Reset() { *m = FieldDescriptorProto{} } +func (m *FieldDescriptorProto) String() string { return proto.CompactTextString(m) } +func (*FieldDescriptorProto) ProtoMessage() {} +func (*FieldDescriptorProto) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{4} } + +func (m *FieldDescriptorProto) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *FieldDescriptorProto) GetNumber() int32 { + if m != nil && m.Number != nil { + return *m.Number + } + return 0 +} + +func (m *FieldDescriptorProto) GetLabel() FieldDescriptorProto_Label { + if m != nil && m.Label != nil { + return *m.Label + } + return FieldDescriptorProto_LABEL_OPTIONAL +} + +func (m *FieldDescriptorProto) GetType() FieldDescriptorProto_Type { + if m != nil && m.Type != nil { + return *m.Type + } + return FieldDescriptorProto_TYPE_DOUBLE +} + +func (m *FieldDescriptorProto) GetTypeName() string { + if m != nil && m.TypeName != nil { + return *m.TypeName + } + return "" +} + +func (m *FieldDescriptorProto) GetExtendee() string { + if m != nil && m.Extendee != nil { + return *m.Extendee + } + return "" +} + +func (m *FieldDescriptorProto) GetDefaultValue() string { + if m != nil && m.DefaultValue != nil { + return *m.DefaultValue + } + return "" +} + +func (m *FieldDescriptorProto) GetOneofIndex() int32 { + if m != nil && m.OneofIndex != nil { + return *m.OneofIndex + } + return 0 +} + +func (m *FieldDescriptorProto) GetJsonName() string { + if m != nil && m.JsonName != nil { + return *m.JsonName + } + return "" +} + +func (m *FieldDescriptorProto) GetOptions() *FieldOptions { + if m != nil { + return m.Options + } + return nil +} + +// Describes a oneof. +type OneofDescriptorProto struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + Options *OneofOptions `protobuf:"bytes,2,opt,name=options" json:"options,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *OneofDescriptorProto) Reset() { *m = OneofDescriptorProto{} } +func (m *OneofDescriptorProto) String() string { return proto.CompactTextString(m) } +func (*OneofDescriptorProto) ProtoMessage() {} +func (*OneofDescriptorProto) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{5} } + +func (m *OneofDescriptorProto) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *OneofDescriptorProto) GetOptions() *OneofOptions { + if m != nil { + return m.Options + } + return nil +} + +// Describes an enum type. 
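Before the enum descriptor types below, a short sketch of how the FieldDescriptorProto accessors above behave in practice, assuming the usual golang/protobuf import paths; the field name "name" and number 1 are placeholders. The getters are nil-safe and fall back to the declared defaults (LABEL_OPTIONAL, TYPE_DOUBLE) when nothing is set:

package main

import (
	"fmt"

	"github.com/golang/protobuf/proto"
	"github.com/golang/protobuf/protoc-gen-go/descriptor"
)

func main() {
	// An optional string field "name = 1", spelled out as a descriptor
	// (placeholder names and numbers).
	f := &descriptor.FieldDescriptorProto{
		Name:     proto.String("name"),
		Number:   proto.Int32(1),
		Label:    descriptor.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
		Type:     descriptor.FieldDescriptorProto_TYPE_STRING.Enum(),
		JsonName: proto.String("name"),
	}

	fmt.Println(f.GetName(), f.GetNumber(), f.GetType()) // name 1 TYPE_STRING

	// The getters are nil-safe: a nil descriptor simply reports the
	// declared defaults instead of panicking.
	var empty *descriptor.FieldDescriptorProto
	fmt.Println(empty.GetLabel(), empty.GetType()) // LABEL_OPTIONAL TYPE_DOUBLE
}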
+type EnumDescriptorProto struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + Value []*EnumValueDescriptorProto `protobuf:"bytes,2,rep,name=value" json:"value,omitempty"` + Options *EnumOptions `protobuf:"bytes,3,opt,name=options" json:"options,omitempty"` + // Range of reserved numeric values. Reserved numeric values may not be used + // by enum values in the same enum declaration. Reserved ranges may not + // overlap. + ReservedRange []*EnumDescriptorProto_EnumReservedRange `protobuf:"bytes,4,rep,name=reserved_range,json=reservedRange" json:"reserved_range,omitempty"` + // Reserved enum value names, which may not be reused. A given name may only + // be reserved once. + ReservedName []string `protobuf:"bytes,5,rep,name=reserved_name,json=reservedName" json:"reserved_name,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *EnumDescriptorProto) Reset() { *m = EnumDescriptorProto{} } +func (m *EnumDescriptorProto) String() string { return proto.CompactTextString(m) } +func (*EnumDescriptorProto) ProtoMessage() {} +func (*EnumDescriptorProto) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{6} } + +func (m *EnumDescriptorProto) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *EnumDescriptorProto) GetValue() []*EnumValueDescriptorProto { + if m != nil { + return m.Value + } + return nil +} + +func (m *EnumDescriptorProto) GetOptions() *EnumOptions { + if m != nil { + return m.Options + } + return nil +} + +func (m *EnumDescriptorProto) GetReservedRange() []*EnumDescriptorProto_EnumReservedRange { + if m != nil { + return m.ReservedRange + } + return nil +} + +func (m *EnumDescriptorProto) GetReservedName() []string { + if m != nil { + return m.ReservedName + } + return nil +} + +// Range of reserved numeric values. Reserved values may not be used by +// entries in the same enum. Reserved ranges may not overlap. +// +// Note that this is distinct from DescriptorProto.ReservedRange in that it +// is inclusive such that it can appropriately represent the entire int32 +// domain. +type EnumDescriptorProto_EnumReservedRange struct { + Start *int32 `protobuf:"varint,1,opt,name=start" json:"start,omitempty"` + End *int32 `protobuf:"varint,2,opt,name=end" json:"end,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *EnumDescriptorProto_EnumReservedRange) Reset() { *m = EnumDescriptorProto_EnumReservedRange{} } +func (m *EnumDescriptorProto_EnumReservedRange) String() string { return proto.CompactTextString(m) } +func (*EnumDescriptorProto_EnumReservedRange) ProtoMessage() {} +func (*EnumDescriptorProto_EnumReservedRange) Descriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{6, 0} +} + +func (m *EnumDescriptorProto_EnumReservedRange) GetStart() int32 { + if m != nil && m.Start != nil { + return *m.Start + } + return 0 +} + +func (m *EnumDescriptorProto_EnumReservedRange) GetEnd() int32 { + if m != nil && m.End != nil { + return *m.End + } + return 0 +} + +// Describes a value within an enum. 
+type EnumValueDescriptorProto struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + Number *int32 `protobuf:"varint,2,opt,name=number" json:"number,omitempty"` + Options *EnumValueOptions `protobuf:"bytes,3,opt,name=options" json:"options,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *EnumValueDescriptorProto) Reset() { *m = EnumValueDescriptorProto{} } +func (m *EnumValueDescriptorProto) String() string { return proto.CompactTextString(m) } +func (*EnumValueDescriptorProto) ProtoMessage() {} +func (*EnumValueDescriptorProto) Descriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{7} +} + +func (m *EnumValueDescriptorProto) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *EnumValueDescriptorProto) GetNumber() int32 { + if m != nil && m.Number != nil { + return *m.Number + } + return 0 +} + +func (m *EnumValueDescriptorProto) GetOptions() *EnumValueOptions { + if m != nil { + return m.Options + } + return nil +} + +// Describes a service. +type ServiceDescriptorProto struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + Method []*MethodDescriptorProto `protobuf:"bytes,2,rep,name=method" json:"method,omitempty"` + Options *ServiceOptions `protobuf:"bytes,3,opt,name=options" json:"options,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ServiceDescriptorProto) Reset() { *m = ServiceDescriptorProto{} } +func (m *ServiceDescriptorProto) String() string { return proto.CompactTextString(m) } +func (*ServiceDescriptorProto) ProtoMessage() {} +func (*ServiceDescriptorProto) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{8} } + +func (m *ServiceDescriptorProto) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *ServiceDescriptorProto) GetMethod() []*MethodDescriptorProto { + if m != nil { + return m.Method + } + return nil +} + +func (m *ServiceDescriptorProto) GetOptions() *ServiceOptions { + if m != nil { + return m.Options + } + return nil +} + +// Describes a method of a service. +type MethodDescriptorProto struct { + Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + // Input and output type names. These are resolved in the same way as + // FieldDescriptorProto.type_name, but must refer to a message type. 
+ InputType *string `protobuf:"bytes,2,opt,name=input_type,json=inputType" json:"input_type,omitempty"` + OutputType *string `protobuf:"bytes,3,opt,name=output_type,json=outputType" json:"output_type,omitempty"` + Options *MethodOptions `protobuf:"bytes,4,opt,name=options" json:"options,omitempty"` + // Identifies if client streams multiple client messages + ClientStreaming *bool `protobuf:"varint,5,opt,name=client_streaming,json=clientStreaming,def=0" json:"client_streaming,omitempty"` + // Identifies if server streams multiple server messages + ServerStreaming *bool `protobuf:"varint,6,opt,name=server_streaming,json=serverStreaming,def=0" json:"server_streaming,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *MethodDescriptorProto) Reset() { *m = MethodDescriptorProto{} } +func (m *MethodDescriptorProto) String() string { return proto.CompactTextString(m) } +func (*MethodDescriptorProto) ProtoMessage() {} +func (*MethodDescriptorProto) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{9} } + +const Default_MethodDescriptorProto_ClientStreaming bool = false +const Default_MethodDescriptorProto_ServerStreaming bool = false + +func (m *MethodDescriptorProto) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func (m *MethodDescriptorProto) GetInputType() string { + if m != nil && m.InputType != nil { + return *m.InputType + } + return "" +} + +func (m *MethodDescriptorProto) GetOutputType() string { + if m != nil && m.OutputType != nil { + return *m.OutputType + } + return "" +} + +func (m *MethodDescriptorProto) GetOptions() *MethodOptions { + if m != nil { + return m.Options + } + return nil +} + +func (m *MethodDescriptorProto) GetClientStreaming() bool { + if m != nil && m.ClientStreaming != nil { + return *m.ClientStreaming + } + return Default_MethodDescriptorProto_ClientStreaming +} + +func (m *MethodDescriptorProto) GetServerStreaming() bool { + if m != nil && m.ServerStreaming != nil { + return *m.ServerStreaming + } + return Default_MethodDescriptorProto_ServerStreaming +} + +type FileOptions struct { + // Sets the Java package where classes generated from this .proto will be + // placed. By default, the proto package is used, but this is often + // inappropriate because proto packages do not normally start with backwards + // domain names. + JavaPackage *string `protobuf:"bytes,1,opt,name=java_package,json=javaPackage" json:"java_package,omitempty"` + // If set, all the classes from the .proto file are wrapped in a single + // outer class with the given name. This applies to both Proto1 + // (equivalent to the old "--one_java_file" option) and Proto2 (where + // a .proto always translates to a single class, but you may want to + // explicitly choose the class name). + JavaOuterClassname *string `protobuf:"bytes,8,opt,name=java_outer_classname,json=javaOuterClassname" json:"java_outer_classname,omitempty"` + // If set true, then the Java code generator will generate a separate .java + // file for each top-level message, enum, and service defined in the .proto + // file. Thus, these types will *not* be nested inside the outer class + // named by java_outer_classname. However, the outer class will still be + // generated to contain the file's getDescriptor() method as well as any + // top-level extensions defined in the file. + JavaMultipleFiles *bool `protobuf:"varint,10,opt,name=java_multiple_files,json=javaMultipleFiles,def=0" json:"java_multiple_files,omitempty"` + // This option does nothing. 
+ JavaGenerateEqualsAndHash *bool `protobuf:"varint,20,opt,name=java_generate_equals_and_hash,json=javaGenerateEqualsAndHash" json:"java_generate_equals_and_hash,omitempty"` + // If set true, then the Java2 code generator will generate code that + // throws an exception whenever an attempt is made to assign a non-UTF-8 + // byte sequence to a string field. + // Message reflection will do the same. + // However, an extension field still accepts non-UTF-8 byte sequences. + // This option has no effect on when used with the lite runtime. + JavaStringCheckUtf8 *bool `protobuf:"varint,27,opt,name=java_string_check_utf8,json=javaStringCheckUtf8,def=0" json:"java_string_check_utf8,omitempty"` + OptimizeFor *FileOptions_OptimizeMode `protobuf:"varint,9,opt,name=optimize_for,json=optimizeFor,enum=google.protobuf.FileOptions_OptimizeMode,def=1" json:"optimize_for,omitempty"` + // Sets the Go package where structs generated from this .proto will be + // placed. If omitted, the Go package will be derived from the following: + // - The basename of the package import path, if provided. + // - Otherwise, the package statement in the .proto file, if present. + // - Otherwise, the basename of the .proto file, without extension. + GoPackage *string `protobuf:"bytes,11,opt,name=go_package,json=goPackage" json:"go_package,omitempty"` + // Should generic services be generated in each language? "Generic" services + // are not specific to any particular RPC system. They are generated by the + // main code generators in each language (without additional plugins). + // Generic services were the only kind of service generation supported by + // early versions of google.protobuf. + // + // Generic services are now considered deprecated in favor of using plugins + // that generate code specific to your particular RPC system. Therefore, + // these default to false. Old code which depends on generic services should + // explicitly set them to true. + CcGenericServices *bool `protobuf:"varint,16,opt,name=cc_generic_services,json=ccGenericServices,def=0" json:"cc_generic_services,omitempty"` + JavaGenericServices *bool `protobuf:"varint,17,opt,name=java_generic_services,json=javaGenericServices,def=0" json:"java_generic_services,omitempty"` + PyGenericServices *bool `protobuf:"varint,18,opt,name=py_generic_services,json=pyGenericServices,def=0" json:"py_generic_services,omitempty"` + PhpGenericServices *bool `protobuf:"varint,42,opt,name=php_generic_services,json=phpGenericServices,def=0" json:"php_generic_services,omitempty"` + // Is this file deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for everything in the file, or it will be completely ignored; in the very + // least, this is a formalization for deprecating files. + Deprecated *bool `protobuf:"varint,23,opt,name=deprecated,def=0" json:"deprecated,omitempty"` + // Enables the use of arenas for the proto messages in this file. This applies + // only to generated classes for C++. + CcEnableArenas *bool `protobuf:"varint,31,opt,name=cc_enable_arenas,json=ccEnableArenas,def=0" json:"cc_enable_arenas,omitempty"` + // Sets the objective c class prefix which is prepended to all objective c + // generated classes from this .proto. There is no default. + ObjcClassPrefix *string `protobuf:"bytes,36,opt,name=objc_class_prefix,json=objcClassPrefix" json:"objc_class_prefix,omitempty"` + // Namespace for generated classes; defaults to the package. 
+ CsharpNamespace *string `protobuf:"bytes,37,opt,name=csharp_namespace,json=csharpNamespace" json:"csharp_namespace,omitempty"` + // By default Swift generators will take the proto package and CamelCase it + // replacing '.' with underscore and use that to prefix the types/symbols + // defined. When this options is provided, they will use this value instead + // to prefix the types/symbols defined. + SwiftPrefix *string `protobuf:"bytes,39,opt,name=swift_prefix,json=swiftPrefix" json:"swift_prefix,omitempty"` + // Sets the php class prefix which is prepended to all php generated classes + // from this .proto. Default is empty. + PhpClassPrefix *string `protobuf:"bytes,40,opt,name=php_class_prefix,json=phpClassPrefix" json:"php_class_prefix,omitempty"` + // Use this option to change the namespace of php generated classes. Default + // is empty. When this option is empty, the package name will be used for + // determining the namespace. + PhpNamespace *string `protobuf:"bytes,41,opt,name=php_namespace,json=phpNamespace" json:"php_namespace,omitempty"` + // The parser stores options it doesn't recognize here. See above. + UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *FileOptions) Reset() { *m = FileOptions{} } +func (m *FileOptions) String() string { return proto.CompactTextString(m) } +func (*FileOptions) ProtoMessage() {} +func (*FileOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{10} } + +var extRange_FileOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*FileOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_FileOptions +} + +const Default_FileOptions_JavaMultipleFiles bool = false +const Default_FileOptions_JavaStringCheckUtf8 bool = false +const Default_FileOptions_OptimizeFor FileOptions_OptimizeMode = FileOptions_SPEED +const Default_FileOptions_CcGenericServices bool = false +const Default_FileOptions_JavaGenericServices bool = false +const Default_FileOptions_PyGenericServices bool = false +const Default_FileOptions_PhpGenericServices bool = false +const Default_FileOptions_Deprecated bool = false +const Default_FileOptions_CcEnableArenas bool = false + +func (m *FileOptions) GetJavaPackage() string { + if m != nil && m.JavaPackage != nil { + return *m.JavaPackage + } + return "" +} + +func (m *FileOptions) GetJavaOuterClassname() string { + if m != nil && m.JavaOuterClassname != nil { + return *m.JavaOuterClassname + } + return "" +} + +func (m *FileOptions) GetJavaMultipleFiles() bool { + if m != nil && m.JavaMultipleFiles != nil { + return *m.JavaMultipleFiles + } + return Default_FileOptions_JavaMultipleFiles +} + +func (m *FileOptions) GetJavaGenerateEqualsAndHash() bool { + if m != nil && m.JavaGenerateEqualsAndHash != nil { + return *m.JavaGenerateEqualsAndHash + } + return false +} + +func (m *FileOptions) GetJavaStringCheckUtf8() bool { + if m != nil && m.JavaStringCheckUtf8 != nil { + return *m.JavaStringCheckUtf8 + } + return Default_FileOptions_JavaStringCheckUtf8 +} + +func (m *FileOptions) GetOptimizeFor() FileOptions_OptimizeMode { + if m != nil && m.OptimizeFor != nil { + return *m.OptimizeFor + } + return Default_FileOptions_OptimizeFor +} + +func (m *FileOptions) GetGoPackage() string { + if m != nil && m.GoPackage != nil { + return *m.GoPackage + } + return "" +} + +func (m 
*FileOptions) GetCcGenericServices() bool { + if m != nil && m.CcGenericServices != nil { + return *m.CcGenericServices + } + return Default_FileOptions_CcGenericServices +} + +func (m *FileOptions) GetJavaGenericServices() bool { + if m != nil && m.JavaGenericServices != nil { + return *m.JavaGenericServices + } + return Default_FileOptions_JavaGenericServices +} + +func (m *FileOptions) GetPyGenericServices() bool { + if m != nil && m.PyGenericServices != nil { + return *m.PyGenericServices + } + return Default_FileOptions_PyGenericServices +} + +func (m *FileOptions) GetPhpGenericServices() bool { + if m != nil && m.PhpGenericServices != nil { + return *m.PhpGenericServices + } + return Default_FileOptions_PhpGenericServices +} + +func (m *FileOptions) GetDeprecated() bool { + if m != nil && m.Deprecated != nil { + return *m.Deprecated + } + return Default_FileOptions_Deprecated +} + +func (m *FileOptions) GetCcEnableArenas() bool { + if m != nil && m.CcEnableArenas != nil { + return *m.CcEnableArenas + } + return Default_FileOptions_CcEnableArenas +} + +func (m *FileOptions) GetObjcClassPrefix() string { + if m != nil && m.ObjcClassPrefix != nil { + return *m.ObjcClassPrefix + } + return "" +} + +func (m *FileOptions) GetCsharpNamespace() string { + if m != nil && m.CsharpNamespace != nil { + return *m.CsharpNamespace + } + return "" +} + +func (m *FileOptions) GetSwiftPrefix() string { + if m != nil && m.SwiftPrefix != nil { + return *m.SwiftPrefix + } + return "" +} + +func (m *FileOptions) GetPhpClassPrefix() string { + if m != nil && m.PhpClassPrefix != nil { + return *m.PhpClassPrefix + } + return "" +} + +func (m *FileOptions) GetPhpNamespace() string { + if m != nil && m.PhpNamespace != nil { + return *m.PhpNamespace + } + return "" +} + +func (m *FileOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +type MessageOptions struct { + // Set true to use the old proto1 MessageSet wire format for extensions. + // This is provided for backwards-compatibility with the MessageSet wire + // format. You should not use this for any other reason: It's less + // efficient, has fewer features, and is more complicated. + // + // The message must be defined exactly as follows: + // message Foo { + // option message_set_wire_format = true; + // extensions 4 to max; + // } + // Note that the message cannot have any defined fields; MessageSets only + // have extensions. + // + // All extensions of your type must be singular messages; e.g. they cannot + // be int32s, enums, or repeated messages. + // + // Because this is an option, the above two restrictions are not enforced by + // the protocol compiler. + MessageSetWireFormat *bool `protobuf:"varint,1,opt,name=message_set_wire_format,json=messageSetWireFormat,def=0" json:"message_set_wire_format,omitempty"` + // Disables the generation of the standard "descriptor()" accessor, which can + // conflict with a field of the same name. This is meant to make migration + // from proto1 easier; new code should avoid fields named "descriptor". + NoStandardDescriptorAccessor *bool `protobuf:"varint,2,opt,name=no_standard_descriptor_accessor,json=noStandardDescriptorAccessor,def=0" json:"no_standard_descriptor_accessor,omitempty"` + // Is this message deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the message, or it will be completely ignored; in the very least, + // this is a formalization for deprecating messages. 
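As an aside on the FileOptions accessors above (the MessageOptions fields continue right after this sketch): unset options report their declared defaults, for example optimize_for falls back to SPEED and the *_generic_services flags to false. A minimal sketch assuming the usual golang/protobuf import paths; the Go and Java package strings are made-up placeholders:

package main

import (
	"fmt"

	"github.com/golang/protobuf/proto"
	"github.com/golang/protobuf/protoc-gen-go/descriptor"
)

func main() {
	// Placeholder package names for illustration only.
	opts := &descriptor.FileOptions{
		GoPackage:   proto.String("github.com/example/project/examplepb"),
		JavaPackage: proto.String("com.example.project"),
		OptimizeFor: descriptor.FileOptions_LITE_RUNTIME.Enum(),
	}

	fmt.Println(opts.GetGoPackage())   // github.com/example/project/examplepb
	fmt.Println(opts.GetOptimizeFor()) // LITE_RUNTIME

	// Unset options report their declared defaults.
	fmt.Println(new(descriptor.FileOptions).GetOptimizeFor())       // SPEED
	fmt.Println(new(descriptor.FileOptions).GetCcGenericServices()) // false
}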
+ Deprecated *bool `protobuf:"varint,3,opt,name=deprecated,def=0" json:"deprecated,omitempty"` + // Whether the message is an automatically generated map entry type for the + // maps field. + // + // For maps fields: + // map map_field = 1; + // The parsed descriptor looks like: + // message MapFieldEntry { + // option map_entry = true; + // optional KeyType key = 1; + // optional ValueType value = 2; + // } + // repeated MapFieldEntry map_field = 1; + // + // Implementations may choose not to generate the map_entry=true message, but + // use a native map in the target language to hold the keys and values. + // The reflection APIs in such implementions still need to work as + // if the field is a repeated message field. + // + // NOTE: Do not set the option in .proto files. Always use the maps syntax + // instead. The option should only be implicitly set by the proto compiler + // parser. + MapEntry *bool `protobuf:"varint,7,opt,name=map_entry,json=mapEntry" json:"map_entry,omitempty"` + // The parser stores options it doesn't recognize here. See above. + UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *MessageOptions) Reset() { *m = MessageOptions{} } +func (m *MessageOptions) String() string { return proto.CompactTextString(m) } +func (*MessageOptions) ProtoMessage() {} +func (*MessageOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{11} } + +var extRange_MessageOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*MessageOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_MessageOptions +} + +const Default_MessageOptions_MessageSetWireFormat bool = false +const Default_MessageOptions_NoStandardDescriptorAccessor bool = false +const Default_MessageOptions_Deprecated bool = false + +func (m *MessageOptions) GetMessageSetWireFormat() bool { + if m != nil && m.MessageSetWireFormat != nil { + return *m.MessageSetWireFormat + } + return Default_MessageOptions_MessageSetWireFormat +} + +func (m *MessageOptions) GetNoStandardDescriptorAccessor() bool { + if m != nil && m.NoStandardDescriptorAccessor != nil { + return *m.NoStandardDescriptorAccessor + } + return Default_MessageOptions_NoStandardDescriptorAccessor +} + +func (m *MessageOptions) GetDeprecated() bool { + if m != nil && m.Deprecated != nil { + return *m.Deprecated + } + return Default_MessageOptions_Deprecated +} + +func (m *MessageOptions) GetMapEntry() bool { + if m != nil && m.MapEntry != nil { + return *m.MapEntry + } + return false +} + +func (m *MessageOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +type FieldOptions struct { + // The ctype option instructs the C++ code generator to use a different + // representation of the field than it normally would. See the specific + // options below. This option is not yet implemented in the open source + // release -- sorry, we'll try to include it in a future version! + Ctype *FieldOptions_CType `protobuf:"varint,1,opt,name=ctype,enum=google.protobuf.FieldOptions_CType,def=0" json:"ctype,omitempty"` + // The packed option can be enabled for repeated primitive fields to enable + // a more efficient representation on the wire. 
Rather than repeatedly + // writing the tag and type for each element, the entire array is encoded as + // a single length-delimited blob. In proto3, only explicit setting it to + // false will avoid using packed encoding. + Packed *bool `protobuf:"varint,2,opt,name=packed" json:"packed,omitempty"` + // The jstype option determines the JavaScript type used for values of the + // field. The option is permitted only for 64 bit integral and fixed types + // (int64, uint64, sint64, fixed64, sfixed64). A field with jstype JS_STRING + // is represented as JavaScript string, which avoids loss of precision that + // can happen when a large value is converted to a floating point JavaScript. + // Specifying JS_NUMBER for the jstype causes the generated JavaScript code to + // use the JavaScript "number" type. The behavior of the default option + // JS_NORMAL is implementation dependent. + // + // This option is an enum to permit additional types to be added, e.g. + // goog.math.Integer. + Jstype *FieldOptions_JSType `protobuf:"varint,6,opt,name=jstype,enum=google.protobuf.FieldOptions_JSType,def=0" json:"jstype,omitempty"` + // Should this field be parsed lazily? Lazy applies only to message-type + // fields. It means that when the outer message is initially parsed, the + // inner message's contents will not be parsed but instead stored in encoded + // form. The inner message will actually be parsed when it is first accessed. + // + // This is only a hint. Implementations are free to choose whether to use + // eager or lazy parsing regardless of the value of this option. However, + // setting this option true suggests that the protocol author believes that + // using lazy parsing on this field is worth the additional bookkeeping + // overhead typically needed to implement it. + // + // This option does not affect the public interface of any generated code; + // all method signatures remain the same. Furthermore, thread-safety of the + // interface is not affected by this option; const methods remain safe to + // call from multiple threads concurrently, while non-const methods continue + // to require exclusive access. + // + // + // Note that implementations may choose not to check required fields within + // a lazy sub-message. That is, calling IsInitialized() on the outer message + // may return true even if the inner message has missing required fields. + // This is necessary because otherwise the inner message would have to be + // parsed in order to perform the check, defeating the purpose of lazy + // parsing. An implementation which chooses not to check required fields + // must be consistent about it. That is, for any particular sub-message, the + // implementation must either *always* check its required fields, or *never* + // check its required fields, regardless of whether or not the message has + // been parsed. + Lazy *bool `protobuf:"varint,5,opt,name=lazy,def=0" json:"lazy,omitempty"` + // Is this field deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for accessors, or it will be completely ignored; in the very least, this + // is a formalization for deprecating fields. + Deprecated *bool `protobuf:"varint,3,opt,name=deprecated,def=0" json:"deprecated,omitempty"` + // For Google-internal migration only. Do not use. + Weak *bool `protobuf:"varint,10,opt,name=weak,def=0" json:"weak,omitempty"` + // The parser stores options it doesn't recognize here. See above. 
+ UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *FieldOptions) Reset() { *m = FieldOptions{} } +func (m *FieldOptions) String() string { return proto.CompactTextString(m) } +func (*FieldOptions) ProtoMessage() {} +func (*FieldOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{12} } + +var extRange_FieldOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*FieldOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_FieldOptions +} + +const Default_FieldOptions_Ctype FieldOptions_CType = FieldOptions_STRING +const Default_FieldOptions_Jstype FieldOptions_JSType = FieldOptions_JS_NORMAL +const Default_FieldOptions_Lazy bool = false +const Default_FieldOptions_Deprecated bool = false +const Default_FieldOptions_Weak bool = false + +func (m *FieldOptions) GetCtype() FieldOptions_CType { + if m != nil && m.Ctype != nil { + return *m.Ctype + } + return Default_FieldOptions_Ctype +} + +func (m *FieldOptions) GetPacked() bool { + if m != nil && m.Packed != nil { + return *m.Packed + } + return false +} + +func (m *FieldOptions) GetJstype() FieldOptions_JSType { + if m != nil && m.Jstype != nil { + return *m.Jstype + } + return Default_FieldOptions_Jstype +} + +func (m *FieldOptions) GetLazy() bool { + if m != nil && m.Lazy != nil { + return *m.Lazy + } + return Default_FieldOptions_Lazy +} + +func (m *FieldOptions) GetDeprecated() bool { + if m != nil && m.Deprecated != nil { + return *m.Deprecated + } + return Default_FieldOptions_Deprecated +} + +func (m *FieldOptions) GetWeak() bool { + if m != nil && m.Weak != nil { + return *m.Weak + } + return Default_FieldOptions_Weak +} + +func (m *FieldOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +type OneofOptions struct { + // The parser stores options it doesn't recognize here. See above. + UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *OneofOptions) Reset() { *m = OneofOptions{} } +func (m *OneofOptions) String() string { return proto.CompactTextString(m) } +func (*OneofOptions) ProtoMessage() {} +func (*OneofOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{13} } + +var extRange_OneofOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*OneofOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_OneofOptions +} + +func (m *OneofOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +type EnumOptions struct { + // Set this option to true to allow mapping different tag names to the same + // value. + AllowAlias *bool `protobuf:"varint,2,opt,name=allow_alias,json=allowAlias" json:"allow_alias,omitempty"` + // Is this enum deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the enum, or it will be completely ignored; in the very least, this + // is a formalization for deprecating enums. 
+ Deprecated *bool `protobuf:"varint,3,opt,name=deprecated,def=0" json:"deprecated,omitempty"` + // The parser stores options it doesn't recognize here. See above. + UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *EnumOptions) Reset() { *m = EnumOptions{} } +func (m *EnumOptions) String() string { return proto.CompactTextString(m) } +func (*EnumOptions) ProtoMessage() {} +func (*EnumOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{14} } + +var extRange_EnumOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*EnumOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_EnumOptions +} + +const Default_EnumOptions_Deprecated bool = false + +func (m *EnumOptions) GetAllowAlias() bool { + if m != nil && m.AllowAlias != nil { + return *m.AllowAlias + } + return false +} + +func (m *EnumOptions) GetDeprecated() bool { + if m != nil && m.Deprecated != nil { + return *m.Deprecated + } + return Default_EnumOptions_Deprecated +} + +func (m *EnumOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +type EnumValueOptions struct { + // Is this enum value deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the enum value, or it will be completely ignored; in the very least, + // this is a formalization for deprecating enum values. + Deprecated *bool `protobuf:"varint,1,opt,name=deprecated,def=0" json:"deprecated,omitempty"` + // The parser stores options it doesn't recognize here. See above. + UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *EnumValueOptions) Reset() { *m = EnumValueOptions{} } +func (m *EnumValueOptions) String() string { return proto.CompactTextString(m) } +func (*EnumValueOptions) ProtoMessage() {} +func (*EnumValueOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{15} } + +var extRange_EnumValueOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*EnumValueOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_EnumValueOptions +} + +const Default_EnumValueOptions_Deprecated bool = false + +func (m *EnumValueOptions) GetDeprecated() bool { + if m != nil && m.Deprecated != nil { + return *m.Deprecated + } + return Default_EnumValueOptions_Deprecated +} + +func (m *EnumValueOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +type ServiceOptions struct { + // Is this service deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the service, or it will be completely ignored; in the very least, + // this is a formalization for deprecating services. + Deprecated *bool `protobuf:"varint,33,opt,name=deprecated,def=0" json:"deprecated,omitempty"` + // The parser stores options it doesn't recognize here. See above. 
+ UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ServiceOptions) Reset() { *m = ServiceOptions{} } +func (m *ServiceOptions) String() string { return proto.CompactTextString(m) } +func (*ServiceOptions) ProtoMessage() {} +func (*ServiceOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{16} } + +var extRange_ServiceOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*ServiceOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_ServiceOptions +} + +const Default_ServiceOptions_Deprecated bool = false + +func (m *ServiceOptions) GetDeprecated() bool { + if m != nil && m.Deprecated != nil { + return *m.Deprecated + } + return Default_ServiceOptions_Deprecated +} + +func (m *ServiceOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +type MethodOptions struct { + // Is this method deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the method, or it will be completely ignored; in the very least, + // this is a formalization for deprecating methods. + Deprecated *bool `protobuf:"varint,33,opt,name=deprecated,def=0" json:"deprecated,omitempty"` + IdempotencyLevel *MethodOptions_IdempotencyLevel `protobuf:"varint,34,opt,name=idempotency_level,json=idempotencyLevel,enum=google.protobuf.MethodOptions_IdempotencyLevel,def=0" json:"idempotency_level,omitempty"` + // The parser stores options it doesn't recognize here. See above. + UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` + proto.XXX_InternalExtensions `json:"-"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *MethodOptions) Reset() { *m = MethodOptions{} } +func (m *MethodOptions) String() string { return proto.CompactTextString(m) } +func (*MethodOptions) ProtoMessage() {} +func (*MethodOptions) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{17} } + +var extRange_MethodOptions = []proto.ExtensionRange{ + {Start: 1000, End: 536870911}, +} + +func (*MethodOptions) ExtensionRangeArray() []proto.ExtensionRange { + return extRange_MethodOptions +} + +const Default_MethodOptions_Deprecated bool = false +const Default_MethodOptions_IdempotencyLevel MethodOptions_IdempotencyLevel = MethodOptions_IDEMPOTENCY_UNKNOWN + +func (m *MethodOptions) GetDeprecated() bool { + if m != nil && m.Deprecated != nil { + return *m.Deprecated + } + return Default_MethodOptions_Deprecated +} + +func (m *MethodOptions) GetIdempotencyLevel() MethodOptions_IdempotencyLevel { + if m != nil && m.IdempotencyLevel != nil { + return *m.IdempotencyLevel + } + return Default_MethodOptions_IdempotencyLevel +} + +func (m *MethodOptions) GetUninterpretedOption() []*UninterpretedOption { + if m != nil { + return m.UninterpretedOption + } + return nil +} + +// A message representing a option the parser does not recognize. This only +// appears in options protos created by the compiler::Parser class. +// DescriptorPool resolves these when building Descriptor objects. Therefore, +// options protos in descriptor objects (e.g. returned by Descriptor::options(), +// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions +// in them. 
+type UninterpretedOption struct { + Name []*UninterpretedOption_NamePart `protobuf:"bytes,2,rep,name=name" json:"name,omitempty"` + // The value of the uninterpreted option, in whatever type the tokenizer + // identified it as during parsing. Exactly one of these should be set. + IdentifierValue *string `protobuf:"bytes,3,opt,name=identifier_value,json=identifierValue" json:"identifier_value,omitempty"` + PositiveIntValue *uint64 `protobuf:"varint,4,opt,name=positive_int_value,json=positiveIntValue" json:"positive_int_value,omitempty"` + NegativeIntValue *int64 `protobuf:"varint,5,opt,name=negative_int_value,json=negativeIntValue" json:"negative_int_value,omitempty"` + DoubleValue *float64 `protobuf:"fixed64,6,opt,name=double_value,json=doubleValue" json:"double_value,omitempty"` + StringValue []byte `protobuf:"bytes,7,opt,name=string_value,json=stringValue" json:"string_value,omitempty"` + AggregateValue *string `protobuf:"bytes,8,opt,name=aggregate_value,json=aggregateValue" json:"aggregate_value,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *UninterpretedOption) Reset() { *m = UninterpretedOption{} } +func (m *UninterpretedOption) String() string { return proto.CompactTextString(m) } +func (*UninterpretedOption) ProtoMessage() {} +func (*UninterpretedOption) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{18} } + +func (m *UninterpretedOption) GetName() []*UninterpretedOption_NamePart { + if m != nil { + return m.Name + } + return nil +} + +func (m *UninterpretedOption) GetIdentifierValue() string { + if m != nil && m.IdentifierValue != nil { + return *m.IdentifierValue + } + return "" +} + +func (m *UninterpretedOption) GetPositiveIntValue() uint64 { + if m != nil && m.PositiveIntValue != nil { + return *m.PositiveIntValue + } + return 0 +} + +func (m *UninterpretedOption) GetNegativeIntValue() int64 { + if m != nil && m.NegativeIntValue != nil { + return *m.NegativeIntValue + } + return 0 +} + +func (m *UninterpretedOption) GetDoubleValue() float64 { + if m != nil && m.DoubleValue != nil { + return *m.DoubleValue + } + return 0 +} + +func (m *UninterpretedOption) GetStringValue() []byte { + if m != nil { + return m.StringValue + } + return nil +} + +func (m *UninterpretedOption) GetAggregateValue() string { + if m != nil && m.AggregateValue != nil { + return *m.AggregateValue + } + return "" +} + +// The name of the uninterpreted option. Each string represents a segment in +// a dot-separated name. is_extension is true iff a segment represents an +// extension (denoted with parentheses in options specs in .proto files). +// E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents +// "foo.(bar.baz).qux". 
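The "foo.(bar.baz).qux" rendering described above can be reproduced directly from the NamePart message that follows. A minimal sketch assuming the usual golang/protobuf import paths; dottedName is a hypothetical helper written only for this illustration, not part of the generated file:

package main

import (
	"fmt"
	"strings"

	"github.com/golang/protobuf/proto"
	"github.com/golang/protobuf/protoc-gen-go/descriptor"
)

// dottedName is a hypothetical helper that renders an option name the way the
// comment above describes: extension segments are wrapped in parentheses.
func dottedName(parts []*descriptor.UninterpretedOption_NamePart) string {
	segs := make([]string, 0, len(parts))
	for _, p := range parts {
		if p.GetIsExtension() {
			segs = append(segs, "("+p.GetNamePart()+")")
		} else {
			segs = append(segs, p.GetNamePart())
		}
	}
	return strings.Join(segs, ".")
}

func main() {
	name := []*descriptor.UninterpretedOption_NamePart{
		{NamePart: proto.String("foo"), IsExtension: proto.Bool(false)},
		{NamePart: proto.String("bar.baz"), IsExtension: proto.Bool(true)},
		{NamePart: proto.String("qux"), IsExtension: proto.Bool(false)},
	}
	fmt.Println(dottedName(name)) // foo.(bar.baz).qux
}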
+type UninterpretedOption_NamePart struct { + NamePart *string `protobuf:"bytes,1,req,name=name_part,json=namePart" json:"name_part,omitempty"` + IsExtension *bool `protobuf:"varint,2,req,name=is_extension,json=isExtension" json:"is_extension,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *UninterpretedOption_NamePart) Reset() { *m = UninterpretedOption_NamePart{} } +func (m *UninterpretedOption_NamePart) String() string { return proto.CompactTextString(m) } +func (*UninterpretedOption_NamePart) ProtoMessage() {} +func (*UninterpretedOption_NamePart) Descriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{18, 0} +} + +func (m *UninterpretedOption_NamePart) GetNamePart() string { + if m != nil && m.NamePart != nil { + return *m.NamePart + } + return "" +} + +func (m *UninterpretedOption_NamePart) GetIsExtension() bool { + if m != nil && m.IsExtension != nil { + return *m.IsExtension + } + return false +} + +// Encapsulates information about the original source file from which a +// FileDescriptorProto was generated. +type SourceCodeInfo struct { + // A Location identifies a piece of source code in a .proto file which + // corresponds to a particular definition. This information is intended + // to be useful to IDEs, code indexers, documentation generators, and similar + // tools. + // + // For example, say we have a file like: + // message Foo { + // optional string foo = 1; + // } + // Let's look at just the field definition: + // optional string foo = 1; + // ^ ^^ ^^ ^ ^^^ + // a bc de f ghi + // We have the following locations: + // span path represents + // [a,i) [ 4, 0, 2, 0 ] The whole field definition. + // [a,b) [ 4, 0, 2, 0, 4 ] The label (optional). + // [c,d) [ 4, 0, 2, 0, 5 ] The type (string). + // [e,f) [ 4, 0, 2, 0, 1 ] The name (foo). + // [g,h) [ 4, 0, 2, 0, 3 ] The number (1). + // + // Notes: + // - A location may refer to a repeated field itself (i.e. not to any + // particular index within it). This is used whenever a set of elements are + // logically enclosed in a single code segment. For example, an entire + // extend block (possibly containing multiple extension definitions) will + // have an outer location whose path refers to the "extensions" repeated + // field without an index. + // - Multiple locations may have the same path. This happens when a single + // logical declaration is spread out across multiple places. The most + // obvious example is the "extend" block again -- there may be multiple + // extend blocks in the same scope, each of which will have the same path. + // - A location's span is not always a subset of its parent's span. For + // example, the "extendee" of an extension declaration appears at the + // beginning of the "extend" block and is shared by all extensions within + // the block. + // - Just because a location's span is a subset of some other location's span + // does not mean that it is a descendent. For example, a "group" defines + // both a type and a field in a single declaration. Thus, the locations + // corresponding to the type and field and their components will overlap. + // - Code which tries to interpret locations should probably be designed to + // ignore those that it doesn't understand, as more types of locations could + // be recorded in the future. 
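As a concrete reading of the path and span notes above (the Location field itself follows this sketch), the whole "optional string foo = 1;" definition from the example could be recorded roughly as below, assuming the usual golang/protobuf import path; the span numbers are illustrative:

package main

import (
	"fmt"

	"github.com/golang/protobuf/protoc-gen-go/descriptor"
)

func main() {
	// Path for the whole field definition: FileDescriptorProto.message_type
	// (field 4), first message (index 0), DescriptorProto.field (field 2),
	// first field (index 0).
	loc := &descriptor.SourceCodeInfo_Location{
		Path: []int32{4, 0, 2, 0},
		// Start line, start column, end column (zero-based; three elements
		// mean the location ends on the same line). Illustrative numbers.
		Span: []int32{1, 2, 26},
	}
	info := &descriptor.SourceCodeInfo{
		Location: []*descriptor.SourceCodeInfo_Location{loc},
	}

	fmt.Println(info.GetLocation()[0].GetPath()) // [4 0 2 0]
}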
+ Location []*SourceCodeInfo_Location `protobuf:"bytes,1,rep,name=location" json:"location,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *SourceCodeInfo) Reset() { *m = SourceCodeInfo{} } +func (m *SourceCodeInfo) String() string { return proto.CompactTextString(m) } +func (*SourceCodeInfo) ProtoMessage() {} +func (*SourceCodeInfo) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{19} } + +func (m *SourceCodeInfo) GetLocation() []*SourceCodeInfo_Location { + if m != nil { + return m.Location + } + return nil +} + +type SourceCodeInfo_Location struct { + // Identifies which part of the FileDescriptorProto was defined at this + // location. + // + // Each element is a field number or an index. They form a path from + // the root FileDescriptorProto to the place where the definition. For + // example, this path: + // [ 4, 3, 2, 7, 1 ] + // refers to: + // file.message_type(3) // 4, 3 + // .field(7) // 2, 7 + // .name() // 1 + // This is because FileDescriptorProto.message_type has field number 4: + // repeated DescriptorProto message_type = 4; + // and DescriptorProto.field has field number 2: + // repeated FieldDescriptorProto field = 2; + // and FieldDescriptorProto.name has field number 1: + // optional string name = 1; + // + // Thus, the above path gives the location of a field name. If we removed + // the last element: + // [ 4, 3, 2, 7 ] + // this path refers to the whole field declaration (from the beginning + // of the label to the terminating semicolon). + Path []int32 `protobuf:"varint,1,rep,packed,name=path" json:"path,omitempty"` + // Always has exactly three or four elements: start line, start column, + // end line (optional, otherwise assumed same as start line), end column. + // These are packed into a single field for efficiency. Note that line + // and column numbers are zero-based -- typically you will want to add + // 1 to each before displaying to a user. + Span []int32 `protobuf:"varint,2,rep,packed,name=span" json:"span,omitempty"` + // If this SourceCodeInfo represents a complete declaration, these are any + // comments appearing before and after the declaration which appear to be + // attached to the declaration. + // + // A series of line comments appearing on consecutive lines, with no other + // tokens appearing on those lines, will be treated as a single comment. + // + // leading_detached_comments will keep paragraphs of comments that appear + // before (but not connected to) the current element. Each paragraph, + // separated by empty lines, will be one comment element in the repeated + // field. + // + // Only the comment content is provided; comment markers (e.g. //) are + // stripped out. For block comments, leading whitespace and an asterisk + // will be stripped from the beginning of each line other than the first. + // Newlines are included in the output. + // + // Examples: + // + // optional int32 foo = 1; // Comment attached to foo. + // // Comment attached to bar. + // optional int32 bar = 2; + // + // optional string baz = 3; + // // Comment attached to baz. + // // Another line attached to baz. + // + // // Comment attached to qux. + // // + // // Another line attached to qux. + // optional double qux = 4; + // + // // Detached comment for corge. This is not leading or trailing comments + // // to qux or corge because there are blank lines separating it from + // // both. + // + // // Detached comment for corge paragraph 2. + // + // optional string corge = 5; + // /* Block comment attached + // * to corge. 
Leading asterisks + // * will be removed. */ + // /* Block comment attached to + // * grault. */ + // optional int32 grault = 6; + // + // // ignored detached comments. + LeadingComments *string `protobuf:"bytes,3,opt,name=leading_comments,json=leadingComments" json:"leading_comments,omitempty"` + TrailingComments *string `protobuf:"bytes,4,opt,name=trailing_comments,json=trailingComments" json:"trailing_comments,omitempty"` + LeadingDetachedComments []string `protobuf:"bytes,6,rep,name=leading_detached_comments,json=leadingDetachedComments" json:"leading_detached_comments,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *SourceCodeInfo_Location) Reset() { *m = SourceCodeInfo_Location{} } +func (m *SourceCodeInfo_Location) String() string { return proto.CompactTextString(m) } +func (*SourceCodeInfo_Location) ProtoMessage() {} +func (*SourceCodeInfo_Location) Descriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{19, 0} +} + +func (m *SourceCodeInfo_Location) GetPath() []int32 { + if m != nil { + return m.Path + } + return nil +} + +func (m *SourceCodeInfo_Location) GetSpan() []int32 { + if m != nil { + return m.Span + } + return nil +} + +func (m *SourceCodeInfo_Location) GetLeadingComments() string { + if m != nil && m.LeadingComments != nil { + return *m.LeadingComments + } + return "" +} + +func (m *SourceCodeInfo_Location) GetTrailingComments() string { + if m != nil && m.TrailingComments != nil { + return *m.TrailingComments + } + return "" +} + +func (m *SourceCodeInfo_Location) GetLeadingDetachedComments() []string { + if m != nil { + return m.LeadingDetachedComments + } + return nil +} + +// Describes the relationship between generated code and its original source +// file. A GeneratedCodeInfo message is associated with only one generated +// source file, but may contain references to different source .proto files. +type GeneratedCodeInfo struct { + // An Annotation connects some span of text in generated code to an element + // of its generating .proto file. + Annotation []*GeneratedCodeInfo_Annotation `protobuf:"bytes,1,rep,name=annotation" json:"annotation,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *GeneratedCodeInfo) Reset() { *m = GeneratedCodeInfo{} } +func (m *GeneratedCodeInfo) String() string { return proto.CompactTextString(m) } +func (*GeneratedCodeInfo) ProtoMessage() {} +func (*GeneratedCodeInfo) Descriptor() ([]byte, []int) { return fileDescriptorDescriptor, []int{20} } + +func (m *GeneratedCodeInfo) GetAnnotation() []*GeneratedCodeInfo_Annotation { + if m != nil { + return m.Annotation + } + return nil +} + +type GeneratedCodeInfo_Annotation struct { + // Identifies the element in the original source .proto file. This field + // is formatted the same as SourceCodeInfo.Location.path. + Path []int32 `protobuf:"varint,1,rep,packed,name=path" json:"path,omitempty"` + // Identifies the filesystem path to the original source .proto. + SourceFile *string `protobuf:"bytes,2,opt,name=source_file,json=sourceFile" json:"source_file,omitempty"` + // Identifies the starting offset in bytes in the generated code + // that relates to the identified object. + Begin *int32 `protobuf:"varint,3,opt,name=begin" json:"begin,omitempty"` + // Identifies the ending offset in bytes in the generated code that + // relates to the identified offset. The end offset should be one past + // the last relevant byte (so the length of the text = end - begin). 
+ End *int32 `protobuf:"varint,4,opt,name=end" json:"end,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *GeneratedCodeInfo_Annotation) Reset() { *m = GeneratedCodeInfo_Annotation{} } +func (m *GeneratedCodeInfo_Annotation) String() string { return proto.CompactTextString(m) } +func (*GeneratedCodeInfo_Annotation) ProtoMessage() {} +func (*GeneratedCodeInfo_Annotation) Descriptor() ([]byte, []int) { + return fileDescriptorDescriptor, []int{20, 0} +} + +func (m *GeneratedCodeInfo_Annotation) GetPath() []int32 { + if m != nil { + return m.Path + } + return nil +} + +func (m *GeneratedCodeInfo_Annotation) GetSourceFile() string { + if m != nil && m.SourceFile != nil { + return *m.SourceFile + } + return "" +} + +func (m *GeneratedCodeInfo_Annotation) GetBegin() int32 { + if m != nil && m.Begin != nil { + return *m.Begin + } + return 0 +} + +func (m *GeneratedCodeInfo_Annotation) GetEnd() int32 { + if m != nil && m.End != nil { + return *m.End + } + return 0 +} + +func init() { + proto.RegisterType((*FileDescriptorSet)(nil), "google.protobuf.FileDescriptorSet") + proto.RegisterType((*FileDescriptorProto)(nil), "google.protobuf.FileDescriptorProto") + proto.RegisterType((*DescriptorProto)(nil), "google.protobuf.DescriptorProto") + proto.RegisterType((*DescriptorProto_ExtensionRange)(nil), "google.protobuf.DescriptorProto.ExtensionRange") + proto.RegisterType((*DescriptorProto_ReservedRange)(nil), "google.protobuf.DescriptorProto.ReservedRange") + proto.RegisterType((*ExtensionRangeOptions)(nil), "google.protobuf.ExtensionRangeOptions") + proto.RegisterType((*FieldDescriptorProto)(nil), "google.protobuf.FieldDescriptorProto") + proto.RegisterType((*OneofDescriptorProto)(nil), "google.protobuf.OneofDescriptorProto") + proto.RegisterType((*EnumDescriptorProto)(nil), "google.protobuf.EnumDescriptorProto") + proto.RegisterType((*EnumDescriptorProto_EnumReservedRange)(nil), "google.protobuf.EnumDescriptorProto.EnumReservedRange") + proto.RegisterType((*EnumValueDescriptorProto)(nil), "google.protobuf.EnumValueDescriptorProto") + proto.RegisterType((*ServiceDescriptorProto)(nil), "google.protobuf.ServiceDescriptorProto") + proto.RegisterType((*MethodDescriptorProto)(nil), "google.protobuf.MethodDescriptorProto") + proto.RegisterType((*FileOptions)(nil), "google.protobuf.FileOptions") + proto.RegisterType((*MessageOptions)(nil), "google.protobuf.MessageOptions") + proto.RegisterType((*FieldOptions)(nil), "google.protobuf.FieldOptions") + proto.RegisterType((*OneofOptions)(nil), "google.protobuf.OneofOptions") + proto.RegisterType((*EnumOptions)(nil), "google.protobuf.EnumOptions") + proto.RegisterType((*EnumValueOptions)(nil), "google.protobuf.EnumValueOptions") + proto.RegisterType((*ServiceOptions)(nil), "google.protobuf.ServiceOptions") + proto.RegisterType((*MethodOptions)(nil), "google.protobuf.MethodOptions") + proto.RegisterType((*UninterpretedOption)(nil), "google.protobuf.UninterpretedOption") + proto.RegisterType((*UninterpretedOption_NamePart)(nil), "google.protobuf.UninterpretedOption.NamePart") + proto.RegisterType((*SourceCodeInfo)(nil), "google.protobuf.SourceCodeInfo") + proto.RegisterType((*SourceCodeInfo_Location)(nil), "google.protobuf.SourceCodeInfo.Location") + proto.RegisterType((*GeneratedCodeInfo)(nil), "google.protobuf.GeneratedCodeInfo") + proto.RegisterType((*GeneratedCodeInfo_Annotation)(nil), "google.protobuf.GeneratedCodeInfo.Annotation") + proto.RegisterEnum("google.protobuf.FieldDescriptorProto_Type", FieldDescriptorProto_Type_name, 
FieldDescriptorProto_Type_value) + proto.RegisterEnum("google.protobuf.FieldDescriptorProto_Label", FieldDescriptorProto_Label_name, FieldDescriptorProto_Label_value) + proto.RegisterEnum("google.protobuf.FileOptions_OptimizeMode", FileOptions_OptimizeMode_name, FileOptions_OptimizeMode_value) + proto.RegisterEnum("google.protobuf.FieldOptions_CType", FieldOptions_CType_name, FieldOptions_CType_value) + proto.RegisterEnum("google.protobuf.FieldOptions_JSType", FieldOptions_JSType_name, FieldOptions_JSType_value) + proto.RegisterEnum("google.protobuf.MethodOptions_IdempotencyLevel", MethodOptions_IdempotencyLevel_name, MethodOptions_IdempotencyLevel_value) +} + +func init() { proto.RegisterFile("descriptor.proto", fileDescriptorDescriptor) } + +var fileDescriptorDescriptor = []byte{ + // 2487 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xc4, 0x59, 0xcd, 0x6f, 0xdb, 0xc8, + 0x15, 0x5f, 0x7d, 0x5a, 0x7a, 0x92, 0xe5, 0xf1, 0xd8, 0x9b, 0x30, 0xde, 0x8f, 0x38, 0xda, 0x8f, + 0x38, 0x49, 0xab, 0x2c, 0x9c, 0xc4, 0xc9, 0x3a, 0xc5, 0xb6, 0xb2, 0xc4, 0x78, 0x95, 0xca, 0x92, + 0x4a, 0xc9, 0xdd, 0x64, 0x8b, 0x82, 0x18, 0x93, 0x23, 0x89, 0x09, 0x45, 0x72, 0x49, 0x2a, 0x89, + 0x83, 0x1e, 0x02, 0xf4, 0xd4, 0xff, 0xa0, 0x28, 0x8a, 0x1e, 0x7a, 0x59, 0xa0, 0xd7, 0x02, 0x05, + 0xda, 0x7b, 0xaf, 0x05, 0x7a, 0xef, 0xa1, 0x40, 0x0b, 0xb4, 0x7f, 0x42, 0x8f, 0xc5, 0xcc, 0x90, + 0x14, 0xf5, 0x95, 0x78, 0x17, 0x48, 0xf6, 0x64, 0xcf, 0xef, 0xfd, 0xde, 0xe3, 0x9b, 0x37, 0x6f, + 0xde, 0xbc, 0x19, 0x01, 0xd2, 0xa9, 0xa7, 0xb9, 0x86, 0xe3, 0xdb, 0x6e, 0xc5, 0x71, 0x6d, 0xdf, + 0xc6, 0x6b, 0x03, 0xdb, 0x1e, 0x98, 0x54, 0x8c, 0x4e, 0xc6, 0xfd, 0xf2, 0x11, 0xac, 0xdf, 0x33, + 0x4c, 0x5a, 0x8f, 0x88, 0x5d, 0xea, 0xe3, 0x3b, 0x90, 0xee, 0x1b, 0x26, 0x95, 0x12, 0xdb, 0xa9, + 0x9d, 0xc2, 0xee, 0x87, 0x95, 0x19, 0xa5, 0xca, 0xb4, 0x46, 0x87, 0xc1, 0x0a, 0xd7, 0x28, 0xff, + 0x3b, 0x0d, 0x1b, 0x0b, 0xa4, 0x18, 0x43, 0xda, 0x22, 0x23, 0x66, 0x31, 0xb1, 0x93, 0x57, 0xf8, + 0xff, 0x58, 0x82, 0x15, 0x87, 0x68, 0x8f, 0xc9, 0x80, 0x4a, 0x49, 0x0e, 0x87, 0x43, 0xfc, 0x3e, + 0x80, 0x4e, 0x1d, 0x6a, 0xe9, 0xd4, 0xd2, 0x4e, 0xa5, 0xd4, 0x76, 0x6a, 0x27, 0xaf, 0xc4, 0x10, + 0x7c, 0x0d, 0xd6, 0x9d, 0xf1, 0x89, 0x69, 0x68, 0x6a, 0x8c, 0x06, 0xdb, 0xa9, 0x9d, 0x8c, 0x82, + 0x84, 0xa0, 0x3e, 0x21, 0x5f, 0x86, 0xb5, 0xa7, 0x94, 0x3c, 0x8e, 0x53, 0x0b, 0x9c, 0x5a, 0x62, + 0x70, 0x8c, 0x58, 0x83, 0xe2, 0x88, 0x7a, 0x1e, 0x19, 0x50, 0xd5, 0x3f, 0x75, 0xa8, 0x94, 0xe6, + 0xb3, 0xdf, 0x9e, 0x9b, 0xfd, 0xec, 0xcc, 0x0b, 0x81, 0x56, 0xef, 0xd4, 0xa1, 0xb8, 0x0a, 0x79, + 0x6a, 0x8d, 0x47, 0xc2, 0x42, 0x66, 0x49, 0xfc, 0x64, 0x6b, 0x3c, 0x9a, 0xb5, 0x92, 0x63, 0x6a, + 0x81, 0x89, 0x15, 0x8f, 0xba, 0x4f, 0x0c, 0x8d, 0x4a, 0x59, 0x6e, 0xe0, 0xf2, 0x9c, 0x81, 0xae, + 0x90, 0xcf, 0xda, 0x08, 0xf5, 0x70, 0x0d, 0xf2, 0xf4, 0x99, 0x4f, 0x2d, 0xcf, 0xb0, 0x2d, 0x69, + 0x85, 0x1b, 0xf9, 0x68, 0xc1, 0x2a, 0x52, 0x53, 0x9f, 0x35, 0x31, 0xd1, 0xc3, 0x7b, 0xb0, 0x62, + 0x3b, 0xbe, 0x61, 0x5b, 0x9e, 0x94, 0xdb, 0x4e, 0xec, 0x14, 0x76, 0xdf, 0x5d, 0x98, 0x08, 0x6d, + 0xc1, 0x51, 0x42, 0x32, 0x6e, 0x00, 0xf2, 0xec, 0xb1, 0xab, 0x51, 0x55, 0xb3, 0x75, 0xaa, 0x1a, + 0x56, 0xdf, 0x96, 0xf2, 0xdc, 0xc0, 0xc5, 0xf9, 0x89, 0x70, 0x62, 0xcd, 0xd6, 0x69, 0xc3, 0xea, + 0xdb, 0x4a, 0xc9, 0x9b, 0x1a, 0xe3, 0x73, 0x90, 0xf5, 0x4e, 0x2d, 0x9f, 0x3c, 0x93, 0x8a, 0x3c, + 0x43, 0x82, 0x51, 0xf9, 0xcf, 0x59, 0x58, 0x3b, 0x4b, 0x8a, 0xdd, 0x85, 0x4c, 0x9f, 0xcd, 0x52, + 0x4a, 0x7e, 0x93, 0x18, 0x08, 0x9d, 0xe9, 0x20, 0x66, 0xbf, 
0x65, 0x10, 0xab, 0x50, 0xb0, 0xa8, + 0xe7, 0x53, 0x5d, 0x64, 0x44, 0xea, 0x8c, 0x39, 0x05, 0x42, 0x69, 0x3e, 0xa5, 0xd2, 0xdf, 0x2a, + 0xa5, 0x1e, 0xc0, 0x5a, 0xe4, 0x92, 0xea, 0x12, 0x6b, 0x10, 0xe6, 0xe6, 0xf5, 0x57, 0x79, 0x52, + 0x91, 0x43, 0x3d, 0x85, 0xa9, 0x29, 0x25, 0x3a, 0x35, 0xc6, 0x75, 0x00, 0xdb, 0xa2, 0x76, 0x5f, + 0xd5, 0xa9, 0x66, 0x4a, 0xb9, 0x25, 0x51, 0x6a, 0x33, 0xca, 0x5c, 0x94, 0x6c, 0x81, 0x6a, 0x26, + 0xfe, 0x74, 0x92, 0x6a, 0x2b, 0x4b, 0x32, 0xe5, 0x48, 0x6c, 0xb2, 0xb9, 0x6c, 0x3b, 0x86, 0x92, + 0x4b, 0x59, 0xde, 0x53, 0x3d, 0x98, 0x59, 0x9e, 0x3b, 0x51, 0x79, 0xe5, 0xcc, 0x94, 0x40, 0x4d, + 0x4c, 0x6c, 0xd5, 0x8d, 0x0f, 0xf1, 0x07, 0x10, 0x01, 0x2a, 0x4f, 0x2b, 0xe0, 0x55, 0xa8, 0x18, + 0x82, 0x2d, 0x32, 0xa2, 0x5b, 0xcf, 0xa1, 0x34, 0x1d, 0x1e, 0xbc, 0x09, 0x19, 0xcf, 0x27, 0xae, + 0xcf, 0xb3, 0x30, 0xa3, 0x88, 0x01, 0x46, 0x90, 0xa2, 0x96, 0xce, 0xab, 0x5c, 0x46, 0x61, 0xff, + 0xe2, 0x1f, 0x4d, 0x26, 0x9c, 0xe2, 0x13, 0xfe, 0x78, 0x7e, 0x45, 0xa7, 0x2c, 0xcf, 0xce, 0x7b, + 0xeb, 0x36, 0xac, 0x4e, 0x4d, 0xe0, 0xac, 0x9f, 0x2e, 0xff, 0x02, 0xde, 0x5e, 0x68, 0x1a, 0x3f, + 0x80, 0xcd, 0xb1, 0x65, 0x58, 0x3e, 0x75, 0x1d, 0x97, 0xb2, 0x8c, 0x15, 0x9f, 0x92, 0xfe, 0xb3, + 0xb2, 0x24, 0xe7, 0x8e, 0xe3, 0x6c, 0x61, 0x45, 0xd9, 0x18, 0xcf, 0x83, 0x57, 0xf3, 0xb9, 0xff, + 0xae, 0xa0, 0x17, 0x2f, 0x5e, 0xbc, 0x48, 0x96, 0x7f, 0x9d, 0x85, 0xcd, 0x45, 0x7b, 0x66, 0xe1, + 0xf6, 0x3d, 0x07, 0x59, 0x6b, 0x3c, 0x3a, 0xa1, 0x2e, 0x0f, 0x52, 0x46, 0x09, 0x46, 0xb8, 0x0a, + 0x19, 0x93, 0x9c, 0x50, 0x53, 0x4a, 0x6f, 0x27, 0x76, 0x4a, 0xbb, 0xd7, 0xce, 0xb4, 0x2b, 0x2b, + 0x4d, 0xa6, 0xa2, 0x08, 0x4d, 0xfc, 0x19, 0xa4, 0x83, 0x12, 0xcd, 0x2c, 0x5c, 0x3d, 0x9b, 0x05, + 0xb6, 0x97, 0x14, 0xae, 0x87, 0xdf, 0x81, 0x3c, 0xfb, 0x2b, 0x72, 0x23, 0xcb, 0x7d, 0xce, 0x31, + 0x80, 0xe5, 0x05, 0xde, 0x82, 0x1c, 0xdf, 0x26, 0x3a, 0x0d, 0x8f, 0xb6, 0x68, 0xcc, 0x12, 0x4b, + 0xa7, 0x7d, 0x32, 0x36, 0x7d, 0xf5, 0x09, 0x31, 0xc7, 0x94, 0x27, 0x7c, 0x5e, 0x29, 0x06, 0xe0, + 0x4f, 0x19, 0x86, 0x2f, 0x42, 0x41, 0xec, 0x2a, 0xc3, 0xd2, 0xe9, 0x33, 0x5e, 0x3d, 0x33, 0x8a, + 0xd8, 0x68, 0x0d, 0x86, 0xb0, 0xcf, 0x3f, 0xf2, 0x6c, 0x2b, 0x4c, 0x4d, 0xfe, 0x09, 0x06, 0xf0, + 0xcf, 0xdf, 0x9e, 0x2d, 0xdc, 0xef, 0x2d, 0x9e, 0xde, 0x6c, 0x4e, 0x95, 0xff, 0x94, 0x84, 0x34, + 0xaf, 0x17, 0x6b, 0x50, 0xe8, 0x3d, 0xec, 0xc8, 0x6a, 0xbd, 0x7d, 0x7c, 0xd0, 0x94, 0x51, 0x02, + 0x97, 0x00, 0x38, 0x70, 0xaf, 0xd9, 0xae, 0xf6, 0x50, 0x32, 0x1a, 0x37, 0x5a, 0xbd, 0xbd, 0x9b, + 0x28, 0x15, 0x29, 0x1c, 0x0b, 0x20, 0x1d, 0x27, 0xdc, 0xd8, 0x45, 0x19, 0x8c, 0xa0, 0x28, 0x0c, + 0x34, 0x1e, 0xc8, 0xf5, 0xbd, 0x9b, 0x28, 0x3b, 0x8d, 0xdc, 0xd8, 0x45, 0x2b, 0x78, 0x15, 0xf2, + 0x1c, 0x39, 0x68, 0xb7, 0x9b, 0x28, 0x17, 0xd9, 0xec, 0xf6, 0x94, 0x46, 0xeb, 0x10, 0xe5, 0x23, + 0x9b, 0x87, 0x4a, 0xfb, 0xb8, 0x83, 0x20, 0xb2, 0x70, 0x24, 0x77, 0xbb, 0xd5, 0x43, 0x19, 0x15, + 0x22, 0xc6, 0xc1, 0xc3, 0x9e, 0xdc, 0x45, 0xc5, 0x29, 0xb7, 0x6e, 0xec, 0xa2, 0xd5, 0xe8, 0x13, + 0x72, 0xeb, 0xf8, 0x08, 0x95, 0xf0, 0x3a, 0xac, 0x8a, 0x4f, 0x84, 0x4e, 0xac, 0xcd, 0x40, 0x7b, + 0x37, 0x11, 0x9a, 0x38, 0x22, 0xac, 0xac, 0x4f, 0x01, 0x7b, 0x37, 0x11, 0x2e, 0xd7, 0x20, 0xc3, + 0xb3, 0x0b, 0x63, 0x28, 0x35, 0xab, 0x07, 0x72, 0x53, 0x6d, 0x77, 0x7a, 0x8d, 0x76, 0xab, 0xda, + 0x44, 0x89, 0x09, 0xa6, 0xc8, 0x3f, 0x39, 0x6e, 0x28, 0x72, 0x1d, 0x25, 0xe3, 0x58, 0x47, 0xae, + 0xf6, 0xe4, 0x3a, 0x4a, 0x95, 0x35, 0xd8, 0x5c, 0x54, 0x27, 0x17, 0xee, 0x8c, 0xd8, 0x12, 0x27, + 0x97, 0x2c, 0x31, 0xb7, 0x35, 0xb7, 0xc4, 0xff, 0x4a, 0xc2, 0xc6, 0x82, 0xb3, 0x62, 
0xe1, 0x47, + 0x7e, 0x08, 0x19, 0x91, 0xa2, 0xe2, 0xf4, 0xbc, 0xb2, 0xf0, 0xd0, 0xe1, 0x09, 0x3b, 0x77, 0x82, + 0x72, 0xbd, 0x78, 0x07, 0x91, 0x5a, 0xd2, 0x41, 0x30, 0x13, 0x73, 0x35, 0xfd, 0xe7, 0x73, 0x35, + 0x5d, 0x1c, 0x7b, 0x7b, 0x67, 0x39, 0xf6, 0x38, 0xf6, 0xcd, 0x6a, 0x7b, 0x66, 0x41, 0x6d, 0xbf, + 0x0b, 0xeb, 0x73, 0x86, 0xce, 0x5c, 0x63, 0x7f, 0x99, 0x00, 0x69, 0x59, 0x70, 0x5e, 0x51, 0xe9, + 0x92, 0x53, 0x95, 0xee, 0xee, 0x6c, 0x04, 0x2f, 0x2d, 0x5f, 0x84, 0xb9, 0xb5, 0xfe, 0x3a, 0x01, + 0xe7, 0x16, 0x77, 0x8a, 0x0b, 0x7d, 0xf8, 0x0c, 0xb2, 0x23, 0xea, 0x0f, 0xed, 0xb0, 0x5b, 0xfa, + 0x78, 0xc1, 0x19, 0xcc, 0xc4, 0xb3, 0x8b, 0x1d, 0x68, 0xc5, 0x0f, 0xf1, 0xd4, 0xb2, 0x76, 0x4f, + 0x78, 0x33, 0xe7, 0xe9, 0xaf, 0x92, 0xf0, 0xf6, 0x42, 0xe3, 0x0b, 0x1d, 0x7d, 0x0f, 0xc0, 0xb0, + 0x9c, 0xb1, 0x2f, 0x3a, 0x22, 0x51, 0x60, 0xf3, 0x1c, 0xe1, 0xc5, 0x8b, 0x15, 0xcf, 0xb1, 0x1f, + 0xc9, 0x53, 0x5c, 0x0e, 0x02, 0xe2, 0x84, 0x3b, 0x13, 0x47, 0xd3, 0xdc, 0xd1, 0xf7, 0x97, 0xcc, + 0x74, 0x2e, 0x31, 0x3f, 0x01, 0xa4, 0x99, 0x06, 0xb5, 0x7c, 0xd5, 0xf3, 0x5d, 0x4a, 0x46, 0x86, + 0x35, 0xe0, 0x27, 0x48, 0x6e, 0x3f, 0xd3, 0x27, 0xa6, 0x47, 0x95, 0x35, 0x21, 0xee, 0x86, 0x52, + 0xa6, 0xc1, 0x13, 0xc8, 0x8d, 0x69, 0x64, 0xa7, 0x34, 0x84, 0x38, 0xd2, 0x28, 0xff, 0x31, 0x07, + 0x85, 0x58, 0x5f, 0x8d, 0x2f, 0x41, 0xf1, 0x11, 0x79, 0x42, 0xd4, 0xf0, 0xae, 0x24, 0x22, 0x51, + 0x60, 0x58, 0x27, 0xb8, 0x2f, 0x7d, 0x02, 0x9b, 0x9c, 0x62, 0x8f, 0x7d, 0xea, 0xaa, 0x9a, 0x49, + 0x3c, 0x8f, 0x07, 0x2d, 0xc7, 0xa9, 0x98, 0xc9, 0xda, 0x4c, 0x54, 0x0b, 0x25, 0xf8, 0x16, 0x6c, + 0x70, 0x8d, 0xd1, 0xd8, 0xf4, 0x0d, 0xc7, 0xa4, 0x2a, 0xbb, 0xbd, 0x79, 0xfc, 0x24, 0x89, 0x3c, + 0x5b, 0x67, 0x8c, 0xa3, 0x80, 0xc0, 0x3c, 0xf2, 0x70, 0x1d, 0xde, 0xe3, 0x6a, 0x03, 0x6a, 0x51, + 0x97, 0xf8, 0x54, 0xa5, 0x5f, 0x8d, 0x89, 0xe9, 0xa9, 0xc4, 0xd2, 0xd5, 0x21, 0xf1, 0x86, 0xd2, + 0x26, 0x33, 0x70, 0x90, 0x94, 0x12, 0xca, 0x05, 0x46, 0x3c, 0x0c, 0x78, 0x32, 0xa7, 0x55, 0x2d, + 0xfd, 0x73, 0xe2, 0x0d, 0xf1, 0x3e, 0x9c, 0xe3, 0x56, 0x3c, 0xdf, 0x35, 0xac, 0x81, 0xaa, 0x0d, + 0xa9, 0xf6, 0x58, 0x1d, 0xfb, 0xfd, 0x3b, 0xd2, 0x3b, 0xf1, 0xef, 0x73, 0x0f, 0xbb, 0x9c, 0x53, + 0x63, 0x94, 0x63, 0xbf, 0x7f, 0x07, 0x77, 0xa1, 0xc8, 0x16, 0x63, 0x64, 0x3c, 0xa7, 0x6a, 0xdf, + 0x76, 0xf9, 0xd1, 0x58, 0x5a, 0x50, 0x9a, 0x62, 0x11, 0xac, 0xb4, 0x03, 0x85, 0x23, 0x5b, 0xa7, + 0xfb, 0x99, 0x6e, 0x47, 0x96, 0xeb, 0x4a, 0x21, 0xb4, 0x72, 0xcf, 0x76, 0x59, 0x42, 0x0d, 0xec, + 0x28, 0xc0, 0x05, 0x91, 0x50, 0x03, 0x3b, 0x0c, 0xef, 0x2d, 0xd8, 0xd0, 0x34, 0x31, 0x67, 0x43, + 0x53, 0x83, 0x3b, 0x96, 0x27, 0xa1, 0xa9, 0x60, 0x69, 0xda, 0xa1, 0x20, 0x04, 0x39, 0xee, 0xe1, + 0x4f, 0xe1, 0xed, 0x49, 0xb0, 0xe2, 0x8a, 0xeb, 0x73, 0xb3, 0x9c, 0x55, 0xbd, 0x05, 0x1b, 0xce, + 0xe9, 0xbc, 0x22, 0x9e, 0xfa, 0xa2, 0x73, 0x3a, 0xab, 0x76, 0x1b, 0x36, 0x9d, 0xa1, 0x33, 0xaf, + 0x77, 0x35, 0xae, 0x87, 0x9d, 0xa1, 0x33, 0xab, 0xf8, 0x11, 0xbf, 0x70, 0xbb, 0x54, 0x23, 0x3e, + 0xd5, 0xa5, 0xf3, 0x71, 0x7a, 0x4c, 0x80, 0xaf, 0x03, 0xd2, 0x34, 0x95, 0x5a, 0xe4, 0xc4, 0xa4, + 0x2a, 0x71, 0xa9, 0x45, 0x3c, 0xe9, 0x62, 0x9c, 0x5c, 0xd2, 0x34, 0x99, 0x4b, 0xab, 0x5c, 0x88, + 0xaf, 0xc2, 0xba, 0x7d, 0xf2, 0x48, 0x13, 0x29, 0xa9, 0x3a, 0x2e, 0xed, 0x1b, 0xcf, 0xa4, 0x0f, + 0x79, 0x7c, 0xd7, 0x98, 0x80, 0x27, 0x64, 0x87, 0xc3, 0xf8, 0x0a, 0x20, 0xcd, 0x1b, 0x12, 0xd7, + 0xe1, 0x35, 0xd9, 0x73, 0x88, 0x46, 0xa5, 0x8f, 0x04, 0x55, 0xe0, 0xad, 0x10, 0x66, 0x5b, 0xc2, + 0x7b, 0x6a, 0xf4, 0xfd, 0xd0, 0xe2, 0x65, 0xb1, 0x25, 0x38, 0x16, 0x58, 0xdb, 0x01, 0xc4, 0x42, + 0x31, 0xf5, 
0xe1, 0x1d, 0x4e, 0x2b, 0x39, 0x43, 0x27, 0xfe, 0xdd, 0x0f, 0x60, 0x95, 0x31, 0x27, + 0x1f, 0xbd, 0x22, 0x1a, 0x32, 0x67, 0x18, 0xfb, 0xe2, 0x6b, 0xeb, 0x8d, 0xcb, 0xfb, 0x50, 0x8c, + 0xe7, 0x27, 0xce, 0x83, 0xc8, 0x50, 0x94, 0x60, 0xcd, 0x4a, 0xad, 0x5d, 0x67, 0x6d, 0xc6, 0x97, + 0x32, 0x4a, 0xb2, 0x76, 0xa7, 0xd9, 0xe8, 0xc9, 0xaa, 0x72, 0xdc, 0xea, 0x35, 0x8e, 0x64, 0x94, + 0x8a, 0xf7, 0xd5, 0x7f, 0x4d, 0x42, 0x69, 0xfa, 0x8a, 0x84, 0x7f, 0x00, 0xe7, 0xc3, 0xf7, 0x0c, + 0x8f, 0xfa, 0xea, 0x53, 0xc3, 0xe5, 0x5b, 0x66, 0x44, 0xc4, 0xf1, 0x15, 0x2d, 0xda, 0x66, 0xc0, + 0xea, 0x52, 0xff, 0x0b, 0xc3, 0x65, 0x1b, 0x62, 0x44, 0x7c, 0xdc, 0x84, 0x8b, 0x96, 0xad, 0x7a, + 0x3e, 0xb1, 0x74, 0xe2, 0xea, 0xea, 0xe4, 0x25, 0x49, 0x25, 0x9a, 0x46, 0x3d, 0xcf, 0x16, 0x47, + 0x55, 0x64, 0xe5, 0x5d, 0xcb, 0xee, 0x06, 0xe4, 0x49, 0x0d, 0xaf, 0x06, 0xd4, 0x99, 0x04, 0x4b, + 0x2d, 0x4b, 0xb0, 0x77, 0x20, 0x3f, 0x22, 0x8e, 0x4a, 0x2d, 0xdf, 0x3d, 0xe5, 0x8d, 0x71, 0x4e, + 0xc9, 0x8d, 0x88, 0x23, 0xb3, 0xf1, 0x9b, 0xb9, 0x9f, 0xfc, 0x23, 0x05, 0xc5, 0x78, 0x73, 0xcc, + 0xee, 0x1a, 0x1a, 0x3f, 0x47, 0x12, 0xbc, 0xd2, 0x7c, 0xf0, 0xd2, 0x56, 0xba, 0x52, 0x63, 0x07, + 0xcc, 0x7e, 0x56, 0xb4, 0xac, 0x8a, 0xd0, 0x64, 0x87, 0x3b, 0xab, 0x2d, 0x54, 0xb4, 0x08, 0x39, + 0x25, 0x18, 0xe1, 0x43, 0xc8, 0x3e, 0xf2, 0xb8, 0xed, 0x2c, 0xb7, 0xfd, 0xe1, 0xcb, 0x6d, 0xdf, + 0xef, 0x72, 0xe3, 0xf9, 0xfb, 0x5d, 0xb5, 0xd5, 0x56, 0x8e, 0xaa, 0x4d, 0x25, 0x50, 0xc7, 0x17, + 0x20, 0x6d, 0x92, 0xe7, 0xa7, 0xd3, 0x47, 0x11, 0x87, 0xce, 0x1a, 0xf8, 0x0b, 0x90, 0x7e, 0x4a, + 0xc9, 0xe3, 0xe9, 0x03, 0x80, 0x43, 0xaf, 0x31, 0xf5, 0xaf, 0x43, 0x86, 0xc7, 0x0b, 0x03, 0x04, + 0x11, 0x43, 0x6f, 0xe1, 0x1c, 0xa4, 0x6b, 0x6d, 0x85, 0xa5, 0x3f, 0x82, 0xa2, 0x40, 0xd5, 0x4e, + 0x43, 0xae, 0xc9, 0x28, 0x59, 0xbe, 0x05, 0x59, 0x11, 0x04, 0xb6, 0x35, 0xa2, 0x30, 0xa0, 0xb7, + 0x82, 0x61, 0x60, 0x23, 0x11, 0x4a, 0x8f, 0x8f, 0x0e, 0x64, 0x05, 0x25, 0xe3, 0xcb, 0xeb, 0x41, + 0x31, 0xde, 0x17, 0xbf, 0x99, 0x9c, 0xfa, 0x4b, 0x02, 0x0a, 0xb1, 0x3e, 0x97, 0x35, 0x28, 0xc4, + 0x34, 0xed, 0xa7, 0x2a, 0x31, 0x0d, 0xe2, 0x05, 0x49, 0x01, 0x1c, 0xaa, 0x32, 0xe4, 0xac, 0x8b, + 0xf6, 0x46, 0x9c, 0xff, 0x5d, 0x02, 0xd0, 0x6c, 0x8b, 0x39, 0xe3, 0x60, 0xe2, 0x3b, 0x75, 0xf0, + 0xb7, 0x09, 0x28, 0x4d, 0xf7, 0x95, 0x33, 0xee, 0x5d, 0xfa, 0x4e, 0xdd, 0xfb, 0x67, 0x12, 0x56, + 0xa7, 0xba, 0xc9, 0xb3, 0x7a, 0xf7, 0x15, 0xac, 0x1b, 0x3a, 0x1d, 0x39, 0xb6, 0x4f, 0x2d, 0xed, + 0x54, 0x35, 0xe9, 0x13, 0x6a, 0x4a, 0x65, 0x5e, 0x28, 0xae, 0xbf, 0xbc, 0x5f, 0xad, 0x34, 0x26, + 0x7a, 0x4d, 0xa6, 0xb6, 0xbf, 0xd1, 0xa8, 0xcb, 0x47, 0x9d, 0x76, 0x4f, 0x6e, 0xd5, 0x1e, 0xaa, + 0xc7, 0xad, 0x1f, 0xb7, 0xda, 0x5f, 0xb4, 0x14, 0x64, 0xcc, 0xd0, 0x5e, 0xe3, 0x56, 0xef, 0x00, + 0x9a, 0x75, 0x0a, 0x9f, 0x87, 0x45, 0x6e, 0xa1, 0xb7, 0xf0, 0x06, 0xac, 0xb5, 0xda, 0x6a, 0xb7, + 0x51, 0x97, 0x55, 0xf9, 0xde, 0x3d, 0xb9, 0xd6, 0xeb, 0x8a, 0x17, 0x88, 0x88, 0xdd, 0x9b, 0xde, + 0xd4, 0xbf, 0x49, 0xc1, 0xc6, 0x02, 0x4f, 0x70, 0x35, 0xb8, 0x3b, 0x88, 0xeb, 0xcc, 0xf7, 0xcf, + 0xe2, 0x7d, 0x85, 0x1d, 0xf9, 0x1d, 0xe2, 0xfa, 0xc1, 0x55, 0xe3, 0x0a, 0xb0, 0x28, 0x59, 0xbe, + 0xd1, 0x37, 0xa8, 0x1b, 0x3c, 0xd8, 0x88, 0x0b, 0xc5, 0xda, 0x04, 0x17, 0x6f, 0x36, 0xdf, 0x03, + 0xec, 0xd8, 0x9e, 0xe1, 0x1b, 0x4f, 0xa8, 0x6a, 0x58, 0xe1, 0xeb, 0x0e, 0xbb, 0x60, 0xa4, 0x15, + 0x14, 0x4a, 0x1a, 0x96, 0x1f, 0xb1, 0x2d, 0x3a, 0x20, 0x33, 0x6c, 0x56, 0xc0, 0x53, 0x0a, 0x0a, + 0x25, 0x11, 0xfb, 0x12, 0x14, 0x75, 0x7b, 0xcc, 0xba, 0x2e, 0xc1, 0x63, 0xe7, 0x45, 0x42, 0x29, + 0x08, 0x2c, 0xa2, 0x04, 0xfd, 0xf4, 
0xe4, 0x59, 0xa9, 0xa8, 0x14, 0x04, 0x26, 0x28, 0x97, 0x61, + 0x8d, 0x0c, 0x06, 0x2e, 0x33, 0x1e, 0x1a, 0x12, 0x37, 0x84, 0x52, 0x04, 0x73, 0xe2, 0xd6, 0x7d, + 0xc8, 0x85, 0x71, 0x60, 0x47, 0x32, 0x8b, 0x84, 0xea, 0x88, 0x6b, 0x6f, 0x72, 0x27, 0xaf, 0xe4, + 0xac, 0x50, 0x78, 0x09, 0x8a, 0x86, 0xa7, 0x4e, 0x5e, 0xc9, 0x93, 0xdb, 0xc9, 0x9d, 0x9c, 0x52, + 0x30, 0xbc, 0xe8, 0x85, 0xb1, 0xfc, 0x75, 0x12, 0x4a, 0xd3, 0xaf, 0xfc, 0xb8, 0x0e, 0x39, 0xd3, + 0xd6, 0x08, 0x4f, 0x2d, 0xf1, 0x13, 0xd3, 0xce, 0x2b, 0x7e, 0x18, 0xa8, 0x34, 0x03, 0xbe, 0x12, + 0x69, 0x6e, 0xfd, 0x2d, 0x01, 0xb9, 0x10, 0xc6, 0xe7, 0x20, 0xed, 0x10, 0x7f, 0xc8, 0xcd, 0x65, + 0x0e, 0x92, 0x28, 0xa1, 0xf0, 0x31, 0xc3, 0x3d, 0x87, 0x58, 0x3c, 0x05, 0x02, 0x9c, 0x8d, 0xd9, + 0xba, 0x9a, 0x94, 0xe8, 0xfc, 0xfa, 0x61, 0x8f, 0x46, 0xd4, 0xf2, 0xbd, 0x70, 0x5d, 0x03, 0xbc, + 0x16, 0xc0, 0xf8, 0x1a, 0xac, 0xfb, 0x2e, 0x31, 0xcc, 0x29, 0x6e, 0x9a, 0x73, 0x51, 0x28, 0x88, + 0xc8, 0xfb, 0x70, 0x21, 0xb4, 0xab, 0x53, 0x9f, 0x68, 0x43, 0xaa, 0x4f, 0x94, 0xb2, 0xfc, 0x99, + 0xe1, 0x7c, 0x40, 0xa8, 0x07, 0xf2, 0x50, 0xb7, 0xfc, 0xf7, 0x04, 0xac, 0x87, 0x17, 0x26, 0x3d, + 0x0a, 0xd6, 0x11, 0x00, 0xb1, 0x2c, 0xdb, 0x8f, 0x87, 0x6b, 0x3e, 0x95, 0xe7, 0xf4, 0x2a, 0xd5, + 0x48, 0x49, 0x89, 0x19, 0xd8, 0x1a, 0x01, 0x4c, 0x24, 0x4b, 0xc3, 0x76, 0x11, 0x0a, 0xc1, 0x4f, + 0x38, 0xfc, 0x77, 0x40, 0x71, 0xc5, 0x06, 0x01, 0xb1, 0x9b, 0x15, 0xde, 0x84, 0xcc, 0x09, 0x1d, + 0x18, 0x56, 0xf0, 0x30, 0x2b, 0x06, 0xe1, 0x43, 0x48, 0x3a, 0x7a, 0x08, 0x39, 0xf8, 0x19, 0x6c, + 0x68, 0xf6, 0x68, 0xd6, 0xdd, 0x03, 0x34, 0x73, 0xcd, 0xf7, 0x3e, 0x4f, 0x7c, 0x09, 0x93, 0x16, + 0xf3, 0x7f, 0x89, 0xc4, 0xef, 0x93, 0xa9, 0xc3, 0xce, 0xc1, 0x1f, 0x92, 0x5b, 0x87, 0x42, 0xb5, + 0x13, 0xce, 0x54, 0xa1, 0x7d, 0x93, 0x6a, 0xcc, 0xfb, 0xff, 0x07, 0x00, 0x00, 0xff, 0xff, 0xa3, + 0x58, 0x22, 0x30, 0xdf, 0x1c, 0x00, 0x00, +} diff --git a/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor_gostring.gen.go b/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor_gostring.gen.go new file mode 100644 index 000000000..3b95a7757 --- /dev/null +++ b/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/descriptor_gostring.gen.go @@ -0,0 +1,772 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: descriptor.proto + +/* +Package descriptor is a generated protocol buffer package. + +It is generated from these files: + descriptor.proto + +It has these top-level messages: + FileDescriptorSet + FileDescriptorProto + DescriptorProto + ExtensionRangeOptions + FieldDescriptorProto + OneofDescriptorProto + EnumDescriptorProto + EnumValueDescriptorProto + ServiceDescriptorProto + MethodDescriptorProto + FileOptions + MessageOptions + FieldOptions + OneofOptions + EnumOptions + EnumValueOptions + ServiceOptions + MethodOptions + UninterpretedOption + SourceCodeInfo + GeneratedCodeInfo +*/ +package descriptor + +import fmt "fmt" +import strings "strings" +import proto "github.com/gogo/protobuf/proto" +import sort "sort" +import strconv "strconv" +import reflect "reflect" +import math "math" + +// Reference imports to suppress errors if they are not otherwise used. 
+var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +func (this *FileDescriptorSet) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 5) + s = append(s, "&descriptor.FileDescriptorSet{") + if this.File != nil { + s = append(s, "File: "+fmt.Sprintf("%#v", this.File)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *FileDescriptorProto) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 16) + s = append(s, "&descriptor.FileDescriptorProto{") + if this.Name != nil { + s = append(s, "Name: "+valueToGoStringDescriptor(this.Name, "string")+",\n") + } + if this.Package != nil { + s = append(s, "Package: "+valueToGoStringDescriptor(this.Package, "string")+",\n") + } + if this.Dependency != nil { + s = append(s, "Dependency: "+fmt.Sprintf("%#v", this.Dependency)+",\n") + } + if this.PublicDependency != nil { + s = append(s, "PublicDependency: "+fmt.Sprintf("%#v", this.PublicDependency)+",\n") + } + if this.WeakDependency != nil { + s = append(s, "WeakDependency: "+fmt.Sprintf("%#v", this.WeakDependency)+",\n") + } + if this.MessageType != nil { + s = append(s, "MessageType: "+fmt.Sprintf("%#v", this.MessageType)+",\n") + } + if this.EnumType != nil { + s = append(s, "EnumType: "+fmt.Sprintf("%#v", this.EnumType)+",\n") + } + if this.Service != nil { + s = append(s, "Service: "+fmt.Sprintf("%#v", this.Service)+",\n") + } + if this.Extension != nil { + s = append(s, "Extension: "+fmt.Sprintf("%#v", this.Extension)+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.SourceCodeInfo != nil { + s = append(s, "SourceCodeInfo: "+fmt.Sprintf("%#v", this.SourceCodeInfo)+",\n") + } + if this.Syntax != nil { + s = append(s, "Syntax: "+valueToGoStringDescriptor(this.Syntax, "string")+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *DescriptorProto) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 14) + s = append(s, "&descriptor.DescriptorProto{") + if this.Name != nil { + s = append(s, "Name: "+valueToGoStringDescriptor(this.Name, "string")+",\n") + } + if this.Field != nil { + s = append(s, "Field: "+fmt.Sprintf("%#v", this.Field)+",\n") + } + if this.Extension != nil { + s = append(s, "Extension: "+fmt.Sprintf("%#v", this.Extension)+",\n") + } + if this.NestedType != nil { + s = append(s, "NestedType: "+fmt.Sprintf("%#v", this.NestedType)+",\n") + } + if this.EnumType != nil { + s = append(s, "EnumType: "+fmt.Sprintf("%#v", this.EnumType)+",\n") + } + if this.ExtensionRange != nil { + s = append(s, "ExtensionRange: "+fmt.Sprintf("%#v", this.ExtensionRange)+",\n") + } + if this.OneofDecl != nil { + s = append(s, "OneofDecl: "+fmt.Sprintf("%#v", this.OneofDecl)+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.ReservedRange != nil { + s = append(s, "ReservedRange: "+fmt.Sprintf("%#v", this.ReservedRange)+",\n") + } + if this.ReservedName != nil { + s = append(s, "ReservedName: "+fmt.Sprintf("%#v", this.ReservedName)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, 
"}") + return strings.Join(s, "") +} +func (this *DescriptorProto_ExtensionRange) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 7) + s = append(s, "&descriptor.DescriptorProto_ExtensionRange{") + if this.Start != nil { + s = append(s, "Start: "+valueToGoStringDescriptor(this.Start, "int32")+",\n") + } + if this.End != nil { + s = append(s, "End: "+valueToGoStringDescriptor(this.End, "int32")+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *DescriptorProto_ReservedRange) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 6) + s = append(s, "&descriptor.DescriptorProto_ReservedRange{") + if this.Start != nil { + s = append(s, "Start: "+valueToGoStringDescriptor(this.Start, "int32")+",\n") + } + if this.End != nil { + s = append(s, "End: "+valueToGoStringDescriptor(this.End, "int32")+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *ExtensionRangeOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 5) + s = append(s, "&descriptor.ExtensionRangeOptions{") + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *FieldDescriptorProto) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 14) + s = append(s, "&descriptor.FieldDescriptorProto{") + if this.Name != nil { + s = append(s, "Name: "+valueToGoStringDescriptor(this.Name, "string")+",\n") + } + if this.Number != nil { + s = append(s, "Number: "+valueToGoStringDescriptor(this.Number, "int32")+",\n") + } + if this.Label != nil { + s = append(s, "Label: "+valueToGoStringDescriptor(this.Label, "FieldDescriptorProto_Label")+",\n") + } + if this.Type != nil { + s = append(s, "Type: "+valueToGoStringDescriptor(this.Type, "FieldDescriptorProto_Type")+",\n") + } + if this.TypeName != nil { + s = append(s, "TypeName: "+valueToGoStringDescriptor(this.TypeName, "string")+",\n") + } + if this.Extendee != nil { + s = append(s, "Extendee: "+valueToGoStringDescriptor(this.Extendee, "string")+",\n") + } + if this.DefaultValue != nil { + s = append(s, "DefaultValue: "+valueToGoStringDescriptor(this.DefaultValue, "string")+",\n") + } + if this.OneofIndex != nil { + s = append(s, "OneofIndex: "+valueToGoStringDescriptor(this.OneofIndex, "int32")+",\n") + } + if this.JsonName != nil { + s = append(s, "JsonName: "+valueToGoStringDescriptor(this.JsonName, "string")+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *OneofDescriptorProto) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 6) + s = append(s, 
"&descriptor.OneofDescriptorProto{") + if this.Name != nil { + s = append(s, "Name: "+valueToGoStringDescriptor(this.Name, "string")+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *EnumDescriptorProto) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 9) + s = append(s, "&descriptor.EnumDescriptorProto{") + if this.Name != nil { + s = append(s, "Name: "+valueToGoStringDescriptor(this.Name, "string")+",\n") + } + if this.Value != nil { + s = append(s, "Value: "+fmt.Sprintf("%#v", this.Value)+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.ReservedRange != nil { + s = append(s, "ReservedRange: "+fmt.Sprintf("%#v", this.ReservedRange)+",\n") + } + if this.ReservedName != nil { + s = append(s, "ReservedName: "+fmt.Sprintf("%#v", this.ReservedName)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *EnumDescriptorProto_EnumReservedRange) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 6) + s = append(s, "&descriptor.EnumDescriptorProto_EnumReservedRange{") + if this.Start != nil { + s = append(s, "Start: "+valueToGoStringDescriptor(this.Start, "int32")+",\n") + } + if this.End != nil { + s = append(s, "End: "+valueToGoStringDescriptor(this.End, "int32")+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *EnumValueDescriptorProto) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 7) + s = append(s, "&descriptor.EnumValueDescriptorProto{") + if this.Name != nil { + s = append(s, "Name: "+valueToGoStringDescriptor(this.Name, "string")+",\n") + } + if this.Number != nil { + s = append(s, "Number: "+valueToGoStringDescriptor(this.Number, "int32")+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *ServiceDescriptorProto) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 7) + s = append(s, "&descriptor.ServiceDescriptorProto{") + if this.Name != nil { + s = append(s, "Name: "+valueToGoStringDescriptor(this.Name, "string")+",\n") + } + if this.Method != nil { + s = append(s, "Method: "+fmt.Sprintf("%#v", this.Method)+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *MethodDescriptorProto) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 10) + s = append(s, "&descriptor.MethodDescriptorProto{") + if this.Name != nil { + s = append(s, "Name: "+valueToGoStringDescriptor(this.Name, "string")+",\n") + } + if this.InputType != nil { + s = 
append(s, "InputType: "+valueToGoStringDescriptor(this.InputType, "string")+",\n") + } + if this.OutputType != nil { + s = append(s, "OutputType: "+valueToGoStringDescriptor(this.OutputType, "string")+",\n") + } + if this.Options != nil { + s = append(s, "Options: "+fmt.Sprintf("%#v", this.Options)+",\n") + } + if this.ClientStreaming != nil { + s = append(s, "ClientStreaming: "+valueToGoStringDescriptor(this.ClientStreaming, "bool")+",\n") + } + if this.ServerStreaming != nil { + s = append(s, "ServerStreaming: "+valueToGoStringDescriptor(this.ServerStreaming, "bool")+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *FileOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 23) + s = append(s, "&descriptor.FileOptions{") + if this.JavaPackage != nil { + s = append(s, "JavaPackage: "+valueToGoStringDescriptor(this.JavaPackage, "string")+",\n") + } + if this.JavaOuterClassname != nil { + s = append(s, "JavaOuterClassname: "+valueToGoStringDescriptor(this.JavaOuterClassname, "string")+",\n") + } + if this.JavaMultipleFiles != nil { + s = append(s, "JavaMultipleFiles: "+valueToGoStringDescriptor(this.JavaMultipleFiles, "bool")+",\n") + } + if this.JavaGenerateEqualsAndHash != nil { + s = append(s, "JavaGenerateEqualsAndHash: "+valueToGoStringDescriptor(this.JavaGenerateEqualsAndHash, "bool")+",\n") + } + if this.JavaStringCheckUtf8 != nil { + s = append(s, "JavaStringCheckUtf8: "+valueToGoStringDescriptor(this.JavaStringCheckUtf8, "bool")+",\n") + } + if this.OptimizeFor != nil { + s = append(s, "OptimizeFor: "+valueToGoStringDescriptor(this.OptimizeFor, "FileOptions_OptimizeMode")+",\n") + } + if this.GoPackage != nil { + s = append(s, "GoPackage: "+valueToGoStringDescriptor(this.GoPackage, "string")+",\n") + } + if this.CcGenericServices != nil { + s = append(s, "CcGenericServices: "+valueToGoStringDescriptor(this.CcGenericServices, "bool")+",\n") + } + if this.JavaGenericServices != nil { + s = append(s, "JavaGenericServices: "+valueToGoStringDescriptor(this.JavaGenericServices, "bool")+",\n") + } + if this.PyGenericServices != nil { + s = append(s, "PyGenericServices: "+valueToGoStringDescriptor(this.PyGenericServices, "bool")+",\n") + } + if this.PhpGenericServices != nil { + s = append(s, "PhpGenericServices: "+valueToGoStringDescriptor(this.PhpGenericServices, "bool")+",\n") + } + if this.Deprecated != nil { + s = append(s, "Deprecated: "+valueToGoStringDescriptor(this.Deprecated, "bool")+",\n") + } + if this.CcEnableArenas != nil { + s = append(s, "CcEnableArenas: "+valueToGoStringDescriptor(this.CcEnableArenas, "bool")+",\n") + } + if this.ObjcClassPrefix != nil { + s = append(s, "ObjcClassPrefix: "+valueToGoStringDescriptor(this.ObjcClassPrefix, "string")+",\n") + } + if this.CsharpNamespace != nil { + s = append(s, "CsharpNamespace: "+valueToGoStringDescriptor(this.CsharpNamespace, "string")+",\n") + } + if this.SwiftPrefix != nil { + s = append(s, "SwiftPrefix: "+valueToGoStringDescriptor(this.SwiftPrefix, "string")+",\n") + } + if this.PhpClassPrefix != nil { + s = append(s, "PhpClassPrefix: "+valueToGoStringDescriptor(this.PhpClassPrefix, "string")+",\n") + } + if this.PhpNamespace != nil { + s = append(s, "PhpNamespace: "+valueToGoStringDescriptor(this.PhpNamespace, "string")+",\n") + } + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", 
this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *MessageOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 9) + s = append(s, "&descriptor.MessageOptions{") + if this.MessageSetWireFormat != nil { + s = append(s, "MessageSetWireFormat: "+valueToGoStringDescriptor(this.MessageSetWireFormat, "bool")+",\n") + } + if this.NoStandardDescriptorAccessor != nil { + s = append(s, "NoStandardDescriptorAccessor: "+valueToGoStringDescriptor(this.NoStandardDescriptorAccessor, "bool")+",\n") + } + if this.Deprecated != nil { + s = append(s, "Deprecated: "+valueToGoStringDescriptor(this.Deprecated, "bool")+",\n") + } + if this.MapEntry != nil { + s = append(s, "MapEntry: "+valueToGoStringDescriptor(this.MapEntry, "bool")+",\n") + } + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *FieldOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 11) + s = append(s, "&descriptor.FieldOptions{") + if this.Ctype != nil { + s = append(s, "Ctype: "+valueToGoStringDescriptor(this.Ctype, "FieldOptions_CType")+",\n") + } + if this.Packed != nil { + s = append(s, "Packed: "+valueToGoStringDescriptor(this.Packed, "bool")+",\n") + } + if this.Jstype != nil { + s = append(s, "Jstype: "+valueToGoStringDescriptor(this.Jstype, "FieldOptions_JSType")+",\n") + } + if this.Lazy != nil { + s = append(s, "Lazy: "+valueToGoStringDescriptor(this.Lazy, "bool")+",\n") + } + if this.Deprecated != nil { + s = append(s, "Deprecated: "+valueToGoStringDescriptor(this.Deprecated, "bool")+",\n") + } + if this.Weak != nil { + s = append(s, "Weak: "+valueToGoStringDescriptor(this.Weak, "bool")+",\n") + } + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *OneofOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 5) + s = append(s, "&descriptor.OneofOptions{") + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *EnumOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 7) + s = append(s, "&descriptor.EnumOptions{") + if this.AllowAlias != nil { + s = append(s, "AllowAlias: "+valueToGoStringDescriptor(this.AllowAlias, "bool")+",\n") + } + if this.Deprecated != nil { + s = 
append(s, "Deprecated: "+valueToGoStringDescriptor(this.Deprecated, "bool")+",\n") + } + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *EnumValueOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 6) + s = append(s, "&descriptor.EnumValueOptions{") + if this.Deprecated != nil { + s = append(s, "Deprecated: "+valueToGoStringDescriptor(this.Deprecated, "bool")+",\n") + } + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *ServiceOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 6) + s = append(s, "&descriptor.ServiceOptions{") + if this.Deprecated != nil { + s = append(s, "Deprecated: "+valueToGoStringDescriptor(this.Deprecated, "bool")+",\n") + } + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *MethodOptions) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 7) + s = append(s, "&descriptor.MethodOptions{") + if this.Deprecated != nil { + s = append(s, "Deprecated: "+valueToGoStringDescriptor(this.Deprecated, "bool")+",\n") + } + if this.IdempotencyLevel != nil { + s = append(s, "IdempotencyLevel: "+valueToGoStringDescriptor(this.IdempotencyLevel, "MethodOptions_IdempotencyLevel")+",\n") + } + if this.UninterpretedOption != nil { + s = append(s, "UninterpretedOption: "+fmt.Sprintf("%#v", this.UninterpretedOption)+",\n") + } + s = append(s, "XXX_InternalExtensions: "+extensionToGoStringDescriptor(this)+",\n") + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *UninterpretedOption) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 11) + s = append(s, "&descriptor.UninterpretedOption{") + if this.Name != nil { + s = append(s, "Name: "+fmt.Sprintf("%#v", this.Name)+",\n") + } + if this.IdentifierValue != nil { + s = append(s, "IdentifierValue: "+valueToGoStringDescriptor(this.IdentifierValue, "string")+",\n") + } + if this.PositiveIntValue != nil { + s = append(s, "PositiveIntValue: "+valueToGoStringDescriptor(this.PositiveIntValue, "uint64")+",\n") + } + if this.NegativeIntValue != nil { + s = append(s, "NegativeIntValue: "+valueToGoStringDescriptor(this.NegativeIntValue, "int64")+",\n") + } + if this.DoubleValue != nil { + s = append(s, "DoubleValue: "+valueToGoStringDescriptor(this.DoubleValue, "float64")+",\n") + } + if this.StringValue != nil { 
+ s = append(s, "StringValue: "+valueToGoStringDescriptor(this.StringValue, "byte")+",\n") + } + if this.AggregateValue != nil { + s = append(s, "AggregateValue: "+valueToGoStringDescriptor(this.AggregateValue, "string")+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *UninterpretedOption_NamePart) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 6) + s = append(s, "&descriptor.UninterpretedOption_NamePart{") + if this.NamePart != nil { + s = append(s, "NamePart: "+valueToGoStringDescriptor(this.NamePart, "string")+",\n") + } + if this.IsExtension != nil { + s = append(s, "IsExtension: "+valueToGoStringDescriptor(this.IsExtension, "bool")+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *SourceCodeInfo) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 5) + s = append(s, "&descriptor.SourceCodeInfo{") + if this.Location != nil { + s = append(s, "Location: "+fmt.Sprintf("%#v", this.Location)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *SourceCodeInfo_Location) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 9) + s = append(s, "&descriptor.SourceCodeInfo_Location{") + if this.Path != nil { + s = append(s, "Path: "+fmt.Sprintf("%#v", this.Path)+",\n") + } + if this.Span != nil { + s = append(s, "Span: "+fmt.Sprintf("%#v", this.Span)+",\n") + } + if this.LeadingComments != nil { + s = append(s, "LeadingComments: "+valueToGoStringDescriptor(this.LeadingComments, "string")+",\n") + } + if this.TrailingComments != nil { + s = append(s, "TrailingComments: "+valueToGoStringDescriptor(this.TrailingComments, "string")+",\n") + } + if this.LeadingDetachedComments != nil { + s = append(s, "LeadingDetachedComments: "+fmt.Sprintf("%#v", this.LeadingDetachedComments)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *GeneratedCodeInfo) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 5) + s = append(s, "&descriptor.GeneratedCodeInfo{") + if this.Annotation != nil { + s = append(s, "Annotation: "+fmt.Sprintf("%#v", this.Annotation)+",\n") + } + if this.XXX_unrecognized != nil { + s = append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func (this *GeneratedCodeInfo_Annotation) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 8) + s = append(s, "&descriptor.GeneratedCodeInfo_Annotation{") + if this.Path != nil { + s = append(s, "Path: "+fmt.Sprintf("%#v", this.Path)+",\n") + } + if this.SourceFile != nil { + s = append(s, "SourceFile: "+valueToGoStringDescriptor(this.SourceFile, "string")+",\n") + } + if this.Begin != nil { + s = append(s, "Begin: "+valueToGoStringDescriptor(this.Begin, "int32")+",\n") + } + if this.End != nil { + s = append(s, "End: "+valueToGoStringDescriptor(this.End, "int32")+",\n") + } + if this.XXX_unrecognized != nil { + s = 
append(s, "XXX_unrecognized:"+fmt.Sprintf("%#v", this.XXX_unrecognized)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func valueToGoStringDescriptor(v interface{}, typ string) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv) +} +func extensionToGoStringDescriptor(m proto.Message) string { + e := proto.GetUnsafeExtensionsMap(m) + if e == nil { + return "nil" + } + s := "proto.NewUnsafeXXX_InternalExtensions(map[int32]proto.Extension{" + keys := make([]int, 0, len(e)) + for k := range e { + keys = append(keys, int(k)) + } + sort.Ints(keys) + ss := []string{} + for _, k := range keys { + ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString()) + } + s += strings.Join(ss, ",") + "})" + return s +} diff --git a/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/helper.go b/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/helper.go new file mode 100644 index 000000000..e0846a357 --- /dev/null +++ b/vendor/github.com/gogo/protobuf/protoc-gen-gogo/descriptor/helper.go @@ -0,0 +1,390 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. All rights reserved. +// http://github.com/gogo/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package descriptor + +import ( + "strings" +) + +func (msg *DescriptorProto) GetMapFields() (*FieldDescriptorProto, *FieldDescriptorProto) { + if !msg.GetOptions().GetMapEntry() { + return nil, nil + } + return msg.GetField()[0], msg.GetField()[1] +} + +func dotToUnderscore(r rune) rune { + if r == '.' 
{ + return '_' + } + return r +} + +func (field *FieldDescriptorProto) WireType() (wire int) { + switch *field.Type { + case FieldDescriptorProto_TYPE_DOUBLE: + return 1 + case FieldDescriptorProto_TYPE_FLOAT: + return 5 + case FieldDescriptorProto_TYPE_INT64: + return 0 + case FieldDescriptorProto_TYPE_UINT64: + return 0 + case FieldDescriptorProto_TYPE_INT32: + return 0 + case FieldDescriptorProto_TYPE_UINT32: + return 0 + case FieldDescriptorProto_TYPE_FIXED64: + return 1 + case FieldDescriptorProto_TYPE_FIXED32: + return 5 + case FieldDescriptorProto_TYPE_BOOL: + return 0 + case FieldDescriptorProto_TYPE_STRING: + return 2 + case FieldDescriptorProto_TYPE_GROUP: + return 2 + case FieldDescriptorProto_TYPE_MESSAGE: + return 2 + case FieldDescriptorProto_TYPE_BYTES: + return 2 + case FieldDescriptorProto_TYPE_ENUM: + return 0 + case FieldDescriptorProto_TYPE_SFIXED32: + return 5 + case FieldDescriptorProto_TYPE_SFIXED64: + return 1 + case FieldDescriptorProto_TYPE_SINT32: + return 0 + case FieldDescriptorProto_TYPE_SINT64: + return 0 + } + panic("unreachable") +} + +func (field *FieldDescriptorProto) GetKeyUint64() (x uint64) { + packed := field.IsPacked() + wireType := field.WireType() + fieldNumber := field.GetNumber() + if packed { + wireType = 2 + } + x = uint64(uint32(fieldNumber)<<3 | uint32(wireType)) + return x +} + +func (field *FieldDescriptorProto) GetKey3Uint64() (x uint64) { + packed := field.IsPacked3() + wireType := field.WireType() + fieldNumber := field.GetNumber() + if packed { + wireType = 2 + } + x = uint64(uint32(fieldNumber)<<3 | uint32(wireType)) + return x +} + +func (field *FieldDescriptorProto) GetKey() []byte { + x := field.GetKeyUint64() + i := 0 + keybuf := make([]byte, 0) + for i = 0; x > 127; i++ { + keybuf = append(keybuf, 0x80|uint8(x&0x7F)) + x >>= 7 + } + keybuf = append(keybuf, uint8(x)) + return keybuf +} + +func (field *FieldDescriptorProto) GetKey3() []byte { + x := field.GetKey3Uint64() + i := 0 + keybuf := make([]byte, 0) + for i = 0; x > 127; i++ { + keybuf = append(keybuf, 0x80|uint8(x&0x7F)) + x >>= 7 + } + keybuf = append(keybuf, uint8(x)) + return keybuf +} + +func (desc *FileDescriptorSet) GetField(packageName, messageName, fieldName string) *FieldDescriptorProto { + msg := desc.GetMessage(packageName, messageName) + if msg == nil { + return nil + } + for _, field := range msg.GetField() { + if field.GetName() == fieldName { + return field + } + } + return nil +} + +func (file *FileDescriptorProto) GetMessage(typeName string) *DescriptorProto { + for _, msg := range file.GetMessageType() { + if msg.GetName() == typeName { + return msg + } + nes := file.GetNestedMessage(msg, strings.TrimPrefix(typeName, msg.GetName()+".")) + if nes != nil { + return nes + } + } + return nil +} + +func (file *FileDescriptorProto) GetNestedMessage(msg *DescriptorProto, typeName string) *DescriptorProto { + for _, nes := range msg.GetNestedType() { + if nes.GetName() == typeName { + return nes + } + res := file.GetNestedMessage(nes, strings.TrimPrefix(typeName, nes.GetName()+".")) + if res != nil { + return res + } + } + return nil +} + +func (desc *FileDescriptorSet) GetMessage(packageName string, typeName string) *DescriptorProto { + for _, file := range desc.GetFile() { + if strings.Map(dotToUnderscore, file.GetPackage()) != strings.Map(dotToUnderscore, packageName) { + continue + } + for _, msg := range file.GetMessageType() { + if msg.GetName() == typeName { + return msg + } + } + for _, msg := range file.GetMessageType() { + for _, nes := range 
msg.GetNestedType() { + if nes.GetName() == typeName { + return nes + } + if msg.GetName()+"."+nes.GetName() == typeName { + return nes + } + } + } + } + return nil +} + +func (desc *FileDescriptorSet) IsProto3(packageName string, typeName string) bool { + for _, file := range desc.GetFile() { + if strings.Map(dotToUnderscore, file.GetPackage()) != strings.Map(dotToUnderscore, packageName) { + continue + } + for _, msg := range file.GetMessageType() { + if msg.GetName() == typeName { + return file.GetSyntax() == "proto3" + } + } + for _, msg := range file.GetMessageType() { + for _, nes := range msg.GetNestedType() { + if nes.GetName() == typeName { + return file.GetSyntax() == "proto3" + } + if msg.GetName()+"."+nes.GetName() == typeName { + return file.GetSyntax() == "proto3" + } + } + } + } + return false +} + +func (msg *DescriptorProto) IsExtendable() bool { + return len(msg.GetExtensionRange()) > 0 +} + +func (desc *FileDescriptorSet) FindExtension(packageName string, typeName string, fieldName string) (extPackageName string, field *FieldDescriptorProto) { + parent := desc.GetMessage(packageName, typeName) + if parent == nil { + return "", nil + } + if !parent.IsExtendable() { + return "", nil + } + extendee := "." + packageName + "." + typeName + for _, file := range desc.GetFile() { + for _, ext := range file.GetExtension() { + if strings.Map(dotToUnderscore, file.GetPackage()) == strings.Map(dotToUnderscore, packageName) { + if !(ext.GetExtendee() == typeName || ext.GetExtendee() == extendee) { + continue + } + } else { + if ext.GetExtendee() != extendee { + continue + } + } + if ext.GetName() == fieldName { + return file.GetPackage(), ext + } + } + } + return "", nil +} + +func (desc *FileDescriptorSet) FindExtensionByFieldNumber(packageName string, typeName string, fieldNum int32) (extPackageName string, field *FieldDescriptorProto) { + parent := desc.GetMessage(packageName, typeName) + if parent == nil { + return "", nil + } + if !parent.IsExtendable() { + return "", nil + } + extendee := "." + packageName + "." 
+ typeName + for _, file := range desc.GetFile() { + for _, ext := range file.GetExtension() { + if strings.Map(dotToUnderscore, file.GetPackage()) == strings.Map(dotToUnderscore, packageName) { + if !(ext.GetExtendee() == typeName || ext.GetExtendee() == extendee) { + continue + } + } else { + if ext.GetExtendee() != extendee { + continue + } + } + if ext.GetNumber() == fieldNum { + return file.GetPackage(), ext + } + } + } + return "", nil +} + +func (desc *FileDescriptorSet) FindMessage(packageName string, typeName string, fieldName string) (msgPackageName string, msgName string) { + parent := desc.GetMessage(packageName, typeName) + if parent == nil { + return "", "" + } + field := parent.GetFieldDescriptor(fieldName) + if field == nil { + var extPackageName string + extPackageName, field = desc.FindExtension(packageName, typeName, fieldName) + if field == nil { + return "", "" + } + packageName = extPackageName + } + typeNames := strings.Split(field.GetTypeName(), ".") + if len(typeNames) == 1 { + msg := desc.GetMessage(packageName, typeName) + if msg == nil { + return "", "" + } + return packageName, msg.GetName() + } + if len(typeNames) > 2 { + for i := 1; i < len(typeNames)-1; i++ { + packageName = strings.Join(typeNames[1:len(typeNames)-i], ".") + typeName = strings.Join(typeNames[len(typeNames)-i:], ".") + msg := desc.GetMessage(packageName, typeName) + if msg != nil { + typeNames := strings.Split(msg.GetName(), ".") + if len(typeNames) == 1 { + return packageName, msg.GetName() + } + return strings.Join(typeNames[1:len(typeNames)-1], "."), typeNames[len(typeNames)-1] + } + } + } + return "", "" +} + +func (msg *DescriptorProto) GetFieldDescriptor(fieldName string) *FieldDescriptorProto { + for _, field := range msg.GetField() { + if field.GetName() == fieldName { + return field + } + } + return nil +} + +func (desc *FileDescriptorSet) GetEnum(packageName string, typeName string) *EnumDescriptorProto { + for _, file := range desc.GetFile() { + if strings.Map(dotToUnderscore, file.GetPackage()) != strings.Map(dotToUnderscore, packageName) { + continue + } + for _, enum := range file.GetEnumType() { + if enum.GetName() == typeName { + return enum + } + } + } + return nil +} + +func (f *FieldDescriptorProto) IsEnum() bool { + return *f.Type == FieldDescriptorProto_TYPE_ENUM +} + +func (f *FieldDescriptorProto) IsMessage() bool { + return *f.Type == FieldDescriptorProto_TYPE_MESSAGE +} + +func (f *FieldDescriptorProto) IsBytes() bool { + return *f.Type == FieldDescriptorProto_TYPE_BYTES +} + +func (f *FieldDescriptorProto) IsRepeated() bool { + return f.Label != nil && *f.Label == FieldDescriptorProto_LABEL_REPEATED +} + +func (f *FieldDescriptorProto) IsString() bool { + return *f.Type == FieldDescriptorProto_TYPE_STRING +} + +func (f *FieldDescriptorProto) IsBool() bool { + return *f.Type == FieldDescriptorProto_TYPE_BOOL +} + +func (f *FieldDescriptorProto) IsRequired() bool { + return f.Label != nil && *f.Label == FieldDescriptorProto_LABEL_REQUIRED +} + +func (f *FieldDescriptorProto) IsPacked() bool { + return f.Options != nil && f.GetOptions().GetPacked() +} + +func (f *FieldDescriptorProto) IsPacked3() bool { + if f.IsRepeated() && f.IsScalar() { + if f.Options == nil || f.GetOptions().Packed == nil { + return true + } + return f.Options != nil && f.GetOptions().GetPacked() + } + return false +} + +func (m *DescriptorProto) HasExtension() bool { + return len(m.ExtensionRange) > 0 +} -- cgit v1.2.3 From 50211323bea5aac87f359c8bd39edce16a42d090 Mon Sep 17 00:00:00 2001 From: 
jerryzhuang Date: Fri, 29 Mar 2019 13:07:46 +0800 Subject: fix: instructions to install shim (#21) 1. original LATEST_RELEASE is about containerd-shim-runsc-v1, we should choose the second line gvisor-containerd-shim. lol 2. should use /etc/containerd/gvisor-containerd-shim.toml not yaml : ) fixes #18 Signed-off-by: zhuangqh --- docs/runtime-handler-quickstart.md | 10 +++++----- docs/untrusted-workload-quickstart.md | 10 +++++----- test/e2e/shim-install.sh | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/docs/runtime-handler-quickstart.md b/docs/runtime-handler-quickstart.md index d8fea65fd..e48b2dd1a 100644 --- a/docs/runtime-handler-quickstart.md +++ b/docs/runtime-handler-quickstart.md @@ -19,20 +19,20 @@ later. [embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(release\)/ /^}/) ```shell { # Step 1(release): Install gvisor-containerd-shim -LATEST_RELEASE=$(wget -qO - https://api.github.com/repos/google/gvisor-containerd-shim/releases | grep -oP '(?<="browser_download_url": ")https://[^"]*' | head -1) -wget -O gvisor-containerd-shim +LATEST_RELEASE=$(wget -qO - https://api.github.com/repos/google/gvisor-containerd-shim/releases | grep -oP '(?<="browser_download_url": ")https://[^"]*gvisor-containerd-shim.linux-amd64' | head -1) +wget -O gvisor-containerd-shim ${LATEST_RELEASE} chmod +x gvisor-containerd-shim sudo mv gvisor-containerd-shim /usr/local/bin/gvisor-containerd-shim } ``` 2. Create the configuration for the gvisor shim in - `/etc/containerd/gvisor-containerd-shim.yaml`: + `/etc/containerd/gvisor-containerd-shim.toml`: [embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 2/ /^}/) ```shell -{ # Step 2: Create the gvisor-containerd-shim.yaml -cat < Date: Mon, 28 Oct 2019 18:48:35 -0700 Subject: Deflake TestCheckpointRestore PiperOrigin-RevId: 277189064 --- test/e2e/integration_test.go | 3 +++ 1 file changed, 3 insertions(+) (limited to 'test') diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 7cc0de129..28064e557 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -175,6 +175,9 @@ func TestCheckpointRestore(t *testing.T) { t.Fatal(err) } + // TODO(b/143498576): Remove after github.com/moby/moby/issues/38963 is fixed. + time.Sleep(1 * time.Second) + if err := d.Restore("test"); err != nil { t.Fatal("docker restore failed:", err) } -- cgit v1.2.3 From 29273b03842a85bce8314799348231520ceb6e9c Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 29 Oct 2019 10:03:18 -0700 Subject: Disallow execveat on interpreter scripts with fd opened with O_CLOEXEC. When an interpreter script is opened with O_CLOEXEC and the resulting fd is passed into execveat, an ENOENT error should occur (the script would otherwise be inaccessible to the interpreter). This matches the actual behavior of Linux's execveat. 
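To make that behavior concrete, here is a minimal Go sketch (illustrative only, not part of this patch) of the scenario the change covers. It assumes a Linux or gVisor host, golang.org/x/sys/unix, and a hypothetical executable "#!" script at /tmp/exit.sh; execveat(2) is invoked through its raw syscall number because x/sys/unix does not provide a dedicated wrapper.

```go
package main

import (
	"fmt"
	"syscall"
	"unsafe"

	"golang.org/x/sys/unix"
)

// execveatEmptyPath calls execveat(2) on an already-open fd. It only returns
// on failure, since a successful execveat replaces the current program image.
func execveatEmptyPath(fd int, argv, envv []string, flags int) error {
	argvp, err := syscall.SlicePtrFromStrings(argv) // NULL-terminated argv array
	if err != nil {
		return err
	}
	envp, err := syscall.SlicePtrFromStrings(envv) // NULL-terminated envp array
	if err != nil {
		return err
	}
	empty, err := unix.BytePtrFromString("") // pathname is "" with AT_EMPTY_PATH
	if err != nil {
		return err
	}
	_, _, errno := unix.Syscall6(unix.SYS_EXECVEAT, uintptr(fd),
		uintptr(unsafe.Pointer(empty)),
		uintptr(unsafe.Pointer(&argvp[0])),
		uintptr(unsafe.Pointer(&envp[0])),
		uintptr(flags), 0)
	return errno
}

func main() {
	const scriptPath = "/tmp/exit.sh" // hypothetical executable "#!" script

	// Opening the script with O_CLOEXEC means the script would be
	// inaccessible to its interpreter after the exec, so execveat is
	// expected to fail up front with ENOENT.
	fd, err := unix.Open(scriptPath, unix.O_RDONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fd)

	err = execveatEmptyPath(fd, []string{scriptPath}, nil, unix.AT_EMPTY_PATH)
	fmt.Printf("execveat: %v (ENOENT expected: %v)\n", err, err == unix.ENOENT)
}
```

Run against a kernel (or a gVisor build) with this change, the program should report an ENOENT error instead of executing the script.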
PiperOrigin-RevId: 277306680 --- pkg/sentry/kernel/kernel.go | 1 + pkg/sentry/loader/loader.go | 9 +++++++++ pkg/sentry/syscalls/linux/sys_thread.go | 5 ++++- test/syscalls/linux/exec.cc | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index fcfe7a16d..e64d648e2 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -812,6 +812,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, ResolveFinal: true, Filename: args.Filename, File: args.File, + CloseOnExec: false, Argv: args.Argv, Envv: args.Envv, Features: k.featureSet, diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index 818941762..f75ebe08a 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -66,6 +66,12 @@ type LoadArgs struct { // nil, then File will be loaded and Filename will be ignored. File *fs.File + // CloseOnExec indicates that the executable (or one of its parent + // directories) was opened with O_CLOEXEC. If the executable is an + // interpreter script, then cause an ENOENT error to occur, since the + // script would otherwise be inaccessible to the interpreter. + CloseOnExec bool + // Argv is the vector of arguments to pass to the executable. Argv []string @@ -279,6 +285,9 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context d.IncRef() return loaded, ac, d, args.Argv, err case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)): + if args.CloseOnExec { + return loadedELF{}, nil, nil, nil, syserror.ENOENT + } args.Filename, args.Argv, err = parseInterpreterScript(ctx, args.Filename, args.File, args.Argv) if err != nil { ctx.Infof("Error loading interpreter script: %v", err) diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 2476f8858..4115116ff 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -120,6 +120,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user var wd *fs.Dirent var executable *fs.File + var closeOnExec bool if dirFD == linux.AT_FDCWD || path.IsAbs(pathname) { // Even if the pathname is absolute, we may still need the wd // for interpreter scripts if the path of the interpreter is @@ -127,11 +128,12 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user wd = t.FSContext().WorkingDirectory() } else { // Need to extract the given FD. 
- f := t.GetFile(dirFD) + f, fdFlags := t.FDTable().Get(dirFD) if f == nil { return 0, nil, syserror.EBADF } defer f.DecRef() + closeOnExec = fdFlags.CloseOnExec if atEmptyPath && len(pathname) == 0 { executable = f @@ -157,6 +159,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user ResolveFinal: resolveFinal, Filename: pathname, File: executable, + CloseOnExec: closeOnExec, Argv: argv, Envv: envv, Features: t.Arch().FeatureSet(), diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index 21a5ffd40..a9067df2a 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -681,6 +681,39 @@ TEST(ExecveatTest, SymlinkNoFollowWithNormalFile) { ArgEnvExitStatus(0, 0), ""); } +TEST(ExecveatTest, BasicWithCloexecFD) { + std::string path = WorkloadPath(kBasicWorkload); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CLOEXEC)); + + CheckExecveat(fd.get(), "", {path}, {}, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH, + ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n")); +} + +TEST(ExecveatTest, InterpreterScriptWithCloexecFD) { + std::string path = WorkloadPath(kExitScript); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CLOEXEC)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(fd.get(), "", {path}, {}, + AT_EMPTY_PATH, /*child=*/nullptr, + &execve_errno)); + EXPECT_EQ(execve_errno, ENOENT); +} + +TEST(ExecveatTest, InterpreterScriptWithCloexecDirFD) { + std::string absolute_path = WorkloadPath(kExitScript); + std::string parent_dir = std::string(Dirname(absolute_path)); + std::string base = std::string(Basename(absolute_path)); + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_CLOEXEC | O_DIRECTORY)); + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat(dirfd.get(), base, {base}, {}, + /*flags=*/0, /*child=*/nullptr, + &execve_errno)); + EXPECT_EQ(execve_errno, ENOENT); +} + TEST(ExecveatTest, InvalidFlags) { int execve_errno; ASSERT_NO_ERRNO_AND_VALUE(ForkAndExecveat( -- cgit v1.2.3 From 392c56149531c82ef3c07e2899939c0d63f0980b Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Tue, 29 Oct 2019 12:15:33 -0700 Subject: Fix PollWithFullBufferBlocks. Set the snd/rcv buffer sizes so that the test is deterministic and runs in a reasonable amount of time. It also ensures that we disable any auto-tuning of the send/receive buffer which may happen. PiperOrigin-RevId: 277337232 --- test/syscalls/linux/tcp_socket.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index bfa031bce..277d6835a 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -394,8 +394,15 @@ TEST_P(TcpSocketTest, PollWithFullBufferBlocks) { sizeof(tcp_nodelay_flag)), SyscallSucceeds()); + // Set a 256KB send/receive buffer. + int buf_sz = 1 << 18; + EXPECT_THAT(setsockopt(t_, SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &buf_sz, sizeof(buf_sz)), + SyscallSucceedsWithValue(0)); + // Create a large buffer that will be used for sending. - std::vector buf(10 * sendbuf_size_); + std::vector buf(1 << 16); // Write until we receive an error. 
while (RetryEINTR(send)(s_, buf.data(), buf.size(), 0) != -1) { -- cgit v1.2.3 From 38330e93774e68324d8f43adb27178453dee18b6 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 29 Oct 2019 13:58:09 -0700 Subject: Update symlink traversal limit when resolving interpreter path. When execveat is called on an interpreter script, the symlink count for resolving the script path should be separate from the count for resolving the the corresponding interpreter. An ELOOP error should not occur if we do not hit the symlink limit along any individual path, even if the total number of symlinks encountered exceeds the limit. Closes #574 PiperOrigin-RevId: 277358474 --- pkg/sentry/loader/elf.go | 2 ++ pkg/sentry/loader/loader.go | 2 ++ test/syscalls/linux/exec.cc | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'test') diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go index 3ea037e4d..c2c3ec06e 100644 --- a/pkg/sentry/loader/elf.go +++ b/pkg/sentry/loader/elf.go @@ -644,6 +644,8 @@ func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error // resolved, the interpreter should still be resolved if it is // a symlink. args.ResolveFinal = true + // Refresh the traversal limit. + *args.RemainingTraversals = linux.MaxSymlinkTraversals args.Filename = bin.interpreter d, i, err := openPath(ctx, args) if err != nil { diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index 803e7d41e..b03eeb005 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -293,6 +293,8 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context ctx.Infof("Error loading interpreter script: %v", err) return loadedELF{}, nil, nil, nil, err } + // Refresh the traversal limit for the interpreter. + *args.RemainingTraversals = linux.MaxSymlinkTraversals default: ctx.Infof("Unknown magic: %v", hdr) return loadedELF{}, nil, nil, nil, syserror.ENOEXEC diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index a9067df2a..581f03533 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -533,6 +533,47 @@ TEST(ExecTest, CloexecEventfd) { W_EXITCODE(0, 0), ""); } +constexpr int kLinuxMaxSymlinks = 40; + +TEST(ExecTest, SymlinkLimitExceeded) { + std::string path = WorkloadPath(kBasicWorkload); + + // Hold onto TempPath objects so they are not destructed prematurely. + std::vector symlinks; + for (int i = 0; i < kLinuxMaxSymlinks + 1; i++) { + symlinks.push_back( + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateSymlinkTo("/tmp", path))); + path = symlinks[i].path(); + } + + int execve_errno; + ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(path, {path}, {}, /*child=*/nullptr, &execve_errno)); + EXPECT_EQ(execve_errno, ELOOP); +} + +TEST(ExecTest, SymlinkLimitRefreshedForInterpreter) { + std::string tmp_dir = "/tmp"; + std::string interpreter_path = "/bin/echo"; + TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + tmp_dir, absl::StrCat("#!", interpreter_path), 0755)); + std::string script_path = script.path(); + + // Hold onto TempPath objects so they are not destructed prematurely. 
+ std::vector interpreter_symlinks; + std::vector script_symlinks; + for (int i = 0; i < kLinuxMaxSymlinks; i++) { + interpreter_symlinks.push_back(ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(tmp_dir, interpreter_path))); + interpreter_path = interpreter_symlinks[i].path(); + script_symlinks.push_back(ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(tmp_dir, script_path))); + script_path = script_symlinks[i].path(); + } + + CheckExec(script_path, {script_path}, {}, ArgEnvExitStatus(0, 0), ""); +} + TEST(ExecveatTest, BasicWithFDCWD) { std::string path = WorkloadPath(kBasicWorkload); CheckExecveat(AT_FDCWD, path, {path}, {}, /*flags=*/0, ArgEnvExitStatus(0, 0), -- cgit v1.2.3 From 8bc7b8dba2dcc339ab5bd1b05c83f74a6211a7d0 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 30 Oct 2019 13:29:56 -0700 Subject: Clean up typos in test names. PiperOrigin-RevId: 277572791 --- test/syscalls/linux/socket_ip_tcp_generic.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index 7e0deda05..592448289 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -30,7 +30,7 @@ namespace gvisor { namespace testing { -TEST_P(TCPSocketPairTest, TcpInfoSucceedes) { +TEST_P(TCPSocketPairTest, TcpInfoSucceeds) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); struct tcp_info opt = {}; @@ -39,7 +39,7 @@ TEST_P(TCPSocketPairTest, TcpInfoSucceedes) { SyscallSucceeds()); } -TEST_P(TCPSocketPairTest, ShortTcpInfoSucceedes) { +TEST_P(TCPSocketPairTest, ShortTcpInfoSucceeds) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); struct tcp_info opt = {}; @@ -48,7 +48,7 @@ TEST_P(TCPSocketPairTest, ShortTcpInfoSucceedes) { SyscallSucceeds()); } -TEST_P(TCPSocketPairTest, ZeroTcpInfoSucceedes) { +TEST_P(TCPSocketPairTest, ZeroTcpInfoSucceeds) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); struct tcp_info opt = {}; -- cgit v1.2.3 From db37483cb6acf55b66132d534bb734f09555b1cf Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 30 Oct 2019 15:32:20 -0700 Subject: Store endpoints inside multiPortEndpoint in a sorted order It is required to guarantee the same order of endpoints after save/restore. PiperOrigin-RevId: 277598665 --- pkg/tcpip/stack/registration.go | 3 + pkg/tcpip/stack/stack.go | 29 ++++++++ pkg/tcpip/stack/transport_demuxer.go | 10 +++ pkg/tcpip/stack/transport_test.go | 11 ++- pkg/tcpip/transport/icmp/endpoint.go | 7 ++ pkg/tcpip/transport/tcp/endpoint.go | 7 ++ pkg/tcpip/transport/udp/endpoint.go | 7 ++ runsc/boot/loader.go | 5 +- test/syscalls/linux/socket_inet_loopback.cc | 107 ++++++++++++++++++++++++++++ 9 files changed, 181 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index 0360187b8..94015ba54 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -60,6 +60,9 @@ const ( // TransportEndpoint is the interface that needs to be implemented by transport // protocol (e.g., tcp, udp) endpoints that can handle packets. type TransportEndpoint interface { + // UniqueID returns an unique ID for this transport endpoint. + UniqueID() uint64 + // HandlePacket is called by the stack when new packets arrive to // this transport endpoint. 
HandlePacket(r *Route, id TransportEndpointID, vv buffer.VectorisedView) diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 6d6ddc0ff..115a6fcb8 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -22,6 +22,7 @@ package stack import ( "encoding/binary" "sync" + "sync/atomic" "time" "golang.org/x/time/rate" @@ -344,6 +345,13 @@ type ResumableEndpoint interface { Resume(*Stack) } +// uniqueIDGenerator is a default unique ID generator. +type uniqueIDGenerator uint64 + +func (u *uniqueIDGenerator) UniqueID() uint64 { + return atomic.AddUint64((*uint64)(u), 1) +} + // Stack is a networking stack, with all supported protocols, NICs, and route // table. type Stack struct { @@ -411,6 +419,14 @@ type Stack struct { // ndpDisp is the NDP event dispatcher that is used to send the netstack // integrator NDP related events. ndpDisp NDPDispatcher + + // uniqueIDGenerator is a generator of unique identifiers. + uniqueIDGenerator UniqueID +} + +// UniqueID is an abstract generator of unique identifiers. +type UniqueID interface { + UniqueID() uint64 } // Options contains optional Stack configuration. @@ -434,6 +450,9 @@ type Options struct { // stack (false). HandleLocal bool + // UniqueID is an optional generator of unique identifiers. + UniqueID UniqueID + // NDPConfigs is the default NDP configurations used by interfaces. // // By default, NDPConfigs will have a zero value for its @@ -506,6 +525,10 @@ func New(opts Options) *Stack { clock = &tcpip.StdClock{} } + if opts.UniqueID == nil { + opts.UniqueID = new(uniqueIDGenerator) + } + // Make sure opts.NDPConfigs contains valid values only. opts.NDPConfigs.validate() @@ -524,6 +547,7 @@ func New(opts Options) *Stack { portSeed: generateRandUint32(), ndpConfigs: opts.NDPConfigs, autoGenIPv6LinkLocal: opts.AutoGenIPv6LinkLocal, + uniqueIDGenerator: opts.UniqueID, ndpDisp: opts.NDPDisp, } @@ -551,6 +575,11 @@ func New(opts Options) *Stack { return s } +// UniqueID returns a unique identifier. +func (s *Stack) UniqueID() uint64 { + return s.uniqueIDGenerator.UniqueID() +} + // SetNetworkProtocolOption allows configuring individual protocol level // options. This method returns an error if the protocol is not supported or // option is not supported by the protocol implementation or the provided value diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index f633632f0..ccd3d030e 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -17,6 +17,7 @@ package stack import ( "fmt" "math/rand" + "sort" "sync" "gvisor.dev/gvisor/pkg/tcpip" @@ -310,6 +311,15 @@ func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, reusePo // endpointsMap. This will allow us to remove endpoint from the array fast. ep.endpointsMap[t] = len(ep.endpointsArr) ep.endpointsArr = append(ep.endpointsArr, t) + + // ep.endpointsArr is sorted by endpoint unique IDs, so that endpoints + // can be restored in the same order. 
+ sort.Slice(ep.endpointsArr, func(i, j int) bool { + return ep.endpointsArr[i].UniqueID() < ep.endpointsArr[j].UniqueID() + }) + for i, e := range ep.endpointsArr { + ep.endpointsMap[e] = i + } return nil } diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go index ae6fda3a9..203e79f56 100644 --- a/pkg/tcpip/stack/transport_test.go +++ b/pkg/tcpip/stack/transport_test.go @@ -43,6 +43,7 @@ type fakeTransportEndpoint struct { proto *fakeTransportProtocol peerAddr tcpip.Address route stack.Route + uniqueID uint64 // acceptQueue is non-nil iff bound. acceptQueue []fakeTransportEndpoint @@ -56,8 +57,8 @@ func (f *fakeTransportEndpoint) Stats() tcpip.EndpointStats { return nil } -func newFakeTransportEndpoint(s *stack.Stack, proto *fakeTransportProtocol, netProto tcpip.NetworkProtocolNumber) tcpip.Endpoint { - return &fakeTransportEndpoint{stack: s, TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto}, proto: proto} +func newFakeTransportEndpoint(s *stack.Stack, proto *fakeTransportProtocol, netProto tcpip.NetworkProtocolNumber, uniqueID uint64) tcpip.Endpoint { + return &fakeTransportEndpoint{stack: s, TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto}, proto: proto, uniqueID: uniqueID} } func (f *fakeTransportEndpoint) Close() { @@ -144,6 +145,10 @@ func (f *fakeTransportEndpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { return nil } +func (f *fakeTransportEndpoint) UniqueID() uint64 { + return f.uniqueID +} + func (f *fakeTransportEndpoint) ConnectEndpoint(e tcpip.Endpoint) *tcpip.Error { return nil } @@ -251,7 +256,7 @@ func (*fakeTransportProtocol) Number() tcpip.TransportProtocolNumber { } func (f *fakeTransportProtocol) NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, _ *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { - return newFakeTransportEndpoint(stack, f, netProto), nil + return newFakeTransportEndpoint(stack, f, netProto, stack.UniqueID()), nil } func (f *fakeTransportProtocol) NewRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, _ *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index d0dd383fd..114a69b4e 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -58,6 +58,7 @@ type endpoint struct { // immutable. stack *stack.Stack `state:"manual"` waiterQueue *waiter.Queue + uniqueID uint64 // The following fields are used to manage the receive queue, and are // protected by rcvMu. @@ -90,9 +91,15 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt rcvBufSizeMax: 32 * 1024, sndBufSize: 32 * 1024, state: stateInitial, + uniqueID: s.UniqueID(), }, nil } +// UniqueID implements stack.TransportEndpoint.UniqueID. +func (e *endpoint) UniqueID() uint64 { + return e.uniqueID +} + // Close puts the endpoint in a closed state and frees all resources // associated with it. func (e *endpoint) Close() { diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 8a3ca0f1b..a1efd8d55 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -287,6 +287,7 @@ type endpoint struct { // change throughout the lifetime of the endpoint. stack *stack.Stack `state:"manual"` waiterQueue *waiter.Queue `state:"wait"` + uniqueID uint64 // lastError represents the last error that the endpoint reported; // access to it is protected by the following mutex. 
@@ -504,6 +505,11 @@ type endpoint struct { stats Stats `state:"nosave"` } +// UniqueID implements stack.TransportEndpoint.UniqueID. +func (e *endpoint) UniqueID() uint64 { + return e.uniqueID +} + // calculateAdvertisedMSS calculates the MSS to advertise. // // If userMSS is non-zero and is not greater than the maximum possible MSS for @@ -565,6 +571,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue interval: 75 * time.Second, count: 9, }, + uniqueID: s.UniqueID(), } var ss SendBufferSizeOption diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index cda302bb7..68977dc25 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -80,6 +80,7 @@ type endpoint struct { // change throughout the lifetime of the endpoint. stack *stack.Stack `state:"manual"` waiterQueue *waiter.Queue + uniqueID uint64 // The following fields are used to manage the receive queue, and are // protected by rcvMu. @@ -160,9 +161,15 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue rcvBufSizeMax: 32 * 1024, sndBufSize: 32 * 1024, state: StateInitial, + uniqueID: s.UniqueID(), } } +// UniqueID implements stack.TransportEndpoint.UniqueID. +func (e *endpoint) UniqueID() uint64 { + return e.uniqueID +} + // Close puts the endpoint in a closed state and frees all resources // associated with it. func (e *endpoint) Close() { diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 0c0eba99e..86df384f8 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -232,7 +232,7 @@ func New(args Args) (*Loader, error) { // this point. Netns is configured before Run() is called. Netstack is // configured using a control uRPC message. Host network is configured inside // Run(). - networkStack, err := newEmptyNetworkStack(args.Conf, k) + networkStack, err := newEmptyNetworkStack(args.Conf, k, k) if err != nil { return nil, fmt.Errorf("creating network: %v", err) } @@ -905,7 +905,7 @@ func (l *Loader) WaitExit() kernel.ExitStatus { return l.k.GlobalInit().ExitStatus() } -func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { +func newEmptyNetworkStack(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { switch conf.Network { case NetworkHost: return hostinet.NewStack(), nil @@ -923,6 +923,7 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { // Enable raw sockets for users with sufficient // privileges. RawFactory: raw.EndpointFactory{}, + UniqueID: uniqueID, })} // Enable SACK Recovery. diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 322ee07ad..ab375aaaf 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -516,6 +517,112 @@ TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThread) { EquivalentWithin((kConnectAttempts / kThreadCount), 0.10)); } +TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + sockaddr_storage listen_addr = listener.addr; + sockaddr_storage conn_addr = connector.addr; + constexpr int kThreadCount = 3; + + // TODO(b/141211329): endpointsByNic.seed has to be saved/restored. + const DisableSave ds141211329; + + // Create listening sockets. 
+ FileDescriptor listener_fds[kThreadCount]; + for (int i = 0; i < kThreadCount; i++) { + listener_fds[i] = + ASSERT_NO_ERRNO_AND_VALUE(Socket(listener.family(), SOCK_DGRAM, 0)); + int fd = listener_fds[i].get(); + + ASSERT_THAT(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT( + bind(fd, reinterpret_cast(&listen_addr), listener.addr_len), + SyscallSucceeds()); + + // On the first bind we need to determine which port was bound. + if (i != 0) { + continue; + } + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT( + getsockname(listener_fds[0].get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port)); + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + } + + constexpr int kConnectAttempts = 10; + FileDescriptor client_fds[kConnectAttempts]; + + // Do the first run without save/restore. + DisableSave ds; + for (int i = 0; i < kConnectAttempts; i++) { + client_fds[i] = + ASSERT_NO_ERRNO_AND_VALUE(Socket(connector.family(), SOCK_DGRAM, 0)); + EXPECT_THAT(RetryEINTR(sendto)(client_fds[i].get(), &i, sizeof(i), 0, + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceedsWithValue(sizeof(i))); + } + ds.reset(); + + // Check that a mapping of client and server sockets has + // not been change after save/restore. + for (int i = 0; i < kConnectAttempts; i++) { + EXPECT_THAT(RetryEINTR(sendto)(client_fds[i].get(), &i, sizeof(i), 0, + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceedsWithValue(sizeof(i))); + } + + int epollfd; + ASSERT_THAT(epollfd = epoll_create1(0), SyscallSucceeds()); + + for (int i = 0; i < kThreadCount; i++) { + int fd = listener_fds[i].get(); + struct epoll_event ev; + ev.data.fd = fd; + ev.events = EPOLLIN; + ASSERT_THAT(epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev), SyscallSucceeds()); + } + + std::map portToFD; + + for (int i = 0; i < kConnectAttempts * 2; i++) { + struct sockaddr_storage addr = {}; + socklen_t addrlen = sizeof(addr); + struct epoll_event ev; + int data, fd; + + ASSERT_THAT(epoll_wait(epollfd, &ev, 1, -1), SyscallSucceedsWithValue(1)); + + fd = ev.data.fd; + EXPECT_THAT(RetryEINTR(recvfrom)(fd, &data, sizeof(data), 0, + reinterpret_cast(&addr), + &addrlen), + SyscallSucceedsWithValue(sizeof(data))); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(connector.family(), addr)); + auto prev_port = portToFD.find(port); + // Check that all packets from one client have been delivered to the same + // server socket. 
+ if (prev_port == portToFD.end()) { + portToFD[port] = ev.data.fd; + } else { + EXPECT_EQ(portToFD[port], ev.data.fd); + } + } +} + INSTANTIATE_TEST_SUITE_P( All, SocketInetReusePortTest, ::testing::Values( -- cgit v1.2.3 From df125c986948fbbae2bc30de33213e2095762a86 Mon Sep 17 00:00:00 2001 From: Brad Burlage Date: Wed, 30 Oct 2019 16:14:30 -0700 Subject: Add Kokoro config for new runtime tests PiperOrigin-RevId: 277607217 --- kokoro/runtime_tests.cfg | 1 + scripts/runtime_tests.sh | 25 +++++++++++++++++++++++++ test/runtimes/BUILD | 10 +++++----- test/runtimes/build_defs.bzl | 39 +++++++++++++++++++++++++++------------ 4 files changed, 58 insertions(+), 17 deletions(-) create mode 100644 kokoro/runtime_tests.cfg create mode 100755 scripts/runtime_tests.sh (limited to 'test') diff --git a/kokoro/runtime_tests.cfg b/kokoro/runtime_tests.cfg new file mode 100644 index 000000000..7d56d5aca --- /dev/null +++ b/kokoro/runtime_tests.cfg @@ -0,0 +1 @@ +build_file: "repo/scripts/runtime_tests.sh" diff --git a/scripts/runtime_tests.sh b/scripts/runtime_tests.sh new file mode 100755 index 000000000..fb82b2491 --- /dev/null +++ b/scripts/runtime_tests.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +source $(dirname $0)/common.sh + +if [ ! -v RUNTIME ]; then + echo 'Must set $RUNTIME' >&2 + exit 1 +fi + +install_runsc_for_test runtimes +test_runsc "//test/runtimes:${RUNTIME}_test" diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 2e125525b..367295206 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -16,32 +16,32 @@ go_binary( ) runtime_test( + name = "go1.12", blacklist_file = "blacklist_go1.12.csv", - image = "gcr.io/gvisor-presubmit/go1.12", lang = "go", ) runtime_test( + name = "java11", blacklist_file = "blacklist_java11.csv", - image = "gcr.io/gvisor-presubmit/java11", lang = "java", ) runtime_test( + name = "nodejs12.4.0", blacklist_file = "blacklist_nodejs12.4.0.csv", - image = "gcr.io/gvisor-presubmit/nodejs12.4.0", lang = "nodejs", ) runtime_test( + name = "php7.3.6", blacklist_file = "blacklist_php7.3.6.csv", - image = "gcr.io/gvisor-presubmit/php7.3.6", lang = "php", ) runtime_test( + name = "python3.7.3", blacklist_file = "blacklist_python3.7.3.csv", - image = "gcr.io/gvisor-presubmit/python3.7.3", lang = "python", ) diff --git a/test/runtimes/build_defs.bzl b/test/runtimes/build_defs.bzl index 7c11624b4..d458df1fd 100644 --- a/test/runtimes/build_defs.bzl +++ b/test/runtimes/build_defs.bzl @@ -2,32 +2,48 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") -# runtime_test is a macro that will create targets to run the given test target -# with different runtime options. def runtime_test( + name, lang, - image, + image_repo = "gcr.io/gvisor-presubmit", + image_name = None, + blacklist_file = None, shard_count = 50, - size = "enormous", - blacklist_file = ""): + size = "enormous"): + """Generates sh_test and blacklist test targets for a given runtime. 
+ + Args: + name: The name of the runtime being tested. Typically, the lang + version. + This is used in the names of the generated test targets. + lang: The language being tested. + image_repo: The docker repository containing the proctor image to run. + i.e., the prefix to the fully qualified docker image id. + image_name: The name of the image in the image_repo. + Defaults to the test name. + blacklist_file: A test blacklist to pass to the runtime test's runner. + shard_count: See Bazel common test attributes. + size: See Bazel common test attributes. + """ + if image_name == None: + image_name = name args = [ "--lang", lang, "--image", - image, + "/".join([image_repo, image_name]), ] data = [ ":runner", ] - if blacklist_file != "": + if blacklist_file: args += ["--blacklist_file", "test/runtimes/" + blacklist_file] data += [blacklist_file] # Add a test that the blacklist parses correctly. - blacklist_test(lang, blacklist_file) + blacklist_test(name, blacklist_file) sh_test( - name = lang + "_test", + name = name + "_test", srcs = ["runner.sh"], args = args, data = data, @@ -35,15 +51,14 @@ def runtime_test( shard_count = shard_count, tags = [ # Requires docker and runsc to be configured before the test runs. - "manual", "local", ], ) -def blacklist_test(lang, blacklist_file): +def blacklist_test(name, blacklist_file): """Test that a blacklist parses correctly.""" go_test( - name = lang + "_blacklist_test", + name = name + "_blacklist_test", embed = [":runner"], srcs = ["blacklist_test.go"], args = ["--blacklist_file", "test/runtimes/" + blacklist_file], -- cgit v1.2.3 From af6af2c34131c4ec5e3195be99c1deb6a2669c06 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 1 Nov 2019 11:21:06 -0700 Subject: tests: don't use ASSERT_THAT after fork PiperOrigin-RevId: 277965624 --- test/syscalls/linux/semaphore.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc index 40c57f543..e9b131ca9 100644 --- a/test/syscalls/linux/semaphore.cc +++ b/test/syscalls/linux/semaphore.cc @@ -447,9 +447,8 @@ TEST(SemaphoreTest, SemCtlGetPidFork) { const pid_t child_pid = fork(); if (child_pid == 0) { - ASSERT_THAT(semctl(sem.get(), 0, SETVAL, 1), SyscallSucceeds()); - ASSERT_THAT(semctl(sem.get(), 0, GETPID), - SyscallSucceedsWithValue(getpid())); + TEST_PCHECK(semctl(sem.get(), 0, SETVAL, 1) == 0); + TEST_PCHECK(semctl(sem.get(), 0, GETPID) == getpid()); _exit(0); } -- cgit v1.2.3 From 2a709a1b7b150cae6121cb97259db5cbeb57b330 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Fri, 1 Nov 2019 11:51:50 -0700 Subject: Add "manual" tag back to runtime tests. PiperOrigin-RevId: 277971910 --- test/runtimes/build_defs.bzl | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test') diff --git a/test/runtimes/build_defs.bzl b/test/runtimes/build_defs.bzl index d458df1fd..6f84ca852 100644 --- a/test/runtimes/build_defs.bzl +++ b/test/runtimes/build_defs.bzl @@ -52,6 +52,8 @@ def runtime_test( tags = [ # Requires docker and runsc to be configured before the test runs. "local", + # Don't include test target in wildcard target patterns. + "manual", ], ) -- cgit v1.2.3 From 515fee5b6d4f3270c951f72283aef79a28d463dd Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Fri, 1 Nov 2019 12:42:04 -0700 Subject: Add SO_PASSCRED support to netlink sockets Since we only supporting sending messages from the kernel, the peer is always the kernel, simplifying handling. 
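As a rough illustration of what this enables (a sketch under stated assumptions, not part of this patch): a client can set SO_PASSCRED on a netlink socket and read the SCM_CREDENTIALS attached to each received datagram, which always identify the kernel as the sender. The Go program below assumes golang.org/x/sys/unix, a little-endian host (netlink headers use host byte order), and a NETLINK_ROUTE socket issuing an RTM_GETLINK dump; the request is built by hand to keep the example self-contained.

```go
package main

import (
	"encoding/binary"
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, unix.NETLINK_ROUTE)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fd)

	// Ask for SCM_CREDENTIALS control messages on received datagrams.
	if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_PASSCRED, 1); err != nil {
		panic(err)
	}

	// Hand-built RTM_GETLINK dump request: 16-byte nlmsghdr followed by a
	// padded rtgenmsg (family AF_UNSPEC, left as zero).
	req := make([]byte, unix.NLMSG_HDRLEN+4)
	binary.LittleEndian.PutUint32(req[0:4], uint32(len(req)))                   // nlmsg_len
	binary.LittleEndian.PutUint16(req[4:6], unix.RTM_GETLINK)                   // nlmsg_type
	binary.LittleEndian.PutUint16(req[6:8], unix.NLM_F_REQUEST|unix.NLM_F_DUMP) // nlmsg_flags
	binary.LittleEndian.PutUint32(req[8:12], 1)                                 // nlmsg_seq

	if err := unix.Sendto(fd, req, 0, &unix.SockaddrNetlink{Family: unix.AF_NETLINK}); err != nil {
		panic(err)
	}

	// Receive one response datagram along with its control messages.
	buf := make([]byte, 64*1024)
	oob := make([]byte, unix.CmsgSpace(unix.SizeofUcred))
	_, oobn, _, _, err := unix.Recvmsg(fd, buf, oob, 0)
	if err != nil {
		panic(err)
	}

	cmsgs, err := unix.ParseSocketControlMessage(oob[:oobn])
	if err != nil {
		panic(err)
	}
	for i := range cmsgs {
		c := cmsgs[i]
		if c.Header.Level == unix.SOL_SOCKET && c.Header.Type == unix.SCM_CREDENTIALS {
			cred, err := unix.ParseUnixCredentials(&c)
			if err != nil {
				panic(err)
			}
			// The peer is the kernel: pid 0 and root uid/gid (or the
			// overflow IDs inside a user namespace without root mapped).
			fmt.Printf("sender credentials: pid=%d uid=%d gid=%d\n", cred.Pid, cred.Uid, cred.Gid)
		}
	}
}
```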
There are currently no known users of SO_PASSCRED that would actually receive messages from gVisor, but adding full support is barely more work than stubbing out fake support. Updates #1117 Fixes #1119 PiperOrigin-RevId: 277981465 --- pkg/sentry/socket/netlink/BUILD | 1 + pkg/sentry/socket/netlink/socket.go | 76 ++++++++++++++++++- test/syscalls/linux/socket_netlink_route.cc | 110 +++++++++++++++++++++++++++- 3 files changed, 183 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index f95803f91..79589e3c8 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -20,6 +20,7 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", "//pkg/sentry/safemem", "//pkg/sentry/socket", diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index b2732ca29..05dac4f0a 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/socket" @@ -61,7 +62,7 @@ var netlinkSocketDevice = device.NewAnonDevice() // This implementation only supports userspace sending and receiving messages // to/from the kernel. // -// Socket implements socket.Socket. +// Socket implements socket.Socket and transport.Credentialer. // // +stateify savable type Socket struct { @@ -104,9 +105,13 @@ type Socket struct { // sendBufferSize is the send buffer "size". We don't actually have a // fixed buffer but only consume this many bytes. sendBufferSize uint32 + + // passcred indicates if this socket wants SCM credentials. + passcred bool } var _ socket.Socket = (*Socket)(nil) +var _ transport.Credentialer = (*Socket)(nil) // NewSocket creates a new Socket. func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socket, *syserr.Error) { @@ -172,6 +177,22 @@ func (s *Socket) EventUnregister(e *waiter.Entry) { s.ep.EventUnregister(e) } +// Passcred implements transport.Credentialer.Passcred. +func (s *Socket) Passcred() bool { + s.mu.Lock() + passcred := s.passcred + s.mu.Unlock() + return passcred +} + +// ConnectedPasscred implements transport.Credentialer.ConnectedPasscred. +func (s *Socket) ConnectedPasscred() bool { + // This socket is connected to the kernel, which doesn't need creds. + // + // This is arbitrary, as ConnectedPasscred on this type has no callers. + return false +} + // Ioctl implements fs.FileOperations.Ioctl. func (*Socket) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) { // TODO(b/68878065): no ioctls supported. @@ -309,9 +330,20 @@ func (s *Socket) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem. // We don't have limit on receiving size. 
return int32(math.MaxInt32), nil + case linux.SO_PASSCRED: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + var passcred int32 + if s.Passcred() { + passcred = 1 + } + return passcred, nil + default: socket.GetSockOptEmitUnimplementedEvent(t, name) } + case linux.SOL_NETLINK: switch name { case linux.NETLINK_BROADCAST_ERROR, @@ -348,6 +380,7 @@ func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *sy s.sendBufferSize = size s.mu.Unlock() return nil + case linux.SO_RCVBUF: if len(opt) < sizeOfInt32 { return syserr.ErrInvalidArgument @@ -355,6 +388,18 @@ func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *sy // We don't have limit on receiving size. So just accept anything as // valid for compatibility. return nil + + case linux.SO_PASSCRED: + if len(opt) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + passcred := usermem.ByteOrder.Uint32(opt) + + s.mu.Lock() + s.passcred = passcred != 0 + s.mu.Unlock() + return nil + default: socket.SetSockOptEmitUnimplementedEvent(t, name) } @@ -483,6 +528,26 @@ func (s *Socket) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ }) } +// kernelSCM implements control.SCMCredentials with credentials that represent +// the kernel itself rather than a Task. +// +// +stateify savable +type kernelSCM struct{} + +// Equals implements transport.CredentialsControlMessage.Equals. +func (kernelSCM) Equals(oc transport.CredentialsControlMessage) bool { + _, ok := oc.(kernelSCM) + return ok +} + +// Credentials implements control.SCMCredentials.Credentials. +func (kernelSCM) Credentials(*kernel.Task) (kernel.ThreadID, auth.UID, auth.GID) { + return 0, auth.RootUID, auth.RootGID +} + +// kernelCreds is the concrete version of kernelSCM used in all creds. +var kernelCreds = &kernelSCM{} + // sendResponse sends the response messages in ms back to userspace. func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error { // Linux combines multiple netlink messages into a single datagram. @@ -491,10 +556,15 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error bufs = append(bufs, m.Finalize()) } + // All messages are from the kernel. + cms := transport.ControlMessages{ + Credentials: kernelCreds, + } + if len(bufs) > 0 { // RecvMsg never receives the address, so we don't need to send // one. - _, notify, err := s.connection.Send(bufs, transport.ControlMessages{}, tcpip.FullAddress{}) + _, notify, err := s.connection.Send(bufs, cms, tcpip.FullAddress{}) // If the buffer is full, we simply drop messages, just like // Linux. if err != nil && err != syserr.ErrWouldBlock { @@ -521,7 +591,7 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error // Add the dump_done_errno payload. 
m.Put(int64(0)) - _, notify, err := s.connection.Send([][]byte{m.Finalize()}, transport.ControlMessages{}, tcpip.FullAddress{}) + _, notify, err := s.connection.Send([][]byte{m.Finalize()}, cms, tcpip.FullAddress{}) if err != nil && err != syserr.ErrWouldBlock { return err } diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index dd4a11655..be0dadcd6 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -195,7 +195,8 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(SO_DOMAIN, IsEqual(AF_NETLINK), absl::StrFormat("AF_NETLINK (%d)", AF_NETLINK)), std::make_tuple(SO_PROTOCOL, IsEqual(NETLINK_ROUTE), - absl::StrFormat("NETLINK_ROUTE (%d)", NETLINK_ROUTE)))); + absl::StrFormat("NETLINK_ROUTE (%d)", NETLINK_ROUTE)), + std::make_tuple(SO_PASSCRED, IsEqual(0), "0"))); // Validates the reponses to RTM_GETLINK + NLM_F_DUMP. void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) { @@ -692,6 +693,113 @@ TEST(NetlinkRouteTest, RecvmsgTruncPeek) { } while (type != NLMSG_DONE && type != NLMSG_ERROR); } +// No SCM_CREDENTIALS are received without SO_PASSCRED set. +TEST(NetlinkRouteTest, NoPasscredNoCreds) { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + + ASSERT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOff, + sizeof(kSockOptOff)), + SyscallSucceeds()); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + constexpr uint32_t kSeq = 12345; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + iov.iov_base = NULL; + iov.iov_len = 0; + + char control[CMSG_SPACE(sizeof(struct ucred))] = {}; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + // Note: This test assumes at least one message is returned by the + // RTM_GETADDR request. + ASSERT_THAT(RetryEINTR(recvmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + // No control messages. + EXPECT_EQ(CMSG_FIRSTHDR(&msg), nullptr); +} + +// SCM_CREDENTIALS are received with SO_PASSCRED set. +TEST(NetlinkRouteTest, PasscredCreds) { + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + + ASSERT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + struct request { + struct nlmsghdr hdr; + struct rtgenmsg rgm; + }; + + constexpr uint32_t kSeq = 12345; + + struct request req; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = kSeq; + req.rgm.rtgen_family = AF_UNSPEC; + + struct iovec iov = {}; + iov.iov_base = &req; + iov.iov_len = sizeof(req); + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + iov.iov_base = NULL; + iov.iov_len = 0; + + char control[CMSG_SPACE(sizeof(struct ucred))] = {}; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + // Note: This test assumes at least one message is returned by the + // RTM_GETADDR request. 
+ ASSERT_THAT(RetryEINTR(recvmsg)(fd.get(), &msg, 0), SyscallSucceeds()); + + struct ucred creds; + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(creds))); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SCM_CREDENTIALS); + + memcpy(&creds, CMSG_DATA(cmsg), sizeof(creds)); + + // The peer is the kernel, which is "PID" 0. + EXPECT_EQ(creds.pid, 0); + // The kernel identifies as root. Also allow nobody in case this test is + // running in a userns without root mapped. + EXPECT_THAT(creds.uid, AnyOf(Eq(0), Eq(65534))); + EXPECT_THAT(creds.gid, AnyOf(Eq(0), Eq(65534))); +} + } // namespace } // namespace testing -- cgit v1.2.3 From b23b36e701c40827065217f4652a51eebc5f9913 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Mon, 4 Nov 2019 10:06:00 -0800 Subject: Add NETLINK_KOBJECT_UEVENT socket support NETLINK_KOBJECT_UEVENT sockets send udev-style messages for device events. gVisor doesn't have any device events, so our sockets don't need to do anything once created. systemd's device manager needs to be able to create one of these sockets. It also wants to install a BPF filter on the socket. Since we'll never send any messages, the filter would never be invoked, thus we just fake it out. Fixes #1117 Updates #1119 PiperOrigin-RevId: 278405893 --- pkg/sentry/socket/netlink/provider.go | 7 ++ pkg/sentry/socket/netlink/route/protocol.go | 5 + pkg/sentry/socket/netlink/socket.go | 42 ++++++++ pkg/sentry/socket/netlink/uevent/BUILD | 17 +++ pkg/sentry/socket/netlink/uevent/protocol.go | 60 +++++++++++ runsc/boot/BUILD | 1 + runsc/boot/loader.go | 1 + test/syscalls/BUILD | 4 + test/syscalls/linux/BUILD | 29 +++++ test/syscalls/linux/socket_netdevice.cc | 3 +- test/syscalls/linux/socket_netlink.cc | 153 +++++++++++++++++++++++++++ test/syscalls/linux/socket_netlink_route.cc | 140 ++++-------------------- test/syscalls/linux/socket_netlink_uevent.cc | 83 +++++++++++++++ test/syscalls/linux/socket_netlink_util.cc | 5 +- test/syscalls/linux/socket_netlink_util.h | 5 +- 15 files changed, 431 insertions(+), 124 deletions(-) create mode 100644 pkg/sentry/socket/netlink/uevent/BUILD create mode 100644 pkg/sentry/socket/netlink/uevent/protocol.go create mode 100644 test/syscalls/linux/socket_netlink.cc create mode 100644 test/syscalls/linux/socket_netlink_uevent.cc (limited to 'test') diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go index 689cad997..be005df24 100644 --- a/pkg/sentry/socket/netlink/provider.go +++ b/pkg/sentry/socket/netlink/provider.go @@ -30,6 +30,13 @@ type Protocol interface { // Protocol returns the Linux netlink protocol value. Protocol() int + // CanSend returns true if this protocol may ever send messages. + // + // TODO(gvisor.dev/issue/1119): This is a workaround to allow + // advertising support for otherwise unimplemented features on sockets + // that will never send messages, thus making those features no-ops. + CanSend() bool + // ProcessMessage processes a single message from userspace. // // If err == nil, any messages added to ms will be sent back to the diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go index cc70ac237..6b4a0ecf4 100644 --- a/pkg/sentry/socket/netlink/route/protocol.go +++ b/pkg/sentry/socket/netlink/route/protocol.go @@ -61,6 +61,11 @@ func (p *Protocol) Protocol() int { return linux.NETLINK_ROUTE } +// CanSend implements netlink.Protocol.CanSend. 
+func (p *Protocol) CanSend() bool { + return true +} + // dumpLinks handles RTM_GETLINK + NLM_F_DUMP requests. func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { // NLM_F_DUMP + RTM_GETLINK messages are supposed to include an diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index 05dac4f0a..4a1b87a9a 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -54,6 +54,8 @@ const ( maxSendBufferSize = 4 << 20 // 4MB ) +var errNoFilter = syserr.New("no filter attached", linux.ENOENT) + // netlinkSocketDevice is the netlink socket virtual device. var netlinkSocketDevice = device.NewAnonDevice() @@ -108,6 +110,12 @@ type Socket struct { // passcred indicates if this socket wants SCM credentials. passcred bool + + // filter indicates that this socket has a BPF filter "installed". + // + // TODO(gvisor.dev/issue/1119): We don't actually support filtering, + // this is just bookkeeping for tracking add/remove. + filter bool } var _ socket.Socket = (*Socket)(nil) @@ -400,6 +408,40 @@ func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *sy s.mu.Unlock() return nil + case linux.SO_ATTACH_FILTER: + // TODO(gvisor.dev/issue/1119): We don't actually + // support filtering. If this socket can't ever send + // messages, then there is nothing to filter and we can + // advertise support. Otherwise, be conservative and + // return an error. + if s.protocol.CanSend() { + socket.SetSockOptEmitUnimplementedEvent(t, name) + return syserr.ErrProtocolNotAvailable + } + + s.mu.Lock() + s.filter = true + s.mu.Unlock() + return nil + + case linux.SO_DETACH_FILTER: + // TODO(gvisor.dev/issue/1119): See above. + if s.protocol.CanSend() { + socket.SetSockOptEmitUnimplementedEvent(t, name) + return syserr.ErrProtocolNotAvailable + } + + s.mu.Lock() + filter := s.filter + s.filter = false + s.mu.Unlock() + + if !filter { + return errNoFilter + } + + return nil + default: socket.SetSockOptEmitUnimplementedEvent(t, name) } diff --git a/pkg/sentry/socket/netlink/uevent/BUILD b/pkg/sentry/socket/netlink/uevent/BUILD new file mode 100644 index 000000000..0777f3baf --- /dev/null +++ b/pkg/sentry/socket/netlink/uevent/BUILD @@ -0,0 +1,17 @@ +load("//tools/go_stateify:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "uevent", + srcs = ["protocol.go"], + importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/uevent", + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/sentry/context", + "//pkg/sentry/kernel", + "//pkg/sentry/socket/netlink", + "//pkg/syserr", + ], +) diff --git a/pkg/sentry/socket/netlink/uevent/protocol.go b/pkg/sentry/socket/netlink/uevent/protocol.go new file mode 100644 index 000000000..b5d7808d7 --- /dev/null +++ b/pkg/sentry/socket/netlink/uevent/protocol.go @@ -0,0 +1,60 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Package uevent provides a NETLINK_KOBJECT_UEVENT socket protocol. +// +// NETLINK_KOBJECT_UEVENT sockets send udev-style device events. gVisor does +// not support any device events, so these sockets never send any messages. +package uevent + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/netlink" + "gvisor.dev/gvisor/pkg/syserr" +) + +// Protocol implements netlink.Protocol. +// +// +stateify savable +type Protocol struct{} + +var _ netlink.Protocol = (*Protocol)(nil) + +// NewProtocol creates a NETLINK_KOBJECT_UEVENT netlink.Protocol. +func NewProtocol(t *kernel.Task) (netlink.Protocol, *syserr.Error) { + return &Protocol{}, nil +} + +// Protocol implements netlink.Protocol.Protocol. +func (p *Protocol) Protocol() int { + return linux.NETLINK_KOBJECT_UEVENT +} + +// CanSend implements netlink.Protocol.CanSend. +func (p *Protocol) CanSend() bool { + return false +} + +// ProcessMessage implements netlink.Protocol.ProcessMessage. +func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { + // Silently ignore all messages. + return nil +} + +// init registers the NETLINK_KOBJECT_UEVENT provider. +func init() { + netlink.RegisterProvider(linux.NETLINK_KOBJECT_UEVENT, NewProtocol) +} diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 6fe2b57de..58e86ae7f 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -60,6 +60,7 @@ go_library( "//pkg/sentry/socket/hostinet", "//pkg/sentry/socket/netlink", "//pkg/sentry/socket/netlink/route", + "//pkg/sentry/socket/netlink/uevent", "//pkg/sentry/socket/netstack", "//pkg/sentry/socket/unix", "//pkg/sentry/state", diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 4d1bd2d08..f05d5973f 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -65,6 +65,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink" _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route" + _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/uevent" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" _ "gvisor.dev/gvisor/pkg/sentry/socket/unix" ) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index a53a23afd..3e5b6b3c3 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -511,8 +511,12 @@ syscall_test(test = "//test/syscalls/linux:socket_ip_unbound_test") syscall_test(test = "//test/syscalls/linux:socket_netdevice_test") +syscall_test(test = "//test/syscalls/linux:socket_netlink_test") + syscall_test(test = "//test/syscalls/linux:socket_netlink_route_test") +syscall_test(test = "//test/syscalls/linux:socket_netlink_uevent_test") + syscall_test(test = "//test/syscalls/linux:socket_blocking_local_test") syscall_test(test = "//test/syscalls/linux:socket_blocking_ip_test") diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 833fbaa09..93bff8299 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2675,6 +2675,20 @@ cc_binary( ], ) +cc_binary( + name = "socket_netlink_test", + testonly = 1, + srcs = ["socket_netlink.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + cc_binary( name = "socket_netlink_route_test", 
testonly = 1, @@ -2692,6 +2706,21 @@ cc_binary( ], ) +cc_binary( + name = "socket_netlink_uevent_test", + testonly = 1, + srcs = ["socket_netlink_uevent.cc"], + linkstatic = 1, + deps = [ + ":socket_netlink_util", + ":socket_test_util", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + # These socket tests are in a library because the test cases are shared # across several test build targets. cc_library( diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc index 765f8e0e4..405dbbd73 100644 --- a/test/syscalls/linux/socket_netdevice.cc +++ b/test/syscalls/linux/socket_netdevice.cc @@ -68,7 +68,8 @@ TEST(NetdeviceTest, Netmask) { // Use a netlink socket to get the netmask, which we'll then compare to the // netmask obtained via ioctl. - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { diff --git a/test/syscalls/linux/socket_netlink.cc b/test/syscalls/linux/socket_netlink.cc new file mode 100644 index 000000000..4ec0fd4fa --- /dev/null +++ b/test/syscalls/linux/socket_netlink.cc @@ -0,0 +1,153 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Tests for all netlink socket protocols. + +namespace gvisor { +namespace testing { + +namespace { + +// NetlinkTest parameter is the protocol to test. +using NetlinkTest = ::testing::TestWithParam; + +// Netlink sockets must be SOCK_DGRAM or SOCK_RAW. 
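+// Every other socket type should be rejected with ESOCKTNOSUPPORT, regardless
+// of the protocol under test.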
+TEST_P(NetlinkTest, Types) { + const int protocol = GetParam(); + + EXPECT_THAT(socket(AF_NETLINK, SOCK_STREAM, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + EXPECT_THAT(socket(AF_NETLINK, SOCK_SEQPACKET, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + EXPECT_THAT(socket(AF_NETLINK, SOCK_RDM, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + EXPECT_THAT(socket(AF_NETLINK, SOCK_DCCP, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + EXPECT_THAT(socket(AF_NETLINK, SOCK_PACKET, protocol), + SyscallFailsWithErrno(ESOCKTNOSUPPORT)); + + int fd; + EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_DGRAM, protocol), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_RAW, protocol), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + +TEST_P(NetlinkTest, AutomaticPort) { + const int protocol = GetParam(); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol)); + + struct sockaddr_nl addr = {}; + addr.nl_family = AF_NETLINK; + + EXPECT_THAT( + bind(fd.get(), reinterpret_cast(&addr), sizeof(addr)), + SyscallSucceeds()); + + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(fd.get(), reinterpret_cast(&addr), + &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, sizeof(addr)); + // This is the only netlink socket in the process, so it should get the PID as + // the port id. + // + // N.B. Another process could theoretically have explicitly reserved our pid + // as a port ID, but that is very unlikely. + EXPECT_EQ(addr.nl_pid, getpid()); +} + +// Calling connect automatically binds to an automatic port. +TEST_P(NetlinkTest, ConnectBinds) { + const int protocol = GetParam(); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol)); + + struct sockaddr_nl addr = {}; + addr.nl_family = AF_NETLINK; + + EXPECT_THAT(connect(fd.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallSucceeds()); + + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(fd.get(), reinterpret_cast(&addr), + &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, sizeof(addr)); + + // Each test is running in a pid namespace, so another process can explicitly + // reserve our pid as a port ID. In this case, a negative portid value will be + // set. + if (static_cast(addr.nl_pid) > 0) { + EXPECT_EQ(addr.nl_pid, getpid()); + } + + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + + // Connecting again is allowed, but keeps the same port. + EXPECT_THAT(connect(fd.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallSucceeds()); + + addrlen = sizeof(addr); + EXPECT_THAT(getsockname(fd.get(), reinterpret_cast(&addr), + &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, sizeof(addr)); + EXPECT_EQ(addr.nl_pid, getpid()); +} + +TEST_P(NetlinkTest, GetPeerName) { + const int protocol = GetParam(); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, protocol)); + + struct sockaddr_nl addr = {}; + socklen_t addrlen = sizeof(addr); + + EXPECT_THAT(getpeername(fd.get(), reinterpret_cast(&addr), + &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, sizeof(addr)); + EXPECT_EQ(addr.nl_family, AF_NETLINK); + // Peer is the kernel if we didn't connect elsewhere. 
+ EXPECT_EQ(addr.nl_pid, 0); +} + +INSTANTIATE_TEST_SUITE_P(ProtocolTest, NetlinkTest, + ::testing::Values(NETLINK_ROUTE, + NETLINK_KOBJECT_UEVENT)); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index be0dadcd6..ef567f512 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -41,112 +41,7 @@ namespace { using ::testing::AnyOf; using ::testing::Eq; -// Netlink sockets must be SOCK_DGRAM or SOCK_RAW. -TEST(NetlinkRouteTest, Types) { - EXPECT_THAT(socket(AF_NETLINK, SOCK_STREAM, NETLINK_ROUTE), - SyscallFailsWithErrno(ESOCKTNOSUPPORT)); - EXPECT_THAT(socket(AF_NETLINK, SOCK_SEQPACKET, NETLINK_ROUTE), - SyscallFailsWithErrno(ESOCKTNOSUPPORT)); - EXPECT_THAT(socket(AF_NETLINK, SOCK_RDM, NETLINK_ROUTE), - SyscallFailsWithErrno(ESOCKTNOSUPPORT)); - EXPECT_THAT(socket(AF_NETLINK, SOCK_DCCP, NETLINK_ROUTE), - SyscallFailsWithErrno(ESOCKTNOSUPPORT)); - EXPECT_THAT(socket(AF_NETLINK, SOCK_PACKET, NETLINK_ROUTE), - SyscallFailsWithErrno(ESOCKTNOSUPPORT)); - - int fd; - EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE), - SyscallSucceeds()); - EXPECT_THAT(close(fd), SyscallSucceeds()); - - EXPECT_THAT(fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE), - SyscallSucceeds()); - EXPECT_THAT(close(fd), SyscallSucceeds()); -} - -TEST(NetlinkRouteTest, AutomaticPort) { - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)); - - struct sockaddr_nl addr = {}; - addr.nl_family = AF_NETLINK; - - EXPECT_THAT( - bind(fd.get(), reinterpret_cast(&addr), sizeof(addr)), - SyscallSucceeds()); - - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(fd.get(), reinterpret_cast(&addr), - &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, sizeof(addr)); - // This is the only netlink socket in the process, so it should get the PID as - // the port id. - // - // N.B. Another process could theoretically have explicitly reserved our pid - // as a port ID, but that is very unlikely. - EXPECT_EQ(addr.nl_pid, getpid()); -} - -// Calling connect automatically binds to an automatic port. -TEST(NetlinkRouteTest, ConnectBinds) { - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)); - - struct sockaddr_nl addr = {}; - addr.nl_family = AF_NETLINK; - - EXPECT_THAT(connect(fd.get(), reinterpret_cast(&addr), - sizeof(addr)), - SyscallSucceeds()); - - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(fd.get(), reinterpret_cast(&addr), - &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, sizeof(addr)); - - // Each test is running in a pid namespace, so another process can explicitly - // reserve our pid as a port ID. In this case, a negative portid value will be - // set. - if (static_cast(addr.nl_pid) > 0) { - EXPECT_EQ(addr.nl_pid, getpid()); - } - - memset(&addr, 0, sizeof(addr)); - addr.nl_family = AF_NETLINK; - - // Connecting again is allowed, but keeps the same port. 
- EXPECT_THAT(connect(fd.get(), reinterpret_cast(&addr), - sizeof(addr)), - SyscallSucceeds()); - - addrlen = sizeof(addr); - EXPECT_THAT(getsockname(fd.get(), reinterpret_cast(&addr), - &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, sizeof(addr)); - EXPECT_EQ(addr.nl_pid, getpid()); -} - -TEST(NetlinkRouteTest, GetPeerName) { - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)); - - struct sockaddr_nl addr = {}; - socklen_t addrlen = sizeof(addr); - - EXPECT_THAT(getpeername(fd.get(), reinterpret_cast(&addr), - &addrlen), - SyscallSucceeds()); - - EXPECT_EQ(addrlen, sizeof(addr)); - EXPECT_EQ(addr.nl_family, AF_NETLINK); - // Peer is the kernel if we didn't connect elsewhere. - EXPECT_EQ(addr.nl_pid, 0); -} - -// Parameters for GetSockOpt test. They are: +// Parameters for SockOptTest. They are: // 0: Socket option to query. // 1: A predicate to run on the returned sockopt value. Should return true if // the value is considered ok. @@ -219,7 +114,8 @@ void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) { } TEST(NetlinkRouteTest, GetLinkDump) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { @@ -260,7 +156,8 @@ TEST(NetlinkRouteTest, GetLinkDump) { } TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); struct request { struct nlmsghdr hdr; @@ -293,7 +190,8 @@ TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { } TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); struct request { struct nlmsghdr hdr; @@ -332,7 +230,8 @@ TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { } TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); struct request { struct nlmsghdr hdr; @@ -373,7 +272,8 @@ TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) { } TEST(NetlinkRouteTest, ControlMessageIgnored) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { @@ -408,7 +308,8 @@ TEST(NetlinkRouteTest, ControlMessageIgnored) { } TEST(NetlinkRouteTest, GetAddrDump) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { @@ -468,7 +369,8 @@ TEST(NetlinkRouteTest, LookupAll) { // GetRouteDump tests a RTM_GETROUTE + NLM_F_DUMP request. TEST(NetlinkRouteTest, GetRouteDump) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { @@ -544,7 +446,8 @@ TEST(NetlinkRouteTest, GetRouteDump) { // buffer. MSG_TRUNC with a zero length buffer should consume subsequent // messages off the socket. 
TEST(NetlinkRouteTest, RecvmsgTrunc) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); struct request { struct nlmsghdr hdr; @@ -620,7 +523,8 @@ TEST(NetlinkRouteTest, RecvmsgTrunc) { // it, so a properly sized buffer can be allocated to store the message. This // test tests that scenario. TEST(NetlinkRouteTest, RecvmsgTruncPeek) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); struct request { struct nlmsghdr hdr; @@ -695,7 +599,8 @@ TEST(NetlinkRouteTest, RecvmsgTruncPeek) { // No SCM_CREDENTIALS are received without SO_PASSCRED set. TEST(NetlinkRouteTest, NoPasscredNoCreds) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); ASSERT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOff, sizeof(kSockOptOff)), @@ -742,7 +647,8 @@ TEST(NetlinkRouteTest, NoPasscredNoCreds) { // SCM_CREDENTIALS are received with SO_PASSCRED set. TEST(NetlinkRouteTest, PasscredCreds) { - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket()); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); ASSERT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOn, sizeof(kSockOptOn)), diff --git a/test/syscalls/linux/socket_netlink_uevent.cc b/test/syscalls/linux/socket_netlink_uevent.cc new file mode 100644 index 000000000..da425bed4 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_uevent.cc @@ -0,0 +1,83 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_netlink_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Tests for NETLINK_KOBJECT_UEVENT sockets. +// +// gVisor never sends any messages on these sockets, so we don't test the events +// themselves. + +namespace gvisor { +namespace testing { + +namespace { + +// SO_PASSCRED can be enabled. Since no messages are sent in gVisor, we don't +// actually test receiving credentials. +TEST(NetlinkUeventTest, PassCred) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT)); + + EXPECT_THAT(setsockopt(fd.get(), SOL_SOCKET, SO_PASSCRED, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); +} + +// SO_DETACH_FILTER fails without a filter already installed. +TEST(NetlinkUeventTest, DetachNoFilter) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT)); + + int opt; + EXPECT_THAT( + setsockopt(fd.get(), SOL_SOCKET, SO_DETACH_FILTER, &opt, sizeof(opt)), + SyscallFailsWithErrno(ENOENT)); +} + +// We can attach a BPF filter. 
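+//
+// Because gVisor never sends messages on NETLINK_KOBJECT_UEVENT sockets, the
+// filter is only tracked, never evaluated; attaching and then detaching is
+// enough to exercise that bookkeeping.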
+TEST(NetlinkUeventTest, AttachFilter) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_KOBJECT_UEVENT)); + + // Minimal BPF program: a single ret. + struct sock_filter filter = {0x6, 0, 0, 0}; + struct sock_fprog prog = {}; + prog.len = 1; + prog.filter = &filter; + + EXPECT_THAT( + setsockopt(fd.get(), SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)), + SyscallSucceeds()); + + int opt; + EXPECT_THAT( + setsockopt(fd.get(), SOL_SOCKET, SO_DETACH_FILTER, &opt, sizeof(opt)), + SyscallSucceeds()); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index fcb8f8a88..5f05bab10 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -16,7 +16,6 @@ #include #include -#include #include @@ -27,9 +26,9 @@ namespace gvisor { namespace testing { -PosixErrorOr NetlinkBoundSocket() { +PosixErrorOr NetlinkBoundSocket(int protocol) { FileDescriptor fd; - ASSIGN_OR_RETURN_ERRNO(fd, Socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)); + ASSIGN_OR_RETURN_ERRNO(fd, Socket(AF_NETLINK, SOCK_RAW, protocol)); struct sockaddr_nl addr = {}; addr.nl_family = AF_NETLINK; diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h index db8639a2f..da99f0d60 100644 --- a/test/syscalls/linux/socket_netlink_util.h +++ b/test/syscalls/linux/socket_netlink_util.h @@ -17,7 +17,6 @@ #include #include -#include #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" @@ -25,8 +24,8 @@ namespace gvisor { namespace testing { -// Returns a bound NETLINK_ROUTE socket. -PosixErrorOr NetlinkBoundSocket(); +// Returns a bound netlink socket. +PosixErrorOr NetlinkBoundSocket(int protocol); // Returns the port ID of the passed socket. PosixErrorOr NetlinkPortID(int fd); -- cgit v1.2.3 From 493334f8b594eb1c2b0f5a6133dbedad4e0ecd32 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 4 Nov 2019 15:59:11 -0800 Subject: kokoro: run KVM syscall tests We don't know how stable they are, so let's start with warning. PiperOrigin-RevId: 278484186 --- kokoro/syscall_kvm_tests.cfg | 9 +++++++++ scripts/syscall_kvm_tests.sh | 21 +++++++++++++++++++++ test/syscalls/linux/itimer.cc | 6 ++++++ 3 files changed, 36 insertions(+) create mode 100644 kokoro/syscall_kvm_tests.cfg create mode 100755 scripts/syscall_kvm_tests.sh (limited to 'test') diff --git a/kokoro/syscall_kvm_tests.cfg b/kokoro/syscall_kvm_tests.cfg new file mode 100644 index 000000000..3b99e9c13 --- /dev/null +++ b/kokoro/syscall_kvm_tests.cfg @@ -0,0 +1,9 @@ +build_file: "repo/scripts/syscall_kvm_tests.sh" + +action { + define_artifacts { + regex: "**/sponge_log.xml" + regex: "**/sponge_log.log" + regex: "**/outputs.zip" + } +} diff --git a/scripts/syscall_kvm_tests.sh b/scripts/syscall_kvm_tests.sh new file mode 100755 index 000000000..de85daa5a --- /dev/null +++ b/scripts/syscall_kvm_tests.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +source $(dirname $0)/common.sh + +# TODO(b/112165693): "test --test_tag_filters=runsc_kvm" can be used +# when the "manual" tag will be removed for kvm tests. +test `bazel query "attr(tags, runsc_kvm, tests(//test/syscalls/...))"` diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc index 930d2b940..b77e4cbd1 100644 --- a/test/syscalls/linux/itimer.cc +++ b/test/syscalls/linux/itimer.cc @@ -267,6 +267,9 @@ int TestSIGPROFFairness(absl::Duration sleep) { // Random save/restore is disabled as it introduces additional latency and // unpredictable distribution patterns. TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyActive_NoRandomSave) { + // TODO(b/143247272): CPU time accounting is inaccurate for the KVM platform. + SKIP_IF(GvisorPlatform() == Platform::kKVM); + pid_t child; int execve_errno; auto kill = ASSERT_NO_ERRNO_AND_VALUE( @@ -288,6 +291,9 @@ TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyActive_NoRandomSave) { // Random save/restore is disabled as it introduces additional latency and // unpredictable distribution patterns. TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyIdle_NoRandomSave) { + // TODO(b/143247272): CPU time accounting is inaccurate for the KVM platform. + SKIP_IF(GvisorPlatform() == Platform::kKVM); + pid_t child; int execve_errno; auto kill = ASSERT_NO_ERRNO_AND_VALUE( -- cgit v1.2.3 From 57f6dbc4be5c9c5416c9d3a442eacfb797e57e9c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 5 Nov 2019 17:02:15 -0800 Subject: test/root: check that memory accouting works as expected PiperOrigin-RevId: 278739427 --- test/root/cgroup_test.go | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'test') diff --git a/test/root/cgroup_test.go b/test/root/cgroup_test.go index 76f1e4f2a..4038661cb 100644 --- a/test/root/cgroup_test.go +++ b/test/root/cgroup_test.go @@ -24,6 +24,7 @@ import ( "strconv" "strings" "testing" + "time" "gvisor.dev/gvisor/runsc/cgroup" "gvisor.dev/gvisor/runsc/dockerutil" @@ -55,6 +56,59 @@ func verifyPid(pid int, path string) error { return fmt.Errorf("got: %s, want: %d", gots, pid) } +// TestCgroup sets cgroup options and checks that cgroup was properly configured. +func TestMemCGroup(t *testing.T) { + allocMemSize := 128 << 20 + if err := dockerutil.Pull("python"); err != nil { + t.Fatal("docker pull failed:", err) + } + d := dockerutil.MakeDocker("memusage-test") + + // Start a new container and allocate the specified about of memory. + args := []string{ + "--memory=256MB", + "python", + "python", + "-c", + fmt.Sprintf("import time; s = 'a' * %d; time.sleep(100)", allocMemSize), + } + if err := d.Run(args...); err != nil { + t.Fatal("docker create failed:", err) + } + defer d.CleanUp() + + gid, err := d.ID() + if err != nil { + t.Fatalf("Docker.ID() failed: %v", err) + } + t.Logf("cgroup ID: %s", gid) + + path := filepath.Join("/sys/fs/cgroup/memory/docker", gid, "memory.usage_in_bytes") + memUsage := 0 + + // Wait when the container will allocate memory. 
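+ // Poll memory.usage_in_bytes for up to 30 seconds: the container has to
+ // start and the python process has to allocate the string before usage
+ // climbs past allocMemSize.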
+ start := time.Now() + for time.Now().Sub(start) < 30*time.Second { + outRaw, err := ioutil.ReadFile(path) + if err != nil { + t.Fatalf("failed to read %q: %v", path, err) + } + out := strings.TrimSpace(string(outRaw)) + memUsage, err = strconv.Atoi(out) + if err != nil { + t.Fatalf("Atoi(%v): %v", out, err) + } + + if memUsage > allocMemSize { + return + } + + time.Sleep(100 * time.Millisecond) + } + + t.Fatalf("%vMB is less than %vMB: %v", memUsage>>20, allocMemSize>>20) +} + // TestCgroup sets cgroup options and checks that cgroup was properly configured. func TestCgroup(t *testing.T) { if err := dockerutil.Pull("alpine"); err != nil { -- cgit v1.2.3 From e1b21f3c8ca989dc94b25526fda1bb107691f1af Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 6 Nov 2019 14:24:38 -0800 Subject: Use PacketBuffers, rather than VectorisedViews, in netstack. PacketBuffers are analogous to Linux's sk_buff. They hold all information about a packet, headers, and payload. This is important for: * iptables to access various headers of packets * Preventing the clutter of passing different net and link headers along with VectorisedViews to packet handling functions. This change only affects the incoming packet path, and a future change will change the outgoing path. Benchmark Regular PacketBufferPtr PacketBufferConcrete -------------------------------------------------------------------------------- BM_Recvmsg 400.715MB/s 373.676MB/s 396.276MB/s BM_Sendmsg 361.832MB/s 333.003MB/s 335.571MB/s BM_Recvfrom 453.336MB/s 393.321MB/s 381.650MB/s BM_Sendto 378.052MB/s 372.134MB/s 341.342MB/s BM_SendmsgTCP/0/1k 353.711MB/s 316.216MB/s 322.747MB/s BM_SendmsgTCP/0/2k 600.681MB/s 588.776MB/s 565.050MB/s BM_SendmsgTCP/0/4k 995.301MB/s 888.808MB/s 941.888MB/s BM_SendmsgTCP/0/8k 1.517GB/s 1.274GB/s 1.345GB/s BM_SendmsgTCP/0/16k 1.872GB/s 1.586GB/s 1.698GB/s BM_SendmsgTCP/0/32k 1.017GB/s 1.020GB/s 1.133GB/s BM_SendmsgTCP/0/64k 475.626MB/s 584.587MB/s 627.027MB/s BM_SendmsgTCP/0/128k 416.371MB/s 503.434MB/s 409.850MB/s BM_SendmsgTCP/0/256k 323.449MB/s 449.599MB/s 388.852MB/s BM_SendmsgTCP/0/512k 243.992MB/s 267.676MB/s 314.474MB/s BM_SendmsgTCP/0/1M 95.138MB/s 95.874MB/s 95.417MB/s BM_SendmsgTCP/0/2M 96.261MB/s 94.977MB/s 96.005MB/s BM_SendmsgTCP/0/4M 96.512MB/s 95.978MB/s 95.370MB/s BM_SendmsgTCP/0/8M 95.603MB/s 95.541MB/s 94.935MB/s BM_SendmsgTCP/0/16M 94.598MB/s 94.696MB/s 94.521MB/s BM_SendmsgTCP/0/32M 94.006MB/s 94.671MB/s 94.768MB/s BM_SendmsgTCP/0/64M 94.133MB/s 94.333MB/s 94.746MB/s BM_SendmsgTCP/0/128M 93.615MB/s 93.497MB/s 93.573MB/s BM_SendmsgTCP/0/256M 93.241MB/s 95.100MB/s 93.272MB/s BM_SendmsgTCP/1/1k 303.644MB/s 316.074MB/s 308.430MB/s BM_SendmsgTCP/1/2k 537.093MB/s 584.962MB/s 529.020MB/s BM_SendmsgTCP/1/4k 882.362MB/s 939.087MB/s 892.285MB/s BM_SendmsgTCP/1/8k 1.272GB/s 1.394GB/s 1.296GB/s BM_SendmsgTCP/1/16k 1.802GB/s 2.019GB/s 1.830GB/s BM_SendmsgTCP/1/32k 2.084GB/s 2.173GB/s 2.156GB/s BM_SendmsgTCP/1/64k 2.515GB/s 2.463GB/s 2.473GB/s BM_SendmsgTCP/1/128k 2.811GB/s 3.004GB/s 2.946GB/s BM_SendmsgTCP/1/256k 3.008GB/s 3.159GB/s 3.171GB/s BM_SendmsgTCP/1/512k 2.980GB/s 3.150GB/s 3.126GB/s BM_SendmsgTCP/1/1M 2.165GB/s 2.233GB/s 2.163GB/s BM_SendmsgTCP/1/2M 2.370GB/s 2.219GB/s 2.453GB/s BM_SendmsgTCP/1/4M 2.005GB/s 2.091GB/s 2.214GB/s BM_SendmsgTCP/1/8M 2.111GB/s 2.013GB/s 2.109GB/s BM_SendmsgTCP/1/16M 1.902GB/s 1.868GB/s 1.897GB/s BM_SendmsgTCP/1/32M 1.655GB/s 1.665GB/s 1.635GB/s BM_SendmsgTCP/1/64M 1.575GB/s 1.547GB/s 1.575GB/s BM_SendmsgTCP/1/128M 1.524GB/s 1.584GB/s 1.580GB/s BM_SendmsgTCP/1/256M 
1.579GB/s 1.607GB/s 1.593GB/s PiperOrigin-RevId: 278940079 --- pkg/tcpip/BUILD | 2 + pkg/tcpip/link/channel/channel.go | 10 ++-- pkg/tcpip/link/fdbased/endpoint.go | 4 +- pkg/tcpip/link/fdbased/endpoint_test.go | 27 ++++----- pkg/tcpip/link/fdbased/mmap.go | 5 +- pkg/tcpip/link/fdbased/packet_dispatchers.go | 18 ++++-- pkg/tcpip/link/loopback/loopback.go | 10 +++- pkg/tcpip/link/muxed/injectable.go | 4 +- pkg/tcpip/link/sharedmem/sharedmem.go | 7 ++- pkg/tcpip/link/sharedmem/sharedmem_test.go | 9 ++- pkg/tcpip/link/sniffer/sniffer.go | 12 ++-- pkg/tcpip/link/waitable/waitable.go | 4 +- pkg/tcpip/link/waitable/waitable_test.go | 8 +-- pkg/tcpip/network/arp/arp.go | 4 +- pkg/tcpip/network/arp/arp_test.go | 4 +- pkg/tcpip/network/ip_test.go | 34 ++++++++---- pkg/tcpip/network/ipv4/icmp.go | 34 +++++++----- pkg/tcpip/network/ipv4/ipv4.go | 43 +++++++++------ pkg/tcpip/network/ipv4/ipv4_test.go | 4 +- pkg/tcpip/network/ipv6/icmp.go | 48 ++++++++-------- pkg/tcpip/network/ipv6/icmp_test.go | 24 +++++--- pkg/tcpip/network/ipv6/ipv6.go | 28 ++++++---- pkg/tcpip/network/ipv6/ipv6_test.go | 8 ++- pkg/tcpip/network/ipv6/ndp_test.go | 8 ++- pkg/tcpip/packet_buffer.go | 54 ++++++++++++++++++ pkg/tcpip/packet_buffer_state.go | 26 +++++++++ pkg/tcpip/stack/ndp_test.go | 4 +- pkg/tcpip/stack/nic.go | 48 ++++++++-------- pkg/tcpip/stack/registration.go | 64 +++++++++++++--------- pkg/tcpip/stack/stack.go | 4 +- pkg/tcpip/stack/stack_test.go | 50 +++++++++++------ pkg/tcpip/stack/transport_demuxer.go | 53 +++++++++--------- pkg/tcpip/stack/transport_demuxer_test.go | 4 +- pkg/tcpip/stack/transport_test.go | 34 ++++++++---- pkg/tcpip/transport/icmp/endpoint.go | 18 +++--- pkg/tcpip/transport/icmp/protocol.go | 2 +- pkg/tcpip/transport/packet/endpoint.go | 19 ++++--- pkg/tcpip/transport/raw/endpoint.go | 17 +++--- pkg/tcpip/transport/tcp/endpoint.go | 6 +- pkg/tcpip/transport/tcp/forwarder.go | 5 +- pkg/tcpip/transport/tcp/protocol.go | 4 +- pkg/tcpip/transport/tcp/segment.go | 5 +- pkg/tcpip/transport/tcp/testing/context/context.go | 16 ++++-- pkg/tcpip/transport/udp/endpoint.go | 20 +++---- pkg/tcpip/transport/udp/forwarder.go | 9 ++- pkg/tcpip/transport/udp/protocol.go | 30 +++++----- pkg/tcpip/transport/udp/udp_test.go | 19 +++++-- test/syscalls/linux/raw_socket_icmp.cc | 2 +- 48 files changed, 542 insertions(+), 330 deletions(-) create mode 100644 pkg/tcpip/packet_buffer.go create mode 100644 pkg/tcpip/packet_buffer_state.go (limited to 'test') diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index 3c2b2b5ea..65d4d0cd8 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -6,6 +6,8 @@ package(licenses = ["notice"]) go_library( name = "tcpip", srcs = [ + "packet_buffer.go", + "packet_buffer_state.go", "tcpip.go", "time_unsafe.go", ], diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index 14f197a77..22eefb564 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -65,14 +65,14 @@ func (e *Endpoint) Drain() int { } } -// Inject injects an inbound packet. -func (e *Endpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { - e.InjectLinkAddr(protocol, "", vv) +// InjectInbound injects an inbound packet. +func (e *Endpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { + e.InjectLinkAddr(protocol, "", pkt) } // InjectLinkAddr injects an inbound packet with a remote link address. 
-func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, vv buffer.VectorisedView) { - e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, vv.Clone(nil), nil /* linkHeader */) +func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, pkt tcpip.PacketBuffer) { + e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, pkt) } // Attach saves the stack network-layer dispatcher for use later when packets diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index ae4858529..edef7db26 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -598,8 +598,8 @@ func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) { } // InjectInbound injects an inbound packet. -func (e *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { - e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */) +func (e *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { + e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, pkt) } // NewInjectable creates a new fd-based InjectableEndpoint. diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go index e7c05ca4f..7e08e033b 100644 --- a/pkg/tcpip/link/fdbased/endpoint_test.go +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -43,10 +43,9 @@ const ( ) type packetInfo struct { - raddr tcpip.LinkAddress - proto tcpip.NetworkProtocolNumber - contents buffer.VectorisedView - linkHeader buffer.View + raddr tcpip.LinkAddress + proto tcpip.NetworkProtocolNumber + contents tcpip.PacketBuffer } type context struct { @@ -93,8 +92,8 @@ func (c *context) cleanup() { syscall.Close(c.fds[1]) } -func (c *context) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote tcpip.LinkAddress, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) { - c.ch <- packetInfo{remote, protocol, vv, linkHeader} +func (c *context) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote tcpip.LinkAddress, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { + c.ch <- packetInfo{remote, protocol, pkt} } func TestNoEthernetProperties(t *testing.T) { @@ -317,19 +316,21 @@ func TestDeliverPacket(t *testing.T) { select { case pi := <-c.ch: want := packetInfo{ - raddr: raddr, - proto: proto, - contents: buffer.View(b).ToVectorisedView(), - linkHeader: buffer.View(hdr), + raddr: raddr, + proto: proto, + contents: tcpip.PacketBuffer{ + Data: buffer.View(b).ToVectorisedView(), + LinkHeader: buffer.View(hdr), + }, } if !eth { want.proto = header.IPv4ProtocolNumber want.raddr = "" } - // want.contents will be a single view, - // so make pi do the same for the + // want.contents.Data will be a single + // view, so make pi do the same for the // DeepEqual check. 
- pi.contents = pi.contents.ToView().ToVectorisedView() + pi.contents.Data = pi.contents.Data.ToView().ToVectorisedView() if !reflect.DeepEqual(want, pi) { t.Fatalf("Unexpected received packet: %+v, want %+v", pi, want) } diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go index 554d45715..62ed1e569 100644 --- a/pkg/tcpip/link/fdbased/mmap.go +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -190,6 +190,9 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) { } pkt = pkt[d.e.hdrSize:] - d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)}), buffer.View(eth)) + d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, tcpip.PacketBuffer{ + Data: buffer.View(pkt).ToVectorisedView(), + LinkHeader: buffer.View(eth), + }) return true, nil } diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index 3331b6453..c67d684ce 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -139,10 +139,13 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) { } used := d.capViews(n, BufConfig) - vv := buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)) - vv.TrimFront(d.e.hdrSize) + pkt := tcpip.PacketBuffer{ + Data: buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)), + LinkHeader: buffer.View(eth), + } + pkt.Data.TrimFront(d.e.hdrSize) - d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv, buffer.View(eth)) + d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt) // Prepare e.views for another packet: release used views. for i := 0; i < used; i++ { @@ -293,9 +296,12 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) { } used := d.capViews(k, int(n), BufConfig) - vv := buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)) - vv.TrimFront(d.e.hdrSize) - d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv, buffer.View(eth)) + pkt := tcpip.PacketBuffer{ + Data: buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)), + LinkHeader: buffer.View(eth), + } + pkt.Data.TrimFront(d.e.hdrSize) + d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt) // Prepare e.views for another packet: release used views. for i := 0; i < used; i++ { diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go index a3b48fa73..bc5d8a2f3 100644 --- a/pkg/tcpip/link/loopback/loopback.go +++ b/pkg/tcpip/link/loopback/loopback.go @@ -80,12 +80,13 @@ func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependa views := make([]buffer.View, 1, 1+len(payload.Views())) views[0] = hdr.View() views = append(views, payload.Views()...) - vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) // Because we're immediately turning around and writing the packet back to the // rx path, we intentionally don't preserve the remote and local link // addresses from the stack.Route we're passed. 
- e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */) + e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, tcpip.PacketBuffer{ + Data: buffer.NewVectorisedView(len(views[0])+payload.Size(), views), + }) return nil } @@ -105,7 +106,10 @@ func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error { // There should be an ethernet header at the beginning of packet. linkHeader := header.Ethernet(packet.First()[:header.EthernetMinimumSize]) packet.TrimFront(len(linkHeader)) - e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), packet, buffer.View(linkHeader)) + e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), tcpip.PacketBuffer{ + Data: packet, + LinkHeader: buffer.View(linkHeader), + }) return nil } diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go index 682b60291..9a8e8ebfe 100644 --- a/pkg/tcpip/link/muxed/injectable.go +++ b/pkg/tcpip/link/muxed/injectable.go @@ -80,8 +80,8 @@ func (m *InjectableEndpoint) IsAttached() bool { } // InjectInbound implements stack.InjectableLinkEndpoint. -func (m *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { - m.dispatcher.DeliverNetworkPacket(m, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */) +func (m *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { + m.dispatcher.DeliverNetworkPacket(m, "" /* remote */, "" /* local */, protocol, pkt) } // WritePackets writes outbound packets to the appropriate diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index 279e2b457..2bace5298 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -273,8 +273,11 @@ func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) { } // Send packet up the stack. - eth := header.Ethernet(b) - d.DeliverNetworkPacket(e, eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), buffer.View(b[header.EthernetMinimumSize:]).ToVectorisedView(), buffer.View(eth)) + eth := header.Ethernet(b[:header.EthernetMinimumSize]) + d.DeliverNetworkPacket(e, eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), tcpip.PacketBuffer{ + Data: buffer.View(b[header.EthernetMinimumSize:]).ToVectorisedView(), + LinkHeader: buffer.View(eth), + }) } // Clean state. 
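
For link endpoint authors, the pattern in the hunks above is always the same: split off the link header, wrap the rest in a tcpip.PacketBuffer, and hand the buffer to the dispatcher. A minimal sketch of that receive path follows; deliverInbound is a hypothetical helper, while the PacketBuffer fields, the DeliverNetworkPacket signature, and the header/buffer helpers are the ones introduced or used in this change.

    package example

    import (
    	"gvisor.dev/gvisor/pkg/tcpip"
    	"gvisor.dev/gvisor/pkg/tcpip/buffer"
    	"gvisor.dev/gvisor/pkg/tcpip/header"
    	"gvisor.dev/gvisor/pkg/tcpip/stack"
    )

    // deliverInbound hands a single Ethernet frame to the network stack using
    // the PacketBuffer-based DeliverNetworkPacket signature.
    func deliverInbound(linkEP stack.LinkEndpoint, d stack.NetworkDispatcher, frame []byte) {
    	if len(frame) < header.EthernetMinimumSize {
    		// Too short to carry an Ethernet header.
    		return
    	}
    	eth := header.Ethernet(frame[:header.EthernetMinimumSize])
    	pkt := tcpip.PacketBuffer{
    		// Data carries everything above the link layer.
    		Data: buffer.View(frame[header.EthernetMinimumSize:]).ToVectorisedView(),
    		// The link header now travels inside the PacketBuffer instead of
    		// being passed as a separate argument.
    		LinkHeader: buffer.View(eth),
    	}
    	d.DeliverNetworkPacket(linkEP, eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), pkt)
    }

The same PacketBuffer is what HandlePacket and DeliverTransportPacket later receive, so network and transport code can reach the link header without extra parameters.
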
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index f3e9705c9..199406886 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -131,13 +131,12 @@ func newTestContext(t *testing.T, mtu, bufferSize uint32, addr tcpip.LinkAddress return c } -func (c *testContext) DeliverNetworkPacket(_ stack.LinkEndpoint, remoteLinkAddr, localLinkAddr tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) { +func (c *testContext) DeliverNetworkPacket(_ stack.LinkEndpoint, remoteLinkAddr, localLinkAddr tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { c.mu.Lock() c.packets = append(c.packets, packetInfo{ - addr: remoteLinkAddr, - proto: proto, - vv: vv.Clone(nil), - linkHeader: linkHeader, + addr: remoteLinkAddr, + proto: proto, + vv: pkt.Data.Clone(nil), }) c.mu.Unlock() diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index 39757ea2a..d71a03cd2 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -116,19 +116,19 @@ func NewWithFile(lower stack.LinkEndpoint, file *os.File, snapLen uint32) (stack // DeliverNetworkPacket implements the stack.NetworkDispatcher interface. It is // called by the link-layer endpoint being wrapped when a packet arrives, and // logs the packet before forwarding to the actual dispatcher. -func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) { +func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil { - logPacket("recv", protocol, vv.First(), nil) + logPacket("recv", protocol, pkt.Data.First(), nil) } if e.file != nil && atomic.LoadUint32(&LogPacketsToFile) == 1 { - vs := vv.Views() - length := vv.Size() + vs := pkt.Data.Views() + length := pkt.Data.Size() if length > int(e.maxPCAPLen) { length = int(e.maxPCAPLen) } buf := bytes.NewBuffer(make([]byte, 0, pcapPacketHeaderLen+length)) - if err := binary.Write(buf, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(vv.Size()))); err != nil { + if err := binary.Write(buf, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(pkt.Data.Size()))); err != nil { panic(err) } for _, v := range vs { @@ -147,7 +147,7 @@ func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local panic(err) } } - e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv, linkHeader) + e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, pkt) } // Attach implements the stack.LinkEndpoint interface. It saves the dispatcher diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go index a04fc1062..b440970e0 100644 --- a/pkg/tcpip/link/waitable/waitable.go +++ b/pkg/tcpip/link/waitable/waitable.go @@ -50,12 +50,12 @@ func New(lower stack.LinkEndpoint) *Endpoint { // It is called by the link-layer endpoint being wrapped when a packet arrives, // and only forwards to the actual dispatcher if Wait or WaitDispatch haven't // been called. 
-func (e *Endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) { +func (e *Endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { if !e.dispatchGate.Enter() { return } - e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv, linkHeader) + e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, pkt) e.dispatchGate.Leave() } diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go index 5f0f8fa2d..df2e70e54 100644 --- a/pkg/tcpip/link/waitable/waitable_test.go +++ b/pkg/tcpip/link/waitable/waitable_test.go @@ -35,7 +35,7 @@ type countedEndpoint struct { dispatcher stack.NetworkDispatcher } -func (e *countedEndpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) { +func (e *countedEndpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { e.dispatchCount++ } @@ -120,21 +120,21 @@ func TestWaitDispatch(t *testing.T) { } // Dispatch and check that it goes through. - ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, buffer.VectorisedView{}, buffer.View{}) + ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, tcpip.PacketBuffer{}) if want := 1; ep.dispatchCount != want { t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want) } // Wait on writes, then try to dispatch. It must go through. wep.WaitWrite() - ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, buffer.VectorisedView{}, buffer.View{}) + ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, tcpip.PacketBuffer{}) if want := 2; ep.dispatchCount != want { t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want) } // Wait on dispatches, then try to dispatch. It must not go through. 
wep.WaitDispatch() - ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, buffer.VectorisedView{}, buffer.View{}) + ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, tcpip.PacketBuffer{}) if want := 2; ep.dispatchCount != want { t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want) } diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go index 46178459e..4161ebf87 100644 --- a/pkg/tcpip/network/arp/arp.go +++ b/pkg/tcpip/network/arp/arp.go @@ -92,8 +92,8 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.Vect return tcpip.ErrNotSupported } -func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { - v := vv.First() +func (e *endpoint) HandlePacket(r *stack.Route, pkt tcpip.PacketBuffer) { + v := pkt.Data.First() h := header.ARP(v) if !h.IsValid() { return diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index 88b57ec03..47098bfdc 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -102,7 +102,9 @@ func TestDirectRequest(t *testing.T) { inject := func(addr tcpip.Address) { copy(h.ProtocolAddressTarget(), addr) - c.linkEP.Inject(arp.ProtocolNumber, v.ToVectorisedView()) + c.linkEP.InjectInbound(arp.ProtocolNumber, tcpip.PacketBuffer{ + Data: v.ToVectorisedView(), + }) } for i, address := range []tcpip.Address{stackAddr1, stackAddr2} { diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index 666d8b92a..fe499d47e 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -96,16 +96,16 @@ func (t *testObject) checkValues(protocol tcpip.TransportProtocolNumber, vv buff // DeliverTransportPacket is called by network endpoints after parsing incoming // packets. This is used by the test object to verify that the results of the // parsing are expected. -func (t *testObject) DeliverTransportPacket(r *stack.Route, protocol tcpip.TransportProtocolNumber, netHeader buffer.View, vv buffer.VectorisedView) { - t.checkValues(protocol, vv, r.RemoteAddress, r.LocalAddress) +func (t *testObject) DeliverTransportPacket(r *stack.Route, protocol tcpip.TransportProtocolNumber, pkt tcpip.PacketBuffer) { + t.checkValues(protocol, pkt.Data, r.RemoteAddress, r.LocalAddress) t.dataCalls++ } // DeliverTransportControlPacket is called by network endpoints after parsing // incoming control (ICMP) packets. This is used by the test object to verify // that the results of the parsing are expected. 
-func (t *testObject) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ stack.ControlType, extra uint32, vv buffer.VectorisedView) { - t.checkValues(trans, vv, remote, local) +func (t *testObject) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ stack.ControlType, extra uint32, pkt tcpip.PacketBuffer) { + t.checkValues(trans, pkt.Data, remote, local) if typ != t.typ { t.t.Errorf("typ = %v, want %v", typ, t.typ) } @@ -279,7 +279,9 @@ func TestIPv4Receive(t *testing.T) { if err != nil { t.Fatalf("could not find route: %v", err) } - ep.HandlePacket(&r, view.ToVectorisedView()) + ep.HandlePacket(&r, tcpip.PacketBuffer{ + Data: view.ToVectorisedView(), + }) if o.dataCalls != 1 { t.Fatalf("Bad number of data calls: got %x, want 1", o.dataCalls) } @@ -367,7 +369,9 @@ func TestIPv4ReceiveControl(t *testing.T) { o.extra = c.expectedExtra vv := view[:len(view)-c.trunc].ToVectorisedView() - ep.HandlePacket(&r, vv) + ep.HandlePacket(&r, tcpip.PacketBuffer{ + Data: vv, + }) if want := c.expectedCount; o.controlCalls != want { t.Fatalf("Bad number of control calls for %q case: got %v, want %v", c.name, o.controlCalls, want) } @@ -430,13 +434,17 @@ func TestIPv4FragmentationReceive(t *testing.T) { } // Send first segment. - ep.HandlePacket(&r, frag1.ToVectorisedView()) + ep.HandlePacket(&r, tcpip.PacketBuffer{ + Data: frag1.ToVectorisedView(), + }) if o.dataCalls != 0 { t.Fatalf("Bad number of data calls: got %x, want 0", o.dataCalls) } // Send second segment. - ep.HandlePacket(&r, frag2.ToVectorisedView()) + ep.HandlePacket(&r, tcpip.PacketBuffer{ + Data: frag2.ToVectorisedView(), + }) if o.dataCalls != 1 { t.Fatalf("Bad number of data calls: got %x, want 1", o.dataCalls) } @@ -509,7 +517,9 @@ func TestIPv6Receive(t *testing.T) { t.Fatalf("could not find route: %v", err) } - ep.HandlePacket(&r, view.ToVectorisedView()) + ep.HandlePacket(&r, tcpip.PacketBuffer{ + Data: view.ToVectorisedView(), + }) if o.dataCalls != 1 { t.Fatalf("Bad number of data calls: got %x, want 1", o.dataCalls) } @@ -618,12 +628,12 @@ func TestIPv6ReceiveControl(t *testing.T) { o.typ = c.expectedTyp o.extra = c.expectedExtra - vv := view[:len(view)-c.trunc].ToVectorisedView() - // Set ICMPv6 checksum. icmp.SetChecksum(header.ICMPv6Checksum(icmp, outerSrcAddr, localIpv6Addr, buffer.VectorisedView{})) - ep.HandlePacket(&r, vv) + ep.HandlePacket(&r, tcpip.PacketBuffer{ + Data: view[:len(view)-c.trunc].ToVectorisedView(), + }) if want := c.expectedCount; o.controlCalls != want { t.Fatalf("Bad number of control calls for %q case: got %v, want %v", c.name, o.controlCalls, want) } diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go index 50b363dc4..ce771631c 100644 --- a/pkg/tcpip/network/ipv4/icmp.go +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -15,6 +15,7 @@ package ipv4 import ( + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -24,8 +25,8 @@ import ( // the original packet that caused the ICMP one to be sent. This information is // used to find out which transport endpoint must be notified about the ICMP // packet. 
-func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, vv buffer.VectorisedView) { - h := header.IPv4(vv.First()) +func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt tcpip.PacketBuffer) { + h := header.IPv4(pkt.Data.First()) // We don't use IsValid() here because ICMP only requires that the IP // header plus 8 bytes of the transport header be included. So it's @@ -39,7 +40,7 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, vv buffer. } hlen := int(h.HeaderLength()) - if vv.Size() < hlen || h.FragmentOffset() != 0 { + if pkt.Data.Size() < hlen || h.FragmentOffset() != 0 { // We won't be able to handle this if it doesn't contain the // full IPv4 header, or if it's a fragment not at offset 0 // (because it won't have the transport header). @@ -47,15 +48,15 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, vv buffer. } // Skip the ip header, then deliver control message. - vv.TrimFront(hlen) + pkt.Data.TrimFront(hlen) p := h.TransportProtocol() - e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, vv) + e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt) } -func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.VectorisedView) { +func (e *endpoint) handleICMP(r *stack.Route, pkt tcpip.PacketBuffer) { stats := r.Stats() received := stats.ICMP.V4PacketsReceived - v := vv.First() + v := pkt.Data.First() if len(v) < header.ICMPv4MinimumSize { received.Invalid.Increment() return @@ -73,20 +74,23 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V // checksum. We'll have to reset this before we hand the packet // off. h.SetChecksum(0) - gotChecksum := ^header.ChecksumVV(vv, 0 /* initial */) + gotChecksum := ^header.ChecksumVV(pkt.Data, 0 /* initial */) if gotChecksum != wantChecksum { // It's possible that a raw socket expects to receive this. h.SetChecksum(wantChecksum) - e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, netHeader, vv) + e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt) received.Invalid.Increment() return } // It's possible that a raw socket expects to receive this. 
h.SetChecksum(wantChecksum) - e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, netHeader, vv) + e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, tcpip.PacketBuffer{ + Data: pkt.Data.Clone(nil), + NetworkHeader: append(buffer.View(nil), pkt.NetworkHeader...), + }) - vv := vv.Clone(nil) + vv := pkt.Data.Clone(nil) vv.TrimFront(header.ICMPv4MinimumSize) hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv4MinimumSize) pkt := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize)) @@ -104,19 +108,19 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V case header.ICMPv4EchoReply: received.EchoReply.Increment() - e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, netHeader, vv) + e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt) case header.ICMPv4DstUnreachable: received.DstUnreachable.Increment() - vv.TrimFront(header.ICMPv4MinimumSize) + pkt.Data.TrimFront(header.ICMPv4MinimumSize) switch h.Code() { case header.ICMPv4PortUnreachable: - e.handleControl(stack.ControlPortUnreachable, 0, vv) + e.handleControl(stack.ControlPortUnreachable, 0, pkt) case header.ICMPv4FragmentationNeeded: mtu := uint32(h.MTU()) - e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), vv) + e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), pkt) } case header.ICMPv4SrcQuench: diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 1339f8474..26f1402ed 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -198,7 +198,7 @@ func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, hdr buff return nil } -func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadSize int, params stack.NetworkHeaderParams) { +func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadSize int, params stack.NetworkHeaderParams) header.IPv4 { ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize)) length := uint16(hdr.UsedLength() + payloadSize) id := uint32(0) @@ -218,19 +218,24 @@ func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadS DstAddr: r.RemoteAddress, }) ip.SetChecksum(^ip.CalculateChecksum()) + return ip } // WritePacket writes a packet to the given destination address and protocol. func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, params stack.NetworkHeaderParams, loop stack.PacketLooping) *tcpip.Error { - e.addIPHeader(r, &hdr, payload.Size(), params) + ip := e.addIPHeader(r, &hdr, payload.Size(), params) if loop&stack.PacketLoop != 0 { views := make([]buffer.View, 1, 1+len(payload.Views())) views[0] = hdr.View() views = append(views, payload.Views()...) 
- vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) loopedR := r.MakeLoopedRoute() - e.HandlePacket(&loopedR, vv) + + e.HandlePacket(&loopedR, tcpip.PacketBuffer{ + Data: buffer.NewVectorisedView(len(views[0])+payload.Size(), views), + NetworkHeader: buffer.View(ip), + }) + loopedR.Release() } if loop&stack.PacketOut == 0 { @@ -301,7 +306,10 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.Vect ip.SetChecksum(^ip.CalculateChecksum()) if loop&stack.PacketLoop != 0 { - e.HandlePacket(r, payload) + e.HandlePacket(r, tcpip.PacketBuffer{ + Data: payload, + NetworkHeader: buffer.View(ip), + }) } if loop&stack.PacketOut == 0 { return nil @@ -314,22 +322,23 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.Vect // HandlePacket is called by the link layer when new ipv4 packets arrive for // this endpoint. -func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { - headerView := vv.First() +func (e *endpoint) HandlePacket(r *stack.Route, pkt tcpip.PacketBuffer) { + headerView := pkt.Data.First() h := header.IPv4(headerView) - if !h.IsValid(vv.Size()) { + if !h.IsValid(pkt.Data.Size()) { r.Stats().IP.MalformedPacketsReceived.Increment() return } + pkt.NetworkHeader = headerView[:h.HeaderLength()] hlen := int(h.HeaderLength()) tlen := int(h.TotalLength()) - vv.TrimFront(hlen) - vv.CapLength(tlen - hlen) + pkt.Data.TrimFront(hlen) + pkt.Data.CapLength(tlen - hlen) more := (h.Flags() & header.IPv4FlagMoreFragments) != 0 if more || h.FragmentOffset() != 0 { - if vv.Size() == 0 { + if pkt.Data.Size() == 0 { // Drop the packet as it's marked as a fragment but has // no payload. r.Stats().IP.MalformedPacketsReceived.Increment() @@ -337,10 +346,10 @@ func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { return } // The packet is a fragment, let's try to reassemble it. - last := h.FragmentOffset() + uint16(vv.Size()) - 1 + last := h.FragmentOffset() + uint16(pkt.Data.Size()) - 1 // Drop the packet if the fragmentOffset is incorrect. i.e the - // combination of fragmentOffset and vv.size() causes a wrap - // around resulting in last being less than the offset. + // combination of fragmentOffset and pkt.Data.size() causes a + // wrap around resulting in last being less than the offset. if last < h.FragmentOffset() { r.Stats().IP.MalformedPacketsReceived.Increment() r.Stats().IP.MalformedFragmentsReceived.Increment() @@ -348,7 +357,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { } var ready bool var err error - vv, ready, err = e.fragmentation.Process(hash.IPv4FragmentHash(h), h.FragmentOffset(), last, more, vv) + pkt.Data, ready, err = e.fragmentation.Process(hash.IPv4FragmentHash(h), h.FragmentOffset(), last, more, pkt.Data) if err != nil { r.Stats().IP.MalformedPacketsReceived.Increment() r.Stats().IP.MalformedFragmentsReceived.Increment() @@ -361,11 +370,11 @@ func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { p := h.TransportProtocol() if p == header.ICMPv4ProtocolNumber { headerView.CapLength(hlen) - e.handleICMP(r, headerView, vv) + e.handleICMP(r, pkt) return } r.Stats().IP.PacketsDelivered.Increment() - e.dispatcher.DeliverTransportPacket(r, p, headerView, vv) + e.dispatcher.DeliverTransportPacket(r, p, pkt) } // Close cleans up resources associated with the endpoint. 
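For reference, the receive path above now follows one pattern: the link layer hands HandlePacket a tcpip.PacketBuffer whose Data still begins with the IP header; the endpoint records that header in pkt.NetworkHeader and trims it from Data before dispatching to the transport layer. A minimal sketch of that pattern, assuming only the tcpip and buffer packages from this change (the 2-byte header below is a stand-in, not a real IPv4 header):

    package main

    import (
        "fmt"

        "gvisor.dev/gvisor/pkg/tcpip"
        "gvisor.dev/gvisor/pkg/tcpip/buffer"
    )

    func main() {
        // Inbound packets arrive with header and payload together in Data;
        // headers are never split across views.
        hdr := buffer.View([]byte{0xAA, 0xBB}) // stand-in network header
        payload := buffer.View([]byte("payload"))
        data := buffer.NewVectorisedView(len(hdr)+len(payload), []buffer.View{hdr, payload})

        pkt := tcpip.PacketBuffer{Data: data}

        // As in HandlePacket above: remember the header, then consume it from
        // Data so later stages only see the remaining payload.
        pkt.NetworkHeader = pkt.Data.First()[:2]
        pkt.Data.TrimFront(2)

        fmt.Println(len(pkt.NetworkHeader), pkt.Data.Size()) // 2 7
    }

Because Data is trimmed in place, anything that still needs the header afterwards reads it from pkt.NetworkHeader rather than from Data.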
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go index 99f84acd7..f100d84ee 100644 --- a/pkg/tcpip/network/ipv4/ipv4_test.go +++ b/pkg/tcpip/network/ipv4/ipv4_test.go @@ -464,7 +464,9 @@ func TestInvalidFragments(t *testing.T) { s.CreateNIC(nicid, sniffer.New(ep)) for _, pkt := range tc.packets { - ep.InjectLinkAddr(header.IPv4ProtocolNumber, remoteLinkAddr, buffer.NewVectorisedView(len(pkt), []buffer.View{pkt})) + ep.InjectLinkAddr(header.IPv4ProtocolNumber, remoteLinkAddr, tcpip.PacketBuffer{ + Data: buffer.NewVectorisedView(len(pkt), []buffer.View{pkt}), + }) } if got, want := s.Stats().IP.MalformedPacketsReceived.Value(), tc.wantMalformedIPPackets; got != want { diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index 05e8c075b..58f8e80df 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -25,8 +25,8 @@ import ( // the original packet that caused the ICMP one to be sent. This information is // used to find out which transport endpoint must be notified about the ICMP // packet. -func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, vv buffer.VectorisedView) { - h := header.IPv6(vv.First()) +func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt tcpip.PacketBuffer) { + h := header.IPv6(pkt.Data.First()) // We don't use IsValid() here because ICMP only requires that up to // 1280 bytes of the original packet be included. So it's likely that it @@ -40,10 +40,10 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, vv buffer. // Skip the IP header, then handle the fragmentation header if there // is one. - vv.TrimFront(header.IPv6MinimumSize) + pkt.Data.TrimFront(header.IPv6MinimumSize) p := h.TransportProtocol() if p == header.IPv6FragmentHeader { - f := header.IPv6Fragment(vv.First()) + f := header.IPv6Fragment(pkt.Data.First()) if !f.IsValid() || f.FragmentOffset() != 0 { // We can't handle fragments that aren't at offset 0 // because they don't have the transport headers. @@ -52,19 +52,19 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, vv buffer. // Skip fragmentation header and find out the actual protocol // number. - vv.TrimFront(header.IPv6FragmentHeaderSize) + pkt.Data.TrimFront(header.IPv6FragmentHeaderSize) p = f.TransportProtocol() } // Deliver the control packet to the transport endpoint. - e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, vv) + e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt) } -func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.VectorisedView) { +func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt tcpip.PacketBuffer) { stats := r.Stats().ICMP sent := stats.V6PacketsSent received := stats.V6PacketsReceived - v := vv.First() + v := pkt.Data.First() if len(v) < header.ICMPv6MinimumSize { received.Invalid.Increment() return @@ -77,7 +77,7 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V // Only the first view in vv is accounted for by h. To account for the // rest of vv, a shallow copy is made and the first view is removed. // This copy is used as extra payload during the checksum calculation. 
- payload := vv + payload := pkt.Data payload.RemoveFirst() if got, want := h.Checksum(), header.ICMPv6Checksum(h, iph.SourceAddress(), iph.DestinationAddress(), payload); got != want { received.Invalid.Increment() @@ -113,9 +113,9 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V received.Invalid.Increment() return } - vv.TrimFront(header.ICMPv6PacketTooBigMinimumSize) + pkt.Data.TrimFront(header.ICMPv6PacketTooBigMinimumSize) mtu := h.MTU() - e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), vv) + e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), pkt) case header.ICMPv6DstUnreachable: received.DstUnreachable.Increment() @@ -123,10 +123,10 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V received.Invalid.Increment() return } - vv.TrimFront(header.ICMPv6DstUnreachableMinimumSize) + pkt.Data.TrimFront(header.ICMPv6DstUnreachableMinimumSize) switch h.Code() { case header.ICMPv6PortUnreachable: - e.handleControl(stack.ControlPortUnreachable, 0, vv) + e.handleControl(stack.ControlPortUnreachable, 0, pkt) } case header.ICMPv6NeighborSolicit: @@ -189,9 +189,9 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V header.NDPTargetLinkLayerAddressOption(r.LocalLinkAddress[:]), } hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6NeighborAdvertMinimumSize + int(optsSerializer.Length())) - pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborAdvertSize)) - pkt.SetType(header.ICMPv6NeighborAdvert) - na := header.NDPNeighborAdvert(pkt.NDPPayload()) + packet := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborAdvertSize)) + packet.SetType(header.ICMPv6NeighborAdvert) + na := header.NDPNeighborAdvert(packet.NDPPayload()) na.SetSolicitedFlag(true) na.SetOverrideFlag(true) na.SetTargetAddress(targetAddr) @@ -209,7 +209,7 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V r := r.Clone() defer r.Release() r.LocalAddress = targetAddr - pkt.SetChecksum(header.ICMPv6Checksum(pkt, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{})) + packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{})) // TODO(tamird/ghanan): there exists an explicit NDP option that is // used to update the neighbor table with link addresses for a @@ -285,13 +285,13 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.V received.Invalid.Increment() return } - vv.TrimFront(header.ICMPv6EchoMinimumSize) + pkt.Data.TrimFront(header.ICMPv6EchoMinimumSize) hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6EchoMinimumSize) - pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6EchoMinimumSize)) - copy(pkt, h) - pkt.SetType(header.ICMPv6EchoReply) - pkt.SetChecksum(header.ICMPv6Checksum(pkt, r.LocalAddress, r.RemoteAddress, vv)) - if err := r.WritePacket(nil /* gso */, hdr, vv, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}); err != nil { + packet := header.ICMPv6(hdr.Prepend(header.ICMPv6EchoMinimumSize)) + copy(packet, h) + packet.SetType(header.ICMPv6EchoReply) + packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, pkt.Data)) + if err := r.WritePacket(nil /* gso */, hdr, pkt.Data, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}); err != nil { sent.Dropped.Increment() return } @@ -303,7 +303,7 @@ func (e *endpoint) handleICMP(r 
*stack.Route, netHeader buffer.View, vv buffer.V received.Invalid.Increment() return } - e.dispatcher.DeliverTransportPacket(r, header.ICMPv6ProtocolNumber, netHeader, vv) + e.dispatcher.DeliverTransportPacket(r, header.ICMPv6ProtocolNumber, pkt) case header.ICMPv6TimeExceeded: received.TimeExceeded.Increment() diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go index d686f79ce..6037a1ef8 100644 --- a/pkg/tcpip/network/ipv6/icmp_test.go +++ b/pkg/tcpip/network/ipv6/icmp_test.go @@ -65,7 +65,7 @@ type stubDispatcher struct { stack.TransportDispatcher } -func (*stubDispatcher) DeliverTransportPacket(*stack.Route, tcpip.TransportProtocolNumber, buffer.View, buffer.VectorisedView) { +func (*stubDispatcher) DeliverTransportPacket(*stack.Route, tcpip.TransportProtocolNumber, tcpip.PacketBuffer) { } type stubLinkAddressCache struct { @@ -147,7 +147,9 @@ func TestICMPCounts(t *testing.T) { SrcAddr: r.LocalAddress, DstAddr: r.RemoteAddress, }) - ep.HandlePacket(&r, hdr.View().ToVectorisedView()) + ep.HandlePacket(&r, tcpip.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) } for _, typ := range types { @@ -280,7 +282,9 @@ func routeICMPv6Packet(t *testing.T, args routeArgs, fn func(*testing.T, header. views := []buffer.View{pkt.Header, pkt.Payload} size := len(pkt.Header) + len(pkt.Payload) vv := buffer.NewVectorisedView(size, views) - args.dst.InjectLinkAddr(pkt.Proto, args.dst.LinkAddress(), vv) + args.dst.InjectLinkAddr(pkt.Proto, args.dst.LinkAddress(), tcpip.PacketBuffer{ + Data: vv, + }) } if pkt.Proto != ProtocolNumber { @@ -498,7 +502,9 @@ func TestICMPChecksumValidationSimple(t *testing.T) { SrcAddr: lladdr1, DstAddr: lladdr0, }) - e.Inject(ProtocolNumber, hdr.View().ToVectorisedView()) + e.InjectInbound(ProtocolNumber, tcpip.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) } stats := s.Stats().ICMP.V6PacketsReceived @@ -673,7 +679,9 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) { SrcAddr: lladdr1, DstAddr: lladdr0, }) - e.Inject(ProtocolNumber, hdr.View().ToVectorisedView()) + e.InjectInbound(ProtocolNumber, tcpip.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) } stats := s.Stats().ICMP.V6PacketsReceived @@ -849,9 +857,9 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) { SrcAddr: lladdr1, DstAddr: lladdr0, }) - e.Inject(ProtocolNumber, - buffer.NewVectorisedView(header.IPv6MinimumSize+size+payloadSize, - []buffer.View{hdr.View(), payload})) + e.InjectInbound(ProtocolNumber, tcpip.PacketBuffer{ + Data: buffer.NewVectorisedView(header.IPv6MinimumSize+size+payloadSize, []buffer.View{hdr.View(), payload}), + }) } stats := s.Stats().ICMP.V6PacketsReceived diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go index 5898f8f9e..805d1739c 100644 --- a/pkg/tcpip/network/ipv6/ipv6.go +++ b/pkg/tcpip/network/ipv6/ipv6.go @@ -97,7 +97,7 @@ func (e *endpoint) GSOMaxSize() uint32 { return 0 } -func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadSize int, params stack.NetworkHeaderParams) { +func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadSize int, params stack.NetworkHeaderParams) header.IPv6 { length := uint16(hdr.UsedLength() + payloadSize) ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize)) ip.Encode(&header.IPv6Fields{ @@ -108,19 +108,24 @@ func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadS SrcAddr: r.LocalAddress, DstAddr: r.RemoteAddress, }) + return ip } // WritePacket writes a packet to 
the given destination address and protocol. func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, params stack.NetworkHeaderParams, loop stack.PacketLooping) *tcpip.Error { - e.addIPHeader(r, &hdr, payload.Size(), params) + ip := e.addIPHeader(r, &hdr, payload.Size(), params) if loop&stack.PacketLoop != 0 { views := make([]buffer.View, 1, 1+len(payload.Views())) views[0] = hdr.View() views = append(views, payload.Views()...) - vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) loopedR := r.MakeLoopedRoute() - e.HandlePacket(&loopedR, vv) + + e.HandlePacket(&loopedR, tcpip.PacketBuffer{ + Data: buffer.NewVectorisedView(len(views[0])+payload.Size(), views), + NetworkHeader: buffer.View(ip), + }) + loopedR.Release() } if loop&stack.PacketOut == 0 { @@ -160,24 +165,25 @@ func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.Vector // HandlePacket is called by the link layer when new ipv6 packets arrive for // this endpoint. -func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { - headerView := vv.First() +func (e *endpoint) HandlePacket(r *stack.Route, pkt tcpip.PacketBuffer) { + headerView := pkt.Data.First() h := header.IPv6(headerView) - if !h.IsValid(vv.Size()) { + if !h.IsValid(pkt.Data.Size()) { return } - vv.TrimFront(header.IPv6MinimumSize) - vv.CapLength(int(h.PayloadLength())) + pkt.NetworkHeader = headerView[:header.IPv6MinimumSize] + pkt.Data.TrimFront(header.IPv6MinimumSize) + pkt.Data.CapLength(int(h.PayloadLength())) p := h.TransportProtocol() if p == header.ICMPv6ProtocolNumber { - e.handleICMP(r, headerView, vv) + e.handleICMP(r, headerView, pkt) return } r.Stats().IP.PacketsDelivered.Increment() - e.dispatcher.DeliverTransportPacket(r, p, headerView, vv) + e.dispatcher.DeliverTransportPacket(r, p, pkt) } // Close cleans up resources associated with the endpoint. 
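Both WritePacket implementations now build the looped-back packet the same way: the freshly written header and the payload views are stitched into a single VectorisedView, and the encoded IP header returned by addIPHeader is attached as NetworkHeader before calling HandlePacket. A sketch of that construction under the same assumptions; loopbackPacket is a hypothetical helper for illustration, not part of the patch:

    package main

    import (
        "fmt"

        "gvisor.dev/gvisor/pkg/tcpip"
        "gvisor.dev/gvisor/pkg/tcpip/buffer"
    )

    // loopbackPacket mirrors the loopback branch of WritePacket: header and
    // payload views become one VectorisedView, and the already-encoded IP
    // header is carried separately as NetworkHeader.
    func loopbackPacket(hdr buffer.Prependable, payload buffer.VectorisedView, ipHdr buffer.View) tcpip.PacketBuffer {
        views := make([]buffer.View, 1, 1+len(payload.Views()))
        views[0] = hdr.View()
        views = append(views, payload.Views()...)
        return tcpip.PacketBuffer{
            Data:          buffer.NewVectorisedView(len(views[0])+payload.Size(), views),
            NetworkHeader: ipHdr,
        }
    }

    func main() {
        hdr := buffer.NewPrependable(20)
        ipHdr := buffer.View(hdr.Prepend(20)) // stand-in for the encoded IP header
        payload := buffer.View("hello").ToVectorisedView()

        pkt := loopbackPacket(hdr, payload, ipHdr)
        fmt.Println(pkt.Data.Size(), len(pkt.NetworkHeader)) // 25 20
    }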
diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go index deaa9b7f3..1cbfa7278 100644 --- a/pkg/tcpip/network/ipv6/ipv6_test.go +++ b/pkg/tcpip/network/ipv6/ipv6_test.go @@ -55,7 +55,9 @@ func testReceiveICMP(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst DstAddr: dst, }) - e.Inject(ProtocolNumber, hdr.View().ToVectorisedView()) + e.InjectInbound(ProtocolNumber, tcpip.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) stats := s.Stats().ICMP.V6PacketsReceived @@ -111,7 +113,9 @@ func testReceiveUDP(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst DstAddr: dst, }) - e.Inject(ProtocolNumber, hdr.View().ToVectorisedView()) + e.InjectInbound(ProtocolNumber, tcpip.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) stat := s.Stats().UDP.PacketsReceived diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go index 69ab7ba12..0dbce14a0 100644 --- a/pkg/tcpip/network/ipv6/ndp_test.go +++ b/pkg/tcpip/network/ipv6/ndp_test.go @@ -98,7 +98,9 @@ func TestHopLimitValidation(t *testing.T) { SrcAddr: r.LocalAddress, DstAddr: r.RemoteAddress, }) - ep.HandlePacket(r, hdr.View().ToVectorisedView()) + ep.HandlePacket(r, tcpip.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) } types := []struct { @@ -345,7 +347,9 @@ func TestRouterAdvertValidation(t *testing.T) { t.Fatalf("got rxRA = %d, want = 0", got) } - e.Inject(header.IPv6ProtocolNumber, hdr.View().ToVectorisedView()) + e.InjectInbound(header.IPv6ProtocolNumber, tcpip.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) if test.expectedSuccess { if got := invalid.Value(); got != 0 { diff --git a/pkg/tcpip/packet_buffer.go b/pkg/tcpip/packet_buffer.go new file mode 100644 index 000000000..10b04239d --- /dev/null +++ b/pkg/tcpip/packet_buffer.go @@ -0,0 +1,54 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcpip + +import "gvisor.dev/gvisor/pkg/tcpip/buffer" + +// A PacketBuffer contains all the data of a network packet. +// +// As a PacketBuffer traverses up the stack, it may be necessary to pass it to +// multiple endpoints. Clone() should be called in such cases so that +// modifications to the Data field do not affect other copies. +// +// +stateify savable +type PacketBuffer struct { + // Data holds the payload of the packet. For inbound packets, it also + // holds the headers, which are consumed as the packet moves up the + // stack. Headers are guaranteed not to be split across views. + // + // The bytes backing Data are immutable, but Data itself may be trimmed + // or otherwise modified. + Data buffer.VectorisedView + + // The bytes backing these views are immutable. Each field may be nil + // if either it has not been set yet or no such header exists (e.g. + // packets sent via loopback may not have a link header). + // + // These fields may be Views into other Views. SR dosen't support this, + // so deep copies are necessary in some cases. 
+ LinkHeader buffer.View + NetworkHeader buffer.View + TransportHeader buffer.View +} + +// Clone makes a copy of pk. It clones the Data field, which creates a new +// VectorisedView but does not deep copy the underlying bytes. +func (pk PacketBuffer) Clone() PacketBuffer { + return PacketBuffer{ + Data: pk.Data.Clone(nil), + LinkHeader: pk.LinkHeader, + NetworkHeader: pk.NetworkHeader, + TransportHeader: pk.TransportHeader, + } +} diff --git a/pkg/tcpip/packet_buffer_state.go b/pkg/tcpip/packet_buffer_state.go new file mode 100644 index 000000000..04c4cf136 --- /dev/null +++ b/pkg/tcpip/packet_buffer_state.go @@ -0,0 +1,26 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcpip + +import "gvisor.dev/gvisor/pkg/tcpip/buffer" + +// beforeSave is invoked by stateify. +func (pk *PacketBuffer) beforeSave() { + // Non-Data fields may be slices of the Data field. This causes + // problems for SR, so during save we make each header independent. + pk.LinkHeader = append(buffer.View(nil), pk.LinkHeader...) + pk.NetworkHeader = append(buffer.View(nil), pk.NetworkHeader...) + pk.TransportHeader = append(buffer.View(nil), pk.TransportHeader...) +} diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 525a25218..cc789b5af 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -328,7 +328,9 @@ func TestDADFail(t *testing.T) { // Receive a packet to simulate multiple nodes owning or // attempting to own the same address. hdr := test.makeBuf(addr1) - e.Inject(header.IPv6ProtocolNumber, hdr.View().ToVectorisedView()) + e.InjectInbound(header.IPv6ProtocolNumber, tcpip.PacketBuffer{ + Data: hdr.View().ToVectorisedView(), + }) stat := test.getStat(s.Stats().ICMP.V6PacketsReceived) if got := stat.Value(); got != 1 { diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 12969c74e..28a28ae6e 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -723,10 +723,10 @@ func (n *NIC) leaveGroupLocked(addr tcpip.Address) *tcpip.Error { return nil } -func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, vv buffer.VectorisedView) { +func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt tcpip.PacketBuffer) { r := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */) r.RemoteLinkAddress = remotelinkAddr - ref.ep.HandlePacket(&r, vv) + ref.ep.HandlePacket(&r, pkt) ref.decRef() } @@ -736,9 +736,9 @@ func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, // Note that the ownership of the slice backing vv is retained by the caller. // This rule applies only to the slice itself, not to the items of the slice; // the ownership of the items is not retained by the caller. 
-func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) { +func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { n.stats.Rx.Packets.Increment() - n.stats.Rx.Bytes.IncrementBy(uint64(vv.Size())) + n.stats.Rx.Bytes.IncrementBy(uint64(pkt.Data.Size())) netProto, ok := n.stack.networkProtocols[protocol] if !ok { @@ -763,22 +763,22 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link } n.mu.RUnlock() for _, ep := range packetEPs { - ep.HandlePacket(n.id, local, protocol, vv.Clone(nil), linkHeader) + ep.HandlePacket(n.id, local, protocol, pkt.Clone()) } if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber { n.stack.stats.IP.PacketsReceived.Increment() } - if len(vv.First()) < netProto.MinimumPacketSize() { + if len(pkt.Data.First()) < netProto.MinimumPacketSize() { n.stack.stats.MalformedRcvdPackets.Increment() return } - src, dst := netProto.ParseAddresses(vv.First()) + src, dst := netProto.ParseAddresses(pkt.Data.First()) if ref := n.getRef(protocol, dst); ref != nil { - handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, vv) + handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, pkt) return } @@ -806,20 +806,20 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link if ok { r.RemoteAddress = src // TODO(b/123449044): Update the source NIC as well. - ref.ep.HandlePacket(&r, vv) + ref.ep.HandlePacket(&r, pkt) ref.decRef() } else { // n doesn't have a destination endpoint. // Send the packet out of n. - hdr := buffer.NewPrependableFromView(vv.First()) - vv.RemoveFirst() + hdr := buffer.NewPrependableFromView(pkt.Data.First()) + pkt.Data.RemoveFirst() // TODO(b/128629022): use route.WritePacket. - if err := n.linkEP.WritePacket(&r, nil /* gso */, hdr, vv, protocol); err != nil { + if err := n.linkEP.WritePacket(&r, nil /* gso */, hdr, pkt.Data, protocol); err != nil { r.Stats().IP.OutgoingPacketErrors.Increment() } else { n.stats.Tx.Packets.Increment() - n.stats.Tx.Bytes.IncrementBy(uint64(hdr.UsedLength() + vv.Size())) + n.stats.Tx.Bytes.IncrementBy(uint64(hdr.UsedLength() + pkt.Data.Size())) } } return @@ -833,7 +833,7 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link // DeliverTransportPacket delivers the packets to the appropriate transport // protocol endpoint. -func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, netHeader buffer.View, vv buffer.VectorisedView) { +func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt tcpip.PacketBuffer) { state, ok := n.stack.transportProtocols[protocol] if !ok { n.stack.stats.UnknownProtocolRcvdPackets.Increment() @@ -845,41 +845,41 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN // Raw socket packets are delivered based solely on the transport // protocol number. We do not inspect the payload to ensure it's // validly formed. 
- n.stack.demux.deliverRawPacket(r, protocol, netHeader, vv) + n.stack.demux.deliverRawPacket(r, protocol, pkt) - if len(vv.First()) < transProto.MinimumPacketSize() { + if len(pkt.Data.First()) < transProto.MinimumPacketSize() { n.stack.stats.MalformedRcvdPackets.Increment() return } - srcPort, dstPort, err := transProto.ParsePorts(vv.First()) + srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First()) if err != nil { n.stack.stats.MalformedRcvdPackets.Increment() return } id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress} - if n.stack.demux.deliverPacket(r, protocol, netHeader, vv, id) { + if n.stack.demux.deliverPacket(r, protocol, pkt, id) { return } // Try to deliver to per-stack default handler. if state.defaultHandler != nil { - if state.defaultHandler(r, id, netHeader, vv) { + if state.defaultHandler(r, id, pkt) { return } } // We could not find an appropriate destination for this packet, so // deliver it to the global handler. - if !transProto.HandleUnknownDestinationPacket(r, id, netHeader, vv) { + if !transProto.HandleUnknownDestinationPacket(r, id, pkt) { n.stack.stats.MalformedRcvdPackets.Increment() } } // DeliverTransportControlPacket delivers control packets to the appropriate // transport protocol endpoint. -func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, vv buffer.VectorisedView) { +func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt tcpip.PacketBuffer) { state, ok := n.stack.transportProtocols[trans] if !ok { return @@ -890,17 +890,17 @@ func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcp // ICMPv4 only guarantees that 8 bytes of the transport protocol will // be present in the payload. We know that the ports are within the // first 8 bytes for all known transport protocols. - if len(vv.First()) < 8 { + if len(pkt.Data.First()) < 8 { return } - srcPort, dstPort, err := transProto.ParsePorts(vv.First()) + srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First()) if err != nil { return } id := TransportEndpointID{srcPort, local, dstPort, remote} - if n.stack.demux.deliverControlPacket(n, net, trans, typ, extra, vv, id) { + if n.stack.demux.deliverControlPacket(n, net, trans, typ, extra, pkt, id) { return } } diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index d7c124e81..5806d294c 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -64,16 +64,15 @@ type TransportEndpoint interface { UniqueID() uint64 // HandlePacket is called by the stack when new packets arrive to - // this transport endpoint. + // this transport endpoint. It sets pkt.TransportHeader. // - // HandlePacket takes ownership of vv. - HandlePacket(r *Route, id TransportEndpointID, vv buffer.VectorisedView) + // HandlePacket takes ownership of pkt. + HandlePacket(r *Route, id TransportEndpointID, pkt tcpip.PacketBuffer) - // HandleControlPacket is called by the stack when new control (e.g., + // HandleControlPacket is called by the stack when new control (e.g. // ICMP) packets arrive to this transport endpoint. - // - // HandleControlPacket takes ownership of vv. - HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, vv buffer.VectorisedView) + // HandleControlPacket takes ownership of pkt. 
+ HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, pkt tcpip.PacketBuffer) // Close puts the endpoint in a closed state and frees all resources // associated with it. This cleanup may happen asynchronously. Wait can @@ -99,8 +98,8 @@ type RawTransportEndpoint interface { // this transport endpoint. The packet contains all data from the link // layer up. // - // HandlePacket takes ownership of packet and netHeader. - HandlePacket(r *Route, netHeader buffer.View, packet buffer.VectorisedView) + // HandlePacket takes ownership of pkt. + HandlePacket(r *Route, pkt tcpip.PacketBuffer) } // PacketEndpoint is the interface that needs to be implemented by packet @@ -117,8 +116,8 @@ type PacketEndpoint interface { // linkHeader may have a length of 0, in which case the PacketEndpoint // should construct its own ethernet header for applications. // - // HandlePacket takes ownership of packet and linkHeader. - HandlePacket(nicid tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, packet buffer.VectorisedView, linkHeader buffer.View) + // HandlePacket takes ownership of pkt. + HandlePacket(nicid tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) } // TransportProtocol is the interface that needs to be implemented by transport @@ -148,7 +147,9 @@ type TransportProtocol interface { // // The return value indicates whether the packet was well-formed (for // stats purposes only). - HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool + // + // HandleUnknownDestinationPacket takes ownership of pkt. + HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, pkt tcpip.PacketBuffer) bool // SetOption allows enabling/disabling protocol specific features. // SetOption returns an error if the option is not supported or the @@ -166,17 +167,21 @@ type TransportProtocol interface { // the network layer. type TransportDispatcher interface { // DeliverTransportPacket delivers packets to the appropriate - // transport protocol endpoint. It also returns the network layer - // header for the enpoint to inspect or pass up the stack. + // transport protocol endpoint. + // + // pkt.NetworkHeader must be set before calling DeliverTransportPacket. // - // DeliverTransportPacket takes ownership of vv and netHeader. - DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, netHeader buffer.View, vv buffer.VectorisedView) + // DeliverTransportPacket takes ownership of pkt. + DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt tcpip.PacketBuffer) // DeliverTransportControlPacket delivers control packets to the // appropriate transport protocol endpoint. // - // DeliverTransportControlPacket takes ownership of vv. - DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, vv buffer.VectorisedView) + // pkt.NetworkHeader must be set before calling + // DeliverTransportControlPacket. + // + // DeliverTransportControlPacket takes ownership of pkt. + DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt tcpip.PacketBuffer) } // PacketLooping specifies where an outbound packet should be sent. 
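The interface comments above pin down the ownership rule: whoever is handed a tcpip.PacketBuffer owns it and may consume its Data, which is why the fan-out paths in this change (the per-NIC packet endpoints and the transport demuxer) pass pkt.Clone() to every receiver except the last. A small sketch of why a clone is enough, assuming the Clone semantics documented in packet_buffer.go: the clone gets its own view list while the backing bytes stay shared and immutable.

    package main

    import (
        "fmt"

        "gvisor.dev/gvisor/pkg/tcpip"
        "gvisor.dev/gvisor/pkg/tcpip/buffer"
    )

    func main() {
        pkt := tcpip.PacketBuffer{
            Data: buffer.View([]byte{1, 2, 3, 4}).ToVectorisedView(),
        }

        // An endpoint that takes ownership may trim Data; handing it a Clone
        // keeps the original intact for the next receiver.
        clone := pkt.Clone()
        clone.Data.TrimFront(2)

        fmt.Println(pkt.Data.Size(), clone.Data.Size()) // 4 2
    }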
@@ -248,10 +253,10 @@ type NetworkEndpoint interface { NICID() tcpip.NICID // HandlePacket is called by the link layer when new packets arrive to - // this network endpoint. + // this network endpoint. It sets pkt.NetworkHeader. // - // HandlePacket takes ownership of vv. - HandlePacket(r *Route, vv buffer.VectorisedView) + // HandlePacket takes ownership of pkt. + HandlePacket(r *Route, pkt tcpip.PacketBuffer) // Close is called when the endpoint is reomved from a stack. Close() @@ -294,11 +299,14 @@ type NetworkProtocol interface { // the data link layer. type NetworkDispatcher interface { // DeliverNetworkPacket finds the appropriate network protocol endpoint - // and hands the packet over for further processing. linkHeader may have - // length 0 when the caller does not have ethernet data. + // and hands the packet over for further processing. + // + // pkt.LinkHeader may or may not be set before calling + // DeliverNetworkPacket. Some packets do not have link headers (e.g. + // packets sent via loopback), and won't have the field set. // - // DeliverNetworkPacket takes ownership of vv and linkHeader. - DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) + // DeliverNetworkPacket takes ownership of pkt. + DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) } // LinkEndpointCapabilities is the type associated with the capabilities @@ -329,7 +337,9 @@ const ( // LinkEndpoint is the interface implemented by data link layer protocols (e.g., // ethernet, loopback, raw) and used by network layer protocols to send packets -// out through the implementer's data link endpoint. +// out through the implementer's data link endpoint. When a link header exists, +// it sets each tcpip.PacketBuffer's LinkHeader field before passing it up the +// stack. type LinkEndpoint interface { // MTU is the maximum transmission unit for this endpoint. This is // usually dictated by the backing physical network; when such a @@ -395,7 +405,7 @@ type InjectableLinkEndpoint interface { LinkEndpoint // InjectInbound injects an inbound packet. - InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) + InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) // InjectOutbound writes a fully formed outbound packet directly to the // link. diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 8b141cafd..08599d765 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -51,7 +51,7 @@ const ( type transportProtocolState struct { proto TransportProtocol - defaultHandler func(r *Route, id TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool + defaultHandler func(r *Route, id TransportEndpointID, pkt tcpip.PacketBuffer) bool } // TCPProbeFunc is the expected function type for a TCP probe function to be @@ -641,7 +641,7 @@ func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, // // It must be called only during initialization of the stack. Changing it as the // stack is operating is not supported. 
-func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(*Route, TransportEndpointID, buffer.View, buffer.VectorisedView) bool) { +func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(*Route, TransportEndpointID, tcpip.PacketBuffer) bool) { state := s.transportProtocols[p] if state != nil { state.defaultHandler = h diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 9dae853d0..1fac5477f 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -86,28 +86,28 @@ func (f *fakeNetworkEndpoint) ID() *stack.NetworkEndpointID { return &f.id } -func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { +func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, pkt tcpip.PacketBuffer) { // Increment the received packet count in the protocol descriptor. f.proto.packetCount[int(f.id.LocalAddress[0])%len(f.proto.packetCount)]++ // Consume the network header. - b := vv.First() - vv.TrimFront(fakeNetHeaderLen) + b := pkt.Data.First() + pkt.Data.TrimFront(fakeNetHeaderLen) // Handle control packets. if b[2] == uint8(fakeControlProtocol) { - nb := vv.First() + nb := pkt.Data.First() if len(nb) < fakeNetHeaderLen { return } - vv.TrimFront(fakeNetHeaderLen) - f.dispatcher.DeliverTransportControlPacket(tcpip.Address(nb[1:2]), tcpip.Address(nb[0:1]), fakeNetNumber, tcpip.TransportProtocolNumber(nb[2]), stack.ControlPortUnreachable, 0, vv) + pkt.Data.TrimFront(fakeNetHeaderLen) + f.dispatcher.DeliverTransportControlPacket(tcpip.Address(nb[1:2]), tcpip.Address(nb[0:1]), fakeNetNumber, tcpip.TransportProtocolNumber(nb[2]), stack.ControlPortUnreachable, 0, pkt) return } // Dispatch the packet to the transport protocol. - f.dispatcher.DeliverTransportPacket(r, tcpip.TransportProtocolNumber(b[2]), buffer.View([]byte{}), vv) + f.dispatcher.DeliverTransportPacket(r, tcpip.TransportProtocolNumber(b[2]), pkt) } func (f *fakeNetworkEndpoint) MaxHeaderLength() uint16 { @@ -138,7 +138,9 @@ func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr bu views[0] = hdr.View() views = append(views, payload.Views()...) vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) - f.HandlePacket(r, vv) + f.HandlePacket(r, tcpip.PacketBuffer{ + Data: vv, + }) } if loop&stack.PacketOut == 0 { return nil @@ -259,7 +261,9 @@ func TestNetworkReceive(t *testing.T) { // Make sure packet with wrong address is not delivered. buf[0] = 3 - ep.Inject(fakeNetNumber, buf.ToVectorisedView()) + ep.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeNet.packetCount[1] != 0 { t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 0) } @@ -269,7 +273,9 @@ func TestNetworkReceive(t *testing.T) { // Make sure packet is delivered to first endpoint. buf[0] = 1 - ep.Inject(fakeNetNumber, buf.ToVectorisedView()) + ep.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeNet.packetCount[1] != 1 { t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1) } @@ -279,7 +285,9 @@ func TestNetworkReceive(t *testing.T) { // Make sure packet is delivered to second endpoint. 
buf[0] = 2 - ep.Inject(fakeNetNumber, buf.ToVectorisedView()) + ep.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeNet.packetCount[1] != 1 { t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1) } @@ -288,7 +296,9 @@ func TestNetworkReceive(t *testing.T) { } // Make sure packet is not delivered if protocol number is wrong. - ep.Inject(fakeNetNumber-1, buf.ToVectorisedView()) + ep.InjectInbound(fakeNetNumber-1, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeNet.packetCount[1] != 1 { t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1) } @@ -298,7 +308,9 @@ func TestNetworkReceive(t *testing.T) { // Make sure packet that is too small is dropped. buf.CapLength(2) - ep.Inject(fakeNetNumber, buf.ToVectorisedView()) + ep.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeNet.packetCount[1] != 1 { t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1) } @@ -373,7 +385,9 @@ func testFailingRecv(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte b func testRecvInternal(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, ep *channel.Endpoint, buf buffer.View, want int) { t.Helper() - ep.Inject(fakeNetNumber, buf.ToVectorisedView()) + ep.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if got := fakeNet.PacketCount(localAddrByte); got != want { t.Errorf("receive packet count: got = %d, want %d", got, want) } @@ -1795,7 +1809,9 @@ func TestNICStats(t *testing.T) { // Send a packet to address 1. buf := buffer.NewView(30) - ep1.Inject(fakeNetNumber, buf.ToVectorisedView()) + ep1.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if got, want := s.NICInfo()[1].Stats.Rx.Packets.Value(), uint64(1); got != want { t.Errorf("got Rx.Packets.Value() = %d, want = %d", got, want) } @@ -1855,7 +1871,9 @@ func TestNICForwarding(t *testing.T) { // Send a packet to address 3. buf := buffer.NewView(30) buf[0] = 3 - ep1.Inject(fakeNetNumber, buf.ToVectorisedView()) + ep1.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) select { case <-ep2.C: diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index ccd3d030e..594570216 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -21,7 +21,6 @@ import ( "sync" "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" "gvisor.dev/gvisor/pkg/tcpip/header" ) @@ -86,7 +85,7 @@ func (epsByNic *endpointsByNic) transportEndpoints() []TransportEndpoint { // HandlePacket is called by the stack when new packets arrive to this transport // endpoint. -func (epsByNic *endpointsByNic) handlePacket(r *Route, id TransportEndpointID, vv buffer.VectorisedView) { +func (epsByNic *endpointsByNic) handlePacket(r *Route, id TransportEndpointID, pkt tcpip.PacketBuffer) { epsByNic.mu.RLock() mpep, ok := epsByNic.endpoints[r.ref.nic.ID()] @@ -100,18 +99,18 @@ func (epsByNic *endpointsByNic) handlePacket(r *Route, id TransportEndpointID, v // If this is a broadcast or multicast datagram, deliver the datagram to all // endpoints bound to the right device. if isMulticastOrBroadcast(id.LocalAddress) { - mpep.handlePacketAll(r, id, vv) + mpep.handlePacketAll(r, id, pkt) epsByNic.mu.RUnlock() // Don't use defer for performance reasons. 
return } // multiPortEndpoints are guaranteed to have at least one element. - selectEndpoint(id, mpep, epsByNic.seed).HandlePacket(r, id, vv) + selectEndpoint(id, mpep, epsByNic.seed).HandlePacket(r, id, pkt) epsByNic.mu.RUnlock() // Don't use defer for performance reasons. } // HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. -func (epsByNic *endpointsByNic) handleControlPacket(n *NIC, id TransportEndpointID, typ ControlType, extra uint32, vv buffer.VectorisedView) { +func (epsByNic *endpointsByNic) handleControlPacket(n *NIC, id TransportEndpointID, typ ControlType, extra uint32, pkt tcpip.PacketBuffer) { epsByNic.mu.RLock() defer epsByNic.mu.RUnlock() @@ -127,7 +126,7 @@ func (epsByNic *endpointsByNic) handleControlPacket(n *NIC, id TransportEndpoint // broadcast like we are doing with handlePacket above? // multiPortEndpoints are guaranteed to have at least one element. - selectEndpoint(id, mpep, epsByNic.seed).HandleControlPacket(id, typ, extra, vv) + selectEndpoint(id, mpep, epsByNic.seed).HandleControlPacket(id, typ, extra, pkt) } // registerEndpoint returns true if it succeeds. It fails and returns @@ -258,18 +257,16 @@ func selectEndpoint(id TransportEndpointID, mpep *multiPortEndpoint, seed uint32 return mpep.endpointsArr[idx] } -func (ep *multiPortEndpoint) handlePacketAll(r *Route, id TransportEndpointID, vv buffer.VectorisedView) { +func (ep *multiPortEndpoint) handlePacketAll(r *Route, id TransportEndpointID, pkt tcpip.PacketBuffer) { ep.mu.RLock() for i, endpoint := range ep.endpointsArr { - // HandlePacket modifies vv, so each endpoint needs its own copy except for - // the final one. + // HandlePacket takes ownership of pkt, so each endpoint needs + // its own copy except for the final one. if i == len(ep.endpointsArr)-1 { - endpoint.HandlePacket(r, id, vv) + endpoint.HandlePacket(r, id, pkt) break } - vvCopy := buffer.NewView(vv.Size()) - copy(vvCopy, vv.ToView()) - endpoint.HandlePacket(r, id, vvCopy.ToVectorisedView()) + endpoint.HandlePacket(r, id, pkt.Clone()) } ep.mu.RUnlock() // Don't use defer for performance reasons. } @@ -395,7 +392,7 @@ var loopbackSubnet = func() tcpip.Subnet { // deliverPacket attempts to find one or more matching transport endpoints, and // then, if matches are found, delivers the packet to them. Returns true if it // found one or more endpoints, false otherwise. -func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProtocolNumber, netHeader buffer.View, vv buffer.VectorisedView, id TransportEndpointID) bool { +func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt tcpip.PacketBuffer, id TransportEndpointID) bool { eps, ok := d.protocol[protocolIDs{r.NetProto, protocol}] if !ok { return false @@ -408,8 +405,8 @@ func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProto // transport endpoints. var destEps []*endpointsByNic if protocol == header.UDPProtocolNumber && isMulticastOrBroadcast(id.LocalAddress) { - destEps = d.findAllEndpointsLocked(eps, vv, id) - } else if ep := d.findEndpointLocked(eps, vv, id); ep != nil { + destEps = d.findAllEndpointsLocked(eps, id) + } else if ep := d.findEndpointLocked(eps, id); ep != nil { destEps = append(destEps, ep) } @@ -424,17 +421,19 @@ func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProto return false } - // Deliver the packet. 
- for _, ep := range destEps { - ep.handlePacket(r, id, vv) + // HandlePacket takes ownership of pkt, so each endpoint needs its own + // copy except for the final one. + for _, ep := range destEps[:len(destEps)-1] { + ep.handlePacket(r, id, pkt.Clone()) } + destEps[len(destEps)-1].handlePacket(r, id, pkt) return true } // deliverRawPacket attempts to deliver the given packet and returns whether it // was delivered successfully. -func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportProtocolNumber, netHeader buffer.View, vv buffer.VectorisedView) bool { +func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt tcpip.PacketBuffer) bool { eps, ok := d.protocol[protocolIDs{r.NetProto, protocol}] if !ok { return false @@ -448,7 +447,7 @@ func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportPr for _, rawEP := range eps.rawEndpoints { // Each endpoint gets its own copy of the packet for the sake // of save/restore. - rawEP.HandlePacket(r, buffer.NewViewFromBytes(netHeader), vv.ToView().ToVectorisedView()) + rawEP.HandlePacket(r, pkt) foundRaw = true } eps.mu.RUnlock() @@ -458,7 +457,7 @@ func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportPr // deliverControlPacket attempts to deliver the given control packet. Returns // true if it found an endpoint, false otherwise. -func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, vv buffer.VectorisedView, id TransportEndpointID) bool { +func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt tcpip.PacketBuffer, id TransportEndpointID) bool { eps, ok := d.protocol[protocolIDs{net, trans}] if !ok { return false @@ -466,7 +465,7 @@ func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtoco // Try to find the endpoint. eps.mu.RLock() - ep := d.findEndpointLocked(eps, vv, id) + ep := d.findEndpointLocked(eps, id) eps.mu.RUnlock() // Fail if we didn't find one. @@ -475,12 +474,12 @@ func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtoco } // Deliver the packet. - ep.handleControlPacket(n, id, typ, extra, vv) + ep.handleControlPacket(n, id, typ, extra, pkt) return true } -func (d *transportDemuxer) findAllEndpointsLocked(eps *transportEndpoints, vv buffer.VectorisedView, id TransportEndpointID) []*endpointsByNic { +func (d *transportDemuxer) findAllEndpointsLocked(eps *transportEndpoints, id TransportEndpointID) []*endpointsByNic { var matchedEPs []*endpointsByNic // Try to find a match with the id as provided. if ep, ok := eps.endpoints[id]; ok { @@ -514,8 +513,8 @@ func (d *transportDemuxer) findAllEndpointsLocked(eps *transportEndpoints, vv bu // findEndpointLocked returns the endpoint that most closely matches the given // id. 
-func (d *transportDemuxer) findEndpointLocked(eps *transportEndpoints, vv buffer.VectorisedView, id TransportEndpointID) *endpointsByNic { - if matchedEPs := d.findAllEndpointsLocked(eps, vv, id); len(matchedEPs) > 0 { +func (d *transportDemuxer) findEndpointLocked(eps *transportEndpoints, id TransportEndpointID) *endpointsByNic { + if matchedEPs := d.findAllEndpointsLocked(eps, id); len(matchedEPs) > 0 { return matchedEPs[0] } return nil diff --git a/pkg/tcpip/stack/transport_demuxer_test.go b/pkg/tcpip/stack/transport_demuxer_test.go index 210233dc0..f54117c4e 100644 --- a/pkg/tcpip/stack/transport_demuxer_test.go +++ b/pkg/tcpip/stack/transport_demuxer_test.go @@ -156,7 +156,9 @@ func (c *testContext) sendV6Packet(payload []byte, h *headers, linkEpName string u.SetChecksum(^u.CalculateChecksum(xsum)) // Inject packet. - c.linkEPs[linkEpName].Inject(ipv6.ProtocolNumber, buf.ToVectorisedView()) + c.linkEPs[linkEpName].InjectInbound(ipv6.ProtocolNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) } func TestTransportDemuxerRegister(t *testing.T) { diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go index 203e79f56..2cacea99a 100644 --- a/pkg/tcpip/stack/transport_test.go +++ b/pkg/tcpip/stack/transport_test.go @@ -197,7 +197,7 @@ func (*fakeTransportEndpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Erro return tcpip.FullAddress{}, nil } -func (f *fakeTransportEndpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, _ buffer.VectorisedView) { +func (f *fakeTransportEndpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, _ tcpip.PacketBuffer) { // Increment the number of received packets. f.proto.packetCount++ if f.acceptQueue != nil { @@ -214,7 +214,7 @@ func (f *fakeTransportEndpoint) HandlePacket(r *stack.Route, id stack.TransportE } } -func (f *fakeTransportEndpoint) HandleControlPacket(stack.TransportEndpointID, stack.ControlType, uint32, buffer.VectorisedView) { +func (f *fakeTransportEndpoint) HandleControlPacket(stack.TransportEndpointID, stack.ControlType, uint32, tcpip.PacketBuffer) { // Increment the number of received control packets. f.proto.controlCount++ } @@ -271,7 +271,7 @@ func (*fakeTransportProtocol) ParsePorts(buffer.View) (src, dst uint16, err *tcp return 0, 0, nil } -func (*fakeTransportProtocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, buffer.View, buffer.VectorisedView) bool { +func (*fakeTransportProtocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, tcpip.PacketBuffer) bool { return true } @@ -342,7 +342,9 @@ func TestTransportReceive(t *testing.T) { // Make sure packet with wrong protocol is not delivered. 
buf[0] = 1 buf[2] = 0 - linkEP.Inject(fakeNetNumber, buf.ToVectorisedView()) + linkEP.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeTrans.packetCount != 0 { t.Errorf("packetCount = %d, want %d", fakeTrans.packetCount, 0) } @@ -351,7 +353,9 @@ func TestTransportReceive(t *testing.T) { buf[0] = 1 buf[1] = 3 buf[2] = byte(fakeTransNumber) - linkEP.Inject(fakeNetNumber, buf.ToVectorisedView()) + linkEP.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeTrans.packetCount != 0 { t.Errorf("packetCount = %d, want %d", fakeTrans.packetCount, 0) } @@ -360,7 +364,9 @@ func TestTransportReceive(t *testing.T) { buf[0] = 1 buf[1] = 2 buf[2] = byte(fakeTransNumber) - linkEP.Inject(fakeNetNumber, buf.ToVectorisedView()) + linkEP.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeTrans.packetCount != 1 { t.Errorf("packetCount = %d, want %d", fakeTrans.packetCount, 1) } @@ -413,7 +419,9 @@ func TestTransportControlReceive(t *testing.T) { buf[fakeNetHeaderLen+0] = 0 buf[fakeNetHeaderLen+1] = 1 buf[fakeNetHeaderLen+2] = 0 - linkEP.Inject(fakeNetNumber, buf.ToVectorisedView()) + linkEP.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeTrans.controlCount != 0 { t.Errorf("controlCount = %d, want %d", fakeTrans.controlCount, 0) } @@ -422,7 +430,9 @@ func TestTransportControlReceive(t *testing.T) { buf[fakeNetHeaderLen+0] = 3 buf[fakeNetHeaderLen+1] = 1 buf[fakeNetHeaderLen+2] = byte(fakeTransNumber) - linkEP.Inject(fakeNetNumber, buf.ToVectorisedView()) + linkEP.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeTrans.controlCount != 0 { t.Errorf("controlCount = %d, want %d", fakeTrans.controlCount, 0) } @@ -431,7 +441,9 @@ func TestTransportControlReceive(t *testing.T) { buf[fakeNetHeaderLen+0] = 2 buf[fakeNetHeaderLen+1] = 1 buf[fakeNetHeaderLen+2] = byte(fakeTransNumber) - linkEP.Inject(fakeNetNumber, buf.ToVectorisedView()) + linkEP.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) if fakeTrans.controlCount != 1 { t.Errorf("controlCount = %d, want %d", fakeTrans.controlCount, 1) } @@ -584,7 +596,9 @@ func TestTransportForwarding(t *testing.T) { req[0] = 1 req[1] = 3 req[2] = byte(fakeTransNumber) - ep2.Inject(fakeNetNumber, req.ToVectorisedView()) + ep2.InjectInbound(fakeNetNumber, tcpip.PacketBuffer{ + Data: req.ToVectorisedView(), + }) aep, _, err := ep.Accept() if err != nil || aep == nil { diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index 33405eb7d..0092d0ea9 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -718,18 +718,18 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { // HandlePacket is called by the stack when new packets arrive to this transport // endpoint. -func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv buffer.VectorisedView) { +func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) { // Only accept echo replies. 
switch e.NetProto { case header.IPv4ProtocolNumber: - h := header.ICMPv4(vv.First()) + h := header.ICMPv4(pkt.Data.First()) if h.Type() != header.ICMPv4EchoReply { e.stack.Stats().DroppedPackets.Increment() e.stats.ReceiveErrors.MalformedPacketsReceived.Increment() return } case header.IPv6ProtocolNumber: - h := header.ICMPv6(vv.First()) + h := header.ICMPv6(pkt.Data.First()) if h.Type() != header.ICMPv6EchoReply { e.stack.Stats().DroppedPackets.Increment() e.stats.ReceiveErrors.MalformedPacketsReceived.Increment() @@ -757,19 +757,19 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv wasEmpty := e.rcvBufSize == 0 // Push new packet into receive list and increment the buffer size. - pkt := &icmpPacket{ + packet := &icmpPacket{ senderAddress: tcpip.FullAddress{ NIC: r.NICID(), Addr: id.RemoteAddress, }, } - pkt.data = vv + packet.data = pkt.Data - e.rcvList.PushBack(pkt) - e.rcvBufSize += pkt.data.Size() + e.rcvList.PushBack(packet) + e.rcvBufSize += packet.data.Size() - pkt.timestamp = e.stack.NowNanoseconds() + packet.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() e.stats.PacketsReceived.Increment() @@ -780,7 +780,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv } // HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. -func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, vv buffer.VectorisedView) { +func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt tcpip.PacketBuffer) { } // State implements tcpip.Endpoint.State. The ICMP endpoint currently doesn't diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go index bfb16f7c3..9ce500e80 100644 --- a/pkg/tcpip/transport/icmp/protocol.go +++ b/pkg/tcpip/transport/icmp/protocol.go @@ -104,7 +104,7 @@ func (p *protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) // HandleUnknownDestinationPacket handles packets targeted at this protocol but // that don't match any existing endpoint. -func (p *protocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, buffer.View, buffer.VectorisedView) bool { +func (p *protocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, tcpip.PacketBuffer) bool { return true } diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index ead83b83d..26335094e 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -266,7 +266,7 @@ func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } // HandlePacket implements stack.PacketEndpoint.HandlePacket. -func (ep *endpoint) HandlePacket(nicid tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, ethHeader buffer.View) { +func (ep *endpoint) HandlePacket(nicid tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { ep.rcvMu.Lock() // Drop the packet if our buffer is currently full. @@ -289,9 +289,9 @@ func (ep *endpoint) HandlePacket(nicid tcpip.NICID, localAddr tcpip.LinkAddress, // Push new packet into receive list and increment the buffer size. var packet packet // TODO(b/129292371): Return network protocol. - if len(ethHeader) > 0 { + if len(pkt.LinkHeader) > 0 { // Get info directly from the ethernet header. 
- hdr := header.Ethernet(ethHeader) + hdr := header.Ethernet(pkt.LinkHeader) packet.senderAddr = tcpip.FullAddress{ NIC: nicid, Addr: tcpip.Address(hdr.SourceAddress()), @@ -306,11 +306,12 @@ func (ep *endpoint) HandlePacket(nicid tcpip.NICID, localAddr tcpip.LinkAddress, if ep.cooked { // Cooked packets can simply be queued. - packet.data = vv + packet.data = pkt.Data } else { // Raw packets need their ethernet headers prepended before // queueing. - if len(ethHeader) == 0 { + var linkHeader buffer.View + if len(pkt.LinkHeader) == 0 { // We weren't provided with an actual ethernet header, // so fake one. ethFields := header.EthernetFields{ @@ -320,10 +321,12 @@ func (ep *endpoint) HandlePacket(nicid tcpip.NICID, localAddr tcpip.LinkAddress, } fakeHeader := make(header.Ethernet, header.EthernetMinimumSize) fakeHeader.Encode(ðFields) - ethHeader = buffer.View(fakeHeader) + linkHeader = buffer.View(fakeHeader) + } else { + linkHeader = append(buffer.View(nil), pkt.LinkHeader...) } - combinedVV := buffer.View(ethHeader).ToVectorisedView() - combinedVV.Append(vv) + combinedVV := linkHeader.ToVectorisedView() + combinedVV.Append(pkt.Data) packet.data = combinedVV } packet.timestampNS = ep.stack.NowNanoseconds() diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index 23922a30e..230a1537a 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -555,7 +555,7 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } // HandlePacket implements stack.RawTransportEndpoint.HandlePacket. -func (e *endpoint) HandlePacket(route *stack.Route, netHeader buffer.View, vv buffer.VectorisedView) { +func (e *endpoint) HandlePacket(route *stack.Route, pkt tcpip.PacketBuffer) { e.rcvMu.Lock() // Drop the packet if our buffer is currently full. @@ -596,20 +596,21 @@ func (e *endpoint) HandlePacket(route *stack.Route, netHeader buffer.View, vv bu wasEmpty := e.rcvBufSize == 0 // Push new packet into receive list and increment the buffer size. - pkt := &rawPacket{ + packet := &rawPacket{ senderAddr: tcpip.FullAddress{ NIC: route.NICID(), Addr: route.RemoteAddress, }, } - combinedVV := netHeader.ToVectorisedView() - combinedVV.Append(vv) - pkt.data = combinedVV - pkt.timestampNS = e.stack.NowNanoseconds() + networkHeader := append(buffer.View(nil), pkt.NetworkHeader...) + combinedVV := networkHeader.ToVectorisedView() + combinedVV.Append(pkt.Data) + packet.data = combinedVV + packet.timestampNS = e.stack.NowNanoseconds() - e.rcvList.PushBack(pkt) - e.rcvBufSize += pkt.data.Size() + e.rcvList.PushBack(packet) + e.rcvBufSize += packet.data.Size() e.rcvMu.Unlock() e.stats.PacketsReceived.Increment() diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index a1efd8d55..e31464c9b 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2029,8 +2029,8 @@ func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { // HandlePacket is called by the stack when new packets arrive to this transport // endpoint. 
-func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv buffer.VectorisedView) { - s := newSegment(r, id, vv) +func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) { + s := newSegment(r, id, pkt) if !s.parse() { e.stack.Stats().MalformedRcvdPackets.Increment() e.stack.Stats().TCP.InvalidSegmentsReceived.Increment() @@ -2065,7 +2065,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv } // HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. -func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, vv buffer.VectorisedView) { +func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt tcpip.PacketBuffer) { switch typ { case stack.ControlPacketTooBig: e.sndBufMu.Lock() diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index 63666f0b3..4983bca81 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -18,7 +18,6 @@ import ( "sync" "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -63,8 +62,8 @@ func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*Forward // // This function is expected to be passed as an argument to the // stack.SetTransportProtocolHandler function. -func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool { - s := newSegment(r, id, vv) +func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) bool { + s := newSegment(r, id, pkt) defer s.decRef() // We only care about well-formed SYN packets. diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index db40785d3..c4f1a84bb 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -126,8 +126,8 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { // a reset is sent in response to any incoming segment except another reset. In // particular, SYNs addressed to a non-existent connection are rejected by this // means." 
-func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool { - s := newSegment(r, id, vv) +func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) bool { + s := newSegment(r, id, pkt) defer s.decRef() if !s.parse() || !s.csumValid { diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go index c4a89525e..1c10da5ca 100644 --- a/pkg/tcpip/transport/tcp/segment.go +++ b/pkg/tcpip/transport/tcp/segment.go @@ -18,6 +18,7 @@ import ( "sync/atomic" "time" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" @@ -60,13 +61,13 @@ type segment struct { xmitTime time.Time `state:".(unixTime)"` } -func newSegment(r *stack.Route, id stack.TransportEndpointID, vv buffer.VectorisedView) *segment { +func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) *segment { s := &segment{ refCnt: 1, id: id, route: r.Clone(), } - s.data = vv.Clone(s.views[:]) + s.data = pkt.Data.Clone(s.views[:]) s.rcvdTime = time.Now() return s } diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index ef823e4ae..4854e719d 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -302,7 +302,9 @@ func (c *Context) SendICMPPacket(typ header.ICMPv4Type, code uint8, p1, p2 []byt copy(icmp[header.ICMPv4PayloadOffset:], p2) // Inject packet. - c.linkEP.Inject(ipv4.ProtocolNumber, buf.ToVectorisedView()) + c.linkEP.InjectInbound(ipv4.ProtocolNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) } // BuildSegment builds a TCP segment based on the given Headers and payload. @@ -350,13 +352,17 @@ func (c *Context) BuildSegment(payload []byte, h *Headers) buffer.VectorisedView // SendSegment sends a TCP segment that has already been built and written to a // buffer.VectorisedView. func (c *Context) SendSegment(s buffer.VectorisedView) { - c.linkEP.Inject(ipv4.ProtocolNumber, s) + c.linkEP.InjectInbound(ipv4.ProtocolNumber, tcpip.PacketBuffer{ + Data: s, + }) } // SendPacket builds and sends a TCP segment(with the provided payload & TCP // headers) in an IPv4 packet via the link layer endpoint. func (c *Context) SendPacket(payload []byte, h *Headers) { - c.linkEP.Inject(ipv4.ProtocolNumber, c.BuildSegment(payload, h)) + c.linkEP.InjectInbound(ipv4.ProtocolNumber, tcpip.PacketBuffer{ + Data: c.BuildSegment(payload, h), + }) } // SendAck sends an ACK packet. @@ -518,7 +524,9 @@ func (c *Context) SendV6Packet(payload []byte, h *Headers) { t.SetChecksum(^t.CalculateChecksum(xsum)) // Inject packet. - c.linkEP.Inject(ipv6.ProtocolNumber, buf.ToVectorisedView()) + c.linkEP.InjectInbound(ipv6.ProtocolNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + }) } // CreateConnected creates a connected TCP endpoint. diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 03bd5c8fd..4e11de9db 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -1158,17 +1158,17 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { // HandlePacket is called by the stack when new packets arrive to this transport // endpoint. 
-func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv buffer.VectorisedView) { +func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) { // Get the header then trim it from the view. - hdr := header.UDP(vv.First()) - if int(hdr.Length()) > vv.Size() { + hdr := header.UDP(pkt.Data.First()) + if int(hdr.Length()) > pkt.Data.Size() { // Malformed packet. e.stack.Stats().UDP.MalformedPacketsReceived.Increment() e.stats.ReceiveErrors.MalformedPacketsReceived.Increment() return } - vv.TrimFront(header.UDPMinimumSize) + pkt.Data.TrimFront(header.UDPMinimumSize) e.rcvMu.Lock() e.stack.Stats().UDP.PacketsReceived.Increment() @@ -1192,18 +1192,18 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv wasEmpty := e.rcvBufSize == 0 // Push new packet into receive list and increment the buffer size. - pkt := &udpPacket{ + packet := &udpPacket{ senderAddress: tcpip.FullAddress{ NIC: r.NICID(), Addr: id.RemoteAddress, Port: hdr.SourcePort(), }, } - pkt.data = vv - e.rcvList.PushBack(pkt) - e.rcvBufSize += vv.Size() + packet.data = pkt.Data + e.rcvList.PushBack(packet) + e.rcvBufSize += pkt.Data.Size() - pkt.timestamp = e.stack.NowNanoseconds() + packet.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() @@ -1214,7 +1214,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv } // HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. -func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, vv buffer.VectorisedView) { +func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt tcpip.PacketBuffer) { } // State implements tcpip.Endpoint.State. diff --git a/pkg/tcpip/transport/udp/forwarder.go b/pkg/tcpip/transport/udp/forwarder.go index d399ec722..fc706ede2 100644 --- a/pkg/tcpip/transport/udp/forwarder.go +++ b/pkg/tcpip/transport/udp/forwarder.go @@ -16,7 +16,6 @@ package udp import ( "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/waiter" ) @@ -44,12 +43,12 @@ func NewForwarder(s *stack.Stack, handler func(*ForwarderRequest)) *Forwarder { // // This function is expected to be passed as an argument to the // stack.SetTransportProtocolHandler function. 
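As the comment above says, HandlePacket is meant to be installed as the stack's handler for the protocol. A minimal sketch of that wiring for UDP, assuming an existing *stack.Stack named s; the application hand-off is elided:

    fwd := udp.NewForwarder(s, func(r *udp.ForwarderRequest) {
            var wq waiter.Queue
            ep, err := r.CreateEndpoint(&wq)
            if err != nil {
                    return
            }
            // The datagram that triggered this request has already been
            // delivered to ep; hand ep and wq off to application code here.
            _ = ep
    })
    s.SetTransportProtocolHandler(udp.ProtocolNumber, fwd.HandlePacket)
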
-func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool { +func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) bool { f.handler(&ForwarderRequest{ stack: f.stack, route: r, id: id, - vv: vv, + pkt: pkt, }) return true @@ -62,7 +61,7 @@ type ForwarderRequest struct { stack *stack.Stack route *stack.Route id stack.TransportEndpointID - vv buffer.VectorisedView + pkt tcpip.PacketBuffer } // ID returns the 4-tuple (src address, src port, dst address, dst port) that @@ -90,7 +89,7 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, ep.rcvReady = true ep.rcvMu.Unlock() - ep.HandlePacket(r.route, r.id, r.vv) + ep.HandlePacket(r.route, r.id, r.pkt) return ep, nil } diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go index 5c3358a5e..43f11b700 100644 --- a/pkg/tcpip/transport/udp/protocol.go +++ b/pkg/tcpip/transport/udp/protocol.go @@ -66,10 +66,10 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { // HandleUnknownDestinationPacket handles packets targeted at this protocol but // that don't match any existing endpoint. -func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, netHeader buffer.View, vv buffer.VectorisedView) bool { +func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) bool { // Get the header then trim it from the view. - hdr := header.UDP(vv.First()) - if int(hdr.Length()) > vv.Size() { + hdr := header.UDP(pkt.Data.First()) + if int(hdr.Length()) > pkt.Data.Size() { // Malformed packet. r.Stack().Stats().UDP.MalformedPacketsReceived.Increment() return true @@ -116,20 +116,18 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans } headerLen := int(r.MaxHeaderLength()) + header.ICMPv4MinimumSize available := int(mtu) - headerLen - payloadLen := len(netHeader) + vv.Size() + payloadLen := len(pkt.NetworkHeader) + pkt.Data.Size() if payloadLen > available { payloadLen = available } - // The buffers used by vv and netHeader may be used elsewhere - // in the system. For example, a raw or packet socket may use - // what UDP considers an unreachable destination. Thus we deep - // copy vv and netHeader to prevent multiple ownership and SR - // errors. - newNetHeader := make(buffer.View, len(netHeader)) - copy(newNetHeader, netHeader) - payload := buffer.NewVectorisedView(len(newNetHeader), []buffer.View{newNetHeader}) - payload.Append(vv.ToView().ToVectorisedView()) + // The buffers used by pkt may be used elsewhere in the system. + // For example, a raw or packet socket may use what UDP + // considers an unreachable destination. Thus we deep copy pkt + // to prevent multiple ownership and SR errors. + newNetHeader := append(buffer.View(nil), pkt.NetworkHeader...) 
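A side note on the copy just above (ordinary Go slice behaviour, not specific to this file): appending onto a nil view allocates fresh backing storage, so the result cannot alias a buffer owned elsewhere, whereas plain slicing would. For any buffer.View v:

    alias := v[:]                           // shares v's backing array
    owned := append(buffer.View(nil), v...) // allocates and copies; safe to retain
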
+ payload := newNetHeader.ToVectorisedView() + payload.Append(pkt.Data.ToView().ToVectorisedView()) payload.CapLength(payloadLen) hdr := buffer.NewPrependable(headerLen) @@ -158,12 +156,12 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans } headerLen := int(r.MaxHeaderLength()) + header.ICMPv6DstUnreachableMinimumSize available := int(mtu) - headerLen - payloadLen := len(netHeader) + vv.Size() + payloadLen := len(pkt.NetworkHeader) + pkt.Data.Size() if payloadLen > available { payloadLen = available } - payload := buffer.NewVectorisedView(len(netHeader), []buffer.View{netHeader}) - payload.Append(vv) + payload := buffer.NewVectorisedView(len(pkt.NetworkHeader), []buffer.View{pkt.NetworkHeader}) + payload.Append(pkt.Data) payload.CapLength(payloadLen) hdr := buffer.NewPrependable(headerLen) diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index b724d788c..30ee9801b 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -397,7 +397,8 @@ func (c *testContext) injectPacket(flow testFlow, payload []byte) { func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple, valid bool) { // Allocate a buffer for data and headers. buf := buffer.NewView(header.UDPMinimumSize + header.IPv6MinimumSize + len(payload)) - copy(buf[len(buf)-len(payload):], payload) + payloadStart := len(buf) - len(payload) + copy(buf[payloadStart:], payload) // Initialize the IP header. ip := header.IPv6(buf) @@ -431,7 +432,11 @@ func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple, valid bool u.SetChecksum(^u.CalculateChecksum(xsum)) // Inject packet. - c.linkEP.Inject(ipv6.ProtocolNumber, buf.ToVectorisedView()) + c.linkEP.InjectInbound(ipv6.ProtocolNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + NetworkHeader: buffer.View(ip), + TransportHeader: buffer.View(u), + }) } // injectV4Packet creates a V4 test packet with the given payload and header @@ -441,7 +446,8 @@ func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple, valid bool func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool) { // Allocate a buffer for data and headers. buf := buffer.NewView(header.UDPMinimumSize + header.IPv4MinimumSize + len(payload)) - copy(buf[len(buf)-len(payload):], payload) + payloadStart := len(buf) - len(payload) + copy(buf[payloadStart:], payload) // Initialize the IP header. ip := header.IPv4(buf) @@ -471,7 +477,12 @@ func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool u.SetChecksum(^u.CalculateChecksum(xsum)) // Inject packet. - c.linkEP.Inject(ipv4.ProtocolNumber, buf.ToVectorisedView()) + + c.linkEP.InjectInbound(ipv4.ProtocolNumber, tcpip.PacketBuffer{ + Data: buf.ToVectorisedView(), + NetworkHeader: buffer.View(ip), + TransportHeader: buffer.View(u), + }) } func newPayload() []byte { diff --git a/test/syscalls/linux/raw_socket_icmp.cc b/test/syscalls/linux/raw_socket_icmp.cc index 8bcaba6f1..3de898df7 100644 --- a/test/syscalls/linux/raw_socket_icmp.cc +++ b/test/syscalls/linux/raw_socket_icmp.cc @@ -129,7 +129,7 @@ TEST_F(RawSocketICMPTest, SendAndReceiveBadChecksum) { EXPECT_THAT(RetryEINTR(recv)(s_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallFailsWithErrno(EAGAIN)); } -// + // Send and receive an ICMP packet. 
TEST_F(RawSocketICMPTest, SendAndReceive) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); -- cgit v1.2.3 From 3552691137284525a33d3de7e3c2d170da66c8ac Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 6 Nov 2019 22:28:41 -0800 Subject: Fix data race in syscall_test_runner.go Fixes #1140 PiperOrigin-RevId: 279012793 --- test/syscalls/syscall_test_runner.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go index 856398994..7186a8ddc 100644 --- a/test/syscalls/syscall_test_runner.go +++ b/test/syscalls/syscall_test_runner.go @@ -208,14 +208,15 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { } log.Warningf("%s: Got signal: %v", name, s) done := make(chan bool) - go func() { - dArgs := append(args, "-alsologtostderr=true", "debug", "--stacks", id) + dArgs := append([]string{}, args...) + dArgs = append(dArgs, "-alsologtostderr=true", "debug", "--stacks", id) + go func(dArgs []string) { cmd := exec.Command(*runscPath, dArgs...) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Run() done <- true - }() + }(dArgs) timeout := time.After(3 * time.Second) select { @@ -225,7 +226,7 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { } log.Warningf("Send SIGTERM to the sandbox process") - dArgs := append(args, "debug", + dArgs = append(args, "debug", fmt.Sprintf("--signal=%d", syscall.SIGTERM), id) cmd = exec.Command(*runscPath, dArgs...) -- cgit v1.2.3 From 2326224a9652201938df2881be055ab352672587 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 6 Nov 2019 23:50:54 -0800 Subject: Fix yet another data race. Fixes #1140 PiperOrigin-RevId: 279020846 --- test/syscalls/syscall_test_runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go index 7186a8ddc..accf46347 100644 --- a/test/syscalls/syscall_test_runner.go +++ b/test/syscalls/syscall_test_runner.go @@ -229,7 +229,7 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { dArgs = append(args, "debug", fmt.Sprintf("--signal=%d", syscall.SIGTERM), id) - cmd = exec.Command(*runscPath, dArgs...) + cmd := exec.Command(*runscPath, dArgs...) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Run() -- cgit v1.2.3 From 66ebb6575f929a389d3c929977ed5e31d706fcfe Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 7 Nov 2019 09:45:26 -0800 Subject: Add support for TIME_WAIT timeout. This change adds explicit support for honoring the 2MSL timeout for sockets in TIME_WAIT state. It also adds support for the TCP_LINGER2 option that allows modification of the FIN_WAIT2 state timeout duration for a given socket. It also adds an option to modify the Stack wide TIME_WAIT timeout but this is only for testing. On Linux this is fixed at 60s. Further, we also now correctly process RST's in CLOSE_WAIT and close the socket similar to linux without moving it to error state. We also now handle SYN in ESTABLISHED state as per RFC5961#section-4.1. Earlier we would just drop these SYNs. Which can result in some tests that pass on linux to fail on gVisor. Netstack now honors TIME_WAIT correctly as well as handles the following cases correctly. - TCP RSTs in TIME_WAIT are ignored. - A duplicate TCP FIN during TIME_WAIT extends the TIME_WAIT and a dup ACK is sent in response to the FIN as the dup FIN indicates potential loss of the original final ACK. 
- An out of order segment during TIME_WAIT generates a dup ACK. - A new SYN w/ a sequence number > the highest sequence number in the previous connection closes the TIME_WAIT early and opens a new connection. Further to make the SYN case work correctly the ISN (Initial Sequence Number) generation for Netstack has been updated to be as per RFC. Its not a pure random number anymore and follows the recommendation in https://tools.ietf.org/html/rfc6528#page-3. The current hash used is not a cryptographically secure hash function. A separate change will update the hash function used to Siphash similar to what is used in Linux. PiperOrigin-RevId: 279106406 --- pkg/sentry/socket/netstack/netstack.go | 20 + pkg/tcpip/adapters/gonet/gonet_test.go | 12 +- pkg/tcpip/stack/stack.go | 20 +- pkg/tcpip/stack/transport_demuxer.go | 33 +- pkg/tcpip/tcpip.go | 12 +- pkg/tcpip/transport/tcp/BUILD | 2 +- pkg/tcpip/transport/tcp/accept.go | 17 +- pkg/tcpip/transport/tcp/connect.go | 322 ++++++++++++-- pkg/tcpip/transport/tcp/endpoint.go | 101 ++++- pkg/tcpip/transport/tcp/endpoint_state.go | 26 +- pkg/tcpip/transport/tcp/protocol.go | 43 ++ pkg/tcpip/transport/tcp/rcv.go | 167 ++++++- pkg/tcpip/transport/tcp/tcp_test.go | 622 ++++++++++++++++++++++++++- test/syscalls/BUILD | 22 +- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket_inet_loopback.cc | 336 +++++++++++++++ test/syscalls/linux/socket_ip_tcp_generic.cc | 93 ++++ 17 files changed, 1736 insertions(+), 113 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 27c6692c4..d92399efd 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1173,6 +1173,18 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa copy(b, v) return b, nil + case linux.TCP_LINGER2: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + var v tcpip.TCPLingerTimeoutOption + if err := ep.GetSockOpt(&v); err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(time.Duration(v) / time.Second), nil + default: emitUnimplementedEventTCP(t, name) } @@ -1556,6 +1568,14 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * } return nil + case linux.TCP_LINGER2: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + v := usermem.ByteOrder.Uint32(optVal) + return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v)))) + case linux.TCP_REPAIR_OPTIONS: t.Kernel().EmitUnimplementedEvent(t) diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go index 8ced960bb..ee077ae83 100644 --- a/pkg/tcpip/adapters/gonet/gonet_test.go +++ b/pkg/tcpip/adapters/gonet/gonet_test.go @@ -151,10 +151,8 @@ func TestCloseReader(t *testing.T) { buf := make([]byte, 256) n, err := c.Read(buf) - got, ok := err.(*net.OpError) - want := tcpip.ErrConnectionAborted - if n != 0 || !ok || got.Err.Error() != want.String() { - t.Errorf("c.Read() = (%d, %v), want (0, OpError(%v))", n, err, want) + if n != 0 || err != io.EOF { + t.Errorf("c.Read() = (%d, %v), want (0, EOF)", n, err) } }() sender, err := connect(s, addr) @@ -203,10 +201,8 @@ func TestCloseReaderWithForwarder(t *testing.T) { buf := make([]byte, 256) n, e := c.Read(buf) - got, ok := e.(*net.OpError) - want := tcpip.ErrConnectionAborted - if n != 0 || !ok || got.Err.Error() != want.String() { - t.Errorf("c.Read() = (%d, %v), 
want (0, OpError(%v))", n, e, want) + if n != 0 || e != io.EOF { + t.Errorf("c.Read() = (%d, %v), want (0, EOF)", n, e) } }) s.SetTransportProtocolHandler(tcp.ProtocolNumber, fwd.HandlePacket) diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 99809df75..2f8d8e822 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -402,11 +402,11 @@ type Stack struct { // by the stack. icmpRateLimiter *ICMPRateLimiter - // portSeed is a one-time random value initialized at stack startup + // seed is a one-time random value initialized at stack startup // and is used to seed the TCP port picking on active connections // // TODO(gvisor.dev/issue/940): S/R this field. - portSeed uint32 + seed uint32 // ndpConfigs is the default NDP configurations used by interfaces. ndpConfigs NDPConfigurations @@ -544,7 +544,7 @@ func New(opts Options) *Stack { stats: opts.Stats.FillIn(), handleLocal: opts.HandleLocal, icmpRateLimiter: NewICMPRateLimiter(), - portSeed: generateRandUint32(), + seed: generateRandUint32(), ndpConfigs: opts.NDPConfigs, autoGenIPv6LinkLocal: opts.AutoGenIPv6LinkLocal, uniqueIDGenerator: opts.UniqueID, @@ -1186,6 +1186,12 @@ func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) { s.mu.Unlock() } +// FindTransportEndpoint finds an endpoint that most closely matches the provided +// id. If no endpoint is found it returns nil. +func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, r *Route) TransportEndpoint { + return s.demux.findTransportEndpoint(netProto, transProto, id, r) +} + // RegisterRawTransportEndpoint registers the given endpoint with the stack // transport dispatcher. Received packets that match the provided transport // protocol will be delivered to the given endpoint. @@ -1573,12 +1579,12 @@ func (s *Stack) HandleNDPRA(id tcpip.NICID, ip tcpip.Address, ra header.NDPRoute return nil } -// PortSeed returns a 32 bit value that can be used as a seed value for port -// picking. +// Seed returns a 32 bit value that can be used as a seed value for port +// picking, ISN generation etc. // // NOTE: The seed is generated once during stack initialization only. -func (s *Stack) PortSeed() uint32 { - return s.portSeed +func (s *Stack) Seed() uint32 { + return s.seed } func generateRandUint32() uint32 { diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index 594570216..cb805522b 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -103,7 +103,6 @@ func (epsByNic *endpointsByNic) handlePacket(r *Route, id TransportEndpointID, p epsByNic.mu.RUnlock() // Don't use defer for performance reasons. return } - // multiPortEndpoints are guaranteed to have at least one element. selectEndpoint(id, mpep, epsByNic.seed).HandlePacket(r, id, pkt) epsByNic.mu.RUnlock() // Don't use defer for performance reasons. @@ -507,10 +506,40 @@ func (d *transportDemuxer) findAllEndpointsLocked(eps *transportEndpoints, id Tr if ep, ok := eps.endpoints[nid]; ok { matchedEPs = append(matchedEPs, ep) } - return matchedEPs } +// findTransportEndpoint find a single endpoint that most closely matches the provided id. 
+func (d *transportDemuxer) findTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, r *Route) TransportEndpoint { + eps, ok := d.protocol[protocolIDs{netProto, transProto}] + if !ok { + return nil + } + // Try to find the endpoint. + eps.mu.RLock() + epsByNic := d.findEndpointLocked(eps, id) + // Fail if we didn't find one. + if epsByNic == nil { + eps.mu.RUnlock() + return nil + } + + epsByNic.mu.RLock() + eps.mu.RUnlock() + + mpep, ok := epsByNic.endpoints[r.ref.nic.ID()] + if !ok { + if mpep, ok = epsByNic.endpoints[0]; !ok { + epsByNic.mu.RUnlock() // Don't use defer for performance reasons. + return nil + } + } + + ep := selectEndpoint(id, mpep, epsByNic.seed) + epsByNic.mu.RUnlock() + return ep +} + // findEndpointLocked returns the endpoint that most closely matches the given // id. func (d *transportDemuxer) findEndpointLocked(eps *transportEndpoints, id TransportEndpointID) *endpointsByNic { diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 3edb513d4..bd5eb89ca 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -586,6 +586,16 @@ type MaxSegOption int // A zero value indicates the default. type TTLOption uint8 +// TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the +// maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state +// before being marked closed. +type TCPLingerTimeoutOption time.Duration + +// TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the +// maximum duration for which a socket lingers in the TIME_WAIT state +// before being marked closed. +type TCPTimeWaitTimeoutOption time.Duration + // MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default // TTL value for multicast messages. The default is 1. type MulticastTTLOption uint8 @@ -1329,8 +1339,8 @@ var ( // GetDanglingEndpoints returns all dangling endpoints. func GetDanglingEndpoints() []Endpoint { - es := make([]Endpoint, 0, len(danglingEndpoints)) danglingEndpointsMu.Lock() + es := make([]Endpoint, 0, len(danglingEndpoints)) for e := range danglingEndpoints { es = append(es, e) } diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index f1dbc6f91..3f47b328d 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -71,7 +71,7 @@ filegroup( go_test( name = "tcp_test", - size = "small", + size = "medium", srcs = [ "dual_stack_test.go", "sack_scoreboard_test.go", diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index cb0e13ebc..0e8e0a2b4 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -269,8 +269,8 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *header.TCPSynOptions) (*endpoint, *tcpip.Error) { // Create new endpoint. irs := s.sequenceNumber - cookie := l.createCookie(s.id, irs, encodeMSS(opts.MSS)) - ep, err := l.createConnectingEndpoint(s, cookie, irs, opts) + isn := generateSecureISN(s.id, l.stack.Seed()) + ep, err := l.createConnectingEndpoint(s, isn, irs, opts) if err != nil { return nil, err } @@ -289,7 +289,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head // Perform the 3-way handshake. 
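The new tcpip-level options defined earlier in this patch are set like any other endpoint or stack option; at the Linux socket surface the linger value is exposed as TCP_LINGER2 in whole seconds. A minimal sketch, assuming an existing TCP endpoint ep, a *stack.Stack named s, and the time package imported; error handling elided:

    // Per endpoint: cap FIN_WAIT_2 lingering at 30s (the TCP_LINGER2 equivalent).
    ep.SetSockOpt(tcpip.TCPLingerTimeoutOption(30 * time.Second))

    // Stack wide, intended for tests: shorten TIME_WAIT from the 60s default.
    s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitTimeoutOption(5 * time.Second))
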
h := newHandshake(ep, seqnum.Size(ep.initialReceiveWindow())) - h.resetToSynRcvd(cookie, irs, opts) + h.resetToSynRcvd(isn, irs, opts) if err := h.execute(); err != nil { ep.Close() if l.listenEP != nil { @@ -361,6 +361,7 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header defer decSynRcvdCount() defer e.decSynRcvdCount() defer s.decRef() + n, err := ctx.createEndpointAndPerformHandshake(s, opts) if err != nil { e.stack.Stats().TCP.FailedConnectionAttempts.Increment() @@ -368,6 +369,11 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header return } ctx.removePendingEndpoint(n) + // Start the protocol goroutine. + wq := &waiter.Queue{} + n.startAcceptedLoop(wq) + e.stack.Stats().TCP.PassiveConnectionOpenings.Increment() + e.deliverAccepted(n) } @@ -543,6 +549,11 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { // number of goroutines as we do check before // entering here that there was at least some // space available in the backlog. + + // Start the protocol goroutine. + wq := &waiter.Queue{} + n.startAcceptedLoop(wq) + e.stack.Stats().TCP.PassiveConnectionOpenings.Increment() go e.deliverAccepted(n) } } diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index ca982c451..a114c06c1 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -15,6 +15,7 @@ package tcp import ( + "encoding/binary" "sync" "time" @@ -22,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sleep" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -139,7 +141,32 @@ func (h *handshake) resetState() { h.flags = header.TCPFlagSyn h.ackNum = 0 h.mss = 0 - h.iss = seqnum.Value(uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24) + h.iss = generateSecureISN(h.ep.ID, h.ep.stack.Seed()) +} + +// generateSecureISN generates a secure Initial Sequence number based on the +// recommendation here https://tools.ietf.org/html/rfc6528#page-3. +func generateSecureISN(id stack.TransportEndpointID, seed uint32) seqnum.Value { + isnHasher := jenkins.Sum32(seed) + isnHasher.Write([]byte(id.LocalAddress)) + isnHasher.Write([]byte(id.RemoteAddress)) + portBuf := make([]byte, 2) + binary.LittleEndian.PutUint16(portBuf, id.LocalPort) + isnHasher.Write(portBuf) + binary.LittleEndian.PutUint16(portBuf, id.RemotePort) + isnHasher.Write(portBuf) + // The time period here is 64ns. This is similar to what linux uses + // generate a sequence number that overlaps less than one + // time per MSL (2 minutes). + // + // A 64ns clock ticks 10^9/64 = 15625000) times in a second. + // To wrap the whole 32 bit space would require + // 2^32/1562500 ~ 274 seconds. + // + // Which sort of guarantees that we won't reuse the ISN for a new + // connection for the same tuple for at least 274s. + isn := isnHasher.Sum32() + uint32(time.Now().UnixNano()>>6) + return seqnum.Value(isn) } // effectiveRcvWndScale returns the effective receive window scale to be used. @@ -809,7 +836,19 @@ func (e *endpoint) resetConnectionLocked(err *tcpip.Error) { e.state = StateError e.HardError = err if err != tcpip.ErrConnectionReset { - e.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck|header.TCPFlagRst, e.snd.sndUna, e.rcv.rcvNxt, 0) + // The exact sequence number to be used for the RST is the same as the + // one used by Linux. 
We need to handle the case of window being shrunk + // which can cause sndNxt to be outside the acceptable window on the + // receiver. + // + // See: https://www.snellman.net/blog/archive/2016-02-01-tcp-rst/ for more + // information. + sndWndEnd := e.snd.sndUna.Add(e.snd.sndWnd) + resetSeqNum := sndWndEnd + if !sndWndEnd.LessThan(e.snd.sndNxt) || e.snd.sndNxt.Size(sndWndEnd) < (1< + // + // After sending the acknowledgment, TCP MUST drop the unacceptable + // segment and stop processing further. + // + // By sending an ACK, the remote peer is challenged to confirm the loss + // of the previous connection and the request to start a new connection. + // A legitimate peer, after restart, would not have a TCB in the + // synchronized state. Thus, when the ACK arrives, the peer should send + // a RST segment back with the sequence number derived from the ACK + // field that caused the RST. + + // This RST will confirm that the remote peer has indeed closed the + // previous connection. Upon receipt of a valid RST, the local TCP + // endpoint MUST terminate its connection. The local TCP endpoint + // should then rely on SYN retransmission from the remote end to + // re-establish the connection. + + e.snd.sendAck() } else if s.flagIsSet(header.TCPFlagAck) { // Patch the window size in the segment according to the // send window scale. @@ -856,7 +960,15 @@ func (e *endpoint) handleSegments() *tcpip.Error { // RFC 793, page 41 states that "once in the ESTABLISHED // state all segments must carry current acknowledgment // information." - e.rcv.handleRcvdSegment(s) + drop, err := e.rcv.handleRcvdSegment(s) + if err != nil { + s.decRef() + return err + } + if drop { + s.decRef() + continue + } e.snd.handleRcvdSegment(s) } s.decRef() @@ -955,7 +1067,6 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { } e.mu.Unlock() - // When the protocol loop exits we should wake up our waiters. e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } @@ -1001,6 +1112,10 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { // RTT itself. e.rcvAutoParams.prevCopied = initialRcvWnd e.rcvListMu.Unlock() + e.stack.Stats().TCP.CurrentEstablished.Increment() + e.mu.Lock() + e.state = StateEstablished + e.mu.Unlock() } e.keepalive.timer.init(&e.keepalive.waker) @@ -1008,10 +1123,6 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { // Tell waiters that the endpoint is connected and writable. e.mu.Lock() - if e.state != StateEstablished { - e.stack.Stats().TCP.CurrentEstablished.Increment() - e.state = StateEstablished - } drained := e.drainDone != nil e.mu.Unlock() if drained { @@ -1042,7 +1153,13 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { { w: &closeWaker, f: func() *tcpip.Error { - return tcpip.ErrConnectionAborted + // This means the socket is being closed due + // to the TCP_FIN_WAIT2 timeout was hit. Just + // mark the socket as closed. + e.mu.Lock() + e.state = StateClose + e.mu.Unlock() + return nil }, }, { @@ -1085,17 +1202,18 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { e.resetConnectionLocked(tcpip.ErrConnectionAborted) e.mu.Unlock() } + if n¬ifyClose != 0 && closeTimer == nil { - // Reset the connection 3 seconds after - // the endpoint has been closed. - // - // The timer could fire in background - // when the endpoint is drained. That's - // OK as the loop here will not honor - // the firing until the undrain arrives. 
- closeTimer = time.AfterFunc(3*time.Second, func() { - closeWaker.Assert() - }) + e.mu.Lock() + if e.state == StateFinWait2 && e.closed { + // The socket has been closed and we are in FIN_WAIT2 + // so start the FIN_WAIT2 timer. + closeTimer = time.AfterFunc(e.tcpLingerTimeout, func() { + closeWaker.Assert() + }) + e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) + } + e.mu.Unlock() } if n¬ifyKeepaliveChanged != 0 { @@ -1117,6 +1235,12 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { } } + if n¬ifyTickleWorker != 0 { + // Just a tickle notification. No need to do + // anything. + return nil + } + return nil }, }, @@ -1143,15 +1267,16 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { } e.rcvListMu.Unlock() - e.mu.RLock() + e.mu.Lock() if e.workerCleanup { e.notifyProtocolGoroutine(notifyClose) } - e.mu.RUnlock() // Main loop. Handle segments until both send and receive ends of the // connection have completed. - for !e.rcv.closed || !e.snd.closed || e.snd.sndUna != e.snd.sndNxtList { + + for e.state != StateTimeWait && e.state != StateClose && e.state != StateError { + e.mu.Unlock() e.workMu.Unlock() v, _ := s.Fetch(true) e.workMu.Lock() @@ -1167,6 +1292,23 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { return nil } + e.mu.Lock() + } + + state := e.state + e.mu.Unlock() + var reuseTW func() + if state == StateTimeWait { + // Disable close timer as we now entering real TIME_WAIT. + if closeTimer != nil { + closeTimer.Stop() + } + // Mark the current sleeper done so as to free all associated + // wakers. + s.Done() + // Wake up any waiters before we enter TIME_WAIT. + e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) + reuseTW = e.doTimeWait() } // Mark endpoint as closed. @@ -1176,8 +1318,130 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { e.stack.Stats().TCP.CurrentEstablished.Decrement() e.state = StateClose } + // Lock released below. epilogue() + // A new SYN was received during TIME_WAIT and we need to abort + // the timewait and redirect the segment to the listener queue + if reuseTW != nil { + reuseTW() + } + return nil } + +// handleTimeWaitSegments processes segments received during TIME_WAIT +// state. +func (e *endpoint) handleTimeWaitSegments() (extendTimeWait bool, reuseTW func()) { + checkRequeue := true + for i := 0; i < maxSegmentsPerWake; i++ { + s := e.segmentQueue.dequeue() + if s == nil { + checkRequeue = false + break + } + extTW, newSyn := e.rcv.handleTimeWaitSegment(s) + if newSyn { + info := e.EndpointInfo.TransportEndpointInfo + newID := info.ID + newID.RemoteAddress = "" + newID.RemotePort = 0 + netProtos := []tcpip.NetworkProtocolNumber{info.NetProto} + // If the local address is an IPv4 address then also + // look for IPv6 dual stack endpoints that might be + // listening on the local address. + if newID.LocalAddress.To4() != "" { + netProtos = []tcpip.NetworkProtocolNumber{header.IPv4ProtocolNumber, header.IPv6ProtocolNumber} + } + for _, netProto := range netProtos { + if listenEP := e.stack.FindTransportEndpoint(netProto, info.TransProto, newID, &s.route); listenEP != nil { + tcpEP := listenEP.(*endpoint) + if EndpointState(tcpEP.State()) == StateListen { + reuseTW = func() { + tcpEP.enqueueSegment(s) + } + // We explicitly do not decRef + // the segment as it's still + // valid and being reflected to + // a listening endpoint. 
+ return false, reuseTW + } + } + } + } + if extTW { + extendTimeWait = true + } + s.decRef() + } + if checkRequeue && !e.segmentQueue.empty() { + e.newSegmentWaker.Assert() + } + return extendTimeWait, nil +} + +// doTimeWait is responsible for handling the TCP behaviour once a socket +// enters the TIME_WAIT state. Optionally it can return a closure that +// should be executed after releasing the endpoint registrations. This is +// done in cases where a new SYN is received during TIME_WAIT that carries +// a sequence number larger than one see on the connection. +func (e *endpoint) doTimeWait() (twReuse func()) { + // Trigger a 2 * MSL time wait state. During this period + // we will drop all incoming segments. + // NOTE: On Linux this is not configurable and is fixed at 60 seconds. + timeWaitDuration := DefaultTCPTimeWaitTimeout + + // Get the stack wide configuration. + var tcpTW tcpip.TCPTimeWaitTimeoutOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &tcpTW); err == nil { + timeWaitDuration = time.Duration(tcpTW) + } + + const newSegment = 1 + const notification = 2 + const timeWaitDone = 3 + + s := sleep.Sleeper{} + s.AddWaker(&e.newSegmentWaker, newSegment) + s.AddWaker(&e.notificationWaker, notification) + + var timeWaitWaker sleep.Waker + s.AddWaker(&timeWaitWaker, timeWaitDone) + timeWaitTimer := time.AfterFunc(timeWaitDuration, timeWaitWaker.Assert) + defer timeWaitTimer.Stop() + + for { + e.workMu.Unlock() + v, _ := s.Fetch(true) + e.workMu.Lock() + switch v { + case newSegment: + extendTimeWait, reuseTW := e.handleTimeWaitSegments() + if reuseTW != nil { + return reuseTW + } + if extendTimeWait { + timeWaitTimer.Reset(timeWaitDuration) + } + case notification: + n := e.fetchNotifications() + if n¬ifyClose != 0 { + return nil + } + if n¬ifyDrain != 0 { + for !e.segmentQueue.empty() { + // Ignore extending TIME_WAIT during a + // save. For sockets in TIME_WAIT we just + // terminate the TIME_WAIT early. + e.handleTimeWaitSegments() + } + close(e.drainDone) + <-e.undrain + return nil + } + case timeWaitDone: + return nil + } + } +} diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 79fec6b77..04c92c04c 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -121,6 +121,11 @@ const ( notifyReset notifyKeepaliveChanged notifyMSSChanged + // notifyTickleWorker is used to tickle the protocol main loop during a + // restore after we update the endpoint state to the correct one. This + // ensures the loop terminates if the final state of the endpoint is + // say TIME_WAIT. + notifyTickleWorker ) // SACKInfo holds TCP SACK related information for a given endpoint. @@ -320,6 +325,11 @@ type endpoint struct { state EndpointState `state:".(EndpointState)"` + // origEndpointState is only used during a restore phase to save the + // endpoint state at restore time as the socket is moved to it's correct + // state. + origEndpointState EndpointState `state:"nosave"` + isPortReserved bool `state:"manual"` isRegistered bool boundNICID tcpip.NICID `state:"manual"` @@ -503,6 +513,16 @@ type endpoint struct { // TODO(b/142022063): Add ability to save and restore per endpoint stats. stats Stats `state:"nosave"` + + // tcpLingerTimeout is the maximum amount of a time a socket + // a socket stays in TIME_WAIT state before being marked + // closed. 
+ tcpLingerTimeout time.Duration + + // closed indicates that the user has called closed on the + // endpoint and at this point the endpoint is only around + // to complete the TCP shutdown. + closed bool } // UniqueID implements stack.TransportEndpoint.UniqueID. @@ -599,6 +619,11 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue e.SetSockOptInt(tcpip.DelayOption, 1) } + var tcpLT tcpip.TCPLingerTimeoutOption + if err := s.TransportProtocolOption(ProtocolNumber, &tcpLT); err == nil { + e.tcpLingerTimeout = time.Duration(tcpLT) + } + if p := s.GetTCPProbe(); p != nil { e.probe = p } @@ -686,6 +711,13 @@ func (e *endpoint) notifyProtocolGoroutine(n uint32) { // with it. It must be called only once and with no other concurrent calls to // the endpoint. func (e *endpoint) Close() { + e.mu.Lock() + closed := e.closed + e.mu.Unlock() + if closed { + return + } + // Issue a shutdown so that the peer knows we won't send any more data // if we're connected, or stop accepting if we're listening. e.Shutdown(tcpip.ShutdownWrite | tcpip.ShutdownRead) @@ -706,6 +738,8 @@ func (e *endpoint) Close() { e.isPortReserved = false } + // Mark endpoint as closed. + e.closed = true // Either perform the local cleanup or kick the worker to make sure it // knows it needs to cleanup. tcpip.AddDanglingEndpoint(e) @@ -731,9 +765,7 @@ func (e *endpoint) closePendingAcceptableConnectionsLocked() { go func() { defer close(done) for n := range e.acceptedChan { - n.mu.Lock() - n.resetConnectionLocked(tcpip.ErrConnectionAborted) - n.mu.Unlock() + n.notifyProtocolGoroutine(notifyReset) n.Close() } }() @@ -1349,6 +1381,28 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.mu.Unlock() return nil + case tcpip.TCPLingerTimeoutOption: + e.mu.Lock() + if v < 0 { + // Same as effectively disabling TCPLinger timeout. + v = 0 + } + var stkTCPLingerTimeout tcpip.TCPLingerTimeoutOption + if err := e.stack.TransportProtocolOption(header.TCPProtocolNumber, &stkTCPLingerTimeout); err != nil { + // We were unable to retrieve a stack config, just use + // the DefaultTCPLingerTimeout. + if v > tcpip.TCPLingerTimeoutOption(DefaultTCPLingerTimeout) { + stkTCPLingerTimeout = tcpip.TCPLingerTimeoutOption(DefaultTCPLingerTimeout) + } + } + // Cap it to the stack wide TCPLinger timeout. + if v > stkTCPLingerTimeout { + v = stkTCPLingerTimeout + } + e.tcpLingerTimeout = time.Duration(v) + e.mu.Unlock() + return nil + default: return nil } @@ -1562,6 +1616,12 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.mu.RUnlock() return nil + case *tcpip.TCPLingerTimeoutOption: + e.mu.Lock() + *o = tcpip.TCPLingerTimeoutOption(e.tcpLingerTimeout) + e.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -1696,7 +1756,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc // src IP to ensure that for a given tuple (srcIP, destIP, // destPort) the offset used as a starting point is the same to // ensure that we can cycle through the port space effectively. - h := jenkins.Sum32(e.stack.PortSeed()) + h := jenkins.Sum32(e.stack.Seed()) h.Write([]byte(e.ID.LocalAddress)) h.Write([]byte(e.ID.RemoteAddress)) portBuf := make([]byte, 2) @@ -1782,9 +1842,8 @@ func (*endpoint) ConnectEndpoint(tcpip.Endpoint) *tcpip.Error { // peer. func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { e.mu.Lock() - defer e.mu.Unlock() e.shutdownFlags |= flags - + finQueued := false switch { case e.state.connected(): // Close for read. 
@@ -1799,6 +1858,7 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { // the connection with a RST. if (e.shutdownFlags&tcpip.ShutdownWrite) != 0 && rcvBufUsed > 0 { e.notifyProtocolGoroutine(notifyReset) + e.mu.Unlock() return nil } } @@ -1817,14 +1877,11 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { s := newSegmentFromView(&e.route, e.ID, nil) e.sndQueue.PushBack(s) e.sndBufInQueue++ - + finQueued = true // Mark endpoint as closed. e.sndClosed = true e.sndBufMu.Unlock() - - // Tell protocol goroutine to close. - e.sndCloseWaker.Assert() } case e.state == StateListen: @@ -1832,11 +1889,20 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { if flags&tcpip.ShutdownRead != 0 { e.notifyProtocolGoroutine(notifyClose) } - default: + e.mu.Unlock() return tcpip.ErrNotConnected } - + e.mu.Unlock() + if finQueued { + if e.workMu.TryLock() { + e.handleClose() + e.workMu.Unlock() + } else { + // Tell protocol goroutine to close. + e.sndCloseWaker.Assert() + } + } return nil } @@ -1928,12 +1994,7 @@ func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { return nil, nil, tcpip.ErrWouldBlock } - // Start the protocol goroutine. - wq := &waiter.Queue{} - n.startAcceptedLoop(wq) - e.stack.Stats().TCP.PassiveConnectionOpenings.Increment() - - return n, wq, nil + return n, n.waiterQueue, nil } // Bind binds the endpoint to a specific local port and optionally address. @@ -2058,6 +2119,10 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk e.stack.Stats().TCP.ResetsReceived.Increment() } + e.enqueueSegment(s) +} + +func (e *endpoint) enqueueSegment(s *segment) { // Send packet to worker goroutine. if e.segmentQueue.enqueue(s) { e.newSegmentWaker.Assert() diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index 19f003b6b..7aa4c3f0e 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -78,7 +78,7 @@ func (e *endpoint) beforeSave() { } fallthrough case StateError, StateClose: - for e.state == StateError && e.workerRunning { + for (e.state == StateError || e.state == StateClose) && e.workerRunning { e.mu.Unlock() time.Sleep(100 * time.Millisecond) e.mu.Lock() @@ -165,6 +165,12 @@ func (e *endpoint) loadState(state EndpointState) { // afterLoad is invoked by stateify. func (e *endpoint) afterLoad() { + // Freeze segment queue before registering to prevent any segments + // from being delivered while it is being restored. + e.origEndpointState = e.state + // Restore the endpoint to InitialState as it will be moved to + // its origEndpointState during Resume. 
+ e.state = StateInitial stack.StackFromEnv.RegisterRestoredEndpoint(e) } @@ -173,8 +179,8 @@ func (e *endpoint) Resume(s *stack.Stack) { e.stack = s e.segmentQueue.setLimit(MaxUnprocessedSegments) e.workMu.Init() + state := e.origEndpointState - state := e.state switch state { case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished: var ss SendBufferSizeOption @@ -189,7 +195,6 @@ func (e *endpoint) Resume(s *stack.Stack) { } bind := func() { - e.state = StateInitial if len(e.BindAddr) == 0 { e.BindAddr = e.ID.LocalAddress } @@ -219,6 +224,16 @@ func (e *endpoint) Resume(s *stack.Stack) { if err := e.connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.ID.RemotePort}, false, e.workerRunning); err != tcpip.ErrConnectStarted { panic("endpoint connecting failed: " + err.String()) } + e.mu.Lock() + e.state = e.origEndpointState + closed := e.closed + e.mu.Unlock() + e.notifyProtocolGoroutine(notifyTickleWorker) + if state == StateFinWait2 && closed { + // If the endpoint has been closed then make sure we notify so + // that the FIN_WAIT2 timer is started after a restore. + e.notifyProtocolGoroutine(notifyClose) + } connectedLoading.Done() case StateListen: tcpip.AsyncLoading.Add(1) @@ -265,8 +280,11 @@ func (e *endpoint) Resume(s *stack.Stack) { tcpip.AsyncLoading.Done() }() } - fallthrough + e.state = StateClose + e.stack.CompleteTransportEndpointCleanup(e) + tcpip.DeleteDanglingEndpoint(e) case StateError: + e.state = StateError e.stack.CompleteTransportEndpointCleanup(e) tcpip.DeleteDanglingEndpoint(e) } diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index c8e4a0d7e..89b965c23 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -23,6 +23,7 @@ package tcp import ( "strings" "sync" + "time" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -54,6 +55,14 @@ const ( // MaxUnprocessedSegments is the maximum number of unprocessed segments // that can be queued for a given endpoint. MaxUnprocessedSegments = 300 + + // DefaultTCPLingerTimeout is the amount of time that sockets linger in + // FIN_WAIT_2 state before being marked closed. + DefaultTCPLingerTimeout = 60 * time.Second + + // DefaultTCPTimeWaitTimeout is the amount of time that sockets linger + // in TIME_WAIT state before being marked closed. + DefaultTCPTimeWaitTimeout = 60 * time.Second ) // SACKEnabled option can be used to enable SACK support in the TCP @@ -93,6 +102,8 @@ type protocol struct { congestionControl string availableCongestionControl []string moderateReceiveBuffer bool + tcpLingerTimeout time.Duration + tcpTimeWaitTimeout time.Duration } // Number returns the tcp protocol number. 
@@ -212,6 +223,24 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { p.mu.Unlock() return nil + case tcpip.TCPLingerTimeoutOption: + if v < 0 { + v = 0 + } + p.mu.Lock() + p.tcpLingerTimeout = time.Duration(v) + p.mu.Unlock() + return nil + + case tcpip.TCPTimeWaitTimeoutOption: + if v < 0 { + v = 0 + } + p.mu.Lock() + p.tcpTimeWaitTimeout = time.Duration(v) + p.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -262,6 +291,18 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { p.mu.Unlock() return nil + case *tcpip.TCPLingerTimeoutOption: + p.mu.Lock() + *v = tcpip.TCPLingerTimeoutOption(p.tcpLingerTimeout) + p.mu.Unlock() + return nil + + case *tcpip.TCPTimeWaitTimeoutOption: + p.mu.Lock() + *v = tcpip.TCPTimeWaitTimeoutOption(p.tcpTimeWaitTimeout) + p.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -274,5 +315,7 @@ func NewProtocol() stack.TransportProtocol { recvBufferSize: ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize}, congestionControl: ccReno, availableCongestionControl: []string{ccReno, ccCubic}, + tcpLingerTimeout: DefaultTCPLingerTimeout, + tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout, } } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index e90f9a7d9..068b90fb6 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -18,6 +18,7 @@ import ( "container/heap" "time" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) @@ -209,6 +210,11 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum switch r.ep.state { case StateFinWait1: r.ep.state = StateFinWait2 + // Notify protocol goroutine that we have received an + // ACK to our FIN so that it can start the FIN_WAIT2 + // timer to abort connection if the other side does + // not close within 2MSL. + r.ep.notifyProtocolGoroutine(notifyClose) case StateClosing: r.ep.state = StateTimeWait case StateLastAck: @@ -253,23 +259,105 @@ func (r *receiver) updateRTT() { r.ep.rcvListMu.Unlock() } -// handleRcvdSegment handles TCP segments directed at the connection managed by -// r as they arrive. It is called by the protocol main loop. -func (r *receiver) handleRcvdSegment(s *segment) { +func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, closed bool) (drop bool, err *tcpip.Error) { + r.ep.rcvListMu.Lock() + rcvClosed := r.ep.rcvClosed || r.closed + r.ep.rcvListMu.Unlock() + + // If we are in one of the shutdown states then we need to do + // additional checks before we try and process the segment. + switch state { + case StateCloseWait, StateClosing, StateLastAck: + if !s.sequenceNumber.LessThanEq(r.rcvNxt) { + s.decRef() + // Just drop the segment as we have + // already received a FIN and this + // segment is after the sequence number + // for the FIN. + return true, nil + } + fallthrough + case StateFinWait1: + fallthrough + case StateFinWait2: + // If we are closed for reads (either due to an + // incoming FIN or the user calling shutdown(.., + // SHUT_RD) then any data past the rcvNxt should + // trigger a RST. + endDataSeq := s.sequenceNumber.Add(seqnum.Size(s.data.Size())) + if rcvClosed && r.rcvNxt.LessThan(endDataSeq) { + s.decRef() + return true, tcpip.ErrConnectionAborted + } + if state == StateFinWait1 { + break + } + + // If it's a retransmission of an old data segment + // or a pure ACK then allow it. 
+ if s.sequenceNumber.Add(s.logicalLen()).LessThanEq(r.rcvNxt) || + s.logicalLen() == 0 { + break + } + + // In FIN-WAIT2 if the socket is fully + // closed(not owned by application on our end + // then the only acceptable segment is a + // FIN. Since FIN can technically also carry + // data we verify that the segment carrying a + // FIN ends at exactly e.rcvNxt+1. + // + // From RFC793 page 25. + // + // For sequence number purposes, the SYN is + // considered to occur before the first actual + // data octet of the segment in which it occurs, + // while the FIN is considered to occur after + // the last actual data octet in a segment in + // which it occurs. + if closed && (!s.flagIsSet(header.TCPFlagFin) || s.sequenceNumber.Add(s.logicalLen()) != r.rcvNxt+1) { + s.decRef() + return true, tcpip.ErrConnectionAborted + } + } + // We don't care about receive processing anymore if the receive side // is closed. - if r.closed { - return + // + // NOTE: We still want to permit a FIN as it's possible only our + // end has closed and the peer is yet to send a FIN. Hence we + // compare only the payload. + segEnd := s.sequenceNumber.Add(seqnum.Size(s.data.Size())) + if rcvClosed && !segEnd.LessThanEq(r.rcvNxt) { + return true, nil + } + return false, nil +} + +// handleRcvdSegment handles TCP segments directed at the connection managed by +// r as they arrive. It is called by the protocol main loop. +func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) { + r.ep.mu.RLock() + state := r.ep.state + closed := r.ep.closed + r.ep.mu.RUnlock() + + if state != StateEstablished { + drop, err := r.handleRcvdSegmentClosing(s, state, closed) + if drop || err != nil { + return drop, err + } } segLen := seqnum.Size(s.data.Size()) segSeq := s.sequenceNumber // If the sequence number range is outside the acceptable range, just - // send an ACK. This is according to RFC 793, page 37. + // send an ACK and stop further processing of the segment. + // This is according to RFC 793, page 68. if !r.acceptable(segSeq, segLen) { r.ep.snd.sendAck() - return + return true, nil } // Defer segment processing if it can't be consumed now. @@ -288,7 +376,7 @@ func (r *receiver) handleRcvdSegment(s *segment) { // have to retransmit. r.ep.snd.sendAck() } - return + return false, nil } // Since we consumed a segment update the receiver's RTT estimate @@ -315,4 +403,67 @@ func (r *receiver) handleRcvdSegment(s *segment) { r.pendingBufUsed -= s.logicalLen() s.decRef() } + return false, nil +} + +// handleTimeWaitSegment handles inbound segments received when the endpoint +// has entered the TIME_WAIT state. +func (r *receiver) handleTimeWaitSegment(s *segment) (resetTimeWait bool, newSyn bool) { + segSeq := s.sequenceNumber + segLen := seqnum.Size(s.data.Size()) + + // Just silently drop any RST packets in TIME_WAIT. We do not support + // TIME_WAIT assasination as a result we confirm w/ fix 1 as described + // in https://tools.ietf.org/html/rfc1337#section-3. + if s.flagIsSet(header.TCPFlagRst) { + return false, false + } + + // If it's a SYN and the sequence number is higher than any seen before + // for this connection then try and redirect it to a listening endpoint + // if available. + // + // RFC 1122: + // "When a connection is [...] on TIME-WAIT state [...] 
+ // [a TCP] MAY accept a new SYN from the remote TCP to + // reopen the connection directly, if it: + + // (1) assigns its initial sequence number for the new + // connection to be larger than the largest sequence + // number it used on the previous connection incarnation, + // and + + // (2) returns to TIME-WAIT state if the SYN turns out + // to be an old duplicate". + if s.flagIsSet(header.TCPFlagSyn) && r.rcvNxt.LessThan(segSeq) { + + return false, true + } + + // Drop the segment if it does not contain an ACK. + if !s.flagIsSet(header.TCPFlagAck) { + return false, false + } + + // Update Timestamp if required. See RFC7323, section-4.3. + if r.ep.sendTSOk && s.parsedOptions.TS { + r.ep.updateRecentTimestamp(s.parsedOptions.TSVal, r.ep.snd.maxSentAck, segSeq) + } + + if segSeq.Add(1) == r.rcvNxt && s.flagIsSet(header.TCPFlagFin) { + // If it's a FIN-ACK then resetTimeWait and send an ACK, as it + // indicates our final ACK could have been lost. + r.ep.snd.sendAck() + return true, false + } + + // If the sequence number range is outside the acceptable range or + // carries data then just send an ACK. This is according to RFC 793, + // page 37. + // + // NOTE: In TIME_WAIT the only acceptable sequence number is rcvNxt. + if segSeq != r.rcvNxt || segLen != 0 { + r.ep.snd.sendAck() + } + return false, false } diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index f4ea5f091..0c1704d74 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -206,17 +206,18 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() + // Set TCPLingerTimeout to 5 seconds so that sockets are marked closed wq := &waiter.Queue{} ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq) if err != nil { - t.Fatalf("NewEndpoint failed: %v", err) + t.Fatalf("NewEndpoint failed: %s", err) } if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { - t.Fatalf("Bind failed: %v", err) + t.Fatalf("Bind failed: %s", err) } if err := ep.Listen(10); err != nil { - t.Fatalf("Listen failed: %v", err) + t.Fatalf("Listen failed: %s", err) } // Send a SYN request. @@ -256,7 +257,7 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { case <-ch: c.EP, _, err = ep.Accept() if err != nil { - t.Fatalf("Accept failed: %v", err) + t.Fatalf("Accept failed: %s", err) } case <-time.After(1 * time.Second): @@ -264,6 +265,13 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { } } + // Lower stackwide TIME_WAIT timeout so that the reservations + // are released instantly on Close. + tcpTW := tcpip.TCPTimeWaitTimeoutOption(1 * time.Millisecond) + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpTW); err != nil { + t.Fatalf("e.stack.SetTransportProtocolOption(%d, %s) = %s", tcp.ProtocolNumber, tcpTW, err) + } + c.EP.Close() checker.IPv4(t, c.GetPacket(), checker.TCP( checker.SrcPort(context.StackPort), @@ -285,6 +293,11 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { // Get the ACK to the FIN we just sent. c.GetPacket() + // Since an active close was done we need to wait for a little more than + // tcpLingerTimeout for the port reservations to be released and the + // socket to move to a CLOSED state. + time.Sleep(20 * time.Millisecond) + // Now resend the same ACK, this ACK should generate a RST as there // should be no endpoint in SYN-RCVD state and we are not using // syn-cookies yet. 
The reason we send the same ACK is we need a valid @@ -376,6 +389,13 @@ func TestConnectResetAfterClose(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() + // Set TCPLinger to 3 seconds so that sockets are marked closed + // after 3 second in FIN_WAIT2 state. + tcpLingerTimeout := 3 * time.Second + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPLingerTimeoutOption(tcpLingerTimeout)); err != nil { + t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPLingerTimeoutOption(%d) failed: %s", tcpLingerTimeout, err) + } + c.CreateConnected(789, 30000, -1 /* epRcvBuf */) ep := c.EP c.EP = nil @@ -396,12 +416,24 @@ func TestConnectResetAfterClose(t *testing.T) { DstPort: c.Port, Flags: header.TCPFlagAck, SeqNum: 790, - AckNum: c.IRS.Add(1), + AckNum: c.IRS.Add(2), + RcvWnd: 30000, + }) + + // Wait for the ep to give up waiting for a FIN. + time.Sleep(tcpLingerTimeout + 1*time.Second) + + // Now send an ACK and it should trigger a RST as the endpoint should + // not exist anymore. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagAck, + SeqNum: 790, + AckNum: c.IRS.Add(2), RcvWnd: 30000, }) - // Wait for the ep to give up waiting for a FIN, and send a RST. - time.Sleep(3 * time.Second) for { b := c.GetPacket() tcpHdr := header.TCP(header.IPv4(b).Payload()) @@ -413,7 +445,7 @@ func TestConnectResetAfterClose(t *testing.T) { checker.IPv4(t, b, checker.TCP( checker.DstPort(context.TestPort), - checker.SeqNum(uint32(c.IRS)+1), + checker.SeqNum(uint32(c.IRS)+2), checker.AckNum(790), checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst), ), @@ -1110,8 +1142,7 @@ func TestRstOnCloseWithUnreadDataFinConvertRst(t *testing.T) { checker.TCP( checker.DstPort(context.TestPort), checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst), - // We shouldn't consume a sequence number on RST. - checker.SeqNum(uint32(c.IRS)+1), + checker.SeqNum(uint32(c.IRS)+2), )) // The RST puts the endpoint into an error state. if got, want := tcp.EndpointState(c.EP.State()), tcp.StateError; got != want { @@ -3085,6 +3116,13 @@ func TestReadAfterClosedState(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() + // Set TCPTimeWaitTimeout to 1 seconds so that sockets are marked closed + // after 1 second in TIME_WAIT state. + tcpTimeWaitTimeout := 1 * time.Second + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)); err != nil { + t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPTimeWaitTimeout(%d) failed: %s", tcpTimeWaitTimeout, err) + } + c.CreateConnected(789, 30000, -1 /* epRcvBuf */) we, ch := waiter.NewChannelEntry(nil) @@ -3092,12 +3130,12 @@ func TestReadAfterClosedState(t *testing.T) { defer c.WQ.EventUnregister(&we) if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { - t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock) + t.Fatalf("got c.EP.Read(nil) = %v, want = %s", err, tcpip.ErrWouldBlock) } // Shutdown immediately for write, check that we get a FIN. if err := c.EP.Shutdown(tcpip.ShutdownWrite); err != nil { - t.Fatalf("Shutdown failed: %v", err) + t.Fatalf("Shutdown failed: %s", err) } checker.IPv4(t, c.GetPacket(), @@ -3135,10 +3173,9 @@ func TestReadAfterClosedState(t *testing.T) { ), ) - // Give the stack the chance to transition to closed state. Note that since - // both the sender and receiver are now closed, we effectively skip the - // TIME-WAIT state. 
- time.Sleep(1 * time.Second) + // Give the stack the chance to transition to closed state from + // TIME_WAIT. + time.Sleep(tcpTimeWaitTimeout * 2) if got, want := tcp.EndpointState(c.EP.State()), tcp.StateClose; got != want { t.Errorf("Unexpected endpoint state: want %v, got %v", want, got) @@ -3155,7 +3192,7 @@ func TestReadAfterClosedState(t *testing.T) { peekBuf := make([]byte, 10) n, _, err := c.EP.Peek([][]byte{peekBuf}) if err != nil { - t.Fatalf("Peek failed: %v", err) + t.Fatalf("Peek failed: %s", err) } peekBuf = peekBuf[:n] @@ -3166,7 +3203,7 @@ func TestReadAfterClosedState(t *testing.T) { // Receive data. v, _, err := c.EP.Read(nil) if err != nil { - t.Fatalf("Read failed: %v", err) + t.Fatalf("Read failed: %s", err) } if !bytes.Equal(data, v) { @@ -3176,11 +3213,11 @@ func TestReadAfterClosedState(t *testing.T) { // Now that we drained the queue, check that functions fail with the // right error code. if _, _, err := c.EP.Read(nil); err != tcpip.ErrClosedForReceive { - t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrClosedForReceive) + t.Fatalf("got c.EP.Read(nil) = %v, want = %s", err, tcpip.ErrClosedForReceive) } if _, _, err := c.EP.Peek([][]byte{peekBuf}); err != tcpip.ErrClosedForReceive { - t.Fatalf("got c.EP.Peek(...) = %v, want = %v", err, tcpip.ErrClosedForReceive) + t.Fatalf("got c.EP.Peek(...) = %v, want = %s", err, tcpip.ErrClosedForReceive) } } @@ -4347,7 +4384,8 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) { // Send a SYN request. irs := seqnum.Value(789) c.SendPacket(nil, &context.Headers{ - SrcPort: context.TestPort, + // pick a different src port for new SYN. + SrcPort: context.TestPort + 1, DstPort: context.StackPort, Flags: header.TCPFlagSyn, SeqNum: irs, @@ -4893,3 +4931,545 @@ func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcp.Del t.Errorf("ep.GetSockOptInt(tcpip.DelayOption) got: %d, want: %d", gotDelayOption, wantDelayOption) } } + +func TestTCPLingerTimeout(t *testing.T) { + c := context.New(t, 1500 /* mtu */) + defer c.Cleanup() + + c.CreateConnected(789, 30000, -1 /* epRcvBuf */) + + testCases := []struct { + name string + tcpLingerTimeout time.Duration + want time.Duration + }{ + {"NegativeLingerTimeout", -123123, 0}, + {"ZeroLingerTimeout", 0, 0}, + {"InRangeLingerTimeout", 10 * time.Second, 10 * time.Second}, + // Values > stack's TCPLingerTimeout are capped to the stack's + // value. 
Defaults to tcp.DefaultTCPLingerTimeout(60 seconds) + {"AboveMaxLingerTimeout", 65 * time.Second, 60 * time.Second}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if err := c.EP.SetSockOpt(tcpip.TCPLingerTimeoutOption(tc.tcpLingerTimeout)); err != nil { + t.Fatalf("SetSockOpt(%s) = %s", tc.tcpLingerTimeout, err) + } + var v tcpip.TCPLingerTimeoutOption + if err := c.EP.GetSockOpt(&v); err != nil { + t.Fatalf("GetSockOpt(tcpip.TCPLingerTimeoutOption) = %s", err) + } + if got, want := time.Duration(v), tc.want; got != want { + t.Fatalf("unexpected linger timeout got: %s, want: %s", got, want) + } + }) + } +} + +func TestTCPTimeWaitRSTIgnored(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + wq := &waiter.Queue{} + ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq) + if err != nil { + t.Fatalf("NewEndpoint failed: %s", err) + } + if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatalf("Bind failed: %s", err) + } + + if err := ep.Listen(10); err != nil { + t.Fatalf("Listen failed: %s", err) + } + + // Send a SYN request. + iss := seqnum.Value(789) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: iss, + }) + + // Receive the SYN-ACK reply. + b := c.GetPacket() + tcpHdr := header.TCP(header.IPv4(b).Payload()) + c.IRS = seqnum.Value(tcpHdr.SequenceNumber()) + + ackHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 1, + AckNum: c.IRS + 1, + } + + // Send ACK. + c.SendPacket(nil, ackHeaders) + + // Try to accept the connection. + we, ch := waiter.NewChannelEntry(nil) + wq.EventRegister(&we, waiter.EventIn) + defer wq.EventUnregister(&we) + + c.EP, _, err = ep.Accept() + if err == tcpip.ErrWouldBlock { + // Wait for connection to be established. + select { + case <-ch: + c.EP, _, err = ep.Accept() + if err != nil { + t.Fatalf("Accept failed: %s", err) + } + + case <-time.After(1 * time.Second): + t.Fatalf("Timed out waiting for accept") + } + } + + c.EP.Close() + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+1)), + checker.AckNum(uint32(iss)+1), + checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck))) + + finHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck | header.TCPFlagFin, + SeqNum: iss + 1, + AckNum: c.IRS + 2, + } + + c.SendPacket(nil, finHeaders) + + // Get the ACK to the FIN we just sent. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+2)), + checker.AckNum(uint32(iss)+2), + checker.TCPFlags(header.TCPFlagAck))) + + // Now send a RST and this should be ignored and not + // generate an ACK. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagRst, + SeqNum: iss + 1, + AckNum: c.IRS + 2, + }) + + c.CheckNoPacketTimeout("unexpected packet received in TIME_WAIT state", 1*time.Second) + + // Out of order ACK should generate an immediate ACK in + // TIME_WAIT. 
+ c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 1, + AckNum: c.IRS + 3, + }) + + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+2)), + checker.AckNum(uint32(iss)+2), + checker.TCPFlags(header.TCPFlagAck))) +} + +func TestTCPTimeWaitOutOfOrder(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + wq := &waiter.Queue{} + ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq) + if err != nil { + t.Fatalf("NewEndpoint failed: %s", err) + } + if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatalf("Bind failed: %s", err) + } + + if err := ep.Listen(10); err != nil { + t.Fatalf("Listen failed: %s", err) + } + + // Send a SYN request. + iss := seqnum.Value(789) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: iss, + }) + + // Receive the SYN-ACK reply. + b := c.GetPacket() + tcpHdr := header.TCP(header.IPv4(b).Payload()) + c.IRS = seqnum.Value(tcpHdr.SequenceNumber()) + + ackHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 1, + AckNum: c.IRS + 1, + } + + // Send ACK. + c.SendPacket(nil, ackHeaders) + + // Try to accept the connection. + we, ch := waiter.NewChannelEntry(nil) + wq.EventRegister(&we, waiter.EventIn) + defer wq.EventUnregister(&we) + + c.EP, _, err = ep.Accept() + if err == tcpip.ErrWouldBlock { + // Wait for connection to be established. + select { + case <-ch: + c.EP, _, err = ep.Accept() + if err != nil { + t.Fatalf("Accept failed: %s", err) + } + + case <-time.After(1 * time.Second): + t.Fatalf("Timed out waiting for accept") + } + } + + c.EP.Close() + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+1)), + checker.AckNum(uint32(iss)+1), + checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck))) + + finHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck | header.TCPFlagFin, + SeqNum: iss + 1, + AckNum: c.IRS + 2, + } + + c.SendPacket(nil, finHeaders) + + // Get the ACK to the FIN we just sent. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+2)), + checker.AckNum(uint32(iss)+2), + checker.TCPFlags(header.TCPFlagAck))) + + // Out of order ACK should generate an immediate ACK in + // TIME_WAIT. 
+ c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 1, + AckNum: c.IRS + 3, + }) + + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+2)), + checker.AckNum(uint32(iss)+2), + checker.TCPFlags(header.TCPFlagAck))) +} + +func TestTCPTimeWaitNewSyn(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + wq := &waiter.Queue{} + ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq) + if err != nil { + t.Fatalf("NewEndpoint failed: %s", err) + } + if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatalf("Bind failed: %s", err) + } + + if err := ep.Listen(10); err != nil { + t.Fatalf("Listen failed: %s", err) + } + + // Send a SYN request. + iss := seqnum.Value(789) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: iss, + }) + + // Receive the SYN-ACK reply. + b := c.GetPacket() + tcpHdr := header.TCP(header.IPv4(b).Payload()) + c.IRS = seqnum.Value(tcpHdr.SequenceNumber()) + + ackHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 1, + AckNum: c.IRS + 1, + } + + // Send ACK. + c.SendPacket(nil, ackHeaders) + + // Try to accept the connection. + we, ch := waiter.NewChannelEntry(nil) + wq.EventRegister(&we, waiter.EventIn) + defer wq.EventUnregister(&we) + + c.EP, _, err = ep.Accept() + if err == tcpip.ErrWouldBlock { + // Wait for connection to be established. + select { + case <-ch: + c.EP, _, err = ep.Accept() + if err != nil { + t.Fatalf("Accept failed: %s", err) + } + + case <-time.After(1 * time.Second): + t.Fatalf("Timed out waiting for accept") + } + } + + c.EP.Close() + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+1)), + checker.AckNum(uint32(iss)+1), + checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck))) + + finHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck | header.TCPFlagFin, + SeqNum: iss + 1, + AckNum: c.IRS + 2, + } + + c.SendPacket(nil, finHeaders) + + // Get the ACK to the FIN we just sent. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+2)), + checker.AckNum(uint32(iss)+2), + checker.TCPFlags(header.TCPFlagAck))) + + // Send a SYN request w/ sequence number lower than + // the highest sequence number sent. We just reuse + // the same number. + iss = seqnum.Value(789) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: iss, + }) + + c.CheckNoPacketTimeout("unexpected packet received in response to SYN", 1*time.Second) + + // Send a SYN request w/ sequence number higher than + // the highest sequence number sent. + iss = seqnum.Value(792) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: iss, + }) + + // Receive the SYN-ACK reply. 
+ b = c.GetPacket() + tcpHdr = header.TCP(header.IPv4(b).Payload()) + c.IRS = seqnum.Value(tcpHdr.SequenceNumber()) + + ackHeaders = &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 1, + AckNum: c.IRS + 1, + } + + // Send ACK. + c.SendPacket(nil, ackHeaders) + + // Try to accept the connection. + c.EP, _, err = ep.Accept() + if err == tcpip.ErrWouldBlock { + // Wait for connection to be established. + select { + case <-ch: + c.EP, _, err = ep.Accept() + if err != nil { + t.Fatalf("Accept failed: %s", err) + } + + case <-time.After(1 * time.Second): + t.Fatalf("Timed out waiting for accept") + } + } +} + +func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + // Set TCPTimeWaitTimeout to 5 seconds so that sockets are marked closed + // after 5 seconds in TIME_WAIT state. + tcpTimeWaitTimeout := 5 * time.Second + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)); err != nil { + t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPLingerTimeoutOption(%d) failed: %s", tcpTimeWaitTimeout, err) + } + + wq := &waiter.Queue{} + ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq) + if err != nil { + t.Fatalf("NewEndpoint failed: %s", err) + } + if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatalf("Bind failed: %s", err) + } + + if err := ep.Listen(10); err != nil { + t.Fatalf("Listen failed: %s", err) + } + + // Send a SYN request. + iss := seqnum.Value(789) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: iss, + }) + + // Receive the SYN-ACK reply. + b := c.GetPacket() + tcpHdr := header.TCP(header.IPv4(b).Payload()) + c.IRS = seqnum.Value(tcpHdr.SequenceNumber()) + + ackHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 1, + AckNum: c.IRS + 1, + } + + // Send ACK. + c.SendPacket(nil, ackHeaders) + + // Try to accept the connection. + we, ch := waiter.NewChannelEntry(nil) + wq.EventRegister(&we, waiter.EventIn) + defer wq.EventUnregister(&we) + + c.EP, _, err = ep.Accept() + if err == tcpip.ErrWouldBlock { + // Wait for connection to be established. + select { + case <-ch: + c.EP, _, err = ep.Accept() + if err != nil { + t.Fatalf("Accept failed: %s", err) + } + + case <-time.After(1 * time.Second): + t.Fatalf("Timed out waiting for accept") + } + } + + c.EP.Close() + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+1)), + checker.AckNum(uint32(iss)+1), + checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck))) + + finHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck | header.TCPFlagFin, + SeqNum: iss + 1, + AckNum: c.IRS + 2, + } + + c.SendPacket(nil, finHeaders) + + // Get the ACK to the FIN we just sent. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+2)), + checker.AckNum(uint32(iss)+2), + checker.TCPFlags(header.TCPFlagAck))) + + time.Sleep(2 * time.Second) + + // Now send a duplicate FIN. 
This should cause the TIME_WAIT to extend + // by another 5 seconds and also send us a duplicate ACK as it should + // indicate that the final ACK was potentially lost. + c.SendPacket(nil, finHeaders) + + // Get the ACK to the FIN we just sent. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+2)), + checker.AckNum(uint32(iss)+2), + checker.TCPFlags(header.TCPFlagAck))) + + // Sleep for 4 seconds so at this point we are 1 second past the + // original tcpLingerTimeout of 5 seconds. + time.Sleep(4 * time.Second) + + // Send an ACK and it should not generate any packet as the socket + // should still be in TIME_WAIT for another another 5 seconds due + // to the duplicate FIN we sent earlier. + *ackHeaders = *finHeaders + ackHeaders.SeqNum = ackHeaders.SeqNum + 1 + ackHeaders.Flags = header.TCPFlagAck + c.SendPacket(nil, ackHeaders) + + c.CheckNoPacketTimeout("unexpected packet received from endpoint in TIME_WAIT", 1*time.Second) + // Now sleep for another 2 seconds so that we are past the + // extended TIME_WAIT of 7 seconds (2 + 5). + time.Sleep(2 * time.Second) + + // Resend the same ACK. + c.SendPacket(nil, ackHeaders) + + // Receive the RST that should be generated as there is no valid + // endpoint. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(ackHeaders.AckNum)), + checker.AckNum(uint32(ackHeaders.SeqNum)), + checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck))) +} diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 3e5b6b3c3..722d14b53 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -9,7 +9,7 @@ syscall_test(test = "//test/syscalls/linux:accept_bind_stream_test") syscall_test( size = "large", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:accept_bind_test", ) @@ -434,7 +434,7 @@ syscall_test( syscall_test( size = "large", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_abstract_test", ) @@ -445,7 +445,7 @@ syscall_test( syscall_test( size = "large", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_domain_test", ) @@ -458,19 +458,19 @@ syscall_test( syscall_test( size = "large", add_overlay = True, - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_filesystem_test", ) syscall_test( size = "large", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_inet_loopback_test", ) syscall_test( size = "large", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_generic_loopback_test", ) @@ -481,13 +481,13 @@ syscall_test( syscall_test( size = "large", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_loopback_test", ) syscall_test( size = "medium", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_udp_generic_loopback_test", ) @@ -498,7 +498,7 @@ syscall_test( syscall_test( size = "large", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_ip_udp_loopback_test", ) @@ -560,7 +560,7 @@ syscall_test( syscall_test( size = "large", add_overlay = True, - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_unix_pair_test", ) @@ -599,7 +599,7 @@ syscall_test( syscall_test( size = "large", - shard_count = 10, + shard_count = 50, test = "//test/syscalls/linux:socket_unix_unbound_stream_test", ) diff --git 
a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 93bff8299..f8b8cb724 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2141,6 +2141,7 @@ cc_library( deps = [ ":socket_test_util", "//test/util:test_util", + "//test/util:thread_util", "@com_google_googletest//:gtest", ], alwayslink = 1, diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index ab375aaaf..2eeee352e 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -31,6 +32,7 @@ #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" +#include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" @@ -267,6 +269,340 @@ TEST_P(SocketInetLoopbackTest, TCPbacklog) { } } +// TCPFinWait2Test creates a pair of connected sockets then closes one end to +// trigger FIN_WAIT2 state for the closed endpoint. Then it binds the same local +// IP/port on a new socket and tries to connect. The connect should fail w/ +// an EADDRINUSE. Then we wait till the FIN_WAIT2 timeout is over and try the +// connect again with a new socket and this time it should succeed. +// +// TCP timers are not S/R today, this can cause this test to be flaky when run +// under random S/R due to timer being reset on a restore. +TEST_P(SocketInetLoopbackTest, TCPFinWait2Test_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + // Lower FIN_WAIT2 state to 5 seconds for test. + constexpr int kTCPLingerTimeout = 5; + EXPECT_THAT(setsockopt(conn_fd.get(), IPPROTO_TCP, TCP_LINGER2, + &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)), + SyscallSucceedsWithValue(0)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // Get the address/port bound by the connecting socket. + sockaddr_storage conn_bound_addr; + socklen_t conn_addrlen = connector.addr_len; + ASSERT_THAT( + getsockname(conn_fd.get(), reinterpret_cast(&conn_bound_addr), + &conn_addrlen), + SyscallSucceeds()); + + // close the connecting FD to trigger FIN_WAIT2 on the connected fd. + conn_fd.reset(); + + // Now bind and connect a new socket. 
+ const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + // Disable cooperative saves after this point. As a save between the first + // bind/connect and the second one can cause the linger timeout timer to + // be restarted causing the final bind/connect to fail. + DisableSave ds; + + // TODO(gvisor.dev/issue/1030): Portmanager does not track all 5 tuple + // reservations which causes the bind() to succeed on gVisor but connect + // correctly fails. + if (IsRunningOnGvisor()) { + ASSERT_THAT( + bind(conn_fd2.get(), reinterpret_cast(&conn_bound_addr), + conn_addrlen), + SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(), + reinterpret_cast(&conn_addr), + conn_addrlen), + SyscallFailsWithErrno(EADDRINUSE)); + } else { + ASSERT_THAT( + bind(conn_fd2.get(), reinterpret_cast(&conn_bound_addr), + conn_addrlen), + SyscallFailsWithErrno(EADDRINUSE)); + } + + // Sleep for a little over the linger timeout to reduce flakiness in + // save/restore tests. + absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 1)); + + ds.reset(); + + if (!IsRunningOnGvisor()) { + ASSERT_THAT( + bind(conn_fd2.get(), reinterpret_cast(&conn_bound_addr), + conn_addrlen), + SyscallSucceeds()); + } + ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(), + reinterpret_cast(&conn_addr), + conn_addrlen), + SyscallSucceeds()); +} + +// TCPLinger2TimeoutAfterClose creates a pair of connected sockets +// then closes one end to trigger FIN_WAIT2 state for the closed endpont. +// It then sleeps for the TCP_LINGER2 timeout and verifies that bind/ +// connecting the same address succeeds. +// +// TCP timers are not S/R today, this can cause this test to be flaky when run +// under random S/R due to timer being reset on a restore. +TEST_P(SocketInetLoopbackTest, TCPLinger2TimeoutAfterClose_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // Get the address/port bound by the connecting socket. 
+ sockaddr_storage conn_bound_addr; + socklen_t conn_addrlen = connector.addr_len; + ASSERT_THAT( + getsockname(conn_fd.get(), reinterpret_cast(&conn_bound_addr), + &conn_addrlen), + SyscallSucceeds()); + + constexpr int kTCPLingerTimeout = 5; + EXPECT_THAT(setsockopt(conn_fd.get(), IPPROTO_TCP, TCP_LINGER2, + &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)), + SyscallSucceedsWithValue(0)); + + // close the connecting FD to trigger FIN_WAIT2 on the connected fd. + conn_fd.reset(); + + absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 1)); + + // Now bind and connect a new socket and verify that we can immediately + // rebind the address bound by the conn_fd as it never entered TIME_WAIT. + const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + ASSERT_THAT(bind(conn_fd2.get(), + reinterpret_cast(&conn_bound_addr), conn_addrlen), + SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(), + reinterpret_cast(&conn_addr), + conn_addrlen), + SyscallSucceeds()); +} + +// TCPResetAfterClose creates a pair of connected sockets then closes +// one end to trigger FIN_WAIT2 state for the closed endpoint verifies +// that we generate RSTs for any new data after the socket is fully +// closed. +TEST_P(SocketInetLoopbackTest, TCPResetAfterClose) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // close the connecting FD to trigger FIN_WAIT2 on the connected fd. + conn_fd.reset(); + + int data = 1234; + + // Now send data which should trigger a RST as the other end should + // have timed out and closed the socket. + EXPECT_THAT(RetryEINTR(send)(accepted.get(), &data, sizeof(data), 0), + SyscallSucceeds()); + // Sleep for a shortwhile to get a RST back. + absl::SleepFor(absl::Seconds(1)); + + // Try writing again and we should get an EPIPE back. + EXPECT_THAT(RetryEINTR(send)(accepted.get(), &data, sizeof(data), 0), + SyscallFailsWithErrno(EPIPE)); + + // Trying to read should return zero as the other end did send + // us a FIN. We do it twice to verify that the RST does not cause an + // ECONNRESET on the read after EOF has been read by applicaiton. 
+ EXPECT_THAT(RetryEINTR(recv)(accepted.get(), &data, sizeof(data), 0), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(RetryEINTR(recv)(accepted.get(), &data, sizeof(data), 0), + SyscallSucceedsWithValue(0)); +} + +// This test is disabled under random save as the the restore run +// results in the stack.Seed() being different which can cause +// sequence number of final connect to be one that is considered +// old and can cause the test to be flaky. +TEST_P(SocketInetLoopbackTest, TCPTimeWaitTest_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + // We disable saves after this point as a S/R causes the netstack seed + // to be regenerated which changes what ports/ISN is picked for a given + // tuple (src ip,src port, dst ip, dst port). This can cause the final + // SYN to use a sequence number that looks like one from the current + // connection in TIME_WAIT and will not be accepted causing the test + // to timeout. + // + // TODO(gvisor.dev/issue/940): S/R portSeed/portHint + DisableSave ds; + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // Get the address/port bound by the connecting socket. + sockaddr_storage conn_bound_addr; + socklen_t conn_addrlen = connector.addr_len; + ASSERT_THAT( + getsockname(conn_fd.get(), reinterpret_cast(&conn_bound_addr), + &conn_addrlen), + SyscallSucceeds()); + + // close the accept FD to trigger TIME_WAIT on the accepted socket which + // should cause the conn_fd to follow CLOSE_WAIT->LAST_ACK->CLOSED instead of + // TIME_WAIT. + accepted.reset(); + absl::SleepFor(absl::Seconds(1)); + conn_fd.reset(); + absl::SleepFor(absl::Seconds(1)); + + // Now bind and connect a new socket and verify that we can immediately + // rebind the address bound by the conn_fd as it never entered TIME_WAIT. 
+ const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + ASSERT_THAT(bind(conn_fd2.get(), + reinterpret_cast(&conn_bound_addr), conn_addrlen), + SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(), + reinterpret_cast(&conn_addr), + conn_addrlen), + SyscallSucceeds()); +} + INSTANTIATE_TEST_SUITE_P( All, SocketInetLoopbackTest, ::testing::Values( diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index 592448289..a37b49447 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -26,6 +26,7 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/test_util.h" +#include "test/util/thread_util.h" namespace gvisor { namespace testing { @@ -243,6 +244,31 @@ TEST_P(TCPSocketPairTest, ShutdownRdAllowsReadOfReceivedDataBeforeEOF) { SyscallSucceedsWithValue(0)); } +// This test verifies that a shutdown(wr) by the server after sending +// data allows the client to still read() the queued data and a client +// close after sending response allows server to read the incoming +// response. +TEST_P(TCPSocketPairTest, ShutdownWrServerClientClose) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + char buf[10] = {}; + ScopedThread t([&]() { + ASSERT_THAT(RetryEINTR(read)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(write)(sockets->first_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(close(sockets->release_first_fd()), + SyscallSucceedsWithValue(0)); + }); + ASSERT_THAT(RetryEINTR(write)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(shutdown)(sockets->second_fd(), SHUT_WR), + SyscallSucceedsWithValue(0)); + t.Join(); + + ASSERT_THAT(RetryEINTR(read)(sockets->second_fd(), buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); +} + TEST_P(TCPSocketPairTest, ClosedReadNonBlockingSocket) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -696,5 +722,72 @@ TEST_P(TCPSocketPairTest, SetCongestionControlFailsForUnsupported) { EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(old_cc))); } +// Linux and Netstack both default to a 60s TCP_LINGER2 timeout. +constexpr int kDefaultTCPLingerTimeout = 60; + +TEST_P(TCPSocketPairTest, TCPLingerTimeoutDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kDefaultTCPLingerTimeout); +} + +TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutZeroOrLess) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kZero, + sizeof(kZero)), + SyscallSucceedsWithValue(0)); + + constexpr int kNegative = -1234; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, + &kNegative, sizeof(kNegative)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutAboveDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Values above the net.ipv4.tcp_fin_timeout are capped to tcp_fin_timeout + // on linux (defaults to 60 seconds on linux). 
+ constexpr int kAboveDefault = kDefaultTCPLingerTimeout + 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, + &kAboveDefault, sizeof(kAboveDefault)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kDefaultTCPLingerTimeout); +} + +TEST_P(TCPSocketPairTest, SetTCPLingerTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Values above the net.ipv4.tcp_fin_timeout are capped to tcp_fin_timeout + // on linux (defaults to 60 seconds on linux). + constexpr int kTCPLingerTimeout = kDefaultTCPLingerTimeout - 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, + &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kTCPLingerTimeout); +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 773071680021a2fb985f3a3af7e9f65cdc1bd1ed Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 11 Nov 2019 14:13:50 -0800 Subject: Make `connect` on socket returned by `accept` correctly error out with EISCONN PiperOrigin-RevId: 279814493 --- pkg/tcpip/transport/tcp/accept.go | 2 ++ pkg/tcpip/transport/tcp/tcp_test.go | 3 +++ test/syscalls/linux/tcp_socket.cc | 13 +++++++++++++ 3 files changed, 18 insertions(+) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 0e8e0a2b4..f24b51b91 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -300,6 +300,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head ep.mu.Lock() ep.stack.Stats().TCP.CurrentEstablished.Increment() ep.state = StateEstablished + ep.isConnectNotified = true ep.mu.Unlock() // Update the receive window scaling. We can't do it before the @@ -539,6 +540,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { // Switch state to connected. n.stack.Stats().TCP.CurrentEstablished.Increment() n.state = StateEstablished + n.isConnectNotified = true // Do the delivery in a separate goroutine so // that we don't block the listen loop in case diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 0c1704d74..84579ce52 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -4599,6 +4599,9 @@ func TestEndpointBindListenAcceptState(t *testing.T) { if got, want := tcp.EndpointState(aep.State()), tcp.StateEstablished; got != want { t.Errorf("Unexpected endpoint state: want %v, got %v", want, got) } + if err := aep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrAlreadyConnected { + t.Errorf("Unexpected error attempting to call connect on an established endpoint, got: %v, want: %v", err, tcpip.ErrAlreadyConnected) + } // Listening endpoint remains in listen state. 
if got, want := tcp.EndpointState(ep.State()), tcp.StateListen; got != want { t.Errorf("Unexpected endpoint state: want %v, got %v", want, got) diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 277d6835a..bfc77ffc2 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -130,6 +130,19 @@ void TcpSocketTest::TearDown() { } } +TEST_P(TcpSocketTest, ConnectOnEstablishedConnection) { + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr), addrlen), + SyscallFailsWithErrno(EISCONN)); + ASSERT_THAT( + connect(t_, reinterpret_cast(&addr), addrlen), + SyscallFailsWithErrno(EISCONN)); +} + TEST_P(TcpSocketTest, DataCoalesced) { char buf[10]; -- cgit v1.2.3 From 2b0e4dc6aa7fb8a3f619220b72537a8fff2f95b4 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Mon, 11 Nov 2019 15:49:49 -0800 Subject: Remove obsolete TODO. This is now fixed. PiperOrigin-RevId: 279835100 --- test/syscalls/linux/tcp_socket.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index bfc77ffc2..99863b0ed 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -425,6 +425,11 @@ TEST_P(TcpSocketTest, PollWithFullBufferBlocks) { } // The last error should have been EWOULDBLOCK. ASSERT_EQ(errno, EWOULDBLOCK); + + // Now polling on the FD with a timeout should return 0 corresponding to no + // FDs ready. + struct pollfd poll_fd = {s_, POLLOUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10), SyscallSucceedsWithValue(0)); } TEST_P(TcpSocketTest, MsgTrunc) { -- cgit v1.2.3 From b82bd24f9495435cadd2713db829b19ce8fcce9d Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Mon, 11 Nov 2019 18:34:28 -0800 Subject: Update ephemeral port reservation tests. The existing tests which are disabled on gVisor are failing because we default to SO_REUSEADDR being enabled for TCP sockets. Update the test comments. Also add new tests for enabled SO_REUSEADDR. PiperOrigin-RevId: 279862275 --- test/syscalls/linux/socket_inet_loopback.cc | 223 ++++++++++++++++++++++++++-- 1 file changed, 212 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 2eeee352e..96a1731cf 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -1156,10 +1156,9 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) { sockaddr_storage addr_dual = test_addr_dual.addr; const FileDescriptor fd_dual = ASSERT_NO_ERRNO_AND_VALUE( Socket(test_addr_dual.family(), param.type, 0)); - int one = 1; - EXPECT_THAT( - setsockopt(fd_dual.get(), IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)), - SyscallSucceeds()); + EXPECT_THAT(setsockopt(fd_dual.get(), IPPROTO_IPV6, IPV6_V6ONLY, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast(&addr_dual), test_addr_dual.addr_len), SyscallSucceeds()); @@ -1207,7 +1206,8 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) { TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) { auto const& param = GetParam(); - // FIXME(b/114268588) + // FIXME(b/76031995): Support disabling SO_REUSEADDR for TCP sockets and make + // it disabled by default. 
SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); for (int i = 0; true; i++) { @@ -1305,10 +1305,76 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) { } } +TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReservedReuseAddr) { + auto const& param = GetParam(); + + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_DGRAM); + + // Bind the v6 loopback on a dual stack socket. + TestAddress const& test_addr = V6Loopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(connect(connected_fd.get(), + reinterpret_cast(&bound_addr), bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is not reserved. + const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(setsockopt(checking_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast(&connected_addr), + connected_addr_len), + SyscallSucceeds()); +} + TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) { auto const& param = GetParam(); - // FIXME(b/114268588) + // FIXME(b/76031995): Support disabling SO_REUSEADDR for TCP sockets and make + // it disabled by default. SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); for (int i = 0; true; i++) { @@ -1408,9 +1474,8 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) { // v6-only socket. 
const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE( Socket(test_addr_v6_any.family(), param.type, 0)); - int one = 1; EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY, - &one, sizeof(one)), + &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceeds()); ret = bind(fd_v6_only_any.get(), reinterpret_cast(&addr_v6_any), @@ -1429,10 +1494,78 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) { } } +TEST_P(SocketMultiProtocolInetLoopbackTest, + V4MappedEphemeralPortReservedResueAddr) { + auto const& param = GetParam(); + + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_DGRAM); + + // Bind the v4 loopback on a dual stack socket. + TestAddress const& test_addr = V4MappedLoopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + + ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(connect(connected_fd.get(), + reinterpret_cast(&bound_addr), bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is not reserved. + const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(setsockopt(checking_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast(&connected_addr), + connected_addr_len), + SyscallSucceeds()); +} + TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { auto const& param = GetParam(); - // FIXME(b/114268588) + // FIXME(b/76031995): Support disabling SO_REUSEADDR for TCP sockets and make + // it disabled by default. SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); for (int i = 0; true; i++) { @@ -1533,9 +1666,8 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { // v6-only socket. 
const FileDescriptor fd_v6_only_any = ASSERT_NO_ERRNO_AND_VALUE( Socket(test_addr_v6_any.family(), param.type, 0)); - int one = 1; EXPECT_THAT(setsockopt(fd_v6_only_any.get(), IPPROTO_IPV6, IPV6_V6ONLY, - &one, sizeof(one)), + &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceeds()); ret = bind(fd_v6_only_any.get(), reinterpret_cast(&addr_v6_any), @@ -1554,6 +1686,75 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { } } +TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReservedReuseAddr) { + auto const& param = GetParam(); + + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_DGRAM); + + // Bind the v4 loopback on a v4 socket. + TestAddress const& test_addr = V4Loopback(); + sockaddr_storage bound_addr = test_addr.addr; + const FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + + ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + + // Listen iff TCP. + if (param.type == SOCK_STREAM) { + ASSERT_THAT(listen(bound_fd.get(), SOMAXCONN), SyscallSucceeds()); + } + + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Connect to bind an ephemeral port. + const FileDescriptor connected_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + + ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(connect(connected_fd.get(), + reinterpret_cast(&bound_addr), bound_addr_len), + SyscallSucceeds()); + + // Get the ephemeral port. + sockaddr_storage connected_addr = {}; + socklen_t connected_addr_len = sizeof(connected_addr); + ASSERT_THAT(getsockname(connected_fd.get(), + reinterpret_cast(&connected_addr), + &connected_addr_len), + SyscallSucceeds()); + uint16_t const ephemeral_port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); + + // Verify that we actually got an ephemeral port. + ASSERT_NE(ephemeral_port, 0); + + // Verify that the ephemeral port is not reserved. + const FileDescriptor checking_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + ASSERT_THAT(setsockopt(checking_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + EXPECT_THAT( + bind(checking_fd.get(), reinterpret_cast(&connected_addr), + connected_addr_len), + SyscallSucceeds()); +} + TEST_P(SocketMultiProtocolInetLoopbackTest, PortReuseTwoSockets) { auto const& param = GetParam(); TestAddress const& test_addr = V4Loopback(); -- cgit v1.2.3 From 57a2a5ea3359e0879f5e4cc40fdb9ad973c689a8 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Tue, 12 Nov 2019 14:02:53 -0800 Subject: Add tests for SO_REUSEADDR and SO_REUSEPORT. * Basic tests for the SO_REUSEADDR and SO_REUSEPORT options. * SO_REUSEADDR functional tests for TCP and UDP. * SO_REUSEADDR and SO_REUSEPORT interaction tests for UDP. * Stubbed support for UDP getsockopt(SO_REUSEADDR). 
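[Editorial note, not part of the patch] The commit message above lists functional SO_REUSEADDR tests for TCP and UDP. As a rough, self-contained illustration of the UDP semantics those tests exercise — simplified to plain POSIX calls rather than the NewSocketPair/gtest helpers used in the diff, and assuming ordinary Linux loopback behavior — two UDP sockets may bind the same loopback address and port only when both set SO_REUSEADDR before bind(2); a third socket without the option is expected to fail with EADDRINUSE:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#include <cassert>
#include <cerrno>

int main() {
  sockaddr_in addr = {};
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  addr.sin_port = 0;  // Let the kernel pick an ephemeral port.

  const int on = 1;
  const int first = socket(AF_INET, SOCK_DGRAM, 0);
  assert(first >= 0);
  assert(setsockopt(first, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0);
  assert(bind(first, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) == 0);

  // Recover the port that was actually assigned.
  socklen_t len = sizeof(addr);
  assert(getsockname(first, reinterpret_cast<sockaddr*>(&addr), &len) == 0);

  // A second socket that also sets SO_REUSEADDR can bind the same addr/port.
  const int second = socket(AF_INET, SOCK_DGRAM, 0);
  assert(second >= 0);
  assert(setsockopt(second, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0);
  assert(bind(second, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) == 0);

  // A third socket without SO_REUSEADDR is expected to get EADDRINUSE.
  const int third = socket(AF_INET, SOCK_DGRAM, 0);
  assert(third >= 0);
  assert(bind(third, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) == -1);
  assert(errno == EADDRINUSE);

  close(first);
  close(second);
  close(third);
  return 0;
}

The tests in the diff go further than this sketch: TCP variants, the interaction with SO_REUSEPORT, and the gVisor-specific SKIP_IF cases tracked by the FIXME bugs.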
PiperOrigin-RevId: 280049265 --- pkg/tcpip/transport/udp/endpoint.go | 4 + test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket_ip_udp_generic.cc | 128 ++- test/syscalls/linux/socket_ipv4_udp_unbound.cc | 1116 ++++++++++++-------- test/syscalls/linux/socket_ipv4_udp_unbound.h | 4 +- .../linux/socket_ipv4_udp_unbound_loopback.cc | 13 +- test/syscalls/linux/socket_test_util.h | 3 + 7 files changed, 789 insertions(+), 480 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 5270f24df..dda7af910 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -732,6 +732,10 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { *o = tcpip.MulticastLoopOption(v) return nil + case *tcpip.ReuseAddressOption: + *o = 0 + return nil + case *tcpip.ReusePortOption: e.mu.RLock() v := e.reusePort diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index f8b8cb724..6345ea28c 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2234,6 +2234,7 @@ cc_library( ":ip_socket_test_util", ":socket_test_util", "//test/util:test_util", + "@com_google_absl//absl/memory", "@com_google_googletest//:gtest", ], alwayslink = 1, diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index 2a4ed04a5..66eb68857 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -35,7 +35,7 @@ TEST_P(UDPSocketPairTest, MulticastTTLDefault) { int get = -1; socklen_t get_len = sizeof(get); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -52,7 +52,7 @@ TEST_P(UDPSocketPairTest, SetUDPMulticastTTLMin) { int get = -1; socklen_t get_len = sizeof(get); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -69,7 +69,7 @@ TEST_P(UDPSocketPairTest, SetUDPMulticastTTLMax) { int get = -1; socklen_t get_len = sizeof(get); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -91,7 +91,7 @@ TEST_P(UDPSocketPairTest, SetUDPMulticastTTLNegativeOne) { int get = -1; socklen_t get_len = sizeof(get); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -126,7 +126,7 @@ TEST_P(UDPSocketPairTest, SetUDPMulticastTTLChar) { int get = -1; socklen_t get_len = sizeof(get); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -147,7 +147,7 @@ TEST_P(UDPSocketPairTest, MulticastLoopDefault) { int get = -1; socklen_t get_len = sizeof(get); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, &get, &get_len), 
SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -163,7 +163,7 @@ TEST_P(UDPSocketPairTest, SetMulticastLoop) { int get = -1; socklen_t get_len = sizeof(get); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -173,7 +173,7 @@ TEST_P(UDPSocketPairTest, SetMulticastLoop) { &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceeds()); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -192,7 +192,7 @@ TEST_P(UDPSocketPairTest, SetMulticastLoopChar) { int get = -1; socklen_t get_len = sizeof(get); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); @@ -202,12 +202,120 @@ TEST_P(UDPSocketPairTest, SetMulticastLoopChar) { &kSockOptOnChar, sizeof(kSockOptOnChar)), SyscallSucceeds()); - EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); EXPECT_EQ(get, kSockOptOn); } +TEST_P(UDPSocketPairTest, ReuseAddrDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(UDPSocketPairTest, SetReuseAddr) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. 
+ SKIP_IF(IsRunningOnGvisor()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(UDPSocketPairTest, ReusePortDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(UDPSocketPairTest, SetReusePort) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +TEST_P(UDPSocketPairTest, SetReuseAddrReusePort) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEPORT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index b828b6844..00dc24928 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -18,9 +18,11 @@ #include #include #include + #include #include "gtest/gtest.h" +#include "absl/memory/memory.h" #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/test_util.h" @@ -51,26 +53,27 @@ TestAddress V4Broadcast() { // Check that packets are not received without a group membership. Default send // interface configured by bind. 
-TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNoGroup) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNoGroup) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind the first FD to the loopback. This is an alternative to // IP_MULTICAST_IF for setting the default send interface. auto sender_addr = V4Loopback(); EXPECT_THAT( - bind(sockets->first_fd(), reinterpret_cast(&sender_addr.addr), + bind(socket1->get(), reinterpret_cast(&sender_addr.addr), sender_addr.addr_len), SyscallSucceeds()); // Bind the second FD to the v4 any address. If multicast worked like unicast, // this would ensure that we get the packet. auto receiver_addr = V4Any(); - EXPECT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + EXPECT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -82,33 +85,33 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNoGroup) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - EXPECT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we did not receive the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - EXPECT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), + EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallFailsWithErrno(EAGAIN)); } // Check that not setting a default send interface prevents multicast packets // from being sent. Group membership interface configured by address. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackAddrNoDefaultSendIf) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackAddrNoDefaultSendIf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind the second FD to the v4 any address to ensure that we can receive any // unicast packet. 
auto receiver_addr = V4Any(); - EXPECT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + EXPECT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -118,8 +121,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackAddrNoDefaultSendIf) { ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -128,27 +131,27 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackAddrNoDefaultSendIf) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - EXPECT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallFailsWithErrno(ENETUNREACH)); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallFailsWithErrno(ENETUNREACH)); } // Check that not setting a default send interface prevents multicast packets // from being sent. Group membership interface configured by NIC ID. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNicNoDefaultSendIf) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNicNoDefaultSendIf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind the second FD to the v4 any address to ensure that we can receive any // unicast packet. auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -158,8 +161,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNicNoDefaultSendIf) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. 
@@ -168,35 +171,35 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNicNoDefaultSendIf) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - EXPECT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallFailsWithErrno(ENETUNREACH)); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallFailsWithErrno(ENETUNREACH)); } // Check that multicast works when the default send interface is configured by // bind and the group membership is configured by address. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackAddr) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind the first FD to the loopback. This is an alternative to // IP_MULTICAST_IF for setting the default send interface. auto sender_addr = V4Loopback(); ASSERT_THAT( - bind(sockets->first_fd(), reinterpret_cast(&sender_addr.addr), + bind(socket1->get(), reinterpret_cast(&sender_addr.addr), sender_addr.addr_len), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -206,8 +209,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackAddr) { ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -216,43 +219,42 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackAddr) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. 
char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } // Check that multicast works when the default send interface is configured by // bind and the group membership is configured by NIC ID. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNic) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind the first FD to the loopback. This is an alternative to // IP_MULTICAST_IF for setting the default send interface. auto sender_addr = V4Loopback(); ASSERT_THAT( - bind(sockets->first_fd(), reinterpret_cast(&sender_addr.addr), + bind(socket1->get(), reinterpret_cast(&sender_addr.addr), sender_addr.addr_len), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -262,8 +264,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNic) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -272,17 +274,15 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNic) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } @@ -290,25 +290,26 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackNic) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in sendto, and the group // membership is configured by address. 
-TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddr) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreq iface = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -318,8 +319,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddr) { ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -328,17 +329,15 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddr) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } @@ -346,25 +345,26 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddr) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in sendto, and the group // membership is configured by NIC ID. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNic) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. 
ip_mreqn iface = {}; iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -374,8 +374,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNic) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -384,17 +384,15 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNic) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } @@ -402,25 +400,26 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNic) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in connect, and the group // membership is configured by address. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrConnect) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrConnect) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreq iface = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -430,8 +429,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrConnect) { ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -439,22 +438,20 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrConnect) { reinterpret_cast(&connect_addr.addr)->sin_port = reinterpret_cast(&receiver_addr.addr)->sin_port; ASSERT_THAT( - RetryEINTR(connect)(sockets->first_fd(), + RetryEINTR(connect)(socket1->get(), reinterpret_cast(&connect_addr.addr), connect_addr.addr_len), SyscallSucceeds()); char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(send)(sockets->first_fd(), send_buf, sizeof(send_buf), 0), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } @@ -462,25 +459,26 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrConnect) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in connect, and the group // membership is configured by NIC ID. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicConnect) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicConnect) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreqn iface = {}; iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -490,8 +488,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicConnect) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -499,22 +497,20 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicConnect) { reinterpret_cast(&connect_addr.addr)->sin_port = reinterpret_cast(&receiver_addr.addr)->sin_port; ASSERT_THAT( - RetryEINTR(connect)(sockets->first_fd(), + RetryEINTR(connect)(socket1->get(), reinterpret_cast(&connect_addr.addr), connect_addr.addr_len), SyscallSucceeds()); char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(send)(sockets->first_fd(), send_buf, sizeof(send_buf), 0), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } @@ -522,25 +518,26 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicConnect) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in sendto, and the group // membership is configured by address. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelf) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreq iface = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); // Bind the first FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->first_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->first_fd(), + ASSERT_THAT(getsockname(socket1->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -550,8 +547,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelf) { ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -560,17 +557,15 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelf) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->first_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } @@ -578,25 +573,26 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelf) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in sendto, and the group // membership is configured by NIC ID. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelf) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreqn iface = {}; iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); // Bind the first FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->first_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->first_fd(), + ASSERT_THAT(getsockname(socket1->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -606,8 +602,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelf) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -616,17 +612,15 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelf) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->first_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } @@ -634,25 +628,26 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelf) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in connect, and the group // membership is configured by address. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelfConnect) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelfConnect) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreq iface = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); // Bind the first FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->first_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->first_fd(), + ASSERT_THAT(getsockname(socket1->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -662,8 +657,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelfConnect) { ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -671,20 +666,19 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelfConnect) { reinterpret_cast(&connect_addr.addr)->sin_port = reinterpret_cast(&receiver_addr.addr)->sin_port; EXPECT_THAT( - RetryEINTR(connect)(sockets->first_fd(), + RetryEINTR(connect)(socket1->get(), reinterpret_cast(&connect_addr.addr), connect_addr.addr_len), SyscallSucceeds()); char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(send)(sockets->first_fd(), send_buf, sizeof(send_buf), 0), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we did not receive the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), recv_buf, sizeof(recv_buf), + EXPECT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallFailsWithErrno(EAGAIN)); } @@ -692,25 +686,26 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelfConnect) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in connect, and the group // membership is configured by NIC ID. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelfConnect) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelfConnect) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreqn iface = {}; iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); // Bind the first FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->first_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->first_fd(), + ASSERT_THAT(getsockname(socket1->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -720,8 +715,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelfConnect) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -729,20 +724,19 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelfConnect) { reinterpret_cast(&connect_addr.addr)->sin_port = reinterpret_cast(&receiver_addr.addr)->sin_port; ASSERT_THAT( - RetryEINTR(connect)(sockets->first_fd(), + RetryEINTR(connect)(socket1->get(), reinterpret_cast(&connect_addr.addr), connect_addr.addr_len), SyscallSucceeds()); char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(send)(sockets->first_fd(), send_buf, sizeof(send_buf), 0), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(send)(socket1->get(), send_buf, sizeof(send_buf), 0), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we did not receive the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - EXPECT_THAT(RetryEINTR(recv)(sockets->first_fd(), recv_buf, sizeof(recv_buf), + EXPECT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallFailsWithErrno(EAGAIN)); } @@ -750,29 +744,30 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelfConnect) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in sendto, and the group // membership is configured by address. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelfNoLoop) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfAddrSelfNoLoop) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreq iface = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_LOOP, &kSockOptOff, sizeof(kSockOptOff)), SyscallSucceeds()); // Bind the first FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->first_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->first_fd(), + ASSERT_THAT(getsockname(socket1->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -782,8 +777,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelfNoLoop) { ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -792,17 +787,15 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelfNoLoop) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->first_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } @@ -810,29 +803,30 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfAddrSelfNoLoop) { // Check that multicast works when the default send interface is configured by // IP_MULTICAST_IF, the send address is specified in sendto, and the group // membership is configured by NIC ID. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelfNoLoop) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackIfNicSelfNoLoop) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Set the default send interface. ip_mreqn iface = {}; iface.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_LOOP, &kSockOptOff, sizeof(kSockOptOff)), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->first_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->first_fd(), + ASSERT_THAT(getsockname(socket1->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -842,8 +836,8 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelfNoLoop) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -852,57 +846,57 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastLoopbackIfNicSelfNoLoop) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT( - RetryEINTR(recv)(sockets->first_fd(), recv_buf, sizeof(recv_buf), 0), - SyscallSucceedsWithValue(sizeof(recv_buf))); + ASSERT_THAT(RetryEINTR(recv)(socket1->get(), recv_buf, sizeof(recv_buf), 0), + SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } // Check that dropping a group membership that does not exist fails. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastInvalidDrop) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastInvalidDrop) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Unregister from a membership that we didn't have. ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_DROP_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), SyscallFailsWithErrno(EADDRNOTAVAIL)); } // Check that dropping a group membership prevents multicast packets from being // delivered. Default send address configured by bind and group membership // interface configured by address. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastDropAddr) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastDropAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind the first FD to the loopback. This is an alternative to // IP_MULTICAST_IF for setting the default send interface. 
auto sender_addr = V4Loopback(); EXPECT_THAT( - bind(sockets->first_fd(), reinterpret_cast(&sender_addr.addr), + bind(socket1->get(), reinterpret_cast(&sender_addr.addr), sender_addr.addr_len), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. auto receiver_addr = V4Any(); - EXPECT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + EXPECT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -912,11 +906,11 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastDropAddr) { ip_mreq group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_DROP_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -925,15 +919,14 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastDropAddr) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - EXPECT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we did not receive the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - EXPECT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), + EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallFailsWithErrno(EAGAIN)); } @@ -941,26 +934,27 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastDropAddr) { // Check that dropping a group membership prevents multicast packets from being // delivered. Default send address configured by bind and group membership // interface configured by NIC ID. -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastDropNic) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastDropNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind the first FD to the loopback. This is an alternative to // IP_MULTICAST_IF for setting the default send interface. auto sender_addr = V4Loopback(); EXPECT_THAT( - bind(sockets->first_fd(), reinterpret_cast(&sender_addr.addr), + bind(socket1->get(), reinterpret_cast(&sender_addr.addr), sender_addr.addr_len), SyscallSucceeds()); // Bind the second FD to the v4 any address to ensure that we can receive the // multicast packet. 
auto receiver_addr = V4Any(); - EXPECT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + EXPECT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -970,11 +964,11 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastDropNic) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_DROP_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet. @@ -983,50 +977,53 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastDropNic) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - EXPECT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&send_addr.addr), - send_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + EXPECT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we did not receive the multicast packet. 
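// The "did not receive" assertions in these tests rely on a non-blocking
// read: recv() with MSG_DONTWAIT fails immediately with EAGAIN when nothing
// is queued, instead of blocking. A small helper capturing that pattern might
// look like this (hypothetical; the tests simply inline the call):
#include <cerrno>
#include <sys/socket.h>
#include <sys/types.h>

// Returns true if no datagram is currently queued on fd.
inline bool NoPacketQueued(int fd) {
  char buf[1];
  const ssize_t n = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
  return n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK);
}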
char recv_buf[sizeof(send_buf)] = {}; - EXPECT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), + EXPECT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallFailsWithErrno(EAGAIN)); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfZero) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfZero) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn iface = {}; - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfInvalidNic) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfInvalidNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn iface = {}; iface.imr_ifindex = -1; - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallFailsWithErrno(EADDRNOTAVAIL)); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfInvalidAddr) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfInvalidAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreq iface = {}; iface.imr_interface.s_addr = inet_addr("255.255.255"); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallFailsWithErrno(EADDRNOTAVAIL)); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetShort) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetShort) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Create a valid full-sized request. ip_mreqn iface = {}; @@ -1034,29 +1031,31 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetShort) { // Send an optlen of 1 to check that optlen is enforced. 
EXPECT_THAT( - setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &iface, 1), + setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, 1), SyscallFailsWithErrno(EINVAL)); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfDefault) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfDefault) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); in_addr get = {}; socklen_t size = sizeof(get); ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), SyscallSucceeds()); EXPECT_EQ(size, sizeof(get)); EXPECT_EQ(get.s_addr, 0); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfDefaultReqn) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfDefaultReqn) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn get = {}; socklen_t size = sizeof(get); ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), SyscallSucceeds()); // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the @@ -1071,19 +1070,20 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfDefaultReqn) { EXPECT_EQ(get.imr_ifindex, 0); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetAddrGetReqn) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetAddrGetReqn) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); in_addr set = {}; set.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &set, + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, sizeof(set)), SyscallSucceeds()); ip_mreqn get = {}; socklen_t size = sizeof(get); ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), SyscallSucceeds()); // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the @@ -1095,19 +1095,20 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetAddrGetReqn) { EXPECT_EQ(get.imr_ifindex, 0); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetReqAddrGetReqn) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetReqAddrGetReqn) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreq set = {}; set.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &set, + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, sizeof(set)), SyscallSucceeds()); ip_mreqn get = {}; socklen_t size = sizeof(get); ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), SyscallSucceeds()); // getsockopt(IP_MULTICAST_IF) can only return an in_addr, so it treats the @@ -1119,19 +1120,20 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetReqAddrGetReqn) { EXPECT_EQ(get.imr_ifindex, 0); } -TEST_P(IPv4UDPUnboundSocketPairTest, 
IpMulticastIfSetNicGetReqn) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetNicGetReqn) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn set = {}; set.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &set, + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, sizeof(set)), SyscallSucceeds()); ip_mreqn get = {}; socklen_t size = sizeof(get); ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), SyscallSucceeds()); EXPECT_EQ(size, sizeof(in_addr)); EXPECT_EQ(get.imr_multiaddr.s_addr, 0); @@ -1139,87 +1141,93 @@ TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetNicGetReqn) { EXPECT_EQ(get.imr_ifindex, 0); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetAddr) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); in_addr set = {}; set.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &set, + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, sizeof(set)), SyscallSucceeds()); in_addr get = {}; socklen_t size = sizeof(get); ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), SyscallSucceeds()); EXPECT_EQ(size, sizeof(get)); EXPECT_EQ(get.s_addr, set.s_addr); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetReqAddr) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetReqAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreq set = {}; set.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &set, + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, sizeof(set)), SyscallSucceeds()); in_addr get = {}; socklen_t size = sizeof(get); ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), SyscallSucceeds()); EXPECT_EQ(size, sizeof(get)); EXPECT_EQ(get.s_addr, set.imr_interface.s_addr); } -TEST_P(IPv4UDPUnboundSocketPairTest, IpMulticastIfSetNic) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIfSetNic) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn set = {}; set.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &set, + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &set, sizeof(set)), SyscallSucceeds()); in_addr get = {}; socklen_t size = sizeof(get); ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), + getsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &get, &size), SyscallSucceeds()); EXPECT_EQ(size, sizeof(get)); EXPECT_EQ(get.s_addr, 0); } 
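// The getter/setter tests above hinge on an asymmetry in the Linux API:
// IP_MULTICAST_IF can be set with an in_addr, an ip_mreq, or an ip_mreqn, but
// getsockopt() only ever reports an in_addr back, so an interface chosen by
// index reads back as 0.0.0.0. A minimal standalone sketch of that asymmetry
// (helper name is illustrative only):
#include <net/if.h>
#include <netinet/in.h>
#include <sys/socket.h>

// Sets the default multicast interface by index and reads it back; on
// success *out_len comes back as sizeof(in_addr) even though an ip_mreqn was
// used to set it, and out->s_addr is 0.
inline int SetByIndexGetAsAddr(int fd, in_addr* out, socklen_t* out_len) {
  ip_mreqn set = {};
  set.imr_ifindex = if_nametoindex("lo");
  if (setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, &set, sizeof(set)) != 0) {
    return -1;
  }
  *out_len = sizeof(*out);
  return getsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, out, out_len);
}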
-TEST_P(IPv4UDPUnboundSocketPairTest, TestJoinGroupNoIf) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, TestJoinGroupNoIf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallFailsWithErrno(ENODEV)); } -TEST_P(IPv4UDPUnboundSocketPairTest, TestJoinGroupInvalidIf) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, TestJoinGroupInvalidIf) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn group = {}; group.imr_address.s_addr = inet_addr("255.255.255"); group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallFailsWithErrno(ENODEV)); } // Check that multiple memberships are not allowed on the same socket. -TEST_P(IPv4UDPUnboundSocketPairTest, TestMultipleJoinsOnSingleSocket) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - auto fd = sockets->first_fd(); +TEST_P(IPv4UDPUnboundSocketTest, TestMultipleJoinsOnSingleSocket) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto fd = socket1->get(); ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); @@ -1234,41 +1242,44 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestMultipleJoinsOnSingleSocket) { } // Check that two sockets can join the same multicast group at the same time. -TEST_P(IPv4UDPUnboundSocketPairTest, TestTwoSocketsJoinSameMulticastGroup) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, TestTwoSocketsJoinSameMulticastGroup) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Drop the membership twice on each socket, the second call for each socket // should fail. 
- EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_DROP_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); - EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_DROP_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), SyscallFailsWithErrno(EADDRNOTAVAIL)); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_DROP_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); - EXPECT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_DROP_MEMBERSHIP, - &group, sizeof(group)), + EXPECT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_DROP_MEMBERSHIP, &group, + sizeof(group)), SyscallFailsWithErrno(EADDRNOTAVAIL)); } // Check that two sockets can join the same multicast group at the same time, // and both will receive data on it. -TEST_P(IPv4UDPUnboundSocketPairTest, TestMcastReceptionOnTwoSockets) { +TEST_P(IPv4UDPUnboundSocketTest, TestMcastReceptionOnTwoSockets) { std::unique_ptr socket_pairs[2] = { - ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()), - ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair())}; + absl::make_unique(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket())), + absl::make_unique(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket()))}; ip_mreq iface = {}, group = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); @@ -1338,11 +1349,12 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestMcastReceptionOnTwoSockets) { // Check that on two sockets that joined a group and listen on ANY, dropping // memberships one by one will continue to deliver packets to both sockets until // both memberships have been dropped. -TEST_P(IPv4UDPUnboundSocketPairTest, - TestMcastReceptionWhenDroppingMemberships) { +TEST_P(IPv4UDPUnboundSocketTest, TestMcastReceptionWhenDroppingMemberships) { std::unique_ptr socket_pairs[2] = { - ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()), - ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair())}; + absl::make_unique(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket())), + absl::make_unique(ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), + ASSERT_NO_ERRNO_AND_VALUE(NewSocket()))}; ip_mreq iface = {}, group = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); @@ -1437,18 +1449,19 @@ TEST_P(IPv4UDPUnboundSocketPairTest, // Check that a receiving socket can bind to the multicast address before // joining the group and receive data once the group has been joined. -TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenJoinThenReceive) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenJoinThenReceive) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind second socket (receiver) to the multicast address. auto receiver_addr = V4Multicast(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); // Update receiver_addr with the correct port number. 
socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -1458,30 +1471,29 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenJoinThenReceive) { ip_mreqn group = {}; group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); - ASSERT_THAT(setsockopt(sockets->second_fd(), IPPROTO_IP, IP_ADD_MEMBERSHIP, - &group, sizeof(group)), + ASSERT_THAT(setsockopt(socket2->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, &group, + sizeof(group)), SyscallSucceeds()); // Send a multicast packet on the first socket out the loopback interface. ip_mreq iface = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); auto sendto_addr = V4Multicast(); reinterpret_cast(&sendto_addr.addr)->sin_port = reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&sendto_addr.addr), - sendto_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); @@ -1489,18 +1501,19 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenJoinThenReceive) { // Check that a receiving socket can bind to the multicast address and won't // receive multicast data if it hasn't joined the group. -TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenNoJoinThenNoReceive) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenNoJoinThenNoReceive) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind second socket (receiver) to the multicast address. auto receiver_addr = V4Multicast(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); // Update receiver_addr with the correct port number. socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -1509,40 +1522,40 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenNoJoinThenNoReceive) { // Send a multicast packet on the first socket out the loopback interface. 
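// Taken together, the join/no-join pair of tests here shows that bind()ing a
// UDP socket to a multicast address only claims the port and filters on the
// destination address; datagrams are delivered only once the group is
// actually joined with IP_ADD_MEMBERSHIP. A condensed receiver-side sketch
// (helper name and the group/port arguments are illustrative only):
#include <arpa/inet.h>
#include <cstdint>
#include <netinet/in.h>
#include <sys/socket.h>

// Binds fd to group:port and joins the group on the loopback interface.
inline int SetUpMulticastReceiver(int fd, const char* group, uint16_t port) {
  sockaddr_in addr = {};
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = inet_addr(group);
  addr.sin_port = htons(port);
  if (bind(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) != 0) {
    return -1;
  }
  ip_mreq req = {};
  req.imr_multiaddr.s_addr = inet_addr(group);
  req.imr_interface.s_addr = htonl(INADDR_LOOPBACK);
  // Without this join, nothing sent to the group is delivered to fd.
  return setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &req, sizeof(req));
}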
ip_mreq iface = {}; iface.imr_interface.s_addr = htonl(INADDR_LOOPBACK); - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_IF, - &iface, sizeof(iface)), + ASSERT_THAT(setsockopt(socket1->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, + sizeof(iface)), SyscallSucceeds()); auto sendto_addr = V4Multicast(); reinterpret_cast(&sendto_addr.addr)->sin_port = reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&sendto_addr.addr), - sendto_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we don't receive the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallFailsWithErrno(EAGAIN)); } // Check that a socket can bind to a multicast address and still send out // packets. -TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenSend) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, TestBindToMcastThenSend) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind second socket (receiver) to the ANY address. auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -1551,11 +1564,11 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenSend) { // Bind the first socket (sender) to the multicast address. auto sender_addr = V4Multicast(); ASSERT_THAT( - bind(sockets->first_fd(), reinterpret_cast(&sender_addr.addr), + bind(socket1->get(), reinterpret_cast(&sender_addr.addr), sender_addr.addr_len), SyscallSucceeds()); socklen_t sender_addr_len = sender_addr.addr_len; - ASSERT_THAT(getsockname(sockets->first_fd(), + ASSERT_THAT(getsockname(socket1->get(), reinterpret_cast(&sender_addr.addr), &sender_addr_len), SyscallSucceeds()); @@ -1567,15 +1580,14 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenSend) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&sendto_addr.addr), - sendto_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the packet. 
char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); @@ -1583,46 +1595,46 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToMcastThenSend) { // Check that a receiving socket can bind to the broadcast address and receive // broadcast packets. -TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToBcastThenReceive) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, TestBindToBcastThenReceive) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind second socket (receiver) to the broadcast address. auto receiver_addr = V4Broadcast(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); // Send a broadcast packet on the first socket out the loopback interface. - EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_BROADCAST, - &kSockOptOn, sizeof(kSockOptOn)), + EXPECT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_BROADCAST, &kSockOptOn, + sizeof(kSockOptOn)), SyscallSucceedsWithValue(0)); // Note: Binding to the loopback interface makes the broadcast go out of it. auto sender_bind_addr = V4Loopback(); - ASSERT_THAT(bind(sockets->first_fd(), - reinterpret_cast(&sender_bind_addr.addr), - sender_bind_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket1->get(), reinterpret_cast(&sender_bind_addr.addr), + sender_bind_addr.addr_len), + SyscallSucceeds()); auto sendto_addr = V4Broadcast(); reinterpret_cast(&sendto_addr.addr)->sin_port = reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&sendto_addr.addr), - sendto_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the multicast packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); @@ -1630,17 +1642,18 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToBcastThenReceive) { // Check that a socket can bind to the broadcast address and still send out // packets. 
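// The SO_BROADCAST step in the broadcast test above is not optional: Linux
// rejects sendto() to a broadcast destination with EACCES unless the option
// is enabled on the sending socket. A minimal sender-side sketch (helper name
// is illustrative only):
#include <cstdint>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>

// Enables broadcast on fd and sends one datagram to 255.255.255.255:port.
inline ssize_t SendBroadcast(int fd, const void* buf, size_t len,
                             uint16_t port) {
  const int on = 1;
  if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on)) != 0) {
    return -1;
  }
  sockaddr_in dst = {};
  dst.sin_family = AF_INET;
  dst.sin_addr.s_addr = htonl(INADDR_BROADCAST);
  dst.sin_port = htons(port);
  return sendto(fd, buf, len, 0, reinterpret_cast<const sockaddr*>(&dst),
                sizeof(dst));
}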
-TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToBcastThenSend) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); +TEST_P(IPv4UDPUnboundSocketTest, TestBindToBcastThenSend) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Bind second socket (receiver) to the ANY address. auto receiver_addr = V4Any(); - ASSERT_THAT(bind(sockets->second_fd(), - reinterpret_cast(&receiver_addr.addr), - receiver_addr.addr_len), - SyscallSucceeds()); + ASSERT_THAT( + bind(socket2->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); socklen_t receiver_addr_len = receiver_addr.addr_len; - ASSERT_THAT(getsockname(sockets->second_fd(), + ASSERT_THAT(getsockname(socket2->get(), reinterpret_cast(&receiver_addr.addr), &receiver_addr_len), SyscallSucceeds()); @@ -1649,11 +1662,11 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToBcastThenSend) { // Bind the first socket (sender) to the broadcast address. auto sender_addr = V4Broadcast(); ASSERT_THAT( - bind(sockets->first_fd(), reinterpret_cast(&sender_addr.addr), + bind(socket1->get(), reinterpret_cast(&sender_addr.addr), sender_addr.addr_len), SyscallSucceeds()); socklen_t sender_addr_len = sender_addr.addr_len; - ASSERT_THAT(getsockname(sockets->first_fd(), + ASSERT_THAT(getsockname(socket1->get(), reinterpret_cast(&sender_addr.addr), &sender_addr_len), SyscallSucceeds()); @@ -1665,19 +1678,202 @@ TEST_P(IPv4UDPUnboundSocketPairTest, TestBindToBcastThenSend) { reinterpret_cast(&receiver_addr.addr)->sin_port; char send_buf[200]; RandomizeBuffer(send_buf, sizeof(send_buf)); - ASSERT_THAT( - RetryEINTR(sendto)(sockets->first_fd(), send_buf, sizeof(send_buf), 0, - reinterpret_cast(&sendto_addr.addr), - sendto_addr.addr_len), - SyscallSucceedsWithValue(sizeof(send_buf))); + ASSERT_THAT(RetryEINTR(sendto)(socket1->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&sendto_addr.addr), + sendto_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); // Check that we received the packet. char recv_buf[sizeof(send_buf)] = {}; - ASSERT_THAT(RetryEINTR(recv)(sockets->second_fd(), recv_buf, sizeof(recv_buf), + ASSERT_THAT(RetryEINTR(recv)(socket2->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), SyscallSucceedsWithValue(sizeof(recv_buf))); EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); } +// Check that SO_REUSEADDR always delivers to the most recently bound socket. +TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrDistribution) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + + std::vector> sockets; + sockets.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(NewSocket())); + + ASSERT_THAT(setsockopt(sockets[0]->get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(sockets[0]->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(sockets[0]->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + constexpr int kMessageSize = 200; + + for (int i = 0; i < 10; i++) { + // Add a new receiver. 
+ sockets.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(NewSocket())); + auto& last = sockets.back(); + ASSERT_THAT(setsockopt(last->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(last->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Send a new message to the SO_REUSEADDR group. We use a new socket each + // time so that a new ephemeral port will be used each time. This ensures + // that we aren't doing REUSEPORT-like hash load blancing. + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + char send_buf[kMessageSize]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + EXPECT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Verify that the most recent socket got the message. We don't expect any + // of the other sockets to have received it, but we will check that later. + char recv_buf[sizeof(send_buf)] = {}; + EXPECT_THAT( + RetryEINTR(recv)(last->get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT), + SyscallSucceedsWithValue(sizeof(send_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); + } + + // Verify that no other messages were received. + for (auto& socket : sockets) { + char recv_buf[kMessageSize] = {}; + EXPECT_THAT(RetryEINTR(recv)(socket->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + } +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrThenReusePort) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReusePortThenReuseAddr) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEADDR. 
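// Context for the EADDRINUSE expectations in these two tests: on Linux, any
// number of UDP sockets that all set SO_REUSEADDR may bind the same address,
// and the most recently bound one receives the traffic (ReuseAddrDistribution
// above), but a socket that only sets SO_REUSEADDR and one that only sets
// SO_REUSEPORT can never share an address, in either bind order. A minimal
// sketch of the plain SO_REUSEADDR case (helper name is illustrative only):
#include <cstdint>
#include <netinet/in.h>
#include <sys/socket.h>

// Binds fd to 127.0.0.1:port with SO_REUSEADDR set; of several such sockets
// bound to the same port, the last one bound is the one that receives.
inline int BindWithReuseAddr(int fd, uint16_t port) {
  const int on = 1;
  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0) {
    return -1;
  }
  sockaddr_in addr = {};
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  addr.sin_port = htons(port);
  return bind(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr));
}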
+ ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +// Check that REUSEPORT takes precedence over REUSEADDR. +TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) { + auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto receiver2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ASSERT_THAT(setsockopt(receiver1->get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(receiver1->get(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(receiver1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(receiver1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind receiver2 to the same address as socket1, also with REUSEADDR and + // REUSEPORT. + ASSERT_THAT(setsockopt(receiver2->get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(receiver2->get(), SOL_SOCKET, SO_REUSEPORT, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(receiver2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + constexpr int kMessageSize = 10; + + for (int i = 0; i < 100; ++i) { + // Send a new message to the REUSEADDR/REUSEPORT group. We use a new socket + // each time so that a new ephemerial port will be used each time. This + // ensures that we cycle through hashes. + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + char send_buf[kMessageSize] = {}; + EXPECT_THAT(RetryEINTR(sendto)(sender->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + } + + // Check that both receivers got messages. This checks that we are using load + // balancing (REUSEPORT) instead of the most recently bound socket + // (REUSEADDR). + char recv_buf[kMessageSize] = {}; + EXPECT_THAT(RetryEINTR(recv)(receiver1->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallSucceedsWithValue(kMessageSize)); + EXPECT_THAT(RetryEINTR(recv)(receiver2->get(), recv_buf, sizeof(recv_buf), + MSG_DONTWAIT), + SyscallSucceedsWithValue(kMessageSize)); +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.h b/test/syscalls/linux/socket_ipv4_udp_unbound.h index 8e07bfbbf..f64c57645 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.h +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.h @@ -20,8 +20,8 @@ namespace gvisor { namespace testing { -// Test fixture for tests that apply to pairs of IPv4 UDP sockets. -using IPv4UDPUnboundSocketPairTest = SocketPairTest; +// Test fixture for tests that apply to IPv4 UDP sockets. 
+using IPv4UDPUnboundSocketTest = SimpleSocketTest; } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc index cb0105471..f121c044d 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_loopback.cc @@ -22,14 +22,11 @@ namespace gvisor { namespace testing { -std::vector GetSocketPairs() { - return ApplyVec( - IPv4UDPUnboundSocketPair, - AllBitwiseCombinations(List{0, SOCK_NONBLOCK})); -} - -INSTANTIATE_TEST_SUITE_P(IPv4UDPSockets, IPv4UDPUnboundSocketPairTest, - ::testing::ValuesIn(GetSocketPairs())); +INSTANTIATE_TEST_SUITE_P( + IPv4UDPSockets, IPv4UDPUnboundSocketTest, + ::testing::ValuesIn(ApplyVec(IPv4UDPUnboundSocket, + AllBitwiseCombinations(List{ + 0, SOCK_NONBLOCK})))); } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h index be38907c2..2dbb8bed3 100644 --- a/test/syscalls/linux/socket_test_util.h +++ b/test/syscalls/linux/socket_test_util.h @@ -114,6 +114,9 @@ class FDSocketPair : public SocketPair { public: FDSocketPair(int first_fd, int second_fd) : first_(first_fd), second_(second_fd) {} + FDSocketPair(std::unique_ptr first_fd, + std::unique_ptr second_fd) + : first_(first_fd->release()), second_(second_fd->release()) {} int first_fd() const override { return first_.get(); } int second_fd() const override { return second_.get(); } -- cgit v1.2.3 From 2c6c9af904c99371fe4381517375cd114917db59 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Tue, 12 Nov 2019 20:37:40 -0800 Subject: Add UDP SO_REUSEADDR/SO_REUSEPORT conversion tests. Add additional tests for UDP SO_REUSEADDR and SO_REUSEPORT interaction. If all existing all currently bound sockets as well as the current binding socket have SO_REUSEADDR, or if all existing all currently bound sockets as well as the current binding socket have SO_REUSEPORT, binding a currently bound address is allowed. This seems odd since it means that the SO_REUSEADDR/SO_REUSEPORT behavior can change with the binding of additional sockets. PiperOrigin-RevId: 280116163 --- test/syscalls/linux/socket_ipv4_udp_unbound.cc | 274 +++++++++++++++++++++++++ 1 file changed, 274 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index 00dc24928..6b1af6c17 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -1814,6 +1814,280 @@ TEST_P(IPv4UDPUnboundSocketTest, BindReusePortThenReuseAddr) { SyscallFailsWithErrno(EADDRINUSE)); } +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertableToReusePort) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. 
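// The rule these "Convertable" tests pin down, stated plainly: binding an
// already-bound address succeeds only when every currently bound socket and
// the newly binding socket all have SO_REUSEADDR set, or all have
// SO_REUSEPORT set. A socket that sets both options can satisfy either side,
// so which side applies can change as such sockets bind and close. A small
// helper in the spirit of socket1 above (helper name is illustrative only):
#include <sys/socket.h>

// Marks fd as compatible with both an all-REUSEADDR group and an
// all-REUSEPORT group for later binds to the same address.
inline int EnableReuseAddrAndPort(int fd) {
  const int on = 1;
  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0) {
    return -1;
  }
  return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
}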
+ auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Bind socket3 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertableToReuseAddr) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Bind socket3 to the same address as socket1, only with REUSEPORT. + ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable1) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. 
+ auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Close socket2 to revert to just socket1 with REUSEADDR and REUSEPORT. + socket2->reset(); + + // Bind socket3 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable2) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Close socket2 to revert to just socket1 with REUSEADDR and REUSEPORT. + socket2->reset(); + + // Bind socket3 to the same address as socket1, only with REUSEPORT. + ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindDoubleReuseAddrReusePortThenReusePort) { + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. 
+ auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, also with REUSEADDR and + // REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(bind(socket2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Bind socket3 to the same address as socket1, only with REUSEPORT. + ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); +} + +TEST_P(IPv4UDPUnboundSocketTest, BindDoubleReuseAddrReusePortThenReuseAddr) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + + auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind socket1 with REUSEADDR and REUSEPORT. + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket1->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(socket1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(socket1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Bind socket2 to the same address as socket1, also with REUSEADDR and + // REUSEPORT. + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(setsockopt(socket2->get(), SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(bind(socket2->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + + // Bind socket3 to the same address as socket1, only with REUSEADDR. + ASSERT_THAT(setsockopt(socket3->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(socket3->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); +} + // Check that REUSEPORT takes precedence over REUSEADDR. 
TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) { auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); -- cgit v1.2.3 From 683e8798ab4c2bde60f067563eef0cf06dc9bda5 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Wed, 13 Nov 2019 13:20:29 -0800 Subject: Extract linux-specific test setup to separate file PiperOrigin-RevId: 280264564 --- test/util/BUILD | 8 +++++++- test/util/test_util.cc | 13 ------------- test/util/test_util.h | 2 ++ test/util/test_util_impl.cc | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 test/util/test_util_impl.cc (limited to 'test') diff --git a/test/util/BUILD b/test/util/BUILD index 5d2a9cc2c..4526bb3f1 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -232,7 +232,13 @@ cc_library( cc_library( name = "test_util", testonly = 1, - srcs = ["test_util.cc"], + srcs = [ + "test_util.cc", + ] + select_for_linux( + [ + "test_util_impl.cc", + ], + ), hdrs = ["test_util.h"], deps = [ ":fs_util", diff --git a/test/util/test_util.cc b/test/util/test_util.cc index ba0dcf7d0..9cb050735 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -116,9 +116,6 @@ PosixErrorOr GetKernelVersion() { return ParseKernelVersion(buf.release); } -void SetupGvisorDeathTest() { -} - std::string CPUSetToString(const cpu_set_t& set, size_t cpus) { std::string str = "cpuset["; for (unsigned int n = 0; n < cpus; n++) { @@ -224,15 +221,5 @@ bool Equivalent(uint64_t current, uint64_t target, double tolerance) { return abs_diff <= static_cast(tolerance * target); } -void TestInit(int* argc, char*** argv) { - ::testing::InitGoogleTest(argc, *argv); - ::absl::ParseCommandLine(*argc, *argv); - - // Always mask SIGPIPE as it's common and tests aren't expected to handle it. - struct sigaction sa = {}; - sa.sa_handler = SIG_IGN; - TEST_CHECK(sigaction(SIGPIPE, &sa, nullptr) == 0); -} - } // namespace testing } // namespace gvisor diff --git a/test/util/test_util.h b/test/util/test_util.h index b9d2dc2ba..dc30575b8 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -221,7 +221,9 @@ enum class Platform { bool IsRunningOnGvisor(); Platform GvisorPlatform(); +#ifdef __linux__ void SetupGvisorDeathTest(); +#endif struct KernelVersion { int major; diff --git a/test/util/test_util_impl.cc b/test/util/test_util_impl.cc new file mode 100644 index 000000000..ba7c0a85b --- /dev/null +++ b/test/util/test_util_impl.cc @@ -0,0 +1,38 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "gtest/gtest.h" +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "test/util/logging.h" + +namespace gvisor { +namespace testing { + +void SetupGvisorDeathTest() {} + +void TestInit(int* argc, char*** argv) { + ::testing::InitGoogleTest(argc, *argv); + ::absl::ParseCommandLine(*argc, *argv); + + // Always mask SIGPIPE as it's common and tests aren't expected to handle it. 
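// Rationale for the SIG_IGN setup below: the default disposition for SIGPIPE
// terminates the process, so a single write to a peer-closed pipe or socket
// would take down the whole test binary; with SIGPIPE ignored, such a write
// fails with EPIPE instead, which individual tests can assert on. A minimal
// standalone illustration (helper name is made up; TestInit itself uses
// sigaction as shown below):
#include <csignal>
#include <unistd.h>

// With SIGPIPE ignored, writing to a broken pipe returns -1 with errno EPIPE
// rather than killing the process.
inline ssize_t WriteIgnoringSigpipe(int fd, const void* buf, size_t len) {
  signal(SIGPIPE, SIG_IGN);
  return write(fd, buf, len);
}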
+ struct sigaction sa = {}; + sa.sa_handler = SIG_IGN; + TEST_CHECK(sigaction(SIGPIPE, &sa, nullptr) == 0); +} + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From 1e55eb3800a60c1a1118b84f2534b78481702f38 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 13 Nov 2019 15:34:47 -0800 Subject: test/syscalls/proc: check an return code of waitid PiperOrigin-RevId: 280295208 --- test/syscalls/linux/proc.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index e4c030bbb..512de5ee0 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -183,7 +183,8 @@ PosixError WithSubprocess(SubprocessCallback const& running, siginfo_t info; // Wait until the child process has exited (WEXITED flag) but don't // reap the child (WNOWAIT flag). - waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED); + EXPECT_THAT(waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED), + SyscallSucceeds()); if (zombied) { // Arg of "Z" refers to a Zombied Process. -- cgit v1.2.3 From 1e1f5ce08210af6211bcb1c8da293a63a79165fe Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 14 Nov 2019 15:04:25 -0800 Subject: Allow all runtime tests for a language to be run via a single command. This was intended behavior per the README, but running tests without the --test flag caused an error. Users can now omit the --test flag to run every test for a runtime. PiperOrigin-RevId: 280522025 --- test/runtimes/images/proctor/proctor.go | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/runtimes/images/proctor/proctor.go b/test/runtimes/images/proctor/proctor.go index e6178e82b..b54abe434 100644 --- a/test/runtimes/images/proctor/proctor.go +++ b/test/runtimes/images/proctor/proctor.go @@ -39,10 +39,10 @@ type TestRunner interface { } var ( - runtime = flag.String("runtime", "", "name of runtime") - list = flag.Bool("list", false, "list all available tests") - test = flag.String("test", "", "run a single test from the list of available tests") - pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children") + runtime = flag.String("runtime", "", "name of runtime") + list = flag.Bool("list", false, "list all available tests") + testName = flag.String("test", "", "run a single test from the list of available tests") + pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children") ) func main() { @@ -74,14 +74,23 @@ func main() { return } - // Run a single test. - if *test == "" { - log.Fatalf("test flag must be provided") + var tests []string + if *testName == "" { + // Run every test. + tests, err = tr.ListTests() + if err != nil { + log.Fatalf("failed to get all tests: %v", err) + } + } else { + // Run a single test. + tests = []string{*testName} } - cmd := tr.TestCmd(*test) - cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr - if err := cmd.Run(); err != nil { - log.Fatalf("FAIL: %v", err) + for _, test := range tests { + cmd := tr.TestCmd(test) + cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr + if err := cmd.Run(); err != nil { + log.Fatalf("FAIL: %v", err) + } } } -- cgit v1.2.3 From 339536de5eefe782813aabae4aeeb312b3c4dde7 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 14 Nov 2019 15:55:07 -0800 Subject: Check that a file is a regular file with open(O_TRUNC). 
It was possible to panic the sentry by opening a cache revalidating folder with O_TRUNC|O_CREAT. Avoids breaking php tests. PiperOrigin-RevId: 280533213 --- pkg/sentry/fs/inode.go | 4 ++++ pkg/sentry/fs/tty/master.go | 1 + pkg/sentry/fs/tty/slave.go | 1 + pkg/sentry/syscalls/linux/sys_file.go | 9 +++++---- test/syscalls/linux/open.cc | 22 ++++++++++++++++++++++ test/syscalls/linux/open_create.cc | 24 ++++++++++++++++++++++++ test/syscalls/linux/pty.cc | 20 +++++++++++++++++++- test/util/pty_util.cc | 10 +++++++++- test/util/pty_util.h | 3 +++ 9 files changed, 88 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index f4ddfa406..2d43dff1d 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -344,6 +344,10 @@ func (i *Inode) SetTimestamps(ctx context.Context, d *Dirent, ts TimeSpec) error // Truncate calls i.InodeOperations.Truncate with i as the Inode. func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error { + if IsDir(i.StableAttr) { + return syserror.EISDIR + } + if i.overlay != nil { return overlayTruncate(ctx, i.overlay, d, size) } diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index 19b7557d5..bc56be696 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -32,6 +32,7 @@ import ( // +stateify savable type masterInodeOperations struct { fsutil.SimpleFileInode + fsutil.InodeNoopTruncate // d is the containing dir. d *dirInodeOperations diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 944c4ada1..4cbea0367 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -31,6 +31,7 @@ import ( // +stateify savable type slaveInodeOperations struct { fsutil.SimpleFileInode + fsutil.InodeNoopTruncate // d is the containing dir. 
d *dirInodeOperations diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index b9a8e3e21..167c2b60b 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -169,10 +169,11 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint if dirPath { return syserror.ENOTDIR } - if flags&linux.O_TRUNC != 0 { - if err := d.Inode.Truncate(t, d, 0); err != nil { - return err - } + } + + if flags&linux.O_TRUNC != 0 { + if err := d.Inode.Truncate(t, d, 0); err != nil { + return err } } diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index 2b1df52ce..267ae19f6 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -73,6 +73,28 @@ class OpenTest : public FileTest { const std::string test_data_ = "hello world\n"; }; +TEST_F(OpenTest, OTrunc) { + auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_TRUNC, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, OTruncAndReadOnlyDir) { + auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, OTruncAndReadOnlyFile) { + auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncfile"); + const FileDescriptor existing = + ASSERT_NO_ERRNO_AND_VALUE(Open(dirpath.c_str(), O_RDWR | O_CREAT, 0666)); + const FileDescriptor otrunc = ASSERT_NO_ERRNO_AND_VALUE( + Open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666)); +} + TEST_F(OpenTest, ReadOnly) { char buf; const FileDescriptor ro_file = diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc index e5a85ef9d..431733dbe 100644 --- a/test/syscalls/linux/open_create.cc +++ b/test/syscalls/linux/open_create.cc @@ -88,6 +88,30 @@ TEST(CreateTest, CreateExclusively) { SyscallFailsWithErrno(EEXIST)); } +TEST(CreateTeast, CreatWithOTrunc) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(CreateTeast, CreatDirWithOTruncAndReadOnly) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(CreateTeast, CreatFileWithOTruncAndReadOnly) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncfile"); + int dirfd; + ASSERT_THAT(dirfd = open(dirpath.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666), + SyscallSucceeds()); + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + TEST(CreateTest, CreateFailsOnUnpermittedDir) { // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to // always override directory permissions. diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index 99a0df235..dafe64d20 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -70,6 +70,8 @@ constexpr absl::Duration kTimeout = absl::Seconds(20); // The maximum line size in bytes returned per read from a pty file. 
constexpr int kMaxLineSize = 4096; +constexpr char kMasterPath[] = "/dev/ptmx"; + // glibc defines its own, different, version of struct termios. We care about // what the kernel does, not glibc. #define KERNEL_NCCS 19 @@ -376,9 +378,25 @@ PosixErrorOr PollAndReadFd(int fd, void* buf, size_t count, return PosixError(ETIMEDOUT, "Poll timed out"); } +TEST(PtyTrunc, Truncate) { + // Opening PTYs with O_TRUNC shouldn't cause an error, but calls to + // (f)truncate should. + FileDescriptor master = + ASSERT_NO_ERRNO_AND_VALUE(Open(kMasterPath, O_RDWR | O_TRUNC)); + int n = ASSERT_NO_ERRNO_AND_VALUE(SlaveID(master)); + std::string spath = absl::StrCat("/dev/pts/", n); + FileDescriptor slave = + ASSERT_NO_ERRNO_AND_VALUE(Open(spath, O_RDWR | O_NONBLOCK | O_TRUNC)); + + EXPECT_THAT(truncate(kMasterPath, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(truncate(spath.c_str(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(ftruncate(master.get(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(ftruncate(slave.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + TEST(BasicPtyTest, StatUnopenedMaster) { struct stat s; - ASSERT_THAT(stat("/dev/ptmx", &s), SyscallSucceeds()); + ASSERT_THAT(stat(kMasterPath, &s), SyscallSucceeds()); EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor)); EXPECT_EQ(s.st_size, 0); diff --git a/test/util/pty_util.cc b/test/util/pty_util.cc index c0fd9a095..c01f916aa 100644 --- a/test/util/pty_util.cc +++ b/test/util/pty_util.cc @@ -24,6 +24,14 @@ namespace gvisor { namespace testing { PosixErrorOr OpenSlave(const FileDescriptor& master) { + PosixErrorOr n = SlaveID(master); + if (!n.ok()) { + return PosixErrorOr(n.error()); + } + return Open(absl::StrCat("/dev/pts/", n.ValueOrDie()), O_RDWR | O_NONBLOCK); +} + +PosixErrorOr SlaveID(const FileDescriptor& master) { // Get pty index. int n; int ret = ioctl(master.get(), TIOCGPTN, &n); @@ -38,7 +46,7 @@ PosixErrorOr OpenSlave(const FileDescriptor& master) { return PosixError(errno, "ioctl(TIOSPTLCK) failed"); } - return Open(absl::StrCat("/dev/pts/", n), O_RDWR | O_NONBLOCK); + return n; } } // namespace testing diff --git a/test/util/pty_util.h b/test/util/pty_util.h index 367b14f15..0722da379 100644 --- a/test/util/pty_util.h +++ b/test/util/pty_util.h @@ -24,6 +24,9 @@ namespace testing { // Opens the slave end of the passed master as R/W and nonblocking. PosixErrorOr OpenSlave(const FileDescriptor& master); +// Get the number of the slave end of the master. +PosixErrorOr SlaveID(const FileDescriptor& master); + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From af323eb7c1830053627de6161f8ce73ac5f06d4e Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Thu, 14 Nov 2019 17:02:59 -0800 Subject: Fix return codes for {get,set}sockopt for some nullptr cases. Updates #1092 PiperOrigin-RevId: 280547239 --- pkg/sentry/syscalls/linux/sys_socket.go | 23 +++++++++-------------- test/syscalls/linux/socket_ip_unbound.cc | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index b5a72ce63..ab1001f16 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -447,16 +447,13 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, syserror.ENOTSOCK } - // Read the length if present. Reject negative values. + // Read the length. Reject negative values. 
optLen := int32(0) - if optLenAddr != 0 { - if _, err := t.CopyIn(optLenAddr, &optLen); err != nil { - return 0, nil, err - } - - if optLen < 0 { - return 0, nil, syserror.EINVAL - } + if _, err := t.CopyIn(optLenAddr, &optLen); err != nil { + return 0, nil, err + } + if optLen < 0 { + return 0, nil, syserror.EINVAL } // Call syscall implementation then copy both value and value len out. @@ -465,11 +462,9 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, e.ToError() } - if optLenAddr != 0 { - vLen := int32(binary.Size(v)) - if _, err := t.CopyOut(optLenAddr, vLen); err != nil { - return 0, nil, err - } + vLen := int32(binary.Size(v)) + if _, err := t.CopyOut(optLenAddr, vLen); err != nil { + return 0, nil, err } if v != nil { diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc index b02872308..b6754111f 100644 --- a/test/syscalls/linux/socket_ip_unbound.cc +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -354,6 +354,38 @@ TEST_P(IPUnboundSocketTest, InvalidNegativeTOS) { EXPECT_EQ(get, expect); } +TEST_P(IPUnboundSocketTest, NullTOS) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + TOSOption t = GetTOSOption(GetParam().domain); + int set_sz = sizeof(int); + if (GetParam().domain == AF_INET) { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, nullptr, set_sz), + SyscallFailsWithErrno(EFAULT)); + } else { // AF_INET6 + // The AF_INET6 behavior is not yet compatible. gVisor will try to read + // optval from user memory at syscall handler, it needs substantial + // refactoring to implement this behavior just for IPv6. + if (IsRunningOnGvisor()) { + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, nullptr, set_sz), + SyscallFailsWithErrno(EFAULT)); + } else { + // Linux's IPv6 stack treats nullptr optval as input of 0, so the call + // succeeds. (net/ipv6/ipv6_sockglue.c, do_ipv6_setsockopt()) + // + // Linux's implementation would need fixing as passing a nullptr as optval + // and non-zero optlen may not be valid. + EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, nullptr, set_sz), + SyscallSucceedsWithValue(0)); + } + } + socklen_t get_sz = sizeof(int); + EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, nullptr, &get_sz), + SyscallFailsWithErrno(EFAULT)); + int get = -1; + EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, nullptr), + SyscallFailsWithErrno(EFAULT)); +} + INSTANTIATE_TEST_SUITE_P( IPUnboundSockets, IPUnboundSocketTest, ::testing::ValuesIn(VecCat(VecCat( -- cgit v1.2.3 From c0f89eba6ebdec08460bd796fc62d6aef674d141 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 21 Nov 2019 11:29:49 -0800 Subject: Import and structure cleanup. 
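The gvsync helpers move from third_party/gvsync to pkg/syncutil, so callers now import gvisor.dev/gvisor/pkg/syncutil. For illustration only (not part of this patch), a minimal caller sketch against the relocated package follows; the package and type names in the sketch are hypothetical, while the import path and the DowngradableRWMutex API match the code included below.

package example

import "gvisor.dev/gvisor/pkg/syncutil"

// stats is guarded by mu. DowngradableRWMutex adds DowngradeLock over
// sync.RWMutex, letting a writer atomically become a reader.
type stats struct {
	mu    syncutil.DowngradableRWMutex
	value int
}

// bumpAndRead increments value under the write lock, then downgrades to a
// read lock so other readers may proceed while the caller still observes a
// consistent value.
func (s *stats) bumpAndRead() int {
	s.mu.Lock()
	s.value++
	s.mu.DowngradeLock()
	defer s.mu.RUnlock()
	return s.value
}
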
PiperOrigin-RevId: 281795269 --- pkg/eventchannel/BUILD | 1 + pkg/flipcall/BUILD | 2 +- pkg/flipcall/flipcall_unsafe.go | 10 +- pkg/sentry/BUILD | 3 + pkg/sentry/fs/BUILD | 2 +- pkg/sentry/fs/fdpipe/pipe_opener_test.go | 1 + pkg/sentry/fs/overlay.go | 4 +- pkg/sentry/fsimpl/memfs/BUILD | 3 +- pkg/sentry/kernel/BUILD | 4 +- pkg/sentry/kernel/auth/BUILD | 2 +- pkg/sentry/kernel/futex/BUILD | 2 +- pkg/sentry/kernel/signalfd/BUILD | 4 +- pkg/sentry/kernel/task.go | 4 +- pkg/sentry/mm/BUILD | 2 +- pkg/sentry/mm/mm.go | 6 +- pkg/sentry/strace/strace.proto | 3 +- pkg/sentry/time/BUILD | 4 +- pkg/sentry/vfs/BUILD | 2 +- pkg/sentry/vfs/mount_unsafe.go | 4 +- pkg/state/object.proto | 56 ++++---- pkg/syncutil/BUILD | 54 ++++++++ pkg/syncutil/LICENSE | 27 ++++ pkg/syncutil/README.md | 5 + pkg/syncutil/atomicptr_unsafe.go | 47 +++++++ pkg/syncutil/atomicptrtest/BUILD | 29 ++++ pkg/syncutil/atomicptrtest/atomicptr_test.go | 31 +++++ pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go | 21 +++ pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go | 16 +++ pkg/syncutil/downgradable_rwmutex_test.go | 150 ++++++++++++++++++++ pkg/syncutil/downgradable_rwmutex_unsafe.go | 143 +++++++++++++++++++ pkg/syncutil/memmove_unsafe.go | 28 ++++ pkg/syncutil/norace_unsafe.go | 35 +++++ pkg/syncutil/race_unsafe.go | 41 ++++++ pkg/syncutil/seqatomic_unsafe.go | 72 ++++++++++ pkg/syncutil/seqatomictest/BUILD | 35 +++++ pkg/syncutil/seqatomictest/seqatomic_test.go | 132 ++++++++++++++++++ pkg/syncutil/seqcount.go | 149 ++++++++++++++++++++ pkg/syncutil/seqcount_test.go | 153 +++++++++++++++++++++ pkg/syncutil/syncutil.go | 7 + test/syscalls/linux/accept_bind.cc | 2 + test/syscalls/linux/accept_bind_stream.cc | 2 + test/syscalls/linux/chmod.cc | 1 + test/syscalls/linux/chroot.cc | 1 + test/syscalls/linux/clock_gettime.cc | 1 + test/syscalls/linux/concurrency.cc | 1 + test/syscalls/linux/exec_binary.cc | 1 + test/syscalls/linux/file_base.h | 1 + test/syscalls/linux/flock.cc | 1 + test/syscalls/linux/fork.cc | 1 + test/syscalls/linux/getdents.cc | 1 + test/syscalls/linux/ip_socket_test_util.cc | 5 +- test/syscalls/linux/memory_accounting.cc | 1 + test/syscalls/linux/mlock.cc | 1 + test/syscalls/linux/mmap.cc | 1 + test/syscalls/linux/mount.cc | 1 + test/syscalls/linux/read.cc | 1 + test/syscalls/linux/rename.cc | 1 + test/syscalls/linux/seccomp.cc | 1 + test/syscalls/linux/select.cc | 1 + test/syscalls/linux/shm.cc | 1 - test/syscalls/linux/socket_blocking.cc | 1 + test/syscalls/linux/socket_ip_loopback_blocking.cc | 1 + .../linux/socket_ip_tcp_generic_loopback.cc | 1 + .../linux/socket_ip_tcp_loopback_blocking.cc | 1 + .../linux/socket_ip_tcp_loopback_nonblock.cc | 1 + .../socket_ipv4_tcp_unbound_external_networking.cc | 1 + ...et_ipv4_tcp_unbound_external_networking_test.cc | 3 +- ...et_ipv4_udp_unbound_external_networking_test.cc | 3 +- test/syscalls/linux/socket_netlink_util.cc | 4 +- test/syscalls/linux/socket_unix_blocking_local.cc | 3 +- test/syscalls/linux/socket_unix_dgram.cc | 1 + .../linux/socket_unix_dgram_non_blocking.cc | 1 + .../linux/socket_unix_non_stream_blocking_local.cc | 3 +- test/syscalls/linux/socket_unix_seqpacket.cc | 1 + .../linux/socket_unix_stream_blocking_local.cc | 3 +- .../linux/socket_unix_stream_nonblock_local.cc | 3 +- .../syscalls/linux/socket_unix_unbound_abstract.cc | 1 + .../linux/socket_unix_unbound_filesystem.cc | 1 + .../linux/socket_unix_unbound_seqpacket.cc | 1 + test/syscalls/linux/socket_unix_unbound_stream.cc | 1 + test/syscalls/linux/sync.cc | 3 +- test/syscalls/linux/truncate.cc | 1 + 
.../syscalls/linux/unix_domain_socket_test_util.cc | 1 + test/syscalls/linux/unix_domain_socket_test_util.h | 1 + test/syscalls/linux/utimes.cc | 1 + test/syscalls/linux/vdso_clock_gettime.cc | 1 + test/util/fs_util_test.cc | 4 +- test/util/mount_util.h | 1 + test/util/posix_error_test.cc | 1 + test/util/rlimit_util.cc | 1 + test/util/signal_util.cc | 1 + test/util/signal_util.h | 1 + test/util/temp_path.h | 1 + test/util/test_util_test.cc | 1 + third_party/gvsync/BUILD | 53 ------- third_party/gvsync/LICENSE | 27 ---- third_party/gvsync/README.md | 3 - third_party/gvsync/atomicptr_unsafe.go | 47 ------- third_party/gvsync/atomicptrtest/BUILD | 28 ---- third_party/gvsync/atomicptrtest/atomicptr_test.go | 31 ----- .../gvsync/downgradable_rwmutex_1_12_unsafe.go | 21 --- .../gvsync/downgradable_rwmutex_1_13_unsafe.go | 16 --- third_party/gvsync/downgradable_rwmutex_test.go | 150 -------------------- third_party/gvsync/downgradable_rwmutex_unsafe.go | 143 ------------------- third_party/gvsync/gvsync.go | 7 - third_party/gvsync/memmove_unsafe.go | 28 ---- third_party/gvsync/norace_unsafe.go | 35 ----- third_party/gvsync/race_unsafe.go | 41 ------ third_party/gvsync/seqatomic_unsafe.go | 72 ---------- third_party/gvsync/seqatomictest/BUILD | 34 ----- third_party/gvsync/seqatomictest/seqatomic_test.go | 132 ------------------ third_party/gvsync/seqcount.go | 149 -------------------- third_party/gvsync/seqcount_test.go | 153 --------------------- tools/go_marshal/test/BUILD | 3 +- tools/go_marshal/test/external/BUILD | 4 +- 115 files changed, 1302 insertions(+), 1250 deletions(-) create mode 100644 pkg/syncutil/BUILD create mode 100644 pkg/syncutil/LICENSE create mode 100644 pkg/syncutil/README.md create mode 100644 pkg/syncutil/atomicptr_unsafe.go create mode 100644 pkg/syncutil/atomicptrtest/BUILD create mode 100644 pkg/syncutil/atomicptrtest/atomicptr_test.go create mode 100644 pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go create mode 100644 pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go create mode 100644 pkg/syncutil/downgradable_rwmutex_test.go create mode 100644 pkg/syncutil/downgradable_rwmutex_unsafe.go create mode 100644 pkg/syncutil/memmove_unsafe.go create mode 100644 pkg/syncutil/norace_unsafe.go create mode 100644 pkg/syncutil/race_unsafe.go create mode 100644 pkg/syncutil/seqatomic_unsafe.go create mode 100644 pkg/syncutil/seqatomictest/BUILD create mode 100644 pkg/syncutil/seqatomictest/seqatomic_test.go create mode 100644 pkg/syncutil/seqcount.go create mode 100644 pkg/syncutil/seqcount_test.go create mode 100644 pkg/syncutil/syncutil.go delete mode 100644 third_party/gvsync/BUILD delete mode 100644 third_party/gvsync/LICENSE delete mode 100644 third_party/gvsync/README.md delete mode 100644 third_party/gvsync/atomicptr_unsafe.go delete mode 100644 third_party/gvsync/atomicptrtest/BUILD delete mode 100644 third_party/gvsync/atomicptrtest/atomicptr_test.go delete mode 100644 third_party/gvsync/downgradable_rwmutex_1_12_unsafe.go delete mode 100644 third_party/gvsync/downgradable_rwmutex_1_13_unsafe.go delete mode 100644 third_party/gvsync/downgradable_rwmutex_test.go delete mode 100644 third_party/gvsync/downgradable_rwmutex_unsafe.go delete mode 100644 third_party/gvsync/gvsync.go delete mode 100644 third_party/gvsync/memmove_unsafe.go delete mode 100644 third_party/gvsync/norace_unsafe.go delete mode 100644 third_party/gvsync/race_unsafe.go delete mode 100644 third_party/gvsync/seqatomic_unsafe.go delete mode 100644 third_party/gvsync/seqatomictest/BUILD delete mode 100644 
third_party/gvsync/seqatomictest/seqatomic_test.go delete mode 100644 third_party/gvsync/seqcount.go delete mode 100644 third_party/gvsync/seqcount_test.go (limited to 'test') diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD index 71f2abc83..0b4b7cc44 100644 --- a/pkg/eventchannel/BUILD +++ b/pkg/eventchannel/BUILD @@ -25,6 +25,7 @@ go_library( proto_library( name = "eventchannel_proto", srcs = ["event.proto"], + visibility = ["//:sandbox"], ) go_proto_library( diff --git a/pkg/flipcall/BUILD b/pkg/flipcall/BUILD index 5643d5f26..e590a71ba 100644 --- a/pkg/flipcall/BUILD +++ b/pkg/flipcall/BUILD @@ -19,7 +19,7 @@ go_library( "//pkg/abi/linux", "//pkg/log", "//pkg/memutil", - "//third_party/gvsync", + "//pkg/syncutil", ], ) diff --git a/pkg/flipcall/flipcall_unsafe.go b/pkg/flipcall/flipcall_unsafe.go index a37952637..27b8939fc 100644 --- a/pkg/flipcall/flipcall_unsafe.go +++ b/pkg/flipcall/flipcall_unsafe.go @@ -18,7 +18,7 @@ import ( "reflect" "unsafe" - "gvisor.dev/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/pkg/syncutil" ) // Packets consist of a 16-byte header followed by an arbitrarily-sized @@ -75,13 +75,13 @@ func (ep *Endpoint) Data() []byte { var ioSync int64 func raceBecomeActive() { - if gvsync.RaceEnabled { - gvsync.RaceAcquire((unsafe.Pointer)(&ioSync)) + if syncutil.RaceEnabled { + syncutil.RaceAcquire((unsafe.Pointer)(&ioSync)) } } func raceBecomeInactive() { - if gvsync.RaceEnabled { - gvsync.RaceReleaseMerge((unsafe.Pointer)(&ioSync)) + if syncutil.RaceEnabled { + syncutil.RaceReleaseMerge((unsafe.Pointer)(&ioSync)) } } diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD index 2d6379c86..2a7122957 100644 --- a/pkg/sentry/BUILD +++ b/pkg/sentry/BUILD @@ -10,5 +10,8 @@ package_group( "//runsc/...", # Code generated by go_marshal relies on go_marshal libraries. "//tools/go_marshal/...", + + # Keep the old paths as a temporary measure. + "//third_party/golang/gvisor/pkg/sentry/...", ], ) diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 378602cc9..c035ffff7 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -68,9 +68,9 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/state", + "//pkg/syncutil", "//pkg/syserror", "//pkg/waiter", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go index 8e4d839e1..577445148 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go @@ -25,6 +25,7 @@ import ( "time" "github.com/google/uuid" + "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index 1d3ff39e0..25573e986 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syncutil" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/third_party/gvsync" ) // The virtual filesystem implements an overlay configuration. For a high-level @@ -199,7 +199,7 @@ type overlayEntry struct { upper *Inode // dirCacheMu protects dirCache. - dirCacheMu gvsync.DowngradableRWMutex `state:"nosave"` + dirCacheMu syncutil.DowngradableRWMutex `state:"nosave"` // dirCache is cache of DentAttrs from upper and lower Inodes. 
dirCache *SortedDentryMap diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD index 04d667273..952b20c51 100644 --- a/pkg/sentry/fsimpl/memfs/BUILD +++ b/pkg/sentry/fsimpl/memfs/BUILD @@ -1,10 +1,9 @@ load("//tools/go_stateify:defs.bzl", "go_library") load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) -load("//tools/go_generics:defs.bzl", "go_template_instance") - go_template_instance( name = "dentry_list", out = "dentry_list.go", diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index e041c51b3..2706927ff 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -35,7 +35,7 @@ go_template_instance( out = "seqatomic_taskgoroutineschedinfo_unsafe.go", package = "kernel", suffix = "TaskGoroutineSchedInfo", - template = "//third_party/gvsync:generic_seqatomic", + template = "//pkg/syncutil:generic_seqatomic", types = { "Value": "TaskGoroutineSchedInfo", }, @@ -209,12 +209,12 @@ go_library( "//pkg/sentry/usermem", "//pkg/state", "//pkg/state/statefile", + "//pkg/syncutil", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/stack", "//pkg/waiter", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 51de4568a..04c244447 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -8,7 +8,7 @@ go_template_instance( out = "atomicptr_credentials_unsafe.go", package = "auth", suffix = "Credentials", - template = "//third_party/gvsync:generic_atomicptr", + template = "//pkg/syncutil:generic_atomicptr", types = { "Value": "Credentials", }, diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index 34286c7a8..75ec31761 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -9,7 +9,7 @@ go_template_instance( out = "atomicptr_bucket_unsafe.go", package = "futex", suffix = "Bucket", - template = "//third_party/gvsync:generic_atomicptr", + template = "//pkg/syncutil:generic_atomicptr", types = { "Value": "bucket", }, diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 50b69d154..9f7e19b4d 100644 --- a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -1,7 +1,7 @@ -package(licenses = ["notice"]) - load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_library( name = "signalfd", srcs = ["signalfd.go"], diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 9be3dae3c..80c8e5464 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -35,8 +35,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syncutil" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/third_party/gvsync" ) // Task represents a thread of execution in the untrusted app. It @@ -83,7 +83,7 @@ type Task struct { // // gosched is protected by goschedSeq. gosched is owned by the task // goroutine. 
- goschedSeq gvsync.SeqCount `state:"nosave"` + goschedSeq syncutil.SeqCount `state:"nosave"` gosched TaskGoroutineSchedInfo // yieldCount is the number of times the task goroutine has called diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index a804b8b5c..839931f67 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -118,9 +118,9 @@ go_library( "//pkg/sentry/safemem", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/syncutil", "//pkg/syserror", "//pkg/tcpip/buffer", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index f350e0109..58a5c186d 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -44,7 +44,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/pkg/syncutil" ) // MemoryManager implements a virtual address space. @@ -82,7 +82,7 @@ type MemoryManager struct { users int32 // mappingMu is analogous to Linux's struct mm_struct::mmap_sem. - mappingMu gvsync.DowngradableRWMutex `state:"nosave"` + mappingMu syncutil.DowngradableRWMutex `state:"nosave"` // vmas stores virtual memory areas. Since vmas are stored by value, // clients should usually use vmaIterator.ValuePtr() instead of @@ -125,7 +125,7 @@ type MemoryManager struct { // activeMu is loosely analogous to Linux's struct // mm_struct::page_table_lock. - activeMu gvsync.DowngradableRWMutex `state:"nosave"` + activeMu syncutil.DowngradableRWMutex `state:"nosave"` // pmas stores platform mapping areas used to implement vmas. Since pmas // are stored by value, clients should usually use pmaIterator.ValuePtr() diff --git a/pkg/sentry/strace/strace.proto b/pkg/sentry/strace/strace.proto index 4b2f73a5f..906c52c51 100644 --- a/pkg/sentry/strace/strace.proto +++ b/pkg/sentry/strace/strace.proto @@ -32,8 +32,7 @@ message Strace { } } -message StraceEnter { -} +message StraceEnter {} message StraceExit { // Return value formatted as string. diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD index d3a4cd943..18e212dff 100644 --- a/pkg/sentry/time/BUILD +++ b/pkg/sentry/time/BUILD @@ -9,7 +9,7 @@ go_template_instance( out = "seqatomic_parameters_unsafe.go", package = "time", suffix = "Parameters", - template = "//third_party/gvsync:generic_seqatomic", + template = "//pkg/syncutil:generic_seqatomic", types = { "Value": "Parameters", }, @@ -36,8 +36,8 @@ go_library( deps = [ "//pkg/log", "//pkg/metric", + "//pkg/syncutil", "//pkg/syserror", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 4f2c2de9f..74a325309 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -33,9 +33,9 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/usermem", + "//pkg/syncutil", "//pkg/syserror", "//pkg/waiter", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go index 75e6c7dfa..c98b42f91 100644 --- a/pkg/sentry/vfs/mount_unsafe.go +++ b/pkg/sentry/vfs/mount_unsafe.go @@ -26,7 +26,7 @@ import ( "sync/atomic" "unsafe" - "gvisor.dev/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/pkg/syncutil" ) // mountKey represents the location at which a Mount is mounted. It is @@ -72,7 +72,7 @@ type mountTable struct { // intrinsics and inline assembly, limiting the performance of this // approach.) 
- seq gvsync.SeqCount + seq syncutil.SeqCount seed uint32 // for hashing keys // size holds both length (number of elements) and capacity (number of diff --git a/pkg/state/object.proto b/pkg/state/object.proto index 952289069..5ebcfb151 100644 --- a/pkg/state/object.proto +++ b/pkg/state/object.proto @@ -18,8 +18,8 @@ package gvisor.state.statefile; // Slice is a slice value. message Slice { - uint32 length = 1; - uint32 capacity = 2; + uint32 length = 1; + uint32 capacity = 2; uint64 ref_value = 3; } @@ -30,13 +30,13 @@ message Array { // Map is a map value. message Map { - repeated Object keys = 1; + repeated Object keys = 1; repeated Object values = 2; } // Interface is an interface value. message Interface { - string type = 1; + string type = 1; Object value = 2; } @@ -47,7 +47,7 @@ message Struct { // Field encodes a single field. message Field { - string name = 1; + string name = 1; Object value = 2; } @@ -113,28 +113,28 @@ message Float32s { // Note that ref_value references an Object.id, below. message Object { oneof value { - bool bool_value = 1; - bytes string_value = 2; - int64 int64_value = 3; - uint64 uint64_value = 4; - double double_value = 5; - uint64 ref_value = 6; - Slice slice_value = 7; - Array array_value = 8; - Interface interface_value = 9; - Struct struct_value = 10; - Map map_value = 11; - bytes byte_array_value = 12; - Uint16s uint16_array_value = 13; - Uint32s uint32_array_value = 14; - Uint64s uint64_array_value = 15; - Uintptrs uintptr_array_value = 16; - Int8s int8_array_value = 17; - Int16s int16_array_value = 18; - Int32s int32_array_value = 19; - Int64s int64_array_value = 20; - Bools bool_array_value = 21; - Float64s float64_array_value = 22; - Float32s float32_array_value = 23; + bool bool_value = 1; + bytes string_value = 2; + int64 int64_value = 3; + uint64 uint64_value = 4; + double double_value = 5; + uint64 ref_value = 6; + Slice slice_value = 7; + Array array_value = 8; + Interface interface_value = 9; + Struct struct_value = 10; + Map map_value = 11; + bytes byte_array_value = 12; + Uint16s uint16_array_value = 13; + Uint32s uint32_array_value = 14; + Uint64s uint64_array_value = 15; + Uintptrs uintptr_array_value = 16; + Int8s int8_array_value = 17; + Int16s int16_array_value = 18; + Int32s int32_array_value = 19; + Int64s int64_array_value = 20; + Bools bool_array_value = 21; + Float64s float64_array_value = 22; + Float32s float32_array_value = 23; } } diff --git a/pkg/syncutil/BUILD b/pkg/syncutil/BUILD new file mode 100644 index 000000000..b06a90bef --- /dev/null +++ b/pkg/syncutil/BUILD @@ -0,0 +1,54 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template") + +package( + default_visibility = ["//:sandbox"], + licenses = ["notice"], +) + +exports_files(["LICENSE"]) + +go_template( + name = "generic_atomicptr", + srcs = ["atomicptr_unsafe.go"], + types = [ + "Value", + ], +) + +go_template( + name = "generic_seqatomic", + srcs = ["seqatomic_unsafe.go"], + types = [ + "Value", + ], + deps = [ + ":sync", + ], +) + +go_library( + name = "syncutil", + srcs = [ + "downgradable_rwmutex_1_12_unsafe.go", + "downgradable_rwmutex_1_13_unsafe.go", + "downgradable_rwmutex_unsafe.go", + "memmove_unsafe.go", + "norace_unsafe.go", + "race_unsafe.go", + "seqcount.go", + "syncutil.go", + ], + importpath = "gvisor.dev/gvisor/pkg/syncutil", +) + +go_test( + name = "syncutil_test", + size = "small", + srcs = [ + "downgradable_rwmutex_test.go", + "seqcount_test.go", 
+ ], + embed = [":syncutil"], +) diff --git a/pkg/syncutil/LICENSE b/pkg/syncutil/LICENSE new file mode 100644 index 000000000..6a66aea5e --- /dev/null +++ b/pkg/syncutil/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/syncutil/README.md b/pkg/syncutil/README.md new file mode 100644 index 000000000..2183c4e20 --- /dev/null +++ b/pkg/syncutil/README.md @@ -0,0 +1,5 @@ +# Syncutil + +This package provides additional synchronization primitives not provided by the +Go stdlib 'sync' package. It is partially derived from the upstream 'sync' +package from go1.10. diff --git a/pkg/syncutil/atomicptr_unsafe.go b/pkg/syncutil/atomicptr_unsafe.go new file mode 100644 index 000000000..525c4beed --- /dev/null +++ b/pkg/syncutil/atomicptr_unsafe.go @@ -0,0 +1,47 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package template doesn't exist. This file must be instantiated using the +// go_template_instance rule in tools/go_generics/defs.bzl. +package template + +import ( + "sync/atomic" + "unsafe" +) + +// Value is a required type parameter. +type Value struct{} + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtr struct { + ptr unsafe.Pointer `state:".(*Value)"` +} + +func (p *AtomicPtr) savePtr() *Value { + return p.Load() +} + +func (p *AtomicPtr) loadPtr(v *Value) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtr) Load() *Value { + return (*Value)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. 
+func (p *AtomicPtr) Store(x *Value) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/syncutil/atomicptrtest/BUILD b/pkg/syncutil/atomicptrtest/BUILD new file mode 100644 index 000000000..63f411a90 --- /dev/null +++ b/pkg/syncutil/atomicptrtest/BUILD @@ -0,0 +1,29 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "atomicptr_int", + out = "atomicptr_int_unsafe.go", + package = "atomicptr", + suffix = "Int", + template = "//pkg/syncutil:generic_atomicptr", + types = { + "Value": "int", + }, +) + +go_library( + name = "atomicptr", + srcs = ["atomicptr_int_unsafe.go"], + importpath = "gvisor.dev/gvisor/pkg/syncutil/atomicptr", +) + +go_test( + name = "atomicptr_test", + size = "small", + srcs = ["atomicptr_test.go"], + embed = [":atomicptr"], +) diff --git a/pkg/syncutil/atomicptrtest/atomicptr_test.go b/pkg/syncutil/atomicptrtest/atomicptr_test.go new file mode 100644 index 000000000..8fdc5112e --- /dev/null +++ b/pkg/syncutil/atomicptrtest/atomicptr_test.go @@ -0,0 +1,31 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package atomicptr + +import ( + "testing" +) + +func newInt(val int) *int { + return &val +} + +func TestAtomicPtr(t *testing.T) { + var p AtomicPtrInt + if got := p.Load(); got != nil { + t.Errorf("initial value is %p (%v), wanted nil", got, got) + } + want := newInt(42) + p.Store(want) + if got := p.Load(); got != want { + t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) + } + want = newInt(100) + p.Store(want) + if got := p.Load(); got != want { + t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) + } +} diff --git a/pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go b/pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go new file mode 100644 index 000000000..7c6336e62 --- /dev/null +++ b/pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go @@ -0,0 +1,21 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.12 +// +build !go1.13 + +// TODO(b/133868570): Delete once Go 1.12 is no longer supported. + +package syncutil + +import _ "unsafe" + +//go:linkname runtimeSemrelease112 sync.runtime_Semrelease +func runtimeSemrelease112(s *uint32, handoff bool) + +func runtimeSemrelease(s *uint32, handoff bool, skipframes int) { + // 'skipframes' is only available starting from 1.13. + runtimeSemrelease112(s, handoff) +} diff --git a/pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go b/pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go new file mode 100644 index 000000000..3c3673119 --- /dev/null +++ b/pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go @@ -0,0 +1,16 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.13 +// +build !go1.15 + +// Check go:linkname function signatures when updating Go version. 
+ +package syncutil + +import _ "unsafe" + +//go:linkname runtimeSemrelease sync.runtime_Semrelease +func runtimeSemrelease(s *uint32, handoff bool, skipframes int) diff --git a/pkg/syncutil/downgradable_rwmutex_test.go b/pkg/syncutil/downgradable_rwmutex_test.go new file mode 100644 index 000000000..ffaf7ecc7 --- /dev/null +++ b/pkg/syncutil/downgradable_rwmutex_test.go @@ -0,0 +1,150 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// GOMAXPROCS=10 go test + +// Copy/pasted from the standard library's sync/rwmutex_test.go, except for the +// addition of downgradingWriter and the renaming of num_iterations to +// numIterations to shut up Golint. + +package syncutil + +import ( + "fmt" + "runtime" + "sync/atomic" + "testing" +) + +func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { + m.RLock() + clocked <- true + <-cunlock + m.RUnlock() + cdone <- true +} + +func doTestParallelReaders(numReaders, gomaxprocs int) { + runtime.GOMAXPROCS(gomaxprocs) + var m DowngradableRWMutex + clocked := make(chan bool) + cunlock := make(chan bool) + cdone := make(chan bool) + for i := 0; i < numReaders; i++ { + go parallelReader(&m, clocked, cunlock, cdone) + } + // Wait for all parallel RLock()s to succeed. + for i := 0; i < numReaders; i++ { + <-clocked + } + for i := 0; i < numReaders; i++ { + cunlock <- true + } + // Wait for the goroutines to finish. + for i := 0; i < numReaders; i++ { + <-cdone + } +} + +func TestParallelReaders(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) + doTestParallelReaders(1, 4) + doTestParallelReaders(3, 4) + doTestParallelReaders(4, 2) +} + +func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.RLock() + n := atomic.AddInt32(activity, 1) + if n < 1 || n >= 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -1) + rwm.RUnlock() + } + cdone <- true +} + +func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.Lock() + n := atomic.AddInt32(activity, 10000) + if n != 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -10000) + rwm.Unlock() + } + cdone <- true +} + +func downgradingWriter(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.Lock() + n := atomic.AddInt32(activity, 10000) + if n != 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -10000) + rwm.DowngradeLock() + n = atomic.AddInt32(activity, 1) + if n < 1 || n >= 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + n = atomic.AddInt32(activity, -1) + rwm.RUnlock() + } + cdone <- true +} + +func HammerDowngradableRWMutex(gomaxprocs, numReaders, numIterations int) { + runtime.GOMAXPROCS(gomaxprocs) + // Number of active readers + 10000 * number of active writers. 
+ var activity int32 + var rwm DowngradableRWMutex + cdone := make(chan bool) + go writer(&rwm, numIterations, &activity, cdone) + go downgradingWriter(&rwm, numIterations, &activity, cdone) + var i int + for i = 0; i < numReaders/2; i++ { + go reader(&rwm, numIterations, &activity, cdone) + } + go writer(&rwm, numIterations, &activity, cdone) + go downgradingWriter(&rwm, numIterations, &activity, cdone) + for ; i < numReaders; i++ { + go reader(&rwm, numIterations, &activity, cdone) + } + // Wait for the 4 writers and all readers to finish. + for i := 0; i < 4+numReaders; i++ { + <-cdone + } +} + +func TestDowngradableRWMutex(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) + n := 1000 + if testing.Short() { + n = 5 + } + HammerDowngradableRWMutex(1, 1, n) + HammerDowngradableRWMutex(1, 3, n) + HammerDowngradableRWMutex(1, 10, n) + HammerDowngradableRWMutex(4, 1, n) + HammerDowngradableRWMutex(4, 3, n) + HammerDowngradableRWMutex(4, 10, n) + HammerDowngradableRWMutex(10, 1, n) + HammerDowngradableRWMutex(10, 3, n) + HammerDowngradableRWMutex(10, 10, n) + HammerDowngradableRWMutex(10, 5, n) +} diff --git a/pkg/syncutil/downgradable_rwmutex_unsafe.go b/pkg/syncutil/downgradable_rwmutex_unsafe.go new file mode 100644 index 000000000..07feca402 --- /dev/null +++ b/pkg/syncutil/downgradable_rwmutex_unsafe.go @@ -0,0 +1,143 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.12 +// +build !go1.15 + +// Check go:linkname function signatures when updating Go version. + +// This is mostly copied from the standard library's sync/rwmutex.go. +// +// Happens-before relationships indicated to the race detector: +// - Unlock -> Lock (via writerSem) +// - Unlock -> RLock (via readerSem) +// - RUnlock -> Lock (via writerSem) +// - DowngradeLock -> RLock (via readerSem) + +package syncutil + +import ( + "sync" + "sync/atomic" + "unsafe" +) + +//go:linkname runtimeSemacquire sync.runtime_Semacquire +func runtimeSemacquire(s *uint32) + +// DowngradableRWMutex is identical to sync.RWMutex, but adds the DowngradeLock +// method. +type DowngradableRWMutex struct { + w sync.Mutex // held if there are pending writers + writerSem uint32 // semaphore for writers to wait for completing readers + readerSem uint32 // semaphore for readers to wait for completing writers + readerCount int32 // number of pending readers + readerWait int32 // number of departing readers +} + +const rwmutexMaxReaders = 1 << 30 + +// RLock locks rw for reading. +func (rw *DowngradableRWMutex) RLock() { + if RaceEnabled { + RaceDisable() + } + if atomic.AddInt32(&rw.readerCount, 1) < 0 { + // A writer is pending, wait for it. + runtimeSemacquire(&rw.readerSem) + } + if RaceEnabled { + RaceEnable() + RaceAcquire(unsafe.Pointer(&rw.readerSem)) + } +} + +// RUnlock undoes a single RLock call. +func (rw *DowngradableRWMutex) RUnlock() { + if RaceEnabled { + RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) + RaceDisable() + } + if r := atomic.AddInt32(&rw.readerCount, -1); r < 0 { + if r+1 == 0 || r+1 == -rwmutexMaxReaders { + panic("RUnlock of unlocked DowngradableRWMutex") + } + // A writer is pending. + if atomic.AddInt32(&rw.readerWait, -1) == 0 { + // The last reader unblocks the writer. + runtimeSemrelease(&rw.writerSem, false, 0) + } + } + if RaceEnabled { + RaceEnable() + } +} + +// Lock locks rw for writing. 
+func (rw *DowngradableRWMutex) Lock() { + if RaceEnabled { + RaceDisable() + } + // First, resolve competition with other writers. + rw.w.Lock() + // Announce to readers there is a pending writer. + r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders + // Wait for active readers. + if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 { + runtimeSemacquire(&rw.writerSem) + } + if RaceEnabled { + RaceEnable() + RaceAcquire(unsafe.Pointer(&rw.writerSem)) + } +} + +// Unlock unlocks rw for writing. +func (rw *DowngradableRWMutex) Unlock() { + if RaceEnabled { + RaceRelease(unsafe.Pointer(&rw.writerSem)) + RaceRelease(unsafe.Pointer(&rw.readerSem)) + RaceDisable() + } + // Announce to readers there is no active writer. + r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) + if r >= rwmutexMaxReaders { + panic("Unlock of unlocked DowngradableRWMutex") + } + // Unblock blocked readers, if any. + for i := 0; i < int(r); i++ { + runtimeSemrelease(&rw.readerSem, false, 0) + } + // Allow other writers to proceed. + rw.w.Unlock() + if RaceEnabled { + RaceEnable() + } +} + +// DowngradeLock atomically unlocks rw for writing and locks it for reading. +func (rw *DowngradableRWMutex) DowngradeLock() { + if RaceEnabled { + RaceRelease(unsafe.Pointer(&rw.readerSem)) + RaceDisable() + } + // Announce to readers there is no active writer and one additional reader. + r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders+1) + if r >= rwmutexMaxReaders+1 { + panic("DowngradeLock of unlocked DowngradableRWMutex") + } + // Unblock blocked readers, if any. Note that this loop starts as 1 since r + // includes this goroutine. + for i := 1; i < int(r); i++ { + runtimeSemrelease(&rw.readerSem, false, 0) + } + // Allow other writers to proceed to rw.w.Lock(). Note that they will still + // block on rw.writerSem since at least this reader exists, such that + // DowngradeLock() is atomic with the previous write lock. + rw.w.Unlock() + if RaceEnabled { + RaceEnable() + } +} diff --git a/pkg/syncutil/memmove_unsafe.go b/pkg/syncutil/memmove_unsafe.go new file mode 100644 index 000000000..348675baa --- /dev/null +++ b/pkg/syncutil/memmove_unsafe.go @@ -0,0 +1,28 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.12 +// +build !go1.15 + +// Check go:linkname function signatures when updating Go version. + +package syncutil + +import ( + "unsafe" +) + +//go:linkname memmove runtime.memmove +//go:noescape +func memmove(to, from unsafe.Pointer, n uintptr) + +// Memmove is exported for SeqAtomicLoad/SeqAtomicTryLoad, which can't +// define it because go_generics can't update the go:linkname annotation. +// Furthermore, go:linkname silently doesn't work if the local name is exported +// (this is of course undocumented), which is why this indirection is +// necessary. +func Memmove(to, from unsafe.Pointer, n uintptr) { + memmove(to, from, n) +} diff --git a/pkg/syncutil/norace_unsafe.go b/pkg/syncutil/norace_unsafe.go new file mode 100644 index 000000000..0a0a9deda --- /dev/null +++ b/pkg/syncutil/norace_unsafe.go @@ -0,0 +1,35 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !race + +package syncutil + +import ( + "unsafe" +) + +// RaceEnabled is true if the Go data race detector is enabled. 
+const RaceEnabled = false + +// RaceDisable has the same semantics as runtime.RaceDisable. +func RaceDisable() { +} + +// RaceEnable has the same semantics as runtime.RaceEnable. +func RaceEnable() { +} + +// RaceAcquire has the same semantics as runtime.RaceAcquire. +func RaceAcquire(addr unsafe.Pointer) { +} + +// RaceRelease has the same semantics as runtime.RaceRelease. +func RaceRelease(addr unsafe.Pointer) { +} + +// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. +func RaceReleaseMerge(addr unsafe.Pointer) { +} diff --git a/pkg/syncutil/race_unsafe.go b/pkg/syncutil/race_unsafe.go new file mode 100644 index 000000000..206067ec1 --- /dev/null +++ b/pkg/syncutil/race_unsafe.go @@ -0,0 +1,41 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build race + +package syncutil + +import ( + "runtime" + "unsafe" +) + +// RaceEnabled is true if the Go data race detector is enabled. +const RaceEnabled = true + +// RaceDisable has the same semantics as runtime.RaceDisable. +func RaceDisable() { + runtime.RaceDisable() +} + +// RaceEnable has the same semantics as runtime.RaceEnable. +func RaceEnable() { + runtime.RaceEnable() +} + +// RaceAcquire has the same semantics as runtime.RaceAcquire. +func RaceAcquire(addr unsafe.Pointer) { + runtime.RaceAcquire(addr) +} + +// RaceRelease has the same semantics as runtime.RaceRelease. +func RaceRelease(addr unsafe.Pointer) { + runtime.RaceRelease(addr) +} + +// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. +func RaceReleaseMerge(addr unsafe.Pointer) { + runtime.RaceReleaseMerge(addr) +} diff --git a/pkg/syncutil/seqatomic_unsafe.go b/pkg/syncutil/seqatomic_unsafe.go new file mode 100644 index 000000000..cb6d2eb22 --- /dev/null +++ b/pkg/syncutil/seqatomic_unsafe.go @@ -0,0 +1,72 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package template doesn't exist. This file must be instantiated using the +// go_template_instance rule in tools/go_generics/defs.bzl. +package template + +import ( + "fmt" + "reflect" + "strings" + "unsafe" + + "gvisor.dev/gvisor/pkg/syncutil" +) + +// Value is a required type parameter. +// +// Value must not contain any pointers, including interface objects, function +// objects, slices, maps, channels, unsafe.Pointer, and arrays or structs +// containing any of the above. An init() function will panic if this property +// does not hold. +type Value struct{} + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in sc. +func SeqAtomicLoad(sc *syncutil.SeqCount, ptr *Value) Value { + // This function doesn't use SeqAtomicTryLoad because doing so is + // measurably, significantly (~20%) slower; Go is awful at inlining. + var val Value + for { + epoch := sc.BeginRead() + if syncutil.RaceEnabled { + // runtime.RaceDisable() doesn't actually stop the race detector, + // so it can't help us here. Instead, call runtime.memmove + // directly, which is not instrumented by the race detector. + syncutil.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + // This is ~40% faster for short reads than going through memmove. 
+ val = *ptr + } + if sc.ReadOk(epoch) { + break + } + } + return val +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read +// would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +func SeqAtomicTryLoad(sc *syncutil.SeqCount, epoch syncutil.SeqCountEpoch, ptr *Value) (Value, bool) { + var val Value + if syncutil.RaceEnabled { + syncutil.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + val = *ptr + } + return val, sc.ReadOk(epoch) +} + +func init() { + var val Value + typ := reflect.TypeOf(val) + name := typ.Name() + if ptrs := syncutil.PointersInType(typ, name); len(ptrs) != 0 { + panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) + } +} diff --git a/pkg/syncutil/seqatomictest/BUILD b/pkg/syncutil/seqatomictest/BUILD new file mode 100644 index 000000000..ba18f3238 --- /dev/null +++ b/pkg/syncutil/seqatomictest/BUILD @@ -0,0 +1,35 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "seqatomic_int", + out = "seqatomic_int_unsafe.go", + package = "seqatomic", + suffix = "Int", + template = "//pkg/syncutil:generic_seqatomic", + types = { + "Value": "int", + }, +) + +go_library( + name = "seqatomic", + srcs = ["seqatomic_int_unsafe.go"], + importpath = "gvisor.dev/gvisor/pkg/syncutil/seqatomic", + deps = [ + "//pkg/syncutil", + ], +) + +go_test( + name = "seqatomic_test", + size = "small", + srcs = ["seqatomic_test.go"], + embed = [":seqatomic"], + deps = [ + "//pkg/syncutil", + ], +) diff --git a/pkg/syncutil/seqatomictest/seqatomic_test.go b/pkg/syncutil/seqatomictest/seqatomic_test.go new file mode 100644 index 000000000..b0db44999 --- /dev/null +++ b/pkg/syncutil/seqatomictest/seqatomic_test.go @@ -0,0 +1,132 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package seqatomic + +import ( + "sync/atomic" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/syncutil" +) + +func TestSeqAtomicLoadUncontended(t *testing.T) { + var seq syncutil.SeqCount + const want = 1 + data := want + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicLoadAfterWrite(t *testing.T) { + var seq syncutil.SeqCount + var data int + const want = 1 + seq.BeginWrite() + data = want + seq.EndWrite() + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicLoadDuringWrite(t *testing.T) { + var seq syncutil.SeqCount + var data int + const want = 1 + seq.BeginWrite() + go func() { + time.Sleep(time.Second) + data = want + seq.EndWrite() + }() + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicTryLoadUncontended(t *testing.T) { + var seq syncutil.SeqCount + const want = 1 + data := want + epoch := seq.BeginRead() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { + t.Errorf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) + } +} + +func TestSeqAtomicTryLoadDuringWrite(t *testing.T) { + var seq syncutil.SeqCount + var data int + epoch := seq.BeginRead() + seq.BeginWrite() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { + t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) + } + seq.EndWrite() +} + +func TestSeqAtomicTryLoadAfterWrite(t *testing.T) { + var seq syncutil.SeqCount + var data int + epoch := seq.BeginRead() + seq.BeginWrite() + seq.EndWrite() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { + t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) + } +} + +func BenchmarkSeqAtomicLoadIntUncontended(b *testing.B) { + var seq syncutil.SeqCount + const want = 42 + data := want + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if got := SeqAtomicLoadInt(&seq, &data); got != want { + b.Fatalf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } + } + }) +} + +func BenchmarkSeqAtomicTryLoadIntUncontended(b *testing.B) { + var seq syncutil.SeqCount + const want = 42 + data := want + b.RunParallel(func(pb *testing.PB) { + epoch := seq.BeginRead() + for pb.Next() { + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { + b.Fatalf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) + } + } + }) +} + +// For comparison: +func BenchmarkAtomicValueLoadIntUncontended(b *testing.B) { + var a atomic.Value + const want = 42 + a.Store(int(want)) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if got := a.Load().(int); got != want { + b.Fatalf("atomic.Value.Load: got %v, wanted %v", got, want) + } + } + }) +} diff --git a/pkg/syncutil/seqcount.go b/pkg/syncutil/seqcount.go new file mode 100644 index 000000000..11d8dbfaa --- /dev/null +++ b/pkg/syncutil/seqcount.go @@ -0,0 +1,149 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syncutil + +import ( + "fmt" + "reflect" + "runtime" + "sync/atomic" +) + +// SeqCount is a synchronization primitive for optimistic reader/writer +// synchronization in cases where readers can work with stale data and +// therefore do not need to block writers. 
+// +// Compared to sync/atomic.Value: +// +// - Mutation of SeqCount-protected data does not require memory allocation, +// whereas atomic.Value generally does. This is a significant advantage when +// writes are common. +// +// - Atomic reads of SeqCount-protected data require copying. This is a +// disadvantage when atomic reads are common. +// +// - SeqCount may be more flexible: correct use of SeqCount.ReadOk allows other +// operations to be made atomic with reads of SeqCount-protected data. +// +// - SeqCount may be less flexible: as of this writing, SeqCount-protected data +// cannot include pointers. +// +// - SeqCount is more cumbersome to use; atomic reads of SeqCount-protected +// data require instantiating function templates using go_generics (see +// seqatomic.go). +type SeqCount struct { + // epoch is incremented by BeginWrite and EndWrite, such that epoch is odd + // if a writer critical section is active, and a read from data protected + // by this SeqCount is atomic iff epoch is the same even value before and + // after the read. + epoch uint32 +} + +// SeqCountEpoch tracks writer critical sections in a SeqCount. +type SeqCountEpoch struct { + val uint32 +} + +// We assume that: +// +// - All functions in sync/atomic that perform a memory read are at least a +// read fence: memory reads before calls to such functions cannot be reordered +// after the call, and memory reads after calls to such functions cannot be +// reordered before the call, even if those reads do not use sync/atomic. +// +// - All functions in sync/atomic that perform a memory write are at least a +// write fence: memory writes before calls to such functions cannot be +// reordered after the call, and memory writes after calls to such functions +// cannot be reordered before the call, even if those writes do not use +// sync/atomic. +// +// As of this writing, the Go memory model completely fails to describe +// sync/atomic, but these properties are implied by +// https://groups.google.com/forum/#!topic/golang-nuts/7EnEhM3U7B8. + +// BeginRead indicates the beginning of a reader critical section. Reader +// critical sections DO NOT BLOCK writer critical sections, so operations in a +// reader critical section MAY RACE with writer critical sections. Races are +// detected by ReadOk at the end of the reader critical section. Thus, the +// low-level structure of readers is generally: +// +// for { +// epoch := seq.BeginRead() +// // do something idempotent with seq-protected data +// if seq.ReadOk(epoch) { +// break +// } +// } +// +// However, since reader critical sections may race with writer critical +// sections, the Go race detector will (accurately) flag data races in readers +// using this pattern. Most users of SeqCount will need to use the +// SeqAtomicLoad function template in seqatomic.go. +func (s *SeqCount) BeginRead() SeqCountEpoch { + epoch := atomic.LoadUint32(&s.epoch) + for epoch&1 != 0 { + runtime.Gosched() + epoch = atomic.LoadUint32(&s.epoch) + } + return SeqCountEpoch{epoch} +} + +// ReadOk returns true if the reader critical section initiated by a previous +// call to BeginRead() that returned epoch did not race with any writer critical +// sections. +// +// ReadOk may be called any number of times during a reader critical section. +// Reader critical sections do not need to be explicitly terminated; the last +// call to ReadOk is implicitly the end of the reader critical section. 
+func (s *SeqCount) ReadOk(epoch SeqCountEpoch) bool { + return atomic.LoadUint32(&s.epoch) == epoch.val +} + +// BeginWrite indicates the beginning of a writer critical section. +// +// SeqCount does not support concurrent writer critical sections; clients with +// concurrent writers must synchronize them using e.g. sync.Mutex. +func (s *SeqCount) BeginWrite() { + if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 == 0 { + panic("SeqCount.BeginWrite during writer critical section") + } +} + +// EndWrite ends the effect of a preceding BeginWrite. +func (s *SeqCount) EndWrite() { + if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 != 0 { + panic("SeqCount.EndWrite outside writer critical section") + } +} + +// PointersInType returns a list of pointers reachable from values named +// valName of the given type. +// +// PointersInType is not exhaustive, but it is guaranteed that if typ contains +// at least one pointer, then PointersInTypeOf returns a non-empty list. +func PointersInType(typ reflect.Type, valName string) []string { + switch kind := typ.Kind(); kind { + case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + return nil + + case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice, reflect.String, reflect.UnsafePointer: + return []string{valName} + + case reflect.Array: + return PointersInType(typ.Elem(), valName+"[]") + + case reflect.Struct: + var ptrs []string + for i, n := 0, typ.NumField(); i < n; i++ { + field := typ.Field(i) + ptrs = append(ptrs, PointersInType(field.Type, fmt.Sprintf("%s.%s", valName, field.Name))...) + } + return ptrs + + default: + return []string{fmt.Sprintf("%s (of type %s with unknown kind %s)", valName, typ, kind)} + } +} diff --git a/pkg/syncutil/seqcount_test.go b/pkg/syncutil/seqcount_test.go new file mode 100644 index 000000000..14d6aedea --- /dev/null +++ b/pkg/syncutil/seqcount_test.go @@ -0,0 +1,153 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
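[Illustrative note, not part of the patch.] Taken together, the BeginRead/ReadOk and BeginWrite/EndWrite halves above imply a specific usage discipline: writers are serialized externally and bracket their updates, while readers retry until they observe a quiescent epoch. The standalone sketch below spells that discipline out without the go_generics machinery (the point type is hypothetical); as the BeginRead comment notes, the plain reads in load would be flagged by the race detector, which is why real users go through SeqAtomicLoad instantiations.

package main

import (
	"fmt"
	"sync"

	"gvisor.dev/gvisor/pkg/syncutil"
)

// point is a hypothetical pair of fields protected by a SeqCount.
type point struct {
	mu   sync.Mutex        // serializes writers; SeqCount itself does not.
	seq  syncutil.SeqCount // lets readers detect races with writers.
	x, y int
}

// set runs a writer critical section.
func (p *point) set(x, y int) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.seq.BeginWrite()
	p.x, p.y = x, y
	p.seq.EndWrite()
}

// load retries until the snapshot did not race with a writer, yielding a
// consistent (x, y) pair without ever blocking set.
func (p *point) load() (int, int) {
	for {
		epoch := p.seq.BeginRead()
		x, y := p.x, p.y
		if p.seq.ReadOk(epoch) {
			return x, y
		}
	}
}

func main() {
	var p point
	p.set(1, 2)
	x, y := p.load()
	fmt.Println(x, y)
}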
+ +package syncutil + +import ( + "reflect" + "testing" + "time" +) + +func TestSeqCountWriteUncontended(t *testing.T) { + var seq SeqCount + seq.BeginWrite() + seq.EndWrite() +} + +func TestSeqCountReadUncontended(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountBeginReadAfterWrite(t *testing.T) { + var seq SeqCount + var data int32 + const want = 1 + seq.BeginWrite() + data = want + seq.EndWrite() + epoch := seq.BeginRead() + if data != want { + t.Errorf("Reader: got %v, wanted %v", data, want) + } + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountBeginReadDuringWrite(t *testing.T) { + var seq SeqCount + var data int + const want = 1 + seq.BeginWrite() + go func() { + time.Sleep(time.Second) + data = want + seq.EndWrite() + }() + epoch := seq.BeginRead() + if data != want { + t.Errorf("Reader: got %v, wanted %v", data, want) + } + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountReadOkAfterWrite(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + seq.BeginWrite() + seq.EndWrite() + if seq.ReadOk(epoch) { + t.Errorf("ReadOk: got true, wanted false") + } +} + +func TestSeqCountReadOkDuringWrite(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + seq.BeginWrite() + if seq.ReadOk(epoch) { + t.Errorf("ReadOk: got true, wanted false") + } + seq.EndWrite() +} + +func BenchmarkSeqCountWriteUncontended(b *testing.B) { + var seq SeqCount + for i := 0; i < b.N; i++ { + seq.BeginWrite() + seq.EndWrite() + } +} + +func BenchmarkSeqCountReadUncontended(b *testing.B) { + var seq SeqCount + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + epoch := seq.BeginRead() + if !seq.ReadOk(epoch) { + b.Fatalf("ReadOk: got false, wanted true") + } + } + }) +} + +func TestPointersInType(t *testing.T) { + for _, test := range []struct { + name string // used for both test and value name + val interface{} + ptrs []string + }{ + { + name: "EmptyStruct", + val: struct{}{}, + }, + { + name: "Int", + val: int(0), + }, + { + name: "MixedStruct", + val: struct { + b bool + I int + ExportedPtr *struct{} + unexportedPtr *struct{} + arr [2]int + ptrArr [2]*int + nestedStruct struct { + nestedNonptr int + nestedPtr *int + } + structArr [1]struct { + nonptr int + ptr *int + } + }{}, + ptrs: []string{ + "MixedStruct.ExportedPtr", + "MixedStruct.unexportedPtr", + "MixedStruct.ptrArr[]", + "MixedStruct.nestedStruct.nestedPtr", + "MixedStruct.structArr[].ptr", + }, + }, + } { + t.Run(test.name, func(t *testing.T) { + typ := reflect.TypeOf(test.val) + ptrs := PointersInType(typ, test.name) + t.Logf("Found pointers: %v", ptrs) + if (len(ptrs) != 0 || len(test.ptrs) != 0) && !reflect.DeepEqual(ptrs, test.ptrs) { + t.Errorf("Got %v, wanted %v", ptrs, test.ptrs) + } + }) + } +} diff --git a/pkg/syncutil/syncutil.go b/pkg/syncutil/syncutil.go new file mode 100644 index 000000000..66e750d06 --- /dev/null +++ b/pkg/syncutil/syncutil.go @@ -0,0 +1,7 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package syncutil provides synchronization primitives. 
+package syncutil diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc index 427c42ede..e08c578f0 100644 --- a/test/syscalls/linux/accept_bind.cc +++ b/test/syscalls/linux/accept_bind.cc @@ -14,8 +14,10 @@ #include #include + #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/accept_bind_stream.cc b/test/syscalls/linux/accept_bind_stream.cc index 7bcd91e9e..4857f160b 100644 --- a/test/syscalls/linux/accept_bind_stream.cc +++ b/test/syscalls/linux/accept_bind_stream.cc @@ -14,8 +14,10 @@ #include #include + #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/chmod.cc b/test/syscalls/linux/chmod.cc index 7e918b9b2..a06b5cfd6 100644 --- a/test/syscalls/linux/chmod.cc +++ b/test/syscalls/linux/chmod.cc @@ -16,6 +16,7 @@ #include #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc index de1611c21..04bc2d7b9 100644 --- a/test/syscalls/linux/chroot.cc +++ b/test/syscalls/linux/chroot.cc @@ -19,6 +19,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/clock_gettime.cc b/test/syscalls/linux/clock_gettime.cc index c9e3ed6b2..2aa91691e 100644 --- a/test/syscalls/linux/clock_gettime.cc +++ b/test/syscalls/linux/clock_gettime.cc @@ -14,6 +14,7 @@ #include #include + #include #include #include diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc index 4e0a13f8b..00b96b34a 100644 --- a/test/syscalls/linux/concurrency.cc +++ b/test/syscalls/linux/concurrency.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc index 0a3931e5a..736452b0c 100644 --- a/test/syscalls/linux/exec_binary.cc +++ b/test/syscalls/linux/exec_binary.cc @@ -20,6 +20,7 @@ #include #include #include + #include #include #include diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h index 4d155b618..4e048320e 100644 --- a/test/syscalls/linux/file_base.h +++ b/test/syscalls/linux/file_base.h @@ -27,6 +27,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc index b4a91455d..3ecb8db8e 100644 --- a/test/syscalls/linux/flock.cc +++ b/test/syscalls/linux/flock.cc @@ -14,6 +14,7 @@ #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index dd6e1a422..371890110 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -20,6 +20,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc index fe9cfafe8..ad2dbacb8 100644 --- a/test/syscalls/linux/getdents.cc +++ b/test/syscalls/linux/getdents.cc @@ -23,6 +23,7 @@ #include #include #include + #include #include #include diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index 57e99596f..8398fc95f 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
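[Illustrative note, not part of the patch.] The surrounding C++ hunks appear to be mechanical #include reordering (the file's own header first, blank lines between include groups). For Go callers, the package move itself is an import-path and package-name rename: the SeqCount implementation added under pkg/syncutil above matches the gvsync version deleted later in this patch apart from the package clause. A minimal before/after sketch:

package example

// Before this change:
//
//	import "gvisor.dev/gvisor/third_party/gvsync"
//	var seq gvsync.SeqCount
//
// After this change:
import "gvisor.dev/gvisor/pkg/syncutil"

var seq syncutil.SeqCount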
+#include "test/syscalls/linux/ip_socket_test_util.h" + #include #include #include #include -#include -#include "test/syscalls/linux/ip_socket_test_util.h" +#include namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/memory_accounting.cc b/test/syscalls/linux/memory_accounting.cc index ff2f49863..94aea4077 100644 --- a/test/syscalls/linux/memory_accounting.cc +++ b/test/syscalls/linux/memory_accounting.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/mlock.cc b/test/syscalls/linux/mlock.cc index 283c21ed3..620b4f8b4 100644 --- a/test/syscalls/linux/mlock.cc +++ b/test/syscalls/linux/mlock.cc @@ -16,6 +16,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index a112316e9..6f2639d8a 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -28,6 +28,7 @@ #include #include #include + #include #include "gmock/gmock.h" diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc index e35be3cab..a3e9745cf 100644 --- a/test/syscalls/linux/mount.cc +++ b/test/syscalls/linux/mount.cc @@ -18,6 +18,7 @@ #include #include #include + #include #include #include diff --git a/test/syscalls/linux/read.cc b/test/syscalls/linux/read.cc index 4430fa3c2..2633ba31b 100644 --- a/test/syscalls/linux/read.cc +++ b/test/syscalls/linux/read.cc @@ -14,6 +14,7 @@ #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/rename.cc b/test/syscalls/linux/rename.cc index 5b474ff32..833c0dc4f 100644 --- a/test/syscalls/linux/rename.cc +++ b/test/syscalls/linux/rename.cc @@ -14,6 +14,7 @@ #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index e77586852..7e41fe7d8 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -25,6 +25,7 @@ #include #include #include + #include #include "gmock/gmock.h" diff --git a/test/syscalls/linux/select.cc b/test/syscalls/linux/select.cc index e06a2666d..424e2a67f 100644 --- a/test/syscalls/linux/select.cc +++ b/test/syscalls/linux/select.cc @@ -16,6 +16,7 @@ #include #include #include + #include #include #include diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc index eb7a3966f..7ba752599 100644 --- a/test/syscalls/linux/shm.cc +++ b/test/syscalls/linux/shm.cc @@ -13,7 +13,6 @@ // limitations under the License. #include - #include #include #include diff --git a/test/syscalls/linux/socket_blocking.cc b/test/syscalls/linux/socket_blocking.cc index d7ce57566..7e88aa2d9 100644 --- a/test/syscalls/linux/socket_blocking.cc +++ b/test/syscalls/linux/socket_blocking.cc @@ -17,6 +17,7 @@ #include #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/socket_ip_loopback_blocking.cc b/test/syscalls/linux/socket_ip_loopback_blocking.cc index d7fc9715b..e58eedaba 100644 --- a/test/syscalls/linux/socket_ip_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_loopback_blocking.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include + #include #include "test/syscalls/linux/ip_socket_test_util.h" diff --git a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc index 0dc274e2d..d11f7cc23 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "test/syscalls/linux/ip_socket_test_util.h" diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc index cd3ad97d0..fcd20102f 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "test/syscalls/linux/ip_socket_test_util.h" diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc index 1acdecc17..63a05b799 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "test/syscalls/linux/ip_socket_test_util.h" diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc index 3c3712b50..80f12b0a9 100644 --- a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc +++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc @@ -18,6 +18,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc index 92f03e045..3ac790873 100644 --- a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc +++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h" + #include #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc index 9d4e1ab97..8f47952b0 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h" + #include #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index 5f05bab10..723f5d728 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -12,15 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include "test/syscalls/linux/socket_netlink_util.h" #include #include +#include #include #include "absl/strings/str_cat.h" -#include "test/syscalls/linux/socket_netlink_util.h" #include "test/syscalls/linux/socket_test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_blocking_local.cc b/test/syscalls/linux/socket_unix_blocking_local.cc index 1994139e6..6f84221b2 100644 --- a/test/syscalls/linux/socket_unix_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_blocking_local.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "test/syscalls/linux/socket_blocking.h" - #include +#include "test/syscalls/linux/socket_blocking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_unix_dgram.cc b/test/syscalls/linux/socket_unix_dgram.cc index 3245cf7c9..af0df4fb4 100644 --- a/test/syscalls/linux/socket_unix_dgram.cc +++ b/test/syscalls/linux/socket_unix_dgram.cc @@ -16,6 +16,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc index cd4fba25c..2db8b68d3 100644 --- a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc +++ b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc index da762cd83..8855d5001 100644 --- a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "test/syscalls/linux/socket_non_stream_blocking.h" - #include +#include "test/syscalls/linux/socket_non_stream_blocking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_unix_seqpacket.cc b/test/syscalls/linux/socket_unix_seqpacket.cc index 60fa9e38a..84d3a569e 100644 --- a/test/syscalls/linux/socket_unix_seqpacket.cc +++ b/test/syscalls/linux/socket_unix_seqpacket.cc @@ -16,6 +16,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_stream_blocking_local.cc index fa0a9d367..08e579ba7 100644 --- a/test/syscalls/linux/socket_unix_stream_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_stream_blocking_local.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "test/syscalls/linux/socket_stream_blocking.h" - #include +#include "test/syscalls/linux/socket_stream_blocking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc index ec777c59f..1936aa135 100644 --- a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc +++ b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc @@ -11,10 +11,9 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#include "test/syscalls/linux/socket_stream_nonblock.h" - #include +#include "test/syscalls/linux/socket_stream_nonblock.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_unix_unbound_abstract.cc b/test/syscalls/linux/socket_unix_unbound_abstract.cc index 7f5816ace..8b1762000 100644 --- a/test/syscalls/linux/socket_unix_unbound_abstract.cc +++ b/test/syscalls/linux/socket_unix_unbound_abstract.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_unbound_filesystem.cc b/test/syscalls/linux/socket_unix_unbound_filesystem.cc index b14f24086..cab912152 100644 --- a/test/syscalls/linux/socket_unix_unbound_filesystem.cc +++ b/test/syscalls/linux/socket_unix_unbound_filesystem.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc index 50ffa1d04..cb99030f5 100644 --- a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc +++ b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc index 344918c34..f185dded3 100644 --- a/test/syscalls/linux/socket_unix_unbound_stream.cc +++ b/test/syscalls/linux/socket_unix_unbound_stream.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/sync.cc b/test/syscalls/linux/sync.cc index fe479390d..8aa2525a9 100644 --- a/test/syscalls/linux/sync.cc +++ b/test/syscalls/linux/sync.cc @@ -14,10 +14,9 @@ #include #include -#include - #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/truncate.cc b/test/syscalls/linux/truncate.cc index e5cc5d97c..c988c6380 100644 --- a/test/syscalls/linux/truncate.cc +++ b/test/syscalls/linux/truncate.cc @@ -19,6 +19,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/unix_domain_socket_test_util.cc b/test/syscalls/linux/unix_domain_socket_test_util.cc index 7fb9eed8d..b05ab2900 100644 --- a/test/syscalls/linux/unix_domain_socket_test_util.cc +++ b/test/syscalls/linux/unix_domain_socket_test_util.cc @@ -15,6 +15,7 @@ #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h index 5eca0b7f0..b8073db17 100644 --- a/test/syscalls/linux/unix_domain_socket_test_util.h +++ b/test/syscalls/linux/unix_domain_socket_test_util.h @@ -16,6 +16,7 @@ #define GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_ #include + #include "test/syscalls/linux/socket_test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc index 80716859a..12b925a51 100644 --- a/test/syscalls/linux/utimes.cc +++ 
b/test/syscalls/linux/utimes.cc @@ -20,6 +20,7 @@ #include #include #include + #include #include "absl/time/time.h" diff --git a/test/syscalls/linux/vdso_clock_gettime.cc b/test/syscalls/linux/vdso_clock_gettime.cc index 40c0014b9..ce1899f45 100644 --- a/test/syscalls/linux/vdso_clock_gettime.cc +++ b/test/syscalls/linux/vdso_clock_gettime.cc @@ -17,6 +17,7 @@ #include #include #include + #include #include #include diff --git a/test/util/fs_util_test.cc b/test/util/fs_util_test.cc index 2a200320a..657b6a46e 100644 --- a/test/util/fs_util_test.cc +++ b/test/util/fs_util_test.cc @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "test/util/fs_util.h" + #include + #include #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "test/util/fs_util.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" diff --git a/test/util/mount_util.h b/test/util/mount_util.h index 38ec6c8a1..484de560e 100644 --- a/test/util/mount_util.h +++ b/test/util/mount_util.h @@ -17,6 +17,7 @@ #include #include + #include #include diff --git a/test/util/posix_error_test.cc b/test/util/posix_error_test.cc index d67270842..bf9465abb 100644 --- a/test/util/posix_error_test.cc +++ b/test/util/posix_error_test.cc @@ -15,6 +15,7 @@ #include "test/util/posix_error.h" #include + #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/test/util/rlimit_util.cc b/test/util/rlimit_util.cc index 684253f78..d7bfc1606 100644 --- a/test/util/rlimit_util.cc +++ b/test/util/rlimit_util.cc @@ -15,6 +15,7 @@ #include "test/util/rlimit_util.h" #include + #include #include "test/util/cleanup.h" diff --git a/test/util/signal_util.cc b/test/util/signal_util.cc index 26738864f..5ee95ee80 100644 --- a/test/util/signal_util.cc +++ b/test/util/signal_util.cc @@ -15,6 +15,7 @@ #include "test/util/signal_util.h" #include + #include #include "gtest/gtest.h" diff --git a/test/util/signal_util.h b/test/util/signal_util.h index 7fd2af015..bcf85c337 100644 --- a/test/util/signal_util.h +++ b/test/util/signal_util.h @@ -18,6 +18,7 @@ #include #include #include + #include #include "gmock/gmock.h" diff --git a/test/util/temp_path.h b/test/util/temp_path.h index 92d669503..9e5ac11f4 100644 --- a/test/util/temp_path.h +++ b/test/util/temp_path.h @@ -16,6 +16,7 @@ #define GVISOR_TEST_UTIL_TEMP_PATH_H_ #include + #include #include diff --git a/test/util/test_util_test.cc b/test/util/test_util_test.cc index b7300d9e5..f42100374 100644 --- a/test/util/test_util_test.cc +++ b/test/util/test_util_test.cc @@ -15,6 +15,7 @@ #include "test/util/test_util.h" #include + #include #include "gmock/gmock.h" diff --git a/third_party/gvsync/BUILD b/third_party/gvsync/BUILD deleted file mode 100644 index 7d6d59c48..000000000 --- a/third_party/gvsync/BUILD +++ /dev/null @@ -1,53 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template") - -package( - default_visibility = ["//:sandbox"], - licenses = ["notice"], -) - -exports_files(["LICENSE"]) - -go_template( - name = "generic_atomicptr", - srcs = ["atomicptr_unsafe.go"], - types = [ - "Value", - ], -) - -go_template( - name = "generic_seqatomic", - srcs = ["seqatomic_unsafe.go"], - types = [ - "Value", - ], - deps = [ - ":sync", - ], -) - -go_library( - name = "gvsync", - srcs = [ - "downgradable_rwmutex_1_12_unsafe.go", - "downgradable_rwmutex_1_13_unsafe.go", - "downgradable_rwmutex_unsafe.go", - "gvsync.go", - 
"memmove_unsafe.go", - "norace_unsafe.go", - "race_unsafe.go", - "seqcount.go", - ], - importpath = "gvisor.dev/gvisor/third_party/gvsync", -) - -go_test( - name = "gvsync_test", - size = "small", - srcs = [ - "downgradable_rwmutex_test.go", - "seqcount_test.go", - ], - embed = [":gvsync"], -) diff --git a/third_party/gvsync/LICENSE b/third_party/gvsync/LICENSE deleted file mode 100644 index 6a66aea5e..000000000 --- a/third_party/gvsync/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/gvsync/README.md b/third_party/gvsync/README.md deleted file mode 100644 index fcc7e6f44..000000000 --- a/third_party/gvsync/README.md +++ /dev/null @@ -1,3 +0,0 @@ -This package provides additional synchronization primitives not provided by the -Go stdlib 'sync' package. It is partially derived from the upstream 'sync' -package. diff --git a/third_party/gvsync/atomicptr_unsafe.go b/third_party/gvsync/atomicptr_unsafe.go deleted file mode 100644 index 525c4beed..000000000 --- a/third_party/gvsync/atomicptr_unsafe.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package template doesn't exist. This file must be instantiated using the -// go_template_instance rule in tools/go_generics/defs.bzl. -package template - -import ( - "sync/atomic" - "unsafe" -) - -// Value is a required type parameter. -type Value struct{} - -// An AtomicPtr is a pointer to a value of type Value that can be atomically -// loaded and stored. The zero value of an AtomicPtr represents nil. -// -// Note that copying AtomicPtr by value performs a non-atomic read of the -// stored pointer, which is unsafe if Store() can be called concurrently; in -// this case, do `dst.Store(src.Load())` instead. 
-// -// +stateify savable -type AtomicPtr struct { - ptr unsafe.Pointer `state:".(*Value)"` -} - -func (p *AtomicPtr) savePtr() *Value { - return p.Load() -} - -func (p *AtomicPtr) loadPtr(v *Value) { - p.Store(v) -} - -// Load returns the value set by the most recent Store. It returns nil if there -// has been no previous call to Store. -func (p *AtomicPtr) Load() *Value { - return (*Value)(atomic.LoadPointer(&p.ptr)) -} - -// Store sets the value returned by Load to x. -func (p *AtomicPtr) Store(x *Value) { - atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) -} diff --git a/third_party/gvsync/atomicptrtest/BUILD b/third_party/gvsync/atomicptrtest/BUILD deleted file mode 100644 index 447ecf96a..000000000 --- a/third_party/gvsync/atomicptrtest/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "atomicptr_int", - out = "atomicptr_int_unsafe.go", - package = "atomicptr", - suffix = "Int", - template = "//third_party/gvsync:generic_atomicptr", - types = { - "Value": "int", - }, -) - -go_library( - name = "atomicptr", - srcs = ["atomicptr_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/third_party/gvsync/atomicptr", -) - -go_test( - name = "atomicptr_test", - size = "small", - srcs = ["atomicptr_test.go"], - embed = [":atomicptr"], -) diff --git a/third_party/gvsync/atomicptrtest/atomicptr_test.go b/third_party/gvsync/atomicptrtest/atomicptr_test.go deleted file mode 100644 index 8fdc5112e..000000000 --- a/third_party/gvsync/atomicptrtest/atomicptr_test.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package atomicptr - -import ( - "testing" -) - -func newInt(val int) *int { - return &val -} - -func TestAtomicPtr(t *testing.T) { - var p AtomicPtrInt - if got := p.Load(); got != nil { - t.Errorf("initial value is %p (%v), wanted nil", got, got) - } - want := newInt(42) - p.Store(want) - if got := p.Load(); got != want { - t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) - } - want = newInt(100) - p.Store(want) - if got := p.Load(); got != want { - t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) - } -} diff --git a/third_party/gvsync/downgradable_rwmutex_1_12_unsafe.go b/third_party/gvsync/downgradable_rwmutex_1_12_unsafe.go deleted file mode 100644 index 855b2a2b1..000000000 --- a/third_party/gvsync/downgradable_rwmutex_1_12_unsafe.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.12 -// +build !go1.13 - -// TODO(b/133868570): Delete once Go 1.12 is no longer supported. - -package gvsync - -import _ "unsafe" - -//go:linkname runtimeSemrelease112 sync.runtime_Semrelease -func runtimeSemrelease112(s *uint32, handoff bool) - -func runtimeSemrelease(s *uint32, handoff bool, skipframes int) { - // 'skipframes' is only available starting from 1.13. 
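[Illustrative note, not part of the patch.] The deleted AtomicPtr template above warns that copying an AtomicPtr by value reads the stored pointer non-atomically, and recommends dst.Store(src.Load()) instead. Written against the AtomicPtrInt instantiation from the atomicptrtest package, that pattern looks like the sketch below (copyPtr is a hypothetical helper):

package atomicptr

// copyPtr publishes src's current pointer into dst. Assigning *dst = *src
// instead would copy the struct, and hence the stored pointer, without
// atomicity, which is racy if Store can be called concurrently.
func copyPtr(dst, src *AtomicPtrInt) {
	dst.Store(src.Load())
}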
- runtimeSemrelease112(s, handoff) -} diff --git a/third_party/gvsync/downgradable_rwmutex_1_13_unsafe.go b/third_party/gvsync/downgradable_rwmutex_1_13_unsafe.go deleted file mode 100644 index 3b9346843..000000000 --- a/third_party/gvsync/downgradable_rwmutex_1_13_unsafe.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.13 -// +build !go1.15 - -// Check go:linkname function signatures when updating Go version. - -package gvsync - -import _ "unsafe" - -//go:linkname runtimeSemrelease sync.runtime_Semrelease -func runtimeSemrelease(s *uint32, handoff bool, skipframes int) diff --git a/third_party/gvsync/downgradable_rwmutex_test.go b/third_party/gvsync/downgradable_rwmutex_test.go deleted file mode 100644 index 40c384b8b..000000000 --- a/third_party/gvsync/downgradable_rwmutex_test.go +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// GOMAXPROCS=10 go test - -// Copy/pasted from the standard library's sync/rwmutex_test.go, except for the -// addition of downgradingWriter and the renaming of num_iterations to -// numIterations to shut up Golint. - -package gvsync - -import ( - "fmt" - "runtime" - "sync/atomic" - "testing" -) - -func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { - m.RLock() - clocked <- true - <-cunlock - m.RUnlock() - cdone <- true -} - -func doTestParallelReaders(numReaders, gomaxprocs int) { - runtime.GOMAXPROCS(gomaxprocs) - var m DowngradableRWMutex - clocked := make(chan bool) - cunlock := make(chan bool) - cdone := make(chan bool) - for i := 0; i < numReaders; i++ { - go parallelReader(&m, clocked, cunlock, cdone) - } - // Wait for all parallel RLock()s to succeed. - for i := 0; i < numReaders; i++ { - <-clocked - } - for i := 0; i < numReaders; i++ { - cunlock <- true - } - // Wait for the goroutines to finish. 
- for i := 0; i < numReaders; i++ { - <-cdone - } -} - -func TestParallelReaders(t *testing.T) { - defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) - doTestParallelReaders(1, 4) - doTestParallelReaders(3, 4) - doTestParallelReaders(4, 2) -} - -func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.RLock() - n := atomic.AddInt32(activity, 1) - if n < 1 || n >= 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -1) - rwm.RUnlock() - } - cdone <- true -} - -func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.Lock() - n := atomic.AddInt32(activity, 10000) - if n != 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -10000) - rwm.Unlock() - } - cdone <- true -} - -func downgradingWriter(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.Lock() - n := atomic.AddInt32(activity, 10000) - if n != 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -10000) - rwm.DowngradeLock() - n = atomic.AddInt32(activity, 1) - if n < 1 || n >= 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - n = atomic.AddInt32(activity, -1) - rwm.RUnlock() - } - cdone <- true -} - -func HammerDowngradableRWMutex(gomaxprocs, numReaders, numIterations int) { - runtime.GOMAXPROCS(gomaxprocs) - // Number of active readers + 10000 * number of active writers. - var activity int32 - var rwm DowngradableRWMutex - cdone := make(chan bool) - go writer(&rwm, numIterations, &activity, cdone) - go downgradingWriter(&rwm, numIterations, &activity, cdone) - var i int - for i = 0; i < numReaders/2; i++ { - go reader(&rwm, numIterations, &activity, cdone) - } - go writer(&rwm, numIterations, &activity, cdone) - go downgradingWriter(&rwm, numIterations, &activity, cdone) - for ; i < numReaders; i++ { - go reader(&rwm, numIterations, &activity, cdone) - } - // Wait for the 4 writers and all readers to finish. - for i := 0; i < 4+numReaders; i++ { - <-cdone - } -} - -func TestDowngradableRWMutex(t *testing.T) { - defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) - n := 1000 - if testing.Short() { - n = 5 - } - HammerDowngradableRWMutex(1, 1, n) - HammerDowngradableRWMutex(1, 3, n) - HammerDowngradableRWMutex(1, 10, n) - HammerDowngradableRWMutex(4, 1, n) - HammerDowngradableRWMutex(4, 3, n) - HammerDowngradableRWMutex(4, 10, n) - HammerDowngradableRWMutex(10, 1, n) - HammerDowngradableRWMutex(10, 3, n) - HammerDowngradableRWMutex(10, 10, n) - HammerDowngradableRWMutex(10, 5, n) -} diff --git a/third_party/gvsync/downgradable_rwmutex_unsafe.go b/third_party/gvsync/downgradable_rwmutex_unsafe.go deleted file mode 100644 index b7862d185..000000000 --- a/third_party/gvsync/downgradable_rwmutex_unsafe.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.12 -// +build !go1.15 - -// Check go:linkname function signatures when updating Go version. - -// This is mostly copied from the standard library's sync/rwmutex.go. 
-// -// Happens-before relationships indicated to the race detector: -// - Unlock -> Lock (via writerSem) -// - Unlock -> RLock (via readerSem) -// - RUnlock -> Lock (via writerSem) -// - DowngradeLock -> RLock (via readerSem) - -package gvsync - -import ( - "sync" - "sync/atomic" - "unsafe" -) - -//go:linkname runtimeSemacquire sync.runtime_Semacquire -func runtimeSemacquire(s *uint32) - -// DowngradableRWMutex is identical to sync.RWMutex, but adds the DowngradeLock -// method. -type DowngradableRWMutex struct { - w sync.Mutex // held if there are pending writers - writerSem uint32 // semaphore for writers to wait for completing readers - readerSem uint32 // semaphore for readers to wait for completing writers - readerCount int32 // number of pending readers - readerWait int32 // number of departing readers -} - -const rwmutexMaxReaders = 1 << 30 - -// RLock locks rw for reading. -func (rw *DowngradableRWMutex) RLock() { - if RaceEnabled { - RaceDisable() - } - if atomic.AddInt32(&rw.readerCount, 1) < 0 { - // A writer is pending, wait for it. - runtimeSemacquire(&rw.readerSem) - } - if RaceEnabled { - RaceEnable() - RaceAcquire(unsafe.Pointer(&rw.readerSem)) - } -} - -// RUnlock undoes a single RLock call. -func (rw *DowngradableRWMutex) RUnlock() { - if RaceEnabled { - RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) - RaceDisable() - } - if r := atomic.AddInt32(&rw.readerCount, -1); r < 0 { - if r+1 == 0 || r+1 == -rwmutexMaxReaders { - panic("RUnlock of unlocked DowngradableRWMutex") - } - // A writer is pending. - if atomic.AddInt32(&rw.readerWait, -1) == 0 { - // The last reader unblocks the writer. - runtimeSemrelease(&rw.writerSem, false, 0) - } - } - if RaceEnabled { - RaceEnable() - } -} - -// Lock locks rw for writing. -func (rw *DowngradableRWMutex) Lock() { - if RaceEnabled { - RaceDisable() - } - // First, resolve competition with other writers. - rw.w.Lock() - // Announce to readers there is a pending writer. - r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders - // Wait for active readers. - if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 { - runtimeSemacquire(&rw.writerSem) - } - if RaceEnabled { - RaceEnable() - RaceAcquire(unsafe.Pointer(&rw.writerSem)) - } -} - -// Unlock unlocks rw for writing. -func (rw *DowngradableRWMutex) Unlock() { - if RaceEnabled { - RaceRelease(unsafe.Pointer(&rw.writerSem)) - RaceRelease(unsafe.Pointer(&rw.readerSem)) - RaceDisable() - } - // Announce to readers there is no active writer. - r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) - if r >= rwmutexMaxReaders { - panic("Unlock of unlocked DowngradableRWMutex") - } - // Unblock blocked readers, if any. - for i := 0; i < int(r); i++ { - runtimeSemrelease(&rw.readerSem, false, 0) - } - // Allow other writers to proceed. - rw.w.Unlock() - if RaceEnabled { - RaceEnable() - } -} - -// DowngradeLock atomically unlocks rw for writing and locks it for reading. -func (rw *DowngradableRWMutex) DowngradeLock() { - if RaceEnabled { - RaceRelease(unsafe.Pointer(&rw.readerSem)) - RaceDisable() - } - // Announce to readers there is no active writer and one additional reader. - r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders+1) - if r >= rwmutexMaxReaders+1 { - panic("DowngradeLock of unlocked DowngradableRWMutex") - } - // Unblock blocked readers, if any. Note that this loop starts as 1 since r - // includes this goroutine. - for i := 1; i < int(r); i++ { - runtimeSemrelease(&rw.readerSem, false, 0) - } - // Allow other writers to proceed to rw.w.Lock(). 
Note that they will still - // block on rw.writerSem since at least this reader exists, such that - // DowngradeLock() is atomic with the previous write lock. - rw.w.Unlock() - if RaceEnabled { - RaceEnable() - } -} diff --git a/third_party/gvsync/gvsync.go b/third_party/gvsync/gvsync.go deleted file mode 100644 index 3bbef13c3..000000000 --- a/third_party/gvsync/gvsync.go +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package gvsync provides synchronization primitives. -package gvsync diff --git a/third_party/gvsync/memmove_unsafe.go b/third_party/gvsync/memmove_unsafe.go deleted file mode 100644 index 9dd1d6142..000000000 --- a/third_party/gvsync/memmove_unsafe.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.12 -// +build !go1.15 - -// Check go:linkname function signatures when updating Go version. - -package gvsync - -import ( - "unsafe" -) - -//go:linkname memmove runtime.memmove -//go:noescape -func memmove(to, from unsafe.Pointer, n uintptr) - -// Memmove is exported for SeqAtomicLoad/SeqAtomicTryLoad, which can't -// define it because go_generics can't update the go:linkname annotation. -// Furthermore, go:linkname silently doesn't work if the local name is exported -// (this is of course undocumented), which is why this indirection is -// necessary. -func Memmove(to, from unsafe.Pointer, n uintptr) { - memmove(to, from, n) -} diff --git a/third_party/gvsync/norace_unsafe.go b/third_party/gvsync/norace_unsafe.go deleted file mode 100644 index e3852db8c..000000000 --- a/third_party/gvsync/norace_unsafe.go +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !race - -package gvsync - -import ( - "unsafe" -) - -// RaceEnabled is true if the Go data race detector is enabled. -const RaceEnabled = false - -// RaceDisable has the same semantics as runtime.RaceDisable. -func RaceDisable() { -} - -// RaceEnable has the same semantics as runtime.RaceEnable. -func RaceEnable() { -} - -// RaceAcquire has the same semantics as runtime.RaceAcquire. -func RaceAcquire(addr unsafe.Pointer) { -} - -// RaceRelease has the same semantics as runtime.RaceRelease. -func RaceRelease(addr unsafe.Pointer) { -} - -// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. -func RaceReleaseMerge(addr unsafe.Pointer) { -} diff --git a/third_party/gvsync/race_unsafe.go b/third_party/gvsync/race_unsafe.go deleted file mode 100644 index 13c02a830..000000000 --- a/third_party/gvsync/race_unsafe.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build race - -package gvsync - -import ( - "runtime" - "unsafe" -) - -// RaceEnabled is true if the Go data race detector is enabled. -const RaceEnabled = true - -// RaceDisable has the same semantics as runtime.RaceDisable. -func RaceDisable() { - runtime.RaceDisable() -} - -// RaceEnable has the same semantics as runtime.RaceEnable. -func RaceEnable() { - runtime.RaceEnable() -} - -// RaceAcquire has the same semantics as runtime.RaceAcquire. 
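[Illustrative note, not part of the patch.] The deleted downgradable_rwmutex files above document the one extension over sync.RWMutex: DowngradeLock atomically turns a held write lock into a read lock, admitting readers while still excluding other writers until this reader calls RUnlock. A minimal sketch of that pattern, written against the new import path on the assumption that the type is carried over to pkg/syncutil unchanged (the cache type is hypothetical):

package example

import "gvisor.dev/gvisor/pkg/syncutil"

// cache is a hypothetical map guarded by a DowngradableRWMutex.
type cache struct {
	mu   syncutil.DowngradableRWMutex
	data map[string]string
}

func newCache() *cache {
	return &cache{data: make(map[string]string)}
}

// getOrFill returns data[key], computing it under the write lock if absent.
// Because DowngradeLock is atomic with the preceding Lock, no other writer
// can delete or replace the entry between the fill and the read below.
func (c *cache) getOrFill(key string, fill func() string) string {
	c.mu.Lock()
	if _, ok := c.data[key]; !ok {
		c.data[key] = fill()
	}
	c.mu.DowngradeLock()
	v := c.data[key] // read lock held; concurrent readers may proceed.
	c.mu.RUnlock()
	return v
}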
-func RaceAcquire(addr unsafe.Pointer) { - runtime.RaceAcquire(addr) -} - -// RaceRelease has the same semantics as runtime.RaceRelease. -func RaceRelease(addr unsafe.Pointer) { - runtime.RaceRelease(addr) -} - -// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. -func RaceReleaseMerge(addr unsafe.Pointer) { - runtime.RaceReleaseMerge(addr) -} diff --git a/third_party/gvsync/seqatomic_unsafe.go b/third_party/gvsync/seqatomic_unsafe.go deleted file mode 100644 index 382eeed43..000000000 --- a/third_party/gvsync/seqatomic_unsafe.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package template doesn't exist. This file must be instantiated using the -// go_template_instance rule in tools/go_generics/defs.bzl. -package template - -import ( - "fmt" - "reflect" - "strings" - "unsafe" - - "gvisor.dev/gvisor/third_party/gvsync" -) - -// Value is a required type parameter. -// -// Value must not contain any pointers, including interface objects, function -// objects, slices, maps, channels, unsafe.Pointer, and arrays or structs -// containing any of the above. An init() function will panic if this property -// does not hold. -type Value struct{} - -// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race -// with any writer critical sections in sc. -func SeqAtomicLoad(sc *gvsync.SeqCount, ptr *Value) Value { - // This function doesn't use SeqAtomicTryLoad because doing so is - // measurably, significantly (~20%) slower; Go is awful at inlining. - var val Value - for { - epoch := sc.BeginRead() - if gvsync.RaceEnabled { - // runtime.RaceDisable() doesn't actually stop the race detector, - // so it can't help us here. Instead, call runtime.memmove - // directly, which is not instrumented by the race detector. - gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) - } else { - // This is ~40% faster for short reads than going through memmove. - val = *ptr - } - if sc.ReadOk(epoch) { - break - } - } - return val -} - -// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section -// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read -// would race with a writer critical section, SeqAtomicTryLoad returns -// (unspecified, false). 
-func SeqAtomicTryLoad(sc *gvsync.SeqCount, epoch gvsync.SeqCountEpoch, ptr *Value) (Value, bool) { - var val Value - if gvsync.RaceEnabled { - gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) - } else { - val = *ptr - } - return val, sc.ReadOk(epoch) -} - -func init() { - var val Value - typ := reflect.TypeOf(val) - name := typ.Name() - if ptrs := gvsync.PointersInType(typ, name); len(ptrs) != 0 { - panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) - } -} diff --git a/third_party/gvsync/seqatomictest/BUILD b/third_party/gvsync/seqatomictest/BUILD deleted file mode 100644 index c858c20c4..000000000 --- a/third_party/gvsync/seqatomictest/BUILD +++ /dev/null @@ -1,34 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "seqatomic_int", - out = "seqatomic_int_unsafe.go", - package = "seqatomic", - suffix = "Int", - template = "//third_party/gvsync:generic_seqatomic", - types = { - "Value": "int", - }, -) - -go_library( - name = "seqatomic", - srcs = ["seqatomic_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/third_party/gvsync/seqatomic", - deps = [ - "//third_party/gvsync", - ], -) - -go_test( - name = "seqatomic_test", - size = "small", - srcs = ["seqatomic_test.go"], - embed = [":seqatomic"], - deps = [ - "//third_party/gvsync", - ], -) diff --git a/third_party/gvsync/seqatomictest/seqatomic_test.go b/third_party/gvsync/seqatomictest/seqatomic_test.go deleted file mode 100644 index a5447f589..000000000 --- a/third_party/gvsync/seqatomictest/seqatomic_test.go +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package seqatomic - -import ( - "sync/atomic" - "testing" - "time" - - "gvisor.dev/gvisor/third_party/gvsync" -) - -func TestSeqAtomicLoadUncontended(t *testing.T) { - var seq gvsync.SeqCount - const want = 1 - data := want - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicLoadAfterWrite(t *testing.T) { - var seq gvsync.SeqCount - var data int - const want = 1 - seq.BeginWrite() - data = want - seq.EndWrite() - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicLoadDuringWrite(t *testing.T) { - var seq gvsync.SeqCount - var data int - const want = 1 - seq.BeginWrite() - go func() { - time.Sleep(time.Second) - data = want - seq.EndWrite() - }() - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicTryLoadUncontended(t *testing.T) { - var seq gvsync.SeqCount - const want = 1 - data := want - epoch := seq.BeginRead() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { - t.Errorf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) - } -} - -func TestSeqAtomicTryLoadDuringWrite(t *testing.T) { - var seq gvsync.SeqCount - var data int - epoch := seq.BeginRead() - seq.BeginWrite() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { - t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) - } - seq.EndWrite() -} - -func TestSeqAtomicTryLoadAfterWrite(t *testing.T) { - var seq gvsync.SeqCount - var data int - epoch := seq.BeginRead() - seq.BeginWrite() - seq.EndWrite() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { - t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) - } -} - -func BenchmarkSeqAtomicLoadIntUncontended(b *testing.B) { - var seq gvsync.SeqCount - const want = 42 - data := want - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - if got := SeqAtomicLoadInt(&seq, &data); got != want { - b.Fatalf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } - } - }) -} - -func BenchmarkSeqAtomicTryLoadIntUncontended(b *testing.B) { - var seq gvsync.SeqCount - const want = 42 - data := want - b.RunParallel(func(pb *testing.PB) { - epoch := seq.BeginRead() - for pb.Next() { - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { - b.Fatalf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) - } - } - }) -} - -// For comparison: -func BenchmarkAtomicValueLoadIntUncontended(b *testing.B) { - var a atomic.Value - const want = 42 - a.Store(int(want)) - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - if got := a.Load().(int); got != want { - b.Fatalf("atomic.Value.Load: got %v, wanted %v", got, want) - } - } - }) -} diff --git a/third_party/gvsync/seqcount.go b/third_party/gvsync/seqcount.go deleted file mode 100644 index 2c9c2c3d6..000000000 --- a/third_party/gvsync/seqcount.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package gvsync - -import ( - "fmt" - "reflect" - "runtime" - "sync/atomic" -) - -// SeqCount is a synchronization primitive for optimistic reader/writer -// synchronization in cases where readers can work with stale data and -// therefore do not need to block writers. 
-// -// Compared to sync/atomic.Value: -// -// - Mutation of SeqCount-protected data does not require memory allocation, -// whereas atomic.Value generally does. This is a significant advantage when -// writes are common. -// -// - Atomic reads of SeqCount-protected data require copying. This is a -// disadvantage when atomic reads are common. -// -// - SeqCount may be more flexible: correct use of SeqCount.ReadOk allows other -// operations to be made atomic with reads of SeqCount-protected data. -// -// - SeqCount may be less flexible: as of this writing, SeqCount-protected data -// cannot include pointers. -// -// - SeqCount is more cumbersome to use; atomic reads of SeqCount-protected -// data require instantiating function templates using go_generics (see -// seqatomic.go). -type SeqCount struct { - // epoch is incremented by BeginWrite and EndWrite, such that epoch is odd - // if a writer critical section is active, and a read from data protected - // by this SeqCount is atomic iff epoch is the same even value before and - // after the read. - epoch uint32 -} - -// SeqCountEpoch tracks writer critical sections in a SeqCount. -type SeqCountEpoch struct { - val uint32 -} - -// We assume that: -// -// - All functions in sync/atomic that perform a memory read are at least a -// read fence: memory reads before calls to such functions cannot be reordered -// after the call, and memory reads after calls to such functions cannot be -// reordered before the call, even if those reads do not use sync/atomic. -// -// - All functions in sync/atomic that perform a memory write are at least a -// write fence: memory writes before calls to such functions cannot be -// reordered after the call, and memory writes after calls to such functions -// cannot be reordered before the call, even if those writes do not use -// sync/atomic. -// -// As of this writing, the Go memory model completely fails to describe -// sync/atomic, but these properties are implied by -// https://groups.google.com/forum/#!topic/golang-nuts/7EnEhM3U7B8. - -// BeginRead indicates the beginning of a reader critical section. Reader -// critical sections DO NOT BLOCK writer critical sections, so operations in a -// reader critical section MAY RACE with writer critical sections. Races are -// detected by ReadOk at the end of the reader critical section. Thus, the -// low-level structure of readers is generally: -// -// for { -// epoch := seq.BeginRead() -// // do something idempotent with seq-protected data -// if seq.ReadOk(epoch) { -// break -// } -// } -// -// However, since reader critical sections may race with writer critical -// sections, the Go race detector will (accurately) flag data races in readers -// using this pattern. Most users of SeqCount will need to use the -// SeqAtomicLoad function template in seqatomic.go. -func (s *SeqCount) BeginRead() SeqCountEpoch { - epoch := atomic.LoadUint32(&s.epoch) - for epoch&1 != 0 { - runtime.Gosched() - epoch = atomic.LoadUint32(&s.epoch) - } - return SeqCountEpoch{epoch} -} - -// ReadOk returns true if the reader critical section initiated by a previous -// call to BeginRead() that returned epoch did not race with any writer critical -// sections. -// -// ReadOk may be called any number of times during a reader critical section. -// Reader critical sections do not need to be explicitly terminated; the last -// call to ReadOk is implicitly the end of the reader critical section. 
-func (s *SeqCount) ReadOk(epoch SeqCountEpoch) bool { - return atomic.LoadUint32(&s.epoch) == epoch.val -} - -// BeginWrite indicates the beginning of a writer critical section. -// -// SeqCount does not support concurrent writer critical sections; clients with -// concurrent writers must synchronize them using e.g. sync.Mutex. -func (s *SeqCount) BeginWrite() { - if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 == 0 { - panic("SeqCount.BeginWrite during writer critical section") - } -} - -// EndWrite ends the effect of a preceding BeginWrite. -func (s *SeqCount) EndWrite() { - if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 != 0 { - panic("SeqCount.EndWrite outside writer critical section") - } -} - -// PointersInType returns a list of pointers reachable from values named -// valName of the given type. -// -// PointersInType is not exhaustive, but it is guaranteed that if typ contains -// at least one pointer, then PointersInTypeOf returns a non-empty list. -func PointersInType(typ reflect.Type, valName string) []string { - switch kind := typ.Kind(); kind { - case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: - return nil - - case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice, reflect.String, reflect.UnsafePointer: - return []string{valName} - - case reflect.Array: - return PointersInType(typ.Elem(), valName+"[]") - - case reflect.Struct: - var ptrs []string - for i, n := 0, typ.NumField(); i < n; i++ { - field := typ.Field(i) - ptrs = append(ptrs, PointersInType(field.Type, fmt.Sprintf("%s.%s", valName, field.Name))...) - } - return ptrs - - default: - return []string{fmt.Sprintf("%s (of type %s with unknown kind %s)", valName, typ, kind)} - } -} diff --git a/third_party/gvsync/seqcount_test.go b/third_party/gvsync/seqcount_test.go deleted file mode 100644 index 085e574b3..000000000 --- a/third_party/gvsync/seqcount_test.go +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
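// A minimal usage sketch of the SeqCount API documented above (the stats type,
// its fields, and the use of sync.Mutex to serialize writers are hypothetical,
// chosen only for illustration; real readers would normally go through the
// SeqAtomicLoad template instead so the race detector does not flag the raw
// field read):
//
//	type stats struct {
//		writerMu sync.Mutex      // serializes writer critical sections
//		seq      gvsync.SeqCount
//		val      int64           // SeqCount-protected; must not contain pointers
//	}
//
//	func (s *stats) store(v int64) {
//		s.writerMu.Lock()
//		defer s.writerMu.Unlock()
//		s.seq.BeginWrite()
//		s.val = v
//		s.seq.EndWrite()
//	}
//
//	func (s *stats) load() int64 {
//		for {
//			epoch := s.seq.BeginRead()
//			v := s.val // may race with a writer; validated by ReadOk below
//			if s.seq.ReadOk(epoch) {
//				return v
//			}
//		}
//	}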
- -package gvsync - -import ( - "reflect" - "testing" - "time" -) - -func TestSeqCountWriteUncontended(t *testing.T) { - var seq SeqCount - seq.BeginWrite() - seq.EndWrite() -} - -func TestSeqCountReadUncontended(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountBeginReadAfterWrite(t *testing.T) { - var seq SeqCount - var data int32 - const want = 1 - seq.BeginWrite() - data = want - seq.EndWrite() - epoch := seq.BeginRead() - if data != want { - t.Errorf("Reader: got %v, wanted %v", data, want) - } - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountBeginReadDuringWrite(t *testing.T) { - var seq SeqCount - var data int - const want = 1 - seq.BeginWrite() - go func() { - time.Sleep(time.Second) - data = want - seq.EndWrite() - }() - epoch := seq.BeginRead() - if data != want { - t.Errorf("Reader: got %v, wanted %v", data, want) - } - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountReadOkAfterWrite(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - seq.BeginWrite() - seq.EndWrite() - if seq.ReadOk(epoch) { - t.Errorf("ReadOk: got true, wanted false") - } -} - -func TestSeqCountReadOkDuringWrite(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - seq.BeginWrite() - if seq.ReadOk(epoch) { - t.Errorf("ReadOk: got true, wanted false") - } - seq.EndWrite() -} - -func BenchmarkSeqCountWriteUncontended(b *testing.B) { - var seq SeqCount - for i := 0; i < b.N; i++ { - seq.BeginWrite() - seq.EndWrite() - } -} - -func BenchmarkSeqCountReadUncontended(b *testing.B) { - var seq SeqCount - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - epoch := seq.BeginRead() - if !seq.ReadOk(epoch) { - b.Fatalf("ReadOk: got false, wanted true") - } - } - }) -} - -func TestPointersInType(t *testing.T) { - for _, test := range []struct { - name string // used for both test and value name - val interface{} - ptrs []string - }{ - { - name: "EmptyStruct", - val: struct{}{}, - }, - { - name: "Int", - val: int(0), - }, - { - name: "MixedStruct", - val: struct { - b bool - I int - ExportedPtr *struct{} - unexportedPtr *struct{} - arr [2]int - ptrArr [2]*int - nestedStruct struct { - nestedNonptr int - nestedPtr *int - } - structArr [1]struct { - nonptr int - ptr *int - } - }{}, - ptrs: []string{ - "MixedStruct.ExportedPtr", - "MixedStruct.unexportedPtr", - "MixedStruct.ptrArr[]", - "MixedStruct.nestedStruct.nestedPtr", - "MixedStruct.structArr[].ptr", - }, - }, - } { - t.Run(test.name, func(t *testing.T) { - typ := reflect.TypeOf(test.val) - ptrs := PointersInType(typ, test.name) - t.Logf("Found pointers: %v", ptrs) - if (len(ptrs) != 0 || len(test.ptrs) != 0) && !reflect.DeepEqual(ptrs, test.ptrs) { - t.Errorf("Got %v, wanted %v", ptrs, test.ptrs) - } - }) - } -} diff --git a/tools/go_marshal/test/BUILD b/tools/go_marshal/test/BUILD index fa82f8e9b..d412e1ccf 100644 --- a/tools/go_marshal/test/BUILD +++ b/tools/go_marshal/test/BUILD @@ -1,9 +1,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_marshal:defs.bzl", "go_library") package(licenses = ["notice"]) -load("//tools/go_marshal:defs.bzl", "go_library") - package_group( name = "gomarshal_test", packages = [ diff --git a/tools/go_marshal/test/external/BUILD b/tools/go_marshal/test/external/BUILD index 8fb43179b..9bb89e1da 100644 --- a/tools/go_marshal/test/external/BUILD +++ b/tools/go_marshal/test/external/BUILD @@ 
-1,7 +1,7 @@ -package(licenses = ["notice"]) - load("//tools/go_marshal:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_library( name = "external", testonly = 1, -- cgit v1.2.3 From 4e27ba372e12e3186c0d03b32a7829b0d50f7a89 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 22 Nov 2019 10:42:57 -0800 Subject: tests: include sys/socket.h before linux/if_arp.h This is how it has to be accoding to the man page. PiperOrigin-RevId: 281998068 --- test/syscalls/linux/socket_netlink_util.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h index da99f0d60..76e772c48 100644 --- a/test/syscalls/linux/socket_netlink_util.h +++ b/test/syscalls/linux/socket_netlink_util.h @@ -15,6 +15,8 @@ #ifndef GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_ #define GVISOR_TEST_SYSCALLS_SOCKET_NETLINK_UTIL_H_ +#include +// socket.h has to be included before if_arp.h. #include #include -- cgit v1.2.3 From 20279c305ece6a458006999c8dafc5672ca92803 Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Tue, 26 Nov 2019 18:19:47 -0800 Subject: Allow open(O_TRUNC) and (f)truncate for proc files. This allows writable proc and devices files to be opened with O_CREAT|O_TRUNC. This is encountered most frequently when interacting with proc or devices files via the command line. e.g. $ echo 8192 1048576 4194304 > /proc/sys/net/ipv4/tcp_rmem Also adds a test to test the behavior of open(O_TRUNC), truncate, and ftruncate on named pipes. Fixes #1116 PiperOrigin-RevId: 282677425 --- pkg/sentry/fs/proc/sys_net.go | 17 ++++++++++++++--- pkg/sentry/fs/tty/master.go | 6 +++++- pkg/sentry/fs/tty/slave.go | 6 +++++- pkg/sentry/syscalls/linux/sys_file.go | 12 ++++++++++-- test/syscalls/linux/pipe.cc | 14 ++++++++++++++ 5 files changed, 48 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index f3b63dfc2..bd93f83fa 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -64,7 +64,7 @@ var _ fs.InodeOperations = (*tcpMemInode)(nil) func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir tcpMemDir) *fs.Inode { tm := &tcpMemInode{ - SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), s: s, dir: dir, } @@ -77,6 +77,11 @@ func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir return fs.NewInode(ctx, tm, msrc, sattr) } +// Truncate implements fs.InodeOperations.Truncate. +func (tcpMemInode) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + // GetFile implements fs.InodeOperations.GetFile. 
func (m *tcpMemInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { flags.Pread = true @@ -168,14 +173,15 @@ func writeSize(dirType tcpMemDir, s inet.Stack, size inet.TCPBufferSize) error { // +stateify savable type tcpSack struct { + fsutil.SimpleFileInode + stack inet.Stack `state:"wait"` enabled *bool - fsutil.SimpleFileInode } func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { ts := &tcpSack{ - SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), stack: s, } sattr := fs.StableAttr{ @@ -187,6 +193,11 @@ func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *f return fs.NewInode(ctx, ts, msrc, sattr) } +// Truncate implements fs.InodeOperations.Truncate. +func (tcpSack) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + // GetFile implements fs.InodeOperations.GetFile. func (s *tcpSack) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { flags.Pread = true diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index bc56be696..934828c12 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -32,7 +32,6 @@ import ( // +stateify savable type masterInodeOperations struct { fsutil.SimpleFileInode - fsutil.InodeNoopTruncate // d is the containing dir. d *dirInodeOperations @@ -77,6 +76,11 @@ func newMasterInode(ctx context.Context, d *dirInodeOperations, owner fs.FileOwn func (mi *masterInodeOperations) Release(ctx context.Context) { } +// Truncate implements fs.InodeOperations.Truncate. +func (masterInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + // GetFile implements fs.InodeOperations.GetFile. // // It allocates a new terminal. diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 4cbea0367..2a51e6bab 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -31,7 +31,6 @@ import ( // +stateify savable type slaveInodeOperations struct { fsutil.SimpleFileInode - fsutil.InodeNoopTruncate // d is the containing dir. d *dirInodeOperations @@ -73,6 +72,11 @@ func (si *slaveInodeOperations) Release(ctx context.Context) { si.t.DecRef() } +// Truncate implements fs.InodeOperations.Truncate. +func (slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + // GetFile implements fs.InodeOperations.GetFile. // // This may race with destruction of the terminal. If the terminal is gone, it diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 167c2b60b..3b9181002 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -171,6 +171,9 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint } } + // Truncate is called when O_TRUNC is specified for any kind of + // existing Dirent. Behavior is delegated to the entry's Truncate + // implementation. if flags&linux.O_TRUNC != 0 { if err := d.Inode.Truncate(t, d, 0); err != nil { return err @@ -397,7 +400,9 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l return err } - // Should we truncate the file? + // Truncate is called when O_TRUNC is specified for any kind of + // existing Dirent. 
Behavior is delegated to the entry's Truncate + // implementation. if flags&linux.O_TRUNC != 0 { if err := found.Inode.Truncate(t, found, 0); err != nil { return err @@ -1484,6 +1489,8 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if fs.IsDir(d.Inode.StableAttr) { return syserror.EISDIR } + // In contrast to open(O_TRUNC), truncate(2) is only valid for file + // types. if !fs.IsFile(d.Inode.StableAttr) { return syserror.EINVAL } @@ -1522,7 +1529,8 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, syserror.EINVAL } - // Note that this is different from truncate(2) above, where a + // In contrast to open(O_TRUNC), truncate(2) is only valid for file + // types. Note that this is different from truncate(2) above, where a // directory returns EISDIR. if !fs.IsFile(file.Dirent.Inode.StableAttr) { return 0, nil, syserror.EINVAL diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc index c0b354e65..ac9b21b24 100644 --- a/test/syscalls/linux/pipe.cc +++ b/test/syscalls/linux/pipe.cc @@ -212,6 +212,20 @@ TEST(Pipe2Test, BadOptions) { EXPECT_THAT(pipe2(fds, 0xDEAD), SyscallFailsWithErrno(EINVAL)); } +// Tests that opening named pipes with O_TRUNC shouldn't cause an error, but +// calls to (f)truncate should. +TEST(NamedPipeTest, Truncate) { + const std::string tmp_path = NewTempAbsPath(); + SKIP_IF(mkfifo(tmp_path.c_str(), 0644) != 0); + + ASSERT_THAT(open(tmp_path.c_str(), O_NONBLOCK | O_RDONLY), SyscallSucceeds()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(tmp_path.c_str(), O_RDWR | O_NONBLOCK | O_TRUNC)); + + ASSERT_THAT(truncate(tmp_path.c_str(), 0), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + TEST_P(PipeTest, Seek) { SKIP_IF(!CreateBlocking()); -- cgit v1.2.3 From 58afb4be695e6804925ba2be5f2d8c245f079cba Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 27 Nov 2019 13:47:44 -0800 Subject: Add floating point exception tests PiperOrigin-RevId: 282828273 --- test/syscalls/linux/exceptions.cc | 181 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/exceptions.cc b/test/syscalls/linux/exceptions.cc index 370e85166..3d564e720 100644 --- a/test/syscalls/linux/exceptions.cc +++ b/test/syscalls/linux/exceptions.cc @@ -22,6 +22,23 @@ namespace gvisor { namespace testing { +// Default value for the x87 FPU control word. See Intel SDM Vol 1, Ch 8.1.5 +// "x87 FPU Control Word". +constexpr uint16_t kX87ControlWordDefault = 0x37f; + +// Mask for the divide-by-zero exception. +constexpr uint16_t kX87ControlWordDiv0Mask = 1 << 2; + +// Default value for the SSE control register (MXCSR). See Intel SDM Vol 1, Ch +// 11.6.4 "Initialization of SSE/SSE3 Extensions". +constexpr uint32_t kMXCSRDefault = 0x1f80; + +// Mask for the divide-by-zero exception. +constexpr uint32_t kMXCSRDiv0Mask = 1 << 9; + +// Flag for a pending divide-by-zero exception. +constexpr uint32_t kMXCSRDiv0Flag = 1 << 2; + void inline Halt() { asm("hlt\r\n"); } void inline SetAlignmentCheck() { @@ -107,6 +124,170 @@ TEST(ExceptionTest, DivideByZero) { ::testing::KilledBySignal(SIGFPE), ""); } +// By default, x87 exceptions are masked and simply return a default value. 
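// A worked value for reference: kX87ControlWordDefault is 0x37f and
// kX87ControlWordDiv0Mask is 1 << 2 (the ZM bit), so
// kX87ControlWordDefault & ~kX87ControlWordDiv0Mask == 0x37b. The unmasked
// tests below load that word with "fldcw", so a later x87 divide by zero
// raises #Z and the process receives SIGFPE instead of the masked default
// result.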
+TEST(ExceptionTest, X87DivideByZeroMasked) { + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm("fildl %[value]\r\n" + "fidivl %[divisor]\r\n" + "fistpl %[quotient]\r\n" + : [ quotient ] "=m"(quotient) + : [ value ] "m"(value), [ divisor ] "m"(divisor)); + + EXPECT_EQ(quotient, INT32_MIN); +} + +// When unmasked, division by zero raises SIGFPE. +TEST(ExceptionTest, X87DivideByZeroUnmasked) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa)); + + EXPECT_EXIT( + { + // Clear the divide by zero exception mask. + constexpr uint16_t kControlWord = + kX87ControlWordDefault & ~kX87ControlWordDiv0Mask; + + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm volatile( + "fldcw %[cw]\r\n" + "fildl %[value]\r\n" + "fidivl %[divisor]\r\n" + "fistpl %[quotient]\r\n" + : [ quotient ] "=m"(quotient) + : [ cw ] "m"(kControlWord), [ value ] "m"(value), + [ divisor ] "m"(divisor)); + }, + ::testing::KilledBySignal(SIGFPE), ""); +} + +// Pending exceptions in the x87 status register are not clobbered by syscalls. +TEST(ExceptionTest, X87StatusClobber) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa)); + + EXPECT_EXIT( + { + // Clear the divide by zero exception mask. + constexpr uint16_t kControlWord = + kX87ControlWordDefault & ~kX87ControlWordDiv0Mask; + + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm volatile( + "fildl %[value]\r\n" + "fidivl %[divisor]\r\n" + // Exception is masked, so it does not occur here. + "fistpl %[quotient]\r\n" + + // SYS_getpid placed in rax by constraint. + "syscall\r\n" + + // Unmask exception. The syscall didn't clobber the pending + // exception, so now it can be raised. + // + // N.B. "a floating-point exception will be generated upon execution + // of the *next* floating-point instruction". + "fldcw %[cw]\r\n" + "fwait\r\n" + : [ quotient ] "=m"(quotient) + : [ value ] "m"(value), [ divisor ] "m"(divisor), "a"(SYS_getpid), + [ cw ] "m"(kControlWord) + : "rcx", "r11"); + }, + ::testing::KilledBySignal(SIGFPE), ""); +} + +// By default, SSE exceptions are masked and simply return a default value. +TEST(ExceptionTest, SSEDivideByZeroMasked) { + uint32_t status; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm("cvtsi2ssl %[value], %%xmm0\r\n" + "cvtsi2ssl %[divisor], %%xmm1\r\n" + "divss %%xmm1, %%xmm0\r\n" + "cvtss2sil %%xmm0, %[quotient]\r\n" + : [ quotient ] "=r"(quotient), [ status ] "=r"(status) + : [ value ] "r"(value), [ divisor ] "r"(divisor) + : "xmm0", "xmm1"); + + EXPECT_EQ(quotient, INT32_MIN); +} + +// When unmasked, division by zero raises SIGFPE. +TEST(ExceptionTest, SSEDivideByZeroUnmasked) { + // See above. + struct sigaction sa = {}; + sa.sa_handler = SIG_DFL; + auto const cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaction(SIGFPE, sa)); + + EXPECT_EXIT( + { + // Clear the divide by zero exception mask. 
+ constexpr uint32_t kMXCSR = kMXCSRDefault & ~kMXCSRDiv0Mask; + + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm volatile( + "ldmxcsr %[mxcsr]\r\n" + "cvtsi2ssl %[value], %%xmm0\r\n" + "cvtsi2ssl %[divisor], %%xmm1\r\n" + "divss %%xmm1, %%xmm0\r\n" + "cvtss2sil %%xmm0, %[quotient]\r\n" + : [ quotient ] "=r"(quotient) + : [ mxcsr ] "m"(kMXCSR), [ value ] "r"(value), + [ divisor ] "r"(divisor) + : "xmm0", "xmm1"); + }, + ::testing::KilledBySignal(SIGFPE), ""); +} + +// Pending exceptions in the SSE status register are not clobbered by syscalls. +TEST(ExceptionTest, SSEStatusClobber) { + uint32_t mxcsr; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; + asm("cvtsi2ssl %[value], %%xmm0\r\n" + "cvtsi2ssl %[divisor], %%xmm1\r\n" + "divss %%xmm1, %%xmm0\r\n" + // Exception is masked, so it does not occur here. + "cvtss2sil %%xmm0, %[quotient]\r\n" + + // SYS_getpid placed in rax by constraint. + "syscall\r\n" + + // Intel SDM Vol 1, Ch 10.2.3.1 "SIMD Floating-Point Mask and Flag Bits": + // "If LDMXCSR or FXRSTOR clears a mask bit and sets the corresponding + // exception flag bit, a SIMD floating-point exception will not be + // generated as a result of this change. The unmasked exception will be + // generated only upon the execution of the next SSE/SSE2/SSE3 instruction + // that detects the unmasked exception condition." + // + // Though ambiguous, empirical evidence indicates that this means that + // exception flags set in the status register will never cause an + // exception to be raised; only a new exception condition will do so. + // + // Thus here we just check for the flag itself rather than trying to raise + // the exception. + "stmxcsr %[mxcsr]\r\n" + : [ quotient ] "=r"(quotient), [ mxcsr ] "+m"(mxcsr) + : [ value ] "r"(value), [ divisor ] "r"(divisor), "a"(SYS_getpid) + : "xmm0", "xmm1", "rcx", "r11"); + + EXPECT_TRUE(mxcsr & kMXCSRDiv0Flag); +} + TEST(ExceptionTest, IOAccessFault) { // See above. struct sigaction sa = {}; -- cgit v1.2.3 From aa70523da21534d8518eaa52f36db002e3d61885 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Mon, 2 Dec 2019 05:37:09 -0800 Subject: Port tests in udp_socket.cc to Fuchsia Separate out a test in udp_socket.cc that depends on so the rest of the tests can run on Fuchsia. 
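(From the file list below and the error-queue case that stays Linux-only, the
separated-out test appears to be the one in udp_socket_errqueue_test_case.cc,
which exercises the Linux-specific MSG_ERRQUEUE / sock_extended_err socket
error queue that Fuchsia does not provide.)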
PiperOrigin-RevId: 283322633 --- test/syscalls/linux/BUILD | 23 +- test/syscalls/linux/udp_socket.cc | 1321 +------------------- .../linux/udp_socket_errqueue_test_case.cc | 54 + test/syscalls/linux/udp_socket_test_cases.cc | 1279 +++++++++++++++++++ test/syscalls/linux/udp_socket_test_cases.h | 74 ++ 5 files changed, 1427 insertions(+), 1324 deletions(-) create mode 100644 test/syscalls/linux/udp_socket_errqueue_test_case.cc create mode 100644 test/syscalls/linux/udp_socket_test_cases.cc create mode 100644 test/syscalls/linux/udp_socket_test_cases.h (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 6345ea28c..2dd115409 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3351,11 +3351,15 @@ cc_binary( ], ) -cc_binary( - name = "udp_socket_test", +cc_library( + name = "udp_socket_test_cases", testonly = 1, - srcs = ["udp_socket.cc"], - linkstatic = 1, + srcs = [ + "udp_socket_test_cases.cc", + ] + select_for_linux([ + "udp_socket_errqueue_test_case.cc", + ]), + hdrs = ["udp_socket_test_cases.h"], deps = [ ":socket_test_util", ":unix_domain_socket_test_util", @@ -3366,6 +3370,17 @@ cc_binary( "@com_google_absl//absl/time", "@com_google_googletest//:gtest", ], + alwayslink = 1, +) + +cc_binary( + name = "udp_socket_test", + testonly = 1, + srcs = ["udp_socket.cc"], + linkstatic = 1, + deps = [ + ":udp_socket_test_cases", + ], ) cc_binary( diff --git a/test/syscalls/linux/udp_socket.cc b/test/syscalls/linux/udp_socket.cc index 111dbacdf..7a8ac30a4 100644 --- a/test/syscalls/linux/udp_socket.cc +++ b/test/syscalls/linux/udp_socket.cc @@ -12,1332 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "absl/base/macros.h" -#include "absl/time/clock.h" -#include "absl/time/time.h" -#include "test/syscalls/linux/socket_test_util.h" -#include "test/syscalls/linux/unix_domain_socket_test_util.h" -#include "test/util/test_util.h" -#include "test/util/thread_util.h" +#include "test/syscalls/linux/udp_socket_test_cases.h" namespace gvisor { namespace testing { namespace { -// The initial port to be be used on gvisor. -constexpr int TestPort = 40000; - -// Fixture for tests parameterized by the address family to use (AF_INET and -// AF_INET6) when creating sockets. -class UdpSocketTest : public ::testing::TestWithParam { - protected: - // Creates two sockets that will be used by test cases. - void SetUp() override; - - // Closes the sockets created by SetUp(). - void TearDown() override { - EXPECT_THAT(close(s_), SyscallSucceeds()); - EXPECT_THAT(close(t_), SyscallSucceeds()); - - for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { - ASSERT_NO_ERRNO(FreeAvailablePort(ports_[i])); - } - } - - // First UDP socket. - int s_; - - // Second UDP socket. - int t_; - - // The length of the socket address. - socklen_t addrlen_; - - // Initialized address pointing to loopback and port TestPort+i. - struct sockaddr* addr_[3]; - - // Initialize "any" address. - struct sockaddr* anyaddr_; - - // Used ports. - int ports_[3]; - - private: - // Storage for the loopback addresses. - struct sockaddr_storage addr_storage_[3]; - - // Storage for the "any" address. - struct sockaddr_storage anyaddr_storage_; -}; - -// Gets a pointer to the port component of the given address. 
-uint16_t* Port(struct sockaddr_storage* addr) { - switch (addr->ss_family) { - case AF_INET: { - auto sin = reinterpret_cast(addr); - return &sin->sin_port; - } - case AF_INET6: { - auto sin6 = reinterpret_cast(addr); - return &sin6->sin6_port; - } - } - - return nullptr; -} - -void UdpSocketTest::SetUp() { - int type; - if (GetParam() == AddressFamily::kIpv4) { - type = AF_INET; - auto sin = reinterpret_cast(&anyaddr_storage_); - addrlen_ = sizeof(*sin); - sin->sin_addr.s_addr = htonl(INADDR_ANY); - } else { - type = AF_INET6; - auto sin6 = reinterpret_cast(&anyaddr_storage_); - addrlen_ = sizeof(*sin6); - if (GetParam() == AddressFamily::kIpv6) { - sin6->sin6_addr = IN6ADDR_ANY_INIT; - } else { - TestAddress const& v4_mapped_any = V4MappedAny(); - sin6->sin6_addr = - reinterpret_cast(&v4_mapped_any.addr) - ->sin6_addr; - } - } - ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); - - ASSERT_THAT(t_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); - - memset(&anyaddr_storage_, 0, sizeof(anyaddr_storage_)); - anyaddr_ = reinterpret_cast(&anyaddr_storage_); - anyaddr_->sa_family = type; - - if (gvisor::testing::IsRunningOnGvisor()) { - for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { - ports_[i] = TestPort + i; - } - } else { - // When not under gvisor, use utility function to pick port. Assert that - // all ports are different. - std::string error; - for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { - // Find an unused port, we specify port 0 to allow the kernel to provide - // the port. - bool unique = true; - do { - ports_[i] = ASSERT_NO_ERRNO_AND_VALUE(PortAvailable( - 0, AddressFamily::kDualStack, SocketType::kUdp, false)); - ASSERT_GT(ports_[i], 0); - for (size_t j = 0; j < i; ++j) { - if (ports_[j] == ports_[i]) { - unique = false; - break; - } - } - } while (!unique); - } - } - - // Initialize the sockaddrs. - for (size_t i = 0; i < ABSL_ARRAYSIZE(addr_); ++i) { - memset(&addr_storage_[i], 0, sizeof(addr_storage_[i])); - - addr_[i] = reinterpret_cast(&addr_storage_[i]); - addr_[i]->sa_family = type; - - switch (type) { - case AF_INET: { - auto sin = reinterpret_cast(addr_[i]); - sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - sin->sin_port = htons(ports_[i]); - break; - } - case AF_INET6: { - auto sin6 = reinterpret_cast(addr_[i]); - sin6->sin6_addr = in6addr_loopback; - sin6->sin6_port = htons(ports_[i]); - break; - } - } - } -} - -TEST_P(UdpSocketTest, Creation) { - int type = AF_INET6; - if (GetParam() == AddressFamily::kIpv4) { - type = AF_INET; - } - - int s_; - - ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); - EXPECT_THAT(close(s_), SyscallSucceeds()); - - ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, 0), SyscallSucceeds()); - EXPECT_THAT(close(s_), SyscallSucceeds()); - - ASSERT_THAT(s_ = socket(type, SOCK_STREAM, IPPROTO_UDP), SyscallFails()); -} - -TEST_P(UdpSocketTest, Getsockname) { - // Check that we're not bound. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, anyaddr_, addrlen_), 0); - - // Bind, then check that we get the right address. 
- ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); -} - -TEST_P(UdpSocketTest, Getpeername) { - // Check that we're not connected. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); - - // Connect, then check that we get the right address. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - addrlen = sizeof(addr); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); -} - -TEST_P(UdpSocketTest, SendNotConnected) { - // Do send & write, they must fail. - char buf[512]; - EXPECT_THAT(send(s_, buf, sizeof(buf), 0), - SyscallFailsWithErrno(EDESTADDRREQ)); - - EXPECT_THAT(write(s_, buf, sizeof(buf)), SyscallFailsWithErrno(EDESTADDRREQ)); - - // Use sendto. - ASSERT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); - - // Check that we're bound now. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_NE(*Port(&addr), 0); -} - -TEST_P(UdpSocketTest, ConnectBinds) { - // Connect the socket. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Check that we're bound now. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_NE(*Port(&addr), 0); -} - -TEST_P(UdpSocketTest, ReceiveNotBound) { - char buf[512]; - EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); -} - -TEST_P(UdpSocketTest, Bind) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Try to bind again. - EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL)); - - // Check that we're still bound to the original address. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); -} - -TEST_P(UdpSocketTest, BindInUse) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Try to bind again. - EXPECT_THAT(bind(t_, addr_[0], addrlen_), SyscallFailsWithErrno(EADDRINUSE)); -} - -TEST_P(UdpSocketTest, ReceiveAfterConnect) { - // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds()); - - // Get the address s_ was bound to during connect. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - - // Send from t_ to s_. - char buf[512]; - RandomizeBuffer(buf, sizeof(buf)); - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, - reinterpret_cast(&addr), addrlen), - SyscallSucceedsWithValue(sizeof(buf))); - - // Receive the data. 
- char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(sizeof(received))); - EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); -} - -TEST_P(UdpSocketTest, ReceiveAfterDisconnect) { - // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds()); - - // Get the address s_ was bound to during connect. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - - for (int i = 0; i < 2; i++) { - // Send from t_ to s_. - char buf[512]; - RandomizeBuffer(buf, sizeof(buf)); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, - reinterpret_cast(&addr), addrlen), - SyscallSucceedsWithValue(sizeof(buf))); - - // Receive the data. - char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(sizeof(received))); - EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); - - // Disconnect s_. - struct sockaddr addr = {}; - addr.sa_family = AF_UNSPEC; - ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), SyscallSucceeds()); - // Connect s_ loopback:TestPort. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - } -} - -TEST_P(UdpSocketTest, Connect) { - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Check that we're connected to the right peer. - struct sockaddr_storage peer; - socklen_t peerlen = sizeof(peer); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), - SyscallSucceeds()); - EXPECT_EQ(peerlen, addrlen_); - EXPECT_EQ(memcmp(&peer, addr_[0], addrlen_), 0); - - // Try to bind after connect. - EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL)); - - // Try to connect again. - EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); - - // Check that peer name changed. - peerlen = sizeof(peer); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), - SyscallSucceeds()); - EXPECT_EQ(peerlen, addrlen_); - EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); -} - -void ConnectAny(AddressFamily family, int sockfd, uint16_t port) { - struct sockaddr_storage addr = {}; - - // Precondition check. 
- { - socklen_t addrlen = sizeof(addr); - EXPECT_THAT( - getsockname(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - - if (family == AddressFamily::kIpv4) { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY)); - } else { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - struct in6_addr any = IN6ADDR_ANY_INIT; - EXPECT_EQ(memcmp(&addr_out->sin6_addr, &any, sizeof(in6_addr)), 0); - } - - { - socklen_t addrlen = sizeof(addr); - EXPECT_THAT( - getpeername(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); - } - - struct sockaddr_storage baddr = {}; - if (family == AddressFamily::kIpv4) { - auto addr_in = reinterpret_cast(&baddr); - addrlen = sizeof(*addr_in); - addr_in->sin_family = AF_INET; - addr_in->sin_addr.s_addr = htonl(INADDR_ANY); - addr_in->sin_port = port; - } else { - auto addr_in = reinterpret_cast(&baddr); - addrlen = sizeof(*addr_in); - addr_in->sin6_family = AF_INET6; - addr_in->sin6_port = port; - if (family == AddressFamily::kIpv6) { - addr_in->sin6_addr = IN6ADDR_ANY_INIT; - } else { - TestAddress const& v4_mapped_any = V4MappedAny(); - addr_in->sin6_addr = - reinterpret_cast(&v4_mapped_any.addr) - ->sin6_addr; - } - } - - // TODO(b/138658473): gVisor doesn't allow connecting to the zero port. - if (port == 0) { - SKIP_IF(IsRunningOnGvisor()); - } - - ASSERT_THAT(connect(sockfd, reinterpret_cast(&baddr), addrlen), - SyscallSucceeds()); - } - - // Postcondition check. - { - socklen_t addrlen = sizeof(addr); - EXPECT_THAT( - getsockname(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - - if (family == AddressFamily::kIpv4) { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK)); - } else { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - struct in6_addr loopback; - if (family == AddressFamily::kIpv6) { - loopback = IN6ADDR_LOOPBACK_INIT; - } else { - TestAddress const& v4_mapped_loopback = V4MappedLoopback(); - loopback = reinterpret_cast( - &v4_mapped_loopback.addr) - ->sin6_addr; - } - - EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); - } - - addrlen = sizeof(addr); - if (port == 0) { - EXPECT_THAT( - getpeername(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); - } else { - EXPECT_THAT( - getpeername(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - } - } -} - -TEST_P(UdpSocketTest, ConnectAny) { ConnectAny(GetParam(), s_, 0); } - -TEST_P(UdpSocketTest, ConnectAnyWithPort) { - auto port = *Port(reinterpret_cast(addr_[1])); - ConnectAny(GetParam(), s_, port); -} - -void DisconnectAfterConnectAny(AddressFamily family, int sockfd, int port) { - struct sockaddr_storage addr = {}; - - socklen_t addrlen = sizeof(addr); - struct sockaddr_storage baddr = {}; - if (family == AddressFamily::kIpv4) { - auto addr_in = reinterpret_cast(&baddr); - addrlen = sizeof(*addr_in); - addr_in->sin_family = AF_INET; - addr_in->sin_addr.s_addr = htonl(INADDR_ANY); - addr_in->sin_port = port; - } else { - auto addr_in = reinterpret_cast(&baddr); - addrlen = sizeof(*addr_in); - addr_in->sin6_family = AF_INET6; - addr_in->sin6_port = port; - if (family == AddressFamily::kIpv6) { - addr_in->sin6_addr = IN6ADDR_ANY_INIT; - } else { - TestAddress const& v4_mapped_any = V4MappedAny(); - addr_in->sin6_addr = - 
reinterpret_cast(&v4_mapped_any.addr) - ->sin6_addr; - } - } - - // TODO(b/138658473): gVisor doesn't allow connecting to the zero port. - if (port == 0) { - SKIP_IF(IsRunningOnGvisor()); - } - - ASSERT_THAT(connect(sockfd, reinterpret_cast(&baddr), addrlen), - SyscallSucceeds()); - // Now the socket is bound to the loopback address. - - // Disconnect - addrlen = sizeof(addr); - addr.ss_family = AF_UNSPEC; - ASSERT_THAT(connect(sockfd, reinterpret_cast(&addr), addrlen), - SyscallSucceeds()); - - // Check that after disconnect the socket is bound to the ANY address. - EXPECT_THAT(getsockname(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - if (family == AddressFamily::kIpv4) { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY)); - } else { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - struct in6_addr loopback = IN6ADDR_ANY_INIT; - - EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); - } -} - -TEST_P(UdpSocketTest, DisconnectAfterConnectAny) { - DisconnectAfterConnectAny(GetParam(), s_, 0); -} - -TEST_P(UdpSocketTest, DisconnectAfterConnectAnyWithPort) { - auto port = *Port(reinterpret_cast(addr_[1])); - DisconnectAfterConnectAny(GetParam(), s_, port); -} - -TEST_P(UdpSocketTest, DisconnectAfterBind) { - ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); - // Connect the socket. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - struct sockaddr_storage addr = {}; - addr.ss_family = AF_UNSPEC; - EXPECT_THAT( - connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), - SyscallSucceeds()); - - // Check that we're still bound. - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0); - - addrlen = sizeof(addr); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); -} - -TEST_P(UdpSocketTest, DisconnectAfterBindToAny) { - struct sockaddr_storage baddr = {}; - socklen_t addrlen; - auto port = *Port(reinterpret_cast(addr_[1])); - if (GetParam() == AddressFamily::kIpv4) { - auto addr_in = reinterpret_cast(&baddr); - addr_in->sin_family = AF_INET; - addr_in->sin_port = port; - addr_in->sin_addr.s_addr = htonl(INADDR_ANY); - } else { - auto addr_in = reinterpret_cast(&baddr); - addr_in->sin6_family = AF_INET6; - addr_in->sin6_port = port; - addr_in->sin6_scope_id = 0; - addr_in->sin6_addr = IN6ADDR_ANY_INIT; - } - ASSERT_THAT(bind(s_, reinterpret_cast(&baddr), addrlen_), - SyscallSucceeds()); - // Connect the socket. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - struct sockaddr_storage addr = {}; - addr.ss_family = AF_UNSPEC; - EXPECT_THAT( - connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), - SyscallSucceeds()); - - // Check that we're still bound. - addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, &baddr, addrlen), 0); - - addrlen = sizeof(addr); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); -} - -TEST_P(UdpSocketTest, Disconnect) { - for (int i = 0; i < 2; i++) { - // Try to connect again. 
- EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); - - // Check that we're connected to the right peer. - struct sockaddr_storage peer; - socklen_t peerlen = sizeof(peer); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), - SyscallSucceeds()); - EXPECT_EQ(peerlen, addrlen_); - EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); - - // Try to disconnect. - struct sockaddr_storage addr = {}; - addr.ss_family = AF_UNSPEC; - EXPECT_THAT( - connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), - SyscallSucceeds()); - - peerlen = sizeof(peer); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), - SyscallFailsWithErrno(ENOTCONN)); - - // Check that we're still bound. - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(*Port(&addr), 0); - } -} - -TEST_P(UdpSocketTest, ConnectBadAddress) { - struct sockaddr addr = {}; - addr.sa_family = addr_[0]->sa_family; - ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), - SyscallFailsWithErrno(EINVAL)); -} - -TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) { - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Send to a different destination than we're connected to. - char buf[512]; - EXPECT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[1], addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); -} - -TEST_P(UdpSocketTest, ZerolengthWriteAllowed) { - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); - - // Bind t_ to loopback:TestPort+1. - ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); - - char buf[3]; - // Send zero length packet from s_ to t_. - ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); - // Receive the packet. - char received[3]; - EXPECT_THAT(read(t_, received, sizeof(received)), - SyscallSucceedsWithValue(0)); -} - -TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) { - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); - - // Bind t_ to loopback:TestPort+1. - ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); - - // Set t_ to non-blocking. - int opts = 0; - ASSERT_THAT(opts = fcntl(t_, F_GETFL), SyscallSucceeds()); - ASSERT_THAT(fcntl(t_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds()); - - char buf[3]; - // Send zero length packet from s_ to t_. - ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); - // Receive the packet. - char received[3]; - EXPECT_THAT(read(t_, received, sizeof(received)), - SyscallSucceedsWithValue(0)); - EXPECT_THAT(read(t_, received, sizeof(received)), - SyscallFailsWithErrno(EAGAIN)); -} - -TEST_P(UdpSocketTest, SendAndReceiveNotConnected) { - // Bind s_ to loopback. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Send some data to s_. - char buf[512]; - RandomizeBuffer(buf, sizeof(buf)); - - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); - - // Receive the data. 
- char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(sizeof(received))); - EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); -} - -TEST_P(UdpSocketTest, SendAndReceiveConnected) { - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); - - // Bind t_ to loopback:TestPort+1. - ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); - - // Send some data from t_ to s_. - char buf[512]; - RandomizeBuffer(buf, sizeof(buf)); - - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); - - // Receive the data. - char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(sizeof(received))); - EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); -} - -TEST_P(UdpSocketTest, ReceiveFromNotConnected) { - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); - - // Bind t_ to loopback:TestPort+2. - ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); - - // Send some data from t_ to s_. - char buf[512]; - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); - - // Check that the data isn't_ received because it was sent from a different - // address than we're connected. - EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); -} - -TEST_P(UdpSocketTest, ReceiveBeforeConnect) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Bind t_ to loopback:TestPort+2. - ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); - - // Send some data from t_ to s_. - char buf[512]; - RandomizeBuffer(buf, sizeof(buf)); - - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); - - // Connect to loopback:TestPort+1. - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); - - // Receive the data. It works because it was sent before the connect. - char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(sizeof(received))); - EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); - - // Send again. This time it should not be received. - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); - - EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); -} - -TEST_P(UdpSocketTest, ReceiveFrom) { - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); - - // Bind t_ to loopback:TestPort+1. - ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); - - // Send some data from t_ to s_. - char buf[512]; - RandomizeBuffer(buf, sizeof(buf)); - - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); - - // Receive the data and sender address. 
- char received[sizeof(buf)]; - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(recvfrom(s_, received, sizeof(received), 0, - reinterpret_cast(&addr), &addrlen), - SyscallSucceedsWithValue(sizeof(received))); - EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0); -} - -TEST_P(UdpSocketTest, Listen) { - ASSERT_THAT(listen(s_, SOMAXCONN), SyscallFailsWithErrno(EOPNOTSUPP)); -} - -TEST_P(UdpSocketTest, Accept) { - ASSERT_THAT(accept(s_, nullptr, nullptr), SyscallFailsWithErrno(EOPNOTSUPP)); -} - -// This test validates that a read shutdown with pending data allows the read -// to proceed with the data before returning EAGAIN. -TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) { - char received[512]; - - // Bind t_ to loopback:TestPort+2. - ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds()); - - // Connect the socket, then try to shutdown again. - ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); - - // Verify that we get EWOULDBLOCK when there is nothing to read. - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - const char* buf = "abc"; - EXPECT_THAT(write(t_, buf, 3), SyscallSucceedsWithValue(3)); - - int opts = 0; - ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); - ASSERT_THAT(fcntl(s_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds()); - ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); - ASSERT_NE(opts & O_NONBLOCK, 0); - - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); - - // We should get the data even though read has been shutdown. - EXPECT_THAT(recv(s_, received, 2, 0), SyscallSucceedsWithValue(2)); - - // Because we read less than the entire packet length, since it's a packet - // based socket any subsequent reads should return EWOULDBLOCK. - EXPECT_THAT(recv(s_, received, 1, 0), SyscallFailsWithErrno(EWOULDBLOCK)); -} - -// This test is validating that even after a socket is shutdown if it's -// reconnected it will reset the shutdown state. -TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) { - char received[512]; - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); - - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - // Connect the socket, then try to shutdown again. - ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); - - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); -} - -TEST_P(UdpSocketTest, ReadShutdown) { - char received[512]; - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); - - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - // Connect the socket, then try to shutdown again. 
- ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); - - EXPECT_THAT(recv(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(0)); -} - -TEST_P(UdpSocketTest, ReadShutdownDifferentThread) { - char received[512]; - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - // Connect the socket, then shutdown from another thread. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - ScopedThread t([&] { - absl::SleepFor(absl::Milliseconds(200)); - EXPECT_THAT(shutdown(this->s_, SHUT_RD), SyscallSucceeds()); - }); - EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(0)); - t.Join(); - - EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(0)); -} - -TEST_P(UdpSocketTest, WriteShutdown) { - EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); -} - -TEST_P(UdpSocketTest, SynchronousReceive) { - // Bind s_ to loopback. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Send some data to s_ from another thread. - char buf[512]; - RandomizeBuffer(buf, sizeof(buf)); - - // Receive the data prior to actually starting the other thread. - char received[512]; - EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); - - // Start the thread. - ScopedThread t([&] { - absl::SleepFor(absl::Milliseconds(200)); - ASSERT_THAT( - sendto(this->t_, buf, sizeof(buf), 0, this->addr_[0], this->addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); - }); - - EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), - SyscallSucceedsWithValue(512)); - EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); -} - -TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Send 3 packets from t_ to s_. - constexpr int psize = 100; - char buf[3 * psize]; - RandomizeBuffer(buf, sizeof(buf)); - - for (int i = 0; i < 3; ++i) { - ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(psize)); - } - - // Receive the data as 3 separate packets. - char received[6 * psize]; - for (int i = 0; i < 3; ++i) { - EXPECT_THAT(recv(s_, received + i * psize, 3 * psize, 0), - SyscallSucceedsWithValue(psize)); - } - EXPECT_EQ(memcmp(buf, received, 3 * psize), 0); -} - -TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Direct writes from t_ to s_. - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); - - // Send 2 packets from t_ to s_, where each packet's data consists of 2 - // discontiguous iovecs. 
- constexpr size_t kPieceSize = 100; - char buf[4 * kPieceSize]; - RandomizeBuffer(buf, sizeof(buf)); - - for (int i = 0; i < 2; i++) { - struct iovec iov[2]; - for (int j = 0; j < 2; j++) { - iov[j].iov_base = reinterpret_cast( - reinterpret_cast(buf) + (i + 2 * j) * kPieceSize); - iov[j].iov_len = kPieceSize; - } - ASSERT_THAT(writev(t_, iov, 2), SyscallSucceedsWithValue(2 * kPieceSize)); - } - - // Receive the data as 2 separate packets. - char received[6 * kPieceSize]; - for (int i = 0; i < 2; i++) { - struct iovec iov[3]; - for (int j = 0; j < 3; j++) { - iov[j].iov_base = reinterpret_cast( - reinterpret_cast(received) + (i + 2 * j) * kPieceSize); - iov[j].iov_len = kPieceSize; - } - ASSERT_THAT(readv(s_, iov, 3), SyscallSucceedsWithValue(2 * kPieceSize)); - } - EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); -} - -TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Send 2 packets from t_ to s_, where each packet's data consists of 2 - // discontiguous iovecs. - constexpr size_t kPieceSize = 100; - char buf[4 * kPieceSize]; - RandomizeBuffer(buf, sizeof(buf)); - - for (int i = 0; i < 2; i++) { - struct iovec iov[2]; - for (int j = 0; j < 2; j++) { - iov[j].iov_base = reinterpret_cast( - reinterpret_cast(buf) + (i + 2 * j) * kPieceSize); - iov[j].iov_len = kPieceSize; - } - struct msghdr msg = {}; - msg.msg_name = addr_[0]; - msg.msg_namelen = addrlen_; - msg.msg_iov = iov; - msg.msg_iovlen = 2; - ASSERT_THAT(sendmsg(t_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize)); - } - - // Receive the data as 2 separate packets. - char received[6 * kPieceSize]; - for (int i = 0; i < 2; i++) { - struct iovec iov[3]; - for (int j = 0; j < 3; j++) { - iov[j].iov_base = reinterpret_cast( - reinterpret_cast(received) + (i + 2 * j) * kPieceSize); - iov[j].iov_len = kPieceSize; - } - struct msghdr msg = {}; - msg.msg_iov = iov; - msg.msg_iovlen = 3; - ASSERT_THAT(recvmsg(s_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize)); - } - EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); -} - -TEST_P(UdpSocketTest, FIONREADShutdown) { - int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - // A UDP socket must be connected before it can be shutdown. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); - - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); -} - -TEST_P(UdpSocketTest, FIONREADWriteShutdown) { - int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // A UDP socket must be connected before it can be shutdown. 
- ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - const char str[] = "abc"; - ASSERT_THAT(send(s_, str, sizeof(str), 0), - SyscallSucceedsWithValue(sizeof(str))); - - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, sizeof(str)); - - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); - - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, sizeof(str)); -} - -TEST_P(UdpSocketTest, FIONREAD) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Check that the bound socket with an empty buffer reports an empty first - // packet. - int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - // Send 3 packets from t_ to s_. - constexpr int psize = 100; - char buf[3 * psize]; - RandomizeBuffer(buf, sizeof(buf)); - - for (int i = 0; i < 3; ++i) { - ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(psize)); - - // Check that regardless of how many packets are in the queue, the size - // reported is that of a single packet. - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, psize); - } -} - -TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // Check that the bound socket with an empty buffer reports an empty first - // packet. - int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - // Send 3 packets from t_ to s_. - constexpr int psize = 100; - char buf[3 * psize]; - RandomizeBuffer(buf, sizeof(buf)); - - for (int i = 0; i < 3; ++i) { - ASSERT_THAT(sendto(t_, buf + i * psize, 0, 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(0)); - - // Check that regardless of how many packets are in the queue, the size - // reported is that of a single packet. - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - } -} - -TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) { - int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - - // A UDP socket must be connected before it can be shutdown. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - const char str[] = "abc"; - ASSERT_THAT(send(s_, str, 0, 0), SyscallSucceedsWithValue(0)); - - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); - - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); - - n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); - EXPECT_EQ(n, 0); -} - -TEST_P(UdpSocketTest, ErrorQueue) { - char cmsgbuf[CMSG_SPACE(sizeof(sock_extended_err))]; - msghdr msg; - memset(&msg, 0, sizeof(msg)); - iovec iov; - memset(&iov, 0, sizeof(iov)); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = cmsgbuf; - msg.msg_controllen = sizeof(cmsgbuf); - - // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT. 
- EXPECT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_ERRQUEUE), - SyscallFailsWithErrno(EAGAIN)); -} - -TEST_P(UdpSocketTest, SoTimestampOffByDefault) { - int v = -1; - socklen_t optlen = sizeof(v); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, &optlen), - SyscallSucceeds()); - ASSERT_EQ(v, kSockOptOff); - ASSERT_EQ(optlen, sizeof(v)); -} - -TEST_P(UdpSocketTest, SoTimestamp) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); - - int v = 1; - ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), - SyscallSucceeds()); - - char buf[3]; - // Send zero length packet from t_ to s_. - ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); - - char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; - msghdr msg; - memset(&msg, 0, sizeof(msg)); - iovec iov; - memset(&iov, 0, sizeof(iov)); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = cmsgbuf; - msg.msg_controllen = sizeof(cmsgbuf); - - ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0)); - - struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); - ASSERT_NE(cmsg, nullptr); - ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); - ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP); - ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval))); - - struct timeval tv = {}; - memcpy(&tv, CMSG_DATA(cmsg), sizeof(struct timeval)); - - ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); - - // There should be nothing to get via ioctl. - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallFailsWithErrno(ENOENT)); -} - -TEST_P(UdpSocketTest, WriteShutdownNotConnected) { - EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); -} - -TEST_P(UdpSocketTest, TimestampIoctl) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); - - char buf[3]; - // Send packet from t_ to s_. - ASSERT_THAT(RetryEINTR(write)(t_, buf, sizeof(buf)), - SyscallSucceedsWithValue(sizeof(buf))); - - // There should be no control messages. - char recv_buf[sizeof(buf)]; - ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, sizeof(recv_buf))); - - // A nonzero timeval should be available via ioctl. - struct timeval tv = {}; - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallSucceeds()); - ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); -} - -TEST_P(UdpSocketTest, TimetstampIoctlNothingRead) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); - - struct timeval tv = {}; - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallFailsWithErrno(ENOENT)); -} - -// Test that the timestamp accessed via SIOCGSTAMP is still accessible after -// SO_TIMESTAMP is enabled and used to retrieve a timestamp. -TEST_P(UdpSocketTest, TimestampIoctlPersistence) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); - - char buf[3]; - // Send packet from t_ to s_. - ASSERT_THAT(RetryEINTR(write)(t_, buf, sizeof(buf)), - SyscallSucceedsWithValue(sizeof(buf))); - ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); - - // There should be no control messages. - char recv_buf[sizeof(buf)]; - ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, sizeof(recv_buf))); - - // A nonzero timeval should be available via ioctl. 
- struct timeval tv = {}; - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallSucceeds()); - ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); - - // Enable SO_TIMESTAMP and send a message. - int v = 1; - EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), - SyscallSucceeds()); - ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); - - // There should be a message for SO_TIMESTAMP. - char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; - msghdr msg = {}; - iovec iov = {}; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = cmsgbuf; - msg.msg_controllen = sizeof(cmsgbuf); - ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0)); - struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); - cmsg = CMSG_FIRSTHDR(&msg); - ASSERT_NE(cmsg, nullptr); - - // The ioctl should return the exact same values as before. - struct timeval tv2 = {}; - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv2), SyscallSucceeds()); - ASSERT_EQ(tv.tv_sec, tv2.tv_sec); - ASSERT_EQ(tv.tv_usec, tv2.tv_usec); -} - INSTANTIATE_TEST_SUITE_P(AllInetTests, UdpSocketTest, ::testing::Values(AddressFamily::kIpv4, AddressFamily::kIpv6, diff --git a/test/syscalls/linux/udp_socket_errqueue_test_case.cc b/test/syscalls/linux/udp_socket_errqueue_test_case.cc new file mode 100644 index 000000000..147978f46 --- /dev/null +++ b/test/syscalls/linux/udp_socket_errqueue_test_case.cc @@ -0,0 +1,54 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/udp_socket_test_cases.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +TEST_P(UdpSocketTest, ErrorQueue) { + char cmsgbuf[CMSG_SPACE(sizeof(sock_extended_err))]; + msghdr msg; + memset(&msg, 0, sizeof(msg)); + iovec iov; + memset(&iov, 0, sizeof(iov)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT. + EXPECT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_ERRQUEUE), + SyscallFailsWithErrno(EAGAIN)); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc new file mode 100644 index 000000000..b6090ac66 --- /dev/null +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -0,0 +1,1279 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/udp_socket_test_cases.h" + +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +// Gets a pointer to the port component of the given address. +uint16_t* Port(struct sockaddr_storage* addr) { + switch (addr->ss_family) { + case AF_INET: { + auto sin = reinterpret_cast(addr); + return &sin->sin_port; + } + case AF_INET6: { + auto sin6 = reinterpret_cast(addr); + return &sin6->sin6_port; + } + } + + return nullptr; +} + +void UdpSocketTest::SetUp() { + int type; + if (GetParam() == AddressFamily::kIpv4) { + type = AF_INET; + auto sin = reinterpret_cast(&anyaddr_storage_); + addrlen_ = sizeof(*sin); + sin->sin_addr.s_addr = htonl(INADDR_ANY); + } else { + type = AF_INET6; + auto sin6 = reinterpret_cast(&anyaddr_storage_); + addrlen_ = sizeof(*sin6); + if (GetParam() == AddressFamily::kIpv6) { + sin6->sin6_addr = IN6ADDR_ANY_INIT; + } else { + TestAddress const& v4_mapped_any = V4MappedAny(); + sin6->sin6_addr = + reinterpret_cast(&v4_mapped_any.addr) + ->sin6_addr; + } + } + ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); + + ASSERT_THAT(t_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); + + memset(&anyaddr_storage_, 0, sizeof(anyaddr_storage_)); + anyaddr_ = reinterpret_cast(&anyaddr_storage_); + anyaddr_->sa_family = type; + + if (gvisor::testing::IsRunningOnGvisor()) { + for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { + ports_[i] = TestPort + i; + } + } else { + // When not under gvisor, use utility function to pick port. Assert that + // all ports are different. + std::string error; + for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { + // Find an unused port, we specify port 0 to allow the kernel to provide + // the port. + bool unique = true; + do { + ports_[i] = ASSERT_NO_ERRNO_AND_VALUE(PortAvailable( + 0, AddressFamily::kDualStack, SocketType::kUdp, false)); + ASSERT_GT(ports_[i], 0); + for (size_t j = 0; j < i; ++j) { + if (ports_[j] == ports_[i]) { + unique = false; + break; + } + } + } while (!unique); + } + } + + // Initialize the sockaddrs. 
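+  // Each addr_[i] points at loopback with port ports_[i], giving the tests
+  // three distinct, known endpoints to bind and connect to.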
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(addr_); ++i) { + memset(&addr_storage_[i], 0, sizeof(addr_storage_[i])); + + addr_[i] = reinterpret_cast(&addr_storage_[i]); + addr_[i]->sa_family = type; + + switch (type) { + case AF_INET: { + auto sin = reinterpret_cast(addr_[i]); + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + sin->sin_port = htons(ports_[i]); + break; + } + case AF_INET6: { + auto sin6 = reinterpret_cast(addr_[i]); + sin6->sin6_addr = in6addr_loopback; + sin6->sin6_port = htons(ports_[i]); + break; + } + } + } +} + +TEST_P(UdpSocketTest, Creation) { + int type = AF_INET6; + if (GetParam() == AddressFamily::kIpv4) { + type = AF_INET; + } + + int s_; + + ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); + EXPECT_THAT(close(s_), SyscallSucceeds()); + + ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, 0), SyscallSucceeds()); + EXPECT_THAT(close(s_), SyscallSucceeds()); + + ASSERT_THAT(s_ = socket(type, SOCK_STREAM, IPPROTO_UDP), SyscallFails()); +} + +TEST_P(UdpSocketTest, Getsockname) { + // Check that we're not bound. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, anyaddr_, addrlen_), 0); + + // Bind, then check that we get the right address. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); +} + +TEST_P(UdpSocketTest, Getpeername) { + // Check that we're not connected. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); + + // Connect, then check that we get the right address. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + addrlen = sizeof(addr); + EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); +} + +TEST_P(UdpSocketTest, SendNotConnected) { + // Do send & write, they must fail. + char buf[512]; + EXPECT_THAT(send(s_, buf, sizeof(buf), 0), + SyscallFailsWithErrno(EDESTADDRREQ)); + + EXPECT_THAT(write(s_, buf, sizeof(buf)), SyscallFailsWithErrno(EDESTADDRREQ)); + + // Use sendto. + ASSERT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Check that we're bound now. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_NE(*Port(&addr), 0); +} + +TEST_P(UdpSocketTest, ConnectBinds) { + // Connect the socket. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Check that we're bound now. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_NE(*Port(&addr), 0); +} + +TEST_P(UdpSocketTest, ReceiveNotBound) { + char buf[512]; + EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, Bind) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Try to bind again. 
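+  // Rebinding an already-bound socket fails with EINVAL; EADDRINUSE is only
+  // returned when a *different* socket binds the same address (see BindInUse).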
+ EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL)); + + // Check that we're still bound to the original address. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); +} + +TEST_P(UdpSocketTest, BindInUse) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Try to bind again. + EXPECT_THAT(bind(t_, addr_[0], addrlen_), SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(UdpSocketTest, ReceiveAfterConnect) { + // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds()); + + // Get the address s_ was bound to during connect. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + + // Send from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, + reinterpret_cast(&addr), addrlen), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, ReceiveAfterDisconnect) { + // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Get the address s_ was bound to during connect. + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + + for (int i = 0; i < 2; i++) { + // Send from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, + reinterpret_cast(&addr), addrlen), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + + // Disconnect s_. + struct sockaddr addr = {}; + addr.sa_family = AF_UNSPEC; + ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), SyscallSucceeds()); + // Connect s_ loopback:TestPort. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + } +} + +TEST_P(UdpSocketTest, Connect) { + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Check that we're connected to the right peer. + struct sockaddr_storage peer; + socklen_t peerlen = sizeof(peer); + EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, addr_[0], addrlen_), 0); + + // Try to bind after connect. + EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL)); + + // Try to connect again. + EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + + // Check that peer name changed. 
+ peerlen = sizeof(peer); + EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); +} + +void ConnectAny(AddressFamily family, int sockfd, uint16_t port) { + struct sockaddr_storage addr = {}; + + // Precondition check. + { + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sockfd, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + + if (family == AddressFamily::kIpv4) { + auto addr_out = reinterpret_cast(&addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY)); + } else { + auto addr_out = reinterpret_cast(&addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + struct in6_addr any = IN6ADDR_ANY_INIT; + EXPECT_EQ(memcmp(&addr_out->sin6_addr, &any, sizeof(in6_addr)), 0); + } + + { + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sockfd, reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); + } + + struct sockaddr_storage baddr = {}; + if (family == AddressFamily::kIpv4) { + auto addr_in = reinterpret_cast(&baddr); + addrlen = sizeof(*addr_in); + addr_in->sin_family = AF_INET; + addr_in->sin_addr.s_addr = htonl(INADDR_ANY); + addr_in->sin_port = port; + } else { + auto addr_in = reinterpret_cast(&baddr); + addrlen = sizeof(*addr_in); + addr_in->sin6_family = AF_INET6; + addr_in->sin6_port = port; + if (family == AddressFamily::kIpv6) { + addr_in->sin6_addr = IN6ADDR_ANY_INIT; + } else { + TestAddress const& v4_mapped_any = V4MappedAny(); + addr_in->sin6_addr = + reinterpret_cast(&v4_mapped_any.addr) + ->sin6_addr; + } + } + + // TODO(b/138658473): gVisor doesn't allow connecting to the zero port. + if (port == 0) { + SKIP_IF(IsRunningOnGvisor()); + } + + ASSERT_THAT(connect(sockfd, reinterpret_cast(&baddr), addrlen), + SyscallSucceeds()); + } + + // Postcondition check. 
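+  // After connecting to the wildcard address, the local address should be
+  // rewritten to loopback, and a peer should be reported only when a
+  // non-zero port was supplied.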
+ { + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sockfd, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + + if (family == AddressFamily::kIpv4) { + auto addr_out = reinterpret_cast(&addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK)); + } else { + auto addr_out = reinterpret_cast(&addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + struct in6_addr loopback; + if (family == AddressFamily::kIpv6) { + loopback = IN6ADDR_LOOPBACK_INIT; + } else { + TestAddress const& v4_mapped_loopback = V4MappedLoopback(); + loopback = reinterpret_cast( + &v4_mapped_loopback.addr) + ->sin6_addr; + } + + EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); + } + + addrlen = sizeof(addr); + if (port == 0) { + EXPECT_THAT( + getpeername(sockfd, reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); + } else { + EXPECT_THAT( + getpeername(sockfd, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + } + } +} + +TEST_P(UdpSocketTest, ConnectAny) { ConnectAny(GetParam(), s_, 0); } + +TEST_P(UdpSocketTest, ConnectAnyWithPort) { + auto port = *Port(reinterpret_cast(addr_[1])); + ConnectAny(GetParam(), s_, port); +} + +void DisconnectAfterConnectAny(AddressFamily family, int sockfd, int port) { + struct sockaddr_storage addr = {}; + + socklen_t addrlen = sizeof(addr); + struct sockaddr_storage baddr = {}; + if (family == AddressFamily::kIpv4) { + auto addr_in = reinterpret_cast(&baddr); + addrlen = sizeof(*addr_in); + addr_in->sin_family = AF_INET; + addr_in->sin_addr.s_addr = htonl(INADDR_ANY); + addr_in->sin_port = port; + } else { + auto addr_in = reinterpret_cast(&baddr); + addrlen = sizeof(*addr_in); + addr_in->sin6_family = AF_INET6; + addr_in->sin6_port = port; + if (family == AddressFamily::kIpv6) { + addr_in->sin6_addr = IN6ADDR_ANY_INIT; + } else { + TestAddress const& v4_mapped_any = V4MappedAny(); + addr_in->sin6_addr = + reinterpret_cast(&v4_mapped_any.addr) + ->sin6_addr; + } + } + + // TODO(b/138658473): gVisor doesn't allow connecting to the zero port. + if (port == 0) { + SKIP_IF(IsRunningOnGvisor()); + } + + ASSERT_THAT(connect(sockfd, reinterpret_cast(&baddr), addrlen), + SyscallSucceeds()); + // Now the socket is bound to the loopback address. + + // Disconnect + addrlen = sizeof(addr); + addr.ss_family = AF_UNSPEC; + ASSERT_THAT(connect(sockfd, reinterpret_cast(&addr), addrlen), + SyscallSucceeds()); + + // Check that after disconnect the socket is bound to the ANY address. + EXPECT_THAT(getsockname(sockfd, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + if (family == AddressFamily::kIpv4) { + auto addr_out = reinterpret_cast(&addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY)); + } else { + auto addr_out = reinterpret_cast(&addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + struct in6_addr loopback = IN6ADDR_ANY_INIT; + + EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); + } +} + +TEST_P(UdpSocketTest, DisconnectAfterConnectAny) { + DisconnectAfterConnectAny(GetParam(), s_, 0); +} + +TEST_P(UdpSocketTest, DisconnectAfterConnectAnyWithPort) { + auto port = *Port(reinterpret_cast(addr_[1])); + DisconnectAfterConnectAny(GetParam(), s_, port); +} + +TEST_P(UdpSocketTest, DisconnectAfterBind) { + ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); + // Connect the socket. 
+ ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr = {}; + addr.ss_family = AF_UNSPEC; + EXPECT_THAT( + connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), + SyscallSucceeds()); + + // Check that we're still bound. + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0); + + addrlen = sizeof(addr); + EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, DisconnectAfterBindToAny) { + struct sockaddr_storage baddr = {}; + socklen_t addrlen; + auto port = *Port(reinterpret_cast(addr_[1])); + if (GetParam() == AddressFamily::kIpv4) { + auto addr_in = reinterpret_cast(&baddr); + addr_in->sin_family = AF_INET; + addr_in->sin_port = port; + addr_in->sin_addr.s_addr = htonl(INADDR_ANY); + } else { + auto addr_in = reinterpret_cast(&baddr); + addr_in->sin6_family = AF_INET6; + addr_in->sin6_port = port; + addr_in->sin6_scope_id = 0; + addr_in->sin6_addr = IN6ADDR_ANY_INIT; + } + ASSERT_THAT(bind(s_, reinterpret_cast(&baddr), addrlen_), + SyscallSucceeds()); + // Connect the socket. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr = {}; + addr.ss_family = AF_UNSPEC; + EXPECT_THAT( + connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), + SyscallSucceeds()); + + // Check that we're still bound. + addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, &baddr, addrlen), 0); + + addrlen = sizeof(addr); + EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, Disconnect) { + for (int i = 0; i < 2; i++) { + // Try to connect again. + EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + + // Check that we're connected to the right peer. + struct sockaddr_storage peer; + socklen_t peerlen = sizeof(peer); + EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), + SyscallSucceeds()); + EXPECT_EQ(peerlen, addrlen_); + EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); + + // Try to disconnect. + struct sockaddr_storage addr = {}; + addr.ss_family = AF_UNSPEC; + EXPECT_THAT( + connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), + SyscallSucceeds()); + + peerlen = sizeof(peer); + EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), + SyscallFailsWithErrno(ENOTCONN)); + + // Check that we're still bound. + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(*Port(&addr), 0); + } +} + +TEST_P(UdpSocketTest, ConnectBadAddress) { + struct sockaddr addr = {}; + addr.sa_family = addr_[0]->sa_family; + ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) { + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send to a different destination than we're connected to. + char buf[512]; + EXPECT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[1], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); +} + +TEST_P(UdpSocketTest, ZerolengthWriteAllowed) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. 
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+1. + ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from s_ to t_. + ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); + // Receive the packet. + char received[3]; + EXPECT_THAT(read(t_, received, sizeof(received)), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+1. + ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Set t_ to non-blocking. + int opts = 0; + ASSERT_THAT(opts = fcntl(t_, F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(t_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from s_ to t_. + ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); + // Receive the packet. + char received[3]; + EXPECT_THAT(read(t_, received, sizeof(received)), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(read(t_, received, sizeof(received)), + SyscallFailsWithErrno(EAGAIN)); +} + +TEST_P(UdpSocketTest, SendAndReceiveNotConnected) { + // Bind s_ to loopback. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send some data to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, SendAndReceiveConnected) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+1. + ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Send some data from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data. + char received[sizeof(buf)]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, ReceiveFromNotConnected) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+2. + ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); + + // Send some data from t_ to s_. + char buf[512]; + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Check that the data isn't_ received because it was sent from a different + // address than we're connected. + EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReceiveBeforeConnect) { + // Bind s_ to loopback:TestPort. 
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+2. + ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); + + // Send some data from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Connect to loopback:TestPort+1. + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Receive the data. It works because it was sent before the connect. + char received[sizeof(buf)]; + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + + // Send again. This time it should not be received. + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReceiveFrom) { + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + + // Bind t_ to loopback:TestPort+1. + ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Send some data from t_ to s_. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + + // Receive the data and sender address. + char received[sizeof(buf)]; + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(recvfrom(s_, received, sizeof(received), 0, + reinterpret_cast(&addr), &addrlen), + SyscallSucceedsWithValue(sizeof(received))); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0); +} + +TEST_P(UdpSocketTest, Listen) { + ASSERT_THAT(listen(s_, SOMAXCONN), SyscallFailsWithErrno(EOPNOTSUPP)); +} + +TEST_P(UdpSocketTest, Accept) { + ASSERT_THAT(accept(s_, nullptr, nullptr), SyscallFailsWithErrno(EOPNOTSUPP)); +} + +// This test validates that a read shutdown with pending data allows the read +// to proceed with the data before returning EAGAIN. +TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) { + char received[512]; + + // Bind t_ to loopback:TestPort+2. + ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds()); + + // Connect the socket, then try to shutdown again. + ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + + // Verify that we get EWOULDBLOCK when there is nothing to read. + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + const char* buf = "abc"; + EXPECT_THAT(write(t_, buf, 3), SyscallSucceedsWithValue(3)); + + int opts = 0; + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(s_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds()); + ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + ASSERT_NE(opts & O_NONBLOCK, 0); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + // We should get the data even though read has been shutdown. 
+ EXPECT_THAT(recv(s_, received, 2, 0), SyscallSucceedsWithValue(2)); + + // Because we read less than the entire packet length, since it's a packet + // based socket any subsequent reads should return EWOULDBLOCK. + EXPECT_THAT(recv(s_, received, 1, 0), SyscallFailsWithErrno(EWOULDBLOCK)); +} + +// This test is validating that even after a socket is shutdown if it's +// reconnected it will reset the shutdown state. +TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) { + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then try to shutdown again. + ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); +} + +TEST_P(UdpSocketTest, ReadShutdown) { + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then try to shutdown again. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + EXPECT_THAT(recv(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, ReadShutdownDifferentThread) { + char received[512]; + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Connect the socket, then shutdown from another thread. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + ScopedThread t([&] { + absl::SleepFor(absl::Milliseconds(200)); + EXPECT_THAT(shutdown(this->s_, SHUT_RD), SyscallSucceeds()); + }); + EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); + t.Join(); + + EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(0)); +} + +TEST_P(UdpSocketTest, WriteShutdown) { + EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); +} + +TEST_P(UdpSocketTest, SynchronousReceive) { + // Bind s_ to loopback. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send some data to s_ from another thread. + char buf[512]; + RandomizeBuffer(buf, sizeof(buf)); + + // Receive the data prior to actually starting the other thread. + char received[512]; + EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Start the thread. 
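+  // The sender sleeps briefly so that the main thread has (most likely)
+  // already entered the blocking recv() by the time the datagram is sent.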
+ ScopedThread t([&] { + absl::SleepFor(absl::Milliseconds(200)); + ASSERT_THAT( + sendto(this->t_, buf, sizeof(buf), 0, this->addr_[0], this->addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); + }); + + EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + SyscallSucceedsWithValue(512)); + EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send 3 packets from t_ to s_. + constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 3; ++i) { + ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(psize)); + } + + // Receive the data as 3 separate packets. + char received[6 * psize]; + for (int i = 0; i < 3; ++i) { + EXPECT_THAT(recv(s_, received + i * psize, 3 * psize, 0), + SyscallSucceedsWithValue(psize)); + } + EXPECT_EQ(memcmp(buf, received, 3 * psize), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Direct writes from t_ to s_. + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send 2 packets from t_ to s_, where each packet's data consists of 2 + // discontiguous iovecs. + constexpr size_t kPieceSize = 100; + char buf[4 * kPieceSize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 2; i++) { + struct iovec iov[2]; + for (int j = 0; j < 2; j++) { + iov[j].iov_base = reinterpret_cast( + reinterpret_cast(buf) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + ASSERT_THAT(writev(t_, iov, 2), SyscallSucceedsWithValue(2 * kPieceSize)); + } + + // Receive the data as 2 separate packets. + char received[6 * kPieceSize]; + for (int i = 0; i < 2; i++) { + struct iovec iov[3]; + for (int j = 0; j < 3; j++) { + iov[j].iov_base = reinterpret_cast( + reinterpret_cast(received) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + ASSERT_THAT(readv(s_, iov, 3), SyscallSucceedsWithValue(2 * kPieceSize)); + } + EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); +} + +TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Send 2 packets from t_ to s_, where each packet's data consists of 2 + // discontiguous iovecs. + constexpr size_t kPieceSize = 100; + char buf[4 * kPieceSize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 2; i++) { + struct iovec iov[2]; + for (int j = 0; j < 2; j++) { + iov[j].iov_base = reinterpret_cast( + reinterpret_cast(buf) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + struct msghdr msg = {}; + msg.msg_name = addr_[0]; + msg.msg_namelen = addrlen_; + msg.msg_iov = iov; + msg.msg_iovlen = 2; + ASSERT_THAT(sendmsg(t_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize)); + } + + // Receive the data as 2 separate packets. 
+ char received[6 * kPieceSize]; + for (int i = 0; i < 2; i++) { + struct iovec iov[3]; + for (int j = 0; j < 3; j++) { + iov[j].iov_base = reinterpret_cast( + reinterpret_cast(received) + (i + 2 * j) * kPieceSize); + iov[j].iov_len = kPieceSize; + } + struct msghdr msg = {}; + msg.msg_iov = iov; + msg.msg_iovlen = 3; + ASSERT_THAT(recvmsg(s_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize)); + } + EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); +} + +TEST_P(UdpSocketTest, FIONREADShutdown) { + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // A UDP socket must be connected before it can be shutdown. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); +} + +TEST_P(UdpSocketTest, FIONREADWriteShutdown) { + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // A UDP socket must be connected before it can be shutdown. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + const char str[] = "abc"; + ASSERT_THAT(send(s_, str, sizeof(str), 0), + SyscallSucceedsWithValue(sizeof(str))); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, sizeof(str)); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, sizeof(str)); +} + +TEST_P(UdpSocketTest, Fionread) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Check that the bound socket with an empty buffer reports an empty first + // packet. + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Send 3 packets from t_ to s_. + constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 3; ++i) { + ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(psize)); + + // Check that regardless of how many packets are in the queue, the size + // reported is that of a single packet. + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, psize); + } +} + +TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) { + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // Check that the bound socket with an empty buffer reports an empty first + // packet. + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Send 3 packets from t_ to s_. + constexpr int psize = 100; + char buf[3 * psize]; + RandomizeBuffer(buf, sizeof(buf)); + + for (int i = 0; i < 3; ++i) { + ASSERT_THAT(sendto(t_, buf + i * psize, 0, 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(0)); + + // Check that regardless of how many packets are in the queue, the size + // reported is that of a single packet. 
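+  // Every datagram queued in this test is zero length, so FIONREAD reports 0
+  // even though packets are pending.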
+ n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + } +} + +TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) { + int n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + // Bind s_ to loopback:TestPort. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + // A UDP socket must be connected before it can be shutdown. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + const char str[] = "abc"; + ASSERT_THAT(send(s_, str, 0, 0), SyscallSucceedsWithValue(0)); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); + + EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + n = -1; + EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_EQ(n, 0); +} + +TEST_P(UdpSocketTest, SoTimestampOffByDefault) { + int v = -1; + socklen_t optlen = sizeof(v); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, &optlen), + SyscallSucceeds()); + ASSERT_EQ(v, kSockOptOff); + ASSERT_EQ(optlen, sizeof(v)); +} + +TEST_P(UdpSocketTest, SoTimestamp) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + int v = 1; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), + SyscallSucceeds()); + + char buf[3]; + // Send zero length packet from t_ to s_. + ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + + char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; + msghdr msg; + memset(&msg, 0, sizeof(msg)); + iovec iov; + memset(&iov, 0, sizeof(iov)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); + ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval))); + + struct timeval tv = {}; + memcpy(&tv, CMSG_DATA(cmsg), sizeof(struct timeval)); + + ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); + + // There should be nothing to get via ioctl. + ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallFailsWithErrno(ENOENT)); +} + +TEST_P(UdpSocketTest, WriteShutdownNotConnected) { + EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(UdpSocketTest, TimestampIoctl) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + char buf[3]; + // Send packet from t_ to s_. + ASSERT_THAT(RetryEINTR(write)(t_, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + // There should be no control messages. + char recv_buf[sizeof(buf)]; + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, sizeof(recv_buf))); + + // A nonzero timeval should be available via ioctl. 
+ struct timeval tv = {}; + ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallSucceeds()); + ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); +} + +TEST_P(UdpSocketTest, TimetstampIoctlNothingRead) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + struct timeval tv = {}; + ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallFailsWithErrno(ENOENT)); +} + +// Test that the timestamp accessed via SIOCGSTAMP is still accessible after +// SO_TIMESTAMP is enabled and used to retrieve a timestamp. +TEST_P(UdpSocketTest, TimestampIoctlPersistence) { + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + char buf[3]; + // Send packet from t_ to s_. + ASSERT_THAT(RetryEINTR(write)(t_, buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + + // There should be no control messages. + char recv_buf[sizeof(buf)]; + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, sizeof(recv_buf))); + + // A nonzero timeval should be available via ioctl. + struct timeval tv = {}; + ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallSucceeds()); + ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); + + // Enable SO_TIMESTAMP and send a message. + int v = 1; + EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), + SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + + // There should be a message for SO_TIMESTAMP. + char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; + msghdr msg = {}; + iovec iov = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0)); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(cmsg, nullptr); + + // The ioctl should return the exact same values as before. + struct timeval tv2 = {}; + ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv2), SyscallSucceeds()); + ASSERT_EQ(tv.tv_sec, tv2.tv_sec); + ASSERT_EQ(tv.tv_usec, tv2.tv_usec); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/udp_socket_test_cases.h b/test/syscalls/linux/udp_socket_test_cases.h new file mode 100644 index 000000000..2fd79d99e --- /dev/null +++ b/test/syscalls/linux/udp_socket_test_cases.h @@ -0,0 +1,74 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ +#define THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" + +namespace gvisor { +namespace testing { + +// The initial port to be be used on gvisor. 
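+// When running outside gvisor, SetUp() ignores this value and reserves free
+// ports dynamically instead.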
+constexpr int TestPort = 40000; + +// Fixture for tests parameterized by the address family to use (AF_INET and +// AF_INET6) when creating sockets. +class UdpSocketTest + : public ::testing::TestWithParam { + protected: + // Creates two sockets that will be used by test cases. + void SetUp() override; + + // Closes the sockets created by SetUp(). + void TearDown() override { + EXPECT_THAT(close(s_), SyscallSucceeds()); + EXPECT_THAT(close(t_), SyscallSucceeds()); + + for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { + ASSERT_NO_ERRNO(FreeAvailablePort(ports_[i])); + } + } + + // First UDP socket. + int s_; + + // Second UDP socket. + int t_; + + // The length of the socket address. + socklen_t addrlen_; + + // Initialized address pointing to loopback and port TestPort+i. + struct sockaddr* addr_[3]; + + // Initialize "any" address. + struct sockaddr* anyaddr_; + + // Used ports. + int ports_[3]; + + private: + // Storage for the loopback addresses. + struct sockaddr_storage addr_storage_[3]; + + // Storage for the "any" address. + struct sockaddr_storage anyaddr_storage_; +}; + +} // namespace testing +} // namespace gvisor + +#endif // THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ -- cgit v1.2.3 From 1518f7fd38cc2367ee966443a5895a3f25621d83 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Mon, 2 Dec 2019 08:32:27 -0800 Subject: Fix typo, s/Convertable/Convertible/g PiperOrigin-RevId: 283345791 --- test/syscalls/linux/socket_ipv4_udp_unbound.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index 6b1af6c17..aa6fb4e3f 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -1814,7 +1814,7 @@ TEST_P(IPv4UDPUnboundSocketTest, BindReusePortThenReuseAddr) { SyscallFailsWithErrno(EADDRINUSE)); } -TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertableToReusePort) { +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReusePort) { auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -1855,7 +1855,7 @@ TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertableToReusePort) { SyscallFailsWithErrno(EADDRINUSE)); } -TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertableToReuseAddr) { +TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReuseAddr) { // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. SKIP_IF(IsRunningOnGvisor()); -- cgit v1.2.3 From b41277049c6c6c15581d8698fd9418ef9c2cec8a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 2 Dec 2019 15:35:51 -0800 Subject: test/syscal: Don't skip ClockGettime.CputimeId We skipped it due to the issue in the golang scheduler which has been fixed in go1.13. PiperOrigin-RevId: 283432226 --- test/syscalls/linux/clock_gettime.cc | 5 ----- 1 file changed, 5 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/clock_gettime.cc b/test/syscalls/linux/clock_gettime.cc index 2aa91691e..7f6015049 100644 --- a/test/syscalls/linux/clock_gettime.cc +++ b/test/syscalls/linux/clock_gettime.cc @@ -56,11 +56,6 @@ void spin_ns(int64_t ns) { // Test that CLOCK_PROCESS_CPUTIME_ID is a superset of CLOCK_THREAD_CPUTIME_ID. 
TEST(ClockGettime, CputimeId) { - // TODO(b/128871825,golang.org/issue/10958): Test times out when there is a - // small number of core because one goroutine starves the others. - printf("CPUS: %d\n", std::thread::hardware_concurrency()); - SKIP_IF(std::thread::hardware_concurrency() <= 2); - constexpr int kNumThreads = 13; // arbitrary absl::Duration spin_time = absl::Seconds(1); -- cgit v1.2.3 From d7cc2480cb6e465ce01eb245e7edbad2c68c44d8 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 3 Dec 2019 12:45:43 -0800 Subject: Add RunfilesPath to test_util A few tests have their own ad-hoc implementations. Add a single common one. PiperOrigin-RevId: 283601666 --- test/syscalls/linux/exec.cc | 145 +++++++++++++++++-------------------- test/syscalls/linux/sigaltstack.cc | 8 +- test/util/BUILD | 2 + test/util/test_util.h | 6 ++ test/util/test_util_runfiles.cc | 46 ++++++++++++ 5 files changed, 123 insertions(+), 84 deletions(-) create mode 100644 test/util/test_util_runfiles.cc (limited to 'test') diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index 581f03533..b5e0a512b 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -47,23 +47,14 @@ namespace testing { namespace { -constexpr char kBasicWorkload[] = "exec_basic_workload"; -constexpr char kExitScript[] = "exit_script"; -constexpr char kStateWorkload[] = "exec_state_workload"; -constexpr char kProcExeWorkload[] = "exec_proc_exe_workload"; -constexpr char kAssertClosedWorkload[] = "exec_assert_closed_workload"; -constexpr char kPriorityWorkload[] = "priority_execve"; - -std::string WorkloadPath(absl::string_view binary) { - std::string full_path; - char* test_src = getenv("TEST_SRCDIR"); - if (test_src) { - full_path = JoinPath(test_src, "__main__/test/syscalls/linux", binary); - } - - TEST_CHECK(full_path.empty() == false); - return full_path; -} +constexpr char kBasicWorkload[] = "test/syscalls/linux/exec_basic_workload"; +constexpr char kExitScript[] = "test/syscalls/linux/exit_script"; +constexpr char kStateWorkload[] = "test/syscalls/linux/exec_state_workload"; +constexpr char kProcExeWorkload[] = + "test/syscalls/linux/exec_proc_exe_workload"; +constexpr char kAssertClosedWorkload[] = + "test/syscalls/linux/exec_assert_closed_workload"; +constexpr char kPriorityWorkload[] = "test/syscalls/linux/priority_execve"; constexpr char kExit42[] = "--exec_exit_42"; constexpr char kExecWithThread[] = "--exec_exec_with_thread"; @@ -171,44 +162,44 @@ TEST(ExecTest, EmptyPath) { } TEST(ExecTest, Basic) { - CheckExec(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)}, {}, + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)}, {}, ArgEnvExitStatus(0, 0), - absl::StrCat(WorkloadPath(kBasicWorkload), "\n")); + absl::StrCat(RunfilePath(kBasicWorkload), "\n")); } TEST(ExecTest, OneArg) { - CheckExec(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload), "1"}, - {}, ArgEnvExitStatus(1, 0), - absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n")); + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload), "1"}, {}, + ArgEnvExitStatus(1, 0), + absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n")); } TEST(ExecTest, FiveArg) { - CheckExec(WorkloadPath(kBasicWorkload), - {WorkloadPath(kBasicWorkload), "1", "2", "3", "4", "5"}, {}, + CheckExec(RunfilePath(kBasicWorkload), + {RunfilePath(kBasicWorkload), "1", "2", "3", "4", "5"}, {}, ArgEnvExitStatus(5, 0), - absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n2\n3\n4\n5\n")); + absl::StrCat(RunfilePath(kBasicWorkload), 
"\n1\n2\n3\n4\n5\n")); } TEST(ExecTest, OneEnv) { - CheckExec(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)}, {"1"}, + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)}, {"1"}, ArgEnvExitStatus(0, 1), - absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n")); + absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n")); } TEST(ExecTest, FiveEnv) { - CheckExec(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload)}, + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload)}, {"1", "2", "3", "4", "5"}, ArgEnvExitStatus(0, 5), - absl::StrCat(WorkloadPath(kBasicWorkload), "\n1\n2\n3\n4\n5\n")); + absl::StrCat(RunfilePath(kBasicWorkload), "\n1\n2\n3\n4\n5\n")); } TEST(ExecTest, OneArgOneEnv) { - CheckExec(WorkloadPath(kBasicWorkload), {WorkloadPath(kBasicWorkload), "arg"}, + CheckExec(RunfilePath(kBasicWorkload), {RunfilePath(kBasicWorkload), "arg"}, {"env"}, ArgEnvExitStatus(1, 1), - absl::StrCat(WorkloadPath(kBasicWorkload), "\narg\nenv\n")); + absl::StrCat(RunfilePath(kBasicWorkload), "\narg\nenv\n")); } TEST(ExecTest, InterpreterScript) { - CheckExec(WorkloadPath(kExitScript), {WorkloadPath(kExitScript), "25"}, {}, + CheckExec(RunfilePath(kExitScript), {RunfilePath(kExitScript), "25"}, {}, ArgEnvExitStatus(25, 0), ""); } @@ -216,7 +207,7 @@ TEST(ExecTest, InterpreterScript) { TEST(ExecTest, InterpreterScriptArgSplit) { // Symlink through /tmp to ensure the path is short enough. TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo bar"), @@ -230,7 +221,7 @@ TEST(ExecTest, InterpreterScriptArgSplit) { TEST(ExecTest, InterpreterScriptArgvZero) { // Symlink through /tmp to ensure the path is short enough. TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); @@ -244,7 +235,7 @@ TEST(ExecTest, InterpreterScriptArgvZero) { TEST(ExecTest, InterpreterScriptArgvZeroRelative) { // Symlink through /tmp to ensure the path is short enough. TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); @@ -261,7 +252,7 @@ TEST(ExecTest, InterpreterScriptArgvZeroRelative) { TEST(ExecTest, InterpreterScriptArgvZeroAdded) { // Symlink through /tmp to ensure the path is short enough. TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path()), 0755)); @@ -274,7 +265,7 @@ TEST(ExecTest, InterpreterScriptArgvZeroAdded) { TEST(ExecTest, InterpreterScriptArgNUL) { // Symlink through /tmp to ensure the path is short enough. 
TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), @@ -289,7 +280,7 @@ TEST(ExecTest, InterpreterScriptArgNUL) { TEST(ExecTest, InterpreterScriptTrailingWhitespace) { // Symlink through /tmp to ensure the path is short enough. TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " "), 0755)); @@ -302,7 +293,7 @@ TEST(ExecTest, InterpreterScriptTrailingWhitespace) { TEST(ExecTest, InterpreterScriptArgWhitespace) { // Symlink through /tmp to ensure the path is short enough. TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " foo"), 0755)); @@ -325,7 +316,7 @@ TEST(ExecTest, InterpreterScriptNoPath) { TEST(ExecTest, ExecFn) { // Symlink through /tmp to ensure the path is short enough. TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kStateWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kStateWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), absl::StrCat("#!", link.path(), " PrintExecFn"), @@ -342,7 +333,7 @@ TEST(ExecTest, ExecFn) { } TEST(ExecTest, ExecName) { - std::string path = WorkloadPath(kStateWorkload); + std::string path = RunfilePath(kStateWorkload); CheckExec(path, {path, "PrintExecName"}, {}, ArgEnvExitStatus(0, 0), absl::StrCat(Basename(path).substr(0, 15), "\n")); @@ -351,7 +342,7 @@ TEST(ExecTest, ExecName) { TEST(ExecTest, ExecNameScript) { // Symlink through /tmp to ensure the path is short enough. TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kStateWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kStateWorkload))); TempPath script = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( GetAbsoluteTestTmpdir(), @@ -405,13 +396,13 @@ TEST(ExecStateTest, HandlerReset) { ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); ExecveArray args = { - WorkloadPath(kStateWorkload), + RunfilePath(kStateWorkload), "CheckSigHandler", absl::StrCat(SIGUSR1), absl::StrCat(absl::Hex(reinterpret_cast(SIG_DFL))), }; - CheckExec(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); + CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); } // Ignored signal dispositions are not reset. 
@@ -421,13 +412,13 @@ TEST(ExecStateTest, IgnorePreserved) { ASSERT_THAT(sigaction(SIGUSR1, &sa, nullptr), SyscallSucceeds()); ExecveArray args = { - WorkloadPath(kStateWorkload), + RunfilePath(kStateWorkload), "CheckSigHandler", absl::StrCat(SIGUSR1), absl::StrCat(absl::Hex(reinterpret_cast(SIG_IGN))), }; - CheckExec(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); + CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); } // Signal masks are not reset on exec @@ -438,12 +429,12 @@ TEST(ExecStateTest, SignalMask) { ASSERT_THAT(sigprocmask(SIG_BLOCK, &s, nullptr), SyscallSucceeds()); ExecveArray args = { - WorkloadPath(kStateWorkload), + RunfilePath(kStateWorkload), "CheckSigBlocked", absl::StrCat(SIGUSR1), }; - CheckExec(WorkloadPath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); + CheckExec(RunfilePath(kStateWorkload), args, {}, W_EXITCODE(0, 0), ""); } // itimers persist across execve. @@ -471,7 +462,7 @@ TEST(ExecStateTest, ItimerPreserved) { } }; - std::string filename = WorkloadPath(kStateWorkload); + std::string filename = RunfilePath(kStateWorkload); ExecveArray argv = { filename, "CheckItimerEnabled", @@ -495,8 +486,8 @@ TEST(ExecStateTest, ItimerPreserved) { TEST(ProcSelfExe, ChangesAcrossExecve) { // See exec_proc_exe_workload for more details. We simply // assert that the /proc/self/exe link changes across execve. - CheckExec(WorkloadPath(kProcExeWorkload), - {WorkloadPath(kProcExeWorkload), + CheckExec(RunfilePath(kProcExeWorkload), + {RunfilePath(kProcExeWorkload), ASSERT_NO_ERRNO_AND_VALUE(ProcessExePath(getpid()))}, {}, W_EXITCODE(0, 0), ""); } @@ -507,8 +498,8 @@ TEST(ExecTest, CloexecNormalFile) { const FileDescriptor fd_closed_on_exec = ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY | O_CLOEXEC)); - CheckExec(WorkloadPath(kAssertClosedWorkload), - {WorkloadPath(kAssertClosedWorkload), + CheckExec(RunfilePath(kAssertClosedWorkload), + {RunfilePath(kAssertClosedWorkload), absl::StrCat(fd_closed_on_exec.get())}, {}, W_EXITCODE(0, 0), ""); @@ -517,10 +508,10 @@ TEST(ExecTest, CloexecNormalFile) { const FileDescriptor fd_open_on_exec = ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY)); - CheckExec(WorkloadPath(kAssertClosedWorkload), - {WorkloadPath(kAssertClosedWorkload), - absl::StrCat(fd_open_on_exec.get())}, - {}, W_EXITCODE(2, 0), ""); + CheckExec( + RunfilePath(kAssertClosedWorkload), + {RunfilePath(kAssertClosedWorkload), absl::StrCat(fd_open_on_exec.get())}, + {}, W_EXITCODE(2, 0), ""); } TEST(ExecTest, CloexecEventfd) { @@ -528,15 +519,15 @@ TEST(ExecTest, CloexecEventfd) { ASSERT_THAT(efd = eventfd(0, EFD_CLOEXEC), SyscallSucceeds()); FileDescriptor fd(efd); - CheckExec(WorkloadPath(kAssertClosedWorkload), - {WorkloadPath(kAssertClosedWorkload), absl::StrCat(fd.get())}, {}, + CheckExec(RunfilePath(kAssertClosedWorkload), + {RunfilePath(kAssertClosedWorkload), absl::StrCat(fd.get())}, {}, W_EXITCODE(0, 0), ""); } constexpr int kLinuxMaxSymlinks = 40; TEST(ExecTest, SymlinkLimitExceeded) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); // Hold onto TempPath objects so they are not destructed prematurely. 
std::vector symlinks; @@ -575,13 +566,13 @@ TEST(ExecTest, SymlinkLimitRefreshedForInterpreter) { } TEST(ExecveatTest, BasicWithFDCWD) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); CheckExecveat(AT_FDCWD, path, {path}, {}, /*flags=*/0, ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n")); } TEST(ExecveatTest, Basic) { - std::string absolute_path = WorkloadPath(kBasicWorkload); + std::string absolute_path = RunfilePath(kBasicWorkload); std::string parent_dir = std::string(Dirname(absolute_path)); std::string base = std::string(Basename(absolute_path)); const FileDescriptor dirfd = @@ -592,7 +583,7 @@ TEST(ExecveatTest, Basic) { } TEST(ExecveatTest, FDNotADirectory) { - std::string absolute_path = WorkloadPath(kBasicWorkload); + std::string absolute_path = RunfilePath(kBasicWorkload); std::string base = std::string(Basename(absolute_path)); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(absolute_path, 0)); @@ -604,13 +595,13 @@ TEST(ExecveatTest, FDNotADirectory) { } TEST(ExecveatTest, AbsolutePathWithFDCWD) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); CheckExecveat(AT_FDCWD, path, {path}, {}, ArgEnvExitStatus(0, 0), 0, absl::StrCat(path, "\n")); } TEST(ExecveatTest, AbsolutePath) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); // File descriptor should be ignored when an absolute path is given. const int32_t badFD = -1; CheckExecveat(badFD, path, {path}, {}, ArgEnvExitStatus(0, 0), 0, @@ -618,7 +609,7 @@ TEST(ExecveatTest, AbsolutePath) { } TEST(ExecveatTest, EmptyPathBasic) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH)); CheckExecveat(fd.get(), "", {path}, {}, AT_EMPTY_PATH, ArgEnvExitStatus(0, 0), @@ -626,7 +617,7 @@ TEST(ExecveatTest, EmptyPathBasic) { } TEST(ExecveatTest, EmptyPathWithDirFD) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); std::string parent_dir = std::string(Dirname(path)); const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY)); @@ -639,7 +630,7 @@ TEST(ExecveatTest, EmptyPathWithDirFD) { } TEST(ExecveatTest, EmptyPathWithoutEmptyPathFlag) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH)); int execve_errno; @@ -649,7 +640,7 @@ TEST(ExecveatTest, EmptyPathWithoutEmptyPathFlag) { } TEST(ExecveatTest, AbsolutePathWithEmptyPathFlag) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_PATH)); CheckExecveat(fd.get(), path, {path}, {}, AT_EMPTY_PATH, @@ -657,7 +648,7 @@ TEST(ExecveatTest, AbsolutePathWithEmptyPathFlag) { } TEST(ExecveatTest, RelativePathWithEmptyPathFlag) { - std::string absolute_path = WorkloadPath(kBasicWorkload); + std::string absolute_path = RunfilePath(kBasicWorkload); std::string parent_dir = std::string(Dirname(absolute_path)); std::string base = std::string(Basename(absolute_path)); const FileDescriptor dirfd = @@ -670,7 +661,7 @@ TEST(ExecveatTest, RelativePathWithEmptyPathFlag) { TEST(ExecveatTest, SymlinkNoFollowWithRelativePath) { std::string parent_dir = "/tmp"; TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - 
TempPath::CreateSymlinkTo(parent_dir, WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo(parent_dir, RunfilePath(kBasicWorkload))); const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(Open(parent_dir, O_DIRECTORY)); std::string base = std::string(Basename(link.path())); @@ -685,7 +676,7 @@ TEST(ExecveatTest, SymlinkNoFollowWithRelativePath) { TEST(ExecveatTest, SymlinkNoFollowWithAbsolutePath) { std::string parent_dir = "/tmp"; TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo(parent_dir, WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo(parent_dir, RunfilePath(kBasicWorkload))); std::string path = link.path(); int execve_errno; @@ -697,7 +688,7 @@ TEST(ExecveatTest, SymlinkNoFollowWithAbsolutePath) { TEST(ExecveatTest, SymlinkNoFollowAndEmptyPath) { TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo("/tmp", WorkloadPath(kBasicWorkload))); + TempPath::CreateSymlinkTo("/tmp", RunfilePath(kBasicWorkload))); std::string path = link.path(); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, 0)); @@ -723,7 +714,7 @@ TEST(ExecveatTest, SymlinkNoFollowWithNormalFile) { } TEST(ExecveatTest, BasicWithCloexecFD) { - std::string path = WorkloadPath(kBasicWorkload); + std::string path = RunfilePath(kBasicWorkload); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CLOEXEC)); CheckExecveat(fd.get(), "", {path}, {}, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH, @@ -731,7 +722,7 @@ TEST(ExecveatTest, BasicWithCloexecFD) { } TEST(ExecveatTest, InterpreterScriptWithCloexecFD) { - std::string path = WorkloadPath(kExitScript); + std::string path = RunfilePath(kExitScript); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_CLOEXEC)); int execve_errno; @@ -742,7 +733,7 @@ TEST(ExecveatTest, InterpreterScriptWithCloexecFD) { } TEST(ExecveatTest, InterpreterScriptWithCloexecDirFD) { - std::string absolute_path = WorkloadPath(kExitScript); + std::string absolute_path = RunfilePath(kExitScript); std::string parent_dir = std::string(Dirname(absolute_path)); std::string base = std::string(Basename(absolute_path)); const FileDescriptor dirfd = @@ -775,7 +766,7 @@ TEST(GetpriorityTest, ExecveMaintainsPriority) { // Program run (priority_execve) will exit(X) where // X=getpriority(PRIO_PROCESS,0). Check that this exit value is prio. 
- CheckExec(WorkloadPath(kPriorityWorkload), {WorkloadPath(kPriorityWorkload)}, + CheckExec(RunfilePath(kPriorityWorkload), {RunfilePath(kPriorityWorkload)}, {}, W_EXITCODE(expected_exit_code, 0), ""); } diff --git a/test/syscalls/linux/sigaltstack.cc b/test/syscalls/linux/sigaltstack.cc index 6fd3989a4..a778fa639 100644 --- a/test/syscalls/linux/sigaltstack.cc +++ b/test/syscalls/linux/sigaltstack.cc @@ -95,13 +95,7 @@ TEST(SigaltstackTest, ResetByExecve) { auto const cleanup_sigstack = ASSERT_NO_ERRNO_AND_VALUE(ScopedSigaltstack(stack)); - std::string full_path; - char* test_src = getenv("TEST_SRCDIR"); - if (test_src) { - full_path = JoinPath(test_src, "../../linux/sigaltstack_check"); - } - - ASSERT_FALSE(full_path.empty()); + std::string full_path = RunfilePath("test/syscalls/linux/sigaltstack_check"); pid_t child_pid = -1; int execve_errno = 0; diff --git a/test/util/BUILD b/test/util/BUILD index 4526bb3f1..cbc728159 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -237,6 +237,7 @@ cc_library( ] + select_for_linux( [ "test_util_impl.cc", + "test_util_runfiles.cc", ], ), hdrs = ["test_util.h"], @@ -245,6 +246,7 @@ cc_library( ":logging", ":posix_error", ":save_util", + "@bazel_tools//tools/cpp/runfiles", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/flags:parse", diff --git a/test/util/test_util.h b/test/util/test_util.h index dc30575b8..ee6c2bf4d 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -764,6 +764,12 @@ MATCHER_P2(EquivalentWithin, target, tolerance, return Equivalent(arg, target, tolerance); } +// Returns the absolute path to the a data dependency. 'path' is the runfile +// location relative to workspace root. +#ifdef __linux__ +std::string RunfilePath(std::string path); +#endif + void TestInit(int* argc, char*** argv); } // namespace testing diff --git a/test/util/test_util_runfiles.cc b/test/util/test_util_runfiles.cc new file mode 100644 index 000000000..7210094eb --- /dev/null +++ b/test/util/test_util_runfiles.cc @@ -0,0 +1,46 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "test/util/fs_util.h" +#include "test/util/test_util.h" +#include "tools/cpp/runfiles/runfiles.h" + +namespace gvisor { +namespace testing { + +std::string RunfilePath(std::string path) { + static const bazel::tools::cpp::runfiles::Runfiles* const runfiles = [] { + std::string error; + auto* runfiles = + bazel::tools::cpp::runfiles::Runfiles::CreateForTest(&error); + if (runfiles == nullptr) { + std::cerr << "Unable to find runfiles: " << error << std::endl; + } + return runfiles; + }(); + + if (!runfiles) { + // Can't find runfiles? This probably won't work, but __main__/path is our + // best guess. 
+ return JoinPath("__main__", path); + } + + return runfiles->Rlocation(JoinPath("__main__", path)); +} + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From 43643752f05a0b25259b116558ccd870a539cc05 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 3 Dec 2019 13:46:09 -0800 Subject: strace: don't create a slice with a negative value PiperOrigin-RevId: 283613824 --- pkg/sentry/strace/socket.go | 9 +++++++++ test/syscalls/linux/socket_unix_cmsg.cc | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'test') diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go index 94334f6d2..51f2efb39 100644 --- a/pkg/sentry/strace/socket.go +++ b/pkg/sentry/strace/socket.go @@ -208,6 +208,15 @@ func cmsghdr(t *kernel.Task, addr usermem.Addr, length uint64, maxBytes uint64) i += linux.SizeOfControlMessageHeader width := t.Arch().Width() length := int(h.Length) - linux.SizeOfControlMessageHeader + if length < 0 { + strs = append(strs, fmt.Sprintf( + "{level=%s, type=%s, length=%d, content too short}", + level, + typ, + h.Length, + )) + break + } if skipData { strs = append(strs, fmt.Sprintf("{level=%s, type=%s, length=%d}", level, typ, h.Length)) diff --git a/test/syscalls/linux/socket_unix_cmsg.cc b/test/syscalls/linux/socket_unix_cmsg.cc index 1159c5229..a16899493 100644 --- a/test/syscalls/linux/socket_unix_cmsg.cc +++ b/test/syscalls/linux/socket_unix_cmsg.cc @@ -149,6 +149,35 @@ TEST_P(UnixSocketPairCmsgTest, BadFDPass) { SyscallFailsWithErrno(EBADF)); } +TEST_P(UnixSocketPairCmsgTest, ShortCmsg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[20]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + + int sent_fd = -1; + + struct msghdr msg = {}; + char control[CMSG_SPACE(sizeof(sent_fd))]; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = 1; + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), &sent_fd, sizeof(sent_fd)); + + struct iovec iov; + iov.iov_base = sent_data; + iov.iov_len = sizeof(sent_data); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0), + SyscallFailsWithErrno(EINVAL)); +} + // BasicFDPassNoSpace starts off by sending a single FD just like BasicFDPass. // The difference is that when calling recvmsg, no space for FDs is provided, // only space for the cmsg header. -- cgit v1.2.3 From 27e2c4ddca553cf6867bd49f2847ef007ac560c0 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Tue, 3 Dec 2019 14:40:22 -0800 Subject: Fix panic due to early transition to Closed. The code in rcv.consumeSegment incorrectly transitions to CLOSED state from LAST-ACK before the final ACK for the FIN. Further if receiving a segment changes a socket to a closed state then we should not invoke the sender as the socket is now closed and sending any segments is incorrect. 
PiperOrigin-RevId: 283625300 --- pkg/tcpip/transport/tcp/connect.go | 32 ++--- pkg/tcpip/transport/tcp/rcv.go | 2 +- pkg/tcpip/transport/tcp/tcp_test.go | 179 +++++++++++++++++++++++++++ test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket_ip_tcp_generic.cc | 23 ++++ 5 files changed, 222 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 4206db8b6..16f8aea12 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -953,20 +953,6 @@ func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { func (e *endpoint) handleSegments() *tcpip.Error { checkRequeue := true for i := 0; i < maxSegmentsPerWake; i++ { - e.mu.RLock() - state := e.state - e.mu.RUnlock() - if state == StateClose { - // When we get into StateClose while processing from the queue, - // return immediately and let the protocolMainloop handle it. - // - // We can reach StateClose only while processing a previous segment - // or a notification from the protocolMainLoop (caller goroutine). - // This means that with this return, the segment dequeue below can - // never occur on a closed endpoint. - return nil - } - s := e.segmentQueue.dequeue() if s == nil { checkRequeue = false @@ -1024,6 +1010,24 @@ func (e *endpoint) handleSegments() *tcpip.Error { s.decRef() continue } + + // Now check if the received segment has caused us to transition + // to a CLOSED state, if yes then terminate processing and do + // not invoke the sender. + e.mu.RLock() + state := e.state + e.mu.RUnlock() + if state == StateClose { + // When we get into StateClose while processing from the queue, + // return immediately and let the protocolMainloop handle it. + // + // We can reach StateClose only while processing a previous segment + // or a notification from the protocolMainLoop (caller goroutine). + // This means that with this return, the segment dequeue below can + // never occur on a closed endpoint. + s.decRef() + return nil + } e.snd.handleRcvdSegment(s) } s.decRef() diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index 857dc445f..5ee499c36 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -205,7 +205,7 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum // Handle ACK (not FIN-ACK, which we handled above) during one of the // shutdown states. - if s.flagIsSet(header.TCPFlagAck) { + if s.flagIsSet(header.TCPFlagAck) && s.ackNumber == r.ep.snd.sndNxt { r.ep.mu.Lock() switch r.ep.state { case StateFinWait1: diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 50829ae27..d1f0d6ce7 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -5632,6 +5632,7 @@ func TestTCPTimeWaitRSTIgnored(t *testing.T) { DstPort: context.StackPort, Flags: header.TCPFlagSyn, SeqNum: iss, + RcvWnd: 30000, }) // Receive the SYN-ACK reply. @@ -5750,6 +5751,7 @@ func TestTCPTimeWaitOutOfOrder(t *testing.T) { DstPort: context.StackPort, Flags: header.TCPFlagSyn, SeqNum: iss, + RcvWnd: 30000, }) // Receive the SYN-ACK reply. @@ -5856,6 +5858,7 @@ func TestTCPTimeWaitNewSyn(t *testing.T) { DstPort: context.StackPort, Flags: header.TCPFlagSyn, SeqNum: iss, + RcvWnd: 30000, }) // Receive the SYN-ACK reply. 
@@ -5929,6 +5932,7 @@ func TestTCPTimeWaitNewSyn(t *testing.T) { DstPort: context.StackPort, Flags: header.TCPFlagSyn, SeqNum: iss, + RcvWnd: 30000, }) c.CheckNoPacketTimeout("unexpected packet received in response to SYN", 1*time.Second) @@ -5941,6 +5945,7 @@ func TestTCPTimeWaitNewSyn(t *testing.T) { DstPort: context.StackPort, Flags: header.TCPFlagSyn, SeqNum: iss, + RcvWnd: 30000, }) // Receive the SYN-ACK reply. @@ -6007,6 +6012,7 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) { DstPort: context.StackPort, Flags: header.TCPFlagSyn, SeqNum: iss, + RcvWnd: 30000, }) // Receive the SYN-ACK reply. @@ -6115,3 +6121,176 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) { checker.AckNum(uint32(ackHeaders.SeqNum)), checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck))) } + +func TestTCPCloseWithData(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + // Set TCPTimeWaitTimeout to 5 seconds so that sockets are marked closed + // after 5 seconds in TIME_WAIT state. + tcpTimeWaitTimeout := 5 * time.Second + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitTimeoutOption(tcpTimeWaitTimeout)); err != nil { + t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPLingerTimeoutOption(%d) failed: %s", tcpTimeWaitTimeout, err) + } + + wq := &waiter.Queue{} + ep, err := c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq) + if err != nil { + t.Fatalf("NewEndpoint failed: %s", err) + } + if err := ep.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatalf("Bind failed: %s", err) + } + + if err := ep.Listen(10); err != nil { + t.Fatalf("Listen failed: %s", err) + } + + // Send a SYN request. + iss := seqnum.Value(789) + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: iss, + RcvWnd: 30000, + }) + + // Receive the SYN-ACK reply. + b := c.GetPacket() + tcpHdr := header.TCP(header.IPv4(b).Payload()) + c.IRS = seqnum.Value(tcpHdr.SequenceNumber()) + + ackHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 1, + AckNum: c.IRS + 1, + RcvWnd: 30000, + } + + // Send ACK. + c.SendPacket(nil, ackHeaders) + + // Try to accept the connection. + we, ch := waiter.NewChannelEntry(nil) + wq.EventRegister(&we, waiter.EventIn) + defer wq.EventUnregister(&we) + + c.EP, _, err = ep.Accept() + if err == tcpip.ErrWouldBlock { + // Wait for connection to be established. + select { + case <-ch: + c.EP, _, err = ep.Accept() + if err != nil { + t.Fatalf("Accept failed: %s", err) + } + + case <-time.After(1 * time.Second): + t.Fatalf("Timed out waiting for accept") + } + } + + // Now trigger a passive close by sending a FIN. + finHeaders := &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck | header.TCPFlagFin, + SeqNum: iss + 1, + AckNum: c.IRS + 2, + RcvWnd: 30000, + } + + c.SendPacket(nil, finHeaders) + + // Get the ACK to the FIN we just sent. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+1)), + checker.AckNum(uint32(iss)+2), + checker.TCPFlags(header.TCPFlagAck))) + + // Now write a few bytes and then close the endpoint. 
+ data := []byte{1, 2, 3} + view := buffer.NewView(len(data)) + copy(view, data) + + if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil { + t.Fatalf("Write failed: %s", err) + } + + // Check that data is received. + b = c.GetPacket() + checker.IPv4(t, b, + checker.PayloadLen(len(data)+header.TCPMinimumSize), + checker.TCP( + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS)+1), + checker.AckNum(uint32(iss)+2), // Acknum is initial sequence number + 1 + checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)), + ), + ) + + if p := b[header.IPv4MinimumSize+header.TCPMinimumSize:]; !bytes.Equal(data, p) { + t.Errorf("got data = %x, want = %x", p, data) + } + + c.EP.Close() + // Check the FIN. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+1)+uint32(len(data))), + checker.AckNum(uint32(iss+2)), + checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck))) + + // First send a partial ACK. + ackHeaders = &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 2, + AckNum: c.IRS + 1 + seqnum.Value(len(data)-1), + RcvWnd: 30000, + } + c.SendPacket(nil, ackHeaders) + + // Now send a full ACK. + ackHeaders = &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 2, + AckNum: c.IRS + 1 + seqnum.Value(len(data)), + RcvWnd: 30000, + } + c.SendPacket(nil, ackHeaders) + + // Now ACK the FIN. + ackHeaders.AckNum++ + c.SendPacket(nil, ackHeaders) + + // Now send an ACK and we should get a RST back as the endpoint should + // be in CLOSED state. + ackHeaders = &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: iss + 2, + AckNum: c.IRS + 1 + seqnum.Value(len(data)), + RcvWnd: 30000, + } + c.SendPacket(nil, ackHeaders) + + // Check the RST. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(ackHeaders.AckNum)), + checker.AckNum(uint32(ackHeaders.SeqNum)), + checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck))) + +} diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 2dd115409..a865e8857 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2142,6 +2142,7 @@ cc_library( ":socket_test_util", "//test/util:test_util", "//test/util:thread_util", + "@com_google_absl//absl/time", "@com_google_googletest//:gtest", ], alwayslink = 1, diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index a37b49447..c74273436 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -24,6 +24,8 @@ #include #include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -789,5 +791,26 @@ TEST_P(TCPSocketPairTest, SetTCPLingerTimeout) { EXPECT_EQ(get, kTCPLingerTimeout); } +TEST_P(TCPSocketPairTest, TestTCPCloseWithData) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ScopedThread t([&]() { + // Close one end to trigger sending of a FIN. 
+ ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_WR), SyscallSucceeds()); + char buf[3]; + ASSERT_THAT(read(sockets->second_fd(), buf, 3), + SyscallSucceedsWithValue(3)); + absl::SleepFor(absl::Milliseconds(50)); + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + }); + + absl::SleepFor(absl::Milliseconds(50)); + // Send some data then close. + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->first_fd(), kStr, 3), SyscallSucceedsWithValue(3)); + t.Join(); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From cf7f27c16793eaa41743e96488dad2ddfd1f5d59 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 3 Dec 2019 16:30:38 -0800 Subject: net/udp: return a local route address as the bound-to address If the socket is bound to ANY and connected to a loopback address, getsockname() has to return the loopback address. Without this fix, getsockname() returns ANY. PiperOrigin-RevId: 283647781 --- pkg/tcpip/transport/udp/endpoint.go | 7 ++++- test/syscalls/linux/udp_socket_test_cases.cc | 39 ++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 24cb88c13..4b161e404 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -1134,9 +1134,14 @@ func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { e.mu.RLock() defer e.mu.RUnlock() + addr := e.ID.LocalAddress + if e.state == StateConnected { + addr = e.route.LocalAddress + } + return tcpip.FullAddress{ NIC: e.RegisterNICID, - Addr: e.ID.LocalAddress, + Addr: addr, Port: e.ID.LocalPort, }, nil } diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index b6090ac66..63b92d6a7 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -527,6 +527,45 @@ TEST_P(UdpSocketTest, DisconnectAfterBind) { SyscallFailsWithErrno(ENOTCONN)); } +TEST_P(UdpSocketTest, BindToAnyConnnectToLocalhost) { + struct sockaddr_storage baddr = {}; + auto port = *Port(reinterpret_cast(addr_[1])); + if (GetParam() == AddressFamily::kIpv4) { + auto addr_in = reinterpret_cast(&baddr); + addr_in->sin_family = AF_INET; + addr_in->sin_port = port; + addr_in->sin_addr.s_addr = htonl(INADDR_ANY); + } else { + auto addr_in = reinterpret_cast(&baddr); + addr_in->sin6_family = AF_INET6; + addr_in->sin6_port = port; + addr_in->sin6_scope_id = 0; + addr_in->sin6_addr = IN6ADDR_ANY_INIT; + } + ASSERT_THAT(bind(s_, reinterpret_cast(&baddr), addrlen_), + SyscallSucceeds()); + // Connect the socket. + ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr = {}; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + + // If the socket is bound to ANY and connected to a loopback address, + // getsockname() has to return the loopback address. 
+ if (GetParam() == AddressFamily::kIpv4) { + auto addr_out = reinterpret_cast(&addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK)); + } else { + auto addr_out = reinterpret_cast(&addr); + struct in6_addr loopback = IN6ADDR_LOOPBACK_INIT; + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); + } +} + TEST_P(UdpSocketTest, DisconnectAfterBindToAny) { struct sockaddr_storage baddr = {}; socklen_t addrlen; -- cgit v1.2.3 From bb641c54035e79e3e4c2752e07e6ac55c620b93f Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 3 Dec 2019 17:32:27 -0800 Subject: Point TODO to gvisor.dev PiperOrigin-RevId: 283657725 --- test/syscalls/linux/aio.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/aio.cc b/test/syscalls/linux/aio.cc index b27d4e10a..a33daff17 100644 --- a/test/syscalls/linux/aio.cc +++ b/test/syscalls/linux/aio.cc @@ -129,7 +129,7 @@ TEST_F(AIOTest, BasicWrite) { // aio implementation uses aio_ring. gVisor doesn't and returns all zeroes. // Linux implements aio_ring, so skip the zeroes check. // - // TODO(b/65486370): Remove when gVisor implements aio_ring. + // TODO(gvisor.dev/issue/204): Remove when gVisor implements aio_ring. auto ring = reinterpret_cast(ctx_); auto magic = IsRunningOnGvisor() ? 0 : AIO_RING_MAGIC; EXPECT_EQ(ring->magic, magic); -- cgit v1.2.3 From 80b7ba0c9709c0c7f4c3aef5637d23225bcb866b Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 3 Dec 2019 19:40:56 -0800 Subject: Clean up readv_socket test suite. Get rid of the SocketTest class, which is only extended by ReadvSocketTest. Also, get rid of TCP sockets (which were unused anyway) from readv_socket.cc. This is a very old test suite that isn't the right place for TCP loopback tests. 
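The readv_socket suite touched by this change (see the diffs that follow) exercises scatter reads with readv(2) over AF_UNIX socket pairs. As a rough standalone sketch of that pattern — the function name, buffer sizes, and payload here are illustrative only and are not taken from the tests:

#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

// Writes ten bytes into one end of a stream socket pair, then gathers them
// into two separate buffers with a single readv() call on the other end.
static int ScatterReadSketch() {
  int fds[2];
  if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) != 0) {
    return -1;
  }
  const char data[] = "0123456789";
  if (write(fds[1], data, 10) != 10) {
    return -1;
  }
  char head[4];
  char tail[6];
  struct iovec iov[2] = {{head, sizeof(head)}, {tail, sizeof(tail)}};
  // All ten bytes are already buffered on the stream socket, so readv()
  // fills head (4 bytes), then tail (6 bytes), and returns 10.
  ssize_t n = readv(fds[0], iov, 2);
  close(fds[0]);
  close(fds[1]);
  return n == 10 ? 0 : -1;
}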
PiperOrigin-RevId: 283672772 --- test/syscalls/linux/BUILD | 1 - test/syscalls/linux/file_base.h | 89 ------------------------------------- test/syscalls/linux/readv_common.cc | 43 +++++++++++++++++- test/syscalls/linux/readv_socket.cc | 45 ++++++++++++++++--- 4 files changed, 80 insertions(+), 98 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index a865e8857..9cca78a93 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1795,7 +1795,6 @@ cc_binary( name = "readv_socket_test", testonly = 1, srcs = [ - "file_base.h", "readv_common.cc", "readv_common.h", "readv_socket.cc", diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h index 4e048320e..6f80bc97c 100644 --- a/test/syscalls/linux/file_base.h +++ b/test/syscalls/linux/file_base.h @@ -111,95 +111,6 @@ class FileTest : public ::testing::Test { int test_pipe_[2]; }; -class SocketTest : public ::testing::Test { - public: - void SetUp() override { - test_unix_stream_socket_[0] = -1; - test_unix_stream_socket_[1] = -1; - test_unix_dgram_socket_[0] = -1; - test_unix_dgram_socket_[1] = -1; - test_unix_seqpacket_socket_[0] = -1; - test_unix_seqpacket_socket_[1] = -1; - test_tcp_socket_[0] = -1; - test_tcp_socket_[1] = -1; - - ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, test_unix_stream_socket_), - SyscallSucceeds()); - ASSERT_THAT(fcntl(test_unix_stream_socket_[0], F_SETFL, O_NONBLOCK), - SyscallSucceeds()); - ASSERT_THAT(socketpair(AF_UNIX, SOCK_DGRAM, 0, test_unix_dgram_socket_), - SyscallSucceeds()); - ASSERT_THAT(fcntl(test_unix_dgram_socket_[0], F_SETFL, O_NONBLOCK), - SyscallSucceeds()); - ASSERT_THAT( - socketpair(AF_UNIX, SOCK_SEQPACKET, 0, test_unix_seqpacket_socket_), - SyscallSucceeds()); - ASSERT_THAT(fcntl(test_unix_seqpacket_socket_[0], F_SETFL, O_NONBLOCK), - SyscallSucceeds()); - } - - void TearDown() override { - close(test_unix_stream_socket_[0]); - close(test_unix_stream_socket_[1]); - - close(test_unix_dgram_socket_[0]); - close(test_unix_dgram_socket_[1]); - - close(test_unix_seqpacket_socket_[0]); - close(test_unix_seqpacket_socket_[1]); - - close(test_tcp_socket_[0]); - close(test_tcp_socket_[1]); - } - - int test_unix_stream_socket_[2]; - int test_unix_dgram_socket_[2]; - int test_unix_seqpacket_socket_[2]; - int test_tcp_socket_[2]; -}; - -// MatchesStringLength checks that a tuple argument of (struct iovec *, int) -// corresponding to an iovec array and its length, contains data that matches -// the string length strlen. -MATCHER_P(MatchesStringLength, strlen, "") { - struct iovec* iovs = arg.first; - int niov = arg.second; - int offset = 0; - for (int i = 0; i < niov; i++) { - offset += iovs[i].iov_len; - } - if (offset != static_cast(strlen)) { - *result_listener << offset; - return false; - } - return true; -} - -// MatchesStringValue checks that a tuple argument of (struct iovec *, int) -// corresponding to an iovec array and its length, contains data that matches -// the string value str. 
-MATCHER_P(MatchesStringValue, str, "") { - struct iovec* iovs = arg.first; - int len = strlen(str); - int niov = arg.second; - int offset = 0; - for (int i = 0; i < niov; i++) { - struct iovec iov = iovs[i]; - if (len < offset) { - *result_listener << "strlen " << len << " < offset " << offset; - return false; - } - if (strncmp(static_cast(iov.iov_base), &str[offset], iov.iov_len)) { - absl::string_view iovec_string(static_cast(iov.iov_base), - iov.iov_len); - *result_listener << iovec_string << " @offset " << offset; - return false; - } - offset += iov.iov_len; - } - return true; -} - } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/readv_common.cc b/test/syscalls/linux/readv_common.cc index 9658f7d42..491d5f40f 100644 --- a/test/syscalls/linux/readv_common.cc +++ b/test/syscalls/linux/readv_common.cc @@ -19,12 +19,53 @@ #include #include "gtest/gtest.h" -#include "test/syscalls/linux/file_base.h" #include "test/util/test_util.h" namespace gvisor { namespace testing { +// MatchesStringLength checks that a tuple argument of (struct iovec *, int) +// corresponding to an iovec array and its length, contains data that matches +// the string length strlen. +MATCHER_P(MatchesStringLength, strlen, "") { + struct iovec* iovs = arg.first; + int niov = arg.second; + int offset = 0; + for (int i = 0; i < niov; i++) { + offset += iovs[i].iov_len; + } + if (offset != static_cast(strlen)) { + *result_listener << offset; + return false; + } + return true; +} + +// MatchesStringValue checks that a tuple argument of (struct iovec *, int) +// corresponding to an iovec array and its length, contains data that matches +// the string value str. +MATCHER_P(MatchesStringValue, str, "") { + struct iovec* iovs = arg.first; + int len = strlen(str); + int niov = arg.second; + int offset = 0; + for (int i = 0; i < niov; i++) { + struct iovec iov = iovs[i]; + if (len < offset) { + *result_listener << "strlen " << len << " < offset " << offset; + return false; + } + if (strncmp(static_cast(iov.iov_base), &str[offset], iov.iov_len)) { + absl::string_view iovec_string(static_cast(iov.iov_base), + iov.iov_len); + *result_listener << iovec_string << " @offset " << offset; + return false; + } + offset += iov.iov_len; + } + return true; +} + extern const char kReadvTestData[] = "127.0.0.1 localhost" "" diff --git a/test/syscalls/linux/readv_socket.cc b/test/syscalls/linux/readv_socket.cc index 9b6972201..dd6fb7008 100644 --- a/test/syscalls/linux/readv_socket.cc +++ b/test/syscalls/linux/readv_socket.cc @@ -19,7 +19,6 @@ #include #include "gtest/gtest.h" -#include "test/syscalls/linux/file_base.h" #include "test/syscalls/linux/readv_common.h" #include "test/util/test_util.h" @@ -28,9 +27,30 @@ namespace testing { namespace { -class ReadvSocketTest : public SocketTest { +class ReadvSocketTest : public ::testing::Test { + public: void SetUp() override { - SocketTest::SetUp(); + test_unix_stream_socket_[0] = -1; + test_unix_stream_socket_[1] = -1; + test_unix_dgram_socket_[0] = -1; + test_unix_dgram_socket_[1] = -1; + test_unix_seqpacket_socket_[0] = -1; + test_unix_seqpacket_socket_[1] = -1; + + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, test_unix_stream_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_stream_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT(socketpair(AF_UNIX, SOCK_DGRAM, 0, test_unix_dgram_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_dgram_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT( + 
socketpair(AF_UNIX, SOCK_SEQPACKET, 0, test_unix_seqpacket_socket_), + SyscallSucceeds()); + ASSERT_THAT(fcntl(test_unix_seqpacket_socket_[0], F_SETFL, O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT( write(test_unix_stream_socket_[1], kReadvTestData, kReadvTestDataSize), SyscallSucceedsWithValue(kReadvTestDataSize)); @@ -40,11 +60,22 @@ class ReadvSocketTest : public SocketTest { ASSERT_THAT(write(test_unix_seqpacket_socket_[1], kReadvTestData, kReadvTestDataSize), SyscallSucceedsWithValue(kReadvTestDataSize)); - // FIXME(b/69821513): Enable when possible. - // ASSERT_THAT(write(test_tcp_socket_[1], kReadvTestData, - // kReadvTestDataSize), - // SyscallSucceedsWithValue(kReadvTestDataSize)); } + + void TearDown() override { + close(test_unix_stream_socket_[0]); + close(test_unix_stream_socket_[1]); + + close(test_unix_dgram_socket_[0]); + close(test_unix_dgram_socket_[1]); + + close(test_unix_seqpacket_socket_[0]); + close(test_unix_seqpacket_socket_[1]); + } + + int test_unix_stream_socket_[2]; + int test_unix_dgram_socket_[2]; + int test_unix_seqpacket_socket_[2]; }; TEST_F(ReadvSocketTest, ReadOneBufferPerByte_StreamSocket) { -- cgit v1.2.3 From 6ae64d793593eaf3c1364354ef01a555f230a0fe Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 4 Dec 2019 23:44:25 -0800 Subject: Allow syscall tests to run with hostinet. Fixes #1207 PiperOrigin-RevId: 283914438 --- test/syscalls/build_defs.bzl | 17 +++++++++++++++++ test/syscalls/syscall_test_runner.go | 10 ++++++---- test/util/test_util.cc | 6 ++++++ test/util/test_util.h | 1 + 4 files changed, 30 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/syscalls/build_defs.bzl b/test/syscalls/build_defs.bzl index dcf5b73ed..aaf77c65b 100644 --- a/test/syscalls/build_defs.bzl +++ b/test/syscalls/build_defs.bzl @@ -9,6 +9,7 @@ def syscall_test( use_tmpfs = False, add_overlay = False, add_uds_tree = False, + add_hostinet = False, tags = None): _syscall_test( test = test, @@ -65,6 +66,18 @@ def syscall_test( file_access = "shared", ) + if add_hostinet: + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = "ptrace", + use_tmpfs = use_tmpfs, + network = "host", + add_uds_tree = add_uds_tree, + tags = tags, + ) + def _syscall_test( test, shard_count, @@ -72,6 +85,7 @@ def _syscall_test( platform, use_tmpfs, tags, + network = "none", file_access = "exclusive", overlay = False, add_uds_tree = False): @@ -85,6 +99,8 @@ def _syscall_test( name += "_shared" if overlay: name += "_overlay" + if network != "none": + name += "_" + network + "net" if tags == None: tags = [] @@ -107,6 +123,7 @@ def _syscall_test( # Arguments are passed directly to syscall_test_runner binary. 
"--test-name=" + test_name, "--platform=" + platform, + "--network=" + network, "--use-tmpfs=" + str(use_tmpfs), "--file-access=" + file_access, "--overlay=" + str(overlay), diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go index accf46347..b9fd885ff 100644 --- a/test/syscalls/syscall_test_runner.go +++ b/test/syscalls/syscall_test_runner.go @@ -46,6 +46,7 @@ var ( debug = flag.Bool("debug", false, "enable debug logs") strace = flag.Bool("strace", false, "enable strace logs") platform = flag.String("platform", "ptrace", "platform to run on") + network = flag.String("network", "none", "network stack to run on (sandbox, host, none)") useTmpfs = flag.Bool("use-tmpfs", false, "mounts tmpfs for /tmp") fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay") @@ -137,7 +138,7 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { args := []string{ "-root", rootDir, - "-network=none", + "-network", *network, "-log-format=text", "-TESTONLY-unsafe-nonroot=true", "-net-raw=true", @@ -335,10 +336,11 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { }) } - // Set environment variable that indicates we are - // running in gVisor and with the given platform. + // Set environment variables that indicate we are + // running in gVisor with the given platform and network. platformVar := "TEST_ON_GVISOR" - env := append(os.Environ(), platformVar+"="+*platform) + networkVar := "GVISOR_NETWORK" + env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network) // Remove env variables that cause the gunit binary to write output // files, since they will stomp on eachother, and on the output files diff --git a/test/util/test_util.cc b/test/util/test_util.cc index 9cb050735..848504c88 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -41,6 +41,7 @@ namespace gvisor { namespace testing { #define TEST_ON_GVISOR "TEST_ON_GVISOR" +#define GVISOR_NETWORK "GVISOR_NETWORK" bool IsRunningOnGvisor() { return GvisorPlatform() != Platform::kNative; } @@ -60,6 +61,11 @@ Platform GvisorPlatform() { abort(); } +bool IsRunningWithHostinet() { + char* env = getenv(GVISOR_NETWORK); + return env && strcmp(env, "host") == 0; +} + // Inline cpuid instruction. Preserve %ebx/%rbx register. In PIC compilations // %ebx contains the address of the global offset table. %rbx is occasionally // used to address stack variables in presence of dynamic allocas. 
diff --git a/test/util/test_util.h b/test/util/test_util.h index ee6c2bf4d..b3235c7e3 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -220,6 +220,7 @@ enum class Platform { }; bool IsRunningOnGvisor(); Platform GvisorPlatform(); +bool IsRunningWithHostinet(); #ifdef __linux__ void SetupGvisorDeathTest(); -- cgit v1.2.3 From 05758f34b2f65b7e6b118d3719cb8ce37eb4bc79 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 5 Dec 2019 05:43:52 -0800 Subject: Explicitly export files needed by other packages PiperOrigin-RevId: 283955946 --- test/syscalls/linux/BUILD | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 9cca78a93..7ce2e6270 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -6,6 +6,16 @@ package( licenses = ["notice"], ) +exports_files( + [ + "socket.cc", + "socket_ipv4_udp_unbound_loopback.cc", + "tcp_socket.cc", + "udp_socket.cc", + ], + visibility = ["//:sandbox"], +) + cc_binary( name = "sigaltstack_check", testonly = 1, -- cgit v1.2.3 From 0a32c0235744191947a6bf890031026e06788837 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Thu, 5 Dec 2019 13:22:31 -0800 Subject: Create correct file for /proc/[pid]/task/[tid]/io PiperOrigin-RevId: 284038840 --- pkg/sentry/fs/proc/task.go | 32 +++++++++++++++++--------------- test/syscalls/linux/proc.cc | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 2a598149d..0e46c5fb7 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -67,29 +67,28 @@ type taskDir struct { var _ fs.InodeOperations = (*taskDir)(nil) // newTaskDir creates a new proc task entry. -func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, showSubtasks bool) *fs.Inode { +func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode { contents := map[string]*fs.Inode{ - "auxv": newAuxvec(t, msrc), - "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), - "comm": newComm(t, msrc), - "environ": newExecArgInode(t, msrc, environExecArg), - "exe": newExe(t, msrc), - "fd": newFdDir(t, msrc), - "fdinfo": newFdInfoDir(t, msrc), - "gid_map": newGIDMap(t, msrc), - // FIXME(b/123511468): create the correct io file for threads. 
- "io": newIO(t, msrc), + "auxv": newAuxvec(t, msrc), + "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), + "comm": newComm(t, msrc), + "environ": newExecArgInode(t, msrc, environExecArg), + "exe": newExe(t, msrc), + "fd": newFdDir(t, msrc), + "fdinfo": newFdInfoDir(t, msrc), + "gid_map": newGIDMap(t, msrc), + "io": newIO(t, msrc, isThreadGroup), "maps": newMaps(t, msrc), "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), "ns": newNamespaceDir(t, msrc), "smaps": newSmaps(t, msrc), - "stat": newTaskStat(t, msrc, showSubtasks, p.pidns), + "stat": newTaskStat(t, msrc, isThreadGroup, p.pidns), "statm": newStatm(t, msrc), "status": newStatus(t, msrc, p.pidns), "uid_map": newUIDMap(t, msrc), } - if showSubtasks { + if isThreadGroup { contents["task"] = p.newSubtasks(t, msrc) } if len(p.cgroupControllers) > 0 { @@ -619,8 +618,11 @@ type ioData struct { ioUsage } -func newIO(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { - return newProcInode(t, seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t) +func newIO(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode { + if isThreadGroup { + return newProcInode(t, seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t) + } + return newProcInode(t, seqfile.NewSeqFile(t, &ioData{t}), msrc, fs.SpecialFile, t) } // NeedsUpdate returns whether the generation is old or not. diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 512de5ee0..8cf08991b 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" +#include "absl/synchronization/notification.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/util/capability_util.h" @@ -1988,6 +1990,44 @@ TEST(Proc, GetdentsEnoent) { SyscallFailsWithErrno(ENOENT)); } +void CheckSyscwFromIOFile(const std::string& path, const std::string& regex) { + std::string output; + ASSERT_NO_ERRNO(GetContents(path, &output)); + ASSERT_THAT(output, ContainsRegex(absl::StrCat("syscw:\\s+", regex, "\n"))); +} + +// Checks that there is variable accounting of IO between threads/tasks. +TEST(Proc, PidTidIOAccounting) { + absl::Notification notification; + + // Run a thread with a bunch of writes. Check that io account records exactly + // the number of write calls. File open/close is there to prevent buffering. + ScopedThread writer([¬ification] { + const int num_writes = 100; + for (int i = 0; i < num_writes; i++) { + auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_NO_ERRNO(SetContents(path.path(), "a")); + } + notification.Notify(); + const std::string& writer_dir = + absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); + + CheckSyscwFromIOFile(writer_dir, std::to_string(num_writes)); + }); + + // Run a thread and do no writes. Check that no writes are recorded. 
+ ScopedThread noop([¬ification] { + notification.WaitForNotification(); + const std::string& noop_dir = + absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); + + CheckSyscwFromIOFile(noop_dir, "0"); + }); + + writer.Join(); + noop.Join(); +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 757adfa287c17d925aec7976a86986bd5b52229c Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Fri, 6 Dec 2019 06:39:35 +0900 Subject: Add docs for Kubernetes Runtime Class. (#33) Adds doc to explicitly create the Kubernetes RuntimeClass object needed to use the shim via the Kubernetes API. --- README.md | 4 +-- docs/runtime-handler-quickstart.md | 45 ++++++++++++++++++++++++++++++ docs/runtime-handler-shim-v2-quickstart.md | 45 ++++++++++++++++++++++++++++++ test/e2e/runtimeclass-install.sh | 33 ++++++++++++++++++++++ 4 files changed, 125 insertions(+), 2 deletions(-) create mode 100755 test/e2e/runtimeclass-install.sh (limited to 'test') diff --git a/README.md b/README.md index 26efdbc2a..5480ab905 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,8 @@ gvisor-containerd-shim is a containerd shim for [gVisor](https://github.com/goog ## Installation - [Untrusted Workload Quick Start (containerd >=1.1)](docs/untrusted-workload-quickstart.md) -- [Runtime Handler Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md) -- [Runtime Handler Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md) +- [Runtime Handler/RuntimeClass Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md) +- [Runtime Handler/RuntimeClass Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md) # Contributing diff --git a/docs/runtime-handler-quickstart.md b/docs/runtime-handler-quickstart.md index e48b2dd1a..684390b55 100644 --- a/docs/runtime-handler-quickstart.md +++ b/docs/runtime-handler-quickstart.md @@ -204,3 +204,48 @@ sudo crictl inspect ${CONTAINER_ID} sudo crictl exec ${CONTAINER_ID} dmesg | grep -i gvisor } ``` + +### Set up the Kubernetes Runtime Class + +1. Install the Runtime Class for gVisor + +[embedmd]:# (../test/e2e/runtimeclass-install.sh shell /{ # Step 1/ /^}/) +```shell +{ # Step 1: Install a RuntimeClass +cat < Date: Thu, 5 Dec 2019 13:55:37 -0800 Subject: Reduce flakiness under gotsan runs. TcpPortReuseMultiThread creates lots of connections which result in a lot of goroutines in the sentry. This can cause gotsan runs to take really long and timeout. Increasing listen backlog and reducing number of connections should help the connections complete faster as well as reduce the number of goroutines that gotsan needs to track. PiperOrigin-RevId: 284046018 --- test/syscalls/linux/socket_inet_loopback.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 96a1731cf..fa4358ae4 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -635,7 +635,9 @@ INSTANTIATE_TEST_SUITE_P( using SocketInetReusePortTest = ::testing::TestWithParam; -TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread) { +// TODO(gvisor.dev/issue/940): Remove _NoRandomSave when portHint/stack.Seed is +// saved/restored. 
+TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { auto const& param = GetParam(); TestAddress const& listener = param.listener; @@ -643,6 +645,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread) { sockaddr_storage listen_addr = listener.addr; sockaddr_storage conn_addr = connector.addr; constexpr int kThreadCount = 3; + constexpr int kConnectAttempts = 4096; // Create the listening socket. FileDescriptor listener_fds[kThreadCount]; @@ -657,7 +660,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread) { ASSERT_THAT( bind(fd, reinterpret_cast(&listen_addr), listener.addr_len), SyscallSucceeds()); - ASSERT_THAT(listen(fd, 40), SyscallSucceeds()); + ASSERT_THAT(listen(fd, kConnectAttempts / 3), SyscallSucceeds()); // On the first bind we need to determine which port was bound. if (i != 0) { @@ -676,7 +679,6 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread) { ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); } - constexpr int kConnectAttempts = 10000; std::atomic connects_received = ATOMIC_VAR_INIT(0); std::unique_ptr listen_thread[kThreadCount]; int accept_counts[kThreadCount] = {}; -- cgit v1.2.3 From 13f0f6069af4d49e236cbee4f0284c190784db37 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Thu, 5 Dec 2019 17:27:15 -0800 Subject: Implement F_GETOWN_EX and F_SETOWN_EX. Some versions of glibc will convert F_GETOWN fcntl(2) calls into F_GETOWN_EX in some cases. PiperOrigin-RevId: 284089373 --- pkg/abi/linux/fcntl.go | 41 ++++++--- pkg/sentry/syscalls/linux/sys_file.go | 70 +++++++++++++-- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/fcntl.cc | 162 +++++++++++++++++++++++++++++++++- test/syscalls/linux/ioctl.cc | 3 +- 5 files changed, 255 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go index f78315ebf..6663a199c 100644 --- a/pkg/abi/linux/fcntl.go +++ b/pkg/abi/linux/fcntl.go @@ -16,15 +16,17 @@ package linux // Commands from linux/fcntl.h. const ( - F_DUPFD = 0x0 - F_GETFD = 0x1 - F_SETFD = 0x2 - F_GETFL = 0x3 - F_SETFL = 0x4 - F_SETLK = 0x6 - F_SETLKW = 0x7 - F_SETOWN = 0x8 - F_GETOWN = 0x9 + F_DUPFD = 0 + F_GETFD = 1 + F_SETFD = 2 + F_GETFL = 3 + F_SETFL = 4 + F_SETLK = 6 + F_SETLKW = 7 + F_SETOWN = 8 + F_GETOWN = 9 + F_SETOWN_EX = 15 + F_GETOWN_EX = 16 F_DUPFD_CLOEXEC = 1024 + 6 F_SETPIPE_SZ = 1024 + 7 F_GETPIPE_SZ = 1024 + 8 @@ -32,9 +34,9 @@ const ( // Commands for F_SETLK. const ( - F_RDLCK = 0x0 - F_WRLCK = 0x1 - F_UNLCK = 0x2 + F_RDLCK = 0 + F_WRLCK = 1 + F_UNLCK = 2 ) // Flags for fcntl. @@ -42,7 +44,7 @@ const ( FD_CLOEXEC = 00000001 ) -// Lock structure for F_SETLK. +// Flock is the lock structure for F_SETLK. type Flock struct { Type int16 Whence int16 @@ -52,3 +54,16 @@ type Flock struct { Pid int32 _ [4]byte } + +// Flags for F_SETOWN_EX and F_GETOWN_EX. +const ( + F_OWNER_TID = 0 + F_OWNER_PID = 1 + F_OWNER_PGRP = 2 +) + +// FOwnerEx is the owner structure for F_SETOWN_EX and F_GETOWN_EX. 
+type FOwnerEx struct { + Type int32 + PID int32 +} diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 3b9181002..9bc2445a5 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -840,25 +840,42 @@ func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC return uintptr(newfd), nil, nil } -func fGetOwn(t *kernel.Task, file *fs.File) int32 { +func fGetOwnEx(t *kernel.Task, file *fs.File) linux.FOwnerEx { ma := file.Async(nil) if ma == nil { - return 0 + return linux.FOwnerEx{} } a := ma.(*fasync.FileAsync) ot, otg, opg := a.Owner() switch { case ot != nil: - return int32(t.PIDNamespace().IDOfTask(ot)) + return linux.FOwnerEx{ + Type: linux.F_OWNER_TID, + PID: int32(t.PIDNamespace().IDOfTask(ot)), + } case otg != nil: - return int32(t.PIDNamespace().IDOfThreadGroup(otg)) + return linux.FOwnerEx{ + Type: linux.F_OWNER_PID, + PID: int32(t.PIDNamespace().IDOfThreadGroup(otg)), + } case opg != nil: - return int32(-t.PIDNamespace().IDOfProcessGroup(opg)) + return linux.FOwnerEx{ + Type: linux.F_OWNER_PGRP, + PID: int32(t.PIDNamespace().IDOfProcessGroup(opg)), + } default: - return 0 + return linux.FOwnerEx{} } } +func fGetOwn(t *kernel.Task, file *fs.File) int32 { + owner := fGetOwnEx(t, file) + if owner.Type == linux.F_OWNER_PGRP { + return -owner.PID + } + return owner.PID +} + // fSetOwn sets the file's owner with the semantics of F_SETOWN in Linux. // // If who is positive, it represents a PID. If negative, it represents a PGID. @@ -901,11 +918,13 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall t.FDTable().SetFlags(fd, kernel.FDFlags{ CloseOnExec: flags&linux.FD_CLOEXEC != 0, }) + return 0, nil, nil case linux.F_GETFL: return uintptr(file.Flags().ToLinux()), nil, nil case linux.F_SETFL: flags := uint(args[2].Uint()) file.SetFlags(linuxToFlags(flags).Settable()) + return 0, nil, nil case linux.F_SETLK, linux.F_SETLKW: // In Linux the file system can choose to provide lock operations for an inode. // Normally pipe and socket types lack lock operations. 
We diverge and use a heavy @@ -1008,6 +1027,44 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.F_SETOWN: fSetOwn(t, file, args[2].Int()) return 0, nil, nil + case linux.F_GETOWN_EX: + addr := args[2].Pointer() + owner := fGetOwnEx(t, file) + _, err := t.CopyOut(addr, &owner) + return 0, nil, err + case linux.F_SETOWN_EX: + addr := args[2].Pointer() + var owner linux.FOwnerEx + n, err := t.CopyIn(addr, &owner) + if err != nil { + return 0, nil, err + } + a := file.Async(fasync.New).(*fasync.FileAsync) + switch owner.Type { + case linux.F_OWNER_TID: + task := t.PIDNamespace().TaskWithID(kernel.ThreadID(owner.PID)) + if task == nil { + return 0, nil, syserror.ESRCH + } + a.SetOwnerTask(t, task) + return uintptr(n), nil, nil + case linux.F_OWNER_PID: + tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(owner.PID)) + if tg == nil { + return 0, nil, syserror.ESRCH + } + a.SetOwnerThreadGroup(t, tg) + return uintptr(n), nil, nil + case linux.F_OWNER_PGRP: + pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(owner.PID)) + if pg == nil { + return 0, nil, syserror.ESRCH + } + a.SetOwnerProcessGroup(t, pg) + return uintptr(n), nil, nil + default: + return 0, nil, syserror.EINVAL + } case linux.F_GET_SEALS: val, err := tmpfs.GetSeals(file.Dirent.Inode) return uintptr(val), nil, err @@ -1035,7 +1092,6 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Everything else is not yet supported. return 0, nil, syserror.EINVAL } - return 0, nil, nil } const ( diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 7ce2e6270..61f310db9 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -753,6 +753,7 @@ cc_binary( "//test/util:eventfd_util", "//test/util:multiprocess_util", "//test/util:posix_error", + "//test/util:save_util", "//test/util:temp_path", "//test/util:test_util", "//test/util:timer_util", diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 8a45be12a..4f3aa81d6 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -32,6 +33,7 @@ #include "test/util/eventfd_util.h" #include "test/util/multiprocess_util.h" #include "test/util/posix_error.h" +#include "test/util/save_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/timer_util.h" @@ -910,8 +912,166 @@ TEST(FcntlTest, GetOwn) { FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); - ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + EXPECT_EQ(syscall(__NR_fcntl, s.get(), F_GETOWN), 0); + MaybeSave(); +} + +TEST(FcntlTest, GetOwnEx) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &owner), + SyscallSucceedsWithValue(0)); +} + +TEST(FcntlTest, SetOwnExInvalidType) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = __pid_type(-1); + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(FcntlTest, SetOwnExInvalidTid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = 
F_OWNER_TID; + owner.pid = -1; + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnExInvalidPid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_PID; + owner.pid = -1; + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnExInvalidPgrp) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_PGRP; + owner.pid = -1; + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnExTid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_TID; + EXPECT_THAT(owner.pid = syscall(__NR_gettid), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_EQ(syscall(__NR_fcntl, s.get(), F_GETOWN), owner.pid); + MaybeSave(); +} + +TEST(FcntlTest, SetOwnExPid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_PID; + EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_EQ(syscall(__NR_fcntl, s.get(), F_GETOWN), owner.pid); + MaybeSave(); +} + +TEST(FcntlTest, SetOwnExPgrp) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex owner = {}; + owner.type = F_OWNER_PGRP; + EXPECT_THAT(owner.pid = getpgrp(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + // NOTE(igudger): I don't understand why, but this is flaky on Linux. + // GetOwnExPgrp (below) does not have this issue. 
+ SKIP_IF(!IsRunningOnGvisor()); + + EXPECT_EQ(syscall(__NR_fcntl, s.get(), F_GETOWN), -owner.pid); + MaybeSave(); +} + +TEST(FcntlTest, GetOwnExTid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex set_owner = {}; + set_owner.type = F_OWNER_TID; + EXPECT_THAT(set_owner.pid = syscall(__NR_gettid), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), + SyscallSucceeds()); + + f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, set_owner.type); + EXPECT_EQ(got_owner.pid, set_owner.pid); +} + +TEST(FcntlTest, GetOwnExPid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex set_owner = {}; + set_owner.type = F_OWNER_PID; + EXPECT_THAT(set_owner.pid = getpid(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), + SyscallSucceeds()); + + f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, set_owner.type); + EXPECT_EQ(got_owner.pid, set_owner.pid); +} + +TEST(FcntlTest, GetOwnExPgrp) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + f_owner_ex set_owner = {}; + set_owner.type = F_OWNER_PGRP; + EXPECT_THAT(set_owner.pid = getpgrp(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), + SyscallSucceeds()); + + f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, set_owner.type); + EXPECT_EQ(got_owner.pid, set_owner.pid); } } // namespace diff --git a/test/syscalls/linux/ioctl.cc b/test/syscalls/linux/ioctl.cc index c4f8bff08..b0a07a064 100644 --- a/test/syscalls/linux/ioctl.cc +++ b/test/syscalls/linux/ioctl.cc @@ -215,7 +215,8 @@ TEST_F(IoctlTest, FIOASYNCSelfTarget2) { auto mask_cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_UNBLOCK, SIGIO)); - pid_t pid = getpid(); + pid_t pid = -1; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); EXPECT_THAT(ioctl(pair->second_fd(), FIOSETOWN, &pid), SyscallSucceeds()); int set = 1; -- cgit v1.2.3 From b0066217ecd830be1d816d2b4d824f89b278c556 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 6 Dec 2019 12:12:27 -0800 Subject: Add hostinet tests for UDP sockets. We need to skip a subset of the tests, because of features that hostinet does not currently support. 
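All of the skipped cases use the same guard: the IsRunningWithHostinet() helper declared in test/util/test_util.h earlier in this series, combined with the existing SKIP_IF macro, as the hunks below show. A minimal, illustrative sketch of the pattern, assuming the UdpSocketTest fixture and gtest plumbing from these files; the test name and body are placeholders, and the issue reference just mirrors the TODOs used below:

    TEST_P(UdpSocketTest, FeatureNotYetSupportedByHostinet) {
      // TODO(gvisor.dev/issue/1202): remove once hostinet supports the
      // feature exercised by this test.
      SKIP_IF(IsRunningWithHostinet());

      // Feature-specific setup and assertions would follow here and still
      // run on the netstack-backed platforms.
    }
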
Fixes #1209 PiperOrigin-RevId: 284235911 --- test/syscalls/BUILD | 1 + test/syscalls/linux/udp_socket_test_cases.cc | 35 ++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 722d14b53..6650984fa 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -669,6 +669,7 @@ syscall_test(test = "//test/syscalls/linux:udp_bind_test") syscall_test( size = "medium", + add_hostinet = True, shard_count = 10, test = "//test/syscalls/linux:udp_socket_test", ) diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 63b92d6a7..4556f16d6 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -656,6 +656,9 @@ TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) { } TEST_P(UdpSocketTest, ZerolengthWriteAllowed) { + // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes. + SKIP_IF(IsRunningWithHostinet()); + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); @@ -673,6 +676,9 @@ TEST_P(UdpSocketTest, ZerolengthWriteAllowed) { } TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) { + // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes. + SKIP_IF(IsRunningWithHostinet()); + // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); @@ -878,6 +884,10 @@ TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) { } TEST_P(UdpSocketTest, ReadShutdown) { + // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without + // MSG_DONTWAIT blocks indefinitely. + SKIP_IF(IsRunningWithHostinet()); + char received[512]; EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); @@ -900,6 +910,10 @@ TEST_P(UdpSocketTest, ReadShutdown) { } TEST_P(UdpSocketTest, ReadShutdownDifferentThread) { + // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without + // MSG_DONTWAIT blocks indefinitely. + SKIP_IF(IsRunningWithHostinet()); + char received[512]; EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); @@ -1189,6 +1203,10 @@ TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) { } TEST_P(UdpSocketTest, SoTimestampOffByDefault) { + // TODO(gvisor.dev/issue/1202): SO_TIMESTAMP socket option not supported by + // hostinet. + SKIP_IF(IsRunningWithHostinet()); + int v = -1; socklen_t optlen = sizeof(v); ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, &optlen), @@ -1198,6 +1216,10 @@ TEST_P(UdpSocketTest, SoTimestampOffByDefault) { } TEST_P(UdpSocketTest, SoTimestamp) { + // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not + // supported by hostinet. + SKIP_IF(IsRunningWithHostinet()); + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1241,6 +1263,9 @@ TEST_P(UdpSocketTest, WriteShutdownNotConnected) { } TEST_P(UdpSocketTest, TimestampIoctl) { + // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. 
+ SKIP_IF(IsRunningWithHostinet()); + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1259,7 +1284,10 @@ TEST_P(UdpSocketTest, TimestampIoctl) { ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); } -TEST_P(UdpSocketTest, TimetstampIoctlNothingRead) { +TEST_P(UdpSocketTest, TimestampIoctlNothingRead) { + // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. + SKIP_IF(IsRunningWithHostinet()); + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1270,6 +1298,10 @@ TEST_P(UdpSocketTest, TimetstampIoctlNothingRead) { // Test that the timestamp accessed via SIOCGSTAMP is still accessible after // SO_TIMESTAMP is enabled and used to retrieve a timestamp. TEST_P(UdpSocketTest, TimestampIoctlPersistence) { + // TODO(gvisor.dev/issue/1202): ioctl() and SO_TIMESTAMP socket option are not + // supported by hostinet. + SKIP_IF(IsRunningWithHostinet()); + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1304,7 +1336,6 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { msg.msg_controllen = sizeof(cmsgbuf); ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0)); struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); - cmsg = CMSG_FIRSTHDR(&msg); ASSERT_NE(cmsg, nullptr); // The ioctl should return the exact same values as before. -- cgit v1.2.3 From 498595d54347d711dbd24247ed12c659b9d89c58 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Mon, 9 Dec 2019 11:21:01 -0800 Subject: Add tests for rseq(2) Add a decent set of syscall tests for rseq(2). These are a bit awkward because of issues with library integration. libc may register rseq on thread start (including before main on the initial thread), precluding much testing. Thus we run tests in a libc-free subprocess. Support for rseq(2) in gVisor will come in a later commit. 
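Since no libc wrapper can be relied on, both the child binary added below and any standalone reproduction have to issue the system call directly. A rough, hedged sketch for x86_64: the syscall number (334), the unregister flag, and the 32-byte-aligned layout mirror the uapi.h added in this change, while the struct and program structure here are illustrative only. Registration fails (EBUSY/EINVAL) if the C library has already registered rseq for the thread, and ENOSYS if the kernel lacks rseq:

    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    // Minimal layout matching test/syscalls/linux/rseq/uapi.h; the kernel
    // requires 32-byte alignment.
    struct rseq_area {
      uint32_t cpu_id_start;
      uint32_t cpu_id;
      uint64_t rseq_cs;
      uint32_t flags;
    } __attribute__((aligned(32)));

    int main() {
      constexpr long kRseqSyscall = 334;      // x86_64 only.
      constexpr int kRseqFlagUnregister = 1;  // 1 << 0.
      static rseq_area r = {};

      // Register: flags == 0 and signature == 0 for this sketch.
      if (syscall(kRseqSyscall, &r, sizeof(r), 0, 0) != 0) {
        perror("rseq register");
        return 1;
      }
      printf("registered; running on cpu %u\n", r.cpu_id);

      // Unregister with the same address, length, and signature.
      return syscall(kRseqSyscall, &r, sizeof(r), kRseqFlagUnregister, 0) ? 1 : 0;
    }
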
PiperOrigin-RevId: 284595994 --- test/syscalls/BUILD | 2 + test/syscalls/linux/BUILD | 16 ++ test/syscalls/linux/rseq.cc | 198 +++++++++++++++++++ test/syscalls/linux/rseq/BUILD | 59 ++++++ test/syscalls/linux/rseq/critical.S | 66 +++++++ test/syscalls/linux/rseq/critical.h | 39 ++++ test/syscalls/linux/rseq/rseq.cc | 366 ++++++++++++++++++++++++++++++++++++ test/syscalls/linux/rseq/start.S | 45 +++++ test/syscalls/linux/rseq/syscalls.h | 66 +++++++ test/syscalls/linux/rseq/test.h | 43 +++++ test/syscalls/linux/rseq/types.h | 31 +++ test/syscalls/linux/rseq/uapi.h | 54 ++++++ 12 files changed, 985 insertions(+) create mode 100644 test/syscalls/linux/rseq.cc create mode 100644 test/syscalls/linux/rseq/BUILD create mode 100644 test/syscalls/linux/rseq/critical.S create mode 100644 test/syscalls/linux/rseq/critical.h create mode 100644 test/syscalls/linux/rseq/rseq.cc create mode 100644 test/syscalls/linux/rseq/start.S create mode 100644 test/syscalls/linux/rseq/syscalls.h create mode 100644 test/syscalls/linux/rseq/test.h create mode 100644 test/syscalls/linux/rseq/types.h create mode 100644 test/syscalls/linux/rseq/uapi.h (limited to 'test') diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 6650984fa..829693e8e 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -376,6 +376,8 @@ syscall_test( syscall_test(test = "//test/syscalls/linux:rlimits_test") +syscall_test(test = "//test/syscalls/linux:rseq_test") + syscall_test(test = "//test/syscalls/linux:rtsignal_test") syscall_test(test = "//test/syscalls/linux:sched_test") diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 61f310db9..c49445d62 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1852,6 +1852,22 @@ cc_binary( ], ) +cc_binary( + name = "rseq_test", + testonly = 1, + srcs = ["rseq.cc"], + data = ["//test/syscalls/linux/rseq"], + linkstatic = 1, + deps = [ + "//test/syscalls/linux/rseq:lib", + "//test/util:logging", + "//test/util:multiprocess_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + cc_binary( name = "rtsignal_test", testonly = 1, diff --git a/test/syscalls/linux/rseq.cc b/test/syscalls/linux/rseq.cc new file mode 100644 index 000000000..106c045e3 --- /dev/null +++ b/test/syscalls/linux/rseq.cc @@ -0,0 +1,198 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "test/syscalls/linux/rseq/test.h" +#include "test/syscalls/linux/rseq/uapi.h" +#include "test/util/logging.h" +#include "test/util/multiprocess_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Syscall test for rseq (restartable sequences). +// +// We must be very careful about how these tests are written. 
Each thread may +// only have one struct rseq registration, which may be done automatically at +// thread start (as of 2019-11-13, glibc does *not* support rseq and thus does +// not do so). +// +// Testing of rseq is thus done primarily in a child process with no +// registration. This means exec'ing a nostdlib binary, as rseq registration can +// only be cleared by execve (or knowing the old rseq address), and glibc (based +// on the current unmerged patches) register rseq before calling main()). + +int RSeq(struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) { + return syscall(kRseqSyscall, rseq, rseq_len, flags, sig); +} + +// Returns true if this kernel supports the rseq syscall. +PosixErrorOr RSeqSupported() { + // We have to be careful here, there are three possible cases: + // + // 1. rseq is not supported -> ENOSYS + // 2. rseq is supported and not registered -> success, but we should + // unregister. + // 3. rseq is supported and registered -> EINVAL (most likely). + + // The only validation done on new registrations is that rseq is aligned and + // writable. + rseq rseq = {}; + int ret = RSeq(&rseq, sizeof(rseq), 0, 0); + if (ret == 0) { + // Successfully registered, rseq is supported. Unregister. + ret = RSeq(&rseq, sizeof(rseq), kRseqFlagUnregister, 0); + if (ret != 0) { + return PosixError(errno); + } + return true; + } + + switch (errno) { + case ENOSYS: + // Not supported. + return false; + case EINVAL: + // Supported, but already registered. EINVAL returned because we provided + // a different address. + return true; + default: + // Unknown error. + return PosixError(errno); + } +} + +constexpr char kRseqBinary[] = "test/syscalls/linux/rseq/rseq"; + +void RunChildTest(std::string test_case, int want_status) { + std::string path = RunfilePath(kRseqBinary); + + pid_t child_pid = -1; + int execve_errno = 0; + auto cleanup = ASSERT_NO_ERRNO_AND_VALUE( + ForkAndExec(path, {path, test_case}, {}, &child_pid, &execve_errno)); + + ASSERT_GT(child_pid, 0); + ASSERT_EQ(execve_errno, 0); + + int status = 0; + ASSERT_THAT(RetryEINTR(waitpid)(child_pid, &status, 0), SyscallSucceeds()); + ASSERT_EQ(status, want_status); +} + +// Test that rseq must be aligned. +TEST(RseqTest, Unaligned) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestUnaligned, 0); +} + +// Sanity test that registration works. +TEST(RseqTest, Register) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestRegister, 0); +} + +// Registration can't be done twice. +TEST(RseqTest, DoubleRegister) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestDoubleRegister, 0); +} + +// Registration can be done again after unregister. +TEST(RseqTest, RegisterUnregister) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestRegisterUnregister, 0); +} + +// The pointer to rseq must match on register/unregister. +TEST(RseqTest, UnregisterDifferentPtr) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestUnregisterDifferentPtr, 0); +} + +// The signature must match on register/unregister. +TEST(RseqTest, UnregisterDifferentSignature) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestUnregisterDifferentSignature, 0); +} + +// The CPU ID is initialized. +TEST(RseqTest, CPU) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestCPU, 0); +} + +// Critical section is eventually aborted. 
+TEST(RseqTest, Abort) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestAbort, 0); +} + +// Abort may be before the critical section. +TEST(RseqTest, AbortBefore) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestAbortBefore, 0); +} + +// Signature must match. +TEST(RseqTest, AbortSignature) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestAbortSignature, SIGSEGV); +} + +// Abort must not be in the critical section. +TEST(RseqTest, AbortPreCommit) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestAbortPreCommit, SIGSEGV); +} + +// rseq.rseq_cs is cleared on abort. +TEST(RseqTest, AbortClearsCS) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestAbortClearsCS, 0); +} + +// rseq.rseq_cs is cleared on abort outside of critical section. +TEST(RseqTest, InvalidAbortClearsCS) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(RSeqSupported())); + + RunChildTest(kRseqTestInvalidAbortClearsCS, 0); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/rseq/BUILD b/test/syscalls/linux/rseq/BUILD new file mode 100644 index 000000000..5cfe4e56f --- /dev/null +++ b/test/syscalls/linux/rseq/BUILD @@ -0,0 +1,59 @@ +# This package contains a standalone rseq test binary. This binary must not +# depend on libc, which might use rseq itself. + +load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", "cc_flags_supplier") +load("@rules_cc//cc:defs.bzl", "cc_library") + +package(licenses = ["notice"]) + +genrule( + name = "rseq_binary", + srcs = [ + "critical.h", + "critical.S", + "rseq.cc", + "syscalls.h", + "start.S", + "test.h", + "types.h", + "uapi.h", + ], + outs = ["rseq"], + cmd = " ".join([ + "$(CC)", + "$(CC_FLAGS) ", + "-I.", + "-Wall", + "-Werror", + "-O2", + "-std=c++17", + "-static", + "-nostdlib", + "-ffreestanding", + "-o", + "$(location rseq)", + "$(location critical.S)", + "$(location rseq.cc)", + "$(location start.S)", + ]), + toolchains = [ + ":no_pie_cc_flags", + "@bazel_tools//tools/cpp:current_cc_toolchain", + ], + visibility = ["//:sandbox"], +) + +cc_flags_supplier( + name = "no_pie_cc_flags", + features = ["-pie"], +) + +cc_library( + name = "lib", + testonly = 1, + hdrs = [ + "test.h", + "uapi.h", + ], + visibility = ["//:sandbox"], +) diff --git a/test/syscalls/linux/rseq/critical.S b/test/syscalls/linux/rseq/critical.S new file mode 100644 index 000000000..8c0687e6d --- /dev/null +++ b/test/syscalls/linux/rseq/critical.S @@ -0,0 +1,66 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Restartable sequences critical sections. + +// Loops continuously until aborted. +// +// void rseq_loop(struct rseq* r, struct rseq_cs* cs) + + .text + .globl rseq_loop + .type rseq_loop, @function + +rseq_loop: + jmp begin + + // Abort block before the critical section. + // Abort signature is 4 nops for simplicity. 
+ .byte 0x90, 0x90, 0x90, 0x90 + .globl rseq_loop_early_abort +rseq_loop_early_abort: + ret + +begin: + // r->rseq_cs = cs + movq %rsi, 8(%rdi) + + // N.B. rseq_cs will be cleared by any preempt, even outside the critical + // section. Thus it must be set in or immediately before the critical section + // to ensure it is not cleared before the section begins. + .globl rseq_loop_start +rseq_loop_start: + jmp rseq_loop_start + + // "Pre-commit": extra instructions inside the critical section. These are + // used as the abort point in TestAbortPreCommit, which is not valid. + .globl rseq_loop_pre_commit +rseq_loop_pre_commit: + // Extra abort signature + nop for TestAbortPostCommit. + .byte 0x90, 0x90, 0x90, 0x90 + nop + + // "Post-commit": never reached in this case. + .globl rseq_loop_post_commit +rseq_loop_post_commit: + + // Abort signature is 4 nops for simplicity. + .byte 0x90, 0x90, 0x90, 0x90 + + .globl rseq_loop_abort +rseq_loop_abort: + ret + + .size rseq_loop,.-rseq_loop + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/critical.h b/test/syscalls/linux/rseq/critical.h new file mode 100644 index 000000000..ac987a25e --- /dev/null +++ b/test/syscalls/linux/rseq/critical.h @@ -0,0 +1,39 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_ + +#include "test/syscalls/linux/rseq/types.h" +#include "test/syscalls/linux/rseq/uapi.h" + +constexpr uint32_t kRseqSignature = 0x90909090; + +extern "C" { + +extern void rseq_loop(struct rseq* r, struct rseq_cs* cs); +extern void* rseq_loop_early_abort; +extern void* rseq_loop_start; +extern void* rseq_loop_pre_commit; +extern void* rseq_loop_post_commit; +extern void* rseq_loop_abort; + +extern int rseq_getpid(struct rseq* r, struct rseq_cs* cs); +extern void* rseq_getpid_start; +extern void* rseq_getpid_post_commit; +extern void* rseq_getpid_abort; + +} // extern "C" + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_CRITICAL_H_ diff --git a/test/syscalls/linux/rseq/rseq.cc b/test/syscalls/linux/rseq/rseq.cc new file mode 100644 index 000000000..f036db26d --- /dev/null +++ b/test/syscalls/linux/rseq/rseq.cc @@ -0,0 +1,366 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "test/syscalls/linux/rseq/critical.h" +#include "test/syscalls/linux/rseq/syscalls.h" +#include "test/syscalls/linux/rseq/test.h" +#include "test/syscalls/linux/rseq/types.h" +#include "test/syscalls/linux/rseq/uapi.h" + +namespace gvisor { +namespace testing { + +extern "C" int main(int argc, char** argv, char** envp); + +// Standalone initialization before calling main(). +extern "C" void __init(uintptr_t* sp) { + int argc = sp[0]; + char** argv = reinterpret_cast(&sp[1]); + char** envp = &argv[argc + 1]; + + // Call main() and exit. + sys_exit_group(main(argc, argv, envp)); + + // sys_exit_group does not return +} + +int strcmp(const char* s1, const char* s2) { + const unsigned char* p1 = reinterpret_cast(s1); + const unsigned char* p2 = reinterpret_cast(s2); + + while (*p1 == *p2) { + if (!*p1) { + return 0; + } + ++p1; + ++p2; + } + return static_cast(*p1) - static_cast(*p2); +} + +int sys_rseq(struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) { + return raw_syscall(kRseqSyscall, rseq, rseq_len, flags, sig); +} + +// Test that rseq must be aligned. +int TestUnaligned() { + constexpr uintptr_t kRequiredAlignment = alignof(rseq); + + char buf[2 * kRequiredAlignment] = {}; + uintptr_t ptr = reinterpret_cast(&buf[0]); + if ((ptr & (kRequiredAlignment - 1)) == 0) { + // buf is already aligned. Misalign it. + ptr++; + } + + int ret = sys_rseq(reinterpret_cast(ptr), sizeof(rseq), 0, 0); + if (sys_errno(ret) != EINVAL) { + return 1; + } + return 0; +} + +// Sanity test that registration works. +int TestRegister() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + return 0; +}; + +// Registration can't be done twice. +int TestDoubleRegister() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != EBUSY) { + return 1; + } + + return 0; +}; + +// Registration can be done again after unregister. +int TestRegisterUnregister() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + if (int ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, 0); + sys_errno(ret) != 0) { + return 1; + } + + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + return 0; +}; + +// The pointer to rseq must match on register/unregister. +int TestUnregisterDifferentPtr() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + struct rseq r2 = {}; + if (int ret = sys_rseq(&r2, sizeof(r2), kRseqFlagUnregister, 0); + sys_errno(ret) != EINVAL) { + return 1; + } + + return 0; +}; + +// The signature must match on register/unregister. +int TestUnregisterDifferentSignature() { + constexpr int kSignature = 0; + + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kSignature); sys_errno(ret) != 0) { + return 1; + } + + if (int ret = sys_rseq(&r, sizeof(r), kRseqFlagUnregister, kSignature + 1); + sys_errno(ret) != EPERM) { + return 1; + } + + return 0; +}; + +// The CPU ID is initialized. +int TestCPU() { + struct rseq r = {}; + r.cpu_id = kRseqCPUIDUninitialized; + + if (int ret = sys_rseq(&r, sizeof(r), 0, 0); sys_errno(ret) != 0) { + return 1; + } + + if (__atomic_load_n(&r.cpu_id, __ATOMIC_RELAXED) < 0) { + return 1; + } + if (__atomic_load_n(&r.cpu_id_start, __ATOMIC_RELAXED) < 0) { + return 1; + } + + return 0; +}; + +// Critical section is eventually aborted. 
+int TestAbort() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_abort); + + // Loops until abort. If this returns then abort occurred. + rseq_loop(&r, &cs); + + return 0; +}; + +// Abort may be before the critical section. +int TestAbortBefore() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_early_abort); + + // Loops until abort. If this returns then abort occurred. + rseq_loop(&r, &cs); + + return 0; +}; + +// Signature must match. +int TestAbortSignature() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_abort); + + // Loops until abort. This should SIGSEGV on abort. + rseq_loop(&r, &cs); + + return 1; +}; + +// Abort must not be in the critical section. +int TestAbortPreCommit() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature + 1); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_pre_commit); + + // Loops until abort. This should SIGSEGV on abort. + rseq_loop(&r, &cs); + + return 1; +}; + +// rseq.rseq_cs is cleared on abort. +int TestAbortClearsCS() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_abort); + + // Loops until abort. If this returns then abort occurred. + rseq_loop(&r, &cs); + + if (__atomic_load_n(&r.rseq_cs, __ATOMIC_RELAXED)) { + return 1; + } + + return 0; +}; + +// rseq.rseq_cs is cleared on abort outside of critical section. +int TestInvalidAbortClearsCS() { + struct rseq r = {}; + if (int ret = sys_rseq(&r, sizeof(r), 0, kRseqSignature); + sys_errno(ret) != 0) { + return 1; + } + + struct rseq_cs cs = {}; + cs.version = 0; + cs.flags = 0; + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_abort); + + __atomic_store_n(&r.rseq_cs, &cs, __ATOMIC_RELAXED); + + // When the next abort condition occurs, the kernel will clear cs once it + // determines we aren't in the critical section. 
+ while (1) { + if (!__atomic_load_n(&r.rseq_cs, __ATOMIC_RELAXED)) { + break; + } + } + + return 0; +}; + +// Exit codes: +// 0 - Pass +// 1 - Fail +// 2 - Missing argument +// 3 - Unknown test case +extern "C" int main(int argc, char** argv, char** envp) { + if (argc != 2) { + // Usage: rseq + return 2; + } + + if (strcmp(argv[1], kRseqTestUnaligned) == 0) { + return TestUnaligned(); + } + if (strcmp(argv[1], kRseqTestRegister) == 0) { + return TestRegister(); + } + if (strcmp(argv[1], kRseqTestDoubleRegister) == 0) { + return TestDoubleRegister(); + } + if (strcmp(argv[1], kRseqTestRegisterUnregister) == 0) { + return TestRegisterUnregister(); + } + if (strcmp(argv[1], kRseqTestUnregisterDifferentPtr) == 0) { + return TestUnregisterDifferentPtr(); + } + if (strcmp(argv[1], kRseqTestUnregisterDifferentSignature) == 0) { + return TestUnregisterDifferentSignature(); + } + if (strcmp(argv[1], kRseqTestCPU) == 0) { + return TestCPU(); + } + if (strcmp(argv[1], kRseqTestAbort) == 0) { + return TestAbort(); + } + if (strcmp(argv[1], kRseqTestAbortBefore) == 0) { + return TestAbortBefore(); + } + if (strcmp(argv[1], kRseqTestAbortSignature) == 0) { + return TestAbortSignature(); + } + if (strcmp(argv[1], kRseqTestAbortPreCommit) == 0) { + return TestAbortPreCommit(); + } + if (strcmp(argv[1], kRseqTestAbortClearsCS) == 0) { + return TestAbortClearsCS(); + } + if (strcmp(argv[1], kRseqTestInvalidAbortClearsCS) == 0) { + return TestInvalidAbortClearsCS(); + } + + return 3; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/rseq/start.S b/test/syscalls/linux/rseq/start.S new file mode 100644 index 000000000..b9611b276 --- /dev/null +++ b/test/syscalls/linux/rseq/start.S @@ -0,0 +1,45 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + + .text + .align 4 + .type _start,@function + .globl _start + +_start: + movq %rsp,%rdi + call __init + hlt + + .size _start,.-_start + .section .note.GNU-stack,"",@progbits + + .text + .globl raw_syscall + .type raw_syscall, @function + +raw_syscall: + mov %rdi,%rax // syscall # + mov %rsi,%rdi // arg0 + mov %rdx,%rsi // arg1 + mov %rcx,%rdx // arg2 + mov %r8,%r10 // arg3 (goes in r10 instead of rcx for system calls) + mov %r9,%r8 // arg4 + mov 0x8(%rsp),%r9 // arg5 + syscall + ret + + .size raw_syscall,.-raw_syscall + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/syscalls.h b/test/syscalls/linux/rseq/syscalls.h new file mode 100644 index 000000000..e5299c188 --- /dev/null +++ b/test/syscalls/linux/rseq/syscalls.h @@ -0,0 +1,66 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_ + +#include "test/syscalls/linux/rseq/types.h" + +#ifdef __x86_64__ +// Syscall numbers. +constexpr int kGetpid = 39; +constexpr int kExitGroup = 231; +#else +#error "Unknown architecture" +#endif + +namespace gvisor { +namespace testing { + +// Standalone system call interfaces. +// Note that these are all "raw" system call interfaces which encode +// errors by setting the return value to a small negative number. +// Use sys_errno() to check system call return values for errors. + +// Maximum Linux error number. +constexpr int kMaxErrno = 4095; + +// Errno values. +#define EPERM 1 +#define EFAULT 14 +#define EBUSY 16 +#define EINVAL 22 + +// Get the error number from a raw system call return value. +// Returns a positive error number or 0 if there was no error. +static inline int sys_errno(uintptr_t rval) { + if (rval >= static_cast(-kMaxErrno)) { + return -static_cast(rval); + } + return 0; +} + +extern "C" uintptr_t raw_syscall(int number, ...); + +static inline void sys_exit_group(int status) { + raw_syscall(kExitGroup, status); +} +static inline int sys_getpid() { + return static_cast(raw_syscall(kGetpid)); +} + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_SYSCALLS_H_ diff --git a/test/syscalls/linux/rseq/test.h b/test/syscalls/linux/rseq/test.h new file mode 100644 index 000000000..3b7bb74b1 --- /dev/null +++ b/test/syscalls/linux/rseq/test.h @@ -0,0 +1,43 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_ + +namespace gvisor { +namespace testing { + +// Test cases supported by rseq binary. 
+ +inline constexpr char kRseqTestUnaligned[] = "unaligned"; +inline constexpr char kRseqTestRegister[] = "register"; +inline constexpr char kRseqTestDoubleRegister[] = "double-register"; +inline constexpr char kRseqTestRegisterUnregister[] = "register-unregister"; +inline constexpr char kRseqTestUnregisterDifferentPtr[] = + "unregister-different-ptr"; +inline constexpr char kRseqTestUnregisterDifferentSignature[] = + "unregister-different-signature"; +inline constexpr char kRseqTestCPU[] = "cpu"; +inline constexpr char kRseqTestAbort[] = "abort"; +inline constexpr char kRseqTestAbortBefore[] = "abort-before"; +inline constexpr char kRseqTestAbortSignature[] = "abort-signature"; +inline constexpr char kRseqTestAbortPreCommit[] = "abort-precommit"; +inline constexpr char kRseqTestAbortClearsCS[] = "abort-clears-cs"; +inline constexpr char kRseqTestInvalidAbortClearsCS[] = + "invalid-abort-clears-cs"; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TEST_H_ diff --git a/test/syscalls/linux/rseq/types.h b/test/syscalls/linux/rseq/types.h new file mode 100644 index 000000000..b6afe9817 --- /dev/null +++ b/test/syscalls/linux/rseq/types.h @@ -0,0 +1,31 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_ + +using size_t = __SIZE_TYPE__; +using uintptr_t = __UINTPTR_TYPE__; + +using uint8_t = __UINT8_TYPE__; +using uint16_t = __UINT16_TYPE__; +using uint32_t = __UINT32_TYPE__; +using uint64_t = __UINT64_TYPE__; + +using int8_t = __INT8_TYPE__; +using int16_t = __INT16_TYPE__; +using int32_t = __INT32_TYPE__; +using int64_t = __INT64_TYPE__; + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_ diff --git a/test/syscalls/linux/rseq/uapi.h b/test/syscalls/linux/rseq/uapi.h new file mode 100644 index 000000000..e3ff0579a --- /dev/null +++ b/test/syscalls/linux/rseq/uapi.h @@ -0,0 +1,54 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_ + +// User-kernel ABI for restartable sequences. + +// Standard types. +// +// N.B. This header will be included in targets that do have the standard +// library, so we can't shadow the standard type names. +using __u32 = __UINT32_TYPE__; +using __u64 = __UINT64_TYPE__; + +#ifdef __x86_64__ +// Syscall numbers. 
+constexpr int kRseqSyscall = 334; +#else +#error "Unknown architecture" +#endif // __x86_64__ + +struct rseq_cs { + __u32 version; + __u32 flags; + __u64 start_ip; + __u64 post_commit_offset; + __u64 abort_ip; +} __attribute__((aligned(4 * sizeof(__u64)))); + +// N.B. alignment is enforced by the kernel. +struct rseq { + __u32 cpu_id_start; + __u32 cpu_id; + struct rseq_cs* rseq_cs; + __u32 flags; +} __attribute__((aligned(4 * sizeof(__u64)))); + +constexpr int kRseqFlagUnregister = 1 << 0; + +constexpr int kRseqCPUIDUninitialized = -1; + +#endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_ -- cgit v1.2.3 From cb5f9b8f863c93bb7e3757c1f4b3e1a64e6acdfb Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Mon, 9 Dec 2019 12:03:16 -0800 Subject: Mark test as non flaky. PiperOrigin-RevId: 284606133 --- test/syscalls/linux/BUILD | 2 -- 1 file changed, 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index c49445d62..6ea922fb4 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3272,8 +3272,6 @@ cc_binary( testonly = 1, srcs = ["tcp_socket.cc"], linkstatic = 1, - # FIXME(b/135470853) - tags = ["flaky"], deps = [ ":socket_test_util", "//test/util:file_descriptor", -- cgit v1.2.3 From 17867c88f7afdac6ff1c212aeac9aee2045f4f5a Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Mon, 9 Dec 2019 13:35:56 -0800 Subject: Include for TCP enums in proc_net tests These are currently duplicated in ip_socket_test_util, so tests including both netinet/tcp.h and ip_socket_test_util won't compile. PiperOrigin-RevId: 284623958 --- test/syscalls/linux/ip_socket_test_util.h | 19 ------------------- test/syscalls/linux/proc_net_tcp.cc | 1 + test/syscalls/linux/proc_net_udp.cc | 1 + 3 files changed, 2 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h index 072230d85..9cb4566db 100644 --- a/test/syscalls/linux/ip_socket_test_util.h +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -26,25 +26,6 @@ namespace gvisor { namespace testing { -// Possible values of the "st" field in a /proc/net/{tcp,udp} entry. Source: -// Linux kernel, include/net/tcp_states.h. -enum { - TCP_ESTABLISHED = 1, - TCP_SYN_SENT, - TCP_SYN_RECV, - TCP_FIN_WAIT1, - TCP_FIN_WAIT2, - TCP_TIME_WAIT, - TCP_CLOSE, - TCP_CLOSE_WAIT, - TCP_LAST_ACK, - TCP_LISTEN, - TCP_CLOSING, - TCP_NEW_SYN_RECV, - - TCP_MAX_STATES -}; - // Extracts the IP address from an inet sockaddr in network byte order. uint32_t IPFromInetSockaddr(const struct sockaddr* addr); diff --git a/test/syscalls/linux/proc_net_tcp.cc b/test/syscalls/linux/proc_net_tcp.cc index 2659f6a98..5b6e3e3cd 100644 --- a/test/syscalls/linux/proc_net_tcp.cc +++ b/test/syscalls/linux/proc_net_tcp.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include diff --git a/test/syscalls/linux/proc_net_udp.cc b/test/syscalls/linux/proc_net_udp.cc index f06f1a24b..786b4b4af 100644 --- a/test/syscalls/linux/proc_net_udp.cc +++ b/test/syscalls/linux/proc_net_udp.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include -- cgit v1.2.3 From 18af75db9de5244bd3e180a86886a4b3cadd7547 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Mon, 9 Dec 2019 15:51:24 -0800 Subject: Add UDP SO_REUSEADDR support to the port manager. 
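For context, the userspace behavior this models: two UDP sockets may bind the same address and port only if every socket already bound to that port also set SO_REUSEADDR, and (per the MostRecent flag above) unicast delivery then favors the most recently bound socket. A hedged C++ sketch of that bind pattern; the port number and helper name are arbitrary, not taken from the tests:

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    // Creates a UDP socket with SO_REUSEADDR set and binds it to loopback:port.
    static int BoundUdpSocket(uint16_t port) {
      int fd = socket(AF_INET, SOCK_DGRAM, 0);
      if (fd < 0) return -1;
      int one = 1;
      setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
      sockaddr_in addr = {};
      addr.sin_family = AF_INET;
      addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
      addr.sin_port = htons(port);
      if (bind(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) != 0) {
        perror("bind");
        close(fd);
        return -1;
      }
      return fd;
    }

    int main() {
      const uint16_t kPort = 40000;  // Arbitrary port for illustration.
      int first = BoundUdpSocket(kPort);
      int second = BoundUdpSocket(kPort);  // Succeeds only because both reuse.
      printf("first=%d second=%d\n", first, second);
      if (second >= 0) close(second);
      if (first >= 0) close(first);
      return 0;
    }
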
Next steps include adding support to the transport demuxer and the UDP endpoint. PiperOrigin-RevId: 284652151 --- pkg/tcpip/ports/BUILD | 2 +- pkg/tcpip/ports/ports.go | 148 +++++++-- pkg/tcpip/ports/ports_test.go | 182 +++++++---- pkg/tcpip/transport/tcp/BUILD | 1 + pkg/tcpip/transport/tcp/endpoint.go | 25 +- pkg/tcpip/transport/udp/BUILD | 1 + pkg/tcpip/transport/udp/endpoint.go | 19 +- .../linux/socket_bind_to_device_sequence.cc | 353 +++++++++++++++------ 8 files changed, 536 insertions(+), 195 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD index 4839f0a65..e156b01f6 100644 --- a/pkg/tcpip/ports/BUILD +++ b/pkg/tcpip/ports/BUILD @@ -1,5 +1,5 @@ -load("//tools/go_stateify:defs.bzl", "go_library") load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index 30cea8996..6c5e19e8f 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -41,6 +41,30 @@ type portDescriptor struct { port uint16 } +// Flags represents the type of port reservation. +// +// +stateify savable +type Flags struct { + // MostRecent represents UDP SO_REUSEADDR. + MostRecent bool + + // LoadBalanced indicates SO_REUSEPORT. + // + // LoadBalanced takes precidence over MostRecent. + LoadBalanced bool +} + +func (f Flags) bits() reuseFlag { + var rf reuseFlag + if f.MostRecent { + rf |= mostRecentFlag + } + if f.LoadBalanced { + rf |= loadBalancedFlag + } + return rf +} + // PortManager manages allocating, reserving and releasing ports. type PortManager struct { mu sync.RWMutex @@ -54,9 +78,59 @@ type PortManager struct { hint uint32 } +type reuseFlag int + +const ( + mostRecentFlag reuseFlag = 1 << iota + loadBalancedFlag + nextFlag + + flagMask = nextFlag - 1 +) + type portNode struct { - reuse bool - refs int + // refs stores the count for each possible flag combination. + refs [nextFlag]int +} + +func (p portNode) totalRefs() int { + var total int + for _, r := range p.refs { + total += r + } + return total +} + +// flagRefs returns the number of references with all specified flags. +func (p portNode) flagRefs(flags reuseFlag) int { + var total int + for i, r := range p.refs { + if reuseFlag(i)&flags == flags { + total += r + } + } + return total +} + +// allRefsHave returns if all references have all specified flags. +func (p portNode) allRefsHave(flags reuseFlag) bool { + for i, r := range p.refs { + if reuseFlag(i)&flags == flags && r > 0 { + return false + } + } + return true +} + +// intersectionRefs returns the set of flags shared by all references. +func (p portNode) intersectionRefs() reuseFlag { + intersection := flagMask + for i, r := range p.refs { + if r > 0 { + intersection &= reuseFlag(i) + } + } + return intersection } // deviceNode is never empty. When it has no elements, it is removed from the @@ -66,30 +140,44 @@ type deviceNode map[tcpip.NICID]portNode // isAvailable checks whether binding is possible by device. If not binding to a // device, check against all portNodes. If binding to a specific device, check // against the unspecified device and the provided device. -func (d deviceNode) isAvailable(reuse bool, bindToDevice tcpip.NICID) bool { +// +// If either of the port reuse flags is enabled on any of the nodes, all nodes +// sharing a port must share at least one reuse flag. This matches Linux's +// behavior. 
+func (d deviceNode) isAvailable(flags Flags, bindToDevice tcpip.NICID) bool { + flagBits := flags.bits() if bindToDevice == 0 { // Trying to binding all devices. - if !reuse { + if flagBits == 0 { // Can't bind because the (addr,port) is already bound. return false } + intersection := flagMask for _, p := range d { - if !p.reuse { - // Can't bind because the (addr,port) was previously bound without reuse. + i := p.intersectionRefs() + intersection &= i + if intersection&flagBits == 0 { + // Can't bind because the (addr,port) was + // previously bound without reuse. return false } } return true } + intersection := flagMask + if p, ok := d[0]; ok { - if !reuse || !p.reuse { + intersection = p.intersectionRefs() + if intersection&flagBits == 0 { return false } } if p, ok := d[bindToDevice]; ok { - if !reuse || !p.reuse { + i := p.intersectionRefs() + intersection &= i + if intersection&flagBits == 0 { return false } } @@ -103,12 +191,12 @@ type bindAddresses map[tcpip.Address]deviceNode // isAvailable checks whether an IP address is available to bind to. If the // address is the "any" address, check all other addresses. Otherwise, just // check against the "any" address and the provided address. -func (b bindAddresses) isAvailable(addr tcpip.Address, reuse bool, bindToDevice tcpip.NICID) bool { +func (b bindAddresses) isAvailable(addr tcpip.Address, flags Flags, bindToDevice tcpip.NICID) bool { if addr == anyIPAddress { // If binding to the "any" address then check that there are no conflicts // with all addresses. for _, d := range b { - if !d.isAvailable(reuse, bindToDevice) { + if !d.isAvailable(flags, bindToDevice) { return false } } @@ -117,14 +205,14 @@ func (b bindAddresses) isAvailable(addr tcpip.Address, reuse bool, bindToDevice // Check that there is no conflict with the "any" address. if d, ok := b[anyIPAddress]; ok { - if !d.isAvailable(reuse, bindToDevice) { + if !d.isAvailable(flags, bindToDevice) { return false } } // Check that this is no conflict with the provided address. if d, ok := b[addr]; ok { - if !d.isAvailable(reuse, bindToDevice) { + if !d.isAvailable(flags, bindToDevice) { return false } } @@ -190,17 +278,17 @@ func (s *PortManager) pickEphemeralPort(offset, count uint32, testPort func(p ui } // IsPortAvailable tests if the given port is available on all given protocols. 
-func (s *PortManager) IsPortAvailable(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, reuse bool, bindToDevice tcpip.NICID) bool { +func (s *PortManager) IsPortAvailable(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) bool { s.mu.Lock() defer s.mu.Unlock() - return s.isPortAvailableLocked(networks, transport, addr, port, reuse, bindToDevice) + return s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice) } -func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, reuse bool, bindToDevice tcpip.NICID) bool { +func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) bool { for _, network := range networks { desc := portDescriptor{network, transport, port} if addrs, ok := s.allocatedPorts[desc]; ok { - if !addrs.isAvailable(addr, reuse, bindToDevice) { + if !addrs.isAvailable(addr, flags, bindToDevice) { return false } } @@ -212,14 +300,14 @@ func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumb // reserved by another endpoint. If port is zero, ReservePort will search for // an unreserved ephemeral port and reserve it, returning its value in the // "port" return value. -func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, reuse bool, bindToDevice tcpip.NICID) (reservedPort uint16, err *tcpip.Error) { +func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) (reservedPort uint16, err *tcpip.Error) { s.mu.Lock() defer s.mu.Unlock() // If a port is specified, just try to reserve it for all network // protocols. if port != 0 { - if !s.reserveSpecificPort(networks, transport, addr, port, reuse, bindToDevice) { + if !s.reserveSpecificPort(networks, transport, addr, port, flags, bindToDevice) { return 0, tcpip.ErrPortInUse } return port, nil @@ -227,15 +315,16 @@ func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transp // A port wasn't specified, so try to find one. return s.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) { - return s.reserveSpecificPort(networks, transport, addr, p, reuse, bindToDevice), nil + return s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice), nil }) } // reserveSpecificPort tries to reserve the given port on all given protocols. -func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, reuse bool, bindToDevice tcpip.NICID) bool { - if !s.isPortAvailableLocked(networks, transport, addr, port, reuse, bindToDevice) { +func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) bool { + if !s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice) { return false } + flagBits := flags.bits() // Reserve port on all network protocols. 
for _, network := range networks { @@ -250,12 +339,9 @@ func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber d = make(deviceNode) m[addr] = d } - if n, ok := d[bindToDevice]; ok { - n.refs++ - d[bindToDevice] = n - } else { - d[bindToDevice] = portNode{reuse: reuse, refs: 1} - } + n := d[bindToDevice] + n.refs[flagBits]++ + d[bindToDevice] = n } return true @@ -263,10 +349,12 @@ func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber // ReleasePort releases the reservation on a port/IP combination so that it can // be reserved by other endpoints. -func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, bindToDevice tcpip.NICID) { +func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) { s.mu.Lock() defer s.mu.Unlock() + flagBits := flags.bits() + for _, network := range networks { desc := portDescriptor{network, transport, port} if m, ok := s.allocatedPorts[desc]; ok { @@ -278,9 +366,9 @@ func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transp if !ok { continue } - n.refs-- + n.refs[flagBits]-- d[bindToDevice] = n - if n.refs == 0 { + if n.refs == [nextFlag]int{} { delete(d, bindToDevice) } if len(d) == 0 { diff --git a/pkg/tcpip/ports/ports_test.go b/pkg/tcpip/ports/ports_test.go index 19f4833fc..d6969d050 100644 --- a/pkg/tcpip/ports/ports_test.go +++ b/pkg/tcpip/ports/ports_test.go @@ -33,7 +33,7 @@ type portReserveTestAction struct { port uint16 ip tcpip.Address want *tcpip.Error - reuse bool + flags Flags release bool device tcpip.NICID } @@ -50,7 +50,7 @@ func TestPortReservation(t *testing.T) { {port: 80, ip: fakeIPAddress1, want: nil}, /* N.B. Order of tests matters! 
*/ {port: 80, ip: anyIPAddress, want: tcpip.ErrPortInUse}, - {port: 80, ip: fakeIPAddress, want: tcpip.ErrPortInUse, reuse: true}, + {port: 80, ip: fakeIPAddress, want: tcpip.ErrPortInUse, flags: Flags{LoadBalanced: true}}, }, }, { @@ -61,7 +61,7 @@ func TestPortReservation(t *testing.T) { /* release fakeIPAddress, but anyIPAddress is still inuse */ {port: 22, ip: fakeIPAddress, release: true}, {port: 22, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, - {port: 22, ip: fakeIPAddress, want: tcpip.ErrPortInUse, reuse: true}, + {port: 22, ip: fakeIPAddress, want: tcpip.ErrPortInUse, flags: Flags{LoadBalanced: true}}, /* Release port 22 from any IP address, then try to reserve fake IP address on 22 */ {port: 22, ip: anyIPAddress, want: nil, release: true}, {port: 22, ip: fakeIPAddress, want: nil}, @@ -71,36 +71,36 @@ func TestPortReservation(t *testing.T) { actions: []portReserveTestAction{ {port: 00, ip: fakeIPAddress, want: nil}, {port: 00, ip: fakeIPAddress, want: nil}, - {port: 00, ip: fakeIPAddress, reuse: true, want: nil}, + {port: 00, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, }, }, { tname: "bind to ip with reuseport", actions: []portReserveTestAction{ - {port: 25, ip: fakeIPAddress, reuse: true, want: nil}, - {port: 25, ip: fakeIPAddress, reuse: true, want: nil}, + {port: 25, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, + {port: 25, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 25, ip: fakeIPAddress, reuse: false, want: tcpip.ErrPortInUse}, - {port: 25, ip: anyIPAddress, reuse: false, want: tcpip.ErrPortInUse}, + {port: 25, ip: fakeIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, + {port: 25, ip: anyIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, - {port: 25, ip: anyIPAddress, reuse: true, want: nil}, + {port: 25, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, }, }, { tname: "bind to inaddr any with reuseport", actions: []portReserveTestAction{ - {port: 24, ip: anyIPAddress, reuse: true, want: nil}, - {port: 24, ip: anyIPAddress, reuse: true, want: nil}, + {port: 24, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, + {port: 24, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, - {port: 24, ip: anyIPAddress, reuse: false, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, reuse: false, want: tcpip.ErrPortInUse}, + {port: 24, ip: anyIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, reuse: true, want: nil}, - {port: 24, ip: fakeIPAddress, release: true, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, release: true, want: nil}, - {port: 24, ip: anyIPAddress, release: true}, - {port: 24, ip: anyIPAddress, reuse: false, want: tcpip.ErrPortInUse}, + {port: 24, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, release: true}, + {port: 24, ip: anyIPAddress, flags: Flags{}, want: tcpip.ErrPortInUse}, - {port: 24, ip: anyIPAddress, release: true}, - {port: 24, ip: anyIPAddress, reuse: false, want: nil}, + {port: 24, ip: anyIPAddress, flags: Flags{LoadBalanced: true}, release: true}, + {port: 24, ip: anyIPAddress, flags: Flags{}, want: nil}, }, }, { tname: "bind twice with device fails", @@ -125,88 +125,152 @@ func TestPortReservation(t *testing.T) { actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, want: nil}, {port: 24, ip: 
fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 123, reuse: true, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, {port: 24, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, reuse: true, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, }, }, { tname: "bind with device", actions: []portReserveTestAction{ {port: 24, ip: fakeIPAddress, device: 123, want: nil}, {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 123, reuse: true, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 0, reuse: true, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 456, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 456, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 789, want: nil}, {port: 24, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, reuse: true, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, }, }, { - tname: "bind with reuse", + tname: "bind with reuseport", actions: []portReserveTestAction{ - {port: 24, ip: fakeIPAddress, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 123, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 0, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: nil}, }, }, { - tname: "binding with reuse and device", + tname: "binding with reuseport and device", actions: []portReserveTestAction{ - {port: 24, ip: fakeIPAddress, device: 123, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 123, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 456, reuse: true, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, reuse: true, want: nil}, - {port: 24, ip: fakeIPAddress, device: 789, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 456, flags: Flags{LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 789, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 999, want: tcpip.ErrPortInUse}, }, }, { - tname: "mixing reuse and not reuse by binding to device", + tname: "mixing reuseport and not reuseport by binding to device", actions: []portReserveTestAction{ - 
{port: 24, ip: fakeIPAddress, device: 123, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 456, want: nil}, - {port: 24, ip: fakeIPAddress, device: 789, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 789, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 999, want: nil}, }, }, { - tname: "can't bind to 0 after mixing reuse and not reuse", + tname: "can't bind to 0 after mixing reuseport and not reuseport", actions: []portReserveTestAction{ - {port: 24, ip: fakeIPAddress, device: 123, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, device: 456, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, reuse: true, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, }, }, { tname: "bind and release", actions: []portReserveTestAction{ - {port: 24, ip: fakeIPAddress, device: 123, reuse: true, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, reuse: true, want: nil}, - {port: 24, ip: fakeIPAddress, device: 345, reuse: false, want: tcpip.ErrPortInUse}, - {port: 24, ip: fakeIPAddress, device: 789, reuse: true, want: nil}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 345, flags: Flags{}, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 789, flags: Flags{LoadBalanced: true}, want: nil}, // Release the bind to device 0 and try again. - {port: 24, ip: fakeIPAddress, device: 0, reuse: true, want: nil, release: true}, - {port: 24, ip: fakeIPAddress, device: 345, reuse: false, want: nil}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: nil, release: true}, + {port: 24, ip: fakeIPAddress, device: 345, flags: Flags{}, want: nil}, }, }, { - tname: "bind twice with reuse once", + tname: "bind twice with reuseport once", actions: []portReserveTestAction{ - {port: 24, ip: fakeIPAddress, device: 123, reuse: false, want: nil}, - {port: 24, ip: fakeIPAddress, device: 0, reuse: true, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, }, }, { tname: "release an unreserved device", actions: []portReserveTestAction{ - {port: 24, ip: fakeIPAddress, device: 123, reuse: false, want: nil}, - {port: 24, ip: fakeIPAddress, device: 456, reuse: false, want: nil}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 456, flags: Flags{}, want: nil}, // The below don't exist. - {port: 24, ip: fakeIPAddress, device: 345, reuse: false, want: nil, release: true}, - {port: 9999, ip: fakeIPAddress, device: 123, reuse: false, want: nil, release: true}, + {port: 24, ip: fakeIPAddress, device: 345, flags: Flags{}, want: nil, release: true}, + {port: 9999, ip: fakeIPAddress, device: 123, flags: Flags{}, want: nil, release: true}, // Release all. 
- {port: 24, ip: fakeIPAddress, device: 123, reuse: false, want: nil, release: true}, - {port: 24, ip: fakeIPAddress, device: 456, reuse: false, want: nil, release: true}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{}, want: nil, release: true}, + {port: 24, ip: fakeIPAddress, device: 456, flags: Flags{}, want: nil, release: true}, + }, + }, { + tname: "bind with reuseaddr", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 123, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{MostRecent: true}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 0, want: tcpip.ErrPortInUse}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{MostRecent: true}, want: nil}, + }, + }, { + tname: "bind twice with reuseaddr once", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, device: 123, flags: Flags{}, want: nil}, + {port: 24, ip: fakeIPAddress, device: 0, flags: Flags{MostRecent: true}, want: tcpip.ErrPortInUse}, + }, + }, { + tname: "bind with reuseaddr and reuseport", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + }, + }, { + tname: "bind with reuseaddr and reuseport, and then reuseaddr", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + }, + }, { + tname: "bind with reuseaddr and reuseport, and then reuseport", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: tcpip.ErrPortInUse}, + }, + }, { + tname: "bind with reuseaddr and reuseport twice, and then reuseaddr", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: nil}, + }, + }, { + tname: "bind with reuseaddr and reuseport twice, and then reuseport", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: nil}, + }, + }, { + tname: "bind with reuseaddr, and then reuseaddr and reuseport", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{LoadBalanced: true}, want: tcpip.ErrPortInUse}, + }, + }, { + tname: "bind with reuseport, and then reuseaddr and reuseport", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: 
Flags{LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: tcpip.ErrPortInUse}, }, }, } { @@ -216,12 +280,12 @@ func TestPortReservation(t *testing.T) { for _, test := range test.actions { if test.release { - pm.ReleasePort(net, fakeTransNumber, test.ip, test.port, test.device) + pm.ReleasePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device) continue } - gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.reuse, test.device) + gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device) if err != test.want { - t.Fatalf("ReservePort(.., .., %s, %d, %t, %d) = %v, want %v", test.ip, test.port, test.reuse, test.device, err, test.want) + t.Fatalf("ReservePort(.., .., %s, %d, %+v, %d) = %v, want %v", test.ip, test.port, test.flags, test.device, err, test.want) } if test.port == 0 && (gotPort == 0 || gotPort < FirstEphemeral) { t.Fatalf("ReservePort(.., .., .., 0) = %d, want port number >= %d to be picked", gotPort, FirstEphemeral) diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index dd1728f9c..455a1c098 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -52,6 +52,7 @@ go_library( "//pkg/tcpip/hash/jenkins", "//pkg/tcpip/header", "//pkg/tcpip/iptables", + "//pkg/tcpip/ports", "//pkg/tcpip/seqnum", "//pkg/tcpip/stack", "//pkg/tcpip/transport/raw", diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 9d4a87e30..4861ab513 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -30,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/iptables" + "gvisor.dev/gvisor/pkg/tcpip/ports" "gvisor.dev/gvisor/pkg/tcpip/seqnum" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/tmutex" @@ -343,6 +344,7 @@ type endpoint struct { // Values used to reserve a port or register a transport endpoint // (which ever happens first). boundBindToDevice tcpip.NICID + boundPortFlags ports.Flags // effectiveNetProtos contains the network protocols actually in use. In // most cases it will only contain "netProto", but in cases like IPv6 @@ -737,9 +739,10 @@ func (e *endpoint) Close() { e.isRegistered = false } - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) e.isPortReserved = false e.boundBindToDevice = 0 + e.boundPortFlags = ports.Flags{} } // Mark endpoint as closed. @@ -800,10 +803,11 @@ func (e *endpoint) cleanupLocked() { } if e.isPortReserved { - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) e.isPortReserved = false } e.boundBindToDevice = 0 + e.boundPortFlags = ports.Flags{} e.route.Release() e.stack.CompleteTransportEndpointCleanup(e) @@ -1775,7 +1779,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc } // reusePort is false below because connect cannot reuse a port even if // reusePort was set. 
- if !e.stack.IsPortAvailable(netProtos, ProtocolNumber, e.ID.LocalAddress, p, false /* reusePort */, e.bindToDevice) { + if !e.stack.IsPortAvailable(netProtos, ProtocolNumber, e.ID.LocalAddress, p, ports.Flags{LoadBalanced: false}, e.bindToDevice) { return false, nil } @@ -1802,7 +1806,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc // before Connect: in such a case we don't want to hold on to // reservations anymore. if e.isPortReserved { - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, origID.LocalAddress, origID.LocalPort, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, origID.LocalAddress, origID.LocalPort, e.boundPortFlags, e.boundBindToDevice) e.isPortReserved = false } @@ -2034,28 +2038,33 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) (err *tcpip.Error) { } } - port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port, e.reusePort, e.bindToDevice) + flags := ports.Flags{ + LoadBalanced: e.reusePort, + } + port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port, flags, e.bindToDevice) if err != nil { return err } e.boundBindToDevice = e.bindToDevice + e.boundPortFlags = flags e.isPortReserved = true e.effectiveNetProtos = netProtos e.ID.LocalPort = port // Any failures beyond this point must remove the port registration. - defer func(bindToDevice tcpip.NICID) { + defer func(portFlags ports.Flags, bindToDevice tcpip.NICID) { if err != nil { - e.stack.ReleasePort(netProtos, ProtocolNumber, addr.Addr, port, bindToDevice) + e.stack.ReleasePort(netProtos, ProtocolNumber, addr.Addr, port, portFlags, bindToDevice) e.isPortReserved = false e.effectiveNetProtos = nil e.ID.LocalPort = 0 e.ID.LocalAddress = "" e.boundNICID = 0 e.boundBindToDevice = 0 + e.boundPortFlags = ports.Flags{} } - }(e.boundBindToDevice) + }(e.boundPortFlags, e.boundBindToDevice) // If an address is specified, we must ensure that it's one of our // local addresses. diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD index 8d4c3808f..97e4d5825 100644 --- a/pkg/tcpip/transport/udp/BUILD +++ b/pkg/tcpip/transport/udp/BUILD @@ -34,6 +34,7 @@ go_library( "//pkg/tcpip/buffer", "//pkg/tcpip/header", "//pkg/tcpip/iptables", + "//pkg/tcpip/ports", "//pkg/tcpip/stack", "//pkg/tcpip/transport/raw", "//pkg/waiter", diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 4b161e404..1ac4705af 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/iptables" + "gvisor.dev/gvisor/pkg/tcpip/ports" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/waiter" ) @@ -107,6 +108,7 @@ type endpoint struct { // Values used to reserve a port or register a transport endpoint. // (which ever happens first). boundBindToDevice tcpip.NICID + boundPortFlags ports.Flags // sendTOS represents IPv4 TOS or IPv6 TrafficClass, // applied while sending packets. Defaults to 0 as on Linux. 
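Storing boundPortFlags on the endpoint matters because the port manager now keeps
a separate reference count per flag combination; releasing with different flags
than were used to reserve would decrement the wrong bucket. Below is a standalone
sketch of that bookkeeping, illustrative only and not the code in this diff.

    package main

    import "fmt"

    const (
        mostRecent   = 1 << iota // SO_REUSEADDR
        loadBalanced             // SO_REUSEPORT
        nextFlag
    )

    // node keeps one reference counter per flag combination, mirroring the
    // refs array added to portNode in this change.
    type node struct {
        refs [nextFlag]int
    }

    func (n *node) reserve(flagBits int) { n.refs[flagBits]++ }
    func (n *node) release(flagBits int) { n.refs[flagBits]-- }
    func (n *node) free() bool           { return n.refs == [nextFlag]int{} }

    func main() {
        var n node
        n.reserve(loadBalanced)              // bind with SO_REUSEPORT
        n.reserve(mostRecent | loadBalanced) // bind with both options

        // Each release must use the same flags as its reservation, which is
        // exactly what boundPortFlags preserves on the endpoint.
        n.release(loadBalanced)
        n.release(mostRecent | loadBalanced)
        fmt.Println(n.free()) // true: the port is unreserved again
    }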
@@ -180,8 +182,9 @@ func (e *endpoint) Close() { switch e.state { case StateBound, StateConnected: e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundBindToDevice) - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) e.boundBindToDevice = 0 + e.boundPortFlags = ports.Flags{} } for _, mem := range e.multicastMemberships { @@ -895,7 +898,8 @@ func (e *endpoint) Disconnect() *tcpip.Error { } else { if e.ID.LocalPort != 0 { // Release the ephemeral port. - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) + e.boundPortFlags = ports.Flags{} } e.state = StateInitial } @@ -1042,16 +1046,23 @@ func (*endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.NICID, *tcpip.Error) { if e.ID.LocalPort == 0 { - port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.reusePort, e.bindToDevice) + flags := ports.Flags{ + LoadBalanced: e.reusePort, + // FIXME(b/129164367): Support SO_REUSEADDR. + MostRecent: false, + } + port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, flags, e.bindToDevice) if err != nil { return id, e.bindToDevice, err } + e.boundPortFlags = flags id.LocalPort = port } err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, id, e, e.reusePort, e.bindToDevice) if err != nil { - e.stack.ReleasePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.bindToDevice) + e.stack.ReleasePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.boundPortFlags, e.bindToDevice) + e.boundPortFlags = ports.Flags{} } return id, e.bindToDevice, err } diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc index e4641c62e..033fd80a5 100644 --- a/test/syscalls/linux/socket_bind_to_device_sequence.cc +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -97,12 +97,12 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { sockets_to_close_.erase(socket_id); } - // Bind a socket with the reuse option and bind_to_device options. Checks + // Bind a socket with the reuse options and bind_to_device options. Checks // that all steps succeed and that the bind command's error matches want. // Sets the socket_id to uniquely identify the socket bound if it is not // nullptr. - void BindSocket(bool reuse, int device_id = 0, int want = 0, - int *socket_id = nullptr) { + void BindSocket(bool reuse_port, bool reuse_addr, int device_id = 0, + int want = 0, int *socket_id = nullptr) { next_socket_id_++; sockets_to_close_[next_socket_id_] = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket_fd = sockets_to_close_[next_socket_id_]->get(); @@ -110,13 +110,20 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { *socket_id = next_socket_id_; } - // If reuse is indicated, do that. - if (reuse) { + // If reuse_port is indicated, do that. 
+ if (reuse_port) { EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceedsWithValue(0)); } + // If reuse_addr is indicated, do that. + if (reuse_addr) { + EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + } + // If the device is non-zero, bind to that device. if (device_id != 0) { string device_name; @@ -182,129 +189,289 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { }; TEST_P(BindToDeviceSequenceTest, BindTwiceWithDeviceFails) { - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 3)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 3, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 3)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 3, EADDRINUSE)); } TEST_P(BindToDeviceSequenceTest, BindToDevice) { - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 1)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 2)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 1)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 2)); } TEST_P(BindToDeviceSequenceTest, BindToDeviceAndThenWithoutDevice) { - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 123)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); } TEST_P(BindToDeviceSequenceTest, BindWithoutDevice) { - ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse */ false)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 123, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 123, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 0, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); } TEST_P(BindToDeviceSequenceTest, BindWithDevice) { - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 123, 0)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 123, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 123, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 0, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ 
true, /* bind_to_device */ 0, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 456, 0)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 789, 0)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 0, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 456, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 789, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); } TEST_P(BindToDeviceSequenceTest, BindWithReuse) { - ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse */ true)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 123, EADDRINUSE)); ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 123)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 0, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse */ true, /* bind_to_device */ 0)); + BindSocket(/* reusePort */ true, /* reuse_addr */ false)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, + /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 0)); } TEST_P(BindToDeviceSequenceTest, BindingWithReuseAndDevice) { - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 123)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 123, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 123)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 0, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 456)); - ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse */ true)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 789)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 999, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, 
EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 456)); + ASSERT_NO_FATAL_FAILURE( + BindSocket(/* reuse_port */ true, /* reuse_addr */ false)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 789)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 999, EADDRINUSE)); } TEST_P(BindToDeviceSequenceTest, MixingReuseAndNotReuseByBindingToDevice) { - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 123, 0)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 456, 0)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 789, 0)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 999, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 123, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 456, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 789, 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 999, 0)); } TEST_P(BindToDeviceSequenceTest, CannotBindTo0AfterMixingReuseAndNotReuse) { - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 123)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 456)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 456)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); } TEST_P(BindToDeviceSequenceTest, BindAndRelease) { - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123)); int to_release; - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 0, 0, &to_release)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 345, EADDRINUSE)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 789)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, 0, &to_release)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 345, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 789)); // Release the bind to device 0 and try again. 
ASSERT_NO_FATAL_FAILURE(ReleaseSocket(to_release)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 345)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 345)); } TEST_P(BindToDeviceSequenceTest, BindTwiceWithReuseOnce) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindWithReuseAddr) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ false, /* bind_to_device */ 123)); - ASSERT_NO_FATAL_FAILURE( - BindSocket(/* reuse */ true, /* bind_to_device */ 0, EADDRINUSE)); + BindSocket(/* reusePort */ false, /* reuse_addr */ true)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 123, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0)); +} + +TEST_P(BindToDeviceSequenceTest, + CannotBindTo0AfterMixingReuseAddrAndNotReuseAddr) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 123)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 456)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReusePort) { + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReuseAddr) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReusePort) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReuseAddr) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. 
+ SKIP_IF(IsRunningOnGvisor()); + + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindReusePortThenReuseAddrReusePort) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ true, /* reuse_addr */ false, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ true, + /* bind_to_device */ 0, EADDRINUSE)); +} + +TEST_P(BindToDeviceSequenceTest, BindReuseAddrThenReuseAddr) { + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0, EADDRINUSE)); +} + +// This behavior seems like a bug? +TEST_P(BindToDeviceSequenceTest, + BindReuseAddrThenReuseAddrReusePortThenReuseAddr) { + // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + SKIP_IF(IsRunningOnGvisor()); + + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ true, /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ true, + /* bind_to_device */ 0)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, + /* reuse_addr */ false, + /* bind_to_device */ 0)); } INSTANTIATE_TEST_SUITE_P(BindToDeviceTest, BindToDeviceSequenceTest, -- cgit v1.2.3 From 98aafb1334b816596b462ad12fa3e96784703061 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Mon, 9 Dec 2019 20:07:14 -0800 Subject: Add test for SO_BINDTODEVICE state bug. This was accidentally dropped from the change which fixed the bug. Updates #1217 PiperOrigin-RevId: 284689362 --- .../linux/socket_bind_to_device_sequence.cc | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc index 033fd80a5..34b1058a9 100644 --- a/test/syscalls/linux/socket_bind_to_device_sequence.cc +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -97,6 +97,16 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { sockets_to_close_.erase(socket_id); } + // SetDevice changes the bind_to_device option. It does not bind or re-bind. + void SetDevice(int socket_id, int device_id) { + auto socket_fd = sockets_to_close_[socket_id]->get(); + string device_name; + ASSERT_NO_FATAL_FAILURE(GetDevice(device_id, &device_name)); + EXPECT_THAT(setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, + device_name.c_str(), device_name.size() + 1), + SyscallSucceedsWithValue(0)); + } + // Bind a socket with the reuse options and bind_to_device options. Checks // that all steps succeed and that the bind command's error matches want. // Sets the socket_id to uniquely identify the socket bound if it is not @@ -474,6 +484,25 @@ TEST_P(BindToDeviceSequenceTest, /* bind_to_device */ 0)); } +// Repro test for gvisor.dev/issue/1217. 
Not replicated in ports_test.go as this +// test is different from the others and wouldn't fit well there. +TEST_P(BindToDeviceSequenceTest, BindAndReleaseDifferentDevice) { + int to_release; + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 3, 0, &to_release)); + ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, + /* reuse_addr */ false, + /* bind_to_device */ 3, EADDRINUSE)); + // Change the device. Since the socket was already bound, this should have no + // effect. + SetDevice(to_release, 2); + // Release the bind to device 3 and try again. + ASSERT_NO_FATAL_FAILURE(ReleaseSocket(to_release)); + ASSERT_NO_FATAL_FAILURE(BindSocket( + /* reuse_port */ false, /* reuse_addr */ false, /* bind_to_device */ 3)); +} + INSTANTIATE_TEST_SUITE_P(BindToDeviceTest, BindToDeviceSequenceTest, ::testing::Values(IPv4UDPUnboundSocket(0), IPv4TCPUnboundSocket(0))); -- cgit v1.2.3 From 4a19ebd431659578c9af0a91ff35d8b6d9de190e Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 10 Dec 2019 09:32:47 -0800 Subject: Add hostinet tests for sendmsg and recvmsg with TOS/TCLASS. PiperOrigin-RevId: 284786069 --- test/syscalls/linux/udp_socket_test_cases.cc | 149 +++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 4556f16d6..dc35c2f50 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1345,5 +1345,154 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { ASSERT_EQ(tv.tv_usec, tv2.tv_usec); } +// Test that a socket with IP_TOS or IPV6_TCLASS set will set the TOS byte on +// outgoing packets, and that a receiving socket with IP_RECVTOS or +// IPV6_RECVTCLASS will create the corresponding control message. +TEST_P(UdpSocketTest, SetAndReceiveTOS) { + // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. + SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + // Allow socket to receive control message. + int recv_level = SOL_IP; + int recv_type = IP_RECVTOS; + if (GetParam() != AddressFamily::kIpv4) { + recv_level = SOL_IPV6; + recv_type = IPV6_RECVTCLASS; + } + ASSERT_THAT( + setsockopt(s_, recv_level, recv_type, &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Set socket TOS. + int sent_level = recv_level; + int sent_type = IP_TOS; + if (sent_level == SOL_IPV6) { + sent_type = IPV6_TCLASS; + } + int sent_tos = IPTOS_LOWDELAY; // Choose some TOS value. + ASSERT_THAT( + setsockopt(t_, sent_level, sent_type, &sent_tos, sizeof(sent_tos)), + SyscallSucceeds()); + + // Prepare message to send. + constexpr size_t kDataLength = 1024; + struct msghdr sent_msg = {}; + struct iovec sent_iov = {}; + char sent_data[kDataLength]; + sent_iov.iov_base = &sent_data[0]; + sent_iov.iov_len = kDataLength; + sent_msg.msg_iov = &sent_iov; + sent_msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(sendmsg)(t_, &sent_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + // Receive message. 
+ struct msghdr received_msg = {}; + struct iovec received_iov = {}; + char received_data[kDataLength]; + received_iov.iov_base = &received_data[0]; + received_iov.iov_len = kDataLength; + received_msg.msg_iov = &received_iov; + received_msg.msg_iovlen = 1; + size_t cmsg_data_len = sizeof(int8_t); + if (sent_type == IPV6_TCLASS) { + cmsg_data_len = sizeof(int); + } + std::vector received_cmsgbuf(CMSG_SPACE(cmsg_data_len)); + received_msg.msg_control = &received_cmsgbuf[0]; + received_msg.msg_controllen = received_cmsgbuf.size(); + ASSERT_THAT(RetryEINTR(recvmsg)(s_, &received_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); + EXPECT_EQ(cmsg->cmsg_level, sent_level); + EXPECT_EQ(cmsg->cmsg_type, sent_type); + int8_t received_tos = 0; + memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos)); + EXPECT_EQ(received_tos, sent_tos); +} + +// Test that sendmsg with IP_TOS and IPV6_TCLASS control messages will set the +// TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or +// IPV6_RECVTCLASS will create the corresponding control message. +TEST_P(UdpSocketTest, SendAndReceiveTOS) { + // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. + SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + // Allow socket to receive control message. + int recv_level = SOL_IP; + int recv_type = IP_RECVTOS; + if (GetParam() != AddressFamily::kIpv4) { + recv_level = SOL_IPV6; + recv_type = IPV6_RECVTCLASS; + } + int recv_opt = kSockOptOn; + ASSERT_THAT( + setsockopt(s_, recv_level, recv_type, &recv_opt, sizeof(recv_opt)), + SyscallSucceeds()); + + // Prepare message to send. + constexpr size_t kDataLength = 1024; + int sent_level = recv_level; + int sent_type = IP_TOS; + int sent_tos = IPTOS_LOWDELAY; // Choose some TOS value. + + struct msghdr sent_msg = {}; + struct iovec sent_iov = {}; + char sent_data[kDataLength]; + sent_iov.iov_base = &sent_data[0]; + sent_iov.iov_len = kDataLength; + sent_msg.msg_iov = &sent_iov; + sent_msg.msg_iovlen = 1; + size_t cmsg_data_len = sizeof(int8_t); + if (sent_level == SOL_IPV6) { + sent_type = IPV6_TCLASS; + cmsg_data_len = sizeof(int); + } + std::vector sent_cmsgbuf(CMSG_SPACE(cmsg_data_len)); + sent_msg.msg_control = &sent_cmsgbuf[0]; + sent_msg.msg_controllen = CMSG_LEN(cmsg_data_len); + + // Manually add control message. + struct cmsghdr* sent_cmsg = CMSG_FIRSTHDR(&sent_msg); + sent_cmsg->cmsg_len = CMSG_LEN(cmsg_data_len); + sent_cmsg->cmsg_level = sent_level; + sent_cmsg->cmsg_type = sent_type; + *(int8_t*)CMSG_DATA(sent_cmsg) = sent_tos; + + ASSERT_THAT(RetryEINTR(sendmsg)(t_, &sent_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + // Receive message. 
+ struct msghdr received_msg = {}; + struct iovec received_iov = {}; + char received_data[kDataLength]; + received_iov.iov_base = &received_data[0]; + received_iov.iov_len = kDataLength; + received_msg.msg_iov = &received_iov; + received_msg.msg_iovlen = 1; + std::vector received_cmsgbuf(CMSG_SPACE(cmsg_data_len)); + received_msg.msg_control = &received_cmsgbuf[0]; + received_msg.msg_controllen = CMSG_LEN(cmsg_data_len); + ASSERT_THAT(RetryEINTR(recvmsg)(s_, &received_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); + EXPECT_EQ(cmsg->cmsg_level, sent_level); + EXPECT_EQ(cmsg->cmsg_type, sent_type); + int8_t received_tos = 0; + memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos)); + EXPECT_EQ(received_tos, sent_tos); +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From aadbf322c63b0aa1d34cd9755dc1266af2e5ac58 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 10 Dec 2019 09:36:52 -0800 Subject: Disable execveat test that is causing files in /bin to be deleted. Disable until gvisor.dev/issue/1366 is resolved. Updates #1366 PiperOrigin-RevId: 284786895 --- test/syscalls/linux/exec.cc | 9 --------- 1 file changed, 9 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index b5e0a512b..e402d5b27 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -696,15 +696,6 @@ TEST(ExecveatTest, SymlinkNoFollowAndEmptyPath) { ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n")); } -TEST(ExecveatTest, SymlinkNoFollowIgnoreSymlinkAncestor) { - TempPath parent_link = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateSymlinkTo("/tmp", "/bin")); - std::string path_with_symlink = JoinPath(parent_link.path(), "echo"); - - CheckExecveat(AT_FDCWD, path_with_symlink, {path_with_symlink}, {}, - AT_SYMLINK_NOFOLLOW, ArgEnvExitStatus(0, 0), ""); -} - TEST(ExecveatTest, SymlinkNoFollowWithNormalFile) { const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/bin", O_DIRECTORY)); -- cgit v1.2.3 From f47eaffd5c59445b8cafda1b7a51e7f4be5d254a Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 10 Dec 2019 10:55:08 -0800 Subject: Do not consider symlinks as directories in fs utils. IsDirectory() is used in RecursivelyDelete(), which should not follow symlinks. The only other use (syscalls/linux/rename.cc) is not affected by this change. Updates #1366. 
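The distinction is simply that stat(2) follows symlinks while lstat(2) does not,
so a symlink pointing at a directory reports as a directory through stat but as a
symlink through lstat. A small Go illustration of the same behavior, separate
from the C++ utilities changed below:

    package main

    import (
        "fmt"
        "os"
    )

    func main() {
        dir, err := os.MkdirTemp("", "target")
        if err != nil {
            panic(err)
        }
        defer os.RemoveAll(dir)

        link := dir + ".link"
        if err := os.Symlink(dir, link); err != nil {
            panic(err)
        }
        defer os.Remove(link)

        followed, _ := os.Stat(link)     // follows the symlink (stat)
        notFollowed, _ := os.Lstat(link) // examines the link itself (lstat)

        fmt.Println(followed.IsDir())    // true: the target is a directory
        fmt.Println(notFollowed.IsDir()) // false: the path is a symlink
    }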
PiperOrigin-RevId: 284803968 --- test/util/fs_util.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc index 88b1e7911..042cec94a 100644 --- a/test/util/fs_util.cc +++ b/test/util/fs_util.cc @@ -105,6 +105,15 @@ PosixErrorOr Stat(absl::string_view path) { return stat_buf; } +PosixErrorOr Lstat(absl::string_view path) { + struct stat stat_buf; + int res = lstat(std::string(path).c_str(), &stat_buf); + if (res < 0) { + return PosixError(errno, absl::StrCat("lstat ", path)); + } + return stat_buf; +} + PosixErrorOr Fstat(int fd) { struct stat stat_buf; int res = fstat(fd, &stat_buf); @@ -127,7 +136,7 @@ PosixErrorOr Exists(absl::string_view path) { } PosixErrorOr IsDirectory(absl::string_view path) { - ASSIGN_OR_RETURN_ERRNO(struct stat stat_buf, Stat(path)); + ASSIGN_OR_RETURN_ERRNO(struct stat stat_buf, Lstat(path)); if (S_ISDIR(stat_buf.st_mode)) { return true; } -- cgit v1.2.3 From 769e1cdcbe539ca2347ad5ccd2706ae17777aed9 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 10 Dec 2019 11:40:29 -0800 Subject: Re-enable execveat test that was causing files in /bin to be deleted. Test now no longer deletes files incorrectly, due to a fix in fs utils used by TempPath (github.com/google/gvisor/pull/1368). Fixes #1366 PiperOrigin-RevId: 284814605 --- test/syscalls/linux/exec.cc | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index e402d5b27..b5e0a512b 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -696,6 +696,15 @@ TEST(ExecveatTest, SymlinkNoFollowAndEmptyPath) { ArgEnvExitStatus(0, 0), absl::StrCat(path, "\n")); } +TEST(ExecveatTest, SymlinkNoFollowIgnoreSymlinkAncestor) { + TempPath parent_link = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateSymlinkTo("/tmp", "/bin")); + std::string path_with_symlink = JoinPath(parent_link.path(), "echo"); + + CheckExecveat(AT_FDCWD, path_with_symlink, {path_with_symlink}, {}, + AT_SYMLINK_NOFOLLOW, ArgEnvExitStatus(0, 0), ""); +} + TEST(ExecveatTest, SymlinkNoFollowWithNormalFile) { const FileDescriptor dirfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/bin", O_DIRECTORY)); -- cgit v1.2.3 From 1601e78a52e9181d1ea8a3ff36399575e95ad0bf Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 11 Dec 2019 16:39:58 -0800 Subject: Add syscall tests for getxattr and setxattr. Support for getxattr and setxattr are in subsequent commits. 
PiperOrigin-RevId: 285088817 --- test/syscalls/BUILD | 5 + test/syscalls/linux/BUILD | 21 ++ test/syscalls/linux/xattr.cc | 491 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 517 insertions(+) create mode 100644 test/syscalls/linux/xattr.cc (limited to 'test') diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 829693e8e..a3a85917d 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -717,6 +717,11 @@ syscall_test(test = "//test/syscalls/linux:proc_net_tcp_test") syscall_test(test = "//test/syscalls/linux:proc_net_udp_test") +syscall_test( + add_overlay = True, + test = "//test/syscalls/linux:xattr_test", +) + go_binary( name = "syscall_test_runner", testonly = 1, diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 6ea922fb4..0bbaaf28a 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3722,3 +3722,24 @@ cc_binary( "@com_google_googletest//:gtest", ], ) + +cc_binary( + name = "xattr_test", + testonly = 1, + srcs = [ + "file_base.h", + "xattr.cc", + ], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", + ], +) diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc new file mode 100644 index 000000000..3e07b634b --- /dev/null +++ b/test/syscalls/linux/xattr.cc @@ -0,0 +1,491 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/syscalls/linux/file_base.h" +#include "test/util/capability_util.h" +#include "test/util/posix_error.h" +#include "test/util/temp_path.h" + +namespace gvisor { +namespace testing { + +namespace { + +class XattrTest : public FileTest {}; + +TEST_F(XattrTest, XattrNullName) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + + EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(getxattr(path, nullptr, nullptr, 0), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(XattrTest, XattrEmptyName) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + + EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(getxattr(path, "", nullptr, 0), SyscallFailsWithErrno(ERANGE)); +} + +TEST_F(XattrTest, XattrLargeName) { + // TODO(b/127675828): Support setxattr and getxattr. 
+ SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + std::string name = "user."; + name += std::string(XATTR_NAME_MAX - name.length(), 'a'); + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallSucceedsWithValue(0)); + + name += "a"; + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallFailsWithErrno(ERANGE)); +} + +TEST_F(XattrTest, XattrInvalidPrefix) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + std::string name(XATTR_NAME_MAX, 'a'); + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(EOPNOTSUPP)); + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + +TEST_F(XattrTest, XattrReadOnly) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + char val = 'a'; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IRUSR)); + + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), + SyscallFailsWithErrno(EACCES)); + + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); +} + +TEST_F(XattrTest, XattrWriteOnly) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + // Drop capabilities that allow us to override file and directory permissions. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + + ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IWUSR)); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + char val = 'a'; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(EACCES)); +} + +TEST_F(XattrTest, XattrTrustedWithNonadmin) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + const char* path = test_file_name_.c_str(); + const char name[] = "trusted.abc"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, XattrOnDirectory) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + char name[] = "user.abc"; + EXPECT_THAT(setxattr(dir.path().c_str(), name, NULL, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(getxattr(dir.path().c_str(), name, NULL, 0), + SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, XattrOnSymlink) { + // TODO(b/127675828): Support setxattr and getxattr. 
+ SKIP_IF(IsRunningOnGvisor()); + + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); + char name[] = "user.abc"; + EXPECT_THAT(setxattr(link.path().c_str(), name, NULL, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(getxattr(link.path().c_str(), name, NULL, 0), + SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, XattrOnInvalidFileTypes) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + char name[] = "user.abc"; + + char char_device[] = "/dev/zero"; + EXPECT_THAT(setxattr(char_device, name, NULL, 0, /*flags=*/0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(getxattr(char_device, name, NULL, 0), + SyscallFailsWithErrno(ENODATA)); + + // Use tmpfs, where creation of named pipes is supported. + const std::string fifo = NewTempAbsPathInDir("/dev/shm"); + const char* path = fifo.c_str(); + EXPECT_THAT(mknod(path, S_IFIFO | S_IRUSR | S_IWUSR, 0), SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, NULL, 0, /*flags=*/0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(getxattr(path, name, NULL, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + std::vector val = {'a', 'a'}; + size_t size = 1; + EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), + SyscallSucceeds()); + + std::vector buf = {'-', '-'}; + std::vector expected_buf = {'a', '-'}; + EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, SetxattrZeroSize) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + char val = 'a'; + EXPECT_THAT(setxattr(path, name, &val, 0, /*flags=*/0), SyscallSucceeds()); + + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, XATTR_SIZE_MAX), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(buf, '-'); +} + +TEST_F(XattrTest, SetxattrSizeTooLarge) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + + // Note that each particular fs implementation may stipulate a lower size + // limit, in which case we actually may fail (e.g. error with ENOSPC) for + // some sizes under XATTR_SIZE_MAX. + size_t size = XATTR_SIZE_MAX + 1; + std::vector val(size); + EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), + SyscallFailsWithErrno(E2BIG)); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), + SyscallFailsWithErrno(EFAULT)); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { + // TODO(b/127675828): Support setxattr and getxattr. 
+ SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + std::vector val(XATTR_SIZE_MAX + 1); + std::fill(val.begin(), val.end(), 'a'); + size_t size = 1; + EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), + SyscallSucceeds()); + + std::vector buf = {'-', '-'}; + std::vector expected_buf = {'a', '-'}; + EXPECT_THAT(getxattr(path, name, buf.data(), size), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, SetxattrReplaceWithSmaller) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + std::vector val = {'a', 'a'}; + EXPECT_THAT(setxattr(path, name, val.data(), 2, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, val.data(), 1, /*flags=*/0), + SyscallSucceeds()); + + std::vector buf = {'-', '-'}; + std::vector expected_buf = {'a', '-'}; + EXPECT_THAT(getxattr(path, name, buf.data(), 2), SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, SetxattrReplaceWithLarger) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + std::vector val = {'a', 'a'}; + EXPECT_THAT(setxattr(path, name, val.data(), 1, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, val.data(), 2, /*flags=*/0), + SyscallSucceeds()); + + std::vector buf = {'-', '-'}; + EXPECT_THAT(getxattr(path, name, buf.data(), 2), SyscallSucceedsWithValue(2)); + EXPECT_EQ(buf, val); +} + +TEST_F(XattrTest, SetxattrCreateFlag) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), + SyscallFailsWithErrno(EEXIST)); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, SetxattrReplaceFlag) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), + SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), + SyscallSucceeds()); + + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, SetxattrInvalidFlags) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + int invalid_flags = 0xff; + EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(XattrTest, Getxattr) { + // TODO(b/127675828): Support setxattr and getxattr. 
+ SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + int val = 1234; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + int buf = 0; + EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); +} + +TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + std::vector val = {'a', 'a'}; + size_t size = val.size(); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, 1), SyscallFailsWithErrno(ERANGE)); + EXPECT_EQ(buf, '-'); +} + +TEST_F(XattrTest, GetxattrSizeLargerThanValue) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + char val = 'a'; + EXPECT_THAT(setxattr(path, name, &val, 1, /*flags=*/0), SyscallSucceeds()); + + std::vector buf(XATTR_SIZE_MAX); + std::fill(buf.begin(), buf.end(), '-'); + std::vector expected_buf = buf; + expected_buf[0] = 'a'; + EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, GetxattrZeroSize) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + char val = 'a'; + EXPECT_THAT(setxattr(path, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, 0), + SyscallSucceedsWithValue(sizeof(val))); + EXPECT_EQ(buf, '-'); +} + +TEST_F(XattrTest, GetxattrSizeTooLarge) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + char val = 'a'; + EXPECT_THAT(setxattr(path, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + + std::vector buf(XATTR_SIZE_MAX + 1); + std::fill(buf.begin(), buf.end(), '-'); + std::vector expected_buf = buf; + expected_buf[0] = 'a'; + EXPECT_THAT(getxattr(path, name, buf.data(), buf.size()), + SyscallSucceedsWithValue(sizeof(val))); + EXPECT_EQ(buf, expected_buf); +} + +TEST_F(XattrTest, GetxattrNullValue) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + char val = 'a'; + size_t size = sizeof(val); + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(getxattr(path, name, nullptr, size), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { + // TODO(b/127675828): Support setxattr and getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + char name[] = "user.abc"; + char val = 'a'; + size_t size = sizeof(val); + // Set value with zero size. + EXPECT_THAT(setxattr(path, name, &val, 0, /*flags=*/0), SyscallSucceeds()); + // Get value with nonzero size. + EXPECT_THAT(getxattr(path, name, nullptr, size), SyscallSucceedsWithValue(0)); + + // Set value with nonzero size. + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + // Get value with zero size. 
+ EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(size)); +} + +TEST_F(XattrTest, GetxattrNonexistentName) { + // TODO(b/127675828): Support getxattr. + SKIP_IF(IsRunningOnGvisor()); + + const char* path = test_file_name_.c_str(); + std::string name = "user.nonexistent"; + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallFailsWithErrno(ENODATA)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From 6fc9f0aefd89ce42ef2c38ea7853f9ba7c4bee04 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 11 Dec 2019 17:51:37 -0800 Subject: Add support for TCP_USER_TIMEOUT option. The implementation follows the linux behavior where specifying a TCP_USER_TIMEOUT will cause the resend timer to honor the user specified timeout rather than the default rto based timeout. Further it alters when connections are timedout due to keepalive failures. It does not alter the behavior of when keepalives are sent. This is as per the linux behavior. PiperOrigin-RevId: 285099795 --- pkg/sentry/socket/netstack/netstack.go | 23 ++++ pkg/tcpip/tcpip.go | 5 + pkg/tcpip/transport/tcp/BUILD | 1 + pkg/tcpip/transport/tcp/accept.go | 15 +++ pkg/tcpip/transport/tcp/connect.go | 19 ++- pkg/tcpip/transport/tcp/endpoint.go | 19 +++ pkg/tcpip/transport/tcp/protocol.go | 21 ++- pkg/tcpip/transport/tcp/rcv.go | 19 ++- pkg/tcpip/transport/tcp/rcv_state.go | 29 ++++ pkg/tcpip/transport/tcp/snd.go | 48 ++++++- pkg/tcpip/transport/tcp/snd_state.go | 10 ++ pkg/tcpip/transport/tcp/tcp_test.go | 194 ++++++++++++++++++++++++--- test/syscalls/linux/socket_inet_loopback.cc | 56 +++++++- test/syscalls/linux/socket_ip_tcp_generic.cc | 63 +++++++++ test/syscalls/linux/tcp_socket.cc | 25 ++++ 15 files changed, 509 insertions(+), 38 deletions(-) create mode 100644 pkg/tcpip/transport/tcp/rcv_state.go (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index fe5a46aa3..8a6522eac 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1127,6 +1127,18 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa return int32(time.Duration(v) / time.Second), nil + case linux.TCP_USER_TIMEOUT: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + var v tcpip.TCPUserTimeoutOption + if err := ep.GetSockOpt(&v); err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(time.Duration(v) / time.Millisecond), nil + case linux.TCP_INFO: var v tcpip.TCPInfoOption if err := ep.GetSockOpt(&v); err != nil { @@ -1563,6 +1575,17 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.KeepaliveIntervalOption(time.Second * time.Duration(v)))) + case linux.TCP_USER_TIMEOUT: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + v := int32(usermem.ByteOrder.Uint32(optVal)) + if v < 0 { + return syserr.ErrInvalidArgument + } + return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPUserTimeoutOption(time.Millisecond * time.Duration(v)))) + case linux.TCP_CONGESTION: v := tcpip.CongestionControlOption(optVal) if err := ep.SetSockOpt(v); err != nil { diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index d5bb5b6ed..f62fd729f 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -576,6 +576,11 @@ type KeepaliveIntervalOption time.Duration // closed. 
type KeepaliveCountOption int +// TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a user +// specified timeout for a given TCP connection. +// See: RFC5482 for details. +type TCPUserTimeoutOption time.Duration + // CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get // the current congestion control algorithm. type CongestionControlOption string diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 455a1c098..3b353d56c 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -28,6 +28,7 @@ go_library( "forwarder.go", "protocol.go", "rcv.go", + "rcv_state.go", "reno.go", "sack.go", "sack_scoreboard.go", diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 74df3edfb..5422ae80c 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -242,6 +242,13 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i n.initGSO() + // Now inherit any socket options that should be inherited from the + // listening endpoint. + // In case of Forwarder listenEP will be nil and hence this check. + if l.listenEP != nil { + l.listenEP.propagateInheritableOptions(n) + } + // Register new endpoint so that packets are routed to it. if err := n.stack.RegisterTransportEndpoint(n.boundNICID, n.effectiveNetProtos, ProtocolNumber, n.ID, n, n.reusePort, n.boundBindToDevice); err != nil { n.Close() @@ -350,6 +357,14 @@ func (e *endpoint) deliverAccepted(n *endpoint) { } } +// propagateInheritableOptions propagates any options set on the listening +// endpoint to the newly created endpoint. +func (e *endpoint) propagateInheritableOptions(n *endpoint) { + e.mu.Lock() + n.userTimeout = e.userTimeout + e.mu.Unlock() +} + // handleSynSegment is called in its own goroutine once the listening endpoint // receives a SYN segment. It is responsible for completing the handshake and // queueing the new endpoint for acceptance. diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 3d059c302..4c34fc9d2 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -862,7 +862,7 @@ func (e *endpoint) resetConnectionLocked(err *tcpip.Error) { } e.state = StateError e.HardError = err - if err != tcpip.ErrConnectionReset { + if err != tcpip.ErrConnectionReset && err != tcpip.ErrTimeout { // The exact sequence number to be used for the RST is the same as the // one used by Linux. We need to handle the case of window being shrunk // which can cause sndNxt to be outside the acceptable window on the @@ -1087,12 +1087,24 @@ func (e *endpoint) handleSegments() *tcpip.Error { // keepalive packets periodically when the connection is idle. If we don't hear // from the other side after a number of tries, we terminate the connection. func (e *endpoint) keepaliveTimerExpired() *tcpip.Error { + e.mu.RLock() + userTimeout := e.userTimeout + e.mu.RUnlock() + e.keepalive.Lock() if !e.keepalive.enabled || !e.keepalive.timer.checkExpiration() { e.keepalive.Unlock() return nil } + // If a userTimeout is set then abort the connection if it is + // exceeded. 
+ if userTimeout != 0 && time.Since(e.rcv.lastRcvdAckTime) >= userTimeout && e.keepalive.unacked > 0 { + e.keepalive.Unlock() + e.stack.Stats().TCP.EstablishedTimedout.Increment() + return tcpip.ErrTimeout + } + if e.keepalive.unacked >= e.keepalive.count { e.keepalive.Unlock() e.stack.Stats().TCP.EstablishedTimedout.Increment() @@ -1112,7 +1124,6 @@ func (e *endpoint) keepaliveTimerExpired() *tcpip.Error { // whether it is enabled for this endpoint. func (e *endpoint) resetKeepaliveTimer(receivedData bool) { e.keepalive.Lock() - defer e.keepalive.Unlock() if receivedData { e.keepalive.unacked = 0 } @@ -1120,6 +1131,7 @@ func (e *endpoint) resetKeepaliveTimer(receivedData bool) { // data to send. if !e.keepalive.enabled || e.snd == nil || e.snd.sndUna != e.snd.sndNxt { e.keepalive.timer.disable() + e.keepalive.Unlock() return } if e.keepalive.unacked > 0 { @@ -1127,6 +1139,7 @@ func (e *endpoint) resetKeepaliveTimer(receivedData bool) { } else { e.keepalive.timer.enable(e.keepalive.idle) } + e.keepalive.Unlock() } // disableKeepaliveTimer stops the keepalive timer. @@ -1239,6 +1252,7 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { w: &e.snd.resendWaker, f: func() *tcpip.Error { if !e.snd.retransmitTimerExpired() { + e.stack.Stats().TCP.EstablishedTimedout.Increment() return tcpip.ErrTimeout } return nil @@ -1405,6 +1419,7 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { if s == nil { break } + e.tryDeliverSegmentFromClosedEndpoint(s) } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 4861ab513..dd8b47cbe 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -341,6 +341,7 @@ type endpoint struct { // TCP should never broadcast but Linux nevertheless supports enabling/ // disabling SO_BROADCAST, albeit as a NOOP. broadcast bool + // Values used to reserve a port or register a transport endpoint // (which ever happens first). boundBindToDevice tcpip.NICID @@ -474,6 +475,12 @@ type endpoint struct { // without hearing a response, the connection is closed. keepalive keepalive + // userTimeout if non-zero specifies a user specified timeout for + // a connection w/ pending data to send. A connection that has pending + // unacked data will be forcibily aborted if the timeout is reached + // without any data being acked. + userTimeout time.Duration + // pendingAccepted is a synchronization primitive used to track number // of connections that are queued up to be delivered to the accepted // channel. We use this to ensure that all goroutines blocked on writing @@ -1333,6 +1340,12 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.notifyProtocolGoroutine(notifyKeepaliveChanged) return nil + case tcpip.TCPUserTimeoutOption: + e.mu.Lock() + e.userTimeout = time.Duration(v) + e.mu.Unlock() + return nil + case tcpip.BroadcastOption: e.mu.Lock() e.broadcast = v != 0 @@ -1591,6 +1604,12 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.keepalive.Unlock() return nil + case *tcpip.TCPUserTimeoutOption: + e.mu.Lock() + *o = tcpip.TCPUserTimeoutOption(e.userTimeout) + e.mu.Unlock() + return nil + case *tcpip.OutOfBandInlineOption: // We don't currently support disabling this option. 
*o = 1 diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 89b965c23..bc718064c 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -162,13 +162,26 @@ func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Transpo func replyWithReset(s *segment) { // Get the seqnum from the packet if the ack flag is set. seq := seqnum.Value(0) + ack := seqnum.Value(0) + flags := byte(header.TCPFlagRst) + // As per RFC 793 page 35 (Reset Generation) + // 1. If the connection does not exist (CLOSED) then a reset is sent + // in response to any incoming segment except another reset. In + // particular, SYNs addressed to a non-existent connection are rejected + // by this means. + + // If the incoming segment has an ACK field, the reset takes its + // sequence number from the ACK field of the segment, otherwise the + // reset has sequence number zero and the ACK field is set to the sum + // of the sequence number and segment length of the incoming segment. + // The connection remains in the CLOSED state. if s.flagIsSet(header.TCPFlagAck) { seq = s.ackNumber + } else { + flags |= header.TCPFlagAck + ack = s.sequenceNumber.Add(s.logicalLen()) } - - ack := s.sequenceNumber.Add(s.logicalLen()) - - sendTCP(&s.route, s.id, buffer.VectorisedView{}, s.route.DefaultTTL(), stack.DefaultTOS, header.TCPFlagRst|header.TCPFlagAck, seq, ack, 0 /* rcvWnd */, nil /* options */, nil /* gso */) + sendTCP(&s.route, s.id, buffer.VectorisedView{}, s.route.DefaultTTL(), stack.DefaultTOS, flags, seq, ack, 0 /* rcvWnd */, nil /* options */, nil /* gso */) } // SetOption implements TransportProtocol.SetOption. diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index 5ee499c36..0a5534959 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -50,16 +50,20 @@ type receiver struct { pendingRcvdSegments segmentHeap pendingBufUsed seqnum.Size pendingBufSize seqnum.Size + + // Time when the last ack was received. + lastRcvdAckTime time.Time `state:".(unixTime)"` } func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8, pendingBufSize seqnum.Size) *receiver { return &receiver{ - ep: ep, - rcvNxt: irs + 1, - rcvAcc: irs.Add(rcvWnd + 1), - rcvWnd: rcvWnd, - rcvWndScale: rcvWndScale, - pendingBufSize: pendingBufSize, + ep: ep, + rcvNxt: irs + 1, + rcvAcc: irs.Add(rcvWnd + 1), + rcvWnd: rcvWnd, + rcvWndScale: rcvWndScale, + pendingBufSize: pendingBufSize, + lastRcvdAckTime: time.Now(), } } @@ -360,6 +364,9 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) { return true, nil } + // Store the time of the last ack. + r.lastRcvdAckTime = time.Now() + // Defer segment processing if it can't be consumed now. if !r.consumeSegment(s, segSeq, segLen) { if segLen > 0 || s.flagIsSet(header.TCPFlagFin) { diff --git a/pkg/tcpip/transport/tcp/rcv_state.go b/pkg/tcpip/transport/tcp/rcv_state.go new file mode 100644 index 000000000..2bf21a2e7 --- /dev/null +++ b/pkg/tcpip/transport/tcp/rcv_state.go @@ -0,0 +1,29 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp + +import ( + "time" +) + +// saveLastRcvdAckTime is invoked by stateify. +func (r *receiver) saveLastRcvdAckTime() unixTime { + return unixTime{r.lastRcvdAckTime.Unix(), r.lastRcvdAckTime.UnixNano()} +} + +// loadLastRcvdAckTime is invoked by stateify. +func (r *receiver) loadLastRcvdAckTime(unix unixTime) { + r.lastRcvdAckTime = time.Unix(unix.second, unix.nano) +} diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 8332a0179..8a947dc66 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -28,8 +28,11 @@ import ( ) const ( - // minRTO is the minimum allowed value for the retransmit timeout. - minRTO = 200 * time.Millisecond + // MinRTO is the minimum allowed value for the retransmit timeout. + MinRTO = 200 * time.Millisecond + + // MaxRTO is the maximum allowed value for the retransmit timeout. + MaxRTO = 120 * time.Second // InitialCwnd is the initial congestion window. InitialCwnd = 10 @@ -134,6 +137,10 @@ type sender struct { // rttMeasureTime is the time when the rttMeasureSeqNum was sent. rttMeasureTime time.Time `state:".(unixTime)"` + // firstRetransmittedSegXmitTime is the original transmit time of + // the first segment that was retransmitted due to RTO expiration. + firstRetransmittedSegXmitTime time.Time `state:".(unixTime)"` + closed bool writeNext *segment writeList segmentList @@ -392,8 +399,8 @@ func (s *sender) updateRTO(rtt time.Duration) { s.rto = s.rtt.srtt + 4*s.rtt.rttvar s.rtt.Unlock() - if s.rto < minRTO { - s.rto = minRTO + if s.rto < MinRTO { + s.rto = MinRTO } } @@ -438,8 +445,30 @@ func (s *sender) retransmitTimerExpired() bool { s.ep.stack.Stats().TCP.Timeouts.Increment() s.ep.stats.SendErrors.Timeouts.Increment() - // Give up if we've waited more than a minute since the last resend. - if s.rto >= 60*time.Second { + // Give up if we've waited more than a minute since the last resend or + // if a user time out is set and we have exceeded the user specified + // timeout since the first retransmission. + s.ep.mu.RLock() + uto := s.ep.userTimeout + s.ep.mu.RUnlock() + + if s.firstRetransmittedSegXmitTime.IsZero() { + // We store the original xmitTime of the segment that we are + // about to retransmit as the retransmission time. This is + // required as by the time the retransmitTimer has expired the + // segment has already been sent and unacked for the RTO at the + // time the segment was sent. + s.firstRetransmittedSegXmitTime = s.writeList.Front().xmitTime + } + + elapsed := time.Since(s.firstRetransmittedSegXmitTime) + remaining := MaxRTO + if uto != 0 { + // Cap to the user specified timeout if one is specified. + remaining = uto - elapsed + } + + if remaining <= 0 || s.rto >= MaxRTO { return false } @@ -447,6 +476,11 @@ func (s *sender) retransmitTimerExpired() bool { // below. s.rto *= 2 + // Cap RTO to remaining time. + if s.rto > remaining { + s.rto = remaining + } + // See: https://tools.ietf.org/html/rfc6582#section-3.2 Step 4. 
// // Retransmit timeouts: @@ -1168,6 +1202,8 @@ func (s *sender) handleRcvdSegment(seg *segment) { // RFC 6298 Rule 5.3 if s.sndUna == s.sndNxt { s.outstanding = 0 + // Reset firstRetransmittedSegXmitTime to the zero value. + s.firstRetransmittedSegXmitTime = time.Time{} s.resendTimer.disable() } } diff --git a/pkg/tcpip/transport/tcp/snd_state.go b/pkg/tcpip/transport/tcp/snd_state.go index 12eff8afc..8b20c3455 100644 --- a/pkg/tcpip/transport/tcp/snd_state.go +++ b/pkg/tcpip/transport/tcp/snd_state.go @@ -48,3 +48,13 @@ func (s *sender) loadRttMeasureTime(unix unixTime) { func (s *sender) afterLoad() { s.resendTimer.init(&s.resendWaker) } + +// saveFirstRetransmittedSegXmitTime is invoked by stateify. +func (s *sender) saveFirstRetransmittedSegXmitTime() unixTime { + return unixTime{s.firstRetransmittedSegXmitTime.Unix(), s.firstRetransmittedSegXmitTime.UnixNano()} +} + +// loadFirstRetransmittedSegXmitTime is invoked by stateify. +func (s *sender) loadFirstRetransmittedSegXmitTime(unix unixTime) { + s.firstRetransmittedSegXmitTime = time.Unix(unix.second, unix.nano) +} diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index bc5cfcf0e..2a83f7bcc 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -323,8 +323,8 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { checker.SrcPort(context.StackPort), checker.DstPort(context.TestPort), checker.SeqNum(uint32(c.IRS+1)), - checker.AckNum(uint32(iss)+1), - checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck))) + checker.AckNum(0), + checker.TCPFlags(header.TCPFlagRst))) } func TestTCPResetsReceivedIncrement(t *testing.T) { @@ -460,18 +460,17 @@ func TestConnectResetAfterClose(t *testing.T) { checker.TCP( checker.DstPort(context.TestPort), checker.SeqNum(uint32(c.IRS)+2), - checker.AckNum(790), - checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst), + checker.AckNum(0), + checker.TCPFlags(header.TCPFlagRst), ), ) break } } -// TestClosingWithEnqueuedSegments tests handling of -// still enqueued segments when the endpoint transitions -// to StateClose. The in-flight segments would be re-enqueued -// to a any listening endpoint. +// TestClosingWithEnqueuedSegments tests handling of still enqueued segments +// when the endpoint transitions to StateClose. The in-flight segments would be +// re-enqueued to a any listening endpoint. 
func TestClosingWithEnqueuedSegments(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() @@ -576,8 +575,8 @@ func TestClosingWithEnqueuedSegments(t *testing.T) { checker.TCP( checker.DstPort(context.TestPort), checker.SeqNum(uint32(c.IRS)+2), - checker.AckNum(793), - checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst), + checker.AckNum(0), + checker.TCPFlags(header.TCPFlagRst), ), ) } @@ -914,7 +913,7 @@ func TestSendRstOnListenerRxAckV4(t *testing.T) { checker.IPv4(t, c.GetPacket(), checker.TCP( checker.DstPort(context.TestPort), - checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck), + checker.TCPFlags(header.TCPFlagRst), checker.SeqNum(200))) } @@ -942,7 +941,7 @@ func TestSendRstOnListenerRxAckV6(t *testing.T) { checker.IPv6(t, c.GetV6Packet(), checker.TCP( checker.DstPort(context.TestPort), - checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck), + checker.TCPFlags(header.TCPFlagRst), checker.SeqNum(200))) } @@ -4291,8 +4290,9 @@ func TestKeepalive(t *testing.T) { c.CreateConnected(789, 30000, -1 /* epRcvBuf */) + const keepAliveInterval = 10 * time.Millisecond c.EP.SetSockOpt(tcpip.KeepaliveIdleOption(10 * time.Millisecond)) - c.EP.SetSockOpt(tcpip.KeepaliveIntervalOption(10 * time.Millisecond)) + c.EP.SetSockOpt(tcpip.KeepaliveIntervalOption(keepAliveInterval)) c.EP.SetSockOpt(tcpip.KeepaliveCountOption(5)) c.EP.SetSockOpt(tcpip.KeepaliveEnabledOption(1)) @@ -4382,13 +4382,29 @@ func TestKeepalive(t *testing.T) { ) } + // Sleep for a litte over the KeepAlive interval to make sure + // the timer has time to fire after the last ACK and close the + // close the socket. + time.Sleep(keepAliveInterval + 5*time.Millisecond) + // The connection should be terminated after 5 unacked keepalives. + // Send an ACK to trigger a RST from the stack as the endpoint should + // be dead. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagAck, + SeqNum: 790, + AckNum: seqnum.Value(next), + RcvWnd: 30000, + }) + checker.IPv4(t, c.GetPacket(), checker.TCP( checker.DstPort(context.TestPort), checker.SeqNum(uint32(next)), - checker.AckNum(uint32(790)), - checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst), + checker.AckNum(uint32(0)), + checker.TCPFlags(header.TCPFlagRst), ), ) @@ -6157,8 +6173,8 @@ func TestTCPTimeWaitDuplicateFINExtendsTimeWait(t *testing.T) { checker.SrcPort(context.StackPort), checker.DstPort(context.TestPort), checker.SeqNum(uint32(ackHeaders.AckNum)), - checker.AckNum(uint32(ackHeaders.SeqNum)), - checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck))) + checker.AckNum(0), + checker.TCPFlags(header.TCPFlagRst))) if got := c.Stack().Stats().TCP.EstablishedClosed.Value(); got != want { t.Errorf("got c.Stack().Stats().TCP.EstablishedClosed = %v, want = %v", got, want) @@ -6336,7 +6352,147 @@ func TestTCPCloseWithData(t *testing.T) { checker.SrcPort(context.StackPort), checker.DstPort(context.TestPort), checker.SeqNum(uint32(ackHeaders.AckNum)), - checker.AckNum(uint32(ackHeaders.SeqNum)), - checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck))) + checker.AckNum(0), + checker.TCPFlags(header.TCPFlagRst))) +} + +func TestTCPUserTimeout(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + c.CreateConnected(789, 30000, -1 /* epRcvBuf */) + + origEstablishedTimedout := c.Stack().Stats().TCP.EstablishedTimedout.Value() + userTimeout := 50 * time.Millisecond + c.EP.SetSockOpt(tcpip.TCPUserTimeoutOption(userTimeout)) + + // Send some data and wait before ACKing it. 
+ view := buffer.NewView(3) + if _, _, err := c.EP.Write(tcpip.SlicePayload(view), tcpip.WriteOptions{}); err != nil { + t.Fatalf("Write failed: %v", err) + } + + next := uint32(c.IRS) + 1 + checker.IPv4(t, c.GetPacket(), + checker.PayloadLen(len(view)+header.TCPMinimumSize), + checker.TCP( + checker.DstPort(context.TestPort), + checker.SeqNum(next), + checker.AckNum(790), + checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)), + ), + ) + + // Wait for a little over the minimum retransmit timeout of 200ms for + // the retransmitTimer to fire and close the connection. + time.Sleep(tcp.MinRTO + 10*time.Millisecond) + + // No packet should be received as the connection should be silently + // closed due to timeout. + c.CheckNoPacket("unexpected packet received after userTimeout has expired") + + next += uint32(len(view)) + + // The connection should be terminated after userTimeout has expired. + // Send an ACK to trigger a RST from the stack as the endpoint should + // be dead. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagAck, + SeqNum: 790, + AckNum: seqnum.Value(next), + RcvWnd: 30000, + }) + + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(next)), + checker.AckNum(uint32(0)), + checker.TCPFlags(header.TCPFlagRst), + ), + ) + + if _, _, err := c.EP.Read(nil); err != tcpip.ErrTimeout { + t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrTimeout) + } + + if got, want := c.Stack().Stats().TCP.EstablishedTimedout.Value(), origEstablishedTimedout+1; got != want { + t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout = %v, want = %v", got, want) + } +} + +func TestKeepaliveWithUserTimeout(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + c.CreateConnected(789, 30000, -1 /* epRcvBuf */) + + origEstablishedTimedout := c.Stack().Stats().TCP.EstablishedTimedout.Value() + + const keepAliveInterval = 10 * time.Millisecond + c.EP.SetSockOpt(tcpip.KeepaliveIdleOption(10 * time.Millisecond)) + c.EP.SetSockOpt(tcpip.KeepaliveIntervalOption(keepAliveInterval)) + c.EP.SetSockOpt(tcpip.KeepaliveCountOption(10)) + c.EP.SetSockOpt(tcpip.KeepaliveEnabledOption(1)) + + // Set userTimeout to be the duration for 3 keepalive probes. + userTimeout := 30 * time.Millisecond + c.EP.SetSockOpt(tcpip.TCPUserTimeoutOption(userTimeout)) + + // Check that the connection is still alive. + if _, _, err := c.EP.Read(nil); err != tcpip.ErrWouldBlock { + t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrWouldBlock) + } + + // Now receive 2 keepalives, but don't ACK them. The connection should + // be reset when the 3rd one should be sent due to userTimeout being + // 30ms and each keepalive probe should be sent 10ms apart as set above after + // the connection has been idle for 10ms. + for i := 0; i < 2; i++ { + b := c.GetPacket() + checker.IPv4(t, b, + checker.TCP( + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS)), + checker.AckNum(uint32(790)), + checker.TCPFlags(header.TCPFlagAck), + ), + ) + } + + // Sleep for a litte over the KeepAlive interval to make sure + // the timer has time to fire after the last ACK and close the + // close the socket. + time.Sleep(keepAliveInterval + 5*time.Millisecond) + + // The connection should be terminated after 30ms. + // Send an ACK to trigger a RST from the stack as the endpoint should + // be dead. 
+ c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagAck, + SeqNum: 790, + AckNum: seqnum.Value(c.IRS + 1), + RcvWnd: 30000, + }) + + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS+1)), + checker.AckNum(uint32(0)), + checker.TCPFlags(header.TCPFlagRst), + ), + ) + + if _, _, err := c.EP.Read(nil); err != tcpip.ErrTimeout { + t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrTimeout) + } + if got, want := c.Stack().Stats().TCP.EstablishedTimedout.Value(), origEstablishedTimedout+1; got != want { + t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout = %v, want = %v", got, want) + } } diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index fa4358ae4..761c3a9fe 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -206,7 +206,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { } // TODO(b/138400178): Fix cooperative S/R failure when ds.reset() is invoked // before function end. - // ds.reset() + // ds.reset(); } TEST_P(SocketInetLoopbackTest, TCPbacklog) { @@ -603,6 +603,60 @@ TEST_P(SocketInetLoopbackTest, TCPTimeWaitTest_NoRandomSave) { SyscallSucceeds()); } +TEST_P(SocketInetLoopbackTest, AcceptedInheritsTCPUserTimeout) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + + const uint16_t port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Set the userTimeout on the listening socket. + constexpr int kUserTimeout = 10; + ASSERT_THAT(setsockopt(listen_fd.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kUserTimeout, sizeof(kUserTimeout)), + SyscallSucceeds()); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + // Verify that the accepted socket inherited the user timeout set on + // listening socket. 
+ int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(accepted.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kUserTimeout); +} + INSTANTIATE_TEST_SUITE_P( All, SocketInetLoopbackTest, ::testing::Values( diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index c74273436..57ce8e169 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -812,5 +812,68 @@ TEST_P(TCPSocketPairTest, TestTCPCloseWithData) { ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); } +TEST_P(TCPSocketPairTest, TCPUserTimeoutDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); // 0 ms (disabled). +} + +TEST_P(TCPSocketPairTest, SetTCPUserTimeoutZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kZero, sizeof(kZero)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); // 0 ms (disabled). +} + +TEST_P(TCPSocketPairTest, SetTCPUserTimeoutBelowZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kNeg = -10; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kNeg, sizeof(kNeg)), + SyscallFailsWithErrno(EINVAL)); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); // 0 ms (disabled). +} + +TEST_P(TCPSocketPairTest, SetTCPUserTimeoutAboveZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kAbove = 10; + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kAbove, sizeof(kAbove)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kAbove); +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 99863b0ed..c503f3568 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -1175,6 +1175,31 @@ TEST_P(SimpleTcpSocketTest, SetMaxSegFailsForInvalidMSSValues) { } } +TEST_P(SimpleTcpSocketTest, SetTCPUserTimeout) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + { + constexpr int kTCPUserTimeout = -1; + EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kTCPUserTimeout, sizeof(kTCPUserTimeout)), + SyscallFailsWithErrno(EINVAL)); + } + + // kTCPUserTimeout is in milliseconds. 
+ constexpr int kTCPUserTimeout = 100; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, + &kTCPUserTimeout, sizeof(kTCPUserTimeout)), + SyscallSucceedsWithValue(0)); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kTCPUserTimeout); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); -- cgit v1.2.3 From 378d6c1f3697b8b939e6632e980562bfc8fb2781 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 12 Dec 2019 11:07:25 -0800 Subject: unix: allow to bind unix sockets only to AF_UNIX addresses Reported-by: syzbot+2c0bcfd87fb4e8b7b009@syzkaller.appspotmail.com PiperOrigin-RevId: 285228312 --- pkg/sentry/socket/netstack/netstack.go | 2 +- pkg/sentry/socket/unix/unix.go | 3 +++ test/syscalls/linux/socket_unix.cc | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 8a6522eac..140851c17 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -326,7 +326,7 @@ func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, } family := usermem.ByteOrder.Uint16(addr) - if family != uint16(sfamily) && (!strict && family != linux.AF_UNSPEC) { + if family != uint16(sfamily) && (strict || family != linux.AF_UNSPEC) { return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported } diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 1aaae8487..885758054 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -118,6 +118,9 @@ func (s *SocketOperations) Endpoint() transport.Endpoint { func extractPath(sockaddr []byte) (string, *syserr.Error) { addr, _, err := netstack.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */) if err != nil { + if err == syserr.ErrAddressFamilyNotSupported { + err = syserr.ErrInvalidArgument + } return "", err } diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc index 8a28202a8..4cf1f76f1 100644 --- a/test/syscalls/linux/socket_unix.cc +++ b/test/syscalls/linux/socket_unix.cc @@ -65,6 +65,21 @@ TEST_P(UnixSocketPairTest, BindToBadName) { SyscallFailsWithErrno(ENOENT)); } +TEST_P(UnixSocketPairTest, BindToBadFamily) { + auto pair = + ASSERT_NO_ERRNO_AND_VALUE(UnixDomainSocketPair(SOCK_SEQPACKET).Create()); + + constexpr char kBadName[] = "/some/path/that/does/not/exist"; + sockaddr_un sockaddr; + sockaddr.sun_family = AF_INET; + memcpy(sockaddr.sun_path, kBadName, sizeof(kBadName)); + + EXPECT_THAT( + bind(pair->first_fd(), reinterpret_cast(&sockaddr), + sizeof(sockaddr)), + SyscallFailsWithErrno(EINVAL)); +} + TEST_P(UnixSocketPairTest, RecvmmsgTimeoutAfterRecv) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); char sent_data[10]; -- cgit v1.2.3 From be2754a4b99cc92f13f479f74a5da8b0e6cb5839 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 12 Dec 2019 14:40:36 -0800 Subject: Add iptables testing framework. It would be preferrable to test iptables via syscall tests, but there are some problems with that approach: * We're limited to loopback-only, as syscall tests involve only a single container. Other link interfaces (e.g. fdbased) should be tested. 
* We'd have to shell out to call iptables anyways, as the iptables syscall interface itself is too large and complex to work with alone. * Running the Linux/native version of the syscall test will require root, which is a pain to configure, is inherently unsafe, and could leave host iptables misconfigured. Using the go_test target allows there to be no new test runner. PiperOrigin-RevId: 285274275 --- WORKSPACE | 39 +++++++++ kokoro/iptables_tests.cfg | 10 +++ runsc/dockerutil/dockerutil.go | 10 +++ scripts/iptables_tests.sh | 27 ++++++ test/iptables/BUILD | 31 +++++++ test/iptables/README.md | 44 ++++++++++ test/iptables/filter_input.go | 124 ++++++++++++++++++++++++++++ test/iptables/iptables.go | 53 ++++++++++++ test/iptables/iptables_test.go | 179 ++++++++++++++++++++++++++++++++++++++++ test/iptables/iptables_util.go | 82 ++++++++++++++++++ test/iptables/runner/BUILD | 16 ++++ test/iptables/runner/Dockerfile | 4 + test/iptables/runner/main.go | 70 ++++++++++++++++ 13 files changed, 689 insertions(+) create mode 100644 kokoro/iptables_tests.cfg create mode 100755 scripts/iptables_tests.sh create mode 100644 test/iptables/BUILD create mode 100644 test/iptables/README.md create mode 100644 test/iptables/filter_input.go create mode 100644 test/iptables/iptables.go create mode 100644 test/iptables/iptables_test.go create mode 100644 test/iptables/iptables_util.go create mode 100644 test/iptables/runner/BUILD create mode 100644 test/iptables/runner/Dockerfile create mode 100644 test/iptables/runner/main.go (limited to 'test') diff --git a/WORKSPACE b/WORKSPACE index 4561ed8fc..4b5a3bfe2 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -106,6 +106,45 @@ load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") rules_pkg_dependencies() +# Container rules. +http_archive( + name = "io_bazel_rules_docker", + sha256 = "14ac30773fdb393ddec90e158c9ec7ebb3f8a4fd533ec2abbfd8789ad81a284b", + strip_prefix = "rules_docker-0.12.1", + urls = ["https://github.com/bazelbuild/rules_docker/releases/download/v0.12.1/rules_docker-v0.12.1.tar.gz"], +) + +load( + "@io_bazel_rules_docker//repositories:repositories.bzl", + container_repositories = "repositories", +) + +container_repositories() + +load("@io_bazel_rules_docker//repositories:deps.bzl", container_deps = "deps") + +container_deps() + +load( + "@io_bazel_rules_docker//container:container.bzl", + "container_pull", +) + +# This container is built from the Dockerfile in test/iptables/runner. +container_pull( + name = "iptables-test", + registry = "gcr.io", + repository = "gvisor-presubmit/iptables-test", + digest = "sha256:a137d692a2eb9fc7bf95c5f4a568da090e2c31098e93634421ed88f3a3f1db65", +) + +load( + "@io_bazel_rules_docker//go:image.bzl", + _go_image_repos = "repositories", +) + +_go_image_repos() + # External repositories, in sorted order. 
go_repository( name = "com_github_cenkalti_backoff", diff --git a/kokoro/iptables_tests.cfg b/kokoro/iptables_tests.cfg new file mode 100644 index 000000000..7af20629a --- /dev/null +++ b/kokoro/iptables_tests.cfg @@ -0,0 +1,10 @@ +build_file: "repo/scripts/iptables_test.sh" + +action { + define_artifacts { + regex: "**/sponge_log.xml" + regex: "**/sponge_log.log" + regex: "**/outputs.zip" + regex: "**/runsc_logs_*.tar.gz" + } +} diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go index 57f6ae8de..9b6346ca2 100644 --- a/runsc/dockerutil/dockerutil.go +++ b/runsc/dockerutil/dockerutil.go @@ -380,6 +380,16 @@ func (d *Docker) FindPort(sandboxPort int) (int, error) { return port, nil } +// FindIP returns the IP address of the container as a string. +func (d *Docker) FindIP() (string, error) { + const format = `{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}` + out, err := do("inspect", "-f", format, d.Name) + if err != nil { + return "", fmt.Errorf("error retrieving IP: %v", err) + } + return strings.TrimSpace(out), nil +} + // SandboxPid returns the PID to the sandbox process. func (d *Docker) SandboxPid() (int, error) { out, err := do("inspect", "-f={{.State.Pid}}", d.Name) diff --git a/scripts/iptables_tests.sh b/scripts/iptables_tests.sh new file mode 100755 index 000000000..c47cbd675 --- /dev/null +++ b/scripts/iptables_tests.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Copyright 2018 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +source $(dirname $0)/common.sh + +install_runsc_for_test iptables + +# Build the docker image for the test. +run //test/iptables/runner --norun + +# TODO(gvisor.dev/issue/170): Also test this on runsc once iptables are better +# supported +test //test/iptables:iptables_test "--test_arg=--runtime=runc" \ + "--test_arg=--image=bazel/test/iptables/runner:runner" diff --git a/test/iptables/BUILD b/test/iptables/BUILD new file mode 100644 index 000000000..fa833c3b2 --- /dev/null +++ b/test/iptables/BUILD @@ -0,0 +1,31 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "iptables", + srcs = [ + "filter_input.go", + "iptables.go", + "iptables_util.go", + ], + importpath = "gvisor.dev/gvisor/test/iptables", + visibility = ["//test/iptables:__subpackages__"], +) + +go_test( + name = "iptables_test", + srcs = [ + "iptables_test.go", + ], + embed = [":iptables"], + tags = [ + "local", + "manual", + ], + deps = [ + "//pkg/log", + "//runsc/dockerutil", + "//runsc/testutil", + ], +) diff --git a/test/iptables/README.md b/test/iptables/README.md new file mode 100644 index 000000000..b37cb2a96 --- /dev/null +++ b/test/iptables/README.md @@ -0,0 +1,44 @@ +# iptables Tests + +iptables tests are run via `scripts/iptables\_test.sh`. + +## Test Structure + +Each test implements `TestCase`, providing (1) a function to run inside the +container and (2) a function to run locally. Those processes are given each +others' IP addresses. 
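To make that shape concrete, here is a hypothetical, minimal `TestCase` sketch (editor's illustration, not one of the tests added in this change). It assumes the package helpers introduced later in this patch (`filterTable`, `listenUDP`, `sendUDPLoop`, `RegisterTestCase`, and the `acceptPort`/`sendloopDuration` constants), so treat the names and behavior as illustrative only:

```go
// Hypothetical example file in package iptables, shown only to illustrate
// the TestCase shape described above.
package iptables

import "net"

// FilterInputAllowAll sets an ACCEPT policy and expects UDP delivery.
type FilterInputAllowAll struct{}

// Name implements TestCase.Name.
func (FilterInputAllowAll) Name() string { return "FilterInputAllowAll" }

// ContainerAction implements TestCase.ContainerAction: configure iptables,
// then wait for a packet from the local process.
func (FilterInputAllowAll) ContainerAction(ip net.IP) error {
	if err := filterTable("-P", "INPUT", "ACCEPT"); err != nil {
		return err
	}
	return listenUDP(acceptPort, sendloopDuration)
}

// LocalAction implements TestCase.LocalAction: send packets at the container.
func (FilterInputAllowAll) LocalAction(ip net.IP) error {
	return sendUDPLoop(ip, acceptPort, sendloopDuration)
}

// Registering in init is what lets the in-container runner look the test up
// by its --name flag.
func init() {
	RegisterTestCase(FilterInputAllowAll{})
}
```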
The test succeeds when both functions succeed. + +The function inside the container (`ContainerAction`) typically sets some +iptables rules and then tries to send or receive packets. The local function +(`LocalAction`) will typically just send or receive packets. + +### Adding Tests + +1) Add your test to the `iptables` package. + +2) Register the test in an `init` function via `RegisterTestCase` (see +`filter_input.go` as an example). + +3) Add it to `iptables_test.go` (see the other tests in that file). + +Your test is now runnable with bazel! + +## Run individual tests + +Build the testing Docker container: + +```bash +$ bazel run //test/iptables/runner -- --norun +``` + +Run an individual test via: + +```bash +$ bazel test //test/iptables:iptables_test --test_filter= +``` + +To run an individual test with `runc`: + +```bash +$ bazel test //test/iptables:iptables_test --test_filter= --test_arg=--runtime=runc +``` diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go new file mode 100644 index 000000000..923f44e68 --- /dev/null +++ b/test/iptables/filter_input.go @@ -0,0 +1,124 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "fmt" + "net" + "time" +) + +const ( + dropPort = 2401 + acceptPort = 2402 + sendloopDuration = 2 * time.Second + network = "udp4" +) + +func init() { + RegisterTestCase(FilterInputDropUDP{}) + RegisterTestCase(FilterInputDropUDPPort{}) + RegisterTestCase(FilterInputDropDifferentUDPPort{}) +} + +// FilterInputDropUDP tests that we can drop UDP traffic. +type FilterInputDropUDP struct{} + +// Name implements TestCase.Name. +func (FilterInputDropUDP) Name() string { + return "FilterInputDropUDP" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDropUDP) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { + return err + } + + // Listen for UDP packets on dropPort. + if err := listenUDP(dropPort, sendloopDuration); err == nil { + return fmt.Errorf("packets on port %d should have been dropped, but got a packet", dropPort) + } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + return fmt.Errorf("error reading: %v", err) + } + + // At this point we know that reading timed out and never received a + // packet. + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputDropUDP) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, dropPort, sendloopDuration) +} + +// FilterInputDropUDPPort tests that we can drop UDP traffic by port. +type FilterInputDropUDPPort struct{} + +// Name implements TestCase.Name. +func (FilterInputDropUDPPort) Name() string { + return "FilterInputDropUDPPort" +} + +// ContainerAction implements TestCase.ContainerAction. 
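+// Unlike FilterInputDropUDP above, this variant drops only datagrams whose
+// destination port is dropPort; the listen-and-expect-timeout check below is
+// unchanged.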
+func (FilterInputDropUDPPort) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + return err + } + + // Listen for UDP packets on dropPort. + if err := listenUDP(dropPort, sendloopDuration); err == nil { + return fmt.Errorf("packets on port %d should have been dropped, but got a packet", dropPort) + } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + return fmt.Errorf("error reading: %v", err) + } + + // At this point we know that reading timed out and never received a + // packet. + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputDropUDPPort) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, dropPort, sendloopDuration) +} + +// FilterInputDropDifferentUDPPort tests that dropping traffic for a single UDP port +// doesn't drop packets on other ports. +type FilterInputDropDifferentUDPPort struct{} + +// Name implements TestCase.Name. +func (FilterInputDropDifferentUDPPort) Name() string { + return "FilterInputDropDifferentUDPPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + return err + } + + // Listen for UDP packets on another port. + if err := listenUDP(acceptPort, sendloopDuration); err != nil { + return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", acceptPort, err) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} diff --git a/test/iptables/iptables.go b/test/iptables/iptables.go new file mode 100644 index 000000000..2e565d988 --- /dev/null +++ b/test/iptables/iptables.go @@ -0,0 +1,53 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package iptables contains a set of iptables tests implemented as TestCases +package iptables + +import ( + "fmt" + "net" +) + +// IPExchangePort is the port the container listens on to receive the IP +// address of the local process. +const IPExchangePort = 2349 + +// A TestCase contains one action to run in the container and one to run +// locally. The actions run concurrently and each must succeed for the test +// pass. +type TestCase interface { + // Name returns the name of the test. + Name() string + + // ContainerAction runs inside the container. It receives the IP of the + // local process. + ContainerAction(ip net.IP) error + + // LocalAction runs locally. It receives the IP of the container. + LocalAction(ip net.IP) error +} + +// Tests maps test names to TestCase. +// +// New TestCases are added by calling RegisterTestCase in an init function. 
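+// Test names must be unique: RegisterTestCase panics if a name is registered
+// twice.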
+var Tests = map[string]TestCase{} + +// RegisterTestCase registers tc so it can be run. +func RegisterTestCase(tc TestCase) { + if _, ok := Tests[tc.Name()]; ok { + panic(fmt.Sprintf("TestCase %s already registered.", tc.Name())) + } + Tests[tc.Name()] = tc +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go new file mode 100644 index 000000000..bfbf1bb87 --- /dev/null +++ b/test/iptables/iptables_test.go @@ -0,0 +1,179 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "fmt" + "net" + "os" + "path" + "testing" + "time" + + "flag" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/dockerutil" + "gvisor.dev/gvisor/runsc/testutil" +) + +const timeout time.Duration = 10 * time.Second + +var image = flag.String("image", "bazel/test/iptables/runner:runner", "image to run tests in") + +type result struct { + output string + err error +} + +// singleTest runs a TestCase. Each test follows a pattern: +// - Create a container. +// - Get the container's IP. +// - Send the container our IP. +// - Start a new goroutine running the local action of the test. +// - Wait for both the container and local actions to finish. +// +// Container output is logged to $TEST_UNDECLARED_OUTPUTS_DIR if it exists, or +// to stderr. +func singleTest(test TestCase) error { + if _, ok := Tests[test.Name()]; !ok { + return fmt.Errorf("no test found with name %q. Has it been registered?", test.Name()) + } + + // Create and start the container. + cont := dockerutil.MakeDocker("gvisor-iptables") + defer cont.CleanUp() + resultChan := make(chan *result) + go func() { + output, err := cont.RunFg("--cap-add=NET_ADMIN", *image, "-name", test.Name()) + logContainer(output, err) + resultChan <- &result{output, err} + }() + + // Get the container IP. + ip, err := getIP(cont) + if err != nil { + return fmt.Errorf("failed to get container IP: %v", err) + } + + // Give the container our IP. + if err := sendIP(ip); err != nil { + return fmt.Errorf("failed to send IP to container: %v", err) + } + + // Run our side of the test. + errChan := make(chan error) + go func() { + errChan <- test.LocalAction(ip) + }() + + // Wait for both the container and local tests to finish. + var res *result + to := time.After(timeout) + for localDone := false; res == nil || !localDone; { + select { + case res = <-resultChan: + log.Infof("Container finished.") + case err, localDone = <-errChan: + log.Infof("Local finished.") + if err != nil { + return fmt.Errorf("local test failed: %v", err) + } + case <-to: + return fmt.Errorf("timed out after %f seconds", timeout.Seconds()) + } + } + + return res.err +} + +func getIP(cont dockerutil.Docker) (net.IP, error) { + // The container might not have started yet, so retry a few times. 
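+	// Poll FindIP every 250ms until it reports an address or the overall
+	// timeout expires.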
+ var ipStr string + to := time.After(timeout) + for ipStr == "" { + ipStr, _ = cont.FindIP() + select { + case <-to: + return net.IP{}, fmt.Errorf("timed out getting IP after %f seconds", timeout.Seconds()) + default: + time.Sleep(250 * time.Millisecond) + } + } + ip := net.ParseIP(ipStr) + if ip == nil { + return net.IP{}, fmt.Errorf("invalid IP: %q", ipStr) + } + log.Infof("Container has IP of %s", ipStr) + return ip, nil +} + +func sendIP(ip net.IP) error { + contAddr := net.TCPAddr{ + IP: ip, + Port: IPExchangePort, + } + var conn *net.TCPConn + // The container may not be listening when we first connect, so retry + // upon error. + cb := func() error { + c, err := net.DialTCP("tcp4", nil, &contAddr) + conn = c + return err + } + if err := testutil.Poll(cb, timeout); err != nil { + return fmt.Errorf("timed out waiting to send IP, most recent error: %v", err) + } + if _, err := conn.Write([]byte{0}); err != nil { + return fmt.Errorf("error writing to container: %v", err) + } + return nil +} + +func logContainer(output string, err error) { + msg := fmt.Sprintf("Container error: %v\nContainer output:\n%v", err, output) + if artifactsDir := os.Getenv("TEST_UNDECLARED_OUTPUTS_DIR"); artifactsDir != "" { + fpath := path.Join(artifactsDir, "container.log") + if file, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE, 0644); err != nil { + log.Warningf("Failed to open log file %q: %v", fpath, err) + } else { + defer file.Close() + if _, err := file.Write([]byte(msg)); err == nil { + return + } + log.Warningf("Failed to write to log file %s: %v", fpath, err) + } + } + + // We couldn't write to the output directory -- just log to stderr. + log.Infof(msg) +} + +func TestFilterInputDropUDP(t *testing.T) { + if err := singleTest(FilterInputDropUDP{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDropUDPPort(t *testing.T) { + if err := singleTest(FilterInputDropUDPPort{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDropDifferentUDPPort(t *testing.T) { + if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { + t.Fatal(err) + } +} diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go new file mode 100644 index 000000000..3a4d11f1a --- /dev/null +++ b/test/iptables/iptables_util.go @@ -0,0 +1,82 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "fmt" + "net" + "os/exec" + "time" +) + +const iptablesBinary = "iptables" + +// filterTable calls `iptables -t filter` with the given args. +func filterTable(args ...string) error { + args = append([]string{"-t", "filter"}, args...) + cmd := exec.Command(iptablesBinary, args...) + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("error running iptables with args %v\nerror: %v\noutput: %s", args, err, string(out)) + } + return nil +} + +// listenUDP listens on a UDP port and returns the value of net.Conn.Read() for +// the first read on that port. 
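+// A read deadline is set below, so a timeout error means no packet arrived,
+// which is what the DROP tests treat as success.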
+func listenUDP(port int, timeout time.Duration) error { + localAddr := net.UDPAddr{ + Port: port, + } + conn, err := net.ListenUDP(network, &localAddr) + if err != nil { + return err + } + defer conn.Close() + conn.SetDeadline(time.Now().Add(timeout)) + _, err = conn.Read([]byte{0}) + return err +} + +// sendUDPLoop sends 1 byte UDP packets repeatedly to the IP and port specified +// over a duration. +func sendUDPLoop(ip net.IP, port int, duration time.Duration) error { + // Send packets for a few seconds. + remote := net.UDPAddr{ + IP: ip, + Port: port, + } + conn, err := net.DialUDP(network, nil, &remote) + if err != nil { + return err + } + defer conn.Close() + + to := time.After(duration) + for timedOut := false; !timedOut; { + // This may return an error (connection refused) if the remote + // hasn't started listening yet or they're dropping our + // packets. So we ignore Write errors and depend on the remote + // to report a failure if it doesn't get a packet it needs. + conn.Write([]byte{0}) + select { + case <-to: + timedOut = true + default: + time.Sleep(200 * time.Millisecond) + } + } + + return nil +} diff --git a/test/iptables/runner/BUILD b/test/iptables/runner/BUILD new file mode 100644 index 000000000..1c59e26b9 --- /dev/null +++ b/test/iptables/runner/BUILD @@ -0,0 +1,16 @@ +load("@io_bazel_rules_docker//container:container.bzl", "container_image") +load("@io_bazel_rules_docker//go:image.bzl", "go_image") + +package(licenses = ["notice"]) + +container_image( + name = "iptables-base", + base = "@iptables-test//image", +) + +go_image( + name = "runner", + srcs = ["main.go"], + base = ":iptables-base", + deps = ["//test/iptables"], +) diff --git a/test/iptables/runner/Dockerfile b/test/iptables/runner/Dockerfile new file mode 100644 index 000000000..b77db44a1 --- /dev/null +++ b/test/iptables/runner/Dockerfile @@ -0,0 +1,4 @@ +# This Dockerfile builds the image hosted at +# gcr.io/gvisor-presubmit/iptables-test. +FROM ubuntu +RUN apt update && apt install -y iptables diff --git a/test/iptables/runner/main.go b/test/iptables/runner/main.go new file mode 100644 index 000000000..3c794114e --- /dev/null +++ b/test/iptables/runner/main.go @@ -0,0 +1,70 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package main runs iptables tests from within a docker container. +package main + +import ( + "flag" + "fmt" + "log" + "net" + + "gvisor.dev/gvisor/test/iptables" +) + +var name = flag.String("name", "", "name of the test to run") + +func main() { + flag.Parse() + + // Find out which test we're running. + test, ok := iptables.Tests[*name] + if !ok { + log.Fatalf("No test found named %q", *name) + } + log.Printf("Running test %q", *name) + + // Get the IP of the local process. + ip, err := getIP() + if err != nil { + log.Fatal(err) + } + + // Run the test. 
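+	// A failure here exits the container with a non-zero status, which the
+	// host-side driver surfaces as the test error.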
+ if err := test.ContainerAction(ip); err != nil { + log.Fatalf("Failed running test %q: %v", *name, err) + } +} + +// getIP listens for a connection from the local process and returns the source +// IP of that connection. +func getIP() (net.IP, error) { + localAddr := net.TCPAddr{ + Port: iptables.IPExchangePort, + } + listener, err := net.ListenTCP("tcp4", &localAddr) + if err != nil { + return net.IP{}, fmt.Errorf("failed listening for IP: %v", err) + } + defer listener.Close() + conn, err := listener.AcceptTCP() + if err != nil { + return net.IP{}, fmt.Errorf("failed accepting IP: %v", err) + } + defer conn.Close() + log.Printf("Connected to %v", conn.RemoteAddr()) + + return conn.RemoteAddr().(*net.TCPAddr).IP, nil +} -- cgit v1.2.3 From e6f4124afd951c3b089f9c75c499c14f4d90a590 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 16 Dec 2019 13:18:36 -0800 Subject: Implement checks for get/setxattr at the syscall layer. Add checks for input arguments, file type, permissions, etc. that match the Linux implementation. A call to get/setxattr that passes all the checks will still currently return EOPNOTSUPP. Actual support will be added in following commits. Only allow user.* extended attributes for the time being. PiperOrigin-RevId: 285835159 --- pkg/abi/linux/BUILD | 1 + pkg/abi/linux/xattr.go | 27 +++++ pkg/sentry/fs/inode.go | 8 ++ pkg/sentry/fs/inode_overlay.go | 5 + pkg/sentry/syscalls/linux/BUILD | 1 + pkg/sentry/syscalls/linux/linux64_amd64.go | 4 +- pkg/sentry/syscalls/linux/linux64_arm64.go | 4 +- pkg/sentry/syscalls/linux/sys_xattr.go | 169 +++++++++++++++++++++++++++++ test/syscalls/linux/xattr.cc | 79 +++++++------- 9 files changed, 253 insertions(+), 45 deletions(-) create mode 100644 pkg/abi/linux/xattr.go create mode 100644 pkg/sentry/syscalls/linux/sys_xattr.go (limited to 'test') diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index 51774c6b6..9553f164d 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -57,6 +57,7 @@ go_library( "uio.go", "utsname.go", "wait.go", + "xattr.go", ], importpath = "gvisor.dev/gvisor/pkg/abi/linux", visibility = ["//visibility:public"], diff --git a/pkg/abi/linux/xattr.go b/pkg/abi/linux/xattr.go new file mode 100644 index 000000000..a3b6406fa --- /dev/null +++ b/pkg/abi/linux/xattr.go @@ -0,0 +1,27 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Constants for extended attributes. +const ( + XATTR_NAME_MAX = 255 + XATTR_SIZE_MAX = 65536 + + XATTR_CREATE = 1 + XATTR_REPLACE = 2 + + XATTR_USER_PREFIX = "user." + XATTR_USER_PREFIX_LEN = len(XATTR_USER_PREFIX) +) diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 2d43dff1d..91e2fde2f 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -270,6 +270,14 @@ func (i *Inode) Getxattr(name string) (string, error) { return i.InodeOperations.Getxattr(i, name) } +// Setxattr calls i.InodeOperations.Setxattr with i as the Inode. 
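+// Like Getxattr above, it defers to the overlay implementation when the
+// Inode is overlaid.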
+func (i *Inode) Setxattr(name, value string) error { + if i.overlay != nil { + return overlaySetxattr(i.overlay, name, value) + } + return i.InodeOperations.Setxattr(i, name, value) +} + // Listxattr calls i.InodeOperations.Listxattr with i as the Inode. func (i *Inode) Listxattr() (map[string]struct{}, error) { if i.overlay != nil { diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index a09147080..63a991beb 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -552,6 +552,11 @@ func overlayGetxattr(o *overlayEntry, name string) (string, error) { return s, err } +// TODO(b/146028302): Support setxattr for overlayfs. +func overlaySetxattr(o *overlayEntry, name, value string) error { + return syserror.EOPNOTSUPP +} + func overlayListxattr(o *overlayEntry) (map[string]struct{}, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 4c0bf96e4..6766ba587 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -49,6 +49,7 @@ go_library( "sys_tls.go", "sys_utsname.go", "sys_write.go", + "sys_xattr.go", "timespec.go", ], importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls/linux", diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 797542d28..272ae9991 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -228,10 +228,10 @@ var AMD64 = &kernel.SyscallTable{ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), - 188: syscalls.Error("setxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 188: syscalls.PartiallySupported("setxattr", Setxattr, "Only supported for tmpfs.", nil), 189: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 190: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 191: syscalls.PartiallySupported("getxattr", Getxattr, "Only supported for tmpfs.", nil), 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index 2bc7faff5..3b584eed9 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -41,10 +41,10 @@ var ARM64 = &kernel.SyscallTable{ 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 5: syscalls.Error("setxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 5: syscalls.PartiallySupported("setxattr", Setxattr, "Only supported for tmpfs.", nil), 6: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 7: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 8: syscalls.PartiallySupported("getxattr", Getxattr, "Only supported for tmpfs.", nil), 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go new file mode 100644 index 000000000..97d9a65ea --- /dev/null +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -0,0 +1,169 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Getxattr implements linux syscall getxattr(2). +func Getxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + nameAddr := args[1].Pointer() + valueAddr := args[2].Pointer() + size := args[3].SizeT() + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + valueLen := 0 + err = fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + value, err := getxattr(t, d, dirPath, nameAddr) + if err != nil { + return err + } + + valueLen = len(value) + if size == 0 { + return nil + } + if size > linux.XATTR_SIZE_MAX { + size = linux.XATTR_SIZE_MAX + } + if valueLen > int(size) { + return syserror.ERANGE + } + + _, err = t.CopyOutBytes(valueAddr, []byte(value)) + return err + }) + if err != nil { + return 0, nil, err + } + return uintptr(valueLen), nil, nil +} + +// getxattr implements getxattr from the given *fs.Dirent. 
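+// It verifies the file type and read permission and validates the attribute
+// name (only the user.* namespace is allowed) before querying the filesystem.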
+func getxattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr usermem.Addr) (string, error) { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return "", syserror.ENOTDIR + } + + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil { + return "", err + } + + name, err := copyInXattrName(t, nameAddr) + if err != nil { + return "", err + } + + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return "", syserror.EOPNOTSUPP + } + + return d.Inode.Getxattr(name) +} + +// Setxattr implements linux syscall setxattr(2). +func Setxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + nameAddr := args[1].Pointer() + valueAddr := args[2].Pointer() + size := args[3].SizeT() + flags := args[4].Uint() + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 { + return 0, nil, syserror.EINVAL + } + + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + return setxattr(t, d, dirPath, nameAddr, valueAddr, size, flags) + }) +} + +// setxattr implements setxattr from the given *fs.Dirent. +func setxattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr, valueAddr usermem.Addr, size uint, flags uint32) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { + return err + } + + name, err := copyInXattrName(t, nameAddr) + if err != nil { + return err + } + + if size > linux.XATTR_SIZE_MAX { + return syserror.E2BIG + } + buf := make([]byte, size) + if _, err = t.CopyInBytes(valueAddr, buf); err != nil { + return err + } + value := string(buf) + + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + + return d.Inode.Setxattr(name, value) +} + +func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { + name, err := t.CopyInString(nameAddr, linux.XATTR_NAME_MAX+1) + if err != nil { + if err == syserror.ENAMETOOLONG { + return "", syserror.ERANGE + } + return "", err + } + if len(name) == 0 { + return "", syserror.ERANGE + } + return name, nil +} + +func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error { + // Restrict xattrs to regular files and directories. + // + // In Linux, this restriction technically only applies to xattrs in the + // "user.*" namespace, but we don't allow any other xattr prefixes anyway. + if !fs.IsRegular(i.StableAttr) && !fs.IsDir(i.StableAttr) { + if perms.Write { + return syserror.EPERM + } + return syserror.ENODATA + } + + return i.CheckPermission(t, perms) +} diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 3e07b634b..75740238c 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -28,6 +28,7 @@ #include "test/util/capability_util.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" +#include "test/util/test_util.h" namespace gvisor { namespace testing { @@ -37,9 +38,6 @@ namespace { class XattrTest : public FileTest {}; TEST_F(XattrTest, XattrNullName) { - // TODO(b/127675828): Support setxattr and getxattr. 
- SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), @@ -49,9 +47,6 @@ TEST_F(XattrTest, XattrNullName) { } TEST_F(XattrTest, XattrEmptyName) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0), @@ -60,16 +55,17 @@ TEST_F(XattrTest, XattrEmptyName) { } TEST_F(XattrTest, XattrLargeName) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); - EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), - SyscallSucceeds()); - EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), - SyscallSucceedsWithValue(0)); + + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallSucceedsWithValue(0)); + } name += "a"; EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), @@ -79,9 +75,6 @@ TEST_F(XattrTest, XattrLargeName) { } TEST_F(XattrTest, XattrInvalidPrefix) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); std::string name(XATTR_NAME_MAX, 'a'); EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), @@ -91,9 +84,6 @@ TEST_F(XattrTest, XattrInvalidPrefix) { } TEST_F(XattrTest, XattrReadOnly) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -102,22 +92,28 @@ TEST_F(XattrTest, XattrReadOnly) { char name[] = "user.abc"; char val = 'a'; size_t size = sizeof(val); - EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), + SyscallSucceeds()); + } ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IRUSR)); EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallFailsWithErrno(EACCES)); - char buf = '-'; - EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); - EXPECT_EQ(buf, val); + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, size), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); + } } TEST_F(XattrTest, XattrWriteOnly) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -128,7 +124,12 @@ TEST_F(XattrTest, XattrWriteOnly) { char name[] = "user.abc"; char val = 'a'; size_t size = sizeof(val); - EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + // TODO(b/127675828): Support setxattr and getxattr. 
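+  // Until then, only perform the initial setxattr when running on Linux.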
+ if (!IsRunningOnGvisor()) { + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), + SyscallSucceeds()); + } EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(EACCES)); } @@ -172,9 +173,6 @@ TEST_F(XattrTest, XattrOnSymlink) { } TEST_F(XattrTest, XattrOnInvalidFileTypes) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - char name[] = "user.abc"; char char_device[] = "/dev/zero"; @@ -226,9 +224,6 @@ TEST_F(XattrTest, SetxattrZeroSize) { } TEST_F(XattrTest, SetxattrSizeTooLarge) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); char name[] = "user.abc"; @@ -240,19 +235,24 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) { EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), SyscallFailsWithErrno(E2BIG)); - EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(getxattr(path, name, nullptr, 0), + SyscallFailsWithErrno(ENODATA)); + } } TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); char name[] = "user.abc"; EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), SyscallFailsWithErrno(EFAULT)); - EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(getxattr(path, name, nullptr, 0), + SyscallFailsWithErrno(ENODATA)); + } } TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { @@ -350,9 +350,6 @@ TEST_F(XattrTest, SetxattrReplaceFlag) { } TEST_F(XattrTest, SetxattrInvalidFlags) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); int invalid_flags = 0xff; EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags), -- cgit v1.2.3 From 67000b929b9f5a3aedf6f5f56611c76411d02d78 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 17 Dec 2019 06:31:41 -0800 Subject: Explicitly export files needed by other packages PiperOrigin-RevId: 285968611 --- test/syscalls/linux/BUILD | 1 + 1 file changed, 1 insertion(+) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 0bbaaf28a..e6568128e 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -9,6 +9,7 @@ package( exports_files( [ "socket.cc", + "socket_ip_loopback_blocking.cc", "socket_ipv4_udp_unbound_loopback.cc", "tcp_socket.cc", "udp_socket.cc", -- cgit v1.2.3 From 3f4d8fefb45d75937292302e4c158f76da5c7ca8 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 17 Dec 2019 10:08:47 -0800 Subject: Internal change. PiperOrigin-RevId: 286003946 --- pkg/tcpip/transport/tcp/connect.go | 8 +++++ pkg/tcpip/transport/tcp/tcp_test.go | 65 +++++++++++++++++++++++++++++++++ test/syscalls/linux/tcp_socket.cc | 72 +++++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 4c34fc9d2..cdd69f360 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -218,6 +218,14 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { // acceptable if the ack field acknowledges the SYN. 
if s.flagIsSet(header.TCPFlagRst) { if s.flagIsSet(header.TCPFlagAck) && s.ackNumber == h.iss+1 { + // RFC 793, page 67, states that "If the RST bit is set [and] If the ACK + // was acceptable then signal the user "error: connection reset", drop + // the segment, enter CLOSED state, delete TCB, and return." + h.ep.mu.Lock() + h.ep.workerCleanup = true + h.ep.mu.Unlock() + // Although the RFC above calls out ECONNRESET, Linux actually returns + // ECONNREFUSED here so we do as well. return tcpip.ErrConnectionRefused } return nil diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 2a83f7bcc..e8fe4dab5 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -1140,6 +1140,71 @@ func TestConnectBindToDevice(t *testing.T) { } } +func TestRstOnSynSent(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + // Create an endpoint, don't handshake because we want to interfere with the + // handshake process. + c.Create(-1) + + // Start connection attempt. + waitEntry, ch := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&waitEntry, waiter.EventOut) + defer c.WQ.EventUnregister(&waitEntry) + + addr := tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort} + if err := c.EP.Connect(addr); err != tcpip.ErrConnectStarted { + t.Fatalf("got Connect(%+v) = %v, want %s", addr, err, tcpip.ErrConnectStarted) + } + + // Receive SYN packet. + b := c.GetPacket() + checker.IPv4(t, b, + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagSyn), + ), + ) + + // Ensure that we've reached SynSent state + if got, want := tcp.EndpointState(c.EP.State()), tcp.StateSynSent; got != want { + t.Fatalf("got State() = %s, want %s", got, want) + } + tcpHdr := header.TCP(header.IPv4(b).Payload()) + c.IRS = seqnum.Value(tcpHdr.SequenceNumber()) + + // Send a packet with a proper ACK and a RST flag to cause the socket + // to Error and close out + iss := seqnum.Value(789) + rcvWnd := seqnum.Size(30000) + c.SendPacket(nil, &context.Headers{ + SrcPort: tcpHdr.DestinationPort(), + DstPort: tcpHdr.SourcePort(), + Flags: header.TCPFlagRst | header.TCPFlagAck, + SeqNum: iss, + AckNum: c.IRS.Add(1), + RcvWnd: rcvWnd, + TCPOpts: nil, + }) + + // Wait for receive to be notified. + select { + case <-ch: + case <-time.After(3 * time.Second): + t.Fatal("timed out waiting for packet to arrive") + } + + if _, _, err := c.EP.Read(nil); err != tcpip.ErrConnectionRefused { + t.Fatalf("got c.EP.Read(nil) = %v, want = %s", err, tcpip.ErrConnectionRefused) + } + + // Due to the RST the endpoint should be in an error state. + if got, want := tcp.EndpointState(c.EP.State()), tcp.StateError; got != want { + t.Fatalf("got State() = %s, want %s", got, want) + } +} + func TestOutOfOrderReceive(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index c503f3568..6b99c021d 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -967,6 +967,78 @@ TEST_P(SimpleTcpSocketTest, BlockingConnectRefused) { EXPECT_THAT(close(s.release()), SyscallSucceeds()); } +// Test that connecting to a non-listening port and thus receiving a RST is +// handled appropriately by the socket - the port that the socket was bound to +// is released and the expected error is returned. +TEST_P(SimpleTcpSocketTest, CleanupOnConnectionRefused) { + // Create a socket that is known to not be listening. 
As is it bound but not + // listening, when another socket connects to the port, it will refuse.. + FileDescriptor bound_s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage bound_addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t bound_addrlen = sizeof(bound_addr); + + ASSERT_THAT( + bind(bound_s.get(), reinterpret_cast(&bound_addr), + bound_addrlen), + SyscallSucceeds()); + + // Get the addresses the socket is bound to because the port is chosen by the + // stack. + ASSERT_THAT(getsockname(bound_s.get(), + reinterpret_cast(&bound_addr), + &bound_addrlen), + SyscallSucceeds()); + + // Create, initialize, and bind the socket that is used to test connecting to + // the non-listening port. + FileDescriptor client_s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + // Initialize client address to the loopback one. + sockaddr_storage client_addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t client_addrlen = sizeof(client_addr); + + ASSERT_THAT( + bind(client_s.get(), reinterpret_cast(&client_addr), + client_addrlen), + SyscallSucceeds()); + + ASSERT_THAT(getsockname(client_s.get(), + reinterpret_cast(&client_addr), + &client_addrlen), + SyscallSucceeds()); + + // Now the test: connect to the bound but not listening socket with the + // client socket. The bound socket should return a RST and cause the client + // socket to return an error and clean itself up immediately. + // The error being ECONNREFUSED diverges with RFC 793, page 37, but does what + // Linux does. + ASSERT_THAT(connect(client_s.get(), + reinterpret_cast(&bound_addr), + bound_addrlen), + SyscallFailsWithErrno(ECONNREFUSED)); + + FileDescriptor new_s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Test binding to the address from the client socket. This should be okay + // if it was dropped correctly. + ASSERT_THAT( + bind(new_s.get(), reinterpret_cast(&client_addr), + client_addrlen), + SyscallSucceeds()); + + // Attempt #2, with the new socket and reused addr our connect should fail in + // the same way as before, not with an EADDRINUSE. + ASSERT_THAT(connect(client_s.get(), + reinterpret_cast(&bound_addr), + bound_addrlen), + SyscallFailsWithErrno(ECONNREFUSED)); +} + // Test that we get an ECONNREFUSED with a nonblocking socket. TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) { FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( -- cgit v1.2.3 From 64d00cc63dc8c3cb5fde1f638d4525c8d329733d Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Tue, 17 Dec 2019 16:20:19 -0800 Subject: Internal change. 
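As background for the `CleanupOnConnectionRefused` test above: a SYN aimed at a port that is bound but not listening is answered with a RST, and the connecting socket reports ECONNREFUSED. A hypothetical standalone Go sketch of that sequence (editor's illustration, Linux-only, not part of this change):

```go
package main

import (
	"fmt"
	"net"
	"syscall"
)

func main() {
	// Bind, but do not listen: connections to this port will be refused.
	fd, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, 0)
	if err != nil {
		panic(err)
	}
	defer syscall.Close(fd)
	if err := syscall.Bind(fd, &syscall.SockaddrInet4{Addr: [4]byte{127, 0, 0, 1}}); err != nil {
		panic(err)
	}
	sa, err := syscall.Getsockname(fd)
	if err != nil {
		panic(err)
	}
	port := sa.(*syscall.SockaddrInet4).Port

	// The SYN is answered with a RST, so the dial fails with ECONNREFUSED.
	if _, err := net.Dial("tcp4", fmt.Sprintf("127.0.0.1:%d", port)); err != nil {
		fmt.Println(err) // e.g. "connect: connection refused"
	}
}
```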
PiperOrigin-RevId: 286083614 --- test/iptables/runner/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/iptables/runner/BUILD b/test/iptables/runner/BUILD index 1c59e26b9..c6c42d870 100644 --- a/test/iptables/runner/BUILD +++ b/test/iptables/runner/BUILD @@ -1,5 +1,5 @@ -load("@io_bazel_rules_docker//container:container.bzl", "container_image") load("@io_bazel_rules_docker//go:image.bzl", "go_image") +load("@io_bazel_rules_docker//container:container.bzl", "container_image") package(licenses = ["notice"]) -- cgit v1.2.3 From 65f53c583364295cbc211b38fae126fb88f08ec0 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Wed, 18 Dec 2019 12:28:13 -0800 Subject: Put GetSocketPairs() in unnamed namespace This avoids conflicting definitions of GetSocketPairs() in outer namespace when multiple such cc files are complied for one binary. PiperOrigin-RevId: 286243045 --- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket_ip_loopback_blocking.cc | 2 ++ test/syscalls/linux/socket_ip_tcp_loopback.cc | 2 ++ 3 files changed, 5 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index e6568128e..675ff5cdb 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -10,6 +10,7 @@ exports_files( [ "socket.cc", "socket_ip_loopback_blocking.cc", + "socket_ip_tcp_loopback.cc", "socket_ipv4_udp_unbound_loopback.cc", "tcp_socket.cc", "udp_socket.cc", diff --git a/test/syscalls/linux/socket_ip_loopback_blocking.cc b/test/syscalls/linux/socket_ip_loopback_blocking.cc index e58eedaba..fda252dd7 100644 --- a/test/syscalls/linux/socket_ip_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_loopback_blocking.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return VecCat( @@ -43,5 +44,6 @@ INSTANTIATE_TEST_SUITE_P( BlockingIPSockets, BlockingSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_loopback.cc b/test/syscalls/linux/socket_ip_tcp_loopback.cc index 831de53b8..9db3037bc 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return { @@ -34,5 +35,6 @@ INSTANTIATE_TEST_SUITE_P( AllUnixDomainSockets, AllSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 18d6e59b457c8a91bf7db518fbb9193c49d2ee7c Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Wed, 18 Dec 2019 13:06:02 -0800 Subject: Switch to netinet/tcp.h and poll.h to for better platform portability. 
PiperOrigin-RevId: 286249699 --- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket_inet_loopback.cc | 67 +++++++++++++++-------------- 2 files changed, 36 insertions(+), 32 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 675ff5cdb..064ce8429 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -9,6 +9,7 @@ package( exports_files( [ "socket.cc", + "socket_inet_loopback.cc", "socket_ip_loopback_blocking.cc", "socket_ip_tcp_loopback.cc", "socket_ipv4_udp_unbound_loopback.cc", diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 761c3a9fe..5bb9d2e99 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -13,12 +13,10 @@ // limitations under the License. #include -#include #include +#include #include #include -#include -#include #include #include @@ -46,6 +44,8 @@ namespace testing { namespace { +using ::testing::Gt; + PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { switch (family) { case AF_INET: @@ -976,41 +976,44 @@ TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort) { SyscallSucceedsWithValue(sizeof(i))); } - int epollfd; - ASSERT_THAT(epollfd = epoll_create1(0), SyscallSucceeds()); - + struct pollfd pollfds[kThreadCount]; for (int i = 0; i < kThreadCount; i++) { - int fd = listener_fds[i].get(); - struct epoll_event ev; - ev.data.fd = fd; - ev.events = EPOLLIN; - ASSERT_THAT(epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev), SyscallSucceeds()); + pollfds[i].fd = listener_fds[i].get(); + pollfds[i].events = POLLIN; } std::map portToFD; - for (int i = 0; i < kConnectAttempts * 2; i++) { - struct sockaddr_storage addr = {}; - socklen_t addrlen = sizeof(addr); - struct epoll_event ev; - int data, fd; + int received = 0; + while (received < kConnectAttempts * 2) { + ASSERT_THAT(poll(pollfds, kThreadCount, -1), + SyscallSucceedsWithValue(Gt(0))); - ASSERT_THAT(epoll_wait(epollfd, &ev, 1, -1), SyscallSucceedsWithValue(1)); + for (int i = 0; i < kThreadCount; i++) { + if ((pollfds[i].revents & POLLIN) == 0) { + continue; + } - fd = ev.data.fd; - EXPECT_THAT(RetryEINTR(recvfrom)(fd, &data, sizeof(data), 0, - reinterpret_cast(&addr), - &addrlen), - SyscallSucceedsWithValue(sizeof(data))); - uint16_t const port = - ASSERT_NO_ERRNO_AND_VALUE(AddrPort(connector.family(), addr)); - auto prev_port = portToFD.find(port); - // Check that all packets from one client have been delivered to the same - // server socket. - if (prev_port == portToFD.end()) { - portToFD[port] = ev.data.fd; - } else { - EXPECT_EQ(portToFD[port], ev.data.fd); + received++; + + const int fd = pollfds[i].fd; + struct sockaddr_storage addr = {}; + socklen_t addrlen = sizeof(addr); + int data; + EXPECT_THAT(RetryEINTR(recvfrom)( + fd, &data, sizeof(data), 0, + reinterpret_cast(&addr), &addrlen), + SyscallSucceedsWithValue(sizeof(data))); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(connector.family(), addr)); + auto prev_port = portToFD.find(port); + // Check that all packets from one client have been delivered to the + // same server socket. 
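+      // portToFD is keyed by the client's source port, which identifies the
+      // sending client across datagrams.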
+ if (prev_port == portToFD.end()) { + portToFD[port] = fd; + } else { + EXPECT_EQ(portToFD[port], fd); + } } } } @@ -1897,7 +1900,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, NoReusePortFollowingReusePort) { } INSTANTIATE_TEST_SUITE_P( - AllFamlies, SocketMultiProtocolInetLoopbackTest, + AllFamilies, SocketMultiProtocolInetLoopbackTest, ::testing::Values(ProtocolTestParam{"TCP", SOCK_STREAM}, ProtocolTestParam{"UDP", SOCK_DGRAM}), DescribeProtocolTestParam); -- cgit v1.2.3 From 57ce26c0b465dce332a59c9fabb05f737ff4241d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 18 Dec 2019 18:22:50 -0800 Subject: net/tcp: allow to call listen without bind When listen(2) is called on an unbound socket, the socket is automatically bound to a random free port with the local address set to INADDR_ANY. PiperOrigin-RevId: 286305906 --- pkg/tcpip/transport/tcp/endpoint.go | 13 ++++++++++++ test/syscalls/linux/socket_inet_loopback.cc | 33 +++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 9 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index dd8b47cbe..fe629aa40 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1974,6 +1974,15 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { return nil } + if e.state == StateInitial { + // The listen is called on an unbound socket, the socket is + // automatically bound to a random free port with the local + // address set to INADDR_ANY. + if err := e.bindLocked(tcpip.FullAddress{}); err != nil { + return err + } + } + // Endpoint must be bound before it can transition to listen mode. if e.state != StateBound { e.stats.ReadErrors.InvalidEndpointState.Increment() @@ -2033,6 +2042,10 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) (err *tcpip.Error) { e.mu.Lock() defer e.mu.Unlock() + return e.bindLocked(addr) +} + +func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { // Don't allow binding once endpoint is not in the initial state // anymore. This is because once the endpoint goes into a connected or // listen state, it is already bound. diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 5bb9d2e99..619d41901 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -102,19 +102,17 @@ TEST(BadSocketPairArgs, ValidateErrForBadCallsToSocketPair) { SyscallFailsWithErrno(EAFNOSUPPORT)); } -TEST_P(SocketInetLoopbackTest, TCP) { - auto const& param = GetParam(); - - TestAddress const& listener = param.listener; - TestAddress const& connector = param.connector; - +void tcpSimpleConnectTest(TestAddress const& listener, + TestAddress const& connector, bool unbound) { // Create the listening socket. const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); sockaddr_storage listen_addr = listener.addr; - ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), - listener.addr_len), - SyscallSucceeds()); + if (!unbound) { + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + } ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); // Get the port bound by the listening socket. 
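For reference, the semantics described in the commit message (listen(2) on an unbound TCP socket auto-binds it to INADDR_ANY and a free port) can be observed with a small standalone program. This is a hypothetical sketch, not part of the change:

```go
package main

import (
	"fmt"
	"syscall"
)

func main() {
	fd, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, 0)
	if err != nil {
		panic(err)
	}
	defer syscall.Close(fd)

	// No bind(2) here: listen directly on the fresh socket.
	if err := syscall.Listen(fd, syscall.SOMAXCONN); err != nil {
		panic(err)
	}

	// The kernel (or netstack, after this change) picked the port for us.
	sa, err := syscall.Getsockname(fd)
	if err != nil {
		panic(err)
	}
	fmt.Printf("auto-bound to port %d\n", sa.(*syscall.SockaddrInet4).Port)
}
```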
@@ -148,6 +146,23 @@ TEST_P(SocketInetLoopbackTest, TCP) { ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds()); } +TEST_P(SocketInetLoopbackTest, TCP) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + tcpSimpleConnectTest(listener, connector, true); +} + +TEST_P(SocketInetLoopbackTest, TCPListenUnbound) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + tcpSimpleConnectTest(listener, connector, false); +} + TEST_P(SocketInetLoopbackTest, TCPListenClose) { auto const& param = GetParam(); -- cgit v1.2.3 From 7419e0e5d74621b2be60e9b18e4e2d7bb2a65cc3 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 19 Dec 2019 16:05:35 -0800 Subject: Parameterize mmap tests. This test suite has existed for quite a while and has become kind of messy. Various tests can be joined together by parameterizing. PiperOrigin-RevId: 286482240 --- test/syscalls/linux/mmap.cc | 207 +++++++++++++++----------------------------- 1 file changed, 69 insertions(+), 138 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index 6f2639d8a..1c4d9f1c7 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -814,23 +814,27 @@ class MMapFileTest : public MMapTest { } }; +class MMapFileParamTest + : public MMapFileTest, + public ::testing::WithParamInterface> { + protected: + int prot() const { return std::get<0>(GetParam()); } + + int flags() const { return std::get<1>(GetParam()); } +}; + // MAP_POPULATE allowed. // There isn't a good way to verify it actually did anything. -// -// FIXME(b/37222275): Parameterize. -TEST_F(MMapFileTest, MapPopulate) { - ASSERT_THAT( - Map(0, kPageSize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd_.get(), 0), - SyscallSucceeds()); +TEST_P(MMapFileParamTest, MapPopulate) { + ASSERT_THAT(Map(0, kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), + SyscallSucceeds()); } // MAP_POPULATE on a short file. -// -// FIXME(b/37222275): Parameterize. -TEST_F(MMapFileTest, MapPopulateShort) { - ASSERT_THAT(Map(0, 2 * kPageSize, PROT_READ, MAP_PRIVATE | MAP_POPULATE, - fd_.get(), 0), - SyscallSucceeds()); +TEST_P(MMapFileParamTest, MapPopulateShort) { + ASSERT_THAT( + Map(0, 2 * kPageSize, prot(), flags() | MAP_POPULATE, fd_.get(), 0), + SyscallSucceeds()); } // Read contents from mapped file. @@ -901,16 +905,6 @@ TEST_F(MMapFileTest, WritePrivateOnReadOnlyFd) { reinterpret_cast(addr)); } -// MAP_PRIVATE PROT_READ is not allowed on write-only FDs. -TEST_F(MMapFileTest, ReadPrivateOnWriteOnlyFd) { - const FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); - - uintptr_t addr; - EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_PRIVATE, fd.get(), 0), - SyscallFailsWithErrno(EACCES)); -} - // MAP_SHARED PROT_WRITE not allowed on read-only FDs. TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) { const FileDescriptor fd = @@ -922,28 +916,13 @@ TEST_F(MMapFileTest, WriteSharedOnReadOnlyFd) { SyscallFailsWithErrno(EACCES)); } -// MAP_SHARED PROT_READ not allowed on write-only FDs. -// -// FIXME(b/37222275): Parameterize. 
-TEST_F(MMapFileTest, ReadSharedOnWriteOnlyFd) { - const FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); - - uintptr_t addr; - EXPECT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd.get(), 0), - SyscallFailsWithErrno(EACCES)); -} - -// MAP_SHARED PROT_WRITE not allowed on write-only FDs. -// The FD must always be readable. -// -// FIXME(b/37222275): Parameterize. -TEST_F(MMapFileTest, WriteSharedOnWriteOnlyFd) { +// The FD must be readable. +TEST_P(MMapFileParamTest, WriteOnlyFd) { const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(filename_, O_WRONLY)); uintptr_t addr; - EXPECT_THAT(addr = Map(0, kPageSize, PROT_WRITE, MAP_SHARED, fd.get(), 0), + EXPECT_THAT(addr = Map(0, kPageSize, prot(), flags(), fd.get(), 0), SyscallFailsWithErrno(EACCES)); } @@ -1182,7 +1161,7 @@ TEST_F(MMapFileTest, ReadSharedTruncateDownThenUp) { ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), SyscallSucceeds()); - // Check that the memory contains he file data. + // Check that the memory contains the file data. EXPECT_EQ(0, memcmp(reinterpret_cast(addr), buf.c_str(), kPageSize)); // Truncate down, then up. @@ -1371,125 +1350,68 @@ TEST_F(MMapFileTest, WritePrivate) { EqualsMemory(std::string(len, '\0'))); } -// SIGBUS raised when writing past end of file to a private mapping. -// -// FIXME(b/37222275): Parameterize. -TEST_F(MMapFileTest, SigBusDeathWritePrivate) { +// SIGBUS raised when reading or writing past end of a mapped file. +TEST_P(MMapFileParamTest, SigBusDeath) { SetupGvisorDeathTest(); uintptr_t addr; - ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, - fd_.get(), 0), + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), SyscallSucceeds()); - // MMapFileTest makes a file kPageSize/2 long. The entire first page will be - // accessible. Write just beyond that. - size_t len = strlen(kFileContents); - EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, - reinterpret_cast(addr + kPageSize)), - ::testing::KilledBySignal(SIGBUS), ""); -} - -// SIGBUS raised when reading past end of file on a shared mapping. -// -// FIXME(b/37222275): Parameterize. -TEST_F(MMapFileTest, SigBusDeathReadShared) { - SetupGvisorDeathTest(); - - uintptr_t addr; - ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), - SyscallSucceeds()); - - // MMapFileTest makes a file kPageSize/2 long. The entire first page will be - // accessible. Read just beyond that. - std::vector in(kPageSize); - EXPECT_EXIT( - std::copy(reinterpret_cast(addr + kPageSize), - reinterpret_cast(addr + kPageSize) + kPageSize, - in.data()), - ::testing::KilledBySignal(SIGBUS), ""); + auto* start = reinterpret_cast(addr + kPageSize); + + // MMapFileTest makes a file kPageSize/2 long. The entire first page should be + // accessible, but anything beyond it should not. + if (prot() & PROT_WRITE) { + // Write beyond first page. + size_t len = strlen(kFileContents); + EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, start), + ::testing::KilledBySignal(SIGBUS), ""); + } else { + // Read beyond first page. + std::vector in(kPageSize); + EXPECT_EXIT(std::copy(start, start + kPageSize, in.data()), + ::testing::KilledBySignal(SIGBUS), ""); + } } -// SIGBUS raised when reading past end of file on a shared mapping. +// Tests that SIGBUS is not raised when reading or writing to a file-mapped +// page before EOF, even if part of the mapping extends beyond EOF. // -// FIXME(b/37222275): Parameterize. 
-TEST_F(MMapFileTest, SigBusDeathWriteShared) { - SetupGvisorDeathTest(); - +// See b/27877699. +TEST_P(MMapFileParamTest, NoSigBusOnPagesBeforeEOF) { uintptr_t addr; - ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_.get(), 0), - SyscallSucceeds()); - - // MMapFileTest makes a file kPageSize/2 long. The entire first page will be - // accessible. Write just beyond that. - size_t len = strlen(kFileContents); - EXPECT_EXIT(std::copy(kFileContents, kFileContents + len, - reinterpret_cast(addr + kPageSize)), - ::testing::KilledBySignal(SIGBUS), ""); -} - -// Tests that SIGBUS is not raised when writing to a file-mapped page before -// EOF, even if part of the mapping extends beyond EOF. -TEST_F(MMapFileTest, NoSigBusOnPagesBeforeEOF) { - uintptr_t addr; - ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, - fd_.get(), 0), + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), SyscallSucceeds()); // The test passes if this survives. - size_t len = strlen(kFileContents); - std::copy(kFileContents, kFileContents + len, - reinterpret_cast(addr)); -} - -// Tests that SIGBUS is not raised when writing to a file-mapped page containing -// EOF, *after* the EOF for a private mapping. -TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFWritePrivate) { - uintptr_t addr; - ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, - fd_.get(), 0), - SyscallSucceeds()); - - // The test passes if this survives. (Technically addr+kPageSize/2 is already - // beyond EOF, but +1 to check for fencepost errors.) - size_t len = strlen(kFileContents); - std::copy(kFileContents, kFileContents + len, - reinterpret_cast(addr + (kPageSize / 2) + 1)); -} - -// Tests that SIGBUS is not raised when reading from a file-mapped page -// containing EOF, *after* the EOF for a shared mapping. -// -// FIXME(b/37222275): Parameterize. -TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFReadShared) { - uintptr_t addr; - ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), - SyscallSucceeds()); - - // The test passes if this survives. (Technically addr+kPageSize/2 is already - // beyond EOF, but +1 to check for fencepost errors.) auto* start = reinterpret_cast(addr + (kPageSize / 2) + 1); size_t len = strlen(kFileContents); - std::vector in(len); - std::copy(start, start + len, in.data()); + if (prot() & PROT_WRITE) { + std::copy(kFileContents, kFileContents + len, start); + } else { + std::vector in(len); + std::copy(start, start + len, in.data()); + } } -// Tests that SIGBUS is not raised when writing to a file-mapped page containing -// EOF, *after* the EOF for a shared mapping. -// -// FIXME(b/37222275): Parameterize. -TEST_F(MMapFileTest, NoSigBusOnPageContainingEOFWriteShared) { +// Tests that SIGBUS is not raised when reading or writing from a file-mapped +// page containing EOF, *after* the EOF. +TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) { uintptr_t addr; - ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_.get(), 0), + ASSERT_THAT(addr = Map(0, 2 * kPageSize, prot(), flags(), fd_.get(), 0), SyscallSucceeds()); // The test passes if this survives. (Technically addr+kPageSize/2 is already // beyond EOF, but +1 to check for fencepost errors.) 
+ auto* start = reinterpret_cast(addr + (kPageSize / 2) + 1); size_t len = strlen(kFileContents); - std::copy(kFileContents, kFileContents + len, - reinterpret_cast(addr + (kPageSize / 2) + 1)); + if (prot() & PROT_WRITE) { + std::copy(kFileContents, kFileContents + len, start); + } else { + std::vector in(len); + std::copy(start, start + len, in.data()); + } } // Tests that reading from writable shared file-mapped pages succeeds. @@ -1733,6 +1655,15 @@ TEST(MMapNoFixtureTest, Map32Bit) { #endif // defined(__x86_64__) +INSTANTIATE_TEST_SUITE_P( + ReadWriteSharedPrivate, MMapFileParamTest, + ::testing::Combine(::testing::ValuesIn({ + PROT_READ, + PROT_WRITE, + PROT_READ | PROT_WRITE, + }), + ::testing::ValuesIn({MAP_SHARED, MAP_PRIVATE}))); + } // namespace } // namespace testing -- cgit v1.2.3 From 29955a4797e8264f75886a989dbc81b2b5443f4c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 19 Dec 2019 17:25:18 -0800 Subject: futex: wake one waiter if futex_wake is called with a non-positive value This change is needed to be compatible with the Linux kernel. There is no glibc wrapper for the futex system call, so it is easy to make a mistake and call syscall(__NR_futex, FUTEX_WAKE, addr) without the fourth argument. This works on Linux, because it wakes one waiter even if val is nonpositive. PiperOrigin-RevId: 286494396 --- pkg/sentry/syscalls/linux/sys_futex.go | 10 ++++++++++ test/syscalls/linux/futex.cc | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index b9bd25464..bde17a767 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -226,6 +226,11 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if mask == 0 { return 0, nil, syserror.EINVAL } + if val <= 0 { + // The Linux kernel wakes one waiter even if val is + // non-positive. + val = 1 + } n, err := t.Futex().Wake(t, addr, private, mask, val) return uintptr(n), nil, err @@ -242,6 +247,11 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.FUTEX_WAKE_OP: op := uint32(val3) + if val <= 0 { + // The Linux kernel wakes one waiter even if val is + // non-positive. + val = 1 + } n, err := t.Futex().WakeOp(t, addr, naddr, private, val, nreq, op) return uintptr(n), nil, err diff --git a/test/syscalls/linux/futex.cc b/test/syscalls/linux/futex.cc index d3e3f998c..40c80a6e1 100644 --- a/test/syscalls/linux/futex.cc +++ b/test/syscalls/linux/futex.cc @@ -239,6 +239,27 @@ TEST_P(PrivateAndSharedFutexTest, Wake1_NoRandomSave) { EXPECT_THAT(futex_wake(IsPrivate(), &a, 1), SyscallSucceedsWithValue(1)); } +TEST_P(PrivateAndSharedFutexTest, Wake0_NoRandomSave) { + constexpr int kInitialValue = 1; + std::atomic a = ATOMIC_VAR_INIT(kInitialValue); + + // Prevent save/restore from interrupting futex_wait, which will cause it to + // return EAGAIN instead of the expected result if futex_wait is restarted + // after we change the value of a below. + DisableSave ds; + ScopedThread thread([&] { + EXPECT_THAT(futex_wait(IsPrivate(), &a, kInitialValue), + SyscallSucceedsWithValue(0)); + }); + absl::SleepFor(kWaiterStartupDelay); + + // Change a so that if futex_wake happens before futex_wait, the latter + // returns EAGAIN instead of hanging the test. + a.fetch_add(1); + // The Linux kernel wakes one waiter even if val is 0 or negative. 
+ EXPECT_THAT(futex_wake(IsPrivate(), &a, 0), SyscallSucceedsWithValue(1)); +} + TEST_P(PrivateAndSharedFutexTest, WakeAll_NoRandomSave) { constexpr int kInitialValue = 1; std::atomic a = ATOMIC_VAR_INIT(kInitialValue); -- cgit v1.2.3 From e013c48c78c9a7daf245b7de9563e3a0bd8a1e97 Mon Sep 17 00:00:00 2001 From: Ryan Heacock Date: Tue, 24 Dec 2019 08:48:14 -0800 Subject: Enable IP_RECVTOS socket option for datagram sockets Added the ability to get/set the IP_RECVTOS socket option on UDP endpoints. If enabled, TOS from the incoming Network Header passed as ancillary data in the ControlMessages. Test: * Added unit test to udp_test.go that tests getting/setting as well as verifying that we receive expected TOS from incoming packet. * Added a syscall test PiperOrigin-RevId: 287029703 --- pkg/sentry/socket/control/control.go | 2 +- pkg/sentry/socket/netstack/netstack.go | 42 ++++++++++++++++- pkg/tcpip/checker/checker.go | 16 +++++++ pkg/tcpip/stack/nic.go | 2 +- pkg/tcpip/stack/stack.go | 2 +- pkg/tcpip/tcpip.go | 6 ++- pkg/tcpip/transport/raw/endpoint.go | 2 +- pkg/tcpip/transport/udp/endpoint.go | 31 ++++++++++++- pkg/tcpip/transport/udp/udp_test.go | 69 ++++++++++++++++++++++++---- test/syscalls/linux/socket_ip_udp_generic.cc | 40 ++++++++++++++++ test/syscalls/linux/udp_socket_test_cases.cc | 8 ++-- 11 files changed, 201 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index af1a4e95f..b649dd021 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -327,7 +327,7 @@ func PackInq(t *kernel.Task, inq int32, buf []byte) []byte { } // PackTOS packs an IP_TOS socket control message. -func PackTOS(t *kernel.Task, tos int8, buf []byte) []byte { +func PackTOS(t *kernel.Task, tos uint8, buf []byte) []byte { return putCmsgStruct( buf, linux.SOL_IP, diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 140851c17..d2f263402 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1323,6 +1323,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in } return int32(v), nil + case linux.IP_RECVTOS: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + var v tcpip.ReceiveTOSOption + if err := ep.GetSockOpt(&v); err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + if v { + return int32(1), nil + } + return int32(0), nil + default: emitUnimplementedEventIP(t, name) } @@ -1808,6 +1823,16 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.IPv4TOSOption(v))) + case linux.IP_RECVTOS: + v, err := parseIntOrChar(optVal) + if err != nil { + return err + } + + return syserr.TranslateNetstackError(ep.SetSockOpt( + tcpip.ReceiveTOSOption(v != 0), + )) + case linux.IP_ADD_SOURCE_MEMBERSHIP, linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BLOCK_SOURCE, @@ -1828,7 +1853,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s linux.IP_RECVFRAGSIZE, linux.IP_RECVOPTS, linux.IP_RECVORIGDSTADDR, - linux.IP_RECVTOS, linux.IP_RECVTTL, linux.IP_RETOPTS, linux.IP_TRANSPARENT, @@ -2139,6 +2163,21 @@ func (s *SocketOperations) fillCmsgInq(cmsg *socket.ControlMessages) { cmsg.IP.Inq = int32(len(s.readView) + rcvBufUsed) } +func (s *SocketOperations) fillCmsgTOS(cmsg *socket.ControlMessages) { + if s.skType != linux.SOCK_DGRAM { + return 
+ } + var receiveTOS tcpip.ReceiveTOSOption + if err := s.Endpoint.GetSockOpt(&receiveTOS); err != nil { + return + } + if !receiveTOS { + return + } + cmsg.IP.HasTOS = s.readCM.HasTOS + cmsg.IP.TOS = s.readCM.TOS +} + // nonBlockingRead issues a non-blocking read. // // TODO(b/78348848): Support timestamps for stream sockets. @@ -2244,6 +2283,7 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe cmsg := s.controlMessages() s.fillCmsgInq(&cmsg) + s.fillCmsgTOS(&cmsg) return n, flags, addr, addrLen, cmsg, syserr.FromError(err) } diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index 2f15bf1f1..542abc99d 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -33,6 +33,9 @@ type NetworkChecker func(*testing.T, []header.Network) // TransportChecker is a function to check a property of a transport packet. type TransportChecker func(*testing.T, header.Transport) +// ControlMessagesChecker is a function to check a property of ancillary data. +type ControlMessagesChecker func(*testing.T, tcpip.ControlMessages) + // IPv4 checks the validity and properties of the given IPv4 packet. It is // expected to be used in conjunction with other network checkers for specific // properties. For example, to check the source and destination address, one @@ -158,6 +161,19 @@ func FragmentFlags(flags uint8) NetworkChecker { } } +// ReceiveTOS creates a checker that checks the TOS field in ControlMessages. +func ReceiveTOS(want uint8) ControlMessagesChecker { + return func(t *testing.T, cm tcpip.ControlMessages) { + t.Helper() + if !cm.HasTOS { + t.Fatalf("got cm.HasTOS = %t, want cm.TOS = %d", cm.HasTOS, want) + } + if got := cm.TOS; got != want { + t.Fatalf("got cm.TOS = %d, want %d", got, want) + } + } +} + // TOS creates a checker that checks the TOS field. func TOS(tos uint8, label uint32) NetworkChecker { return func(t *testing.T, h []header.Network) { diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index ddd014658..a4556674b 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -575,7 +575,7 @@ func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) { n.mu.Unlock() } -// Subnets returns the Subnets associated with this NIC. +// AddressRanges returns the Subnets associated with this NIC. func (n *NIC) AddressRanges() []tcpip.Subnet { n.mu.RLock() defer n.mu.RUnlock() diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 7a9600679..251336224 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -829,7 +829,7 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool { return false } -// NICSubnets returns a map of NICIDs to their associated subnets. +// NICAddressRanges returns a map of NICIDs to their associated subnets. func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet { s.mu.RLock() defer s.mu.RUnlock() diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index f62fd729f..5c7b2af88 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -322,7 +322,7 @@ type ControlMessages struct { HasTOS bool // TOS is the IPv4 type of service of the associated packet. - TOS int8 + TOS uint8 // HasTClass indicates whether Tclass is valid/set. HasTClass bool @@ -666,6 +666,10 @@ type IPv4TOSOption uint8 // for all subsequent outgoing IPv6 packets from the endpoint. type IPv6TrafficClassOption uint8 +// ReceiveTOSOption is used by SetSockOpt/GetSockOpt to specify if the TOS +// ancillary message is passed with incoming packets. 
+type ReceiveTOSOption bool + // Route is a row in the routing table. It specifies through which NIC (and // gateway) sets of packets should be routed. A row is considered viable if the // masked target address matches the destination address in the row. diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index 5aafe2615..6d23ab5a1 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -510,7 +510,7 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { } // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. -func (ep *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error { +func (e *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error { return tcpip.ErrUnknownProtocolOption } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 1ac4705af..269470ed4 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -32,6 +32,7 @@ type udpPacket struct { senderAddress tcpip.FullAddress data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 + tos uint8 } // EndpointState represents the state of a UDP endpoint. @@ -114,6 +115,10 @@ type endpoint struct { // applied while sending packets. Defaults to 0 as on Linux. sendTOS uint8 + // receiveTOS determines if the incoming IPv4 TOS header field is passed + // as ancillary data to ControlMessages on Read. + receiveTOS bool + // shutdownFlags represent the current shutdown state of the endpoint. shutdownFlags tcpip.ShutdownFlags @@ -244,7 +249,12 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess *addr = p.senderAddress } - return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil + return p.data.ToView(), tcpip.ControlMessages{ + HasTimestamp: true, + Timestamp: p.timestamp, + HasTOS: e.receiveTOS, + TOS: p.tos, + }, nil } // prepareForWrite prepares the endpoint for sending data. In particular, it @@ -656,6 +666,12 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.sendTOS = uint8(v) e.mu.Unlock() return nil + + case tcpip.ReceiveTOSOption: + e.mu.Lock() + e.receiveTOS = bool(v) + e.mu.Unlock() + return nil } return nil } @@ -792,6 +808,12 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.mu.RUnlock() return nil + case *tcpip.ReceiveTOSOption: + e.mu.RLock() + *o = tcpip.ReceiveTOSOption(e.receiveTOS) + e.mu.RUnlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -1238,6 +1260,13 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk e.rcvList.PushBack(packet) e.rcvBufSize += pkt.Data.Size() + // Save any useful information from the NetworkHeader to the packet. + switch r.NetProto { + case header.IPv4ProtocolNumber: + // This packet has already been validated before being passed up the stack. 
+ packet.tos, _ = header.IPv4(pkt.NetworkHeader).TOS() + } + packet.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 7051a7a9c..43b8b35ba 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -56,6 +56,7 @@ const ( multicastAddr = "\xe8\x2b\xd3\xea" multicastV6Addr = "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" broadcastAddr = header.IPv4Broadcast + testTOS = 0x80 // defaultMTU is the MTU, in bytes, used throughout the tests, except // where another value is explicitly used. It is chosen to match the MTU @@ -453,6 +454,7 @@ func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool ip := header.IPv4(buf) ip.Encode(&header.IPv4Fields{ IHL: header.IPv4MinimumSize, + TOS: testTOS, TotalLength: uint16(len(buf)), TTL: 65, Protocol: uint8(udp.ProtocolNumber), @@ -556,8 +558,8 @@ func TestBindToDeviceOption(t *testing.T) { // testReadInternal sends a packet of the given test flow into the stack by // injecting it into the link endpoint. It then attempts to read it from the // UDP endpoint and depending on if this was expected to succeed verifies its -// correctness. -func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool) { +// correctness including any additional checker functions provided. +func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool, checkers ...checker.ControlMessagesChecker) { c.t.Helper() payload := newPayload() @@ -572,12 +574,12 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe epstats := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone() var addr tcpip.FullAddress - v, _, err := c.ep.Read(&addr) + v, cm, err := c.ep.Read(&addr) if err == tcpip.ErrWouldBlock { // Wait for data to become available. select { case <-ch: - v, _, err = c.ep.Read(&addr) + v, cm, err = c.ep.Read(&addr) case <-time.After(300 * time.Millisecond): if packetShouldBeDropped { @@ -610,15 +612,21 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe if !bytes.Equal(payload, v) { c.t.Fatalf("bad payload: got %x, want %x", v, payload) } + + // Run any checkers against the ControlMessages. + for _, f := range checkers { + f(c.t, cm) + } + c.checkEndpointReadStats(1, epstats, err) } // testRead sends a packet of the given test flow into the stack by injecting it // into the link endpoint. It then reads it from the UDP endpoint and verifies -// its correctness. -func testRead(c *testContext, flow testFlow) { +// its correctness including any additional checker functions provided. +func testRead(c *testContext, flow testFlow, checkers ...checker.ControlMessagesChecker) { c.t.Helper() - testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */) + testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */, checkers...) 
} // testFailingRead sends a packet of the given test flow into the stack by @@ -1286,7 +1294,7 @@ func TestTOSV4(t *testing.T) { c.createEndpointForFlow(flow) - const tos = 0xC0 + const tos = testTOS var v tcpip.IPv4TOSOption if err := c.ep.GetSockOpt(&v); err != nil { c.t.Errorf("GetSockopt failed: %s", err) @@ -1321,7 +1329,7 @@ func TestTOSV6(t *testing.T) { c.createEndpointForFlow(flow) - const tos = 0xC0 + const tos = testTOS var v tcpip.IPv6TrafficClassOption if err := c.ep.GetSockOpt(&v); err != nil { c.t.Errorf("GetSockopt failed: %s", err) @@ -1348,6 +1356,49 @@ func TestTOSV6(t *testing.T) { } } +func TestReceiveTOSV4(t *testing.T) { + for _, flow := range []testFlow{unicastV4, broadcast} { + t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { + c := newDualTestContext(t, defaultMTU) + defer c.cleanup() + + c.createEndpointForFlow(flow) + + // Verify that setting and reading the option works. + const recvTos = true + var v tcpip.ReceiveTOSOption + if err := c.ep.GetSockOpt(&v); err != nil { + c.t.Errorf("GetSockopt failed: %s", err) + } + // Test for expected default value. + if v != false { + c.t.Errorf("got GetSockOpt(...) = %t, want = %t", v, false) + } + + if err := c.ep.SetSockOpt(tcpip.ReceiveTOSOption(recvTos)); err != nil { + c.t.Errorf("SetSockOpt(%#v) failed: %s", tcpip.ReceiveTOSOption(recvTos), err) + } + + if err := c.ep.GetSockOpt(&v); err != nil { + c.t.Errorf("GetSockopt failed: %s", err) + } + + if want := tcpip.ReceiveTOSOption(recvTos); v != want { + c.t.Errorf("got GetSockOpt(...) = %t, want = %t", v, want) + } + + // Bind to wildcard. + if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { + c.t.Fatalf("Bind failed: %s", err) + } + + // Verify that the correct received TOS is actually handed through as + // ancillary data to the ControlMessages struct. + testRead(c, flow, checker.ReceiveTOS(testTOS)) + }) + } +} + func TestMulticastInterfaceOption(t *testing.T) { for _, flow := range []testFlow{multicastV4, multicastV4in6, multicastV6, multicastV6Only} { t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index 66eb68857..53290bed7 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -209,6 +209,46 @@ TEST_P(UDPSocketPairTest, SetMulticastLoopChar) { EXPECT_EQ(get, kSockOptOn); } +// Ensure that Receiving TOS is off by default. +TEST_P(UDPSocketPairTest, RecvTosDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +// Test that setting and getting IP_RECVTOS works as expected. 
+TEST_P(UDPSocketPairTest, SetRecvTos) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + TEST_P(UDPSocketPairTest, ReuseAddrDefault) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index dc35c2f50..68e0a8109 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1349,8 +1349,9 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { // outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SetAndReceiveTOS) { - // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. - SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); + // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. + SKIP_IF((GetParam() != AddressFamily::kIpv4) && IsRunningOnGvisor() && + !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1421,7 +1422,8 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { // TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SendAndReceiveTOS) { - // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. + // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. + // TODO(b/146661005): Setting TOS via cmsg not supported for netstack. 
SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); -- cgit v1.2.3 From 87e4d03fdf576348ac7023c599e0fc66ad4cccbd Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 26 Dec 2019 13:04:14 -0800 Subject: Automated rollback of changelist 287029703 PiperOrigin-RevId: 287217899 --- pkg/sentry/socket/control/control.go | 2 +- pkg/sentry/socket/netstack/netstack.go | 42 +---------------- pkg/tcpip/checker/checker.go | 16 ------- pkg/tcpip/stack/nic.go | 2 +- pkg/tcpip/stack/stack.go | 2 +- pkg/tcpip/tcpip.go | 6 +-- pkg/tcpip/transport/raw/endpoint.go | 2 +- pkg/tcpip/transport/udp/endpoint.go | 31 +------------ pkg/tcpip/transport/udp/udp_test.go | 69 ++++------------------------ test/syscalls/linux/socket_ip_udp_generic.cc | 40 ---------------- test/syscalls/linux/udp_socket_test_cases.cc | 8 ++-- 11 files changed, 19 insertions(+), 201 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index b649dd021..af1a4e95f 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -327,7 +327,7 @@ func PackInq(t *kernel.Task, inq int32, buf []byte) []byte { } // PackTOS packs an IP_TOS socket control message. -func PackTOS(t *kernel.Task, tos uint8, buf []byte) []byte { +func PackTOS(t *kernel.Task, tos int8, buf []byte) []byte { return putCmsgStruct( buf, linux.SOL_IP, diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index d2f263402..140851c17 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1323,21 +1323,6 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in } return int32(v), nil - case linux.IP_RECVTOS: - if outLen < sizeOfInt32 { - return nil, syserr.ErrInvalidArgument - } - - var v tcpip.ReceiveTOSOption - if err := ep.GetSockOpt(&v); err != nil { - return nil, syserr.TranslateNetstackError(err) - } - - if v { - return int32(1), nil - } - return int32(0), nil - default: emitUnimplementedEventIP(t, name) } @@ -1823,16 +1808,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.IPv4TOSOption(v))) - case linux.IP_RECVTOS: - v, err := parseIntOrChar(optVal) - if err != nil { - return err - } - - return syserr.TranslateNetstackError(ep.SetSockOpt( - tcpip.ReceiveTOSOption(v != 0), - )) - case linux.IP_ADD_SOURCE_MEMBERSHIP, linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BLOCK_SOURCE, @@ -1853,6 +1828,7 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s linux.IP_RECVFRAGSIZE, linux.IP_RECVOPTS, linux.IP_RECVORIGDSTADDR, + linux.IP_RECVTOS, linux.IP_RECVTTL, linux.IP_RETOPTS, linux.IP_TRANSPARENT, @@ -2163,21 +2139,6 @@ func (s *SocketOperations) fillCmsgInq(cmsg *socket.ControlMessages) { cmsg.IP.Inq = int32(len(s.readView) + rcvBufUsed) } -func (s *SocketOperations) fillCmsgTOS(cmsg *socket.ControlMessages) { - if s.skType != linux.SOCK_DGRAM { - return - } - var receiveTOS tcpip.ReceiveTOSOption - if err := s.Endpoint.GetSockOpt(&receiveTOS); err != nil { - return - } - if !receiveTOS { - return - } - cmsg.IP.HasTOS = s.readCM.HasTOS - cmsg.IP.TOS = s.readCM.TOS -} - // nonBlockingRead issues a non-blocking read. // // TODO(b/78348848): Support timestamps for stream sockets. 
@@ -2283,7 +2244,6 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe cmsg := s.controlMessages() s.fillCmsgInq(&cmsg) - s.fillCmsgTOS(&cmsg) return n, flags, addr, addrLen, cmsg, syserr.FromError(err) } diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index 542abc99d..2f15bf1f1 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -33,9 +33,6 @@ type NetworkChecker func(*testing.T, []header.Network) // TransportChecker is a function to check a property of a transport packet. type TransportChecker func(*testing.T, header.Transport) -// ControlMessagesChecker is a function to check a property of ancillary data. -type ControlMessagesChecker func(*testing.T, tcpip.ControlMessages) - // IPv4 checks the validity and properties of the given IPv4 packet. It is // expected to be used in conjunction with other network checkers for specific // properties. For example, to check the source and destination address, one @@ -161,19 +158,6 @@ func FragmentFlags(flags uint8) NetworkChecker { } } -// ReceiveTOS creates a checker that checks the TOS field in ControlMessages. -func ReceiveTOS(want uint8) ControlMessagesChecker { - return func(t *testing.T, cm tcpip.ControlMessages) { - t.Helper() - if !cm.HasTOS { - t.Fatalf("got cm.HasTOS = %t, want cm.TOS = %d", cm.HasTOS, want) - } - if got := cm.TOS; got != want { - t.Fatalf("got cm.TOS = %d, want %d", got, want) - } - } -} - // TOS creates a checker that checks the TOS field. func TOS(tos uint8, label uint32) NetworkChecker { return func(t *testing.T, h []header.Network) { diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index a4556674b..ddd014658 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -575,7 +575,7 @@ func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) { n.mu.Unlock() } -// AddressRanges returns the Subnets associated with this NIC. +// Subnets returns the Subnets associated with this NIC. func (n *NIC) AddressRanges() []tcpip.Subnet { n.mu.RLock() defer n.mu.RUnlock() diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 251336224..7a9600679 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -829,7 +829,7 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool { return false } -// NICAddressRanges returns a map of NICIDs to their associated subnets. +// NICSubnets returns a map of NICIDs to their associated subnets. func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet { s.mu.RLock() defer s.mu.RUnlock() diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 5c7b2af88..f62fd729f 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -322,7 +322,7 @@ type ControlMessages struct { HasTOS bool // TOS is the IPv4 type of service of the associated packet. - TOS uint8 + TOS int8 // HasTClass indicates whether Tclass is valid/set. HasTClass bool @@ -666,10 +666,6 @@ type IPv4TOSOption uint8 // for all subsequent outgoing IPv6 packets from the endpoint. type IPv6TrafficClassOption uint8 -// ReceiveTOSOption is used by SetSockOpt/GetSockOpt to specify if the TOS -// ancillary message is passed with incoming packets. -type ReceiveTOSOption bool - // Route is a row in the routing table. It specifies through which NIC (and // gateway) sets of packets should be routed. A row is considered viable if the // masked target address matches the destination address in the row. 
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index 6d23ab5a1..5aafe2615 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -510,7 +510,7 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { } // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. -func (e *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error { +func (ep *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error { return tcpip.ErrUnknownProtocolOption } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 269470ed4..1ac4705af 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -32,7 +32,6 @@ type udpPacket struct { senderAddress tcpip.FullAddress data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 - tos uint8 } // EndpointState represents the state of a UDP endpoint. @@ -115,10 +114,6 @@ type endpoint struct { // applied while sending packets. Defaults to 0 as on Linux. sendTOS uint8 - // receiveTOS determines if the incoming IPv4 TOS header field is passed - // as ancillary data to ControlMessages on Read. - receiveTOS bool - // shutdownFlags represent the current shutdown state of the endpoint. shutdownFlags tcpip.ShutdownFlags @@ -249,12 +244,7 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess *addr = p.senderAddress } - return p.data.ToView(), tcpip.ControlMessages{ - HasTimestamp: true, - Timestamp: p.timestamp, - HasTOS: e.receiveTOS, - TOS: p.tos, - }, nil + return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil } // prepareForWrite prepares the endpoint for sending data. In particular, it @@ -666,12 +656,6 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.sendTOS = uint8(v) e.mu.Unlock() return nil - - case tcpip.ReceiveTOSOption: - e.mu.Lock() - e.receiveTOS = bool(v) - e.mu.Unlock() - return nil } return nil } @@ -808,12 +792,6 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.mu.RUnlock() return nil - case *tcpip.ReceiveTOSOption: - e.mu.RLock() - *o = tcpip.ReceiveTOSOption(e.receiveTOS) - e.mu.RUnlock() - return nil - default: return tcpip.ErrUnknownProtocolOption } @@ -1260,13 +1238,6 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk e.rcvList.PushBack(packet) e.rcvBufSize += pkt.Data.Size() - // Save any useful information from the NetworkHeader to the packet. - switch r.NetProto { - case header.IPv4ProtocolNumber: - // This packet has already been validated before being passed up the stack. - packet.tos, _ = header.IPv4(pkt.NetworkHeader).TOS() - } - packet.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 43b8b35ba..7051a7a9c 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -56,7 +56,6 @@ const ( multicastAddr = "\xe8\x2b\xd3\xea" multicastV6Addr = "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" broadcastAddr = header.IPv4Broadcast - testTOS = 0x80 // defaultMTU is the MTU, in bytes, used throughout the tests, except // where another value is explicitly used. 
It is chosen to match the MTU @@ -454,7 +453,6 @@ func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool ip := header.IPv4(buf) ip.Encode(&header.IPv4Fields{ IHL: header.IPv4MinimumSize, - TOS: testTOS, TotalLength: uint16(len(buf)), TTL: 65, Protocol: uint8(udp.ProtocolNumber), @@ -558,8 +556,8 @@ func TestBindToDeviceOption(t *testing.T) { // testReadInternal sends a packet of the given test flow into the stack by // injecting it into the link endpoint. It then attempts to read it from the // UDP endpoint and depending on if this was expected to succeed verifies its -// correctness including any additional checker functions provided. -func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool, checkers ...checker.ControlMessagesChecker) { +// correctness. +func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool) { c.t.Helper() payload := newPayload() @@ -574,12 +572,12 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe epstats := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone() var addr tcpip.FullAddress - v, cm, err := c.ep.Read(&addr) + v, _, err := c.ep.Read(&addr) if err == tcpip.ErrWouldBlock { // Wait for data to become available. select { case <-ch: - v, cm, err = c.ep.Read(&addr) + v, _, err = c.ep.Read(&addr) case <-time.After(300 * time.Millisecond): if packetShouldBeDropped { @@ -612,21 +610,15 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe if !bytes.Equal(payload, v) { c.t.Fatalf("bad payload: got %x, want %x", v, payload) } - - // Run any checkers against the ControlMessages. - for _, f := range checkers { - f(c.t, cm) - } - c.checkEndpointReadStats(1, epstats, err) } // testRead sends a packet of the given test flow into the stack by injecting it // into the link endpoint. It then reads it from the UDP endpoint and verifies -// its correctness including any additional checker functions provided. -func testRead(c *testContext, flow testFlow, checkers ...checker.ControlMessagesChecker) { +// its correctness. +func testRead(c *testContext, flow testFlow) { c.t.Helper() - testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */, checkers...) + testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */) } // testFailingRead sends a packet of the given test flow into the stack by @@ -1294,7 +1286,7 @@ func TestTOSV4(t *testing.T) { c.createEndpointForFlow(flow) - const tos = testTOS + const tos = 0xC0 var v tcpip.IPv4TOSOption if err := c.ep.GetSockOpt(&v); err != nil { c.t.Errorf("GetSockopt failed: %s", err) @@ -1329,7 +1321,7 @@ func TestTOSV6(t *testing.T) { c.createEndpointForFlow(flow) - const tos = testTOS + const tos = 0xC0 var v tcpip.IPv6TrafficClassOption if err := c.ep.GetSockOpt(&v); err != nil { c.t.Errorf("GetSockopt failed: %s", err) @@ -1356,49 +1348,6 @@ func TestTOSV6(t *testing.T) { } } -func TestReceiveTOSV4(t *testing.T) { - for _, flow := range []testFlow{unicastV4, broadcast} { - t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { - c := newDualTestContext(t, defaultMTU) - defer c.cleanup() - - c.createEndpointForFlow(flow) - - // Verify that setting and reading the option works. - const recvTos = true - var v tcpip.ReceiveTOSOption - if err := c.ep.GetSockOpt(&v); err != nil { - c.t.Errorf("GetSockopt failed: %s", err) - } - // Test for expected default value. - if v != false { - c.t.Errorf("got GetSockOpt(...) 
= %t, want = %t", v, false) - } - - if err := c.ep.SetSockOpt(tcpip.ReceiveTOSOption(recvTos)); err != nil { - c.t.Errorf("SetSockOpt(%#v) failed: %s", tcpip.ReceiveTOSOption(recvTos), err) - } - - if err := c.ep.GetSockOpt(&v); err != nil { - c.t.Errorf("GetSockopt failed: %s", err) - } - - if want := tcpip.ReceiveTOSOption(recvTos); v != want { - c.t.Errorf("got GetSockOpt(...) = %t, want = %t", v, want) - } - - // Bind to wildcard. - if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { - c.t.Fatalf("Bind failed: %s", err) - } - - // Verify that the correct received TOS is actually handed through as - // ancillary data to the ControlMessages struct. - testRead(c, flow, checker.ReceiveTOS(testTOS)) - }) - } -} - func TestMulticastInterfaceOption(t *testing.T) { for _, flow := range []testFlow{multicastV4, multicastV4in6, multicastV6, multicastV6Only} { t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index 53290bed7..66eb68857 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -209,46 +209,6 @@ TEST_P(UDPSocketPairTest, SetMulticastLoopChar) { EXPECT_EQ(get, kSockOptOn); } -// Ensure that Receiving TOS is off by default. -TEST_P(UDPSocketPairTest, RecvTosDefault) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - - int get = -1; - socklen_t get_len = sizeof(get); - ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), - SyscallSucceedsWithValue(0)); - EXPECT_EQ(get_len, sizeof(get)); - EXPECT_EQ(get, kSockOptOff); -} - -// Test that setting and getting IP_RECVTOS works as expected. -TEST_P(UDPSocketPairTest, SetRecvTos) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, - &kSockOptOff, sizeof(kSockOptOff)), - SyscallSucceeds()); - - int get = -1; - socklen_t get_len = sizeof(get); - ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), - SyscallSucceedsWithValue(0)); - EXPECT_EQ(get_len, sizeof(get)); - EXPECT_EQ(get, kSockOptOff); - - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, - &kSockOptOn, sizeof(kSockOptOn)), - SyscallSucceeds()); - - ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), - SyscallSucceedsWithValue(0)); - EXPECT_EQ(get_len, sizeof(get)); - EXPECT_EQ(get, kSockOptOn); -} - TEST_P(UDPSocketPairTest, ReuseAddrDefault) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 68e0a8109..dc35c2f50 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1349,9 +1349,8 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { // outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SetAndReceiveTOS) { - // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. - SKIP_IF((GetParam() != AddressFamily::kIpv4) && IsRunningOnGvisor() && - !IsRunningWithHostinet()); + // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. 
+ SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1422,8 +1421,7 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { // TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SendAndReceiveTOS) { - // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. - // TODO(b/146661005): Setting TOS via cmsg not supported for netstack. + // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); -- cgit v1.2.3 From 03e53745cc04f674d4795fcafcca755c836e526f Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Tue, 24 Dec 2019 10:29:36 +0800 Subject: Add test/util/save_util_linux.cc:MaybeSave to support arm64 There is no syscall_create_module on Arm64. Signed-off-by: Bin Lu --- test/util/save_util_linux.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/util/save_util_linux.cc b/test/util/save_util_linux.cc index 7a0f14342..cd56118c0 100644 --- a/test/util/save_util_linux.cc +++ b/test/util/save_util_linux.cc @@ -18,13 +18,25 @@ #include "test/util/save_util.h" +#if defined(__x86_64__) || defined(__i386__) +#define SYS_TRIGGER_SAVE SYS_create_module +#elif defined(__aarch64__) +#define SYS_TRIGGER_SAVE SYS_finit_module +#else +#error "Unknown architecture" +#endif + namespace gvisor { namespace testing { void MaybeSave() { if (internal::ShouldSave()) { int orig_errno = errno; - syscall(SYS_create_module, nullptr, 0); + // We use it to trigger saving the sentry state + // when this syscall is called. + // Notice: this needs to be a valid syscall + // that is not used in any of the syscall tests. + syscall(SYS_TRIGGER_SAVE, nullptr, 0); errno = orig_errno; } } -- cgit v1.2.3 From ed60bc326b7479995fee4a94d29cbc7d9dddef02 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Mon, 6 Jan 2020 16:48:07 -0800 Subject: Fix readme formatting. PiperOrigin-RevId: 288402480 --- test/iptables/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/iptables/README.md b/test/iptables/README.md index b37cb2a96..9f8e34420 100644 --- a/test/iptables/README.md +++ b/test/iptables/README.md @@ -1,6 +1,6 @@ # iptables Tests -iptables tests are run via `scripts/iptables\_test.sh`. +iptables tests are run via `scripts/iptables_test.sh`. ## Test Structure -- cgit v1.2.3 From e77ad574233b779519a253c6f58197c339e9100a Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 7 Jan 2020 17:25:18 -0800 Subject: Fix partial_bad_buffer write tests. The write tests are fitted to Linux-specific behavior, but it is not well-specified. Tweak the tests to allow for both acceptable outcomes. 
PiperOrigin-RevId: 288606386 --- test/syscalls/linux/partial_bad_buffer.cc | 138 ++++++++++++++---------------- 1 file changed, 64 insertions(+), 74 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/partial_bad_buffer.cc b/test/syscalls/linux/partial_bad_buffer.cc index 33822ee57..df7129acc 100644 --- a/test/syscalls/linux/partial_bad_buffer.cc +++ b/test/syscalls/linux/partial_bad_buffer.cc @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include @@ -62,9 +64,9 @@ class PartialBadBufferTest : public ::testing::Test { // Write some initial data. size_t size = sizeof(kMessage) - 1; EXPECT_THAT(WriteFd(fd_, &kMessage, size), SyscallSucceedsWithValue(size)); - ASSERT_THAT(lseek(fd_, 0, SEEK_SET), SyscallSucceeds()); + // Map a useable buffer. addr_ = mmap(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ASSERT_NE(addr_, MAP_FAILED); @@ -79,6 +81,15 @@ class PartialBadBufferTest : public ::testing::Test { bad_buffer_ = buf + kPageSize - 1; } + off_t Size() { + struct stat st; + int rc = fstat(fd_, &st); + if (rc < 0) { + return static_cast(rc); + } + return st.st_size; + } + void TearDown() override { EXPECT_THAT(munmap(addr_, 2 * kPageSize), SyscallSucceeds()) << addr_; EXPECT_THAT(close(fd_), SyscallSucceeds()); @@ -165,97 +176,99 @@ TEST_F(PartialBadBufferTest, PreadvSmall) { } TEST_F(PartialBadBufferTest, WriteBig) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); + off_t orig_size = Size(); + int n; - EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, kPageSize), - SyscallFailsWithErrno(EFAULT)); + ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT( + (n = RetryEINTR(write)(fd_, bad_buffer_, kPageSize)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); } TEST_F(PartialBadBufferTest, WriteSmall) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); + off_t orig_size = Size(); + int n; - EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, 10), - SyscallFailsWithErrno(EFAULT)); + ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT( + (n = RetryEINTR(write)(fd_, bad_buffer_, 10)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); } TEST_F(PartialBadBufferTest, PwriteBig) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); + off_t orig_size = Size(); + int n; - EXPECT_THAT(RetryEINTR(pwrite)(fd_, bad_buffer_, kPageSize, 0), - SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT( + (n = RetryEINTR(pwrite)(fd_, bad_buffer_, kPageSize, orig_size)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? 
n : 0)); } TEST_F(PartialBadBufferTest, PwriteSmall) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); + off_t orig_size = Size(); + int n; - EXPECT_THAT(RetryEINTR(pwrite)(fd_, bad_buffer_, 10, 0), - SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT( + (n = RetryEINTR(pwrite)(fd_, bad_buffer_, 10, orig_size)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); } TEST_F(PartialBadBufferTest, WritevBig) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); - struct iovec vec; vec.iov_base = bad_buffer_; vec.iov_len = kPageSize; + off_t orig_size = Size(); + int n; - EXPECT_THAT(RetryEINTR(writev)(fd_, &vec, 1), SyscallFailsWithErrno(EFAULT)); + ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT( + (n = RetryEINTR(writev)(fd_, &vec, 1)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); } TEST_F(PartialBadBufferTest, WritevSmall) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); - struct iovec vec; vec.iov_base = bad_buffer_; vec.iov_len = 10; + off_t orig_size = Size(); + int n; - EXPECT_THAT(RetryEINTR(writev)(fd_, &vec, 1), SyscallFailsWithErrno(EFAULT)); + ASSERT_THAT(lseek(fd_, orig_size, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT( + (n = RetryEINTR(writev)(fd_, &vec, 1)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); } TEST_F(PartialBadBufferTest, PwritevBig) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); - struct iovec vec; vec.iov_base = bad_buffer_; vec.iov_len = kPageSize; + off_t orig_size = Size(); + int n; - EXPECT_THAT(RetryEINTR(pwritev)(fd_, &vec, 1, 0), - SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT( + (n = RetryEINTR(pwritev)(fd_, &vec, 1, orig_size)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? n : 0)); } TEST_F(PartialBadBufferTest, PwritevSmall) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); - struct iovec vec; vec.iov_base = bad_buffer_; vec.iov_len = 10; + off_t orig_size = Size(); + int n; - EXPECT_THAT(RetryEINTR(pwritev)(fd_, &vec, 1, 0), - SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT( + (n = RetryEINTR(pwritev)(fd_, &vec, 1, orig_size)), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(1))); + EXPECT_EQ(Size(), orig_size + (n >= 0 ? 
n : 0)); } // getdents returns EFAULT when the you claim the buffer is large enough, but @@ -283,29 +296,6 @@ TEST_F(PartialBadBufferTest, GetdentsOneEntry) { SyscallSucceedsWithValue(Gt(0))); } -// Verify that when write returns EFAULT the kernel hasn't silently written -// the initial valid bytes. -TEST_F(PartialBadBufferTest, WriteEfaultIsntPartial) { - // FIXME(b/24788078): The sentry write syscalls will return immediately - // if Access returns an error, but Access may not return an error - // and the sentry will instead perform a partial write. - SKIP_IF(IsRunningOnGvisor()); - - bad_buffer_[0] = 'A'; - EXPECT_THAT(RetryEINTR(write)(fd_, bad_buffer_, 10), - SyscallFailsWithErrno(EFAULT)); - - size_t size = 255; - char buf[255]; - memset(buf, 0, size); - - EXPECT_THAT(RetryEINTR(pread)(fd_, buf, size, 0), - SyscallSucceedsWithValue(sizeof(kMessage) - 1)); - - // 'A' has not been written. - EXPECT_STREQ(buf, kMessage); -} - PosixErrorOr InetLoopbackAddr(int family) { struct sockaddr_storage addr; memset(&addr, 0, sizeof(addr)); -- cgit v1.2.3 From a53ac7307abfeb7172e67f48d0a7aaa4b5c3f31e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 7 Jan 2020 23:52:59 -0800 Subject: fs/splice: don't report a partialResult error if there is no data loss PiperOrigin-RevId: 288642552 --- pkg/sentry/fs/file.go | 7 +++++++ pkg/sentry/fs/splice.go | 5 +++++ test/syscalls/linux/inotify.cc | 28 ++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) (limited to 'test') diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index c0a6e884b..a2f966cb6 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -555,6 +555,10 @@ type lockedWriter struct { // // This applies only to Write, not WriteAt. Offset int64 + + // Err contains the first error encountered while copying. This is + // useful to determine whether Writer or Reader failed during io.Copy. + Err error } // Write implements io.Writer.Write. @@ -590,5 +594,8 @@ func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) { break } } + if w.Err == nil { + w.Err = err + } return written, err } diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 311798811..389c330a0 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -167,6 +167,11 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, if !srcPipe && !opts.SrcOffset { atomic.StoreInt64(&src.offset, src.offset+n) } + + // Don't report any errors if we have some progress without data loss. + if w.Err == nil { + err = nil + } } // Drop locks. 
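The lockedWriter.Err field introduced above exists so the splice path can tell whether a failure during the copy came from the writer (written data may have been lost) or only from the reader (no data loss, so the error can be dropped once some progress was made). A minimal standalone Go sketch of that idea — using hypothetical names, not the sentry's actual types, and not part of this patch:

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

// errTrackingWriter records the first error returned by the underlying
// writer so that, after io.Copy, the caller knows whether the writer or
// the reader failed.
type errTrackingWriter struct {
	w   io.Writer
	err error
}

func (t *errTrackingWriter) Write(p []byte) (int, error) {
	n, err := t.w.Write(p)
	if err != nil && t.err == nil {
		t.err = err
	}
	return n, err
}

func main() {
	var dst bytes.Buffer
	w := &errTrackingWriter{w: &dst}
	n, err := io.Copy(w, strings.NewReader("some data"))
	// Mirrors the splice.go change above: if some progress was made and the
	// writer never failed, the error came from the reader and no written
	// data was lost, so it can be dropped.
	if err != nil && n > 0 && w.err == nil {
		err = nil
	}
	fmt.Println(n, err) // 9 <nil>
}
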
diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 7384c27dc..59ec9940a 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -1591,6 +1591,34 @@ TEST(Inotify, EpollNoDeadlock) { } } +TEST(Inotify, SpliceEvent) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int watcher = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + char buf; + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, + sizeof(struct inotify_event) + 1, SPLICE_F_NONBLOCK), + SyscallSucceedsWithValue(sizeof(struct inotify_event))); + + const FileDescriptor read_fd(pipes[0]); + const std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(read_fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, watcher)})); +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 8cc1c35bbdc5c9bd6b3965311497885ce72317a8 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 12 Dec 2019 15:48:24 -0800 Subject: Write simple ACCEPT rules to the filter table. This gets us closer to passing the iptables tests and opens up iptables so it can be worked on by multiple people. A few restrictions are enforced for security (i.e. we don't want to let users write a bunch of iptables rules and then just not enforce them): - Only the filter table is writable. - Only ACCEPT rules with no matching criteria can be added. 
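Concretely, the only rules the restrictions above admit are the equivalent of "iptables -t filter -A INPUT -j ACCEPT": no matching criteria and an unconditional ACCEPT target. In the netstack representation this change introduces, such a rule is an entry with an empty Matchers list and an UnconditionalAcceptTarget. A rough usage sketch built from the types defined later in this patch (hypothetical wiring, not code taken from the patch):

package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/tcpip/iptables"
)

func main() {
	// Start from the empty filter table, whose hook entry points are unset.
	table := iptables.EmptyFilterTable()

	// An ACCEPT rule with no matching criteria: no Matchers, and a target
	// that always returns the Accept verdict.
	table.Rules = append(table.Rules, iptables.Rule{
		Target: iptables.UnconditionalAcceptTarget{},
	})

	// Point the INPUT chain and its underflow at that rule, the way
	// SetEntries does while walking the struct ipt_replace payload.
	table.BuiltinChains[iptables.Input] = 0
	table.Underflows[iptables.Input] = 0

	fmt.Printf("filter table: %d rule(s), INPUT entry at index %d\n",
		len(table.Rules), table.BuiltinChains[iptables.Input])
}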
--- go.mod | 31 +-- go.sum | 10 + pkg/abi/linux/netfilter.go | 82 +++--- pkg/sentry/socket/netfilter/BUILD | 1 + pkg/sentry/socket/netfilter/netfilter.go | 411 ++++++++++++++++++++++++------- pkg/sentry/socket/netstack/netstack.go | 23 +- pkg/tcpip/iptables/iptables.go | 114 ++++++--- pkg/tcpip/iptables/targets.go | 8 + pkg/tcpip/iptables/types.go | 55 ++--- test/iptables/filter_input.go | 1 + test/iptables/iptables_test.go | 23 +- 11 files changed, 550 insertions(+), 209 deletions(-) (limited to 'test') diff --git a/go.mod b/go.mod index 304b8bf13..4802359f8 100644 --- a/go.mod +++ b/go.mod @@ -3,19 +3,20 @@ module gvisor.dev/gvisor go 1.13 require ( - github.com/cenkalti/backoff v0.0.0-20190506075156-2146c9339422 - github.com/gofrs/flock v0.6.1-0.20180915234121-886344bea079 - github.com/golang/mock v1.3.1 - github.com/golang/protobuf v1.3.1 - github.com/google/btree v1.0.0 - github.com/google/go-cmp v0.2.0 - github.com/google/subcommands v0.0.0-20190508160503-636abe8753b8 - github.com/google/uuid v0.0.0-20171129191014-dec09d789f3d - github.com/kr/pty v1.1.1 - github.com/opencontainers/runtime-spec v0.1.2-0.20171211145439-b2d941ef6a78 - github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 - github.com/vishvananda/netlink v1.0.1-0.20190318003149-adb577d4a45e - github.com/vishvananda/netns v0.0.0-20171111001504-be1fbeda1936 - golang.org/x/net v0.0.0-20190311183353-d8887717615a - golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a + github.com/cenkalti/backoff v0.0.0-20190506075156-2146c9339422 + github.com/gofrs/flock v0.6.1-0.20180915234121-886344bea079 + github.com/golang/mock v1.3.1 + github.com/golang/protobuf v1.3.1 + github.com/google/btree v1.0.0 + github.com/google/go-cmp v0.2.0 + github.com/google/subcommands v0.0.0-20190508160503-636abe8753b8 + github.com/google/uuid v0.0.0-20171129191014-dec09d789f3d + github.com/kr/pty v1.1.1 + github.com/opencontainers/runtime-spec v0.1.2-0.20171211145439-b2d941ef6a78 + github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 + github.com/vishvananda/netlink v1.0.1-0.20190318003149-adb577d4a45e + github.com/vishvananda/netns v0.0.0-20171111001504-be1fbeda1936 + golang.org/x/net v0.0.0-20190311183353-d8887717615a + golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a + golang.org/x/time v0.0.0-20191024005414-555d28b269f0 ) diff --git a/go.sum b/go.sum index 7a0bc175a..cf092956e 100644 --- a/go.sum +++ b/go.sum @@ -1,19 +1,29 @@ +github.com/cenkalti/backoff v0.0.0-20190506075156-2146c9339422 h1:+FKjzBIdfBHYDvxCv+djmDJdes/AoDtg8gpcxowBlF8= github.com/cenkalti/backoff v0.0.0-20190506075156-2146c9339422/go.mod h1:b6Nc7NRH5C4aCISLry0tLnTjcuTEvoiqcWDdsU0sOGM= github.com/gofrs/flock v0.6.1-0.20180915234121-886344bea079/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/subcommands v0.0.0-20190508160503-636abe8753b8/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= github.com/google/uuid 
v0.0.0-20171129191014-dec09d789f3d/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/opencontainers/runtime-spec v0.1.2-0.20171211145439-b2d941ef6a78 h1:d9F+LNYwMyi3BDN4GzZdaSiq4otb8duVEWyZjeUtOQI= github.com/opencontainers/runtime-spec v0.1.2-0.20171211145439-b2d941ef6a78/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 h1:b6uOv7YOFK0TYG7HtkIgExQo+2RdLuwRft63jn2HWj8= github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/vishvananda/netlink v1.0.1-0.20190318003149-adb577d4a45e/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= github.com/vishvananda/netns v0.0.0-20171111001504-be1fbeda1936/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0 h1:/5xXl8Y5W96D+TtHSlonuFqGHIWVuyCkGJLwGh9JJFs= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index 269ba5567..0bcb232de 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -42,6 +42,13 @@ const ( NF_RETURN = -NF_REPEAT - 1 ) +var VerdictStrings = map[int32]string{ + -NF_DROP - 1: "DROP", + -NF_ACCEPT - 1: "ACCEPT", + -NF_QUEUE - 1: "QUEUE", + NF_RETURN: "RETURN", +} + // Socket options. These correspond to values in // include/uapi/linux/netfilter_ipv4/ip_tables.h. const ( @@ -179,7 +186,7 @@ const SizeOfXTCounters = 16 // the user data. type XTEntryMatch struct { MatchSize uint16 - Name [XT_EXTENSION_MAXNAMELEN]byte + Name ExtensionName Revision uint8 // Data is omitted here because it would cause XTEntryMatch to be an // extra byte larger (see http://www.catb.org/esr/structure-packing/). @@ -199,7 +206,7 @@ const SizeOfXTEntryMatch = 32 // the user data. type XTEntryTarget struct { TargetSize uint16 - Name [XT_EXTENSION_MAXNAMELEN]byte + Name ExtensionName Revision uint8 // Data is omitted here because it would cause XTEntryTarget to be an // extra byte larger (see http://www.catb.org/esr/structure-packing/). @@ -226,9 +233,9 @@ const SizeOfXTStandardTarget = 40 // ErrorName. It corresponds to struct xt_error_target in // include/uapi/linux/netfilter/x_tables.h. type XTErrorTarget struct { - Target XTEntryTarget - ErrorName [XT_FUNCTION_MAXNAMELEN]byte - _ [2]byte + Target XTEntryTarget + Name ErrorName + _ [2]byte } // SizeOfXTErrorTarget is the size of an XTErrorTarget. @@ -237,7 +244,7 @@ const SizeOfXTErrorTarget = 64 // IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. 
It corresponds // to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h. type IPTGetinfo struct { - Name [XT_TABLE_MAXNAMELEN]byte + Name TableName ValidHooks uint32 HookEntry [NF_INET_NUMHOOKS]uint32 Underflow [NF_INET_NUMHOOKS]uint32 @@ -248,16 +255,11 @@ type IPTGetinfo struct { // SizeOfIPTGetinfo is the size of an IPTGetinfo. const SizeOfIPTGetinfo = 84 -// TableName returns the table name. -func (info *IPTGetinfo) TableName() string { - return tableName(info.Name[:]) -} - // IPTGetEntries is the argument for the IPT_SO_GET_ENTRIES sockopt. It // corresponds to struct ipt_get_entries in // include/uapi/linux/netfilter_ipv4/ip_tables.h. type IPTGetEntries struct { - Name [XT_TABLE_MAXNAMELEN]byte + Name TableName Size uint32 _ [4]byte // Entrytable is omitted here because it would cause IPTGetEntries to @@ -266,34 +268,22 @@ type IPTGetEntries struct { // Entrytable [0]IPTEntry } -// TableName returns the entries' table name. -func (entries *IPTGetEntries) TableName() string { - return tableName(entries.Name[:]) -} - // SizeOfIPTGetEntries is the size of an IPTGetEntries. const SizeOfIPTGetEntries = 40 -// KernelIPTGetEntries is identical to IPTEntry, but includes the Elems field. -// This struct marshaled via the binary package to write an KernelIPTGetEntries -// to userspace. +// KernelIPTGetEntries is identical to IPTGetEntries, but includes the +// Entrytable field. This struct marshaled via the binary package to write an +// KernelIPTGetEntries to userspace. type KernelIPTGetEntries struct { - Name [XT_TABLE_MAXNAMELEN]byte - Size uint32 - _ [4]byte + IPTGetEntries Entrytable []KernelIPTEntry } -// TableName returns the entries' table name. -func (entries *KernelIPTGetEntries) TableName() string { - return tableName(entries.Name[:]) -} - // IPTReplace is the argument for the IPT_SO_SET_REPLACE sockopt. It // corresponds to struct ipt_replace in // include/uapi/linux/netfilter_ipv4/ip_tables.h. type IPTReplace struct { - Name [XT_TABLE_MAXNAMELEN]byte + Name TableName ValidHooks uint32 NumEntries uint32 Size uint32 @@ -306,14 +296,40 @@ type IPTReplace struct { // Entries [0]IPTEntry } +type KernelIPTReplace struct { + IPTReplace + Entries [0]IPTEntry +} + // SizeOfIPTReplace is the size of an IPTReplace. const SizeOfIPTReplace = 96 -func tableName(name []byte) string { - for i, c := range name { +type ExtensionName [XT_EXTENSION_MAXNAMELEN]byte + +// String implements fmt.Stringer. +func (en ExtensionName) String() string { + return name(en[:]) +} + +type TableName [XT_TABLE_MAXNAMELEN]byte + +// String implements fmt.Stringer. +func (tn TableName) String() string { + return name(tn[:]) +} + +type ErrorName [XT_FUNCTION_MAXNAMELEN]byte + +// String implements fmt.Stringer. 
+func (fn ErrorName) String() string { + return name(fn[:]) +} + +func name(cstring []byte) string { + for i, c := range cstring { if c == 0 { - return string(name[:i]) + return string(cstring[:i]) } } - return string(name) + return string(cstring) } diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index 5eb06bbf4..b70047d81 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -14,6 +14,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/log", "//pkg/sentry/kernel", "//pkg/sentry/usermem", "//pkg/syserr", diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 9f87c32f1..8c7f3c7fc 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" @@ -35,6 +36,7 @@ const errorTargetName = "ERROR" // metadata is opaque to netstack. It holds data that we need to translate // between Linux's and netstack's iptables representations. +// TODO(gvisor.dev/issue/170): This might be removable. type metadata struct { HookEntry [linux.NF_INET_NUMHOOKS]uint32 Underflow [linux.NF_INET_NUMHOOKS]uint32 @@ -51,7 +53,7 @@ func GetInfo(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr) (linux.IPTG } // Find the appropriate table. - table, err := findTable(ep, info.TableName()) + table, err := findTable(ep, info.Name.String()) if err != nil { return linux.IPTGetinfo{}, err } @@ -82,18 +84,19 @@ func GetEntries(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr, outLen i } // Find the appropriate table. - table, err := findTable(ep, userEntries.TableName()) + table, err := findTable(ep, userEntries.Name.String()) if err != nil { return linux.KernelIPTGetEntries{}, err } // Convert netstack's iptables rules to something that the iptables // tool can understand. - entries, _, err := convertNetstackToBinary(userEntries.TableName(), table) + entries, _, err := convertNetstackToBinary(userEntries.Name.String(), table) if err != nil { return linux.KernelIPTGetEntries{}, err } if binary.Size(entries) > uintptr(outLen) { + log.Infof("Insufficient GetEntries output size: %d", uintptr(outLen)) return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument } @@ -142,103 +145,63 @@ func convertNetstackToBinary(name string, table iptables.Table) (linux.KernelIPT // The table name has to fit in the struct. if linux.XT_TABLE_MAXNAMELEN < len(name) { + log.Infof("Table name too long.") return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument } copy(entries.Name[:], name) - // Deal with the built in chains first (INPUT, OUTPUT, etc.). Each of - // these chains ends with an unconditional policy entry. - for hook := iptables.Prerouting; hook < iptables.NumHooks; hook++ { - chain, ok := table.BuiltinChains[hook] - if !ok { - // This table doesn't support this hook. - continue - } - - // Sanity check. - if len(chain.Rules) < 1 { - return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument - } - - for ruleIdx, rule := range chain.Rules { - // If this is the first rule of a builtin chain, set - // the metadata hook entry point. - if ruleIdx == 0 { + for ruleIdx, rule := range table.Rules { + // Is this a chain entry point? 
+ for hook, hookRuleIdx := range table.BuiltinChains { + if hookRuleIdx == ruleIdx { meta.HookEntry[hook] = entries.Size } - - // Each rule corresponds to an entry. - entry := linux.KernelIPTEntry{ - IPTEntry: linux.IPTEntry{ - NextOffset: linux.SizeOfIPTEntry, - TargetOffset: linux.SizeOfIPTEntry, - }, + } + // Is this a chain underflow point? The underflow rule is the last rule + // in the chain, and is an unconditional rule (i.e. it matches any + // packet). This is enforced when saving iptables. + for underflow, underflowRuleIdx := range table.Underflows { + if underflowRuleIdx == ruleIdx { + meta.Underflow[underflow] = entries.Size } + } - for _, matcher := range rule.Matchers { - // Serialize the matcher and add it to the - // entry. - serialized := marshalMatcher(matcher) - entry.Elems = append(entry.Elems, serialized...) - entry.NextOffset += uint16(len(serialized)) - entry.TargetOffset += uint16(len(serialized)) - } + // Each rule corresponds to an entry. + entry := linux.KernelIPTEntry{ + IPTEntry: linux.IPTEntry{ + NextOffset: linux.SizeOfIPTEntry, + TargetOffset: linux.SizeOfIPTEntry, + }, + } - // Serialize and append the target. - serialized := marshalTarget(rule.Target) + for _, matcher := range rule.Matchers { + // Serialize the matcher and add it to the + // entry. + serialized := marshalMatcher(matcher) entry.Elems = append(entry.Elems, serialized...) entry.NextOffset += uint16(len(serialized)) - - // The underflow rule is the last rule in the chain, - // and is an unconditional rule (i.e. it matches any - // packet). This is enforced when saving iptables. - if ruleIdx == len(chain.Rules)-1 { - meta.Underflow[hook] = entries.Size - } - - entries.Size += uint32(entry.NextOffset) - entries.Entrytable = append(entries.Entrytable, entry) - meta.NumEntries++ + entry.TargetOffset += uint16(len(serialized)) } - } + // Serialize and append the target. + serialized := marshalTarget(rule.Target) + entry.Elems = append(entry.Elems, serialized...) + entry.NextOffset += uint16(len(serialized)) - // TODO(gvisor.dev/issue/170): Deal with the user chains here. Each of - // these starts with an error node holding the chain's name and ends - // with an unconditional return. - - // Lastly, each table ends with an unconditional error target rule as - // its final entry. - errorEntry := linux.KernelIPTEntry{ - IPTEntry: linux.IPTEntry{ - NextOffset: linux.SizeOfIPTEntry, - TargetOffset: linux.SizeOfIPTEntry, - }, + entries.Size += uint32(entry.NextOffset) + entries.Entrytable = append(entries.Entrytable, entry) + meta.NumEntries++ } - var errorTarget linux.XTErrorTarget - errorTarget.Target.TargetSize = linux.SizeOfXTErrorTarget - copy(errorTarget.ErrorName[:], errorTargetName) - copy(errorTarget.Target.Name[:], errorTargetName) - - // Serialize and add it to the list of entries. - errorTargetBuf := make([]byte, 0, linux.SizeOfXTErrorTarget) - serializedErrorTarget := binary.Marshal(errorTargetBuf, usermem.ByteOrder, errorTarget) - errorEntry.Elems = append(errorEntry.Elems, serializedErrorTarget...) - errorEntry.NextOffset += uint16(len(serializedErrorTarget)) - - entries.Size += uint32(errorEntry.NextOffset) - entries.Entrytable = append(entries.Entrytable, errorEntry) - meta.NumEntries++ - meta.Size = entries.Size + meta.Size = entries.Size return entries, meta, nil } func marshalMatcher(matcher iptables.Matcher) []byte { switch matcher.(type) { default: - // TODO(gvisor.dev/issue/170): We don't support any matchers yet, so - // any call to marshalMatcher will panic. 
+ // TODO(gvisor.dev/issue/170): We don't support any matchers + // yet, so any call to marshalMatcher will panic. panic(fmt.Errorf("unknown matcher of type %T", matcher)) } } @@ -246,28 +209,46 @@ func marshalMatcher(matcher iptables.Matcher) []byte { func marshalTarget(target iptables.Target) []byte { switch target.(type) { case iptables.UnconditionalAcceptTarget: - return marshalUnconditionalAcceptTarget() + return marshalStandardTarget(iptables.Accept) + case iptables.UnconditionalDropTarget: + return marshalStandardTarget(iptables.Drop) + case iptables.PanicTarget: + return marshalPanicTarget() default: panic(fmt.Errorf("unknown target of type %T", target)) } } -func marshalUnconditionalAcceptTarget() []byte { +func marshalStandardTarget(verdict iptables.Verdict) []byte { // The target's name will be the empty string. target := linux.XTStandardTarget{ Target: linux.XTEntryTarget{ TargetSize: linux.SizeOfXTStandardTarget, }, - Verdict: translateStandardVerdict(iptables.Accept), + Verdict: translateFromStandardVerdict(verdict), } ret := make([]byte, 0, linux.SizeOfXTStandardTarget) return binary.Marshal(ret, usermem.ByteOrder, target) } -// translateStandardVerdict translates verdicts the same way as the iptables +func marshalPanicTarget() []byte { + // This is an error target named error + target := linux.XTErrorTarget{ + Target: linux.XTEntryTarget{ + TargetSize: linux.SizeOfXTErrorTarget, + }, + } + copy(target.Name[:], errorTargetName) + copy(target.Target.Name[:], errorTargetName) + + ret := make([]byte, 0, linux.SizeOfXTErrorTarget) + return binary.Marshal(ret, usermem.ByteOrder, target) +} + +// translateFromStandardVerdict translates verdicts the same way as the iptables // tool. -func translateStandardVerdict(verdict iptables.Verdict) int32 { +func translateFromStandardVerdict(verdict iptables.Verdict) int32 { switch verdict { case iptables.Accept: return -linux.NF_ACCEPT - 1 @@ -280,7 +261,269 @@ func translateStandardVerdict(verdict iptables.Verdict) int32 { case iptables.Jump: // TODO(gvisor.dev/issue/170): Support Jump. panic("Jump isn't supported yet") + } + panic(fmt.Sprintf("unknown standard verdict: %d", verdict)) +} + +// translateToStandardVerdict translates from the value in a +// linux.XTStandardTarget to an iptables.Verdict. +func translateToStandardVerdict(val int32) (iptables.Verdict, *syserr.Error) { + // TODO(gvisor.dev/issue/170): Support other verdicts. + switch val { + case -linux.NF_ACCEPT - 1: + return iptables.Accept, nil + case -linux.NF_DROP - 1: + return iptables.Drop, nil + case -linux.NF_QUEUE - 1: + log.Infof("Unsupported iptables verdict QUEUE.") + case linux.NF_RETURN: + log.Infof("Unsupported iptables verdict RETURN.") + } + log.Infof("Unknown iptables verdict %d.", val) + return iptables.Invalid, syserr.ErrInvalidArgument +} + +// SetEntries sets iptables rules for a single table. See +// net/ipv4/netfilter/ip_tables.c:translate_table for reference. +func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { + printReplace(optVal) + + // Get the basic rules data (struct ipt_replace). + if len(optVal) < linux.SizeOfIPTReplace { + return syserr.ErrInvalidArgument + } + var replace linux.IPTReplace + replaceBuf := optVal[:linux.SizeOfIPTReplace] + optVal = optVal[linux.SizeOfIPTReplace:] + binary.Unmarshal(replaceBuf, usermem.ByteOrder, &replace) + + // TODO(gvisor.dev/issue/170): Support other tables. 
+ var table iptables.Table + switch replace.Name.String() { + case iptables.TablenameFilter: + table = iptables.EmptyFilterTable() default: - panic(fmt.Sprintf("unknown standard verdict: %d", verdict)) + log.Infof(fmt.Sprintf("We don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String())) + return syserr.ErrInvalidArgument + } + + // Convert input into a list of rules and their offsets. + var offset uint32 + var offsets []uint32 + for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { + // Get the struct ipt_entry. + if len(optVal) < linux.SizeOfIPTEntry { + return syserr.ErrInvalidArgument + } + var entry linux.IPTEntry + buf := optVal[:linux.SizeOfIPTEntry] + optVal = optVal[linux.SizeOfIPTEntry:] + binary.Unmarshal(buf, usermem.ByteOrder, &entry) + if entry.TargetOffset != linux.SizeOfIPTEntry { + // TODO(gvisor.dev/issue/170): Support matchers. + return syserr.ErrInvalidArgument + } + + // TODO(gvisor.dev/issue/170): We should support IPTIP + // filtering. We reject any nonzero IPTIP values for now. + emptyIPTIP := linux.IPTIP{} + if entry.IP != emptyIPTIP { + return syserr.ErrInvalidArgument + } + + // Get the target of the rule. + target, consumed, err := parseTarget(optVal) + if err != nil { + return err + } + optVal = optVal[consumed:] + + table.Rules = append(table.Rules, iptables.Rule{Target: target}) + offsets = append(offsets, offset) + offset += linux.SizeOfIPTEntry + consumed + } + + // Go through the list of supported hooks for this table and, for each + // one, set the rule it corresponds to. + for hook, _ := range replace.HookEntry { + if table.ValidHooks()&uint32(hook) != 0 { + hk := hookFromLinux(hook) + for ruleIdx, offset := range offsets { + if offset == replace.HookEntry[hook] { + table.BuiltinChains[hk] = ruleIdx + } + if offset == replace.Underflow[hook] { + table.Underflows[hk] = ruleIdx + } + } + if ruleIdx := table.BuiltinChains[hk]; ruleIdx == iptables.HookUnset { + log.Infof("Hook %v is unset.", hk) + return syserr.ErrInvalidArgument + } + if ruleIdx := table.Underflows[hk]; ruleIdx == iptables.HookUnset { + log.Infof("Underflow %v is unset.", hk) + return syserr.ErrInvalidArgument + } + } + } + + ipt := stack.IPTables() + table.SetMetadata(metadata{ + HookEntry: replace.HookEntry, + Underflow: replace.Underflow, + NumEntries: replace.NumEntries, + Size: replace.Size, + }) + ipt.Tables[replace.Name.String()] = table + // TODO: Do we need to worry about locking? We could write rules while + // packets traverse tables. + stack.SetIPTables(ipt) + + return nil +} + +// parseTarget parses a target from the start of optVal and returns the target +// along with the number of bytes it occupies in optVal. +func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { + if len(optVal) < linux.SizeOfXTEntryTarget { + return nil, 0, syserr.ErrInvalidArgument + } + var target linux.XTEntryTarget + buf := optVal[:linux.SizeOfXTEntryTarget] + binary.Unmarshal(buf, usermem.ByteOrder, &target) + switch target.Name.String() { + case "": + // Standard target. 
+ if len(optVal) < linux.SizeOfXTStandardTarget { + return nil, 0, syserr.ErrInvalidArgument + } + var target linux.XTStandardTarget + buf = optVal[:linux.SizeOfXTStandardTarget] + binary.Unmarshal(buf, usermem.ByteOrder, &target) + + verdict, err := translateToStandardVerdict(target.Verdict) + if err != nil { + return nil, 0, err + } + switch verdict { + case iptables.Accept: + return iptables.UnconditionalAcceptTarget{}, linux.SizeOfXTStandardTarget, nil + case iptables.Drop: + // TODO(gvisor.dev/issue/170): Return an + // iptables.UnconditionalDropTarget to support DROP. + log.Infof("netfilter DROP is not supported yet.") + return nil, 0, syserr.ErrInvalidArgument + default: + panic(fmt.Sprintf("Unknown verdict: %v", verdict)) + } + + case errorTargetName: + // Error target. + if len(optVal) < linux.SizeOfXTErrorTarget { + return nil, 0, syserr.ErrInvalidArgument + } + var target linux.XTErrorTarget + buf = optVal[:linux.SizeOfXTErrorTarget] + binary.Unmarshal(buf, usermem.ByteOrder, &target) + + // Error targets are used in 2 cases: + // * An actual error case. These rules have an error + // named errorTargetName. The last entry of the table + // is usually an error case to catch any packets that + // somehow fall through every rule. + // * To mark the start of a user defined chain. These + // rules have an error with the name of the chain. + switch target.Name.String() { + case errorTargetName: + return iptables.PanicTarget{}, linux.SizeOfXTErrorTarget, nil + default: + log.Infof("Unknown error target %q doesn't exist or isn't supported yet.", target.Name.String()) + return nil, 0, syserr.ErrInvalidArgument + } + } + + // Unknown target. + log.Infof("Unknown target %q doesn't exist or isn't supported yet.", target.Name.String()) + return nil, 0, syserr.ErrInvalidArgument +} + +func chainNameFromHook(hook int) string { + switch hook { + case linux.NF_INET_PRE_ROUTING: + return iptables.ChainNamePrerouting + case linux.NF_INET_LOCAL_IN: + return iptables.ChainNameInput + case linux.NF_INET_FORWARD: + return iptables.ChainNameForward + case linux.NF_INET_LOCAL_OUT: + return iptables.ChainNameOutput + case linux.NF_INET_POST_ROUTING: + return iptables.ChainNamePostrouting + } + panic(fmt.Sprintf("Unknown hook %d does not correspond to a builtin chain")) +} + +func hookFromLinux(hook int) iptables.Hook { + switch hook { + case linux.NF_INET_PRE_ROUTING: + return iptables.Prerouting + case linux.NF_INET_LOCAL_IN: + return iptables.Input + case linux.NF_INET_FORWARD: + return iptables.Forward + case linux.NF_INET_LOCAL_OUT: + return iptables.Output + case linux.NF_INET_POST_ROUTING: + return iptables.Postrouting + } + panic(fmt.Sprintf("Unknown hook %d does not correspond to a builtin chain")) +} + +// printReplace prints information about the struct ipt_replace in optVal. It +// is only for debugging. +func printReplace(optVal []byte) { + // Basic replace info. + var replace linux.IPTReplace + replaceBuf := optVal[:linux.SizeOfIPTReplace] + optVal = optVal[linux.SizeOfIPTReplace:] + binary.Unmarshal(replaceBuf, usermem.ByteOrder, &replace) + log.Infof("kevin: Replacing table %q: %+v", replace.Name.String(), replace) + + // Read in the list of entries at the end of replace. 
+ var totalOffset uint16 + for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { + var entry linux.IPTEntry + entryBuf := optVal[:linux.SizeOfIPTEntry] + binary.Unmarshal(entryBuf, usermem.ByteOrder, &entry) + log.Infof("kevin: Entry %d (total offset %d): %+v", entryIdx, totalOffset, entry) + + totalOffset += entry.NextOffset + if entry.TargetOffset == linux.SizeOfIPTEntry { + log.Infof("kevin: Entry has no matches.") + } else { + log.Infof("kevin: Entry has matches.") + } + + var target linux.XTEntryTarget + targetBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTEntryTarget] + binary.Unmarshal(targetBuf, usermem.ByteOrder, &target) + log.Infof("kevin: Target named %q: %+v", target.Name.String(), target) + + switch target.Name.String() { + case "": + var standardTarget linux.XTStandardTarget + stBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTStandardTarget] + binary.Unmarshal(stBuf, usermem.ByteOrder, &standardTarget) + log.Infof("kevin: Standard target with verdict %q (%d).", linux.VerdictStrings[standardTarget.Verdict], standardTarget.Verdict) + case errorTargetName: + var errorTarget linux.XTErrorTarget + etBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTErrorTarget] + binary.Unmarshal(etBuf, usermem.ByteOrder, &errorTarget) + log.Infof("kevin: Error target with name %q.", errorTarget.Name.String()) + default: + log.Infof("kevin: Unknown target type.") + } + + optVal = optVal[entry.NextOffset:] } } diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 140851c17..f7caa45b4 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -326,7 +326,7 @@ func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, } family := usermem.ByteOrder.Uint16(addr) - if family != uint16(sfamily) && (strict || family != linux.AF_UNSPEC) { + if family != uint16(sfamily) && (!strict && family != linux.AF_UNSPEC) { return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported } @@ -1356,6 +1356,27 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa return nil } + if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP { + if name == linux.IPT_SO_SET_REPLACE { + if len(optVal) < linux.SizeOfIPTReplace { + return syserr.ErrInvalidArgument + } + + stack := inet.StackFromContext(t) + if stack == nil { + return syserr.ErrNoDevice + } + // Stack must be a netstack stack. + if err := netfilter.SetEntries(stack.(*Stack).Stack, optVal); err != nil { + return err + } + return nil + } else if name == linux.IPT_SO_SET_ADD_COUNTERS { + // TODO(gvisor.dev/issue/170): Counter support. + return nil + } + } + return SetSockOpt(t, s, s.Endpoint, level, name, optVal) } diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index 68c68d4aa..9e7005374 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -17,65 +17,107 @@ package iptables const ( - tablenameNat = "nat" - tablenameMangle = "mangle" + TablenameNat = "nat" + TablenameMangle = "mangle" + TablenameFilter = "filter" ) +// TODO: Make this an iota? Faster! Do it. // Chain names as defined by net/ipv4/netfilter/ip_tables.c. 
const ( - chainNamePrerouting = "PREROUTING" - chainNameInput = "INPUT" - chainNameForward = "FORWARD" - chainNameOutput = "OUTPUT" - chainNamePostrouting = "POSTROUTING" + ChainNamePrerouting = "PREROUTING" + ChainNameInput = "INPUT" + ChainNameForward = "FORWARD" + ChainNameOutput = "OUTPUT" + ChainNamePostrouting = "POSTROUTING" ) +const HookUnset = -1 + // DefaultTables returns a default set of tables. Each chain is set to accept // all packets. func DefaultTables() IPTables { return IPTables{ Tables: map[string]Table{ - tablenameNat: Table{ - BuiltinChains: map[Hook]Chain{ - Prerouting: unconditionalAcceptChain(chainNamePrerouting), - Input: unconditionalAcceptChain(chainNameInput), - Output: unconditionalAcceptChain(chainNameOutput), - Postrouting: unconditionalAcceptChain(chainNamePostrouting), + TablenameNat: Table{ + Rules: []Rule{ + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: PanicTarget{}}, + }, + BuiltinChains: map[Hook]int{ + Prerouting: 0, + Input: 1, + Output: 2, + Postrouting: 3, }, - DefaultTargets: map[Hook]Target{ - Prerouting: UnconditionalAcceptTarget{}, - Input: UnconditionalAcceptTarget{}, - Output: UnconditionalAcceptTarget{}, - Postrouting: UnconditionalAcceptTarget{}, + Underflows: map[Hook]int{ + Prerouting: 0, + Input: 1, + Output: 2, + Postrouting: 3, }, - UserChains: map[string]Chain{}, + UserChains: map[string]int{}, }, - tablenameMangle: Table{ - BuiltinChains: map[Hook]Chain{ - Prerouting: unconditionalAcceptChain(chainNamePrerouting), - Output: unconditionalAcceptChain(chainNameOutput), + TablenameMangle: Table{ + Rules: []Rule{ + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: PanicTarget{}}, + }, + BuiltinChains: map[Hook]int{ + Prerouting: 0, + Output: 1, }, - DefaultTargets: map[Hook]Target{ - Prerouting: UnconditionalAcceptTarget{}, - Output: UnconditionalAcceptTarget{}, + Underflows: map[Hook]int{ + Prerouting: 0, + Output: 1, }, - UserChains: map[string]Chain{}, + UserChains: map[string]int{}, + }, + TablenameFilter: Table{ + Rules: []Rule{ + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: PanicTarget{}}, + }, + BuiltinChains: map[Hook]int{ + Input: 0, + Forward: 1, + Output: 2, + }, + Underflows: map[Hook]int{ + Input: 0, + Forward: 1, + Output: 2, + }, + UserChains: map[string]int{}, }, }, Priorities: map[Hook][]string{ - Prerouting: []string{tablenameMangle, tablenameNat}, - Output: []string{tablenameMangle, tablenameNat}, + Input: []string{TablenameNat, TablenameFilter}, + Prerouting: []string{TablenameMangle, TablenameNat}, + Output: []string{TablenameMangle, TablenameNat, TablenameFilter}, }, } } -func unconditionalAcceptChain(name string) Chain { - return Chain{ - Name: name, - Rules: []Rule{ - Rule{ - Target: UnconditionalAcceptTarget{}, - }, +func EmptyFilterTable() Table { + return Table{ + Rules: []Rule{}, + BuiltinChains: map[Hook]int{ + Input: HookUnset, + Forward: HookUnset, + Output: HookUnset, + }, + Underflows: map[Hook]int{ + Input: HookUnset, + Forward: HookUnset, + Output: HookUnset, }, + UserChains: map[string]int{}, } } diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go index 19a7f77e3..03c9f19ff 100644 --- a/pkg/tcpip/iptables/targets.go +++ b/pkg/tcpip/iptables/targets.go @@ -33,3 +33,11 @@ type 
UnconditionalDropTarget struct{} func (UnconditionalDropTarget) Action(packet buffer.VectorisedView) (Verdict, string) { return Drop, "" } + +// PanicTarget just panics. +type PanicTarget struct{} + +// Actions implements Target.Action. +func (PanicTarget) Action(packet buffer.VectorisedView) (Verdict, string) { + panic("PanicTarget triggered.") +} diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 42a79ef9f..76364ff1f 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -61,9 +61,12 @@ const ( type Verdict int const ( + // Invalid indicates an unkonwn or erroneous verdict. + Invalid Verdict = iota + // Accept indicates the packet should continue traversing netstack as // normal. - Accept Verdict = iota + Accept // Drop inicates the packet should be dropped, stopping traversing // netstack. @@ -109,24 +112,18 @@ type IPTables struct { // * nat // * mangle type Table struct { - // BuiltinChains holds the un-deletable chains built into netstack. If - // a hook isn't present in the map, this table doesn't utilize that - // hook. - BuiltinChains map[Hook]Chain + // A table is just a list of rules with some entrypoints. + Rules []Rule + + BuiltinChains map[Hook]int + + Underflows map[Hook]int - // DefaultTargets holds a target for each hook that will be executed if - // chain traversal doesn't yield a verdict. - DefaultTargets map[Hook]Target + // DefaultTargets map[Hook]int // UserChains holds user-defined chains for the keyed by name. Users // can give their chains arbitrary names. - UserChains map[string]Chain - - // Chains maps names to chains for both builtin and user-defined chains. - // Its entries point to Chains already either in BuiltinChains or - // UserChains, and its purpose is to make looking up tables by name - // fast. - Chains map[string]*Chain + UserChains map[string]int // Metadata holds information about the Table that is useful to users // of IPTables, but not to the netstack IPTables code itself. @@ -152,20 +149,20 @@ func (table *Table) SetMetadata(metadata interface{}) { table.metadata = metadata } -// A Chain defines a list of rules for packet processing. When a packet -// traverses a chain, it is checked against each rule until either a rule -// returns a verdict or the chain ends. -// -// By convention, builtin chains end with a rule that matches everything and -// returns either Accept or Drop. User-defined chains end with Return. These -// aren't strictly necessary here, but the iptables tool writes tables this way. -type Chain struct { - // Name is the chain name. - Name string - - // Rules is the list of rules to traverse. - Rules []Rule -} +//// A Chain defines a list of rules for packet processing. When a packet +//// traverses a chain, it is checked against each rule until either a rule +//// returns a verdict or the chain ends. +//// +//// By convention, builtin chains end with a rule that matches everything and +//// returns either Accept or Drop. User-defined chains end with Return. These +//// aren't strictly necessary here, but the iptables tool writes tables this way. +//type Chain struct { +// // Name is the chain name. +// Name string + +// // Rules is the list of rules to traverse. +// Rules []Rule +//} // A Rule is a packet processing rule. It consists of two pieces. 
First it // contains zero or more matchers, each of which is a specification of which diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 923f44e68..0cb668635 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -44,6 +44,7 @@ func (FilterInputDropUDP) Name() string { // ContainerAction implements TestCase.ContainerAction. func (FilterInputDropUDP) ContainerAction(ip net.IP) error { if err := filterTable("-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { + // if err := filterTable("-A", "INPUT", "-j", "ACCEPT"); err != nil { return err } diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index bfbf1bb87..e761e0f2f 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -23,6 +23,7 @@ import ( "time" "flag" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/dockerutil" "gvisor.dev/gvisor/runsc/testutil" @@ -166,14 +167,14 @@ func TestFilterInputDropUDP(t *testing.T) { } } -func TestFilterInputDropUDPPort(t *testing.T) { - if err := singleTest(FilterInputDropUDPPort{}); err != nil { - t.Fatal(err) - } -} - -func TestFilterInputDropDifferentUDPPort(t *testing.T) { - if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { - t.Fatal(err) - } -} +// func TestFilterInputDropUDPPort(t *testing.T) { +// if err := singleTest(FilterInputDropUDPPort{}); err != nil { +// t.Fatal(err) +// } +// } + +// func TestFilterInputDropDifferentUDPPort(t *testing.T) { +// if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { +// t.Fatal(err) +// } +// } -- cgit v1.2.3 From 1e1921e2acdb7357972257219fdffb9edf17bf55 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 8 Jan 2020 11:15:46 -0800 Subject: Minor fixes to comments and logging --- pkg/sentry/socket/netfilter/netfilter.go | 10 +++++++--- pkg/sentry/socket/netstack/netstack.go | 8 +++++--- pkg/sentry/syscalls/linux/sys_socket.go | 2 +- pkg/tcpip/iptables/targets.go | 2 +- pkg/tcpip/iptables/types.go | 28 ++++++---------------------- test/iptables/filter_input.go | 3 +-- test/iptables/iptables_test.go | 22 +++++++++++----------- 7 files changed, 32 insertions(+), 43 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 8c7f3c7fc..b7867a576 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -157,9 +157,7 @@ func convertNetstackToBinary(name string, table iptables.Table) (linux.KernelIPT meta.HookEntry[hook] = entries.Size } } - // Is this a chain underflow point? The underflow rule is the last rule - // in the chain, and is an unconditional rule (i.e. it matches any - // packet). This is enforced when saving iptables. + // Is this a chain underflow point? for underflow, underflowRuleIdx := range table.Underflows { if underflowRuleIdx == ruleIdx { meta.Underflow[underflow] = entries.Size @@ -290,6 +288,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { // Get the basic rules data (struct ipt_replace). if len(optVal) < linux.SizeOfIPTReplace { + log.Infof("netfilter.SetEntries: optVal has insufficient size for replace %d", len(optVal)) return syserr.ErrInvalidArgument } var replace linux.IPTReplace @@ -313,6 +312,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { // Get the struct ipt_entry. 
if len(optVal) < linux.SizeOfIPTEntry { + log.Infof("netfilter: optVal has insufficient size for entry %d", len(optVal)) return syserr.ErrInvalidArgument } var entry linux.IPTEntry @@ -328,6 +328,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { // filtering. We reject any nonzero IPTIP values for now. emptyIPTIP := linux.IPTIP{} if entry.IP != emptyIPTIP { + log.Infof("netfilter: non-empty struct iptip found") return syserr.ErrInvalidArgument } @@ -386,6 +387,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { // along with the number of bytes it occupies in optVal. func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { if len(optVal) < linux.SizeOfXTEntryTarget { + log.Infof("netfilter: optVal has insufficient size for entry target %d", len(optVal)) return nil, 0, syserr.ErrInvalidArgument } var target linux.XTEntryTarget @@ -395,6 +397,7 @@ func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { case "": // Standard target. if len(optVal) < linux.SizeOfXTStandardTarget { + log.Infof("netfilter.SetEntries: optVal has insufficient size for standard target %d", len(optVal)) return nil, 0, syserr.ErrInvalidArgument } var target linux.XTStandardTarget @@ -420,6 +423,7 @@ func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { case errorTargetName: // Error target. if len(optVal) < linux.SizeOfXTErrorTarget { + log.Infof("netfilter.SetEntries: optVal has insufficient size for error target %d", len(optVal)) return nil, 0, syserr.ErrInvalidArgument } var target linux.XTErrorTarget diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index f7caa45b4..8c07eef4b 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -326,7 +326,7 @@ func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, } family := usermem.ByteOrder.Uint16(addr) - if family != uint16(sfamily) && (!strict && family != linux.AF_UNSPEC) { + if family != uint16(sfamily) && (strict || family != linux.AF_UNSPEC) { return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported } @@ -1357,7 +1357,8 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa } if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP { - if name == linux.IPT_SO_SET_REPLACE { + switch name { + case linux.IPT_SO_SET_REPLACE: if len(optVal) < linux.SizeOfIPTReplace { return syserr.ErrInvalidArgument } @@ -1371,7 +1372,8 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa return err } return nil - } else if name == linux.IPT_SO_SET_ADD_COUNTERS { + + case linux.IPT_SO_SET_ADD_COUNTERS: // TODO(gvisor.dev/issue/170): Counter support. return nil } diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 4b5aafcc0..cda517a81 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -41,7 +41,7 @@ const maxListenBacklog = 1024 const maxAddrLen = 200 // maxOptLen is the maximum sockopt parameter length we're willing to accept. -const maxOptLen = 1024 +const maxOptLen = 1024 * 8 // maxControlLen is the maximum length of the msghdr.msg_control buffer we're // willing to accept. 
Note that this limit is smaller than Linux, which allows diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go index 03c9f19ff..2c3598e3d 100644 --- a/pkg/tcpip/iptables/targets.go +++ b/pkg/tcpip/iptables/targets.go @@ -34,7 +34,7 @@ func (UnconditionalDropTarget) Action(packet buffer.VectorisedView) (Verdict, st return Drop, "" } -// PanicTarget just panics. +// PanicTarget just panics. It represents a target that should be unreachable. type PanicTarget struct{} // Actions implements Target.Action. diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 76364ff1f..fe0394a31 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -107,20 +107,19 @@ type IPTables struct { Priorities map[Hook][]string } -// A Table defines a set of chains and hooks into the network stack. The -// currently supported tables are: -// * nat -// * mangle +// A Table defines a set of chains and hooks into the network stack. It is +// really just a list of rules with some metadata for entrypoints and such. type Table struct { - // A table is just a list of rules with some entrypoints. + // Rules holds the rules that make up the table. Rules []Rule + // BuiltinChains maps builtin chains to their entrypoints. BuiltinChains map[Hook]int + // Underflows maps builtin chains to their underflow point (i.e. the + // rule to execute if the chain returns without a verdict). Underflows map[Hook]int - // DefaultTargets map[Hook]int - // UserChains holds user-defined chains for the keyed by name. Users // can give their chains arbitrary names. UserChains map[string]int @@ -149,21 +148,6 @@ func (table *Table) SetMetadata(metadata interface{}) { table.metadata = metadata } -//// A Chain defines a list of rules for packet processing. When a packet -//// traverses a chain, it is checked against each rule until either a rule -//// returns a verdict or the chain ends. -//// -//// By convention, builtin chains end with a rule that matches everything and -//// returns either Accept or Drop. User-defined chains end with Return. These -//// aren't strictly necessary here, but the iptables tool writes tables this way. -//type Chain struct { -// // Name is the chain name. -// Name string - -// // Rules is the list of rules to traverse. -// Rules []Rule -//} - // A Rule is a packet processing rule. It consists of two pieces. First it // contains zero or more matchers, each of which is a specification of which // packets this rule applies to. If there are no matchers in the rule, it diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 0cb668635..34a85db97 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -43,8 +43,7 @@ func (FilterInputDropUDP) Name() string { // ContainerAction implements TestCase.ContainerAction. 
func (FilterInputDropUDP) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { - // if err := filterTable("-A", "INPUT", "-j", "ACCEPT"); err != nil { + if err := filterTable("-A", "INPUT", "-j", "ACCEPT"); err != nil { return err } diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index e761e0f2f..2465a4e65 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -167,14 +167,14 @@ func TestFilterInputDropUDP(t *testing.T) { } } -// func TestFilterInputDropUDPPort(t *testing.T) { -// if err := singleTest(FilterInputDropUDPPort{}); err != nil { -// t.Fatal(err) -// } -// } - -// func TestFilterInputDropDifferentUDPPort(t *testing.T) { -// if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { -// t.Fatal(err) -// } -// } +func TestFilterInputDropUDPPort(t *testing.T) { + if err := singleTest(FilterInputDropUDPPort{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDropDifferentUDPPort(t *testing.T) { + if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { + t.Fatal(err) + } +} -- cgit v1.2.3 From 899309c4ebadd0e4e22fbc4da12fbd6719d68a3a Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 8 Jan 2020 11:16:41 -0800 Subject: Revert filter_input change --- test/iptables/filter_input.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 34a85db97..923f44e68 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -43,7 +43,7 @@ func (FilterInputDropUDP) Name() string { // ContainerAction implements TestCase.ContainerAction. func (FilterInputDropUDP) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-j", "ACCEPT"); err != nil { + if err := filterTable("-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { return err } -- cgit v1.2.3 From 2f02e15e54d41d72eb5815a98b49c265977c6507 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 8 Jan 2020 11:17:15 -0800 Subject: Newline --- test/iptables/iptables_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'test') diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 2465a4e65..23d15bf71 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -15,6 +15,7 @@ package iptables import ( + "flag" "fmt" "net" "os" @@ -22,8 +23,6 @@ import ( "testing" "time" - "flag" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/dockerutil" "gvisor.dev/gvisor/runsc/testutil" -- cgit v1.2.3 From 447f64c561e6b5893c1bbae7d641187b7aca64ac Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 8 Jan 2020 12:48:17 -0800 Subject: Added test for unconditional DROP on the filter INPUT chain --- test/iptables/filter_input.go | 32 ++++++++++++++++++++++++++++++++ test/iptables/iptables_test.go | 6 ++++++ 2 files changed, 38 insertions(+) (limited to 'test') diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 923f44e68..1723a4d3e 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -31,6 +31,7 @@ func init() { RegisterTestCase(FilterInputDropUDP{}) RegisterTestCase(FilterInputDropUDPPort{}) RegisterTestCase(FilterInputDropDifferentUDPPort{}) + RegisterTestCase(FilterInputDropAll{}) } // FilterInputDropUDP tests that we can drop UDP traffic. 
@@ -122,3 +123,34 @@ func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error { func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// FilterInputDropAll tests that we can drop all traffic to the INPUT chain. +type FilterInputDropAll struct{} + +// Name implements TestCase.Name. +func (FilterInputDropAll) Name() string { + return "FilterInputDropAll" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDropAll) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-j", "DROP"); err != nil { + return err + } + + // Listen for All packets on dropPort. + if err := listenUDP(dropPort, sendloopDuration); err == nil { + return fmt.Errorf("packets should have been dropped, but got a packet") + } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + return fmt.Errorf("error reading: %v", err) + } + + // At this point we know that reading timed out and never received a + // packet. + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputDropAll) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, dropPort, sendloopDuration) +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 23d15bf71..5927eb017 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -177,3 +177,9 @@ func TestFilterInputDropDifferentUDPPort(t *testing.T) { t.Fatal(err) } } + +func TestFilterInputDropAll(t *testing.T) { + if err := singleTest(FilterInputDropAll{}); err != nil { + t.Fatal(err) + } +} -- cgit v1.2.3 From d01240d871c8737989b1af27c137f6ae40bc6d37 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 8 Jan 2020 13:52:56 -0800 Subject: Take addresses as const PiperOrigin-RevId: 288767927 --- test/syscalls/linux/ip_socket_test_util.cc | 10 +++++----- test/syscalls/linux/ip_socket_test_util.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index 8398fc95f..6b472eb2f 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -187,24 +187,24 @@ PosixErrorOr IfAddrHelper::GetIndex(std::string name) { return InterfaceIndex(name); } -std::string GetAddr4Str(in_addr* a) { +std::string GetAddr4Str(const in_addr* a) { char str[INET_ADDRSTRLEN]; inet_ntop(AF_INET, a, str, sizeof(str)); return std::string(str); } -std::string GetAddr6Str(in6_addr* a) { +std::string GetAddr6Str(const in6_addr* a) { char str[INET6_ADDRSTRLEN]; inet_ntop(AF_INET6, a, str, sizeof(str)); return std::string(str); } -std::string GetAddrStr(sockaddr* a) { +std::string GetAddrStr(const sockaddr* a) { if (a->sa_family == AF_INET) { - auto src = &(reinterpret_cast(a)->sin_addr); + auto src = &(reinterpret_cast(a)->sin_addr); return GetAddr4Str(src); } else if (a->sa_family == AF_INET6) { - auto src = &(reinterpret_cast(a)->sin6_addr); + auto src = &(reinterpret_cast(a)->sin6_addr); return GetAddr6Str(src); } return std::string(""); diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h index 9cb4566db..0f58e0f77 100644 --- a/test/syscalls/linux/ip_socket_test_util.h +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -105,14 +105,14 @@ class IfAddrHelper { }; // GetAddr4Str returns the given IPv4 network address structure as a string. 
-std::string GetAddr4Str(in_addr* a); +std::string GetAddr4Str(const in_addr* a); // GetAddr6Str returns the given IPv6 network address structure as a string. -std::string GetAddr6Str(in6_addr* a); +std::string GetAddr6Str(const in6_addr* a); // GetAddrStr returns the given IPv4 or IPv6 network address structure as a // string. -std::string GetAddrStr(sockaddr* a); +std::string GetAddrStr(const sockaddr* a); } // namespace testing } // namespace gvisor -- cgit v1.2.3 From b2a881784c8e525c1fea71c6f23663413d107f05 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 8 Jan 2020 14:48:47 -0800 Subject: Built dead-simple traversal, but now getting depedency cycle error :'( --- pkg/sentry/socket/netfilter/netfilter.go | 4 +++ pkg/tcpip/iptables/BUILD | 4 ++- pkg/tcpip/iptables/iptables.go | 59 ++++++++++++++++++++++++++++++++ pkg/tcpip/network/ipv4/ipv4.go | 6 ++++ test/iptables/filter_input.go | 2 +- 5 files changed, 73 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index e4c493141..57785220e 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -368,6 +368,10 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { } } + // TODO(gvisor.dev/issue/170): Check the following conditions: + // - There are no loops. + // - There are no chains without an unconditional final rule. + ipt := stack.IPTables() table.SetMetadata(metadata{ HookEntry: replace.HookEntry, diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD index cc5f531e2..6ed7c6da0 100644 --- a/pkg/tcpip/iptables/BUILD +++ b/pkg/tcpip/iptables/BUILD @@ -11,5 +11,7 @@ go_library( ], importpath = "gvisor.dev/gvisor/pkg/tcpip/iptables", visibility = ["//visibility:public"], - deps = ["//pkg/tcpip/buffer"], + deps = [ + "//pkg/tcpip", + ], ) diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index 9e7005374..025a4679d 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -16,6 +16,8 @@ // tool. package iptables +import "github.com/google/netstack/tcpip" + const ( TablenameNat = "nat" TablenameMangle = "mangle" @@ -121,3 +123,60 @@ func EmptyFilterTable() Table { UserChains: map[string]int{}, } } + +// Check runs pkt through the rules for hook. It returns true when the packet +// should continue traversing the network stack and false when it should be +// dropped. +func (it *IPTables) Check(hook Hook, pkt tcpip.PacketBuffer) bool { + // TODO(gvisor.dev/issue/170): A lot of this is uncomplicated because + // we're missing features. Jumps, the call stack, etc. aren't checked + // for yet because we're yet to support them. + log.Infof("kevin: iptables.IPTables: checking hook %v", hook) + + // Go through each table containing the hook. + for _, tablename := range it.Priorities[hook] { + verdict := it.checkTable(tablename) + switch verdict { + // TODO: We either got a final verdict or simply continue on. + } + } +} + +func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename string) bool { + log.Infof("kevin: iptables.IPTables: checking table %q", tablename) + table := it.Tables[tablename] + ruleIdx := table.BuiltinChains[hook] + + // Start from ruleIdx and go down until a rule gives us a verdict. 
+ for ruleIdx := table.BuiltinChains[hook]; ruleIdx < len(table.Rules); ruleIdx++ { + verdict := checkRule(hook, pkt, table, ruleIdx) + switch verdict { + case Accept, Drop: + return verdict + case Continue: + continue + case Stolen, Queue, Repeat, None, Jump, Return: + } + } + + panic("Traversed past the entire list of iptables rules.") +} + +func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) Verdict { + rule := table.Rules[ruleIdx] + // Go through each rule matcher. If they all match, run + // the rule target. + for _, matcher := range rule.Matchers { + matches, hotdrop := matcher.Match(hook, pkt, "") + if hotdrop { + return Drop + } + if !matches { + return Continue + } + } + + // All the matchers matched, so run the target. + verdict, _ := rule.Target.Action(pkt) + return verdict +} diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index e645cf62c..bbb5aafee 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -350,6 +350,12 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt tcpip.PacketBuffer) { } pkt.NetworkHeader = headerView[:h.HeaderLength()] + // iptables filtering. + if ok := iptables.Check(iptables.Input, pkt); !ok { + // iptables is telling us to drop the packet. + return + } + hlen := int(h.HeaderLength()) tlen := int(h.TotalLength()) pkt.Data.TrimFront(hlen) diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 1723a4d3e..7c4d469fa 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -138,7 +138,7 @@ func (FilterInputDropAll) ContainerAction(ip net.IP) error { return err } - // Listen for All packets on dropPort. + // Listen for all packets on dropPort. if err := listenUDP(dropPort, sendloopDuration); err == nil { return fmt.Errorf("packets should have been dropped, but got a packet") } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { -- cgit v1.2.3 From b3ae8a62cfdf13821d35467d4150ed983ac556f1 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Wed, 8 Jan 2020 16:29:12 -0800 Subject: Fix slice bounds out of range panic in parsing socket control message. Panic found by syzakller. 
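
The class of bug fixed by the control-message change below is easy to reproduce in isolation: if a parser trusts the announced control-message length and slices the payload by a fixed field size without first checking how many bytes are actually present, a short buffer produces a slice-bounds panic. A stand-alone illustration of the guard being added (not the sentry's actual parser; the one-byte field size and the names are assumptions) is:

package main

import (
    "errors"
    "fmt"
)

// sizeOfTOS is the assumed payload size of an IP_TOS control message.
const sizeOfTOS = 1

// parseTOS mirrors the shape of the fix: validate the available payload
// length before slicing instead of trusting the header's length field.
// Without the check, buf[:sizeOfTOS] panics on an empty buffer.
func parseTOS(buf []byte) (uint8, error) {
    if len(buf) < sizeOfTOS {
        return 0, errors.New("EINVAL: control message payload too short")
    }
    return buf[0], nil
}

func main() {
    if _, err := parseTOS(nil); err != nil {
        fmt.Println("short payload rejected:", err)
    }
    tos, _ := parseTOS([]byte{0x10})
    fmt.Printf("parsed TOS: %#x\n", tos)
}

The real change applies the same length check for both IP_TOS and IPV6_TCLASS before the payload is unmarshalled.
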
PiperOrigin-RevId: 288799046 --- pkg/sentry/socket/control/control.go | 6 ++++++ test/syscalls/linux/socket_ip_unbound.cc | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'test') diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index af1a4e95f..4301b697c 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -471,6 +471,9 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con case linux.SOL_IP: switch h.Type { case linux.IP_TOS: + if length < linux.SizeOfControlMessageTOS { + return socket.ControlMessages{}, syserror.EINVAL + } cmsgs.IP.HasTOS = true binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTOS], usermem.ByteOrder, &cmsgs.IP.TOS) i += AlignUp(length, width) @@ -481,6 +484,9 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con case linux.SOL_IPV6: switch h.Type { case linux.IPV6_TCLASS: + if length < linux.SizeOfControlMessageTClass { + return socket.ControlMessages{}, syserror.EINVAL + } cmsgs.IP.HasTClass = true binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTClass], usermem.ByteOrder, &cmsgs.IP.TClass) i += AlignUp(length, width) diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc index b6754111f..ca597e267 100644 --- a/test/syscalls/linux/socket_ip_unbound.cc +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -129,6 +129,7 @@ TEST_P(IPUnboundSocketTest, InvalidNegativeTtl) { struct TOSOption { int level; int option; + int cmsg_level; }; constexpr int INET_ECN_MASK = 3; @@ -139,10 +140,12 @@ static TOSOption GetTOSOption(int domain) { case AF_INET: opt.level = IPPROTO_IP; opt.option = IP_TOS; + opt.cmsg_level = SOL_IP; break; case AF_INET6: opt.level = IPPROTO_IPV6; opt.option = IPV6_TCLASS; + opt.cmsg_level = SOL_IPV6; break; } return opt; @@ -386,6 +389,36 @@ TEST_P(IPUnboundSocketTest, NullTOS) { SyscallFailsWithErrno(EFAULT)); } +TEST_P(IPUnboundSocketTest, InsufficientBufferTOS) { + SKIP_IF(GetParam().protocol == IPPROTO_TCP); + + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + TOSOption t = GetTOSOption(GetParam().domain); + + in_addr addr4; + in6_addr addr6; + ASSERT_THAT(inet_pton(AF_INET, "127.0.0.1", &addr4), ::testing::Eq(1)); + ASSERT_THAT(inet_pton(AF_INET6, "fe80::", &addr6), ::testing::Eq(1)); + + cmsghdr cmsg = {}; + cmsg.cmsg_len = sizeof(cmsg); + cmsg.cmsg_level = t.cmsg_level; + cmsg.cmsg_type = t.option; + + msghdr msg = {}; + msg.msg_control = &cmsg; + msg.msg_controllen = sizeof(cmsg); + if (GetParam().domain == AF_INET) { + msg.msg_name = &addr4; + msg.msg_namelen = sizeof(addr4); + } else { + msg.msg_name = &addr6; + msg.msg_namelen = sizeof(addr6); + } + + EXPECT_THAT(sendmsg(socket->get(), &msg, 0), SyscallFailsWithErrno(EINVAL)); +} + INSTANTIATE_TEST_SUITE_P( IPUnboundSockets, IPUnboundSocketTest, ::testing::ValuesIn(VecCat(VecCat( -- cgit v1.2.3 From fbb2c008e26a7e9d860f6cbf796ea7c375858502 Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Wed, 8 Jan 2020 16:35:43 -0800 Subject: Return correct length with MSG_TRUNC for unix sockets. This change calls a new Truncate method on the EndpointReader in RecvMsg for both netlink and unix sockets. This allows readers such as sockets to peek at the length of data without actually reading it to a buffer. 
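
The MSG_TRUNC behaviour this change implements can be observed against host Linux with an ordinary datagram socketpair: a zero-length receive buffer combined with MSG_TRUNC (plus MSG_PEEK to leave the data queued) makes the receive call return the pending message's real length. A small stdlib-only demonstration (it exercises the host kernel, not gVisor):

package main

import (
    "fmt"
    "log"
    "syscall"
)

func main() {
    // A connected pair of unix datagram sockets stands in for the unix and
    // netlink endpoints touched by this change.
    fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0)
    if err != nil {
        log.Fatal(err)
    }
    defer syscall.Close(fds[0])
    defer syscall.Close(fds[1])

    msg := []byte("hello, world")
    if _, err := syscall.Write(fds[0], msg); err != nil {
        log.Fatal(err)
    }

    // Zero-length buffer with MSG_TRUNC|MSG_PEEK: the return value is the
    // true length of the queued datagram, and the datagram stays queued.
    n, _, err := syscall.Recvfrom(fds[1], nil, syscall.MSG_TRUNC|syscall.MSG_PEEK)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println("pending datagram length:", n)

    // The data can still be read normally afterwards.
    buf := make([]byte, 64)
    n, _, err = syscall.Recvfrom(fds[1], buf, 0)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Printf("read %q\n", buf[:n])
}
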
Fixes #993 #1240 PiperOrigin-RevId: 288800167 --- pkg/sentry/socket/netlink/BUILD | 1 - pkg/sentry/socket/netlink/socket.go | 29 +++--- pkg/sentry/socket/unix/io.go | 13 +++ pkg/sentry/socket/unix/unix.go | 23 ++++- test/syscalls/linux/BUILD | 1 - test/syscalls/linux/socket_non_stream.cc | 113 +++++++++++++++++++++- test/syscalls/linux/socket_non_stream_blocking.cc | 37 +++++++ test/syscalls/linux/socket_stream.cc | 55 ++++++++++- 8 files changed, 250 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 79589e3c8..136821963 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -22,7 +22,6 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", - "//pkg/sentry/safemem", "//pkg/sentry/socket", "//pkg/sentry/socket/netlink/port", "//pkg/sentry/socket/unix", diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index 4a1b87a9a..d2e3644a6 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -29,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port" "gvisor.dev/gvisor/pkg/sentry/socket/unix" @@ -500,29 +499,29 @@ func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, have trunc := flags&linux.MSG_TRUNC != 0 r := unix.EndpointReader{ + Ctx: t, Endpoint: s.ep, Peek: flags&linux.MSG_PEEK != 0, } + doRead := func() (int64, error) { + return dst.CopyOutFrom(t, &r) + } + // If MSG_TRUNC is set with a zero byte destination then we still need // to read the message and discard it, or in the case where MSG_PEEK is // set, leave it be. In both cases the full message length must be - // returned. However, the memory manager for the destination will not read - // the endpoint if the destination is zero length. - // - // In order for the endpoint to be read when the destination size is zero, - // we must cause a read of the endpoint by using a separate fake zero - // length block sequence and calling the EndpointReader directly. + // returned. if trunc && dst.Addrs.NumBytes() == 0 { - // Perform a read to a zero byte block sequence. We can ignore the - // original destination since it was zero bytes. The length returned by - // ReadToBlocks is ignored and we return the full message length to comply - // with MSG_TRUNC. - _, err := r.ReadToBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(make([]byte, 0)))) - return int(r.MsgSize), linux.MSG_TRUNC, from, fromLen, socket.ControlMessages{}, syserr.FromError(err) + doRead = func() (int64, error) { + err := r.Truncate() + // Always return zero for bytes read since the destination size is + // zero. 
+ return 0, err + } } - if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { + if n, err := doRead(); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 { var mflags int if n < int64(r.MsgSize) { mflags |= linux.MSG_TRUNC @@ -540,7 +539,7 @@ func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, have defer s.EventUnregister(&e) for { - if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock { + if n, err := doRead(); err != syserror.ErrWouldBlock { var mflags int if n < int64(r.MsgSize) { mflags |= linux.MSG_TRUNC diff --git a/pkg/sentry/socket/unix/io.go b/pkg/sentry/socket/unix/io.go index 2ec1a662d..2447f24ef 100644 --- a/pkg/sentry/socket/unix/io.go +++ b/pkg/sentry/socket/unix/io.go @@ -83,6 +83,19 @@ type EndpointReader struct { ControlTrunc bool } +// Truncate calls RecvMsg on the endpoint without writing to a destination. +func (r *EndpointReader) Truncate() error { + // Ignore bytes read since it will always be zero. + _, ms, c, ct, err := r.Endpoint.RecvMsg(r.Ctx, [][]byte{}, r.Creds, r.NumRights, r.Peek, r.From) + r.Control = c + r.ControlTrunc = ct + r.MsgSize = ms + if err != nil { + return err.ToError() + } + return nil +} + // ReadToBlocks implements safemem.Reader.ReadToBlocks. func (r *EndpointReader) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { return safemem.FromVecReaderFunc{func(bufs [][]byte) (int64, error) { diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 885758054..91effe89a 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -544,8 +544,27 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags if senderRequested { r.From = &tcpip.FullAddress{} } + + doRead := func() (int64, error) { + return dst.CopyOutFrom(t, &r) + } + + // If MSG_TRUNC is set with a zero byte destination then we still need + // to read the message and discard it, or in the case where MSG_PEEK is + // set, leave it be. In both cases the full message length must be + // returned. + if trunc && dst.Addrs.NumBytes() == 0 { + doRead = func() (int64, error) { + err := r.Truncate() + // Always return zero for bytes read since the destination size is + // zero. 
+ return 0, err + } + + } + var total int64 - if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock || dontWait { + if n, err := doRead(); err != syserror.ErrWouldBlock || dontWait { var from linux.SockAddr var fromLen uint32 if r.From != nil && len([]byte(r.From.Addr)) != 0 { @@ -580,7 +599,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags defer s.EventUnregister(&e) for { - if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock { + if n, err := doRead(); err != syserror.ErrWouldBlock { var from linux.SockAddr var fromLen uint32 if r.From != nil { diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 064ce8429..ce8abe217 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2888,7 +2888,6 @@ cc_library( ":unix_domain_socket_test_util", "//test/util:test_util", "//test/util:thread_util", - "//test/util:timer_util", "@com_google_absl//absl/time", "@com_google_googletest//:gtest", ], diff --git a/test/syscalls/linux/socket_non_stream.cc b/test/syscalls/linux/socket_non_stream.cc index d91c5ed39..c61817f14 100644 --- a/test/syscalls/linux/socket_non_stream.cc +++ b/test/syscalls/linux/socket_non_stream.cc @@ -113,7 +113,7 @@ TEST_P(NonStreamSocketPairTest, RecvmsgMsghdrFlagMsgTrunc) { EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data))); // Check that msghdr flags were updated. - EXPECT_EQ(msg.msg_flags, MSG_TRUNC); + EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); } // Stream sockets allow data sent with multiple sends to be peeked at in a @@ -193,7 +193,7 @@ TEST_P(NonStreamSocketPairTest, MsgTruncTruncationRecvmsgMsghdrFlagMsgTrunc) { EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data))); // Check that msghdr flags were updated. - EXPECT_EQ(msg.msg_flags, MSG_TRUNC); + EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); } TEST_P(NonStreamSocketPairTest, MsgTruncSameSize) { @@ -224,5 +224,114 @@ TEST_P(NonStreamSocketPairTest, MsgTruncNotFull) { EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); } +// This test tests reading from a socket with MSG_TRUNC and a zero length +// receive buffer. The user should be able to get the message length. +TEST_P(NonStreamSocketPairTest, RecvmsgMsgTruncZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // The receive buffer is of zero length. + char received_data[0] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + // The syscall succeeds returning the full size of the message on the socket. + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // Check that MSG_TRUNC is set on msghdr flags. + EXPECT_EQ(msg.msg_flags & MSG_TRUNC, MSG_TRUNC); +} + +// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero +// length receive buffer. The user should be able to get the message length +// without reading data off the socket. 
+TEST_P(NonStreamSocketPairTest, RecvmsgMsgTruncMsgPeekZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // The receive buffer is of zero length. + char peek_data[0] = {}; + + struct iovec peek_iov; + peek_iov.iov_base = peek_data; + peek_iov.iov_len = sizeof(peek_data); + struct msghdr peek_msg = {}; + peek_msg.msg_flags = -1; + peek_msg.msg_iov = &peek_iov; + peek_msg.msg_iovlen = 1; + + // The syscall succeeds returning the full size of the message on the socket. + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg, + MSG_TRUNC | MSG_PEEK), + SyscallSucceedsWithValue(sizeof(sent_data))); + + // Check that MSG_TRUNC is set on msghdr flags because the receive buffer is + // smaller than the message size. + EXPECT_EQ(peek_msg.msg_flags & MSG_TRUNC, MSG_TRUNC); + + char received_data[sizeof(sent_data)] = {}; + + struct iovec received_iov; + received_iov.iov_base = received_data; + received_iov.iov_len = sizeof(received_data); + struct msghdr received_msg = {}; + received_msg.msg_flags = -1; + received_msg.msg_iov = &received_iov; + received_msg.msg_iovlen = 1; + + // Next we can read the actual data. + ASSERT_THAT( + RetryEINTR(recvmsg)(sockets->second_fd(), &received_msg, MSG_TRUNC), + SyscallSucceedsWithValue(sizeof(sent_data))); + + EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); + + // Check that MSG_TRUNC is not set on msghdr flags because we read the whole + // message. + EXPECT_EQ(received_msg.msg_flags & MSG_TRUNC, 0); +} + +// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero +// length receive buffer and MSG_DONTWAIT. The user should be able to get an +// EAGAIN or EWOULDBLOCK error response. +TEST_P(NonStreamSocketPairTest, RecvmsgTruncPeekDontwaitZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // NOTE: We don't send any data on the socket. + + // The receive buffer is of zero length. + char peek_data[0] = {}; + + struct iovec peek_iov; + peek_iov.iov_base = peek_data; + peek_iov.iov_len = sizeof(peek_data); + struct msghdr peek_msg = {}; + peek_msg.msg_flags = -1; + peek_msg.msg_iov = &peek_iov; + peek_msg.msg_iovlen = 1; + + // recvmsg fails with EAGAIN because no data is available on the socket. + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg, + MSG_TRUNC | MSG_PEEK | MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_non_stream_blocking.cc b/test/syscalls/linux/socket_non_stream_blocking.cc index 62d87c1af..b052f6e61 100644 --- a/test/syscalls/linux/socket_non_stream_blocking.cc +++ b/test/syscalls/linux/socket_non_stream_blocking.cc @@ -25,6 +25,7 @@ #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" +#include "test/util/thread_util.h" namespace gvisor { namespace testing { @@ -44,5 +45,41 @@ TEST_P(BlockingNonStreamSocketPairTest, RecvLessThanBufferWaitAll) { SyscallSucceedsWithValue(sizeof(sent_data))); } +// This test tests reading from a socket with MSG_TRUNC | MSG_PEEK and a zero +// length receive buffer and MSG_DONTWAIT. The recvmsg call should block on +// reading the data. 
+TEST_P(BlockingNonStreamSocketPairTest, + RecvmsgTruncPeekDontwaitZeroLenBlocking) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // NOTE: We don't initially send any data on the socket. + const int data_size = 10; + char sent_data[data_size]; + RandomizeBuffer(sent_data, data_size); + + // The receive buffer is of zero length. + char peek_data[0] = {}; + + struct iovec peek_iov; + peek_iov.iov_base = peek_data; + peek_iov.iov_len = sizeof(peek_data); + struct msghdr peek_msg = {}; + peek_msg.msg_flags = -1; + peek_msg.msg_iov = &peek_iov; + peek_msg.msg_iovlen = 1; + + ScopedThread t([&]() { + // The syscall succeeds returning the full size of the message on the + // socket. This should block until there is data on the socket. + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &peek_msg, + MSG_TRUNC | MSG_PEEK), + SyscallSucceedsWithValue(data_size)); + }); + + absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(RetryEINTR(send)(sockets->first_fd(), sent_data, data_size, 0), + SyscallSucceedsWithValue(data_size)); +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_stream.cc b/test/syscalls/linux/socket_stream.cc index 346443f96..6522b2e01 100644 --- a/test/syscalls/linux/socket_stream.cc +++ b/test/syscalls/linux/socket_stream.cc @@ -104,7 +104,60 @@ TEST_P(StreamSocketPairTest, RecvmsgMsghdrFlagsNoMsgTrunc) { EXPECT_EQ(0, memcmp(received_data, sent_data, sizeof(received_data))); // Check that msghdr flags were cleared (MSG_TRUNC was not set). - EXPECT_EQ(msg.msg_flags, 0); + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0); +} + +TEST_P(StreamSocketPairTest, RecvmsgTruncZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[0] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT(RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC), + SyscallSucceedsWithValue(0)); + + // Check that msghdr flags were cleared (MSG_TRUNC was not set). + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0); +} + +TEST_P(StreamSocketPairTest, RecvmsgTruncPeekZeroLen) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + char sent_data[10]; + RandomizeBuffer(sent_data, sizeof(sent_data)); + ASSERT_THAT( + RetryEINTR(send)(sockets->first_fd(), sent_data, sizeof(sent_data), 0), + SyscallSucceedsWithValue(sizeof(sent_data))); + + char received_data[0] = {}; + + struct iovec iov; + iov.iov_base = received_data; + iov.iov_len = sizeof(received_data); + struct msghdr msg = {}; + msg.msg_flags = -1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ASSERT_THAT( + RetryEINTR(recvmsg)(sockets->second_fd(), &msg, MSG_TRUNC | MSG_PEEK), + SyscallSucceedsWithValue(0)); + + // Check that msghdr flags were cleared (MSG_TRUNC was not set). + ASSERT_EQ(msg.msg_flags & MSG_TRUNC, 0); } TEST_P(StreamSocketPairTest, MsgTrunc) { -- cgit v1.2.3 From aeb3a4017b9bc038ebe5630fe270d5ea8691d141 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 8 Jan 2020 22:10:35 -0800 Subject: Working on filtering by protocol. 
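
The IPHeaderFilter introduced below carries the fields that iptables compares against the IP header before any extension matchers run. As a rough illustration of what the protocol comparison amounts to (simplified, and not the code in this change): the IPv4 protocol number sits at byte 9 of the fixed header, and a protocol of zero acts as a wildcard, matching the behaviour of omitting -p on the iptables command line.

package main

import "fmt"

// ipHeaderFilter mirrors the idea of the IPHeaderFilter added below; only
// the protocol field is sketched, and the name is illustrative.
type ipHeaderFilter struct {
    Protocol uint16 // 0 acts as a wildcard, like omitting -p in iptables.
}

// matchProtocol compares the rule's protocol against the IPv4 header's
// protocol field, which lives at byte 9 of the fixed header (6 = TCP,
// 17 = UDP).
func matchProtocol(f ipHeaderFilter, ipv4Header []byte) bool {
    if len(ipv4Header) < 20 {
        return false // Too short to be a valid IPv4 header.
    }
    if f.Protocol == 0 {
        return true
    }
    return uint16(ipv4Header[9]) == f.Protocol
}

func main() {
    hdr := make([]byte, 20)
    hdr[9] = 17 // UDP
    fmt.Println(matchProtocol(ipHeaderFilter{Protocol: 17}, hdr)) // true
    fmt.Println(matchProtocol(ipHeaderFilter{Protocol: 6}, hdr))  // false
    fmt.Println(matchProtocol(ipHeaderFilter{}, hdr))             // true (wildcard)
}
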
--- pkg/tcpip/iptables/types.go | 20 ++++++++++++++++++++ test/iptables/iptables_test.go | 34 +++++++++++++++++----------------- 2 files changed, 37 insertions(+), 17 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 9f6906100..4b2a9c294 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -151,6 +151,9 @@ func (table *Table) SetMetadata(metadata interface{}) { // packets this rule applies to. If there are no matchers in the rule, it // applies to any packet. type Rule struct { + // IPHeaderFilters holds basic IP filtering fields common to every rule. + IPHeaderFilter IPHeaderFilter + // Matchers is the list of matchers for this rule. Matchers []Matcher @@ -158,6 +161,23 @@ type Rule struct { Target Target } +// TODO: This is gross. +// TODO: Save this in SetEntries. +// TODO: Utilize this when traversing tables. +type IPHeaderFilter struct { + Source [4]byte + Destination [4]byte + SourceMask [4]byte + DestinationMask [4]byte + OutputInterface string + InputInterface string + OutputInterfaceMask string + InputInterfaceMask string + Protocol uint16 + Flags uint8 + InverseFlags uint8 +} + // A Matcher is the interface for matching packets. type Matcher interface { // Match returns whether the packet matches and whether the packet diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 5927eb017..d040e971a 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -166,20 +166,20 @@ func TestFilterInputDropUDP(t *testing.T) { } } -func TestFilterInputDropUDPPort(t *testing.T) { - if err := singleTest(FilterInputDropUDPPort{}); err != nil { - t.Fatal(err) - } -} - -func TestFilterInputDropDifferentUDPPort(t *testing.T) { - if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { - t.Fatal(err) - } -} - -func TestFilterInputDropAll(t *testing.T) { - if err := singleTest(FilterInputDropAll{}); err != nil { - t.Fatal(err) - } -} +// func TestFilterInputDropUDPPort(t *testing.T) { +// if err := singleTest(FilterInputDropUDPPort{}); err != nil { +// t.Fatal(err) +// } +// } + +// func TestFilterInputDropDifferentUDPPort(t *testing.T) { +// if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { +// t.Fatal(err) +// } +// } + +// func TestFilterInputDropAll(t *testing.T) { +// if err := singleTest(FilterInputDropAll{}); err != nil { +// t.Fatal(err) +// } +// } -- cgit v1.2.3 From 6cc8e2d814f99439e01c308e16f6631d75578ec0 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Thu, 9 Jan 2020 10:03:22 -0800 Subject: Add test to check iptables redirect port rule --- test/iptables/filter_input.go | 28 ++++++++++++++++++++++++++++ test/iptables/iptables_test.go | 7 +++++++ 2 files changed, 35 insertions(+) (limited to 'test') diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 923f44e68..41bb85369 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -23,6 +23,7 @@ import ( const ( dropPort = 2401 acceptPort = 2402 + redirectPort = 42 sendloopDuration = 2 * time.Second network = "udp4" ) @@ -31,6 +32,7 @@ func init() { RegisterTestCase(FilterInputDropUDP{}) RegisterTestCase(FilterInputDropUDPPort{}) RegisterTestCase(FilterInputDropDifferentUDPPort{}) + RegisterTestCase(FilterInputRedirectUDPPort{}) } // FilterInputDropUDP tests that we can drop UDP traffic. 
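
The LocalAction side of these filter and redirect tests calls a sendUDPLoop helper whose body is not part of this diff. A minimal sketch of such a helper (names, payload and timing are assumptions rather than the repository's exact implementation):

package main

import (
    "fmt"
    "net"
    "time"
)

// sendUDPLoopSketch fires UDP datagrams at ip:port for the whole duration,
// giving the container side plenty of chances to observe (or drop) them.
func sendUDPLoopSketch(ip net.IP, port int, duration time.Duration) error {
    conn, err := net.DialUDP("udp4", nil, &net.UDPAddr{IP: ip, Port: port})
    if err != nil {
        return err
    }
    defer conn.Close()

    deadline := time.Now().Add(duration)
    for time.Now().Before(deadline) {
        if _, err := conn.Write([]byte("hello")); err != nil {
            return err
        }
        time.Sleep(200 * time.Millisecond)
    }
    return nil
}

func main() {
    fmt.Println(sendUDPLoopSketch(net.ParseIP("127.0.0.1"), 2402, 2*time.Second))
}
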
@@ -122,3 +124,29 @@ func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error { func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// FilterInputRedirectUDPPort tests that packets are redirected to different port. +type FilterInputRedirectUDPPort struct{} + +// Name implements TestCase.Name. +func (FilterInputRedirectUDPPort) Name() string { + return "FilterInputRedirectUDPPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputRedirectUDPPort) ContainerAction(ip net.IP) error { + if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { + return err + } + + if err := listenUDP(redirectPort, sendloopDuration); err != nil { + return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", acceptPort, redirectPort, err) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputRedirectUDPPort) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index bfbf1bb87..d57ddc0fe 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -177,3 +177,10 @@ func TestFilterInputDropDifferentUDPPort(t *testing.T) { t.Fatal(err) } } + +func TestFilterInputRedirectUDPPort(t *testing.T) { + if err := singleTest(FilterInputRedirectUDPPort{}); err != nil { + t.Fatal(err) + } +} + -- cgit v1.2.3 From 89d11b4d96b0c40e373f14ba72d570c9b894f976 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 9 Jan 2020 13:41:52 -0800 Subject: Added a test that we don't pass yet --- pkg/sentry/socket/netfilter/netfilter.go | 37 +++++++++++++++++++++++++---- pkg/tcpip/iptables/iptables.go | 7 ++++++ pkg/tcpip/iptables/types.go | 4 ++-- test/iptables/BUILD | 4 ++++ test/iptables/filter_input.go | 30 ++++++++++++++++++++++++ test/iptables/iptables_test.go | 16 +++++++++---- test/iptables/iptables_util.go | 40 ++++++++++++++++++++++++++++++++ test/iptables/runner/BUILD | 1 + 8 files changed, 127 insertions(+), 12 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 014dfa625..f30461936 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -323,10 +323,9 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { // TODO(gvisor.dev/issue/170): We should support IPTIP // filtering. We reject any nonzero IPTIP values for now. - emptyIPTIP := linux.IPTIP{} - if entry.IP != emptyIPTIP { - log.Warningf("netfilter: non-empty struct iptip found") - return syserr.ErrInvalidArgument + filter, err := filterFromIPTIP(entry.IP) + if err != nil { + return err } // Get the target of the rule. 
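
filterFromIPTIP, defined later in this change, only copies the protocol field for now. Supporting source and destination filtering later would presumably mean converting the kernel ABI's raw 4-byte address and mask pairs into something easier to test against, for example net.IPNet. A hypothetical helper showing that conversion (illustrative only, not part of this change):

package main

import (
    "fmt"
    "net"
)

// toIPNet turns a raw address/mask pair, as carried by the iptables ABI,
// into a net.IPNet that higher-level code can query directly.
func toIPNet(addr, mask [4]byte) *net.IPNet {
    return &net.IPNet{
        IP:   net.IPv4(addr[0], addr[1], addr[2], addr[3]).To4(),
        Mask: net.IPv4Mask(mask[0], mask[1], mask[2], mask[3]),
    }
}

func main() {
    src := toIPNet([4]byte{192, 168, 1, 0}, [4]byte{255, 255, 255, 0})
    fmt.Println(src)                                       // 192.168.1.0/24
    fmt.Println(src.Contains(net.ParseIP("192.168.1.42"))) // true
    fmt.Println(src.Contains(net.ParseIP("10.0.0.1")))     // false
}
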
@@ -336,7 +335,10 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { } optVal = optVal[consumed:] - table.Rules = append(table.Rules, iptables.Rule{Target: target}) + table.Rules = append(table.Rules, iptables.Rule{ + Filter: filter, + Target: target, + }) offsets = append(offsets, offset) offset += linux.SizeOfIPTEntry + consumed } @@ -447,6 +449,31 @@ func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { return nil, 0, syserr.ErrInvalidArgument } +func filterFromIPTIP(iptip linux.IPTIP) (iptables.IPHeaderFilter, *syserr.Error) { + if containsUnsupportedFields(iptip) { + log.Warningf("netfilter: unsupported fields in struct iptip: %+v") + return iptables.IPHeaderFilter{}, syserr.ErrInvalidArgument + } + return iptables.IPHeaderFilter{ + Protocol: iptip.Protocol, + }, nil +} + +func containsUnsupportedFields(iptip linux.IPTIP) bool { + // Currently we check that everything except protocol is zeroed. + var emptyInetAddr = linux.InetAddr{} + var emptyInterface = [linux.IFNAMSIZ]byte{} + return iptip.Dst != emptyInetAddr || + iptip.SrcMask != emptyInetAddr || + iptip.DstMask != emptyInetAddr || + iptip.InputInterface != emptyInterface || + iptip.OutputInterface != emptyInterface || + iptip.InputInterfaceMask != emptyInterface || + iptip.OutputInterfaceMask != emptyInterface || + iptip.Flags != 0 || + iptip.InverseFlags != 0 +} + func hookFromLinux(hook int) iptables.Hook { switch hook { case linux.NF_INET_PRE_ROUTING: diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index 91abbbea8..b8d70ec1e 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -185,6 +185,13 @@ func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename stri func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) Verdict { rule := table.Rules[ruleIdx] + + // First check whether the packet matches the IP header filter. + // TODO(gvisor.dev/issue/170): Support other fields of the filter. + // if rule.Filter.Protocol != pkt.Protocol { + // return Continue + // } + // Go through each rule matcher. If they all match, run // the rule target. for _, matcher := range rule.Matchers { diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 4b2a9c294..4bedd9bc8 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -151,8 +151,8 @@ func (table *Table) SetMetadata(metadata interface{}) { // packets this rule applies to. If there are no matchers in the rule, it // applies to any packet. type Rule struct { - // IPHeaderFilters holds basic IP filtering fields common to every rule. - IPHeaderFilter IPHeaderFilter + // IPHeaderFilter holds basic IP filtering fields common to every rule. + Filter IPHeaderFilter // Matchers is the list of matchers for this rule. 
Matchers []Matcher diff --git a/test/iptables/BUILD b/test/iptables/BUILD index fa833c3b2..6a9d05828 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -4,6 +4,7 @@ package(licenses = ["notice"]) go_library( name = "iptables", + testonly = 1, srcs = [ "filter_input.go", "iptables.go", @@ -11,6 +12,9 @@ go_library( ], importpath = "gvisor.dev/gvisor/test/iptables", visibility = ["//test/iptables:__subpackages__"], + deps = [ + "//runsc/testutil", + ], ) go_test( diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 7c4d469fa..a3f0052b5 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -28,6 +28,7 @@ const ( ) func init() { + RegisterTestCase(FilterInputDropOnlyUDP{}) RegisterTestCase(FilterInputDropUDP{}) RegisterTestCase(FilterInputDropUDPPort{}) RegisterTestCase(FilterInputDropDifferentUDPPort{}) @@ -65,6 +66,35 @@ func (FilterInputDropUDP) LocalAction(ip net.IP) error { return sendUDPLoop(ip, dropPort, sendloopDuration) } +// FilterInputDropOnlyUDP tests that "-p udp -j DROP" only affects UDP traffic. +type FilterInputDropOnlyUDP struct{} + +// Name implements TestCase.Name. +func (FilterInputDropOnlyUDP) Name() string { + return "FilterInputDropOnlyUDP" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDropOnlyUDP) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { + return err + } + + // Listen for a TCP connection, which should be allowed. + if err := listenTCP(acceptPort, sendloopDuration); err != nil { + return fmt.Errorf("failed to establish a connection %v", err) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputDropOnlyUDP) LocalAction(ip net.IP) error { + // Try to establish a TCP connection with the container, which should + // succeed. + return connectLoopTCP(ip, acceptPort, sendloopDuration) +} + // FilterInputDropUDPPort tests that we can drop UDP traffic by port. 
type FilterInputDropUDPPort struct{} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index d040e971a..beaaf519c 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -160,11 +160,11 @@ func logContainer(output string, err error) { log.Infof(msg) } -func TestFilterInputDropUDP(t *testing.T) { - if err := singleTest(FilterInputDropUDP{}); err != nil { - t.Fatal(err) - } -} +// func TestFilterInputDropUDP(t *testing.T) { +// if err := singleTest(FilterInputDropUDP{}); err != nil { +// t.Fatal(err) +// } +// } // func TestFilterInputDropUDPPort(t *testing.T) { // if err := singleTest(FilterInputDropUDPPort{}); err != nil { @@ -183,3 +183,9 @@ func TestFilterInputDropUDP(t *testing.T) { // t.Fatal(err) // } // } + +func TestFilterInputDropOnlyUDP(t *testing.T) { + if err := singleTest(FilterInputDropOnlyUDP{}); err != nil { + t.Fatal(err) + } +} diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 3a4d11f1a..3dcaafb79 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -19,6 +19,8 @@ import ( "net" "os/exec" "time" + + "gvisor.dev/gvisor/runsc/testutil" ) const iptablesBinary = "iptables" @@ -80,3 +82,41 @@ func sendUDPLoop(ip net.IP, port int, duration time.Duration) error { return nil } + +func listenTCP(port int, timeout time.Duration) error { + localAddr := net.TCPAddr{Port: acceptPort} + listener, err := net.ListenTCP("tcp4", &localAddr) + if err != nil { + return err + } + defer listener.Close() + listener.SetDeadline(time.Now().Add(timeout)) + conn, err := listener.AcceptTCP() + if err != nil { + return fmt.Errorf("failed to establish a connection %v", err) + } + defer conn.Close() + + return nil +} + +func connectLoopTCP(ip net.IP, port int, timeout time.Duration) error { + contAddr := net.TCPAddr{ + IP: ip, + Port: port, + } + // The container may not be listening when we first connect, so retry + // upon error. + cb := func() error { + conn, err := net.DialTCP("tcp4", nil, &contAddr) + if conn != nil { + conn.Close() + } + return err + } + if err := testutil.Poll(cb, timeout); err != nil { + return fmt.Errorf("timed out waiting to send IP, most recent error: %v", err) + } + + return nil +} diff --git a/test/iptables/runner/BUILD b/test/iptables/runner/BUILD index c6c42d870..a5b6f082c 100644 --- a/test/iptables/runner/BUILD +++ b/test/iptables/runner/BUILD @@ -10,6 +10,7 @@ container_image( go_image( name = "runner", + testonly = 1, srcs = ["main.go"], base = ":iptables-base", deps = ["//test/iptables"], -- cgit v1.2.3 From 04abc9cf558930472605bf740a4333d6fafe5930 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Thu, 9 Jan 2020 15:38:28 -0800 Subject: Add test for redirect port Fix the indentation and print statements. Moved the NAT redirect tests to new file. Added negative test to check redirect rule on ports other than redirected port. 
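
Both the NAT redirect tests added below and the earlier filter tests install their rules through the filterTable helper, which is not shown in this excerpt. Given the iptablesBinary constant and the os/exec import above, it presumably just shells out to the iptables binary; a simplified stand-in (an assumption, not the actual helper) would look like:

package main

import (
    "fmt"
    "os/exec"
)

// filterTableSketch runs the iptables binary inside the container with the
// given arguments and surfaces any failure along with the tool's output.
func filterTableSketch(args ...string) error {
    if out, err := exec.Command("iptables", args...).CombinedOutput(); err != nil {
        return fmt.Errorf("iptables %v failed: %v (output: %s)", args, err, out)
    }
    return nil
}

func main() {
    // The rule installed by NATRedirectUDPPort below: rewrite all inbound
    // UDP to local port 42 before routing decisions are made.
    err := filterTableSketch("-t", "nat", "-A", "PREROUTING",
        "-p", "udp", "-j", "REDIRECT", "--to-ports", "42")
    fmt.Println("install rule:", err)
}

REDIRECT in the nat table's PREROUTING chain rewrites the destination to a local port before the routing decision, which is why the container listens on redirectPort while the sender keeps targeting acceptPort.
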
--- test/iptables/BUILD | 1 + test/iptables/filter_input.go | 28 -------------- test/iptables/iptables_test.go | 9 ++++- test/iptables/nat.go | 83 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 30 deletions(-) create mode 100644 test/iptables/nat.go (limited to 'test') diff --git a/test/iptables/BUILD b/test/iptables/BUILD index fa833c3b2..68eed721e 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -8,6 +8,7 @@ go_library( "filter_input.go", "iptables.go", "iptables_util.go", + "nat.go", ], importpath = "gvisor.dev/gvisor/test/iptables", visibility = ["//test/iptables:__subpackages__"], diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 41bb85369..923f44e68 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -23,7 +23,6 @@ import ( const ( dropPort = 2401 acceptPort = 2402 - redirectPort = 42 sendloopDuration = 2 * time.Second network = "udp4" ) @@ -32,7 +31,6 @@ func init() { RegisterTestCase(FilterInputDropUDP{}) RegisterTestCase(FilterInputDropUDPPort{}) RegisterTestCase(FilterInputDropDifferentUDPPort{}) - RegisterTestCase(FilterInputRedirectUDPPort{}) } // FilterInputDropUDP tests that we can drop UDP traffic. @@ -124,29 +122,3 @@ func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error { func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } - -// FilterInputRedirectUDPPort tests that packets are redirected to different port. -type FilterInputRedirectUDPPort struct{} - -// Name implements TestCase.Name. -func (FilterInputRedirectUDPPort) Name() string { - return "FilterInputRedirectUDPPort" -} - -// ContainerAction implements TestCase.ContainerAction. -func (FilterInputRedirectUDPPort) ContainerAction(ip net.IP) error { - if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { - return err - } - - if err := listenUDP(redirectPort, sendloopDuration); err != nil { - return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", acceptPort, redirectPort, err) - } - - return nil -} - -// LocalAction implements TestCase.LocalAction. -func (FilterInputRedirectUDPPort) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) -} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index d57ddc0fe..fce9247aa 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -178,9 +178,14 @@ func TestFilterInputDropDifferentUDPPort(t *testing.T) { } } -func TestFilterInputRedirectUDPPort(t *testing.T) { - if err := singleTest(FilterInputRedirectUDPPort{}); err != nil { +func TestFilterNATRedirectUDPPort(t *testing.T) { + if err := singleTest(FilterNATRedirectUDPPort{}); err != nil { t.Fatal(err) } } +func TestFilterNATDropUDP(t *testing.T) { + if err := singleTest(FilterNATDropUDP{}); err != nil { + t.Fatal(err) + } +} diff --git a/test/iptables/nat.go b/test/iptables/nat.go new file mode 100644 index 000000000..6deabf217 --- /dev/null +++ b/test/iptables/nat.go @@ -0,0 +1,83 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "fmt" + "net" +) + +const ( + redirectPort = 42 +) + +func init() { + RegisterTestCase(FilterNATRedirectUDPPort{}) + RegisterTestCase(FilterNATDropUDP{}) +} + +// FilterInputRedirectUDPPort tests that packets are redirected to different port. +type FilterNATRedirectUDPPort struct{} + +// Name implements TestCase.Name. +func (FilterNATRedirectUDPPort) Name() string { + return "FilterNATRedirectUDPPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterNATRedirectUDPPort) ContainerAction(ip net.IP) error { + if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", + fmt.Sprintf("%d", redirectPort)); err != nil { + return err + } + + if err := listenUDP(redirectPort, sendloopDuration); err != nil { + return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", redirectPort, err) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterNATRedirectUDPPort) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// FilterNATDropUDP tests that packets are not received in ports other than redirect port. +type FilterNATDropUDP struct{} + +// Name implements TestCase.Name. +func (FilterNATDropUDP) Name() string { + return "FilterNATDropUDP" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterNATDropUDP) ContainerAction(ip net.IP) error { + if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", + fmt.Sprintf("%d", redirectPort)); err != nil { + return err + } + + if err := listenUDP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("packets on port %d should have been redirected to port %d", acceptPort, redirectPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterNATDropUDP) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} -- cgit v1.2.3 From 356d81146bafc4b4548163eb87e886c851b49e12 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 9 Jan 2020 17:56:58 -0800 Subject: Deflake a couple of TCP syscall tests when run under gotsan. PiperOrigin-RevId: 289010316 --- .../linux/socket_bind_to_device_distribution.cc | 25 ++++++++++++++++++-- test/syscalls/linux/socket_inet_loopback.cc | 27 +++++++++++++++++++--- 2 files changed, 47 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_bind_to_device_distribution.cc b/test/syscalls/linux/socket_bind_to_device_distribution.cc index 5767181a1..5ed57625c 100644 --- a/test/syscalls/linux/socket_bind_to_device_distribution.cc +++ b/test/syscalls/linux/socket_bind_to_device_distribution.cc @@ -183,7 +183,14 @@ TEST_P(BindToDeviceDistributionTest, Tcp) { } // Receive some data from a socket to be sure that the connect() // system call has been completed on another side. - int data; + // Do a short read and then close the socket to trigger a RST. This + // ensures that both ends of the connection are cleaned up and no + // goroutines hang around in TIME-WAIT. 
We do this so that this test + // does not timeout under gotsan runs where lots of goroutines can + // cause the test to use absurd amounts of memory. + // + // See: https://tools.ietf.org/html/rfc2525#page-50 section 2.17 + uint16_t data; EXPECT_THAT( RetryEINTR(recv)(fd.ValueOrDie().get(), &data, sizeof(data), 0), SyscallSucceedsWithValue(sizeof(data))); @@ -198,15 +205,29 @@ TEST_P(BindToDeviceDistributionTest, Tcp) { } for (int i = 0; i < kConnectAttempts; i++) { - FileDescriptor const fd = ASSERT_NO_ERRNO_AND_VALUE( + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); ASSERT_THAT( RetryEINTR(connect)(fd.get(), reinterpret_cast(&conn_addr), connector.addr_len), SyscallSucceeds()); + // Do two separate sends to ensure two segments are received. This is + // required for netstack where read is incorrectly assuming a whole + // segment is read when endpoint.Read() is called which is technically + // incorrect as the syscall that invoked endpoint.Read() may only + // consume it partially. This results in a case where a close() of + // such a socket does not trigger a RST in netstack due to the + // endpoint assuming that the endpoint has no unread data. EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0), SyscallSucceedsWithValue(sizeof(i))); + + // TODO(gvisor.dev/issue/1449): Remove this block once netstack correctly + // generates a RST. + if (IsRunningOnGvisor()) { + EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + } } // Join threads to be sure that all connections have been counted. diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 619d41901..138024d9e 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -714,7 +714,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { sockaddr_storage listen_addr = listener.addr; sockaddr_storage conn_addr = connector.addr; constexpr int kThreadCount = 3; - constexpr int kConnectAttempts = 4096; + constexpr int kConnectAttempts = 10000; // Create the listening socket. FileDescriptor listener_fds[kThreadCount]; @@ -729,7 +729,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { ASSERT_THAT( bind(fd, reinterpret_cast(&listen_addr), listener.addr_len), SyscallSucceeds()); - ASSERT_THAT(listen(fd, kConnectAttempts / 3), SyscallSucceeds()); + ASSERT_THAT(listen(fd, 40), SyscallSucceeds()); // On the first bind we need to determine which port was bound. if (i != 0) { @@ -772,7 +772,14 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { } // Receive some data from a socket to be sure that the connect() // system call has been completed on another side. - int data; + // Do a short read and then close the socket to trigger a RST. This + // ensures that both ends of the connection are cleaned up and no + // goroutines hang around in TIME-WAIT. We do this so that this test + // does not timeout under gotsan runs where lots of goroutines can + // cause the test to use absurd amounts of memory. 
+ // + // See: https://tools.ietf.org/html/rfc2525#page-50 section 2.17 + uint16_t data; EXPECT_THAT( RetryEINTR(recv)(fd.ValueOrDie().get(), &data, sizeof(data), 0), SyscallSucceedsWithValue(sizeof(data))); @@ -795,8 +802,22 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { connector.addr_len), SyscallSucceeds()); + // Do two separate sends to ensure two segments are received. This is + // required for netstack where read is incorrectly assuming a whole + // segment is read when endpoint.Read() is called which is technically + // incorrect as the syscall that invoked endpoint.Read() may only + // consume it partially. This results in a case where a close() of + // such a socket does not trigger a RST in netstack due to the + // endpoint assuming that the endpoint has no unread data. EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0), SyscallSucceedsWithValue(sizeof(i))); + + // TODO(gvisor.dev/issue/1449): Remove this block once netstack correctly + // generates a RST. + if (IsRunningOnGvisor()) { + EXPECT_THAT(RetryEINTR(send)(fd.get(), &i, sizeof(i), 0), + SyscallSucceedsWithValue(sizeof(i))); + } } }); -- cgit v1.2.3 From ebd25099bfb9ac6af9739dd9a7795aff13f8e34a Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Fri, 10 Jan 2020 16:45:45 +0800 Subject: enable //test/syscalls:proc_test support on Arm64 Problems with different platform architectures have been solved. Signed-off-by: Bin Lu --- test/syscalls/linux/proc.cc | 70 +++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 8cf08991b..66f89ef64 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -102,7 +102,55 @@ namespace { // O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0 // because "it isn't needed", even though Linux can return it via F_GETFL. +#if defined(__x86_64__) || defined(__i386__) constexpr int kOLargeFile = 00100000; +#elif __aarch64__ +// The value originate from the Linux +// kernel's arch/arm64/include/uapi/asm/fcntl.h. +constexpr int kOLargeFile = 00400000; +#else +#error "Unknown architecture" +#endif + +#if defined(__x86_64__) || defined(__i386__) + // This list of "required" fields is taken from reading the file + // arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally + // printed by the kernel. + static const char* required_fields[] = { + "processor", + "vendor_id", + "cpu family", + "model\t\t:", + "model name", + "stepping", + "cpu MHz", + "fpu\t\t:", + "fpu_exception", + "cpuid level", + "wp", + "bogomips", + "clflush size", + "cache_alignment", + "address sizes", + "power management", + }; +#elif __aarch64__ + // This list of "required" fields is taken from reading the file + // arch/arm64/kernel/cpuinfo.c and seeing which fields will be unconditionally + // printed by the kernel. + static const char* required_fields[] = { + "processor", + "BogoMIPS", + "Features", + "CPU implementer", + "CPU architecture", + "CPU variant", + "CPU part", + "CPU revision", + }; +#else +#error "Unknown architecture" +#endif // Takes the subprocess command line and pid. // If it returns !OK, WithSubprocess returns immediately. 
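
The per-architecture field lists above are chosen at compile time with preprocessor guards; the same split can be expressed in Go with a GOARCH switch or build-constrained files. Purely as an illustration of the idea, with abbreviated lists that are not tied to this repository's code:

package main

import (
    "fmt"
    "os"
    "runtime"
    "strings"
)

// requiredCPUInfoFields returns a per-architecture subset of the
// /proc/cpuinfo fields expected to be present unconditionally.
func requiredCPUInfoFields() []string {
    switch runtime.GOARCH {
    case "amd64", "386":
        return []string{"processor", "vendor_id", "cpu family", "model name"}
    case "arm64":
        return []string{"processor", "BogoMIPS", "Features", "CPU implementer"}
    default:
        return []string{"processor"}
    }
}

func main() {
    data, err := os.ReadFile("/proc/cpuinfo")
    if err != nil {
        fmt.Println("could not read /proc/cpuinfo:", err)
        return
    }
    for _, field := range requiredCPUInfoFields() {
        fmt.Printf("%q present: %v\n", field, strings.Contains(string(data), field))
    }
}
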
@@ -717,28 +765,6 @@ TEST(ProcCpuinfo, RequiredFieldsArePresent) { ASSERT_FALSE(proc_cpuinfo.empty()); std::vector cpuinfo_fields = absl::StrSplit(proc_cpuinfo, '\n'); - // This list of "required" fields is taken from reading the file - // arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally - // printed by the kernel. - static const char* required_fields[] = { - "processor", - "vendor_id", - "cpu family", - "model\t\t:", - "model name", - "stepping", - "cpu MHz", - "fpu\t\t:", - "fpu_exception", - "cpuid level", - "wp", - "bogomips", - "clflush size", - "cache_alignment", - "address sizes", - "power management", - }; - // Check that the usual fields are there. We don't really care about the // contents. for (const std::string& field : required_fields) { -- cgit v1.2.3 From 9aeb053bbaf834aab5b716b8645996943262b525 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Fri, 10 Jan 2020 09:05:25 -0800 Subject: Add tests for redirect port Fix indentation and change function names. --- test/iptables/iptables_test.go | 10 +++++----- test/iptables/nat.go | 45 +++++++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 28 deletions(-) (limited to 'test') diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index fce9247aa..05f27569f 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -178,14 +178,14 @@ func TestFilterInputDropDifferentUDPPort(t *testing.T) { } } -func TestFilterNATRedirectUDPPort(t *testing.T) { - if err := singleTest(FilterNATRedirectUDPPort{}); err != nil { +func TestNATRedirectUDPPort(t *testing.T) { + if err := singleTest(NATRedirectUDPPort{}); err != nil { t.Fatal(err) } } -func TestFilterNATDropUDP(t *testing.T) { - if err := singleTest(FilterNATDropUDP{}); err != nil { - t.Fatal(err) +func TestNATDropUDP(t *testing.T) { + if err := singleTest(NATDropUDP{}); err != nil { + t.Fatal(err) } } diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 6deabf217..72c413af2 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -1,4 +1,4 @@ -// Copyright 2019 The gVisor Authors. +// Copyright 2020 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,56 +15,55 @@ package iptables import ( - "fmt" - "net" + "fmt" + "net" ) const ( - redirectPort = 42 + redirectPort = 42 ) func init() { - RegisterTestCase(FilterNATRedirectUDPPort{}) - RegisterTestCase(FilterNATDropUDP{}) + RegisterTestCase(NATRedirectUDPPort{}) + RegisterTestCase(NATDropUDP{}) } -// FilterInputRedirectUDPPort tests that packets are redirected to different port. -type FilterNATRedirectUDPPort struct{} +// InputRedirectUDPPort tests that packets are redirected to different port. +type NATRedirectUDPPort struct{} // Name implements TestCase.Name. -func (FilterNATRedirectUDPPort) Name() string { - return "FilterNATRedirectUDPPort" +func (NATRedirectUDPPort) Name() string { + return "NATRedirectUDPPort" } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterNATRedirectUDPPort) ContainerAction(ip net.IP) error { - if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", +func (NATRedirectUDPPort) ContainerAction(ip net.IP) error { + if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } if err := listenUDP(redirectPort, sendloopDuration); err != nil { - return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", redirectPort, err) + return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", redirectPort, err) } - return nil } // LocalAction implements TestCase.LocalAction. -func (FilterNATRedirectUDPPort) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) +func (NATRedirectUDPPort) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) } -// FilterNATDropUDP tests that packets are not received in ports other than redirect port. -type FilterNATDropUDP struct{} +// NATDropUDP tests that packets are not received in ports other than redirect port. +type NATDropUDP struct{} // Name implements TestCase.Name. -func (FilterNATDropUDP) Name() string { - return "FilterNATDropUDP" +func (NATDropUDP) Name() string { + return "NATDropUDP" } // ContainerAction implements TestCase.ContainerAction. -func (FilterNATDropUDP) ContainerAction(ip net.IP) error { +func (NATDropUDP) ContainerAction(ip net.IP) error { if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err @@ -78,6 +77,6 @@ func (FilterNATDropUDP) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterNATDropUDP) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) +func (NATDropUDP) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) } -- cgit v1.2.3 From bf6429b944aed6de073c62ceb446cfaed5042dbc Mon Sep 17 00:00:00 2001 From: Brad Burlage Date: Fri, 10 Jan 2020 16:34:59 -0800 Subject: Don't set RWF_HIPRI on InvalidOffset test. This test fails on ubuntu 18.04 because preadv2 for some reason returns EOPNOTSUPP instead of EINVAL. Instead of root-causing the failure, I'm dropping the flag in the preadv2 call since it isn't under test in this scenario. PiperOrigin-RevId: 289188358 --- test/syscalls/linux/preadv2.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/preadv2.cc b/test/syscalls/linux/preadv2.cc index c9246367d..cd936ea90 100644 --- a/test/syscalls/linux/preadv2.cc +++ b/test/syscalls/linux/preadv2.cc @@ -202,7 +202,7 @@ TEST(Preadv2Test, TestInvalidOffset) { iov[0].iov_len = 0; EXPECT_THAT(preadv2(fd.get(), iov.get(), /*iovcnt=*/1, /*offset=*/-8, - /*flags=*/RWF_HIPRI), + /*flags=*/0), SyscallFailsWithErrno(EINVAL)); } -- cgit v1.2.3 From 98327a94cce7597589ac22b8557c5d9a2a03464d Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Mon, 13 Jan 2020 09:11:40 -0800 Subject: Add test for iptables TCP rule Added tests for tcp protocol with input and output rules including options sport and dport Increased timeout in iptables_test as TCP tests were timing out with existing value. 
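
Exercising a --sport rule requires the client to control the source port of its connection. The connectTCP helper added below does this by passing a local address to DialTCP; in isolation the technique looks like this (a sketch with assumed port numbers, not the helper itself):

package main

import (
    "fmt"
    "net"
)

// dialFromPort opens a TCP connection whose source port is fixed, which is
// what lets an iptables --sport rule be exercised deterministically from
// the test's local side.
func dialFromPort(ip net.IP, remotePort, localPort int) (*net.TCPConn, error) {
    laddr := &net.TCPAddr{Port: localPort}
    raddr := &net.TCPAddr{IP: ip, Port: remotePort}
    return net.DialTCP("tcp4", laddr, raddr)
}

func main() {
    conn, err := dialFromPort(net.ParseIP("127.0.0.1"), 2402, 2401)
    if err != nil {
        fmt.Println("connect failed (expected if nothing is listening):", err)
        return
    }
    defer conn.Close()
    fmt.Println("connected from", conn.LocalAddr(), "to", conn.RemoteAddr())
}
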
--- test/iptables/BUILD | 1 + test/iptables/filter_input.go | 66 +++++++++++++++++++++++++++++++ test/iptables/filter_output.go | 89 ++++++++++++++++++++++++++++++++++++++++++ test/iptables/iptables_test.go | 26 +++++++++++- test/iptables/iptables_util.go | 55 ++++++++++++++++++++++++++ 5 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 test/iptables/filter_output.go (limited to 'test') diff --git a/test/iptables/BUILD b/test/iptables/BUILD index 68eed721e..372ba7abf 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -6,6 +6,7 @@ go_library( name = "iptables", srcs = [ "filter_input.go", + "filter_output.go", "iptables.go", "iptables_util.go", "nat.go", diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 923f44e68..1c04601df 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -31,6 +31,8 @@ func init() { RegisterTestCase(FilterInputDropUDP{}) RegisterTestCase(FilterInputDropUDPPort{}) RegisterTestCase(FilterInputDropDifferentUDPPort{}) + RegisterTestCase(FilterInputDropTCPDestPort{}) + RegisterTestCase(FilterInputDropTCPSrcPort{}) } // FilterInputDropUDP tests that we can drop UDP traffic. @@ -122,3 +124,67 @@ func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error { func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// FilterInputDropTCP tests that connections are not accepted on specified source ports. +type FilterInputDropTCPDestPort struct{} + +// Name implements TestCase.Name. +func (FilterInputDropTCPDestPort) Name() string { + return "FilterInputDropTCPDestPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-p", "tcp", "-m", "tcp", "--dport", + fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + return err + } + + // Listen for TCP packets on drop port. + if err := listenTCP(dropPort, sendloopDuration); err == nil { + return fmt.Errorf("Connections on port %d should not be accepted, but got accepted", dropPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputDropTCPDestPort) LocalAction(ip net.IP) error { + if err := connectTCP(ip, dropPort, acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("Connection destined to port %d should not be accepted, but got accepted", dropPort) + } + + return nil +} + +// FilterInputDropTCPSrcPort tests that connections are not accepted on specified source ports. +type FilterInputDropTCPSrcPort struct{} + +// Name implements TestCase.Name. +func (FilterInputDropTCPSrcPort) Name() string { + return "FilterInputDropTCPSrcPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-p", "tcp", "-m", "tcp", "--sport", + fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + return err + } + + // Listen for TCP packets on accept port. + if err := listenTCP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connections destined to port %d should not be accepted, but got accepted", dropPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterInputDropTCPSrcPort) LocalAction(ip net.IP) error { + if err := connectTCP(ip, acceptPort, dropPort, sendloopDuration); err == nil { + return fmt.Errorf("connection sent from port %d should not be accepted", dropPort) + } + + return nil +} diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go new file mode 100644 index 000000000..63d74e4f4 --- /dev/null +++ b/test/iptables/filter_output.go @@ -0,0 +1,89 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "fmt" + "net" +) + +func init() { + RegisterTestCase(FilterOutputDropTCPDestPort{}) + RegisterTestCase(FilterOutputDropTCPSrcPort{}) +} + +// FilterOutputDropTCPDestPort tests that connections are not accepted on specified source ports. +type FilterOutputDropTCPDestPort struct{} + +// Name implements TestCase.Name. +func (FilterOutputDropTCPDestPort) Name() string { + return "FilterOutputDropTCPDestPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputDropTCPDestPort) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", + fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + return err + } + + // Listen for TCP packets on accept port. + if err := listenTCP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connections destined to port %d should not be accepted, but got accepted", dropPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputDropTCPDestPort) LocalAction(ip net.IP) error { + if err := connectTCP(ip, acceptPort, dropPort, sendloopDuration); err == nil { + return fmt.Errorf("connection sent from port %d should not be accepted, but got accepted", dropPort) + } + + return nil +} + +// FilterOutputDropTCPSrcPort tests that connections are not accepted on specified source ports. +type FilterOutputDropTCPSrcPort struct{} + +// Name implements TestCase.Name. +func (FilterOutputDropTCPSrcPort) Name() string { + return "FilterOutputDropTCPSrcPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputDropTCPSrcPort) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--sport", + fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + return err + } + + // Listen for TCP packets on drop port. + if err := listenTCP(dropPort, sendloopDuration); err == nil { + return fmt.Errorf("connections on port %d should not be accepted, but got accepted", dropPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP) error { + if err := connectTCP(ip, dropPort, acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort) + } + + return nil +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 05f27569f..3eeb75b8b 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -28,7 +28,7 @@ import ( "gvisor.dev/gvisor/runsc/testutil" ) -const timeout time.Duration = 10 * time.Second +const timeout time.Duration = 18 * time.Second var image = flag.String("image", "bazel/test/iptables/runner:runner", "image to run tests in") @@ -189,3 +189,27 @@ func TestNATDropUDP(t *testing.T) { t.Fatal(err) } } + +func TestFilterInputDropTCPDestPort(t *testing.T) { + if err := singleTest(FilterInputDropTCPDestPort{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDropTCPSrcPort(t *testing.T) { + if err := singleTest(FilterInputDropTCPSrcPort{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterOutputDropTCPDestPort(t *testing.T) { + if err := singleTest(FilterOutputDropTCPDestPort{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterOutputDropTCPSrcPort(t *testing.T) { + if err := singleTest(FilterOutputDropTCPSrcPort{}); err != nil { + t.Fatal(err) + } +} diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 3a4d11f1a..44945bd89 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -80,3 +80,58 @@ func sendUDPLoop(ip net.IP, port int, duration time.Duration) error { return nil } + +// listenTCP listens for connections on a TCP port +func listenTCP(port int, timeout time.Duration) error { + localAddr := net.TCPAddr{ + Port: port, + } + + // Starts listening on port + lConn, err := net.ListenTCP("tcp4", &localAddr) + if err != nil { + return err + } + defer lConn.Close() + + // Accept connections on port + lConn.SetDeadline(time.Now().Add(timeout)) + conn, err := lConn.AcceptTCP() + if err == nil { + conn.Close() + } + return err +} + +// connectTCP connects the TCP server over specified local port, server IP +// and remote/server port +func connectTCP(ip net.IP, remotePort int, localPort int, duration time.Duration) error { + remote := net.TCPAddr{ + IP: ip, + Port: remotePort, + } + + local := net.TCPAddr{ + Port: localPort, + } + + // Container may not be up. Retry DialTCP + // over a given duration + to := time.After(duration) + var res error + for timedOut := false; !timedOut; { + conn, err := net.DialTCP("tcp4", &local, &remote) + res = err + if res == nil { + conn.Close() + return nil + } + select{ + case <-to: + timedOut = true + default: + time.Sleep(200 * time.Millisecond) + } + } + return res +} -- cgit v1.2.3 From f54b9c0ee6e02f9c8bf32aa268c9028ff741bf7c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 13 Jan 2020 10:14:30 -0800 Subject: tests: fix errors detected by asan. PiperOrigin-RevId: 289467083 --- test/syscalls/linux/inotify.cc | 4 ++-- test/syscalls/linux/poll.cc | 3 ++- test/syscalls/linux/readv_common.cc | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 59ec9940a..fdef646eb 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -977,7 +977,7 @@ TEST(Inotify, WatchOnRelativePath) { ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); // Change working directory to root. 
- const char* old_working_dir = get_current_dir_name(); + const FileDescriptor cwd = ASSERT_NO_ERRNO_AND_VALUE(Open(".", O_PATH)); EXPECT_THAT(chdir(root.path().c_str()), SyscallSucceeds()); // Add a watch on file1 with a relative path. @@ -997,7 +997,7 @@ TEST(Inotify, WatchOnRelativePath) { // continue to hold a reference, random save/restore tests can fail if a save // is triggered after "root" is unlinked; we can't save deleted fs objects // with active references. - EXPECT_THAT(chdir(old_working_dir), SyscallSucceeds()); + EXPECT_THAT(fchdir(cwd.get()), SyscallSucceeds()); } TEST(Inotify, ZeroLengthReadWriteDoesNotGenerateEvent) { diff --git a/test/syscalls/linux/poll.cc b/test/syscalls/linux/poll.cc index 9e5aa7fd0..c42472474 100644 --- a/test/syscalls/linux/poll.cc +++ b/test/syscalls/linux/poll.cc @@ -275,7 +275,8 @@ TEST_F(PollTest, Nfds) { // Each entry in the 'fds' array refers to the eventfd and polls for // "writable" events (events=POLLOUT). This essentially guarantees that the // poll() is a no-op and allows negative testing of the 'nfds' parameter. - std::vector fds(max_fds, {.fd = efd.get(), .events = POLLOUT}); + std::vector fds(max_fds + 1, + {.fd = efd.get(), .events = POLLOUT}); // Verify that 'nfds' up to RLIMIT_NOFILE are allowed. EXPECT_THAT(RetryEINTR(poll)(fds.data(), 1, 1), SyscallSucceedsWithValue(1)); diff --git a/test/syscalls/linux/readv_common.cc b/test/syscalls/linux/readv_common.cc index 491d5f40f..2694dc64f 100644 --- a/test/syscalls/linux/readv_common.cc +++ b/test/syscalls/linux/readv_common.cc @@ -154,7 +154,7 @@ void ReadBuffersOverlapping(int fd) { char* expected_ptr = expected.data(); memcpy(expected_ptr, &kReadvTestData[overlap_bytes], overlap_bytes); memcpy(&expected_ptr[overlap_bytes], &kReadvTestData[overlap_bytes], - kReadvTestDataSize); + kReadvTestDataSize - overlap_bytes); struct iovec iovs[2]; iovs[0].iov_base = buffer.data(); -- cgit v1.2.3 From debd213da61cf35d7c91346820e93fc87bfa5896 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Mon, 13 Jan 2020 14:45:31 -0800 Subject: Allow dual stack sockets to operate on AF_INET Fixes #1490 Fixes #1495 PiperOrigin-RevId: 289523250 --- pkg/sentry/socket/netstack/netstack.go | 65 +++++++++--- pkg/sentry/socket/unix/unix.go | 5 +- pkg/sentry/strace/socket.go | 2 +- pkg/tcpip/stack/stack.go | 43 ++++++++ pkg/tcpip/transport/icmp/endpoint.go | 22 ++-- pkg/tcpip/transport/tcp/endpoint.go | 23 +--- pkg/tcpip/transport/udp/endpoint.go | 41 ++------ scripts/common.sh | 2 +- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket_inet_loopback.cc | 156 ++++++++++++++++++++++++++++ 10 files changed, 278 insertions(+), 82 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 099319327..c020c11cb 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -324,22 +324,15 @@ func bytesToIPAddress(addr []byte) tcpip.Address { // converts it to the FullAddress format. It supports AF_UNIX, AF_INET, // AF_INET6, and AF_PACKET addresses. // -// strict indicates whether addresses with the AF_UNSPEC family are accepted of not. -// // AddressAndFamily returns an address and its family. -func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) { +func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { // Make sure we have at least 2 bytes for the address family. 
if len(addr) < 2 { return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument } - family := usermem.ByteOrder.Uint16(addr) - if family != uint16(sfamily) && (strict || family != linux.AF_UNSPEC) { - return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported - } - // Get the rest of the fields based on the address family. - switch family { + switch family := usermem.ByteOrder.Uint16(addr); family { case linux.AF_UNIX: path := addr[2:] if len(path) > linux.UnixPathMax { @@ -638,10 +631,40 @@ func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask { return r } +func (s *SocketOperations) checkFamily(family uint16, exact bool) *syserr.Error { + if family == uint16(s.family) { + return nil + } + if !exact && family == linux.AF_INET && s.family == linux.AF_INET6 { + v, err := s.Endpoint.GetSockOptBool(tcpip.V6OnlyOption) + if err != nil { + return syserr.TranslateNetstackError(err) + } + if !v { + return nil + } + } + return syserr.ErrInvalidArgument +} + +// mapFamily maps the AF_INET ANY address to the IPv4-mapped IPv6 ANY if the +// receiver's family is AF_INET6. +// +// This is a hack to work around the fact that both IPv4 and IPv6 ANY are +// represented by the empty string. +// +// TODO(gvisor.dev/issues/1556): remove this function. +func (s *SocketOperations) mapFamily(addr tcpip.FullAddress, family uint16) tcpip.FullAddress { + if len(addr.Addr) == 0 && s.family == linux.AF_INET6 && family == linux.AF_INET { + addr.Addr = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00" + } + return addr +} + // Connect implements the linux syscall connect(2) for sockets backed by // tpcip.Endpoint. func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { - addr, family, err := AddressAndFamily(s.family, sockaddr, false /* strict */) + addr, family, err := AddressAndFamily(sockaddr) if err != nil { return err } @@ -653,6 +676,12 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo } return syserr.TranslateNetstackError(err) } + + if err := s.checkFamily(family, false /* exact */); err != nil { + return err + } + addr = s.mapFamily(addr, family) + // Always return right away in the non-blocking case. if !blocking { return syserr.TranslateNetstackError(s.Endpoint.Connect(addr)) @@ -681,10 +710,14 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo // Bind implements the linux syscall bind(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { - addr, _, err := AddressAndFamily(s.family, sockaddr, true /* strict */) + addr, family, err := AddressAndFamily(sockaddr) if err != nil { return err } + if err := s.checkFamily(family, true /* exact */); err != nil { + return err + } + addr = s.mapFamily(addr, family) // Issue the bind request to the endpoint. return syserr.TranslateNetstackError(s.Endpoint.Bind(addr)) @@ -2080,8 +2113,8 @@ func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32) case linux.AF_INET6: var out linux.SockAddrInet6 - if len(addr.Addr) == 4 { - // Copy address is v4-mapped format. + if len(addr.Addr) == header.IPv4AddressSize { + // Copy address in v4-mapped format. 
copy(out.Addr[12:], addr.Addr) out.Addr[10] = 0xff out.Addr[11] = 0xff @@ -2395,10 +2428,14 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] var addr *tcpip.FullAddress if len(to) > 0 { - addrBuf, _, err := AddressAndFamily(s.family, to, true /* strict */) + addrBuf, family, err := AddressAndFamily(to) if err != nil { return 0, err } + if err := s.checkFamily(family, false /* exact */); err != nil { + return 0, err + } + addrBuf = s.mapFamily(addrBuf, family) addr = &addrBuf } diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 91effe89a..7f49ba864 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -116,13 +116,16 @@ func (s *SocketOperations) Endpoint() transport.Endpoint { // extractPath extracts and validates the address. func extractPath(sockaddr []byte) (string, *syserr.Error) { - addr, _, err := netstack.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */) + addr, family, err := netstack.AddressAndFamily(sockaddr) if err != nil { if err == syserr.ErrAddressFamilyNotSupported { err = syserr.ErrInvalidArgument } return "", err } + if family != linux.AF_UNIX { + return "", syserr.ErrInvalidArgument + } // The address is trimmed by GetAddress. p := string(addr.Addr) diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go index 51f2efb39..b6d7177f4 100644 --- a/pkg/sentry/strace/socket.go +++ b/pkg/sentry/strace/socket.go @@ -341,7 +341,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string { switch family { case linux.AF_INET, linux.AF_INET6, linux.AF_UNIX: - fa, _, err := netstack.AddressAndFamily(int(family), b, true /* strict */) + fa, _, err := netstack.AddressAndFamily(b) if err != nil { return fmt.Sprintf("%#x {Family: %s, error extracting address: %v}", addr, familyStr, err) } diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index a47ceba54..113b457fb 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -547,6 +547,49 @@ type TransportEndpointInfo struct { RegisterNICID tcpip.NICID } +// AddrNetProto unwraps the specified address if it is a V4-mapped V6 address +// and returns the network protocol number to be used to communicate with the +// specified address. It returns an error if the passed address is incompatible +// with the receiver. 
+func (e *TransportEndpointInfo) AddrNetProto(addr tcpip.FullAddress, v6only bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, *tcpip.Error) { + netProto := e.NetProto + switch len(addr.Addr) { + case header.IPv4AddressSize: + netProto = header.IPv4ProtocolNumber + case header.IPv6AddressSize: + if header.IsV4MappedAddress(addr.Addr) { + netProto = header.IPv4ProtocolNumber + addr.Addr = addr.Addr[header.IPv6AddressSize-header.IPv4AddressSize:] + if addr.Addr == header.IPv4Any { + addr.Addr = "" + } + } + } + + switch len(e.ID.LocalAddress) { + case header.IPv4AddressSize: + if len(addr.Addr) == header.IPv6AddressSize { + return tcpip.FullAddress{}, 0, tcpip.ErrInvalidEndpointState + } + case header.IPv6AddressSize: + if len(addr.Addr) == header.IPv4AddressSize { + return tcpip.FullAddress{}, 0, tcpip.ErrNetworkUnreachable + } + } + + switch { + case netProto == e.NetProto: + case netProto == header.IPv4ProtocolNumber && e.NetProto == header.IPv6ProtocolNumber: + if v6only { + return tcpip.FullAddress{}, 0, tcpip.ErrNoRoute + } + default: + return tcpip.FullAddress{}, 0, tcpip.ErrInvalidEndpointState + } + + return addr, netProto, nil +} + // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo // marker interface. func (*TransportEndpointInfo) IsEndpointInfo() {} diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index 330786f4c..42afb3f5b 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -288,7 +288,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c toCopy := *to to = &toCopy - netProto, err := e.checkV4Mapped(to, true) + netProto, err := e.checkV4Mapped(to) if err != nil { return 0, nil, err } @@ -475,18 +475,12 @@ func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Err }) } -func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (tcpip.NetworkProtocolNumber, *tcpip.Error) { - netProto := e.NetProto - if header.IsV4MappedAddress(addr.Addr) { - return 0, tcpip.ErrNoRoute - } - - // Fail if we're bound to an address length different from the one we're - // checking. 
- if l := len(e.ID.LocalAddress); !allowMismatch && l != 0 && l != len(addr.Addr) { - return 0, tcpip.ErrInvalidEndpointState +func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocolNumber, *tcpip.Error) { + unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProto(*addr, false /* v6only */) + if err != nil { + return 0, err } - + *addr = unwrapped return netProto, nil } @@ -518,7 +512,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { return tcpip.ErrInvalidEndpointState } - netProto, err := e.checkV4Mapped(&addr, false) + netProto, err := e.checkV4Mapped(&addr) if err != nil { return err } @@ -631,7 +625,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error { return tcpip.ErrInvalidEndpointState } - netProto, err := e.checkV4Mapped(&addr, false) + netProto, err := e.checkV4Mapped(&addr) if err != nil { return err } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index cca511fb9..cc8b533c8 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1691,26 +1691,11 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocolNumber, *tcpip.Error) { - netProto := e.NetProto - if header.IsV4MappedAddress(addr.Addr) { - // Fail if using a v4 mapped address on a v6only endpoint. - if e.v6only { - return 0, tcpip.ErrNoRoute - } - - netProto = header.IPv4ProtocolNumber - addr.Addr = addr.Addr[header.IPv6AddressSize-header.IPv4AddressSize:] - if addr.Addr == header.IPv4Any { - addr.Addr = "" - } - } - - // Fail if we're bound to an address length different from the one we're - // checking. - if l := len(e.ID.LocalAddress); l != 0 && len(addr.Addr) != 0 && l != len(addr.Addr) { - return 0, tcpip.ErrInvalidEndpointState + unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProto(*addr, e.v6only) + if err != nil { + return 0, err } - + *addr = unwrapped return netProto, nil } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index a4ff29a7d..13446f5d9 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -402,7 +402,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c return 0, nil, tcpip.ErrBroadcastDisabled } - netProto, err := e.checkV4Mapped(to, false) + netProto, err := e.checkV4Mapped(to) if err != nil { return 0, nil, err } @@ -501,7 +501,7 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { defer e.mu.Unlock() fa := tcpip.FullAddress{Addr: v.InterfaceAddr} - netProto, err := e.checkV4Mapped(&fa, false) + netProto, err := e.checkV4Mapped(&fa) if err != nil { return err } @@ -839,35 +839,12 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u return nil } -func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (tcpip.NetworkProtocolNumber, *tcpip.Error) { - netProto := e.NetProto - if len(addr.Addr) == 0 { - return netProto, nil - } - if header.IsV4MappedAddress(addr.Addr) { - // Fail if using a v4 mapped address on a v6only endpoint. - if e.v6only { - return 0, tcpip.ErrNoRoute - } - - netProto = header.IPv4ProtocolNumber - addr.Addr = addr.Addr[header.IPv6AddressSize-header.IPv4AddressSize:] - if addr.Addr == header.IPv4Any { - addr.Addr = "" - } - - // Fail if we are bound to an IPv6 address. 
- if !allowMismatch && len(e.ID.LocalAddress) == 16 { - return 0, tcpip.ErrNetworkUnreachable - } - } - - // Fail if we're bound to an address length different from the one we're - // checking. - if l := len(e.ID.LocalAddress); l != 0 && l != len(addr.Addr) { - return 0, tcpip.ErrInvalidEndpointState +func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocolNumber, *tcpip.Error) { + unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProto(*addr, e.v6only) + if err != nil { + return 0, err } - + *addr = unwrapped return netProto, nil } @@ -916,7 +893,7 @@ func (e *endpoint) Disconnect() *tcpip.Error { // Connect connects the endpoint to its peer. Specifying a NIC is optional. func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { - netProto, err := e.checkV4Mapped(&addr, false) + netProto, err := e.checkV4Mapped(&addr) if err != nil { return err } @@ -1074,7 +1051,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error { return tcpip.ErrInvalidEndpointState } - netProto, err := e.checkV4Mapped(&addr, true) + netProto, err := e.checkV4Mapped(&addr) if err != nil { return err } diff --git a/scripts/common.sh b/scripts/common.sh index 6dabad141..fdb1aa142 100755 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -73,7 +73,7 @@ function install_runsc() { sudo "${RUNSC_BIN}" install --experimental=true --runtime="${runtime}" -- --debug-log "${RUNSC_LOGS}" "$@" # Clear old logs files that may exist. - sudo rm -f "${RUNSC_LOGS_DIR}"/* + sudo rm -f "${RUNSC_LOGS_DIR}"/'*' # Restart docker to pick up the new runtime configuration. sudo systemctl restart docker diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ce8abe217..4c7ec3f06 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2693,6 +2693,7 @@ cc_binary( srcs = ["socket_inet_loopback.cc"], linkstatic = 1, deps = [ + ":ip_socket_test_util", ":socket_test_util", "//test/util:file_descriptor", "//test/util:posix_error", diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 138024d9e..5d114d460 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -32,6 +32,7 @@ #include "absl/strings/str_cat.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" @@ -102,6 +103,161 @@ TEST(BadSocketPairArgs, ValidateErrForBadCallsToSocketPair) { SyscallFailsWithErrno(EAFNOSUPPORT)); } +enum class Operation { + Bind, + Connect, + SendTo, +}; + +std::string OperationToString(Operation operation) { + switch (operation) { + case Operation::Bind: + return "Bind"; + case Operation::Connect: + return "Connect"; + case Operation::SendTo: + return "SendTo"; + } +} + +using OperationSequence = std::vector; + +using DualStackSocketTest = + ::testing::TestWithParam>; + +TEST_P(DualStackSocketTest, AddressOperations) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_DGRAM, 0)); + + const TestAddress& addr = std::get<0>(GetParam()); + const OperationSequence& operations = std::get<1>(GetParam()); + + auto addr_in = reinterpret_cast(&addr.addr); + + // sockets may only be bound once. Both `connect` and `sendto` cause a socket + // to be bound. 
+ bool bound = false; + for (const Operation& operation : operations) { + bool sockname = false; + bool peername = false; + switch (operation) { + case Operation::Bind: { + ASSERT_NO_ERRNO(SetAddrPort( + addr.family(), const_cast(&addr.addr), 0)); + + int bind_ret = bind(fd.get(), addr_in, addr.addr_len); + + // Dual stack sockets may only be bound to AF_INET6. + if (!bound && addr.family() == AF_INET6) { + EXPECT_THAT(bind_ret, SyscallSucceeds()); + bound = true; + + sockname = true; + } else { + EXPECT_THAT(bind_ret, SyscallFailsWithErrno(EINVAL)); + } + break; + } + case Operation::Connect: { + ASSERT_NO_ERRNO(SetAddrPort( + addr.family(), const_cast(&addr.addr), 1337)); + + EXPECT_THAT(connect(fd.get(), addr_in, addr.addr_len), + SyscallSucceeds()) + << GetAddrStr(addr_in); + bound = true; + + sockname = true; + peername = true; + + break; + } + case Operation::SendTo: { + const char payload[] = "hello"; + ASSERT_NO_ERRNO(SetAddrPort( + addr.family(), const_cast(&addr.addr), 1337)); + + ssize_t sendto_ret = sendto(fd.get(), &payload, sizeof(payload), 0, + addr_in, addr.addr_len); + + EXPECT_THAT(sendto_ret, SyscallSucceedsWithValue(sizeof(payload))); + sockname = !bound; + bound = true; + break; + } + } + + if (sockname) { + sockaddr_storage sock_addr; + socklen_t addrlen = sizeof(sock_addr); + ASSERT_THAT(getsockname(fd.get(), reinterpret_cast(&sock_addr), + &addrlen), + SyscallSucceeds()); + ASSERT_EQ(addrlen, sizeof(struct sockaddr_in6)); + + auto sock_addr_in6 = reinterpret_cast(&sock_addr); + + if (operation == Operation::SendTo) { + EXPECT_EQ(sock_addr_in6->sin6_family, AF_INET6); + EXPECT_TRUE(IN6_IS_ADDR_UNSPECIFIED(sock_addr_in6->sin6_addr.s6_addr32)) + << OperationToString(operation) << " getsocknam=" + << GetAddrStr(reinterpret_cast(&sock_addr)); + + EXPECT_NE(sock_addr_in6->sin6_port, 0); + } else if (IN6_IS_ADDR_V4MAPPED( + reinterpret_cast(addr_in) + ->sin6_addr.s6_addr32)) { + EXPECT_TRUE(IN6_IS_ADDR_V4MAPPED(sock_addr_in6->sin6_addr.s6_addr32)) + << OperationToString(operation) << " getsocknam=" + << GetAddrStr(reinterpret_cast(&sock_addr)); + } + } + + if (peername) { + sockaddr_storage peer_addr; + socklen_t addrlen = sizeof(peer_addr); + ASSERT_THAT(getpeername(fd.get(), reinterpret_cast(&peer_addr), + &addrlen), + SyscallSucceeds()); + ASSERT_EQ(addrlen, sizeof(struct sockaddr_in6)); + + if (addr.family() == AF_INET || + IN6_IS_ADDR_V4MAPPED(reinterpret_cast(addr_in) + ->sin6_addr.s6_addr32)) { + EXPECT_TRUE(IN6_IS_ADDR_V4MAPPED( + reinterpret_cast(&peer_addr) + ->sin6_addr.s6_addr32)) + << OperationToString(operation) << " getpeername=" + << GetAddrStr(reinterpret_cast(&peer_addr)); + } + } + } +} + +// TODO(gvisor.dev/issues/1556): uncomment V4MappedAny. 
+INSTANTIATE_TEST_SUITE_P( + All, DualStackSocketTest, + ::testing::Combine( + ::testing::Values(V4Any(), V4Loopback(), /*V4MappedAny(),*/ + V4MappedLoopback(), V6Any(), V6Loopback()), + ::testing::ValuesIn( + {{Operation::Bind, Operation::Connect, Operation::SendTo}, + {Operation::Bind, Operation::SendTo, Operation::Connect}, + {Operation::Connect, Operation::Bind, Operation::SendTo}, + {Operation::Connect, Operation::SendTo, Operation::Bind}, + {Operation::SendTo, Operation::Bind, Operation::Connect}, + {Operation::SendTo, Operation::Connect, Operation::Bind}})), + [](::testing::TestParamInfo< + std::tuple> const& info) { + const TestAddress& addr = std::get<0>(info.param); + const OperationSequence& operations = std::get<1>(info.param); + std::string s = addr.description; + for (const Operation& operation : operations) { + absl::StrAppend(&s, OperationToString(operation)); + } + return s; + }); + void tcpSimpleConnectTest(TestAddress const& listener, TestAddress const& connector, bool unbound) { // Create the listening socket. -- cgit v1.2.3 From 50625cee59aaff834c7968771ab385ad0e7b0e1f Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Tue, 14 Jan 2020 13:31:52 -0800 Subject: Implement {g,s}etsockopt(IP_RECVTOS) for UDP sockets PiperOrigin-RevId: 289718534 --- pkg/sentry/socket/control/control.go | 2 +- pkg/sentry/socket/netstack/netstack.go | 36 +++++++++++++-- pkg/tcpip/checker/checker.go | 16 +++++++ pkg/tcpip/stack/nic.go | 2 +- pkg/tcpip/stack/stack.go | 2 +- pkg/tcpip/tcpip.go | 8 +++- pkg/tcpip/transport/udp/endpoint.go | 40 +++++++++++++++-- pkg/tcpip/transport/udp/udp_test.go | 67 ++++++++++++++++++++++++---- test/syscalls/linux/socket_ip_udp_generic.cc | 40 +++++++++++++++++ test/syscalls/linux/udp_socket_test_cases.cc | 8 ++-- 10 files changed, 197 insertions(+), 24 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 4301b697c..1684dfc24 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -327,7 +327,7 @@ func PackInq(t *kernel.Task, inq int32, buf []byte) []byte { } // PackTOS packs an IP_TOS socket control message. 
-func PackTOS(t *kernel.Task, tos int8, buf []byte) []byte { +func PackTOS(t *kernel.Task, tos uint8, buf []byte) []byte { return putCmsgStruct( buf, linux.SOL_IP, diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index c020c11cb..d2f7e987d 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1268,11 +1268,11 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf if err != nil { return nil, syserr.TranslateNetstackError(err) } - var o uint32 + var o int32 if v { o = 1 } - return int32(o), nil + return o, nil case linux.IPV6_PATHMTU: t.Kernel().EmitUnimplementedEvent(t) @@ -1377,6 +1377,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in } return int32(v), nil + case linux.IP_RECVTOS: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptBool(tcpip.ReceiveTOSOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + var o int32 + if v { + o = 1 + } + return o, nil + default: emitUnimplementedEventIP(t, name) } @@ -1895,6 +1910,13 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.IPv4TOSOption(v))) + case linux.IP_RECVTOS: + v, err := parseIntOrChar(optVal) + if err != nil { + return err + } + return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTOSOption, v != 0)) + case linux.IP_ADD_SOURCE_MEMBERSHIP, linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BLOCK_SOURCE, @@ -1915,7 +1937,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s linux.IP_RECVFRAGSIZE, linux.IP_RECVOPTS, linux.IP_RECVORIGDSTADDR, - linux.IP_RECVTOS, linux.IP_RECVTTL, linux.IP_RETOPTS, linux.IP_TRANSPARENT, @@ -2335,7 +2356,14 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe } func (s *SocketOperations) controlMessages() socket.ControlMessages { - return socket.ControlMessages{IP: tcpip.ControlMessages{HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp, Timestamp: s.readCM.Timestamp}} + return socket.ControlMessages{ + IP: tcpip.ControlMessages{ + HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp, + Timestamp: s.readCM.Timestamp, + HasTOS: s.readCM.HasTOS, + TOS: s.readCM.TOS, + }, + } } // updateTimestamp sets the timestamp for SIOCGSTAMP. It should be called after diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index 2f15bf1f1..542abc99d 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -33,6 +33,9 @@ type NetworkChecker func(*testing.T, []header.Network) // TransportChecker is a function to check a property of a transport packet. type TransportChecker func(*testing.T, header.Transport) +// ControlMessagesChecker is a function to check a property of ancillary data. +type ControlMessagesChecker func(*testing.T, tcpip.ControlMessages) + // IPv4 checks the validity and properties of the given IPv4 packet. It is // expected to be used in conjunction with other network checkers for specific // properties. For example, to check the source and destination address, one @@ -158,6 +161,19 @@ func FragmentFlags(flags uint8) NetworkChecker { } } +// ReceiveTOS creates a checker that checks the TOS field in ControlMessages. 
+func ReceiveTOS(want uint8) ControlMessagesChecker { + return func(t *testing.T, cm tcpip.ControlMessages) { + t.Helper() + if !cm.HasTOS { + t.Fatalf("got cm.HasTOS = %t, want cm.TOS = %d", cm.HasTOS, want) + } + if got := cm.TOS; got != want { + t.Fatalf("got cm.TOS = %d, want %d", got, want) + } + } +} + // TOS creates a checker that checks the TOS field. func TOS(tos uint8, label uint32) NetworkChecker { return func(t *testing.T, h []header.Network) { diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index abf73fe33..071221d5a 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -763,7 +763,7 @@ func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) { n.mu.Unlock() } -// Subnets returns the Subnets associated with this NIC. +// AddressRanges returns the Subnets associated with this NIC. func (n *NIC) AddressRanges() []tcpip.Subnet { n.mu.RLock() defer n.mu.RUnlock() diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index f8d89248e..386eb6eec 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -912,7 +912,7 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool { return false } -// NICSubnets returns a map of NICIDs to their associated subnets. +// NICAddressRanges returns a map of NICIDs to their associated subnets. func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet { s.mu.RLock() defer s.mu.RUnlock() diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 4a090ac86..b7813cbc0 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -322,7 +322,7 @@ type ControlMessages struct { HasTOS bool // TOS is the IPv4 type of service of the associated packet. - TOS int8 + TOS uint8 // HasTClass indicates whether Tclass is valid/set. HasTClass bool @@ -500,9 +500,13 @@ type WriteOptions struct { type SockOptBool int const ( + // ReceiveTOSOption is used by SetSockOpt/GetSockOpt to specify if the TOS + // ancillary message is passed with incoming packets. + ReceiveTOSOption SockOptBool = iota + // V6OnlyOption is used by {G,S}etSockOptBool to specify whether an IPv6 // socket is to be restricted to sending and receiving IPv6 packets only. - V6OnlyOption SockOptBool = iota + V6OnlyOption ) // SockOptInt represents socket options which values have the int type. diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 13446f5d9..c9cbed8f4 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -31,6 +31,7 @@ type udpPacket struct { senderAddress tcpip.FullAddress data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 + tos uint8 } // EndpointState represents the state of a UDP endpoint. @@ -113,6 +114,10 @@ type endpoint struct { // applied while sending packets. Defaults to 0 as on Linux. sendTOS uint8 + // receiveTOS determines if the incoming IPv4 TOS header field is passed + // as ancillary data to ControlMessages on Read. + receiveTOS bool + // shutdownFlags represent the current shutdown state of the endpoint. 
shutdownFlags tcpip.ShutdownFlags @@ -243,7 +248,18 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess *addr = p.senderAddress } - return p.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: p.timestamp}, nil + cm := tcpip.ControlMessages{ + HasTimestamp: true, + Timestamp: p.timestamp, + } + e.mu.RLock() + receiveTOS := e.receiveTOS + e.mu.RUnlock() + if receiveTOS { + cm.HasTOS = true + cm.TOS = p.tos + } + return p.data.ToView(), cm, nil } // prepareForWrite prepares the endpoint for sending data. In particular, it @@ -458,6 +474,12 @@ func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) { // SetSockOptBool implements tcpip.Endpoint.SetSockOptBool. func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { switch opt { + case tcpip.ReceiveTOSOption: + e.mu.Lock() + e.receiveTOS = v + e.mu.Unlock() + return nil + case tcpip.V6OnlyOption: // We only recognize this option on v6 endpoints. if e.NetProto != header.IPv6ProtocolNumber { @@ -664,15 +686,21 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { // GetSockOptBool implements tcpip.Endpoint.GetSockOptBool. func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { switch opt { + case tcpip.ReceiveTOSOption: + e.mu.RLock() + v := e.receiveTOS + e.mu.RUnlock() + return v, nil + case tcpip.V6OnlyOption: // We only recognize this option on v6 endpoints. if e.NetProto != header.IPv6ProtocolNumber { return false, tcpip.ErrUnknownProtocolOption } - e.mu.Lock() + e.mu.RLock() v := e.v6only - e.mu.Unlock() + e.mu.RUnlock() return v, nil } @@ -1215,6 +1243,12 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk e.rcvList.PushBack(packet) e.rcvBufSize += pkt.Data.Size() + // Save any useful information from the network header to the packet. + switch r.NetProto { + case header.IPv4ProtocolNumber: + packet.tos, _ = header.IPv4(pkt.NetworkHeader).TOS() + } + packet.timestamp = e.stack.NowNanoseconds() e.rcvMu.Unlock() diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 0a82bc4fa..ee9d10555 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -56,6 +56,7 @@ const ( multicastAddr = "\xe8\x2b\xd3\xea" multicastV6Addr = "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" broadcastAddr = header.IPv4Broadcast + testTOS = 0x80 // defaultMTU is the MTU, in bytes, used throughout the tests, except // where another value is explicitly used. It is chosen to match the MTU @@ -453,6 +454,7 @@ func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple, valid bool ip := header.IPv4(buf) ip.Encode(&header.IPv4Fields{ IHL: header.IPv4MinimumSize, + TOS: testTOS, TotalLength: uint16(len(buf)), TTL: 65, Protocol: uint8(udp.ProtocolNumber), @@ -552,8 +554,8 @@ func TestBindToDeviceOption(t *testing.T) { // testReadInternal sends a packet of the given test flow into the stack by // injecting it into the link endpoint. It then attempts to read it from the // UDP endpoint and depending on if this was expected to succeed verifies its -// correctness. -func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool) { +// correctness including any additional checker functions provided. 
+func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expectReadError bool, checkers ...checker.ControlMessagesChecker) { c.t.Helper() payload := newPayload() @@ -568,12 +570,12 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe epstats := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone() var addr tcpip.FullAddress - v, _, err := c.ep.Read(&addr) + v, cm, err := c.ep.Read(&addr) if err == tcpip.ErrWouldBlock { // Wait for data to become available. select { case <-ch: - v, _, err = c.ep.Read(&addr) + v, cm, err = c.ep.Read(&addr) case <-time.After(300 * time.Millisecond): if packetShouldBeDropped { @@ -606,15 +608,21 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe if !bytes.Equal(payload, v) { c.t.Fatalf("bad payload: got %x, want %x", v, payload) } + + // Run any checkers against the ControlMessages. + for _, f := range checkers { + f(c.t, cm) + } + c.checkEndpointReadStats(1, epstats, err) } // testRead sends a packet of the given test flow into the stack by injecting it // into the link endpoint. It then reads it from the UDP endpoint and verifies -// its correctness. -func testRead(c *testContext, flow testFlow) { +// its correctness including any additional checker functions provided. +func testRead(c *testContext, flow testFlow, checkers ...checker.ControlMessagesChecker) { c.t.Helper() - testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */) + testReadInternal(c, flow, false /* packetShouldBeDropped */, false /* expectReadError */, checkers...) } // testFailingRead sends a packet of the given test flow into the stack by @@ -1282,7 +1290,7 @@ func TestTOSV4(t *testing.T) { c.createEndpointForFlow(flow) - const tos = 0xC0 + const tos = testTOS var v tcpip.IPv4TOSOption if err := c.ep.GetSockOpt(&v); err != nil { c.t.Errorf("GetSockopt failed: %s", err) @@ -1317,7 +1325,7 @@ func TestTOSV6(t *testing.T) { c.createEndpointForFlow(flow) - const tos = 0xC0 + const tos = testTOS var v tcpip.IPv6TrafficClassOption if err := c.ep.GetSockOpt(&v); err != nil { c.t.Errorf("GetSockopt failed: %s", err) @@ -1344,6 +1352,47 @@ func TestTOSV6(t *testing.T) { } } +func TestReceiveTOSV4(t *testing.T) { + for _, flow := range []testFlow{unicastV4, broadcast} { + t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { + c := newDualTestContext(t, defaultMTU) + defer c.cleanup() + + c.createEndpointForFlow(flow) + + // Verify that setting and reading the option works. + v, err := c.ep.GetSockOptBool(tcpip.ReceiveTOSOption) + if err != nil { + c.t.Fatal("GetSockOptBool(tcpip.ReceiveTOSOption) failed:", err) + } + // Test for expected default value. + if v != false { + c.t.Errorf("got GetSockOptBool(tcpip.ReceiveTOSOption) = %t, want = %t", v, false) + } + + want := true + if err := c.ep.SetSockOptBool(tcpip.ReceiveTOSOption, want); err != nil { + c.t.Fatalf("SetSockOptBool(tcpip.ReceiveTOSOption, %t) failed: %s", want, err) + } + + got, err := c.ep.GetSockOptBool(tcpip.ReceiveTOSOption) + if err != nil { + c.t.Fatal("GetSockOptBool(tcpip.ReceiveTOSOption) failed:", err) + } + if got != want { + c.t.Fatalf("got GetSockOptBool(tcpip.ReceiveTOSOption) = %t, want = %t", got, want) + } + + // Verify that the correct received TOS is handed through as + // ancillary data to the ControlMessages struct. 
+ if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { + c.t.Fatal("Bind failed:", err) + } + testRead(c, flow, checker.ReceiveTOS(testTOS)) + }) + } +} + func TestMulticastInterfaceOption(t *testing.T) { for _, flow := range []testFlow{multicastV4, multicastV4in6, multicastV6, multicastV6Only} { t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index 66eb68857..53290bed7 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -209,6 +209,46 @@ TEST_P(UDPSocketPairTest, SetMulticastLoopChar) { EXPECT_EQ(get, kSockOptOn); } +// Ensure that Receiving TOS is off by default. +TEST_P(UDPSocketPairTest, RecvTosDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +// Test that setting and getting IP_RECVTOS works as expected. +TEST_P(UDPSocketPairTest, SetRecvTos) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, + &kSockOptOff, sizeof(kSockOptOff)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + TEST_P(UDPSocketPairTest, ReuseAddrDefault) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index dc35c2f50..68e0a8109 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1349,8 +1349,9 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { // outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SetAndReceiveTOS) { - // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. - SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); + // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. + SKIP_IF((GetParam() != AddressFamily::kIpv4) && IsRunningOnGvisor() && + !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1421,7 +1422,8 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { // TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SendAndReceiveTOS) { - // TODO(b/68320120): IP_RECVTOS/IPV6_RECVTCLASS not supported for netstack. + // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. + // TODO(b/146661005): Setting TOS via cmsg not supported for netstack. 
SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); -- cgit v1.2.3 From a611fdaee3c14abe2222140ae0a8a742ebfd31ab Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Tue, 14 Jan 2020 14:14:17 -0800 Subject: Changes TCP packet dispatch to use a pool of goroutines. All inbound segments for connections in ESTABLISHED state are delivered to the endpoint's queue but for every segment delivered we also queue the endpoint for processing to a selected processor. This ensures that when there are a large number of connections in ESTABLISHED state the inbound packets are all handled by a small number of goroutines and significantly reduces the amount of work the goscheduler has to perform. We let connections in other states follow the current path where the endpoint's goroutine directly handles the segments. Updates #231 PiperOrigin-RevId: 289728325 --- benchmarks/tcp/tcp_proxy.go | 6 +- pkg/sleep/sleep_test.go | 31 +++ pkg/tcpip/stack/transport_demuxer.go | 54 ++++- pkg/tcpip/transport/tcp/BUILD | 15 +- pkg/tcpip/transport/tcp/accept.go | 9 +- pkg/tcpip/transport/tcp/connect.go | 310 ++++++++++++++++------------ pkg/tcpip/transport/tcp/dispatcher.go | 218 +++++++++++++++++++ pkg/tcpip/transport/tcp/endpoint.go | 303 ++++++++++++++++++--------- pkg/tcpip/transport/tcp/endpoint_state.go | 30 +-- pkg/tcpip/transport/tcp/protocol.go | 11 + pkg/tcpip/transport/tcp/rcv.go | 21 +- pkg/tcpip/transport/tcp/snd.go | 14 +- pkg/tcpip/transport/tcp/tcp_test.go | 11 +- test/syscalls/linux/socket_inet_loopback.cc | 2 +- test/syscalls/linux/tcp_socket.cc | 14 ++ 15 files changed, 769 insertions(+), 280 deletions(-) create mode 100644 pkg/tcpip/transport/tcp/dispatcher.go (limited to 'test') diff --git a/benchmarks/tcp/tcp_proxy.go b/benchmarks/tcp/tcp_proxy.go index be0d7bdd6..dc96add66 100644 --- a/benchmarks/tcp/tcp_proxy.go +++ b/benchmarks/tcp/tcp_proxy.go @@ -85,7 +85,7 @@ func (netImpl) printStats() { const ( nicID = 1 // Fixed. - rcvBufSize = 1 << 20 // 1MB. + rcvBufSize = 4 << 20 // 1MB. ) type netstackImpl struct { @@ -130,6 +130,10 @@ func setupNetwork(ifaceName string, numChannels int) (fds []int, err error) { return nil, fmt.Errorf("setsockopt(..., SO_RCVBUF, %v,..) = %v", rcvBufSize, err) } + if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF, rcvBufSize); err != nil { + return nil, fmt.Errorf("setsockopt(..., SO_RCVBUF, %v,..) = %v", rcvBufSize, err) + } + if !*swgso && *gso != 0 { if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil { return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err) diff --git a/pkg/sleep/sleep_test.go b/pkg/sleep/sleep_test.go index 130806c86..af47e2ba1 100644 --- a/pkg/sleep/sleep_test.go +++ b/pkg/sleep/sleep_test.go @@ -376,6 +376,37 @@ func TestRace(t *testing.T) { } } +// TestRaceInOrder tests that multiple wakers can continuously send wake requests to +// the sleeper and that the wakers are retrieved in the order asserted. +func TestRaceInOrder(t *testing.T) { + const wakers = 100 + const wakeRequests = 10000 + + w := make([]Waker, wakers) + s := Sleeper{} + + // Associate each waker and start goroutines that will assert them. + for i := range w { + s.AddWaker(&w[i], i) + } + go func() { + n := 0 + for n < wakeRequests { + wk := w[n%len(w)] + wk.Assert() + n++ + } + }() + + // Wait for all wake up notifications from all wakers. 
+ for i := 0; i < wakeRequests; i++ { + v, _ := s.Fetch(true) + if got, want := v, i%wakers; got != want { + t.Fatalf("got %d want %d", got, want) + } + } +} + // BenchmarkSleeperMultiSelect measures how long it takes to fetch a wake up // from 4 wakers when at least one is already asserted. func BenchmarkSleeperMultiSelect(b *testing.B) { diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index f384a91de..d686e6eb8 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -104,7 +104,14 @@ func (epsByNic *endpointsByNic) handlePacket(r *Route, id TransportEndpointID, p return } // multiPortEndpoints are guaranteed to have at least one element. - selectEndpoint(id, mpep, epsByNic.seed).HandlePacket(r, id, pkt) + transEP := selectEndpoint(id, mpep, epsByNic.seed) + if queuedProtocol, mustQueue := mpep.demux.queuedProtocols[protocolIDs{mpep.netProto, mpep.transProto}]; mustQueue { + queuedProtocol.QueuePacket(r, transEP, id, pkt) + epsByNic.mu.RUnlock() + return + } + + transEP.HandlePacket(r, id, pkt) epsByNic.mu.RUnlock() // Don't use defer for performance reasons. } @@ -130,7 +137,7 @@ func (epsByNic *endpointsByNic) handleControlPacket(n *NIC, id TransportEndpoint // registerEndpoint returns true if it succeeds. It fails and returns // false if ep already has an element with the same key. -func (epsByNic *endpointsByNic) registerEndpoint(t TransportEndpoint, reusePort bool, bindToDevice tcpip.NICID) *tcpip.Error { +func (epsByNic *endpointsByNic) registerEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, t TransportEndpoint, reusePort bool, bindToDevice tcpip.NICID) *tcpip.Error { epsByNic.mu.Lock() defer epsByNic.mu.Unlock() @@ -140,7 +147,7 @@ func (epsByNic *endpointsByNic) registerEndpoint(t TransportEndpoint, reusePort } // This is a new binding. - multiPortEp := &multiPortEndpoint{} + multiPortEp := &multiPortEndpoint{demux: d, netProto: netProto, transProto: transProto} multiPortEp.endpointsMap = make(map[TransportEndpoint]int) multiPortEp.reuse = reusePort epsByNic.endpoints[bindToDevice] = multiPortEp @@ -168,18 +175,34 @@ func (epsByNic *endpointsByNic) unregisterEndpoint(bindToDevice tcpip.NICID, t T // newTransportDemuxer. type transportDemuxer struct { // protocol is immutable. - protocol map[protocolIDs]*transportEndpoints + protocol map[protocolIDs]*transportEndpoints + queuedProtocols map[protocolIDs]queuedTransportProtocol +} + +// queuedTransportProtocol if supported by a protocol implementation will cause +// the dispatcher to delivery packets to the QueuePacket method instead of +// calling HandlePacket directly on the endpoint. +type queuedTransportProtocol interface { + QueuePacket(r *Route, ep TransportEndpoint, id TransportEndpointID, pkt tcpip.PacketBuffer) } func newTransportDemuxer(stack *Stack) *transportDemuxer { - d := &transportDemuxer{protocol: make(map[protocolIDs]*transportEndpoints)} + d := &transportDemuxer{ + protocol: make(map[protocolIDs]*transportEndpoints), + queuedProtocols: make(map[protocolIDs]queuedTransportProtocol), + } // Add each network and transport pair to the demuxer. 
for netProto := range stack.networkProtocols { for proto := range stack.transportProtocols { - d.protocol[protocolIDs{netProto, proto}] = &transportEndpoints{ + protoIDs := protocolIDs{netProto, proto} + d.protocol[protoIDs] = &transportEndpoints{ endpoints: make(map[TransportEndpointID]*endpointsByNic), } + qTransProto, isQueued := (stack.transportProtocols[proto].proto).(queuedTransportProtocol) + if isQueued { + d.queuedProtocols[protoIDs] = qTransProto + } } } @@ -209,7 +232,11 @@ func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNum // // +stateify savable type multiPortEndpoint struct { - mu sync.RWMutex `state:"nosave"` + mu sync.RWMutex `state:"nosave"` + demux *transportDemuxer + netProto tcpip.NetworkProtocolNumber + transProto tcpip.TransportProtocolNumber + endpointsArr []TransportEndpoint endpointsMap map[TransportEndpoint]int // reuse indicates if more than one endpoint is allowed. @@ -258,13 +285,22 @@ func selectEndpoint(id TransportEndpointID, mpep *multiPortEndpoint, seed uint32 func (ep *multiPortEndpoint) handlePacketAll(r *Route, id TransportEndpointID, pkt tcpip.PacketBuffer) { ep.mu.RLock() + queuedProtocol, mustQueue := ep.demux.queuedProtocols[protocolIDs{ep.netProto, ep.transProto}] for i, endpoint := range ep.endpointsArr { // HandlePacket takes ownership of pkt, so each endpoint needs // its own copy except for the final one. if i == len(ep.endpointsArr)-1 { + if mustQueue { + queuedProtocol.QueuePacket(r, endpoint, id, pkt) + break + } endpoint.HandlePacket(r, id, pkt) break } + if mustQueue { + queuedProtocol.QueuePacket(r, endpoint, id, pkt.Clone()) + continue + } endpoint.HandlePacket(r, id, pkt.Clone()) } ep.mu.RUnlock() // Don't use defer for performance reasons. @@ -357,7 +393,7 @@ func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocol if epsByNic, ok := eps.endpoints[id]; ok { // There was already a binding. - return epsByNic.registerEndpoint(ep, reusePort, bindToDevice) + return epsByNic.registerEndpoint(d, netProto, protocol, ep, reusePort, bindToDevice) } // This is a new binding. 
@@ -367,7 +403,7 @@ func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocol } eps.endpoints[id] = epsByNic - return epsByNic.registerEndpoint(ep, reusePort, bindToDevice) + return epsByNic.registerEndpoint(d, netProto, protocol, ep, reusePort, bindToDevice) } // unregisterEndpoint unregisters the endpoint with the given id such that it diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 353bd06f4..0e3ab05ad 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -16,6 +16,18 @@ go_template_instance( }, ) +go_template_instance( + name = "tcp_endpoint_list", + out = "tcp_endpoint_list.go", + package = "tcp", + prefix = "endpoint", + template = "//pkg/ilist:generic_list", + types = { + "Element": "*endpoint", + "Linker": "*endpoint", + }, +) + go_library( name = "tcp", srcs = [ @@ -23,6 +35,7 @@ go_library( "connect.go", "cubic.go", "cubic_state.go", + "dispatcher.go", "endpoint.go", "endpoint_state.go", "forwarder.go", @@ -38,6 +51,7 @@ go_library( "segment_state.go", "snd.go", "snd_state.go", + "tcp_endpoint_list.go", "tcp_segment_list.go", "timer.go", ], @@ -45,7 +59,6 @@ go_library( imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ - "//pkg/log", "//pkg/rand", "//pkg/sleep", "//pkg/sync", diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 1ea996936..1a2e3efa9 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -285,7 +285,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head // listenEP is nil when listenContext is used by tcp.Forwarder. if l.listenEP != nil { l.listenEP.mu.Lock() - if l.listenEP.state != StateListen { + if l.listenEP.EndpointState() != StateListen { l.listenEP.mu.Unlock() return nil, tcpip.ErrConnectionAborted } @@ -344,11 +344,12 @@ func (l *listenContext) closeAllPendingEndpoints() { // instead. func (e *endpoint) deliverAccepted(n *endpoint) { e.mu.Lock() - state := e.state + state := e.EndpointState() e.pendingAccepted.Add(1) defer e.pendingAccepted.Done() acceptedChan := e.acceptedChan e.mu.Unlock() + if state == StateListen { acceptedChan <- n e.waiterQueue.Notify(waiter.EventIn) @@ -562,8 +563,8 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { // We do not use transitionToStateEstablishedLocked here as there is // no handshake state available when doing a SYN cookie based accept. n.stack.Stats().TCP.CurrentEstablished.Increment() - n.state = StateEstablished n.isConnectNotified = true + n.setEndpointState(StateEstablished) // Do the delivery in a separate goroutine so // that we don't block the listen loop in case @@ -596,7 +597,7 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error { // handleSynSegment() from attempting to queue new connections // to the endpoint. e.mu.Lock() - e.state = StateClose + e.setEndpointState(StateClose) // close any endpoints in SYN-RCVD state. 
ctx.closeAllPendingEndpoints() diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 613ec1775..f3896715b 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -190,7 +190,7 @@ func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *hea h.mss = opts.MSS h.sndWndScale = opts.WS h.ep.mu.Lock() - h.ep.state = StateSynRecv + h.ep.setEndpointState(StateSynRecv) h.ep.mu.Unlock() } @@ -274,14 +274,14 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { // SYN-RCVD state. h.state = handshakeSynRcvd h.ep.mu.Lock() - h.ep.state = StateSynRecv ttl := h.ep.ttl + h.ep.setEndpointState(StateSynRecv) h.ep.mu.Unlock() synOpts := header.TCPSynOptions{ WS: int(h.effectiveRcvWndScale()), TS: rcvSynOpts.TS, TSVal: h.ep.timestamp(), - TSEcr: h.ep.recentTS, + TSEcr: h.ep.recentTimestamp(), // We only send SACKPermitted if the other side indicated it // permits SACK. This is not explicitly defined in the RFC but @@ -341,7 +341,7 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { WS: h.rcvWndScale, TS: h.ep.sendTSOk, TSVal: h.ep.timestamp(), - TSEcr: h.ep.recentTS, + TSEcr: h.ep.recentTimestamp(), SACKPermitted: h.ep.sackPermitted, MSS: h.ep.amss, } @@ -501,7 +501,7 @@ func (h *handshake) execute() *tcpip.Error { WS: h.rcvWndScale, TS: true, TSVal: h.ep.timestamp(), - TSEcr: h.ep.recentTS, + TSEcr: h.ep.recentTimestamp(), SACKPermitted: bool(sackEnabled), MSS: h.ep.amss, } @@ -792,7 +792,7 @@ func (e *endpoint) makeOptions(sackBlocks []header.SACKBlock) []byte { // Ref: https://tools.ietf.org/html/rfc7323#section-5.4. offset += header.EncodeNOP(options[offset:]) offset += header.EncodeNOP(options[offset:]) - offset += header.EncodeTSOption(e.timestamp(), uint32(e.recentTS), options[offset:]) + offset += header.EncodeTSOption(e.timestamp(), e.recentTimestamp(), options[offset:]) } if e.sackPermitted && len(sackBlocks) > 0 { offset += header.EncodeNOP(options[offset:]) @@ -811,7 +811,7 @@ func (e *endpoint) makeOptions(sackBlocks []header.SACKBlock) []byte { // sendRaw sends a TCP segment to the endpoint's peer. func (e *endpoint) sendRaw(data buffer.VectorisedView, flags byte, seq, ack seqnum.Value, rcvWnd seqnum.Size) *tcpip.Error { var sackBlocks []header.SACKBlock - if e.state == StateEstablished && e.rcv.pendingBufSize > 0 && (flags&header.TCPFlagAck != 0) { + if e.EndpointState() == StateEstablished && e.rcv.pendingBufSize > 0 && (flags&header.TCPFlagAck != 0) { sackBlocks = e.sack.Blocks[:e.sack.NumBlocks] } options := e.makeOptions(sackBlocks) @@ -848,6 +848,9 @@ func (e *endpoint) handleWrite() *tcpip.Error { } func (e *endpoint) handleClose() *tcpip.Error { + if !e.EndpointState().connected() { + return nil + } // Drain the send queue. e.handleWrite() @@ -864,11 +867,7 @@ func (e *endpoint) handleClose() *tcpip.Error { func (e *endpoint) resetConnectionLocked(err *tcpip.Error) { // Only send a reset if the connection is being aborted for a reason // other than receiving a reset. 
- if e.state == StateEstablished || e.state == StateCloseWait { - e.stack.Stats().TCP.EstablishedResets.Increment() - e.stack.Stats().TCP.CurrentEstablished.Decrement() - } - e.state = StateError + e.setEndpointState(StateError) e.HardError = err if err != tcpip.ErrConnectionReset && err != tcpip.ErrTimeout { // The exact sequence number to be used for the RST is the same as the @@ -888,9 +887,12 @@ func (e *endpoint) resetConnectionLocked(err *tcpip.Error) { } // completeWorkerLocked is called by the worker goroutine when it's about to -// exit. It marks the worker as completed and performs cleanup work if requested -// by Close(). +// exit. func (e *endpoint) completeWorkerLocked() { + // Worker is terminating(either due to moving to + // CLOSED or ERROR state, ensure we release all + // registrations port reservations even if the socket + // itself is not yet closed by the application. e.workerRunning = false if e.workerCleanup { e.cleanupLocked() @@ -917,8 +919,7 @@ func (e *endpoint) transitionToStateEstablishedLocked(h *handshake) { e.rcvAutoParams.prevCopied = int(h.rcvWnd) e.rcvListMu.Unlock() } - h.ep.stack.Stats().TCP.CurrentEstablished.Increment() - e.state = StateEstablished + e.setEndpointState(StateEstablished) } // transitionToStateCloseLocked ensures that the endpoint is @@ -927,11 +928,12 @@ func (e *endpoint) transitionToStateEstablishedLocked(h *handshake) { // delivered to this endpoint from the demuxer when the endpoint // is transitioned to StateClose. func (e *endpoint) transitionToStateCloseLocked() { - if e.state == StateClose { + if e.EndpointState() == StateClose { return } + // Mark the endpoint as fully closed for reads/writes. e.cleanupLocked() - e.state = StateClose + e.setEndpointState(StateClose) e.stack.Stats().TCP.EstablishedClosed.Increment() } @@ -946,7 +948,9 @@ func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) { s.decRef() return } - ep.(*endpoint).enqueueSegment(s) + if ep.(*endpoint).enqueueSegment(s) { + ep.(*endpoint).newSegmentWaker.Assert() + } } func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { @@ -955,9 +959,8 @@ func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { // except SYN-SENT, all reset (RST) segments are // validated by checking their SEQ-fields." So // we only process it if it's acceptable. - s.decRef() e.mu.Lock() - switch e.state { + switch e.EndpointState() { // In case of a RST in CLOSE-WAIT linux moves // the socket to closed state with an error set // to indicate EPIPE. @@ -981,103 +984,57 @@ func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { e.transitionToStateCloseLocked() e.HardError = tcpip.ErrAborted e.mu.Unlock() + e.notifyProtocolGoroutine(notifyTickleWorker) return false, nil default: e.mu.Unlock() + // RFC 793, page 37 states that "in all states + // except SYN-SENT, all reset (RST) segments are + // validated by checking their SEQ-fields." So + // we only process it if it's acceptable. + + // Notify protocol goroutine. This is required when + // handleSegment is invoked from the processor goroutine + // rather than the worker goroutine. + e.notifyProtocolGoroutine(notifyResetByPeer) return false, tcpip.ErrConnectionReset } } return true, nil } -// handleSegments pulls segments from the queue and processes them. It returns -// no error if the protocol loop should continue, an error otherwise. -func (e *endpoint) handleSegments() *tcpip.Error { +// handleSegments processes all inbound segments. 
+func (e *endpoint) handleSegments(fastPath bool) *tcpip.Error { checkRequeue := true for i := 0; i < maxSegmentsPerWake; i++ { + if e.EndpointState() == StateClose || e.EndpointState() == StateError { + return nil + } s := e.segmentQueue.dequeue() if s == nil { checkRequeue = false break } - // Invoke the tcp probe if installed. - if e.probe != nil { - e.probe(e.completeState()) + cont, err := e.handleSegment(s) + if err != nil { + s.decRef() + e.mu.Lock() + e.setEndpointState(StateError) + e.HardError = err + e.mu.Unlock() + return err } - - if s.flagIsSet(header.TCPFlagRst) { - if ok, err := e.handleReset(s); !ok { - return err - } - } else if s.flagIsSet(header.TCPFlagSyn) { - // See: https://tools.ietf.org/html/rfc5961#section-4.1 - // 1) If the SYN bit is set, irrespective of the sequence number, TCP - // MUST send an ACK (also referred to as challenge ACK) to the remote - // peer: - // - // - // - // After sending the acknowledgment, TCP MUST drop the unacceptable - // segment and stop processing further. - // - // By sending an ACK, the remote peer is challenged to confirm the loss - // of the previous connection and the request to start a new connection. - // A legitimate peer, after restart, would not have a TCB in the - // synchronized state. Thus, when the ACK arrives, the peer should send - // a RST segment back with the sequence number derived from the ACK - // field that caused the RST. - - // This RST will confirm that the remote peer has indeed closed the - // previous connection. Upon receipt of a valid RST, the local TCP - // endpoint MUST terminate its connection. The local TCP endpoint - // should then rely on SYN retransmission from the remote end to - // re-establish the connection. - - e.snd.sendAck() - } else if s.flagIsSet(header.TCPFlagAck) { - // Patch the window size in the segment according to the - // send window scale. - s.window <<= e.snd.sndWndScale - - // RFC 793, page 41 states that "once in the ESTABLISHED - // state all segments must carry current acknowledgment - // information." - drop, err := e.rcv.handleRcvdSegment(s) - if err != nil { - s.decRef() - return err - } - if drop { - s.decRef() - continue - } - - // Now check if the received segment has caused us to transition - // to a CLOSED state, if yes then terminate processing and do - // not invoke the sender. - e.mu.RLock() - state := e.state - e.mu.RUnlock() - if state == StateClose { - // When we get into StateClose while processing from the queue, - // return immediately and let the protocolMainloop handle it. - // - // We can reach StateClose only while processing a previous segment - // or a notification from the protocolMainLoop (caller goroutine). - // This means that with this return, the segment dequeue below can - // never occur on a closed endpoint. - s.decRef() - return nil - } - e.snd.handleRcvdSegment(s) + if !cont { + s.decRef() + return nil } - s.decRef() } - // If the queue is not empty, make sure we'll wake up in the next - // iteration. - if checkRequeue && !e.segmentQueue.empty() { + // When fastPath is true we don't want to wake up the worker + // goroutine. If the endpoint has more segments to process the + // dispatcher will call handleSegments again anyway. 
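// A hedged sketch (handleSegmentsCallersSketch is hypothetical) contrasting
// the two callers that the new fastPath flag distinguishes; both call sites
// appear later in this patch:
func handleSegmentsCallersSketch(e *endpoint) {
	// Worker goroutine: not the fast path, so handleSegments re-asserts
	// newSegmentWaker itself when segments remain queued.
	_ = e.handleSegments(false /* fastPath */)

	// Dispatcher processor: fast path, so the waker is skipped and the
	// processor re-queues the endpoint instead when more segments remain.
	_ = e.handleSegments(true /* fastPath */)
}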
+ if !fastPath && checkRequeue && !e.segmentQueue.empty() { e.newSegmentWaker.Assert() } @@ -1086,11 +1043,88 @@ func (e *endpoint) handleSegments() *tcpip.Error { e.snd.sendAck() } - e.resetKeepaliveTimer(true) + e.resetKeepaliveTimer(true /* receivedData */) return nil } +// handleSegment handles a given segment and notifies the worker goroutine if +// if the connection should be terminated. +func (e *endpoint) handleSegment(s *segment) (cont bool, err *tcpip.Error) { + // Invoke the tcp probe if installed. + if e.probe != nil { + e.probe(e.completeState()) + } + + if s.flagIsSet(header.TCPFlagRst) { + if ok, err := e.handleReset(s); !ok { + return false, err + } + } else if s.flagIsSet(header.TCPFlagSyn) { + // See: https://tools.ietf.org/html/rfc5961#section-4.1 + // 1) If the SYN bit is set, irrespective of the sequence number, TCP + // MUST send an ACK (also referred to as challenge ACK) to the remote + // peer: + // + // + // + // After sending the acknowledgment, TCP MUST drop the unacceptable + // segment and stop processing further. + // + // By sending an ACK, the remote peer is challenged to confirm the loss + // of the previous connection and the request to start a new connection. + // A legitimate peer, after restart, would not have a TCB in the + // synchronized state. Thus, when the ACK arrives, the peer should send + // a RST segment back with the sequence number derived from the ACK + // field that caused the RST. + + // This RST will confirm that the remote peer has indeed closed the + // previous connection. Upon receipt of a valid RST, the local TCP + // endpoint MUST terminate its connection. The local TCP endpoint + // should then rely on SYN retransmission from the remote end to + // re-establish the connection. + + e.snd.sendAck() + } else if s.flagIsSet(header.TCPFlagAck) { + // Patch the window size in the segment according to the + // send window scale. + s.window <<= e.snd.sndWndScale + + // RFC 793, page 41 states that "once in the ESTABLISHED + // state all segments must carry current acknowledgment + // information." + drop, err := e.rcv.handleRcvdSegment(s) + if err != nil { + return false, err + } + if drop { + return true, nil + } + + // Now check if the received segment has caused us to transition + // to a CLOSED state, if yes then terminate processing and do + // not invoke the sender. + e.mu.RLock() + state := e.state + e.mu.RUnlock() + if state == StateClose { + // When we get into StateClose while processing from the queue, + // return immediately and let the protocolMainloop handle it. + // + // We can reach StateClose only while processing a previous segment + // or a notification from the protocolMainLoop (caller goroutine). + // This means that with this return, the segment dequeue below can + // never occur on a closed endpoint. + s.decRef() + return false, nil + } + + e.snd.handleRcvdSegment(s) + } + + return true, nil +} + // keepaliveTimerExpired is called when the keepaliveTimer fires. We send TCP // keepalive packets periodically when the connection is idle. If we don't hear // from the other side after a number of tries, we terminate the connection. @@ -1160,7 +1194,7 @@ func (e *endpoint) disableKeepaliveTimer() { // protocolMainLoop is the main loop of the TCP protocol. It runs in its own // goroutine and is responsible for sending segments and handling received // segments. 
-func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { +func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) *tcpip.Error { var closeTimer *time.Timer var closeWaker sleep.Waker @@ -1182,6 +1216,7 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { } e.mu.Unlock() + e.workMu.Unlock() // When the protocol loop exits we should wake up our waiters. e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } @@ -1193,7 +1228,7 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { initialRcvWnd := e.initialReceiveWindow() h := newHandshake(e, seqnum.Size(initialRcvWnd)) e.mu.Lock() - h.ep.state = StateSynSent + h.ep.setEndpointState(StateSynSent) e.mu.Unlock() if err := h.execute(); err != nil { @@ -1202,12 +1237,11 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { e.lastErrorMu.Unlock() e.mu.Lock() - e.state = StateError + e.setEndpointState(StateError) e.HardError = err // Lock released below. epilogue() - return err } } @@ -1215,7 +1249,6 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { e.keepalive.timer.init(&e.keepalive.waker) defer e.keepalive.timer.cleanup() - // Tell waiters that the endpoint is connected and writable. e.mu.Lock() drained := e.drainDone != nil e.mu.Unlock() @@ -1224,8 +1257,6 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { <-e.undrain } - e.waiterQueue.Notify(waiter.EventOut) - // Set up the functions that will be called when the main protocol loop // wakes up. funcs := []struct { @@ -1240,18 +1271,15 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { w: &e.sndCloseWaker, f: e.handleClose, }, - { - w: &e.newSegmentWaker, - f: e.handleSegments, - }, { w: &closeWaker, f: func() *tcpip.Error { // This means the socket is being closed due - // to the TCP_FIN_WAIT2 timeout was hit. Just + // to the TCP-FIN-WAIT2 timeout was hit. Just // mark the socket as closed. e.mu.Lock() e.transitionToStateCloseLocked() + e.workerCleanup = true e.mu.Unlock() return nil }, @@ -1266,6 +1294,12 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { return nil }, }, + { + w: &e.newSegmentWaker, + f: func() *tcpip.Error { + return e.handleSegments(false /* fastPath */) + }, + }, { w: &e.keepalive.waker, f: e.keepaliveTimerExpired, @@ -1293,14 +1327,16 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { } if n¬ifyReset != 0 { - e.mu.Lock() - e.resetConnectionLocked(tcpip.ErrConnectionAborted) - e.mu.Unlock() + return tcpip.ErrConnectionAborted + } + + if n¬ifyResetByPeer != 0 { + return tcpip.ErrConnectionReset } if n¬ifyClose != 0 && closeTimer == nil { e.mu.Lock() - if e.state == StateFinWait2 && e.closed { + if e.EndpointState() == StateFinWait2 && e.closed { // The socket has been closed and we are in FIN_WAIT2 // so start the FIN_WAIT2 timer. closeTimer = time.AfterFunc(e.tcpLingerTimeout, func() { @@ -1320,11 +1356,11 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { if n¬ifyDrain != 0 { for !e.segmentQueue.empty() { - if err := e.handleSegments(); err != nil { + if err := e.handleSegments(false /* fastPath */); err != nil { return err } } - if e.state != StateClose && e.state != StateError { + if e.EndpointState() != StateClose && e.EndpointState() != StateError { // Only block the worker if the endpoint // is not in closed state or error state. 
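// The funcs table above is driven by the sleep package's Sleeper/Waker pair.
// A minimal sketch of that pattern (sleeperSketch is hypothetical; AddWaker,
// Assert, Fetch and Done are the same calls used by protocolMainLoop):
func sleeperSketch() {
	var newWork sleep.Waker
	s := sleep.Sleeper{}
	s.AddWaker(&newWork, 0) // register waker 0 with the sleeper.

	go newWork.Assert() // a producer signals that work is available.

	id, _ := s.Fetch(true) // blocks until any registered waker fires.
	_ = id                 // the index selects the matching funcs[id].f above.
	s.Done()
}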
close(e.drainDone) @@ -1349,14 +1385,21 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { s.AddWaker(funcs[i].w, i) } + // Notify the caller that the waker initialization is complete and the + // endpoint is ready. + if wakerInitDone != nil { + close(wakerInitDone) + } + + // Tell waiters that the endpoint is connected and writable. + e.waiterQueue.Notify(waiter.EventOut) + // The following assertions and notifications are needed for restored // endpoints. Fresh newly created endpoints have empty states and should // not invoke any. - e.segmentQueue.mu.Lock() - if !e.segmentQueue.list.Empty() { + if !e.segmentQueue.empty() { e.newSegmentWaker.Assert() } - e.segmentQueue.mu.Unlock() e.rcvListMu.Lock() if !e.rcvList.Empty() { @@ -1372,27 +1415,32 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { // Main loop. Handle segments until both send and receive ends of the // connection have completed. - for e.state != StateTimeWait && e.state != StateClose && e.state != StateError { + for e.EndpointState() != StateTimeWait && e.EndpointState() != StateClose && e.EndpointState() != StateError { e.mu.Unlock() e.workMu.Unlock() v, _ := s.Fetch(true) e.workMu.Lock() + // We need to double check here because the notification + // maybe stale by the time we got around to processing it. + // NOTE: since we now hold the workMu the processors cannot + // change the state of the endpoint so it' safe to proceed + // after this check. + if e.EndpointState() == StateTimeWait || e.EndpointState() == StateClose || e.EndpointState() == StateError { + e.mu.Lock() + break + } if err := funcs[v].f(); err != nil { e.mu.Lock() - // Ensure we release all endpoint registration and route - // references as the connection is now in an error - // state. e.workerCleanup = true e.resetConnectionLocked(err) // Lock released below. epilogue() - return nil } e.mu.Lock() } - state := e.state + state := e.EndpointState() e.mu.Unlock() var reuseTW func() if state == StateTimeWait { @@ -1405,13 +1453,15 @@ func (e *endpoint) protocolMainLoop(handshake bool) *tcpip.Error { s.Done() // Wake up any waiters before we enter TIME_WAIT. e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) + e.mu.Lock() + e.workerCleanup = true + e.mu.Unlock() reuseTW = e.doTimeWait() } // Mark endpoint as closed. e.mu.Lock() - if e.state != StateError { - e.stack.Stats().TCP.CurrentEstablished.Decrement() + if e.EndpointState() != StateError { e.transitionToStateCloseLocked() } @@ -1468,7 +1518,11 @@ func (e *endpoint) handleTimeWaitSegments() (extendTimeWait bool, reuseTW func() tcpEP := listenEP.(*endpoint) if EndpointState(tcpEP.State()) == StateListen { reuseTW = func() { - tcpEP.enqueueSegment(s) + if !tcpEP.enqueueSegment(s) { + s.decRef() + return + } + tcpEP.newSegmentWaker.Assert() } // We explicitly do not decRef // the segment as it's still diff --git a/pkg/tcpip/transport/tcp/dispatcher.go b/pkg/tcpip/transport/tcp/dispatcher.go new file mode 100644 index 000000000..a72f0c379 --- /dev/null +++ b/pkg/tcpip/transport/tcp/dispatcher.go @@ -0,0 +1,218 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp + +import ( + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +// epQueue is a queue of endpoints. +type epQueue struct { + mu sync.Mutex + list endpointList +} + +// enqueue adds e to the queue if the endpoint is not already on the queue. +func (q *epQueue) enqueue(e *endpoint) { + q.mu.Lock() + if e.pendingProcessing { + q.mu.Unlock() + return + } + q.list.PushBack(e) + e.pendingProcessing = true + q.mu.Unlock() +} + +// dequeue removes and returns the first element from the queue if available, +// returns nil otherwise. +func (q *epQueue) dequeue() *endpoint { + q.mu.Lock() + if e := q.list.Front(); e != nil { + q.list.Remove(e) + e.pendingProcessing = false + q.mu.Unlock() + return e + } + q.mu.Unlock() + return nil +} + +// empty returns true if the queue is empty, false otherwise. +func (q *epQueue) empty() bool { + q.mu.Lock() + v := q.list.Empty() + q.mu.Unlock() + return v +} + +// processor is responsible for processing packets queued to a tcp endpoint. +type processor struct { + epQ epQueue + newEndpointWaker sleep.Waker + id int +} + +func newProcessor(id int) *processor { + p := &processor{ + id: id, + } + go p.handleSegments() + return p +} + +func (p *processor) queueEndpoint(ep *endpoint) { + // Queue an endpoint for processing by the processor goroutine. + p.epQ.enqueue(ep) + p.newEndpointWaker.Assert() +} + +func (p *processor) handleSegments() { + const newEndpointWaker = 1 + s := sleep.Sleeper{} + s.AddWaker(&p.newEndpointWaker, newEndpointWaker) + defer s.Done() + for { + s.Fetch(true) + for ep := p.epQ.dequeue(); ep != nil; ep = p.epQ.dequeue() { + if ep.segmentQueue.empty() { + continue + } + + // If socket has transitioned out of connected state + // then just let the worker handle the packet. + // + // NOTE: We read this outside of e.mu lock which means + // that by the time we get to handleSegments the + // endpoint may not be in ESTABLISHED. But this should + // be fine as all normal shutdown states are handled by + // handleSegments and if the endpoint moves to a + // CLOSED/ERROR state then handleSegments is a noop. + if ep.EndpointState() != StateEstablished { + ep.newSegmentWaker.Assert() + continue + } + + if !ep.workMu.TryLock() { + ep.newSegmentWaker.Assert() + continue + } + // If the endpoint is in a connected state then we do + // direct delivery to ensure low latency and avoid + // scheduler interactions. + if err := ep.handleSegments(true /* fastPath */); err != nil || ep.EndpointState() == StateClose { + ep.notifyProtocolGoroutine(notifyTickleWorker) + ep.workMu.Unlock() + continue + } + + if !ep.segmentQueue.empty() { + p.epQ.enqueue(ep) + } + + ep.workMu.Unlock() + } + } +} + +// dispatcher manages a pool of TCP endpoint processors which are responsible +// for the processing of inbound segments. This fixed pool of processor +// goroutines do full tcp processing. 
The processor is selected based on the +// hash of the endpoint id to ensure that delivery for the same endpoint happens +// in-order. +type dispatcher struct { + processors []*processor + seed uint32 +} + +func newDispatcher(nProcessors int) *dispatcher { + processors := []*processor{} + for i := 0; i < nProcessors; i++ { + processors = append(processors, newProcessor(i)) + } + return &dispatcher{ + processors: processors, + seed: generateRandUint32(), + } +} + +func (d *dispatcher) queuePacket(r *stack.Route, stackEP stack.TransportEndpoint, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) { + ep := stackEP.(*endpoint) + s := newSegment(r, id, pkt) + if !s.parse() { + ep.stack.Stats().MalformedRcvdPackets.Increment() + ep.stack.Stats().TCP.InvalidSegmentsReceived.Increment() + ep.stats.ReceiveErrors.MalformedPacketsReceived.Increment() + s.decRef() + return + } + + if !s.csumValid { + ep.stack.Stats().MalformedRcvdPackets.Increment() + ep.stack.Stats().TCP.ChecksumErrors.Increment() + ep.stats.ReceiveErrors.ChecksumErrors.Increment() + s.decRef() + return + } + + ep.stack.Stats().TCP.ValidSegmentsReceived.Increment() + ep.stats.SegmentsReceived.Increment() + if (s.flags & header.TCPFlagRst) != 0 { + ep.stack.Stats().TCP.ResetsReceived.Increment() + } + + if !ep.enqueueSegment(s) { + s.decRef() + return + } + + // For sockets not in established state let the worker goroutine + // handle the packets. + if ep.EndpointState() != StateEstablished { + ep.newSegmentWaker.Assert() + return + } + + d.selectProcessor(id).queueEndpoint(ep) +} + +func generateRandUint32() uint32 { + b := make([]byte, 4) + if _, err := rand.Read(b); err != nil { + panic(err) + } + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func (d *dispatcher) selectProcessor(id stack.TransportEndpointID) *processor { + payload := []byte{ + byte(id.LocalPort), + byte(id.LocalPort >> 8), + byte(id.RemotePort), + byte(id.RemotePort >> 8)} + + h := jenkins.Sum32(d.seed) + h.Write(payload) + h.Write([]byte(id.LocalAddress)) + h.Write([]byte(id.RemoteAddress)) + + return d.processors[h.Sum32()%uint32(len(d.processors))] +} diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index cc8b533c8..1799c6e10 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -120,6 +120,7 @@ const ( notifyMTUChanged notifyDrain notifyReset + notifyResetByPeer notifyKeepaliveChanged notifyMSSChanged // notifyTickleWorker is used to tickle the protocol main loop during a @@ -127,6 +128,7 @@ const ( // ensures the loop terminates if the final state of the endpoint is // say TIME_WAIT. notifyTickleWorker + notifyError ) // SACKInfo holds TCP SACK related information for a given endpoint. @@ -283,6 +285,18 @@ func (*EndpointInfo) IsEndpointInfo() {} type endpoint struct { EndpointInfo + // endpointEntry is used to queue endpoints for processing to the + // a given tcp processor goroutine. + // + // Precondition: epQueue.mu must be held to read/write this field.. + endpointEntry `state:"nosave"` + + // pendingProcessing is true if this endpoint is queued for processing + // to a TCP processor. + // + // Precondition: epQueue.mu must be held to read/write this field.. + pendingProcessing bool `state:"nosave"` + // workMu is used to arbitrate which goroutine may perform protocol // work. 
Only the main protocol goroutine is expected to call Lock() on // it, but other goroutines (e.g., send) may call TryLock() to eagerly @@ -324,6 +338,7 @@ type endpoint struct { // The following fields are protected by the mutex. mu sync.RWMutex `state:"nosave"` + // state must be read/set using the EndpointState()/setEndpointState() methods. state EndpointState `state:".(EndpointState)"` // origEndpointState is only used during a restore phase to save the @@ -359,7 +374,7 @@ type endpoint struct { workerRunning bool // workerCleanup specifies if the worker goroutine must perform cleanup - // before exitting. This can only be set to true when workerRunning is + // before exiting. This can only be set to true when workerRunning is // also true, and they're both protected by the mutex. workerCleanup bool @@ -371,6 +386,8 @@ type endpoint struct { // recentTS is the timestamp that should be sent in the TSEcr field of // the timestamp for future segments sent by the endpoint. This field is // updated if required when a new segment is received by this endpoint. + // + // recentTS must be read/written atomically. recentTS uint32 // tsOffset is a randomized offset added to the value of the @@ -567,6 +584,47 @@ func (e *endpoint) ResumeWork() { e.workMu.Unlock() } +// setEndpointState updates the state of the endpoint to state atomically. This +// method is unexported as the only place we should update the state is in this +// package but we allow the state to be read freely without holding e.mu. +// +// Precondition: e.mu must be held to call this method. +func (e *endpoint) setEndpointState(state EndpointState) { + oldstate := EndpointState(atomic.LoadUint32((*uint32)(&e.state))) + switch state { + case StateEstablished: + e.stack.Stats().TCP.CurrentEstablished.Increment() + case StateError: + fallthrough + case StateClose: + if oldstate == StateCloseWait || oldstate == StateEstablished { + e.stack.Stats().TCP.EstablishedResets.Increment() + } + fallthrough + default: + if oldstate == StateEstablished { + e.stack.Stats().TCP.CurrentEstablished.Decrement() + } + } + atomic.StoreUint32((*uint32)(&e.state), uint32(state)) +} + +// EndpointState returns the current state of the endpoint. +func (e *endpoint) EndpointState() EndpointState { + return EndpointState(atomic.LoadUint32((*uint32)(&e.state))) +} + +// setRecentTimestamp atomically sets the recentTS field to the +// provided value. +func (e *endpoint) setRecentTimestamp(recentTS uint32) { + atomic.StoreUint32(&e.recentTS, recentTS) +} + +// recentTimestamp atomically reads and returns the value of the recentTS field. +func (e *endpoint) recentTimestamp() uint32 { + return atomic.LoadUint32(&e.recentTS) +} + // keepalive is a synchronization wrapper used to appease stateify. See the // comment in endpoint, where it is used. // @@ -656,7 +714,7 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { e.mu.RLock() defer e.mu.RUnlock() - switch e.state { + switch e.EndpointState() { case StateInitial, StateBound, StateConnecting, StateSynSent, StateSynRecv: // Ready for nothing. @@ -672,7 +730,7 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { } } } - if e.state.connected() { + if e.EndpointState().connected() { // Determine if the endpoint is writable if requested. if (mask & waiter.EventOut) != 0 { e.sndBufMu.Lock() @@ -733,14 +791,20 @@ func (e *endpoint) Close() { // Issue a shutdown so that the peer knows we won't send any more data // if we're connected, or stop accepting if we're listening. 
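// The accessors above establish the locking convention used throughout the
// rest of the patch: state transitions happen under e.mu via setEndpointState
// (which also keeps CurrentEstablished/EstablishedResets consistent), while
// readers such as the dispatcher sample the state lock-free. A minimal sketch
// (stateSketch is hypothetical):
func stateSketch(e *endpoint) {
	e.mu.Lock()
	e.setEndpointState(StateEstablished) // writer: must hold e.mu.
	e.mu.Unlock()

	connected := e.EndpointState().connected() // reader: no e.mu required.
	_ = connected
}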
e.Shutdown(tcpip.ShutdownWrite | tcpip.ShutdownRead) + e.closeNoShutdown() +} +// closeNoShutdown closes the endpoint without doing a full shutdown. This is +// used when a connection needs to be aborted with a RST and we want to skip +// a full 4 way TCP shutdown. +func (e *endpoint) closeNoShutdown() { e.mu.Lock() // For listening sockets, we always release ports inline so that they // are immediately available for reuse after Close() is called. If also // registered, we unregister as well otherwise the next user would fail // in Listen() when trying to register. - if e.state == StateListen && e.isPortReserved { + if e.EndpointState() == StateListen && e.isPortReserved { if e.isRegistered { e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundBindToDevice) e.isRegistered = false @@ -780,6 +844,8 @@ func (e *endpoint) closePendingAcceptableConnectionsLocked() { defer close(done) for n := range e.acceptedChan { n.notifyProtocolGoroutine(notifyReset) + // close all connections that have completed but + // not accepted by the application. n.Close() } }() @@ -797,11 +863,13 @@ func (e *endpoint) closePendingAcceptableConnectionsLocked() { // after Close() is called and the worker goroutine (if any) is done with its // work. func (e *endpoint) cleanupLocked() { + // Close all endpoints that might have been accepted by TCP but not by // the client. if e.acceptedChan != nil { e.closePendingAcceptableConnectionsLocked() } + e.workerCleanup = false if e.isRegistered { @@ -920,7 +988,7 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, // reads to proceed before returning a ECONNRESET. e.rcvListMu.Lock() bufUsed := e.rcvBufUsed - if s := e.state; !s.connected() && s != StateClose && bufUsed == 0 { + if s := e.EndpointState(); !s.connected() && s != StateClose && bufUsed == 0 { e.rcvListMu.Unlock() he := e.HardError e.mu.RUnlock() @@ -944,7 +1012,7 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) { if e.rcvBufUsed == 0 { - if e.rcvClosed || !e.state.connected() { + if e.rcvClosed || !e.EndpointState().connected() { return buffer.View{}, tcpip.ErrClosedForReceive } return buffer.View{}, tcpip.ErrWouldBlock @@ -980,8 +1048,8 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) { // Caller must hold e.mu and e.sndBufMu func (e *endpoint) isEndpointWritableLocked() (int, *tcpip.Error) { // The endpoint cannot be written to if it's not connected. - if !e.state.connected() { - switch e.state { + if !e.EndpointState().connected() { + switch e.EndpointState() { case StateError: return 0, e.HardError default: @@ -1039,42 +1107,86 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c return 0, nil, perr } - if !opts.Atomic { // See above. - e.mu.RLock() - e.sndBufMu.Lock() + if opts.Atomic { + // Add data to the send queue. + s := newSegmentFromView(&e.route, e.ID, v) + e.sndBufUsed += len(v) + e.sndBufInQueue += seqnum.Size(len(v)) + e.sndQueue.PushBack(s) + e.sndBufMu.Unlock() + // Release the endpoint lock to prevent deadlocks due to lock + // order inversion when acquiring workMu. + e.mu.RUnlock() + } - // Because we released the lock before copying, check state again - // to make sure the endpoint is still in a valid state for a write. 
- avail, err = e.isEndpointWritableLocked() - if err != nil { + if e.workMu.TryLock() { + // Since we released locks in between it's possible that the + // endpoint transitioned to a CLOSED/ERROR states so make + // sure endpoint is still writable before trying to write. + if !opts.Atomic { // See above. + e.mu.RLock() + e.sndBufMu.Lock() + + // Because we released the lock before copying, check state again + // to make sure the endpoint is still in a valid state for a write. + avail, err = e.isEndpointWritableLocked() + if err != nil { + e.sndBufMu.Unlock() + e.mu.RUnlock() + e.stats.WriteErrors.WriteClosed.Increment() + return 0, nil, err + } + + // Discard any excess data copied in due to avail being reduced due + // to a simultaneous write call to the socket. + if avail < len(v) { + v = v[:avail] + } + // Add data to the send queue. + s := newSegmentFromView(&e.route, e.ID, v) + e.sndBufUsed += len(v) + e.sndBufInQueue += seqnum.Size(len(v)) + e.sndQueue.PushBack(s) e.sndBufMu.Unlock() + // Release the endpoint lock to prevent deadlocks due to lock + // order inversion when acquiring workMu. e.mu.RUnlock() - e.stats.WriteErrors.WriteClosed.Increment() - return 0, nil, err - } - // Discard any excess data copied in due to avail being reduced due - // to a simultaneous write call to the socket. - if avail < len(v) { - v = v[:avail] } - } - - // Add data to the send queue. - s := newSegmentFromView(&e.route, e.ID, v) - e.sndBufUsed += len(v) - e.sndBufInQueue += seqnum.Size(len(v)) - e.sndQueue.PushBack(s) - e.sndBufMu.Unlock() - // Release the endpoint lock to prevent deadlocks due to lock - // order inversion when acquiring workMu. - e.mu.RUnlock() - - if e.workMu.TryLock() { // Do the work inline. e.handleWrite() e.workMu.Unlock() } else { + if !opts.Atomic { // See above. + e.mu.RLock() + e.sndBufMu.Lock() + + // Because we released the lock before copying, check state again + // to make sure the endpoint is still in a valid state for a write. + avail, err = e.isEndpointWritableLocked() + if err != nil { + e.sndBufMu.Unlock() + e.mu.RUnlock() + e.stats.WriteErrors.WriteClosed.Increment() + return 0, nil, err + } + + // Discard any excess data copied in due to avail being reduced due + // to a simultaneous write call to the socket. + if avail < len(v) { + v = v[:avail] + } + // Add data to the send queue. + s := newSegmentFromView(&e.route, e.ID, v) + e.sndBufUsed += len(v) + e.sndBufInQueue += seqnum.Size(len(v)) + e.sndQueue.PushBack(s) + e.sndBufMu.Unlock() + // Release the endpoint lock to prevent deadlocks due to lock + // order inversion when acquiring workMu. + e.mu.RUnlock() + + } // Let the protocol goroutine do the work. e.sndWaker.Assert() } @@ -1091,7 +1203,7 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro // The endpoint can be read if it's connected, or if it's already closed // but has some pending unread data. 
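// The restructuring above keeps Write's original dispatch rule: if the
// protocol work mutex is free, do the send inline; otherwise queue the data
// and wake the worker. A hedged sketch (writeDispatchSketch is hypothetical):
func writeDispatchSketch(e *endpoint) {
	if e.workMu.TryLock() {
		e.handleWrite() // inline: avoids a goroutine hand-off.
		e.workMu.Unlock()
		return
	}
	e.sndWaker.Assert() // the protocol goroutine drains the send queue later.
}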
- if s := e.state; !s.connected() && s != StateClose { + if s := e.EndpointState(); !s.connected() && s != StateClose { if s == StateError { return 0, tcpip.ControlMessages{}, e.HardError } @@ -1103,7 +1215,7 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro defer e.rcvListMu.Unlock() if e.rcvBufUsed == 0 { - if e.rcvClosed || !e.state.connected() { + if e.rcvClosed || !e.EndpointState().connected() { e.stats.ReadErrors.ReadClosed.Increment() return 0, tcpip.ControlMessages{}, tcpip.ErrClosedForReceive } @@ -1187,7 +1299,7 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { defer e.mu.Unlock() // We only allow this to be set when we're in the initial state. - if e.state != StateInitial { + if e.EndpointState() != StateInitial { return tcpip.ErrInvalidEndpointState } @@ -1402,14 +1514,14 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { // Acquire the work mutex as we may need to // reinitialize the congestion control state. e.mu.Lock() - state := e.state + state := e.EndpointState() e.cc = v e.mu.Unlock() switch state { case StateEstablished: e.workMu.Lock() e.mu.Lock() - if e.state == state { + if e.EndpointState() == state { e.snd.cc = e.snd.initCongestionControl(e.cc) } e.mu.Unlock() @@ -1472,7 +1584,7 @@ func (e *endpoint) readyReceiveSize() (int, *tcpip.Error) { defer e.mu.RUnlock() // The endpoint cannot be in listen state. - if e.state == StateListen { + if e.EndpointState() == StateListen { return 0, tcpip.ErrInvalidEndpointState } @@ -1731,7 +1843,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc return err } - if e.state.connected() { + if e.EndpointState().connected() { // The endpoint is already connected. If caller hasn't been // notified yet, return success. if !e.isConnectNotified { @@ -1743,7 +1855,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc } nicID := addr.NIC - switch e.state { + switch e.EndpointState() { case StateBound: // If we're already bound to a NIC but the caller is requesting // that we use a different one now, we cannot proceed. @@ -1850,7 +1962,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc } e.isRegistered = true - e.state = StateConnecting + e.setEndpointState(StateConnecting) e.route = r.Clone() e.boundNICID = nicID e.effectiveNetProtos = netProtos @@ -1871,14 +1983,13 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc } e.segmentQueue.mu.Unlock() e.snd.updateMaxPayloadSize(int(e.route.MTU()), 0) - e.state = StateEstablished - e.stack.Stats().TCP.CurrentEstablished.Increment() + e.setEndpointState(StateEstablished) } if run { e.workerRunning = true e.stack.Stats().TCP.ActiveConnectionOpenings.Increment() - go e.protocolMainLoop(handshake) // S/R-SAFE: will be drained before save. + go e.protocolMainLoop(handshake, nil) // S/R-SAFE: will be drained before save. } return tcpip.ErrConnectStarted @@ -1896,7 +2007,7 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { e.shutdownFlags |= flags finQueued := false switch { - case e.state.connected(): + case e.EndpointState().connected(): // Close for read. if (e.shutdownFlags & tcpip.ShutdownRead) != 0 { // Mark read side as closed. @@ -1908,8 +2019,23 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { // If we're fully closed and we have unread data we need to abort // the connection with a RST. 
if (e.shutdownFlags&tcpip.ShutdownWrite) != 0 && rcvBufUsed > 0 { - e.notifyProtocolGoroutine(notifyReset) + // Move the socket to error state immediately. + // This is done redundantly because in case of + // save/restore on a Shutdown/Close() the socket + // state needs to indicate the error otherwise + // save file will show the socket in established + // state even though snd/rcv are closed. e.mu.Unlock() + // Try to send an active reset immediately if the + // work mutex is available. + if e.workMu.TryLock() { + e.mu.Lock() + e.resetConnectionLocked(tcpip.ErrConnectionAborted) + e.mu.Unlock() + e.workMu.Unlock() + } else { + e.notifyProtocolGoroutine(notifyReset) + } return nil } } @@ -1931,11 +2057,10 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { finQueued = true // Mark endpoint as closed. e.sndClosed = true - e.sndBufMu.Unlock() } - case e.state == StateListen: + case e.EndpointState() == StateListen: // Tell protocolListenLoop to stop. if flags&tcpip.ShutdownRead != 0 { e.notifyProtocolGoroutine(notifyClose) @@ -1976,7 +2101,7 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { // When the endpoint shuts down, it sets workerCleanup to true, and from // that point onward, acceptedChan is the responsibility of the cleanup() // method (and should not be touched anywhere else, including here). - if e.state == StateListen && !e.workerCleanup { + if e.EndpointState() == StateListen && !e.workerCleanup { // Adjust the size of the channel iff we can fix existing // pending connections into the new one. if len(e.acceptedChan) > backlog { @@ -1994,7 +2119,7 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { return nil } - if e.state == StateInitial { + if e.EndpointState() == StateInitial { // The listen is called on an unbound socket, the socket is // automatically bound to a random free port with the local // address set to INADDR_ANY. @@ -2004,7 +2129,7 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { } // Endpoint must be bound before it can transition to listen mode. - if e.state != StateBound { + if e.EndpointState() != StateBound { e.stats.ReadErrors.InvalidEndpointState.Increment() return tcpip.ErrInvalidEndpointState } @@ -2015,24 +2140,27 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { } e.isRegistered = true - e.state = StateListen + e.setEndpointState(StateListen) + if e.acceptedChan == nil { e.acceptedChan = make(chan *endpoint, backlog) } e.workerRunning = true - go e.protocolListenLoop( // S/R-SAFE: drained on save. seqnum.Size(e.receiveBufferAvailable())) - return nil } // startAcceptedLoop sets up required state and starts a goroutine with the // main loop for accepted connections. func (e *endpoint) startAcceptedLoop(waiterQueue *waiter.Queue) { + e.mu.Lock() e.waiterQueue = waiterQueue e.workerRunning = true - go e.protocolMainLoop(false) // S/R-SAFE: drained on save. + e.mu.Unlock() + wakerInitDone := make(chan struct{}) + go e.protocolMainLoop(false, wakerInitDone) // S/R-SAFE: drained on save. + <-wakerInitDone } // Accept returns a new endpoint if a peer has established a connection @@ -2042,7 +2170,7 @@ func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { defer e.mu.RUnlock() // Endpoint must be in listen state before it can accept connections. 
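// startAcceptedLoop above now blocks until the protocol goroutine has
// registered its wakers with its sleeper. A hedged sketch of that handshake
// (wakerInitSketch is hypothetical):
func wakerInitSketch(e *endpoint) {
	done := make(chan struct{})
	go e.protocolMainLoop(false /* handshake */, done) // closes done after wakers are registered.
	<-done                                             // from here on, asserting the endpoint's wakers is safe.
}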
- if e.state != StateListen { + if e.EndpointState() != StateListen { return nil, nil, tcpip.ErrInvalidEndpointState } @@ -2069,7 +2197,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { // Don't allow binding once endpoint is not in the initial state // anymore. This is because once the endpoint goes into a connected or // listen state, it is already bound. - if e.state != StateInitial { + if e.EndpointState() != StateInitial { return tcpip.ErrAlreadyBound } @@ -2131,7 +2259,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { } // Mark endpoint as bound. - e.state = StateBound + e.setEndpointState(StateBound) return nil } @@ -2153,7 +2281,7 @@ func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { e.mu.RLock() defer e.mu.RUnlock() - if !e.state.connected() { + if !e.EndpointState().connected() { return tcpip.FullAddress{}, tcpip.ErrNotConnected } @@ -2164,45 +2292,22 @@ func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { }, nil } -// HandlePacket is called by the stack when new packets arrive to this transport -// endpoint. func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) { - s := newSegment(r, id, pkt) - if !s.parse() { - e.stack.Stats().MalformedRcvdPackets.Increment() - e.stack.Stats().TCP.InvalidSegmentsReceived.Increment() - e.stats.ReceiveErrors.MalformedPacketsReceived.Increment() - s.decRef() - return - } - - if !s.csumValid { - e.stack.Stats().MalformedRcvdPackets.Increment() - e.stack.Stats().TCP.ChecksumErrors.Increment() - e.stats.ReceiveErrors.ChecksumErrors.Increment() - s.decRef() - return - } - - e.stack.Stats().TCP.ValidSegmentsReceived.Increment() - e.stats.SegmentsReceived.Increment() - if (s.flags & header.TCPFlagRst) != 0 { - e.stack.Stats().TCP.ResetsReceived.Increment() - } - - e.enqueueSegment(s) + // TCP HandlePacket is not required anymore as inbound packets first + // land at the Dispatcher which then can either delivery using the + // worker go routine or directly do the invoke the tcp processing inline + // based on the state of the endpoint. } -func (e *endpoint) enqueueSegment(s *segment) { +func (e *endpoint) enqueueSegment(s *segment) bool { // Send packet to worker goroutine. - if e.segmentQueue.enqueue(s) { - e.newSegmentWaker.Assert() - } else { + if !e.segmentQueue.enqueue(s) { // The queue is full, so we drop the segment. e.stack.Stats().DroppedPackets.Increment() e.stats.ReceiveErrors.SegmentQueueDropped.Increment() - s.decRef() + return false } + return true } // HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. 
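// enqueueSegment now only reports whether the segment fit in the queue; waking
// the worker is the caller's job. A hedged sketch (enqueueSketch is
// hypothetical) of the caller-side pattern used at the call sites in this
// patch:
func enqueueSketch(e *endpoint, s *segment) {
	if !e.enqueueSegment(s) {
		s.decRef() // queue full: the drop was counted, release the reference.
		return
	}
	e.newSegmentWaker.Assert() // wake the worker to process the new segment.
}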
@@ -2319,8 +2424,8 @@ func (e *endpoint) rcvWndScaleForHandshake() int { // updateRecentTimestamp updates the recent timestamp using the algorithm // described in https://tools.ietf.org/html/rfc7323#section-4.3 func (e *endpoint) updateRecentTimestamp(tsVal uint32, maxSentAck seqnum.Value, segSeq seqnum.Value) { - if e.sendTSOk && seqnum.Value(e.recentTS).LessThan(seqnum.Value(tsVal)) && segSeq.LessThanEq(maxSentAck) { - e.recentTS = tsVal + if e.sendTSOk && seqnum.Value(e.recentTimestamp()).LessThan(seqnum.Value(tsVal)) && segSeq.LessThanEq(maxSentAck) { + e.setRecentTimestamp(tsVal) } } @@ -2330,7 +2435,7 @@ func (e *endpoint) updateRecentTimestamp(tsVal uint32, maxSentAck seqnum.Value, func (e *endpoint) maybeEnableTimestamp(synOpts *header.TCPSynOptions) { if synOpts.TS { e.sendTSOk = true - e.recentTS = synOpts.TSVal + e.setRecentTimestamp(synOpts.TSVal) } } @@ -2419,7 +2524,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState { // Endpoint TCP Option state. s.SendTSOk = e.sendTSOk - s.RecentTS = e.recentTS + s.RecentTS = e.recentTimestamp() s.TSOffset = e.tsOffset s.SACKPermitted = e.sackPermitted s.SACK.Blocks = make([]header.SACKBlock, e.sack.NumBlocks) @@ -2526,9 +2631,7 @@ func (e *endpoint) initGSO() { // State implements tcpip.Endpoint.State. It exports the endpoint's protocol // state for diagnostics. func (e *endpoint) State() uint32 { - e.mu.Lock() - defer e.mu.Unlock() - return uint32(e.state) + return uint32(e.EndpointState()) } // Info returns a copy of the endpoint info. diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index 4b8d867bc..4a46f0ec5 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -16,6 +16,7 @@ package tcp import ( "fmt" + "sync/atomic" "time" "gvisor.dev/gvisor/pkg/sync" @@ -48,7 +49,7 @@ func (e *endpoint) beforeSave() { e.mu.Lock() defer e.mu.Unlock() - switch e.state { + switch e.EndpointState() { case StateInitial, StateBound: // TODO(b/138137272): this enumeration duplicates // EndpointState.connected. remove it. 
@@ -70,31 +71,30 @@ func (e *endpoint) beforeSave() { fallthrough case StateListen, StateConnecting: e.drainSegmentLocked() - if e.state != StateClose && e.state != StateError { + if e.EndpointState() != StateClose && e.EndpointState() != StateError { if !e.workerRunning { panic("endpoint has no worker running in listen, connecting, or connected state") } break } - fallthrough case StateError, StateClose: - for (e.state == StateError || e.state == StateClose) && e.workerRunning { + for e.workerRunning { e.mu.Unlock() time.Sleep(100 * time.Millisecond) e.mu.Lock() } if e.workerRunning { - panic("endpoint still has worker running in closed or error state") + panic(fmt.Sprintf("endpoint: %+v still has worker running in closed or error state", e.ID)) } default: - panic(fmt.Sprintf("endpoint in unknown state %v", e.state)) + panic(fmt.Sprintf("endpoint in unknown state %v", e.EndpointState())) } if e.waiterQueue != nil && !e.waiterQueue.IsEmpty() { panic("endpoint still has waiters upon save") } - if e.state != StateClose && !((e.state == StateBound || e.state == StateListen) == e.isPortReserved) { + if e.EndpointState() != StateClose && !((e.EndpointState() == StateBound || e.EndpointState() == StateListen) == e.isPortReserved) { panic("endpoints which are not in the closed state must have a reserved port IFF they are in bound or listen state") } } @@ -135,7 +135,7 @@ func (e *endpoint) loadAcceptedChan(acceptedEndpoints []*endpoint) { // saveState is invoked by stateify. func (e *endpoint) saveState() EndpointState { - return e.state + return e.EndpointState() } // Endpoint loading must be done in the following ordering by their state, to @@ -151,7 +151,8 @@ var connectingLoading sync.WaitGroup func (e *endpoint) loadState(state EndpointState) { // This is to ensure that the loading wait groups include all applicable // endpoints before any asynchronous calls to the Wait() methods. - if state.connected() { + // For restore purposes we treat TimeWait like a connected endpoint. + if state.connected() || state == StateTimeWait { connectedLoading.Add(1) } switch state { @@ -160,13 +161,14 @@ func (e *endpoint) loadState(state EndpointState) { case StateConnecting, StateSynSent, StateSynRecv: connectingLoading.Add(1) } - e.state = state + // Directly update the state here rather than using e.setEndpointState + // as the endpoint is still being loaded and the stack reference to increment + // metrics is not yet initialized. + atomic.StoreUint32((*uint32)(&e.state), uint32(state)) } // afterLoad is invoked by stateify. func (e *endpoint) afterLoad() { - // Freeze segment queue before registering to prevent any segments - // from being delivered while it is being restored. e.origEndpointState = e.state // Restore the endpoint to InitialState as it will be moved to // its origEndpointState during Resume. @@ -180,7 +182,6 @@ func (e *endpoint) Resume(s *stack.Stack) { e.segmentQueue.setLimit(MaxUnprocessedSegments) e.workMu.Init() state := e.origEndpointState - switch state { case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished: var ss SendBufferSizeOption @@ -276,7 +277,7 @@ func (e *endpoint) Resume(s *stack.Stack) { listenLoading.Wait() connectingLoading.Wait() bind() - e.state = StateClose + e.setEndpointState(StateClose) tcpip.AsyncLoading.Done() }() } @@ -288,6 +289,7 @@ func (e *endpoint) Resume(s *stack.Stack) { e.stack.CompleteTransportEndpointCleanup(e) tcpip.DeleteDanglingEndpoint(e) } + } // saveLastError is invoked by stateify. 
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 9a8f64aa6..958c06fa7 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -21,6 +21,7 @@ package tcp import ( + "runtime" "strings" "time" @@ -104,6 +105,7 @@ type protocol struct { moderateReceiveBuffer bool tcpLingerTimeout time.Duration tcpTimeWaitTimeout time.Duration + dispatcher *dispatcher } // Number returns the tcp protocol number. @@ -134,6 +136,14 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) { return h.SourcePort(), h.DestinationPort(), nil } +// QueuePacket queues packets targeted at an endpoint after hashing the packet +// to a specific processing queue. Each queue is serviced by its own processor +// goroutine which is responsible for dequeuing and doing full TCP dispatch of +// the packet. +func (p *protocol) QueuePacket(r *stack.Route, ep stack.TransportEndpoint, id stack.TransportEndpointID, pkt tcpip.PacketBuffer) { + p.dispatcher.queuePacket(r, ep, id, pkt) +} + // HandleUnknownDestinationPacket handles packets targeted at this protocol but // that don't match any existing endpoint. // @@ -330,5 +340,6 @@ func NewProtocol() stack.TransportProtocol { availableCongestionControl: []string{ccReno, ccCubic}, tcpLingerTimeout: DefaultTCPLingerTimeout, tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout, + dispatcher: newDispatcher(runtime.GOMAXPROCS(0)), } } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index 05c8488f8..958f03ac1 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -169,19 +169,19 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum // We just received a FIN, our next state depends on whether we sent a // FIN already or not. r.ep.mu.Lock() - switch r.ep.state { + switch r.ep.EndpointState() { case StateEstablished: - r.ep.state = StateCloseWait + r.ep.setEndpointState(StateCloseWait) case StateFinWait1: if s.flagIsSet(header.TCPFlagAck) { // FIN-ACK, transition to TIME-WAIT. - r.ep.state = StateTimeWait + r.ep.setEndpointState(StateTimeWait) } else { // Simultaneous close, expecting a final ACK. - r.ep.state = StateClosing + r.ep.setEndpointState(StateClosing) } case StateFinWait2: - r.ep.state = StateTimeWait + r.ep.setEndpointState(StateTimeWait) } r.ep.mu.Unlock() @@ -205,16 +205,16 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum // shutdown states. if s.flagIsSet(header.TCPFlagAck) && s.ackNumber == r.ep.snd.sndNxt { r.ep.mu.Lock() - switch r.ep.state { + switch r.ep.EndpointState() { case StateFinWait1: - r.ep.state = StateFinWait2 + r.ep.setEndpointState(StateFinWait2) // Notify protocol goroutine that we have received an // ACK to our FIN so that it can start the FIN_WAIT2 // timer to abort connection if the other side does // not close within 2MSL. 
r.ep.notifyProtocolGoroutine(notifyClose) case StateClosing: - r.ep.state = StateTimeWait + r.ep.setEndpointState(StateTimeWait) case StateLastAck: r.ep.transitionToStateCloseLocked() } @@ -267,7 +267,6 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo switch state { case StateCloseWait, StateClosing, StateLastAck: if !s.sequenceNumber.LessThanEq(r.rcvNxt) { - s.decRef() // Just drop the segment as we have // already received a FIN and this // segment is after the sequence number @@ -284,7 +283,6 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // trigger a RST. endDataSeq := s.sequenceNumber.Add(seqnum.Size(s.data.Size())) if rcvClosed && r.rcvNxt.LessThan(endDataSeq) { - s.decRef() return true, tcpip.ErrConnectionAborted } if state == StateFinWait1 { @@ -314,7 +312,6 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // the last actual data octet in a segment in // which it occurs. if closed && (!s.flagIsSet(header.TCPFlagFin) || s.sequenceNumber.Add(s.logicalLen()) != r.rcvNxt+1) { - s.decRef() return true, tcpip.ErrConnectionAborted } } @@ -336,7 +333,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // r as they arrive. It is called by the protocol main loop. func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) { r.ep.mu.RLock() - state := r.ep.state + state := r.ep.EndpointState() closed := r.ep.closed r.ep.mu.RUnlock() diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index fdff7ed81..b74b61e7d 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -705,17 +705,15 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se } seg.flags = header.TCPFlagAck | header.TCPFlagFin segEnd = seg.sequenceNumber.Add(1) - // Transition to FIN-WAIT1 state since we're initiating an active close. - s.ep.mu.Lock() - switch s.ep.state { + // Update the state to reflect that we have now + // queued a FIN. + switch s.ep.EndpointState() { case StateCloseWait: - // We've already received a FIN and are now sending our own. The - // sender is now awaiting a final ACK for this FIN. - s.ep.state = StateLastAck + s.ep.setEndpointState(StateLastAck) default: - s.ep.state = StateFinWait1 + s.ep.setEndpointState(StateFinWait1) } - s.ep.mu.Unlock() + } else { // We're sending a non-FIN segment. if seg.flags&header.TCPFlagFin != 0 { diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 6edfa8dce..a9dfbe857 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -293,7 +293,6 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { checker.SeqNum(uint32(c.IRS+1)), checker.AckNum(uint32(iss)+1), checker.TCPFlags(header.TCPFlagFin|header.TCPFlagAck))) - finHeaders := &context.Headers{ SrcPort: context.TestPort, DstPort: context.StackPort, @@ -459,6 +458,9 @@ func TestConnectResetAfterClose(t *testing.T) { checker.IPv4(t, b, checker.TCP( checker.DstPort(context.TestPort), + // RST is always generated with sndNxt which if the FIN + // has been sent will be 1 higher than the sequence number + // of the FIN itself. 
checker.SeqNum(uint32(c.IRS)+2), checker.AckNum(0), checker.TCPFlags(header.TCPFlagRst), @@ -1500,6 +1502,9 @@ func TestRstOnCloseWithUnreadDataFinConvertRst(t *testing.T) { checker.TCP( checker.DstPort(context.TestPort), checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst), + // RST is always generated with sndNxt which if the FIN + // has been sent will be 1 higher than the sequence + // number of the FIN itself. checker.SeqNum(uint32(c.IRS)+2), )) // The RST puts the endpoint into an error state. @@ -5441,6 +5446,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) { rawEP.SendPacketWithTS(b[start:start+mss], tsVal) packetsSent++ } + // Resume the worker so that it only sees the packets once all of them // are waiting to be read. worker.ResumeWork() @@ -5508,7 +5514,7 @@ func TestReceiveBufferAutoTuning(t *testing.T) { stk := c.Stack() // Set lower limits for auto-tuning tests. This is required because the // test stops the worker which can cause packets to be dropped because - // the segment queue holding unprocessed packets is limited to 500. + // the segment queue holding unprocessed packets is limited to 300. const receiveBufferSize = 80 << 10 // 80KB. const maxReceiveBufferSize = receiveBufferSize * 10 if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil { @@ -5563,6 +5569,7 @@ func TestReceiveBufferAutoTuning(t *testing.T) { totalSent += mss packetsSent++ } + // Resume it so that it only sees the packets once all of them // are waiting to be read. worker.ResumeWork() diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 5d114d460..2f9821555 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -533,7 +533,7 @@ TEST_P(SocketInetLoopbackTest, TCPFinWait2Test_NoRandomSave) { // Sleep for a little over the linger timeout to reduce flakiness in // save/restore tests. - absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 1)); + absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 2)); ds.reset(); diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 6b99c021d..33a5ac66c 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -814,6 +814,20 @@ TEST_P(TcpSocketTest, FullBuffer) { t_ = -1; } +TEST_P(TcpSocketTest, PollAfterShutdown) { + ScopedThread client_thread([this]() { + EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallSucceedsWithValue(0)); + struct pollfd poll_fd = {s_, POLLIN | POLLERR | POLLHUP, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); + }); + + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceedsWithValue(0)); + struct pollfd poll_fd = {t_, POLLIN | POLLERR | POLLHUP, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000), + SyscallSucceedsWithValue(1)); +} + TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListener) { // Initialize address to the loopback one. sockaddr_storage addr = -- cgit v1.2.3 From c50efc8c700fa2628f1415daeeb3b382009eb1bb Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 16 Jan 2020 12:48:05 -0800 Subject: Disable xattr tests. These can remain disabled until we actually support extended attributes. The following modifications were also made: 1. Disable save/restore on tests that change file permissions. Restore will not work properly for these tests, since it will try to open the file with read-write after it has been read- or write-only. 
2. Change user.abc to user.test. PiperOrigin-RevId: 290123941 --- test/syscalls/BUILD | 5 -- test/syscalls/linux/xattr.cc | 152 ++++++++++++------------------------------- 2 files changed, 42 insertions(+), 115 deletions(-) (limited to 'test') diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index a3a85917d..829693e8e 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -717,11 +717,6 @@ syscall_test(test = "//test/syscalls/linux:proc_net_tcp_test") syscall_test(test = "//test/syscalls/linux:proc_net_udp_test") -syscall_test( - add_overlay = True, - test = "//test/syscalls/linux:xattr_test", -) - go_binary( name = "syscall_test_runner", testonly = 1, diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 75740238c..b3bc3463e 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -59,7 +59,8 @@ TEST_F(XattrTest, XattrLargeName) { std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); - // TODO(b/127675828): Support setxattr and getxattr. + // An xattr should be whitelisted before it can be accessed--do not allow + // arbitrary xattrs to be read/written in gVisor. if (!IsRunningOnGvisor()) { EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), SyscallSucceeds()); @@ -83,59 +84,53 @@ TEST_F(XattrTest, XattrInvalidPrefix) { SyscallFailsWithErrno(EOPNOTSUPP)); } -TEST_F(XattrTest, XattrReadOnly) { +// Do not allow save/restore cycles after making the test file read-only, as +// the restore will fail to open it with r/w permissions. +TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; char val = 'a'; size_t size = sizeof(val); - // TODO(b/127675828): Support setxattr and getxattr. - if (!IsRunningOnGvisor()) { - EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), - SyscallSucceeds()); - } + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + DisableSave ds; ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IRUSR)); EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallFailsWithErrno(EACCES)); - // TODO(b/127675828): Support setxattr and getxattr. - if (!IsRunningOnGvisor()) { - char buf = '-'; - EXPECT_THAT(getxattr(path, name, &buf, size), - SyscallSucceedsWithValue(size)); - EXPECT_EQ(buf, val); - } + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); } -TEST_F(XattrTest, XattrWriteOnly) { +// Do not allow save/restore cycles after making the test file write-only, as +// the restore will fail to open it with r/w permissions. +TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + DisableSave ds; ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IWUSR)); const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; char val = 'a'; size_t size = sizeof(val); - // TODO(b/127675828): Support setxattr and getxattr. 
- if (!IsRunningOnGvisor()) { - EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), - SyscallSucceeds()); - } + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(EACCES)); } TEST_F(XattrTest, XattrTrustedWithNonadmin) { - // TODO(b/127675828): Support setxattr and getxattr. + // TODO(b/127675828): Support setxattr and getxattr with "trusted" prefix. SKIP_IF(IsRunningOnGvisor()); SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); @@ -147,11 +142,8 @@ TEST_F(XattrTest, XattrTrustedWithNonadmin) { } TEST_F(XattrTest, XattrOnDirectory) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - char name[] = "user.abc"; + const char name[] = "user.test"; EXPECT_THAT(setxattr(dir.path().c_str(), name, NULL, 0, /*flags=*/0), SyscallSucceeds()); EXPECT_THAT(getxattr(dir.path().c_str(), name, NULL, 0), @@ -159,13 +151,10 @@ TEST_F(XattrTest, XattrOnDirectory) { } TEST_F(XattrTest, XattrOnSymlink) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); - char name[] = "user.abc"; + const char name[] = "user.test"; EXPECT_THAT(setxattr(link.path().c_str(), name, NULL, 0, /*flags=*/0), SyscallSucceeds()); EXPECT_THAT(getxattr(link.path().c_str(), name, NULL, 0), @@ -173,7 +162,7 @@ TEST_F(XattrTest, XattrOnSymlink) { } TEST_F(XattrTest, XattrOnInvalidFileTypes) { - char name[] = "user.abc"; + const char name[] = "user.test"; char char_device[] = "/dev/zero"; EXPECT_THAT(setxattr(char_device, name, NULL, 0, /*flags=*/0), @@ -191,11 +180,8 @@ TEST_F(XattrTest, XattrOnInvalidFileTypes) { } TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; std::vector val = {'a', 'a'}; size_t size = 1; EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), @@ -209,11 +195,8 @@ TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, SetxattrZeroSize) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; char val = 'a'; EXPECT_THAT(setxattr(path, name, &val, 0, /*flags=*/0), SyscallSucceeds()); @@ -225,7 +208,7 @@ TEST_F(XattrTest, SetxattrZeroSize) { TEST_F(XattrTest, SetxattrSizeTooLarge) { const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; // Note that each particular fs implementation may stipulate a lower size // limit, in which case we actually may fail (e.g. error with ENOSPC) for @@ -235,43 +218,29 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) { EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), SyscallFailsWithErrno(E2BIG)); - // TODO(b/127675828): Support setxattr and getxattr. 
- if (!IsRunningOnGvisor()) { - EXPECT_THAT(getxattr(path, name, nullptr, 0), - SyscallFailsWithErrno(ENODATA)); - } + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), SyscallFailsWithErrno(EFAULT)); - // TODO(b/127675828): Support setxattr and getxattr. - if (!IsRunningOnGvisor()) { - EXPECT_THAT(getxattr(path, name, nullptr, 0), - SyscallFailsWithErrno(ENODATA)); - } + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallSucceedsWithValue(0)); } TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; std::vector val(XATTR_SIZE_MAX + 1); std::fill(val.begin(), val.end(), 'a'); size_t size = 1; @@ -286,11 +255,8 @@ TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { } TEST_F(XattrTest, SetxattrReplaceWithSmaller) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; std::vector val = {'a', 'a'}; EXPECT_THAT(setxattr(path, name, val.data(), 2, /*flags=*/0), SyscallSucceeds()); @@ -304,11 +270,8 @@ TEST_F(XattrTest, SetxattrReplaceWithSmaller) { } TEST_F(XattrTest, SetxattrReplaceWithLarger) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; std::vector val = {'a', 'a'}; EXPECT_THAT(setxattr(path, name, val.data(), 1, /*flags=*/0), SyscallSucceeds()); @@ -321,11 +284,8 @@ TEST_F(XattrTest, SetxattrReplaceWithLarger) { } TEST_F(XattrTest, SetxattrCreateFlag) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), SyscallSucceeds()); EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), @@ -335,11 +295,8 @@ TEST_F(XattrTest, SetxattrCreateFlag) { } TEST_F(XattrTest, SetxattrReplaceFlag) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), SyscallFailsWithErrno(ENODATA)); EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); @@ -357,11 +314,8 @@ TEST_F(XattrTest, SetxattrInvalidFlags) { } TEST_F(XattrTest, Getxattr) { - // TODO(b/127675828): Support setxattr and getxattr. 
- SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; int val = 1234; size_t size = sizeof(val); EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); @@ -372,11 +326,8 @@ TEST_F(XattrTest, Getxattr) { } TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; std::vector val = {'a', 'a'}; size_t size = val.size(); EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); @@ -387,11 +338,8 @@ TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, GetxattrSizeLargerThanValue) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; char val = 'a'; EXPECT_THAT(setxattr(path, name, &val, 1, /*flags=*/0), SyscallSucceeds()); @@ -405,11 +353,8 @@ TEST_F(XattrTest, GetxattrSizeLargerThanValue) { } TEST_F(XattrTest, GetxattrZeroSize) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; char val = 'a'; EXPECT_THAT(setxattr(path, name, &val, sizeof(val), /*flags=*/0), SyscallSucceeds()); @@ -421,11 +366,8 @@ TEST_F(XattrTest, GetxattrZeroSize) { } TEST_F(XattrTest, GetxattrSizeTooLarge) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; char val = 'a'; EXPECT_THAT(setxattr(path, name, &val, sizeof(val), /*flags=*/0), SyscallSucceeds()); @@ -440,11 +382,8 @@ TEST_F(XattrTest, GetxattrSizeTooLarge) { } TEST_F(XattrTest, GetxattrNullValue) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; char val = 'a'; size_t size = sizeof(val); EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); @@ -454,11 +393,8 @@ TEST_F(XattrTest, GetxattrNullValue) { } TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - char name[] = "user.abc"; + const char name[] = "user.test"; char val = 'a'; size_t size = sizeof(val); // Set value with zero size. @@ -473,13 +409,9 @@ TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, GetxattrNonexistentName) { - // TODO(b/127675828): Support getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); - std::string name = "user.nonexistent"; - EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), - SyscallFailsWithErrno(ENODATA)); + const char name[] = "user.test"; + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } } // namespace -- cgit v1.2.3 From 82ae857877fdf3492f40bca87657a07892c3f59b Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Fri, 6 Dec 2019 06:29:24 +0000 Subject: Enable build of test/syscall tests on arm64. 
Signed-off-by: Haibo Xu Change-Id: I277d6c708bbf5c3edd7c3568941cfd01dc122e17 --- test/syscalls/linux/BUILD | 55 ++++++++++++++++++++++++++++++++++------- test/syscalls/linux/bad.cc | 3 ++- test/syscalls/linux/chroot.cc | 2 +- test/syscalls/linux/fork.cc | 3 +++ test/syscalls/linux/getdents.cc | 10 +++++++- test/syscalls/linux/preadv2.cc | 2 ++ test/syscalls/linux/proc.cc | 2 +- test/syscalls/linux/pwritev2.cc | 2 ++ test/syscalls/linux/seccomp.cc | 5 ++++ test/syscalls/linux/stat.cc | 2 ++ test/util/signal_util.h | 14 +++++++++++ test/util/test_util.cc | 2 +- 12 files changed, 88 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 064ce8429..68dcc598b 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -19,6 +19,16 @@ exports_files( visibility = ["//:sandbox"], ) +config_setting( + name = "x86_64", + constraint_values = ["@bazel_tools//platforms:x86_64"], +) + +config_setting( + name = "aarch64", + constraint_values = ["@bazel_tools//platforms:aarch64"], +) + cc_binary( name = "sigaltstack_check", testonly = 1, @@ -197,7 +207,10 @@ cc_binary( cc_binary( name = "32bit_test", testonly = 1, - srcs = ["32bit.cc"], + srcs = select({ + ":x86_64": ["32bit.cc"], + ":aarch64": [], + }), linkstatic = 1, deps = [ "//test/util:memory_util", @@ -584,7 +597,10 @@ cc_binary( cc_binary( name = "exceptions_test", testonly = 1, - srcs = ["exceptions.cc"], + srcs = select({ + ":x86_64": ["exceptions.cc"], + ":aarch64": [], + }), linkstatic = 1, deps = [ "//test/util:logging", @@ -640,7 +656,10 @@ cc_binary( cc_binary( name = "exec_binary_test", testonly = 1, - srcs = ["exec_binary.cc"], + srcs = select({ + ":x86_64": ["exec_binary.cc"], + ":aarch64": [], + }), linkstatic = 1, deps = [ "//test/util:cleanup", @@ -811,7 +830,10 @@ cc_binary( cc_binary( name = "fpsig_fork_test", testonly = 1, - srcs = ["fpsig_fork.cc"], + srcs = select({ + ":x86_64": ["fpsig_fork.cc"], + ":aarch64": [], + }), linkstatic = 1, deps = [ "//test/util:logging", @@ -825,7 +847,10 @@ cc_binary( cc_binary( name = "fpsig_nested_test", testonly = 1, - srcs = ["fpsig_nested.cc"], + srcs = select({ + ":x86_64": ["fpsig_nested.cc"], + ":aarch64": [], + }), linkstatic = 1, deps = [ "//test/util:test_main", @@ -1440,7 +1465,10 @@ cc_binary( cc_binary( name = "arch_prctl_test", testonly = 1, - srcs = ["arch_prctl.cc"], + srcs = select({ + ":x86_64": ["arch_prctl.cc"], + ":aarch64": [], + }), linkstatic = 1, deps = [ "//test/util:test_main", @@ -2035,7 +2063,10 @@ cc_binary( cc_binary( name = "sigiret_test", testonly = 1, - srcs = ["sigiret.cc"], + srcs = select({ + ":x86_64": ["sigiret.cc"], + ":aarch64": [], + }), linkstatic = 1, deps = [ "//test/util:logging", @@ -2043,7 +2074,10 @@ cc_binary( "//test/util:test_util", "//test/util:timer_util", "@com_google_googletest//:gtest", - ], + ] + select({ + ":x86_64": [], + ":aarch64": ["//test/util:test_main"], + }), ) cc_binary( @@ -3260,7 +3294,10 @@ cc_binary( cc_binary( name = "sysret_test", testonly = 1, - srcs = ["sysret.cc"], + srcs = select({ + ":x86_64": ["sysret.cc"], + ":aarch64": [], + }), linkstatic = 1, deps = [ "//test/util:logging", diff --git a/test/syscalls/linux/bad.cc b/test/syscalls/linux/bad.cc index f246a799e..9e4d8ea57 100644 --- a/test/syscalls/linux/bad.cc +++ b/test/syscalls/linux/bad.cc @@ -22,12 +22,13 @@ namespace gvisor { namespace testing { namespace { - +#if defined(__x86_64__) TEST(BadSyscallTest, NotImplemented) { // get_kernel_syms is not supported in Linux > 2.6, and not 
implemented in // gVisor. EXPECT_THAT(syscall(SYS_get_kernel_syms), SyscallFailsWithErrno(ENOSYS)); } +#endif // defined(__x86_64__) TEST(BadSyscallTest, NegativeOne) { EXPECT_THAT(syscall(-1), SyscallFailsWithErrno(ENOSYS)); diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc index 04bc2d7b9..0a2d44a2c 100644 --- a/test/syscalls/linux/chroot.cc +++ b/test/syscalls/linux/chroot.cc @@ -162,7 +162,7 @@ TEST(ChrootTest, DotDotFromOpenFD) { // getdents on fd should not error. char buf[1024]; - ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)), + ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), SyscallSucceeds()); } diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index 371890110..906f3358d 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -215,6 +215,8 @@ TEST_F(ForkTest, PrivateMapping) { EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); } +// CPUID is x86 specific. +#ifdef __x86_64__ // Test that cpuid works after a fork. TEST_F(ForkTest, Cpuid) { pid_t child = Fork(); @@ -227,6 +229,7 @@ TEST_F(ForkTest, Cpuid) { } EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); } +#endif TEST_F(ForkTest, Mmap) { pid_t child = Fork(); diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc index ad2dbacb8..bfd18d4ff 100644 --- a/test/syscalls/linux/getdents.cc +++ b/test/syscalls/linux/getdents.cc @@ -228,19 +228,27 @@ class GetdentsTest : public ::testing::Test { // Multiple template parameters are not allowed, so we must use explicit // template specialization to set the syscall number. +#ifdef __x86_64__ template <> int GetdentsTest::SyscallNum() { return SYS_getdents; } +#endif template <> int GetdentsTest::SyscallNum() { return SYS_getdents64; } -// Test both legacy getdents and getdents64. +#ifdef __x86_64__ +// Test both legacy getdents and getdents64 on x86_64. typedef ::testing::Types GetdentsTypes; +#elif __aarch64__ +// Test only getdents64 on arm64. +typedef ::testing::Types + GetdentsTypes; +#endif TYPED_TEST_SUITE(GetdentsTest, GetdentsTypes); // N.B. 
TYPED_TESTs require explicitly using this-> to access members of diff --git a/test/syscalls/linux/preadv2.cc b/test/syscalls/linux/preadv2.cc index c9246367d..3eeaf6ad8 100644 --- a/test/syscalls/linux/preadv2.cc +++ b/test/syscalls/linux/preadv2.cc @@ -35,6 +35,8 @@ namespace { #ifndef SYS_preadv2 #if defined(__x86_64__) #define SYS_preadv2 327 +#elif defined(__aarch64__) +#define SYS_preadv2 286 #else #error "Unknown architecture" #endif diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 8cf08991b..5b4f29cd9 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1986,7 +1986,7 @@ TEST(Proc, GetdentsEnoent) { }, nullptr, nullptr)); char buf[1024]; - ASSERT_THAT(syscall(SYS_getdents, fd.get(), buf, sizeof(buf)), + ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(ENOENT)); } diff --git a/test/syscalls/linux/pwritev2.cc b/test/syscalls/linux/pwritev2.cc index 1dbc0d6df..3fe5a600f 100644 --- a/test/syscalls/linux/pwritev2.cc +++ b/test/syscalls/linux/pwritev2.cc @@ -34,6 +34,8 @@ namespace { #ifndef SYS_pwritev2 #if defined(__x86_64__) #define SYS_pwritev2 328 +#elif defined(__aarch64__) +#define SYS_pwritev2 287 #else #error "Unknown architecture" #endif diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index 7e41fe7d8..6d7e543b9 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -49,7 +49,12 @@ namespace testing { namespace { // A syscall not implemented by Linux that we don't expect to be called. +#ifdef __x86_64__ constexpr uint32_t kFilteredSyscall = SYS_vserver; +#elif __aarch64__ +// Using arch_specific_syscalls which are not implemented on arm64. +constexpr uint32_t kFilteredSyscall = SYS_arch_specific_syscall+15; +#endif // Applies a seccomp-bpf filter that returns `filtered_result` for // `sysno` and allows all other syscalls. Async-signal-safe. diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index 30de2f8ff..7a99f2636 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -557,6 +557,8 @@ TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) { #ifndef SYS_statx #if defined(__x86_64__) #define SYS_statx 332 +#elif defined(__aarch64__) +#define SYS_statx 291 #else #error "Unknown architecture" #endif diff --git a/test/util/signal_util.h b/test/util/signal_util.h index bcf85c337..e7b66aa51 100644 --- a/test/util/signal_util.h +++ b/test/util/signal_util.h @@ -85,6 +85,20 @@ inline void FixupFault(ucontext_t* ctx) { // The encoding is 0x48 0xab 0x00. ctx->uc_mcontext.gregs[REG_RIP] += 3; } +#elif __aarch64__ +inline void Fault() { + // Zero and dereference x0. + asm("mov xzr, x0\r\n" + "str xzr, [x0]\r\n" + : + : + : "x0"); +} + +inline void FixupFault(ucontext_t* ctx) { + // Skip the bad instruction above. 
+ ctx->uc_mcontext.pc += 4; +} #endif } // namespace testing diff --git a/test/util/test_util.cc b/test/util/test_util.cc index 848504c88..a4f78eec2 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -76,7 +76,6 @@ bool IsRunningWithHostinet() { "xchg %%rdi, %%rbx\n" \ : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \ : "a"(a_inp), "2"(c_inp)) -#endif // defined(__x86_64__) CPUVendor GetCPUVendor() { uint32_t eax, ebx, ecx, edx; @@ -93,6 +92,7 @@ CPUVendor GetCPUVendor() { } return CPUVendor::kUnknownVendor; } +#endif // defined(__x86_64__) bool operator==(const KernelVersion& first, const KernelVersion& second) { return first.major == second.major && first.minor == second.minor && -- cgit v1.2.3 From 9073521098ee52cdda74a193565b7bbe75d8c35a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 17 Jan 2020 13:31:26 -0800 Subject: Convert EventMask to uint64 It is used for signalfd where the maximum signal is 64. PiperOrigin-RevId: 290331008 --- pkg/waiter/waiter.go | 2 +- test/syscalls/BUILD | 2 + test/syscalls/linux/signalfd.cc | 118 ++++++++++++++++++++++++---------------- 3 files changed, 74 insertions(+), 48 deletions(-) (limited to 'test') diff --git a/pkg/waiter/waiter.go b/pkg/waiter/waiter.go index f708e95fa..707eb085b 100644 --- a/pkg/waiter/waiter.go +++ b/pkg/waiter/waiter.go @@ -62,7 +62,7 @@ import ( ) // EventMask represents io events as used in the poll() syscall. -type EventMask uint16 +type EventMask uint64 // Events that waiters can wait on. The meaning is the same as those in the // poll() syscall. diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 829693e8e..90d52e73b 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -380,6 +380,8 @@ syscall_test(test = "//test/syscalls/linux:rseq_test") syscall_test(test = "//test/syscalls/linux:rtsignal_test") +syscall_test(test = "//test/syscalls/linux:signalfd_test") + syscall_test(test = "//test/syscalls/linux:sched_test") syscall_test(test = "//test/syscalls/linux:sched_yield_test") diff --git a/test/syscalls/linux/signalfd.cc b/test/syscalls/linux/signalfd.cc index 09ecad34a..95be4b66c 100644 --- a/test/syscalls/linux/signalfd.cc +++ b/test/syscalls/linux/signalfd.cc @@ -39,6 +39,7 @@ namespace testing { namespace { constexpr int kSigno = SIGUSR1; +constexpr int kSignoMax = 64; // SIGRTMAX constexpr int kSignoAlt = SIGUSR2; // Returns a new signalfd. @@ -51,41 +52,45 @@ inline PosixErrorOr NewSignalFD(sigset_t* mask, int flags = 0) { return FileDescriptor(fd); } -TEST(Signalfd, Basic) { +class SignalfdTest : public ::testing::TestWithParam {}; + +TEST_P(SignalfdTest, Basic) { + int signo = GetParam(); // Create the signalfd. sigset_t mask; sigemptyset(&mask); - sigaddset(&mask, kSigno); + sigaddset(&mask, signo); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0)); // Deliver the blocked signal. const auto scoped_sigmask = - ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, kSigno)); - ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds()); + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); // We should now read the signal. struct signalfd_siginfo rbuf; ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); - EXPECT_EQ(rbuf.ssi_signo, kSigno); + EXPECT_EQ(rbuf.ssi_signo, signo); } -TEST(Signalfd, MaskWorks) { +TEST_P(SignalfdTest, MaskWorks) { + int signo = GetParam(); // Create two signalfds with different masks. 
sigset_t mask1, mask2; sigemptyset(&mask1); sigemptyset(&mask2); - sigaddset(&mask1, kSigno); + sigaddset(&mask1, signo); sigaddset(&mask2, kSignoAlt); FileDescriptor fd1 = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask1, 0)); FileDescriptor fd2 = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask2, 0)); // Deliver the two signals. const auto scoped_sigmask1 = - ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, kSigno)); + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); const auto scoped_sigmask2 = ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, kSignoAlt)); - ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds()); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); ASSERT_THAT(tgkill(getpid(), gettid(), kSignoAlt), SyscallSucceeds()); // We should see the signals on the appropriate signalfds. @@ -98,7 +103,7 @@ TEST(Signalfd, MaskWorks) { EXPECT_EQ(rbuf2.ssi_signo, kSignoAlt); ASSERT_THAT(read(fd1.get(), &rbuf1, sizeof(rbuf1)), SyscallSucceedsWithValue(sizeof(rbuf1))); - EXPECT_EQ(rbuf1.ssi_signo, kSigno); + EXPECT_EQ(rbuf1.ssi_signo, signo); } TEST(Signalfd, Cloexec) { @@ -111,11 +116,12 @@ TEST(Signalfd, Cloexec) { EXPECT_THAT(fcntl(fd.get(), F_GETFD), SyscallSucceedsWithValue(FD_CLOEXEC)); } -TEST(Signalfd, Blocking) { +TEST_P(SignalfdTest, Blocking) { + int signo = GetParam(); // Create the signalfd in blocking mode. sigset_t mask; sigemptyset(&mask); - sigaddset(&mask, kSigno); + sigaddset(&mask, signo); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0)); // Shared tid variable. @@ -136,7 +142,7 @@ TEST(Signalfd, Blocking) { struct signalfd_siginfo rbuf; ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); - EXPECT_EQ(rbuf.ssi_signo, kSigno); + EXPECT_EQ(rbuf.ssi_signo, signo); }); // Wait until blocked. @@ -149,20 +155,21 @@ TEST(Signalfd, Blocking) { // // See gvisor.dev/issue/139. if (IsRunningOnGvisor()) { - ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds()); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); } else { - ASSERT_THAT(tgkill(getpid(), tid, kSigno), SyscallSucceeds()); + ASSERT_THAT(tgkill(getpid(), tid, signo), SyscallSucceeds()); } // Ensure that it was received. t.Join(); } -TEST(Signalfd, ThreadGroup) { +TEST_P(SignalfdTest, ThreadGroup) { + int signo = GetParam(); // Create the signalfd in blocking mode. sigset_t mask; sigemptyset(&mask); - sigaddset(&mask, kSigno); + sigaddset(&mask, signo); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0)); // Shared variable. @@ -176,7 +183,7 @@ TEST(Signalfd, ThreadGroup) { struct signalfd_siginfo rbuf; ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); - EXPECT_EQ(rbuf.ssi_signo, kSigno); + EXPECT_EQ(rbuf.ssi_signo, signo); // Wait for the other thread. absl::MutexLock ml(&mu); @@ -185,7 +192,7 @@ TEST(Signalfd, ThreadGroup) { }); // Deliver the signal to the threadgroup. - ASSERT_THAT(kill(getpid(), kSigno), SyscallSucceeds()); + ASSERT_THAT(kill(getpid(), signo), SyscallSucceeds()); // Wait for the first thread to process. { @@ -194,13 +201,13 @@ TEST(Signalfd, ThreadGroup) { } // Deliver to the thread group again (other thread still exists). - ASSERT_THAT(kill(getpid(), kSigno), SyscallSucceeds()); + ASSERT_THAT(kill(getpid(), signo), SyscallSucceeds()); // Ensure that we can also receive it. 
struct signalfd_siginfo rbuf; ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); - EXPECT_EQ(rbuf.ssi_signo, kSigno); + EXPECT_EQ(rbuf.ssi_signo, signo); // Mark the test as done. { @@ -212,11 +219,12 @@ TEST(Signalfd, ThreadGroup) { t.Join(); } -TEST(Signalfd, Nonblock) { +TEST_P(SignalfdTest, Nonblock) { + int signo = GetParam(); // Create the signalfd in non-blocking mode. sigset_t mask; sigemptyset(&mask); - sigaddset(&mask, kSigno); + sigaddset(&mask, signo); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_NONBLOCK)); @@ -227,20 +235,21 @@ TEST(Signalfd, Nonblock) { // Block and deliver the signal. const auto scoped_sigmask = - ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, kSigno)); - ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds()); + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); // Ensure that a read actually works. ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); - EXPECT_EQ(rbuf.ssi_signo, kSigno); + EXPECT_EQ(rbuf.ssi_signo, signo); // Should block again. EXPECT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), SyscallFailsWithErrno(EWOULDBLOCK)); } -TEST(Signalfd, SetMask) { +TEST_P(SignalfdTest, SetMask) { + int signo = GetParam(); // Create the signalfd matching nothing. sigset_t mask; sigemptyset(&mask); @@ -249,8 +258,8 @@ TEST(Signalfd, SetMask) { // Block and deliver a signal. const auto scoped_sigmask = - ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, kSigno)); - ASSERT_THAT(tgkill(getpid(), gettid(), kSigno), SyscallSucceeds()); + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); + ASSERT_THAT(tgkill(getpid(), gettid(), signo), SyscallSucceeds()); // We should have nothing. struct signalfd_siginfo rbuf; @@ -258,29 +267,30 @@ TEST(Signalfd, SetMask) { SyscallFailsWithErrno(EWOULDBLOCK)); // Change the signal mask. - sigaddset(&mask, kSigno); + sigaddset(&mask, signo); ASSERT_THAT(signalfd(fd.get(), &mask, 0), SyscallSucceeds()); // We should now have the signal. ASSERT_THAT(read(fd.get(), &rbuf, sizeof(rbuf)), SyscallSucceedsWithValue(sizeof(rbuf))); - EXPECT_EQ(rbuf.ssi_signo, kSigno); + EXPECT_EQ(rbuf.ssi_signo, signo); } -TEST(Signalfd, Poll) { +TEST_P(SignalfdTest, Poll) { + int signo = GetParam(); // Create the signalfd. sigset_t mask; sigemptyset(&mask); - sigaddset(&mask, kSigno); + sigaddset(&mask, signo); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, 0)); // Block the signal, and start a thread to deliver it. const auto scoped_sigmask = - ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, kSigno)); + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, signo)); pid_t orig_tid = gettid(); ScopedThread t([&] { absl::SleepFor(absl::Seconds(5)); - ASSERT_THAT(tgkill(getpid(), orig_tid, kSigno), SyscallSucceeds()); + ASSERT_THAT(tgkill(getpid(), orig_tid, signo), SyscallSucceeds()); }); // Start polling for the signal. We expect that it is not available at the @@ -297,19 +307,18 @@ TEST(Signalfd, Poll) { SyscallSucceedsWithValue(sizeof(rbuf))); } -TEST(Signalfd, KillStillKills) { - sigset_t mask; - sigemptyset(&mask); - sigaddset(&mask, SIGKILL); - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_CLOEXEC)); - - // Just because there is a signalfd, we shouldn't see any change in behavior - // for unblockable signals. It's easier to test this with SIGKILL. 
- const auto scoped_sigmask = - ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGKILL)); - EXPECT_EXIT(tgkill(getpid(), gettid(), SIGKILL), KilledBySignal(SIGKILL), ""); +std::string PrintSigno(::testing::TestParamInfo info) { + switch (info.param) { + case kSigno: + return "kSigno"; + case kSignoMax: + return "kSignoMax"; + default: + return absl::StrCat(info.param); + } } +INSTANTIATE_TEST_SUITE_P(Signalfd, SignalfdTest, + ::testing::Values(kSigno, kSignoMax), PrintSigno); TEST(Signalfd, Ppoll) { sigset_t mask; @@ -328,6 +337,20 @@ TEST(Signalfd, Ppoll) { SyscallSucceedsWithValue(0)); } +TEST(Signalfd, KillStillKills) { + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGKILL); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NewSignalFD(&mask, SFD_CLOEXEC)); + + // Just because there is a signalfd, we shouldn't see any change in behavior + // for unblockable signals. It's easier to test this with SIGKILL. + const auto scoped_sigmask = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSignalMask(SIG_BLOCK, SIGKILL)); + EXPECT_EXIT(tgkill(getpid(), gettid(), SIGKILL), KilledBySignal(SIGKILL), ""); +} + } // namespace } // namespace testing @@ -340,6 +363,7 @@ int main(int argc, char** argv) { sigset_t set; sigemptyset(&set); sigaddset(&set, gvisor::testing::kSigno); + sigaddset(&set, gvisor::testing::kSignoMax); sigaddset(&set, gvisor::testing::kSignoAlt); TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); -- cgit v1.2.3 From 2ba6198851dc1e293295d7cadf8c0ae456b68beb Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 21 Jan 2020 12:41:50 -0800 Subject: Add syscalls for lgetxattr, fgetxattr, lsetxattr, and fsetxattr. Note that these simply will use the same logic as getxattr and setxattr, which is not yet implemented for most filesystems. PiperOrigin-RevId: 290800960 --- pkg/sentry/syscalls/linux/linux64_amd64.go | 8 +- pkg/sentry/syscalls/linux/linux64_arm64.go | 8 +- pkg/sentry/syscalls/linux/sys_xattr.go | 136 +++++++++++++++++++++-------- test/syscalls/linux/xattr.cc | 41 +++++++++ 4 files changed, 150 insertions(+), 43 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 6b2920900..c76771a54 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -229,11 +229,11 @@ var AMD64 = &kernel.SyscallTable{ 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), - 189: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 190: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), - 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 195: 
syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index c9629f6f3..d3587fda6 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -42,11 +42,11 @@ var ARM64 = &kernel.SyscallTable{ 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), - 6: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 7: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), - 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 12: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index 23d20da6f..e35c077d6 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -27,6 +27,40 @@ import ( // GetXattr implements linux syscall getxattr(2). func GetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return getXattrFromPath(t, args, true) +} + +// LGetXattr implements linux syscall lgetxattr(2). +func LGetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return getXattrFromPath(t, args, false) +} + +// FGetXattr implements linux syscall fgetxattr(2). +func FGetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + nameAddr := args[1].Pointer() + valueAddr := args[2].Pointer() + size := uint64(args[3].SizeT()) + + // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. 
+ f := t.GetFile(fd) + if f == nil { + return 0, nil, syserror.EBADF + } + defer f.DecRef() + + n, value, err := getXattr(t, f.Dirent, nameAddr, size) + if err != nil { + return 0, nil, err + } + + if _, err := t.CopyOutBytes(valueAddr, []byte(value)); err != nil { + return 0, nil, err + } + return uintptr(n), nil, nil +} + +func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) { pathAddr := args[0].Pointer() nameAddr := args[1].Pointer() valueAddr := args[2].Pointer() @@ -38,29 +72,17 @@ func GetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } valueLen := 0 - err = fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { - // If getxattr(2) is called with size 0, the size of the value will be - // returned successfully even if it is nonzero. In that case, we need to - // retrieve the entire attribute value so we can return the correct size. - requestedSize := size - if size == 0 || size > linux.XATTR_SIZE_MAX { - requestedSize = linux.XATTR_SIZE_MAX + err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR } - value, err := getXattr(t, d, dirPath, nameAddr, uint64(requestedSize)) + n, value, err := getXattr(t, d, nameAddr, size) + valueLen = n if err != nil { return err } - valueLen = len(value) - if uint64(valueLen) > requestedSize { - return syserror.ERANGE - } - - // Skip copying out the attribute value if size is 0. - if size == 0 { - return nil - } _, err = t.CopyOutBytes(valueAddr, []byte(value)) return err }) @@ -71,29 +93,73 @@ func GetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // getXattr implements getxattr(2) from the given *fs.Dirent. -func getXattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr usermem.Addr, size uint64) (string, error) { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return "", syserror.ENOTDIR - } - +func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr, size uint64) (int, string, error) { if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil { - return "", err + return 0, "", err } name, err := copyInXattrName(t, nameAddr) if err != nil { - return "", err + return 0, "", err } if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { - return "", syserror.EOPNOTSUPP + return 0, "", syserror.EOPNOTSUPP } - return d.Inode.GetXattr(t, name, size) + // If getxattr(2) is called with size 0, the size of the value will be + // returned successfully even if it is nonzero. In that case, we need to + // retrieve the entire attribute value so we can return the correct size. + requestedSize := size + if size == 0 || size > linux.XATTR_SIZE_MAX { + requestedSize = linux.XATTR_SIZE_MAX + } + + value, err := d.Inode.GetXattr(t, name, requestedSize) + if err != nil { + return 0, "", err + } + n := len(value) + if uint64(n) > requestedSize { + return 0, "", syserror.ERANGE + } + + // Don't copy out the attribute value if size is 0. + if size == 0 { + return n, "", nil + } + return n, value, nil } // SetXattr implements linux syscall setxattr(2). func SetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return setXattrFromPath(t, args, true) +} + +// LSetXattr implements linux syscall lsetxattr(2). 
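The getXattr helper above implements the size-probing convention of getxattr(2): a call with size 0 succeeds and reports the value's length, so a caller can allocate an exact buffer and call again. A minimal user-space sketch of that two-step pattern follows, using golang.org/x/sys/unix; the file path and attribute name are invented for the example, and it assumes a filesystem that supports user.* xattrs.

package main

import (
	"fmt"
	"log"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	const path = "/tmp/xattr-demo" // hypothetical scratch file
	const attr = "user.test"

	f, err := os.Create(path)
	if err != nil {
		log.Fatal(err)
	}
	f.Close()
	defer os.Remove(path)

	if err := unix.Setxattr(path, attr, []byte("hello"), 0); err != nil {
		log.Fatalf("setxattr: %v", err)
	}

	// Probe: a nil buffer (size 0) only asks for the value's length.
	n, err := unix.Getxattr(path, attr, nil)
	if err != nil {
		log.Fatalf("getxattr probe: %v", err)
	}

	// Fetch: call again with a buffer of exactly that size.
	buf := make([]byte, n)
	if _, err := unix.Getxattr(path, attr, buf); err != nil {
		log.Fatalf("getxattr: %v", err)
	}
	fmt.Printf("%s = %q (%d bytes)\n", attr, buf, n)
}

unix.Lgetxattr and unix.Fgetxattr follow the same size convention for the symlink- and descriptor-based variants this change adds.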
+func LSetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return setXattrFromPath(t, args, false) +} + +// FSetXattr implements linux syscall fsetxattr(2). +func FSetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + nameAddr := args[1].Pointer() + valueAddr := args[2].Pointer() + size := uint64(args[3].SizeT()) + flags := args[4].Uint() + + // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. + f := t.GetFile(fd) + if f == nil { + return 0, nil, syserror.EBADF + } + defer f.DecRef() + + return 0, nil, setXattr(t, f.Dirent, nameAddr, valueAddr, uint64(size), flags) +} + +func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) { pathAddr := args[0].Pointer() nameAddr := args[1].Pointer() valueAddr := args[2].Pointer() @@ -105,19 +171,19 @@ func SetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, err } - if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 { - return 0, nil, syserror.EINVAL - } + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { - return setXattr(t, d, dirPath, nameAddr, valueAddr, uint64(size), flags) + return setXattr(t, d, nameAddr, valueAddr, uint64(size), flags) }) } // setXattr implements setxattr(2) from the given *fs.Dirent. -func setXattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr, valueAddr usermem.Addr, size uint64, flags uint32) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR +func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, size uint64, flags uint32) error { + if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 { + return syserror.EINVAL } if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { @@ -133,7 +199,7 @@ func setXattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr, valueAddr us return syserror.E2BIG } buf := make([]byte, size) - if _, err = t.CopyInBytes(valueAddr, buf); err != nil { + if _, err := t.CopyInBytes(valueAddr, buf); err != nil { return err } value := string(buf) diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index b3bc3463e..e77c355d7 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -26,6 +26,7 @@ #include "gtest/gtest.h" #include "test/syscalls/linux/file_base.h" #include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -414,6 +415,46 @@ TEST_F(XattrTest, GetxattrNonexistentName) { EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } +TEST_F(XattrTest, LGetSetxattrOnSymlink) { + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); + + EXPECT_THAT(lsetxattr(link.path().c_str(), nullptr, nullptr, 0, 0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(lgetxattr(link.path().c_str(), nullptr, nullptr, 0), + SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { + const char* path = 
test_file_name_.c_str(); + const char name[] = "user.test"; + int val = 1234; + size_t size = sizeof(val); + EXPECT_THAT(lsetxattr(path, name, &val, size, /*flags=*/0), + SyscallSucceeds()); + + int buf = 0; + EXPECT_THAT(lgetxattr(path, name, &buf, size), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); +} + +TEST_F(XattrTest, FGetSetxattr) { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_.c_str(), 0)); + const char name[] = "user.test"; + int val = 1234; + size_t size = sizeof(val); + EXPECT_THAT(fsetxattr(fd.get(), name, &val, size, /*flags=*/0), + SyscallSucceeds()); + + int buf = 0; + EXPECT_THAT(fgetxattr(fd.get(), name, &buf, size), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 47bc7550c0b8fcde7b3452bf536082e955882026 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Tue, 21 Jan 2020 13:37:25 -0800 Subject: Fixing stuff --- pkg/sentry/socket/netfilter/netfilter.go | 3 ++ pkg/tcpip/iptables/types.go | 21 ++++---------- pkg/tcpip/packet_buffer.go | 25 +---------------- test/iptables/iptables_test.go | 47 ++++++++++++++++---------------- 4 files changed, 34 insertions(+), 62 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 09a3276c7..4ef8123ac 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -164,6 +164,9 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern // Each rule corresponds to an entry. entry := linux.KernelIPTEntry{ IPTEntry: linux.IPTEntry{ + IP: linux.IPTIP{ + Protocol: uint16(rule.Filter.Protocol), + }, NextOffset: linux.SizeOfIPTEntry, TargetOffset: linux.SizeOfIPTEntry, }, diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 4f2a4d65e..a0bfc8b41 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -14,7 +14,9 @@ package iptables -import "gvisor.dev/gvisor/pkg/tcpip" +import ( + "gvisor.dev/gvisor/pkg/tcpip" +) // A Hook specifies one of the hooks built into the network stack. // @@ -161,21 +163,10 @@ type Rule struct { Target Target } -// TODO: This is gross. -// TODO: Save this in SetEntries. -// TODO: Utilize this when traversing tables. +// IPHeaderFilter holds basic IP filtering data common to every rule. type IPHeaderFilter struct { - Source [4]byte - Destination [4]byte - SourceMask [4]byte - DestinationMask [4]byte - OutputInterface string - InputInterface string - OutputInterfaceMask string - InputInterfaceMask string - Protocol tcpip.TransportProtocolNumber - Flags uint8 - InverseFlags uint8 + // Protocol matches the transport protocol. + Protocol tcpip.TransportProtocolNumber } // A Matcher is the interface for matching packets. diff --git a/pkg/tcpip/packet_buffer.go b/pkg/tcpip/packet_buffer.go index 7a036b93c..ab24372e7 100644 --- a/pkg/tcpip/packet_buffer.go +++ b/pkg/tcpip/packet_buffer.go @@ -13,9 +13,7 @@ package tcpip -import ( - "gvisor.dev/gvisor/pkg/tcpip/buffer" -) +import "gvisor.dev/gvisor/pkg/tcpip/buffer" // A PacketBuffer contains all the data of a network packet. // @@ -67,24 +65,3 @@ func (pk PacketBuffer) Clone() PacketBuffer { pk.Data = pk.Data.Clone(nil) return pk } - -//// TransportProtocol returns the transport protocol of pk. -//// -//// Precondition: pk.NetworkHeader is set. 
-//func (pk PacketBuffer) TransportProtocolIPv4() uint16 { -// if pk.NetworkHeader == nil { -// panic("This should only be called when pk.NetworkHeader is set.") -// } -// return header.IPv4(pk.NetworkHeader).TransportProtocol() -//} - -// func (pk Packet) findNetHeader() header.IPv4 { -// // Inbound: -// // Data holds everything, but may have had some headers shaved off. -// // Figure out whether it's set or still somewhere in data and return -// // appropriately. - -// // Outbound: -// // NetworkHeader will be set if we've added one. Otherwise there's no -// // header. -// } diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 63e691af6..150b44e42 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -23,6 +23,7 @@ import ( "time" "flag" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/dockerutil" "gvisor.dev/gvisor/runsc/testutil" @@ -160,29 +161,29 @@ func logContainer(output string, err error) { log.Infof(msg) } -// func TestFilterInputDropUDP(t *testing.T) { -// if err := singleTest(FilterInputDropUDP{}); err != nil { -// t.Fatal(err) -// } -// } - -// func TestFilterInputDropUDPPort(t *testing.T) { -// if err := singleTest(FilterInputDropUDPPort{}); err != nil { -// t.Fatal(err) -// } -// } - -// func TestFilterInputDropDifferentUDPPort(t *testing.T) { -// if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { -// t.Fatal(err) -// } -// } - -// func TestFilterInputDropAll(t *testing.T) { -// if err := singleTest(FilterInputDropAll{}); err != nil { -// t.Fatal(err) -// } -// } +func TestFilterInputDropUDP(t *testing.T) { + if err := singleTest(FilterInputDropUDP{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDropUDPPort(t *testing.T) { + if err := singleTest(FilterInputDropUDPPort{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDropDifferentUDPPort(t *testing.T) { + if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDropAll(t *testing.T) { + if err := singleTest(FilterInputDropAll{}); err != nil { + t.Fatal(err) + } +} func TestFilterInputDropOnlyUDP(t *testing.T) { if err := singleTest(FilterInputDropOnlyUDP{}); err != nil { -- cgit v1.2.3 From 9f736ac6a7747917f690596ac9b072c108b5670c Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Tue, 21 Jan 2020 13:39:48 -0800 Subject: More little fixes. --- pkg/sentry/socket/netfilter/netfilter.go | 4 ++-- test/iptables/iptables_test.go | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 4ef8123ac..e1f2bacce 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -325,8 +325,8 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { return syserr.ErrInvalidArgument } - // TODO(gvisor.dev/issue/170): We should support IPTIP - // filtering. We reject any nonzero IPTIP values for now. + // TODO(gvisor.dev/issue/170): We should support more IPTIP + // filtering fields. 
filter, err := filterFromIPTIP(entry.IP) if err != nil { return err diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 150b44e42..679a29bef 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -15,6 +15,7 @@ package iptables import ( + "flag" "fmt" "net" "os" @@ -22,8 +23,6 @@ import ( "testing" "time" - "flag" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/dockerutil" "gvisor.dev/gvisor/runsc/testutil" -- cgit v1.2.3 From cbc0a92276b75e744511a43a9c0b78fc64946ec6 Mon Sep 17 00:00:00 2001 From: Ryan Heacock Date: Tue, 21 Jan 2020 14:15:01 -0800 Subject: Correct todos referencing IPV6_RECVTCLASS Bug 68320120 was revived because TODOs referenced the IP_RECVTOS bug instead of the IPV6_RECVTCLASS bug. PiperOrigin-RevId: 290820178 --- test/syscalls/linux/udp_socket_test_cases.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 68e0a8109..a2f6ef8cc 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1349,7 +1349,7 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { // outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SetAndReceiveTOS) { - // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. + // TODO(b/144868438): IPV6_RECVTCLASS not supported for netstack. SKIP_IF((GetParam() != AddressFamily::kIpv4) && IsRunningOnGvisor() && !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1422,7 +1422,7 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { // TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SendAndReceiveTOS) { - // TODO(b/68320120): IPV6_RECVTCLASS not supported for netstack. + // TODO(b/144868438): IPV6_RECVTCLASS not supported for netstack. // TODO(b/146661005): Setting TOS via cmsg not supported for netstack. SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); -- cgit v1.2.3 From 9143fcd7fd38243dd40f927dafaeb75f6ef8ef49 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Tue, 21 Jan 2020 14:47:17 -0800 Subject: Add UDP matchers. --- pkg/abi/linux/netfilter.go | 92 +++++++++++++++++ pkg/sentry/socket/netfilter/netfilter.go | 164 ++++++++++++++++++++++++++----- pkg/tcpip/iptables/BUILD | 2 + pkg/tcpip/iptables/iptables.go | 3 + pkg/tcpip/iptables/tcp_matcher.go | 122 +++++++++++++++++++++++ pkg/tcpip/iptables/types.go | 17 ++++ pkg/tcpip/iptables/udp_matcher.go | 127 ++++++++++++++++++++++++ pkg/tcpip/network/ipv4/ipv4.go | 10 +- test/iptables/filter_input.go | 46 +++++++++ 9 files changed, 555 insertions(+), 28 deletions(-) create mode 100644 pkg/tcpip/iptables/tcp_matcher.go create mode 100644 pkg/tcpip/iptables/udp_matcher.go (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index 33fcc6c95..fb4588272 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -340,3 +340,95 @@ func goString(cstring []byte) string { } return string(cstring) } + +// XTTCP holds data for matching TCP packets. It corresponds to struct xt_tcp +// in include/uapi/linux/netfilter/xt_tcpudp.h. 
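Before the XTTCP and XTUDP definitions that follow, it helps to see how their port-range and inverse-flag fields are typically evaluated against a packet. The helper below is a self-contained illustration of that check, modeling only the destination-port fields; it is not the matcher that netstack builds from these structs.

package main

import "fmt"

// xtUDPInvDstPt mirrors XT_UDP_INV_DSTPT: invert the destination-port check.
const xtUDPInvDstPt = 0x02

// udpMatch carries just the fields needed for the example.
type udpMatch struct {
	dstPortStart, dstPortEnd uint16
	inverseFlags             uint8
}

// matches reports whether dstPort falls inside [dstPortStart, dstPortEnd],
// flipping the answer when the inverse flag is set.
func (m udpMatch) matches(dstPort uint16) bool {
	in := dstPort >= m.dstPortStart && dstPort <= m.dstPortEnd
	if m.inverseFlags&xtUDPInvDstPt != 0 {
		return !in
	}
	return in
}

func main() {
	dns := udpMatch{dstPortStart: 53, dstPortEnd: 53}
	fmt.Println(dns.matches(53)) // true
	fmt.Println(dns.matches(80)) // false

	notDNS := udpMatch{dstPortStart: 53, dstPortEnd: 53, inverseFlags: xtUDPInvDstPt}
	fmt.Println(notDNS.matches(80)) // true: everything except port 53
}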
+type XTTCP struct { + // SourcePortStart specifies the inclusive start of the range of source + // ports to which the matcher applies. + SourcePortStart uint16 + + // SourcePortEnd specifies the inclusive end of the range of source ports + // to which the matcher applies. + SourcePortEnd uint16 + + // DestinationPortStart specifies the start of the destination port + // range to which the matcher applies. + DestinationPortStart uint16 + + // DestinationPortEnd specifies the start of the destination port + // range to which the matcher applies. + DestinationPortEnd uint16 + + // Option specifies that a particular TCP option must be set. + Option uint8 + + // FlagMask masks the FlagCompare byte when comparing to the TCP flag + // fields. + FlagMask uint8 + + // FlagCompare is binary and-ed with the TCP flag fields. + FlagCompare uint8 + + // InverseFlags flips the meaning of certain fields. See the + // TX_TCP_INV_* flags. + InverseFlags uint8 +} + +// SizeOfXTTCP is the size of an XTTCP. +const SizeOfXTTCP = 12 + +// Flags in XTTCP.InverseFlags. Corresponding constants are in +// include/uapi/linux/netfilter/xt_tcpudp.h. +const ( + // Invert the meaning of SourcePortStart/End. + XT_TCP_INV_SRCPT = 0x01 + // Invert the meaning of DestinationPortStart/End. + XT_TCP_INV_DSTPT = 0x02 + // Invert the meaning of FlagCompare. + XT_TCP_INV_FLAGS = 0x04 + // Invert the meaning of Option. + XT_TCP_INV_OPTION = 0x08 + // Enable all flags. + XT_TCP_INV_MASK = 0x0F +) + +// XTUDP holds data for matching UDP packets. It corresponds to struct xt_udp +// in include/uapi/linux/netfilter/xt_tcpudp.h. +type XTUDP struct { + // SourcePortStart specifies the inclusive start of the range of source + // ports to which the matcher applies. + SourcePortStart uint16 + + // SourcePortEnd specifies the inclusive end of the range of source ports + // to which the matcher applies. + SourcePortEnd uint16 + + // DestinationPortStart specifies the start of the destination port + // range to which the matcher applies. + DestinationPortStart uint16 + + // DestinationPortEnd specifies the start of the destination port + // range to which the matcher applies. + DestinationPortEnd uint16 + + // InverseFlags flips the meaning of certain fields. See the + // TX_UDP_INV_* flags. + InverseFlags uint8 + + _ uint8 +} + +// SizeOfXTUDP is the size of an XTUDP. +const SizeOfXTUDP = 10 + +// Flags in XTUDP.InverseFlags. Corresponding constants are in +// include/uapi/linux/netfilter/xt_tcpudp.h. +const ( + // Invert the meaning of SourcePortStart/End. + XT_UDP_INV_SRCPT = 0x01 + // Invert the meaning of DestinationPortStart/End. + XT_UDP_INV_DSTPT = 0x02 + // Enable all flags. + XT_UDP_INV_MASK = 0x03 +) diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index e1f2bacce..45296b339 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -131,6 +131,7 @@ func FillDefaultIPTables(stack *stack.Stack) { stack.SetIPTables(ipt) } +// TODO: Return proto. // convertNetstackToBinary converts the iptables as stored in netstack to the // format expected by the iptables tool. 
Linux stores each table as a binary // blob that can only be traversed by parsing a bit, reading some offsets, @@ -318,10 +319,12 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { } var entry linux.IPTEntry buf := optVal[:linux.SizeOfIPTEntry] - optVal = optVal[linux.SizeOfIPTEntry:] binary.Unmarshal(buf, usermem.ByteOrder, &entry) - if entry.TargetOffset != linux.SizeOfIPTEntry { - // TODO(gvisor.dev/issue/170): Support matchers. + initialOptValLen := len(optVal) + optVal = optVal[linux.SizeOfIPTEntry:] + + if entry.TargetOffset < linux.SizeOfIPTEntry { + log.Warningf("netfilter: entry has too-small target offset %d", entry.TargetOffset) return syserr.ErrInvalidArgument } @@ -332,19 +335,41 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { return err } + // TODO: Matchers (and maybe targets) can specify that they only work for certiain protocols, hooks, tables. + // Get matchers. + matchersSize := entry.TargetOffset - linux.SizeOfIPTEntry + if len(optVal) < int(matchersSize) { + log.Warningf("netfilter: entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal)) + } + matchers, err := parseMatchers(filter, optVal[:matchersSize]) + if err != nil { + log.Warningf("netfilter: failed to parse matchers: %v", err) + return err + } + optVal = optVal[matchersSize:] + // Get the target of the rule. - target, consumed, err := parseTarget(optVal) + targetSize := entry.NextOffset - entry.TargetOffset + if len(optVal) < int(targetSize) { + log.Warningf("netfilter: entry doesn't have enough room for its target (only %d bytes remain)", len(optVal)) + } + target, err := parseTarget(optVal[:targetSize]) if err != nil { return err } - optVal = optVal[consumed:] + optVal = optVal[targetSize:] table.Rules = append(table.Rules, iptables.Rule{ - Filter: filter, - Target: target, + Filter: filter, + Target: target, + Matchers: matchers, }) offsets = append(offsets, offset) - offset += linux.SizeOfIPTEntry + consumed + offset += uint32(entry.NextOffset) + + if initialOptValLen-len(optVal) != int(entry.NextOffset) { + log.Warningf("netfilter: entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal)) + } } // Go through the list of supported hooks for this table and, for each @@ -401,12 +426,105 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { return nil } -// parseTarget parses a target from the start of optVal and returns the target -// along with the number of bytes it occupies in optVal. -func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { +// parseMatchers parses 0 or more matchers from optVal. optVal should contain +// only the matchers. +func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Matcher, *syserr.Error) { + var matchers []iptables.Matcher + for len(optVal) > 0 { + log.Infof("parseMatchers: optVal has len %d", len(optVal)) + // Get the XTEntryMatch. + if len(optVal) < linux.SizeOfXTEntryMatch { + log.Warningf("netfilter: optVal has insufficient size for entry match: %d", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + var match linux.XTEntryMatch + buf := optVal[:linux.SizeOfXTEntryMatch] + binary.Unmarshal(buf, usermem.ByteOrder, &match) + log.Infof("parseMatchers: parsed entry match %q: %+v", match.Name.String(), match) + + // Check some invariants. 
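+		// MatchSize includes both the XTEntryMatch header and the
+		// match-specific data that follows it, so it can be no smaller
+		// than the header and no larger than what remains of optVal.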
+ if match.MatchSize < linux.SizeOfXTEntryMatch { + log.Warningf("netfilter: match size is too small, must be at least %d", linux.SizeOfXTEntryMatch) + return nil, syserr.ErrInvalidArgument + } + if len(optVal) < int(match.MatchSize) { + log.Warningf("netfilter: optVal has insufficient size for match: %d", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + + buf = optVal[linux.SizeOfXTEntryMatch:match.MatchSize] + var matcher iptables.Matcher + var err error + switch match.Name.String() { + case "tcp": + if len(buf) < linux.SizeOfXTTCP { + log.Warningf("netfilter: optVal has insufficient size for TCP match: %d", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + var matchData linux.XTTCP + // For alignment reasons, the match's total size may exceed what's + // strictly necessary to hold matchData. + binary.Unmarshal(buf[:linux.SizeOfXTUDP], usermem.ByteOrder, &matchData) + log.Infof("parseMatchers: parsed XTTCP: %+v", matchData) + matcher, err = iptables.NewTCPMatcher(filter, iptables.TCPMatcherData{ + SourcePortStart: matchData.SourcePortStart, + SourcePortEnd: matchData.SourcePortEnd, + DestinationPortStart: matchData.DestinationPortStart, + DestinationPortEnd: matchData.DestinationPortEnd, + Option: matchData.Option, + FlagMask: matchData.FlagMask, + FlagCompare: matchData.FlagCompare, + InverseFlags: matchData.InverseFlags, + }) + if err != nil { + log.Warningf("netfilter: failed to create TCP matcher: %v", err) + return nil, syserr.ErrInvalidArgument + } + + case "udp": + if len(buf) < linux.SizeOfXTUDP { + log.Warningf("netfilter: optVal has insufficient size for UDP match: %d", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + var matchData linux.XTUDP + // For alignment reasons, the match's total size may exceed what's + // strictly necessary to hold matchData. + binary.Unmarshal(buf[:linux.SizeOfXTUDP], usermem.ByteOrder, &matchData) + log.Infof("parseMatchers: parsed XTUDP: %+v", matchData) + matcher, err = iptables.NewUDPMatcher(filter, iptables.UDPMatcherData{ + SourcePortStart: matchData.SourcePortStart, + SourcePortEnd: matchData.SourcePortEnd, + DestinationPortStart: matchData.DestinationPortStart, + DestinationPortEnd: matchData.DestinationPortEnd, + InverseFlags: matchData.InverseFlags, + }) + if err != nil { + log.Warningf("netfilter: failed to create UDP matcher: %v", err) + return nil, syserr.ErrInvalidArgument + } + + default: + log.Warningf("netfilter: unsupported matcher with name %q", match.Name.String()) + return nil, syserr.ErrInvalidArgument + } + + matchers = append(matchers, matcher) + + // TODO: Support revision. + // TODO: Support proto -- matchers usually specify which proto(s) they work with. + optVal = optVal[match.MatchSize:] + } + + // TODO: Check that optVal is exhausted. + return matchers, nil +} + +// parseTarget parses a target from optVal. optVal should contain only the +// target. +func parseTarget(optVal []byte) (iptables.Target, *syserr.Error) { if len(optVal) < linux.SizeOfXTEntryTarget { log.Warningf("netfilter: optVal has insufficient size for entry target %d", len(optVal)) - return nil, 0, syserr.ErrInvalidArgument + return nil, syserr.ErrInvalidArgument } var target linux.XTEntryTarget buf := optVal[:linux.SizeOfXTEntryTarget] @@ -414,9 +532,9 @@ func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { switch target.Name.String() { case "": // Standard target. 
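+		// An empty name denotes a standard target, which carries only
+		// a verdict (e.g. ACCEPT or DROP) or a jump offset.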
- if len(optVal) < linux.SizeOfXTStandardTarget { - log.Warningf("netfilter.SetEntries: optVal has insufficient size for standard target %d", len(optVal)) - return nil, 0, syserr.ErrInvalidArgument + if len(optVal) != linux.SizeOfXTStandardTarget { + log.Warningf("netfilter.SetEntries: optVal has wrong size for standard target %d", len(optVal)) + return nil, syserr.ErrInvalidArgument } var standardTarget linux.XTStandardTarget buf = optVal[:linux.SizeOfXTStandardTarget] @@ -424,22 +542,22 @@ func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { verdict, err := translateToStandardVerdict(standardTarget.Verdict) if err != nil { - return nil, 0, err + return nil, err } switch verdict { case iptables.Accept: - return iptables.UnconditionalAcceptTarget{}, linux.SizeOfXTStandardTarget, nil + return iptables.UnconditionalAcceptTarget{}, nil case iptables.Drop: - return iptables.UnconditionalDropTarget{}, linux.SizeOfXTStandardTarget, nil + return iptables.UnconditionalDropTarget{}, nil default: panic(fmt.Sprintf("Unknown verdict: %v", verdict)) } case errorTargetName: // Error target. - if len(optVal) < linux.SizeOfXTErrorTarget { + if len(optVal) != linux.SizeOfXTErrorTarget { log.Infof("netfilter.SetEntries: optVal has insufficient size for error target %d", len(optVal)) - return nil, 0, syserr.ErrInvalidArgument + return nil, syserr.ErrInvalidArgument } var errorTarget linux.XTErrorTarget buf = optVal[:linux.SizeOfXTErrorTarget] @@ -454,16 +572,16 @@ func parseTarget(optVal []byte) (iptables.Target, uint32, *syserr.Error) { // rules have an error with the name of the chain. switch errorTarget.Name.String() { case errorTargetName: - return iptables.ErrorTarget{}, linux.SizeOfXTErrorTarget, nil + return iptables.ErrorTarget{}, nil default: log.Infof("Unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String()) - return nil, 0, syserr.ErrInvalidArgument + return nil, syserr.ErrInvalidArgument } } // Unknown target. log.Infof("Unknown target %q doesn't exist or isn't supported yet.", target.Name.String()) - return nil, 0, syserr.ErrInvalidArgument + return nil, syserr.ErrInvalidArgument } func filterFromIPTIP(iptip linux.IPTIP) (iptables.IPHeaderFilter, *syserr.Error) { diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD index 297eaccaf..ff4e3c932 100644 --- a/pkg/tcpip/iptables/BUILD +++ b/pkg/tcpip/iptables/BUILD @@ -7,7 +7,9 @@ go_library( srcs = [ "iptables.go", "targets.go", + "tcp_matcher.go", "types.go", + "udp_matcher.go", ], importpath = "gvisor.dev/gvisor/pkg/tcpip/iptables", visibility = ["//visibility:public"], diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index fc06b5b87..accedba1e 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -138,6 +138,8 @@ func EmptyFilterTable() Table { // Check runs pkt through the rules for hook. It returns true when the packet // should continue traversing the network stack and false when it should be // dropped. +// +// Precondition: pkt.NetworkHeader is set. func (it *IPTables) Check(hook Hook, pkt tcpip.PacketBuffer) bool { // TODO(gvisor.dev/issue/170): A lot of this is uncomplicated because // we're missing features. Jumps, the call stack, etc. aren't checked @@ -163,6 +165,7 @@ func (it *IPTables) Check(hook Hook, pkt tcpip.PacketBuffer) bool { return true } +// Precondition: pkt.NetworkHeader is set. 
func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename string) Verdict { // Start from ruleIdx and walk the list of rules until a rule gives us // a verdict. diff --git a/pkg/tcpip/iptables/tcp_matcher.go b/pkg/tcpip/iptables/tcp_matcher.go new file mode 100644 index 000000000..6acbd6eb9 --- /dev/null +++ b/pkg/tcpip/iptables/tcp_matcher.go @@ -0,0 +1,122 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" +) + +type TCPMatcher struct { + data TCPMatcherData + + // tablename string + // unsigned int matchsize; + // unsigned int usersize; + // #ifdef CONFIG_COMPAT + // unsigned int compatsize; + // #endif + // unsigned int hooks; + // unsigned short proto; + // unsigned short family; +} + +// TODO: Delete? +// MatchCheckEntryParams + +type TCPMatcherData struct { + // Filter IPHeaderFilter + + SourcePortStart uint16 + SourcePortEnd uint16 + DestinationPortStart uint16 + DestinationPortEnd uint16 + Option uint8 + FlagMask uint8 + FlagCompare uint8 + InverseFlags uint8 +} + +func NewTCPMatcher(filter IPHeaderFilter, data TCPMatcherData) (Matcher, error) { + // TODO: We currently only support source port and destination port. + log.Infof("Adding rule with TCPMatcherData: %+v", data) + + if data.Option != 0 || + data.FlagMask != 0 || + data.FlagCompare != 0 || + data.InverseFlags != 0 { + return nil, fmt.Errorf("unsupported TCP matcher flags set") + } + + if filter.Protocol != header.TCPProtocolNumber { + log.Warningf("TCP matching is only valid for protocol %d.", header.TCPProtocolNumber) + } + + return &TCPMatcher{data: data}, nil +} + +// TODO: Check xt_tcpudp.c. Need to check for same things (e.g. fragments). +func (tm *TCPMatcher) Match(hook Hook, pkt tcpip.PacketBuffer, interfaceName string) (bool, bool) { + netHeader := header.IPv4(pkt.NetworkHeader) + + // TODO: Do we check proto here or elsewhere? I think elsewhere (check + // codesearch). + if netHeader.TransportProtocol() != header.TCPProtocolNumber { + return false, false + } + + // We dont't match fragments. + if frag := netHeader.FragmentOffset(); frag != 0 { + if frag == 1 { + log.Warningf("Dropping TCP packet: malicious packet with fragment with fragment offest of 1.") + return false, true + } + return false, false + } + + // Now we need the transport header. However, this may not have been set + // yet. + // TODO + var tcpHeader header.TCP + if pkt.TransportHeader != nil { + tcpHeader = header.TCP(pkt.TransportHeader) + } else { + // The TCP header hasn't been parsed yet. We have to do it here. + if len(pkt.Data.First()) < header.TCPMinimumSize { + // There's no valid TCP header here, so we hotdrop the + // packet. + // TODO: Stats. 
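+			// ("Hotdrop" means the packet is dropped immediately,
+			// without evaluating any further rules.)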
+ log.Warningf("Dropping TCP packet: size to small.") + return false, true + } + tcpHeader = header.TCP(pkt.Data.First()) + } + + // Check whether the source and destination ports are within the + // matching range. + sourcePort := tcpHeader.SourcePort() + destinationPort := tcpHeader.DestinationPort() + if sourcePort < tm.data.SourcePortStart || tm.data.SourcePortEnd < sourcePort { + return false, false + } + if destinationPort < tm.data.DestinationPortStart || tm.data.DestinationPortEnd < destinationPort { + return false, false + } + + return true, false +} diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index a0bfc8b41..54e66f09a 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -169,12 +169,29 @@ type IPHeaderFilter struct { Protocol tcpip.TransportProtocolNumber } +// TODO: Should these be able to marshal/unmarshal themselves? +// TODO: Something has to map the name to the matcher. // A Matcher is the interface for matching packets. type Matcher interface { // Match returns whether the packet matches and whether the packet // should be "hotdropped", i.e. dropped immediately. This is usually // used for suspicious packets. + // + // Precondition: packet.NetworkHeader is set. Match(hook Hook, packet tcpip.PacketBuffer, interfaceName string) (matches bool, hotdrop bool) + + // TODO: Make this typesafe by having each Matcher have their own, typed CheckEntry? + // CheckEntry(params MatchCheckEntryParams) bool +} + +// TODO: Unused? +type MatchCheckEntryParams struct { + Table string // TODO: Tables should be an enum... + Filter IPHeaderFilter + Info interface{} // TODO: Type unsafe. + // HookMask uint8 + // Family uint8 + // NFTCompat bool } // A Target is the interface for taking an action for a packet. diff --git a/pkg/tcpip/iptables/udp_matcher.go b/pkg/tcpip/iptables/udp_matcher.go new file mode 100644 index 000000000..ce4368a3d --- /dev/null +++ b/pkg/tcpip/iptables/udp_matcher.go @@ -0,0 +1,127 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "fmt" + "runtime/debug" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" +) + +type UDPMatcher struct { + data UDPMatcherData + + // tablename string + // unsigned int matchsize; + // unsigned int usersize; + // #ifdef CONFIG_COMPAT + // unsigned int compatsize; + // #endif + // unsigned int hooks; + // unsigned short proto; + // unsigned short family; +} + +// TODO: Delete? +// MatchCheckEntryParams + +type UDPMatcherData struct { + // Filter IPHeaderFilter + + SourcePortStart uint16 + SourcePortEnd uint16 + DestinationPortStart uint16 + DestinationPortEnd uint16 + InverseFlags uint8 +} + +func NewUDPMatcher(filter IPHeaderFilter, data UDPMatcherData) (Matcher, error) { + // TODO: We currently only support source port and destination port. 
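+	// Port ranges are inclusive here as well; a rule that doesn't specify
+	// a port typically arrives with the full range [0, 65535].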
+ log.Infof("Adding rule with UDPMatcherData: %+v", data) + + if data.InverseFlags != 0 { + return nil, fmt.Errorf("unsupported UDP matcher flags set") + } + + if filter.Protocol != header.UDPProtocolNumber { + log.Warningf("UDP matching is only valid for protocol %d.", header.UDPProtocolNumber) + } + + return &UDPMatcher{data: data}, nil +} + +// TODO: Check xt_tcpudp.c. Need to check for same things (e.g. fragments). +func (tm *UDPMatcher) Match(hook Hook, pkt tcpip.PacketBuffer, interfaceName string) (bool, bool) { + log.Infof("UDPMatcher called from: %s", string(debug.Stack())) + netHeader := header.IPv4(pkt.NetworkHeader) + + // TODO: Do we check proto here or elsewhere? I think elsewhere (check + // codesearch). + if netHeader.TransportProtocol() != header.UDPProtocolNumber { + log.Infof("UDPMatcher: wrong protocol number") + return false, false + } + + // We dont't match fragments. + if frag := netHeader.FragmentOffset(); frag != 0 { + log.Infof("UDPMatcher: it's a fragment") + if frag == 1 { + return false, true + } + log.Warningf("Dropping UDP packet: malicious fragmented packet.") + return false, false + } + + // Now we need the transport header. However, this may not have been set + // yet. + // TODO + var udpHeader header.UDP + if pkt.TransportHeader != nil { + log.Infof("UDPMatcher: transport header is not nil") + udpHeader = header.UDP(pkt.TransportHeader) + } else { + log.Infof("UDPMatcher: transport header is nil") + log.Infof("UDPMatcher: is network header nil: %t", pkt.NetworkHeader == nil) + // The UDP header hasn't been parsed yet. We have to do it here. + if len(pkt.Data.First()) < header.UDPMinimumSize { + // There's no valid UDP header here, so we hotdrop the + // packet. + // TODO: Stats. + log.Warningf("Dropping UDP packet: size to small.") + return false, true + } + udpHeader = header.UDP(pkt.Data.First()) + } + + // Check whether the source and destination ports are within the + // matching range. + sourcePort := udpHeader.SourcePort() + destinationPort := udpHeader.DestinationPort() + log.Infof("UDPMatcher: sport and dport are %d and %d. sports and dport start and end are (%d, %d) and (%d, %d)", + udpHeader.SourcePort(), udpHeader.DestinationPort(), + tm.data.SourcePortStart, tm.data.SourcePortEnd, + tm.data.DestinationPortStart, tm.data.DestinationPortEnd) + if sourcePort < tm.data.SourcePortStart || tm.data.SourcePortEnd < sourcePort { + return false, false + } + if destinationPort < tm.data.DestinationPortStart || tm.data.DestinationPortEnd < destinationPort { + return false, false + } + + return true, false +} diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 85512f9b2..6597e6781 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -353,6 +353,11 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt tcpip.PacketBuffer) { } pkt.NetworkHeader = headerView[:h.HeaderLength()] + hlen := int(h.HeaderLength()) + tlen := int(h.TotalLength()) + pkt.Data.TrimFront(hlen) + pkt.Data.CapLength(tlen - hlen) + // iptables filtering. All packets that reach here are intended for // this machine and will not be forwarded. 
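+	// The IP header has already been trimmed from pkt.Data above, so
+	// matchers see only the payload; the parsed header remains available
+	// via pkt.NetworkHeader.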
ipt := e.stack.IPTables() @@ -361,11 +366,6 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt tcpip.PacketBuffer) { return } - hlen := int(h.HeaderLength()) - tlen := int(h.TotalLength()) - pkt.Data.TrimFront(hlen) - pkt.Data.CapLength(tlen - hlen) - more := (h.Flags() & header.IPv4FlagMoreFragments) != 0 if more || h.FragmentOffset() != 0 { if pkt.Data.Size() == 0 { diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index fd02ff2ff..bc963d40e 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -15,6 +15,7 @@ package iptables import ( + "errors" "fmt" "net" "time" @@ -248,3 +249,48 @@ func (FilterInputDropAll) ContainerAction(ip net.IP) error { func (FilterInputDropAll) LocalAction(ip net.IP) error { return sendUDPLoop(ip, dropPort, sendloopDuration) } + +// FilterInputMultiUDPRules verifies that multiple UDP rules are applied +// correctly. This has the added benefit of testing whether we're serializing +// rules correctly -- if we do it incorrectly, the iptables tool will +// misunderstand and save the wrong tables. +type FilterInputMultiUDPRules struct{} + +func (FilterInputMultiUDPRules) Name() string { + return "FilterInputMultiUDPRules" +} + +func (FilterInputMultiUDPRules) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + return err + } + // if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", acceptPort), "-j", "ACCEPT"); err != nil { + // return err + // } + return filterTable("-L") +} + +func (FilterInputMultiUDPRules) LocalAction(ip net.IP) error { + // No-op. + return nil +} + +// FilterInputRequireProtocolUDP checks that "-m udp" requires "-p udp" to be +// specified. +type FilterInputRequireProtocolUDP struct{} + +func (FilterInputRequireProtocolUDP) Name() string { + return "FilterInputRequireProtocolUDP" +} + +func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err == nil { + return errors.New("expected iptables to fail with out \"-p udp\", but succeeded") + } + return nil +} + +func (FilterInputRequireProtocolUDP) LocalAction(ip net.IP) error { + // No-op. + return nil +} -- cgit v1.2.3 From 2296b4734462b6eeef383ea58e2b1b0b1a214d76 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 21 Jan 2020 16:16:51 -0800 Subject: Change to standard types. 
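Replace the gVisor-specific fixed-width type aliases (int32, uint64, ...) in
the tests and test utilities with the standard fixed-width types (int32_t,
uint64_t, ...).
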
PiperOrigin-RevId: 290846481 --- test/syscalls/linux/aio.cc | 2 +- test/syscalls/linux/chown.cc | 6 +- test/syscalls/linux/chroot.cc | 2 +- test/syscalls/linux/clock_gettime.cc | 12 ++-- test/syscalls/linux/eventfd.cc | 22 ++++---- test/syscalls/linux/exceptions.cc | 66 +++++++++++----------- test/syscalls/linux/exec.cc | 10 ++-- test/syscalls/linux/exec_binary.cc | 18 +++--- test/syscalls/linux/fcntl.cc | 22 ++++---- test/syscalls/linux/fork.cc | 2 +- test/syscalls/linux/futex.cc | 2 +- test/syscalls/linux/inotify.cc | 24 ++++---- test/syscalls/linux/ip_socket_test_util.cc | 4 +- test/syscalls/linux/ip_socket_test_util.h | 4 +- test/syscalls/linux/itimer.cc | 4 +- test/syscalls/linux/kill.cc | 4 +- test/syscalls/linux/link.cc | 5 +- test/syscalls/linux/memfd.cc | 2 +- test/syscalls/linux/memory_accounting.cc | 14 ++--- test/syscalls/linux/mempolicy.cc | 28 ++++----- test/syscalls/linux/mmap.cc | 16 +++--- test/syscalls/linux/open.cc | 2 +- test/syscalls/linux/partial_bad_buffer.cc | 2 +- test/syscalls/linux/prctl_setuid.cc | 2 +- test/syscalls/linux/proc.cc | 42 +++++++------- test/syscalls/linux/proc_net_tcp.cc | 62 ++++++++++---------- test/syscalls/linux/proc_net_udp.cc | 32 +++++------ test/syscalls/linux/proc_net_unix.cc | 12 ++-- test/syscalls/linux/proc_pid_uid_gid_map.cc | 26 ++++----- test/syscalls/linux/ptrace.cc | 4 +- test/syscalls/linux/pty.cc | 14 ++--- test/syscalls/linux/pwrite64.cc | 4 +- test/syscalls/linux/raw_socket_hdrincl.cc | 4 +- test/syscalls/linux/rseq.cc | 2 +- test/syscalls/linux/rseq/critical.h | 2 +- test/syscalls/linux/rseq/rseq.cc | 50 ++++++++-------- test/syscalls/linux/rseq/types.h | 16 +++--- test/syscalls/linux/seccomp.cc | 14 ++--- test/syscalls/linux/semaphore.cc | 10 ++-- test/syscalls/linux/shm.cc | 10 ++-- test/syscalls/linux/sigaltstack.cc | 2 +- test/syscalls/linux/sigiret.cc | 14 ++--- .../linux/socket_bind_to_device_distribution.cc | 14 ++--- test/syscalls/linux/socket_generic.cc | 2 +- test/syscalls/linux/socket_inet_loopback.cc | 56 +++++++++--------- test/syscalls/linux/socket_ip_unbound.cc | 8 +-- test/syscalls/linux/socket_netdevice.cc | 8 +-- test/syscalls/linux/socket_netlink_route.cc | 30 +++++----- test/syscalls/linux/socket_netlink_util.cc | 4 +- test/syscalls/linux/socket_netlink_util.h | 2 +- test/syscalls/linux/socket_test_util.cc | 2 +- test/syscalls/linux/splice.cc | 2 +- test/syscalls/linux/stat.cc | 40 ++++++------- test/syscalls/linux/sticky.cc | 4 +- test/syscalls/linux/sysret.cc | 8 +-- test/syscalls/linux/tcp_socket.cc | 2 +- test/syscalls/linux/time.cc | 4 +- test/syscalls/linux/timerfd.cc | 48 ++++++++-------- test/syscalls/linux/udp_socket_test_cases.cc | 14 ++--- test/syscalls/linux/uidgid.cc | 8 +-- test/syscalls/linux/utimes.cc | 25 ++++---- test/syscalls/linux/vfork.cc | 14 ++--- test/syscalls/linux/vsyscall.cc | 2 +- test/syscalls/linux/wait.cc | 18 +++--- test/util/mount_util.h | 6 +- test/util/multiprocess_util.cc | 2 +- test/util/multiprocess_util.h | 5 +- test/util/proc_util.cc | 2 +- test/util/temp_path.cc | 2 +- test/util/test_util.cc | 20 +++---- test/util/test_util.h | 12 ++-- test/util/test_util_test.cc | 4 +- 72 files changed, 483 insertions(+), 480 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/aio.cc b/test/syscalls/linux/aio.cc index 28592bc8f..a33daff17 100644 --- a/test/syscalls/linux/aio.cc +++ b/test/syscalls/linux/aio.cc @@ -183,7 +183,7 @@ TEST_F(AIOTest, BadWrite) { // Verify that it fails with the right error code. 
EXPECT_EQ(events[0].data, 0x123); - EXPECT_EQ(events[0].obj, reinterpret_cast(&cb)); + EXPECT_EQ(events[0].obj, reinterpret_cast(&cb)); EXPECT_LT(events[0].res, 0); } diff --git a/test/syscalls/linux/chown.cc b/test/syscalls/linux/chown.cc index 1c00e2731..7a28b674d 100644 --- a/test/syscalls/linux/chown.cc +++ b/test/syscalls/linux/chown.cc @@ -31,9 +31,9 @@ #include "test/util/test_util.h" #include "test/util/thread_util.h" -ABSL_FLAG(int32, scratch_uid1, 65534, "first scratch UID"); -ABSL_FLAG(int32, scratch_uid2, 65533, "second scratch UID"); -ABSL_FLAG(int32, scratch_gid, 65534, "first scratch GID"); +ABSL_FLAG(int32_t, scratch_uid1, 65534, "first scratch UID"); +ABSL_FLAG(int32_t, scratch_uid2, 65533, "second scratch UID"); +ABSL_FLAG(int32_t, scratch_gid, 65534, "first scratch GID"); namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc index 27e057086..04bc2d7b9 100644 --- a/test/syscalls/linux/chroot.cc +++ b/test/syscalls/linux/chroot.cc @@ -253,7 +253,7 @@ TEST(ChrootTest, ProcMemSelfMapsNoEscapeProcOpen) { // Mmap the newly created file. void* foo_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, foo.get(), 0); - ASSERT_THAT(reinterpret_cast(foo_map), SyscallSucceeds()); + ASSERT_THAT(reinterpret_cast(foo_map), SyscallSucceeds()); // Always unmap. auto cleanup_map = Cleanup( diff --git a/test/syscalls/linux/clock_gettime.cc b/test/syscalls/linux/clock_gettime.cc index 1d5b5af94..7f6015049 100644 --- a/test/syscalls/linux/clock_gettime.cc +++ b/test/syscalls/linux/clock_gettime.cc @@ -34,7 +34,7 @@ namespace testing { namespace { -int64 clock_gettime_nsecs(clockid_t id) { +int64_t clock_gettime_nsecs(clockid_t id) { struct timespec ts; TEST_PCHECK(clock_gettime(id, &ts) == 0); return (ts.tv_sec * 1000000000 + ts.tv_nsec); @@ -42,9 +42,9 @@ int64 clock_gettime_nsecs(clockid_t id) { // Spin on the CPU for at least ns nanoseconds, based on // CLOCK_THREAD_CPUTIME_ID. -void spin_ns(int64 ns) { - int64 start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID); - int64 end = start + ns; +void spin_ns(int64_t ns) { + int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID); + int64_t end = start + ns; do { constexpr int kLoopCount = 1000000; // large and arbitrary @@ -64,7 +64,7 @@ TEST(ClockGettime, CputimeId) { // the workers. Note that we test CLOCK_PROCESS_CPUTIME_ID by having the // workers execute in parallel and verifying that CLOCK_PROCESS_CPUTIME_ID // accumulates the runtime of all threads. - int64 start = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID); + int64_t start = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID); // Create a kNumThreads threads. std::list threads; @@ -76,7 +76,7 @@ TEST(ClockGettime, CputimeId) { t.Join(); } - int64 end = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID); + int64_t end = clock_gettime_nsecs(CLOCK_PROCESS_CPUTIME_ID); // The aggregate time spent in the worker threads must be at least // 'kNumThreads' times the time each thread spun. 
diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc index fed67a56e..367682c3d 100644 --- a/test/syscalls/linux/eventfd.cc +++ b/test/syscalls/linux/eventfd.cc @@ -37,7 +37,7 @@ TEST(EventfdTest, Nonblock) { FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); - uint64 l; + uint64_t l; ASSERT_THAT(read(efd.get(), &l, sizeof(l)), SyscallFailsWithErrno(EAGAIN)); l = 1; @@ -52,7 +52,7 @@ TEST(EventfdTest, Nonblock) { void* read_three_times(void* arg) { int efd = *reinterpret_cast(arg); - uint64 l; + uint64_t l; EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); EXPECT_THAT(read(efd, &l, sizeof(l)), SyscallSucceedsWithValue(sizeof(l))); @@ -68,7 +68,7 @@ TEST(EventfdTest, BlockingWrite) { reinterpret_cast(&efd)), SyscallSucceeds()); - uint64 l = 1; + uint64_t l = 1; ASSERT_THAT(write(efd, &l, sizeof(l)), SyscallSucceeds()); EXPECT_EQ(l, 1); @@ -85,7 +85,7 @@ TEST(EventfdTest, SmallWrite) { FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); - uint64 l = 16; + uint64_t l = 16; ASSERT_THAT(write(efd.get(), &l, 4), SyscallFailsWithErrno(EINVAL)); } @@ -93,7 +93,7 @@ TEST(EventfdTest, SmallRead) { FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); - uint64 l = 1; + uint64_t l = 1; ASSERT_THAT(write(efd.get(), &l, sizeof(l)), SyscallSucceeds()); l = 0; @@ -104,7 +104,7 @@ TEST(EventfdTest, BigWrite) { FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); - uint64 big[16]; + uint64_t big[16]; big[0] = 16; ASSERT_THAT(write(efd.get(), big, sizeof(big)), SyscallSucceeds()); } @@ -113,10 +113,10 @@ TEST(EventfdTest, BigRead) { FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); - uint64 l = 1; + uint64_t l = 1; ASSERT_THAT(write(efd.get(), &l, sizeof(l)), SyscallSucceeds()); - uint64 big[16]; + uint64_t big[16]; ASSERT_THAT(read(efd.get(), big, sizeof(big)), SyscallSucceeds()); EXPECT_EQ(big[0], 1); } @@ -125,7 +125,7 @@ TEST(EventfdTest, BigWriteBigRead) { FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); - uint64 l[16]; + uint64_t l[16]; l[0] = 16; ASSERT_THAT(write(efd.get(), l, sizeof(l)), SyscallSucceeds()); ASSERT_THAT(read(efd.get(), l, sizeof(l)), SyscallSucceeds()); @@ -150,7 +150,7 @@ TEST(EventfdTest, NotifyNonZero_NoRandomSave) { int wait_out = epoll_wait(epollfd.get(), &out_ev, 1, kEpollTimeoutMs); EXPECT_EQ(wait_out, 1); EXPECT_EQ(efd.get(), out_ev.data.fd); - uint64 val = 0; + uint64_t val = 0; ASSERT_THAT(read(efd.get(), &val, sizeof(val)), SyscallSucceeds()); EXPECT_EQ(val, 1); @@ -159,7 +159,7 @@ TEST(EventfdTest, NotifyNonZero_NoRandomSave) { // epoll_wait times out. ScopedThread t([&efd] { sleep(5); - uint64 val = 1; + uint64_t val = 1; EXPECT_THAT(write(efd.get(), &val, sizeof(val)), SyscallSucceedsWithValue(sizeof(val))); }); diff --git a/test/syscalls/linux/exceptions.cc b/test/syscalls/linux/exceptions.cc index 0b67eb0ad..3d564e720 100644 --- a/test/syscalls/linux/exceptions.cc +++ b/test/syscalls/linux/exceptions.cc @@ -24,20 +24,20 @@ namespace testing { // Default value for the x87 FPU control word. See Intel SDM Vol 1, Ch 8.1.5 // "x87 FPU Control Word". -constexpr uint16 kX87ControlWordDefault = 0x37f; +constexpr uint16_t kX87ControlWordDefault = 0x37f; // Mask for the divide-by-zero exception. 
-constexpr uint16 kX87ControlWordDiv0Mask = 1 << 2; +constexpr uint16_t kX87ControlWordDiv0Mask = 1 << 2; // Default value for the SSE control register (MXCSR). See Intel SDM Vol 1, Ch // 11.6.4 "Initialization of SSE/SSE3 Extensions". -constexpr uint32 kMXCSRDefault = 0x1f80; +constexpr uint32_t kMXCSRDefault = 0x1f80; // Mask for the divide-by-zero exception. -constexpr uint32 kMXCSRDiv0Mask = 1 << 9; +constexpr uint32_t kMXCSRDiv0Mask = 1 << 9; // Flag for a pending divide-by-zero exception. -constexpr uint32 kMXCSRDiv0Flag = 1 << 2; +constexpr uint32_t kMXCSRDiv0Flag = 1 << 2; void inline Halt() { asm("hlt\r\n"); } @@ -112,10 +112,10 @@ TEST(ExceptionTest, DivideByZero) { EXPECT_EXIT( { - uint32 remainder; - uint32 quotient; - uint32 divisor = 0; - uint64 value = 1; + uint32_t remainder; + uint32_t quotient; + uint32_t divisor = 0; + uint64_t value = 1; asm("divl 0(%2)\r\n" : "=d"(remainder), "=a"(quotient) : "r"(&divisor), "d"(value >> 32), "a"(value)); @@ -126,9 +126,9 @@ TEST(ExceptionTest, DivideByZero) { // By default, x87 exceptions are masked and simply return a default value. TEST(ExceptionTest, X87DivideByZeroMasked) { - int32 quotient; - int32 value = 1; - int32 divisor = 0; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; asm("fildl %[value]\r\n" "fidivl %[divisor]\r\n" "fistpl %[quotient]\r\n" @@ -148,12 +148,12 @@ TEST(ExceptionTest, X87DivideByZeroUnmasked) { EXPECT_EXIT( { // Clear the divide by zero exception mask. - constexpr uint16 kControlWord = + constexpr uint16_t kControlWord = kX87ControlWordDefault & ~kX87ControlWordDiv0Mask; - int32 quotient; - int32 value = 1; - int32 divisor = 0; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; asm volatile( "fldcw %[cw]\r\n" "fildl %[value]\r\n" @@ -176,12 +176,12 @@ TEST(ExceptionTest, X87StatusClobber) { EXPECT_EXIT( { // Clear the divide by zero exception mask. - constexpr uint16 kControlWord = + constexpr uint16_t kControlWord = kX87ControlWordDefault & ~kX87ControlWordDiv0Mask; - int32 quotient; - int32 value = 1; - int32 divisor = 0; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; asm volatile( "fildl %[value]\r\n" "fidivl %[divisor]\r\n" @@ -208,10 +208,10 @@ TEST(ExceptionTest, X87StatusClobber) { // By default, SSE exceptions are masked and simply return a default value. TEST(ExceptionTest, SSEDivideByZeroMasked) { - uint32 status; - int32 quotient; - int32 value = 1; - int32 divisor = 0; + uint32_t status; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; asm("cvtsi2ssl %[value], %%xmm0\r\n" "cvtsi2ssl %[divisor], %%xmm1\r\n" "divss %%xmm1, %%xmm0\r\n" @@ -233,11 +233,11 @@ TEST(ExceptionTest, SSEDivideByZeroUnmasked) { EXPECT_EXIT( { // Clear the divide by zero exception mask. - constexpr uint32 kMXCSR = kMXCSRDefault & ~kMXCSRDiv0Mask; + constexpr uint32_t kMXCSR = kMXCSRDefault & ~kMXCSRDiv0Mask; - int32 quotient; - int32 value = 1; - int32 divisor = 0; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; asm volatile( "ldmxcsr %[mxcsr]\r\n" "cvtsi2ssl %[value], %%xmm0\r\n" @@ -254,10 +254,10 @@ TEST(ExceptionTest, SSEDivideByZeroUnmasked) { // Pending exceptions in the SSE status register are not clobbered by syscalls. 
TEST(ExceptionTest, SSEStatusClobber) { - uint32 mxcsr; - int32 quotient; - int32 value = 1; - int32 divisor = 0; + uint32_t mxcsr; + int32_t quotient; + int32_t value = 1; + int32_t divisor = 0; asm("cvtsi2ssl %[value], %%xmm0\r\n" "cvtsi2ssl %[divisor], %%xmm1\r\n" "divss %%xmm1, %%xmm0\r\n" @@ -336,7 +336,7 @@ TEST(ExceptionTest, AlignmentCheck) { SetAlignmentCheck(); for (int i = 0; i < 8; i++) { // At least 7/8 offsets will be unaligned here. - uint64* ptr = reinterpret_cast(&array[i]); + uint64_t* ptr = reinterpret_cast(&array[i]); asm("mov %0, 0(%0)\r\n" : : "r"(ptr) : "ax"); } }, diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index 9c5a11206..b5e0a512b 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -62,7 +62,7 @@ constexpr char kExecFromThread[] = "--exec_exec_from_thread"; // Runs file specified by dirfd and pathname with argv and checks that the exit // status is expect_status and that stderr contains expect_stderr. -void CheckExecHelper(const absl::optional dirfd, +void CheckExecHelper(const absl::optional dirfd, const std::string& pathname, const ExecveArray& argv, const ExecveArray& envv, const int flags, int expect_status, const std::string& expect_stderr) { @@ -143,15 +143,15 @@ void CheckExecHelper(const absl::optional dirfd, void CheckExec(const std::string& filename, const ExecveArray& argv, const ExecveArray& envv, int expect_status, const std::string& expect_stderr) { - CheckExecHelper(/*dirfd=*/absl::optional(), filename, argv, envv, + CheckExecHelper(/*dirfd=*/absl::optional(), filename, argv, envv, /*flags=*/0, expect_status, expect_stderr); } -void CheckExecveat(const int32 dirfd, const std::string& pathname, +void CheckExecveat(const int32_t dirfd, const std::string& pathname, const ExecveArray& argv, const ExecveArray& envv, const int flags, int expect_status, const std::string& expect_stderr) { - CheckExecHelper(absl::optional(dirfd), pathname, argv, envv, flags, + CheckExecHelper(absl::optional(dirfd), pathname, argv, envv, flags, expect_status, expect_stderr); } @@ -603,7 +603,7 @@ TEST(ExecveatTest, AbsolutePathWithFDCWD) { TEST(ExecveatTest, AbsolutePath) { std::string path = RunfilePath(kBasicWorkload); // File descriptor should be ignored when an absolute path is given. - const int32 badFD = -1; + const int32_t badFD = -1; CheckExecveat(badFD, path, {path}, {}, ArgEnvExitStatus(0, 0), 0, absl::StrCat(path, "\n")); } diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc index 144bf45cf..736452b0c 100644 --- a/test/syscalls/linux/exec_binary.cc +++ b/test/syscalls/linux/exec_binary.cc @@ -700,7 +700,7 @@ TEST(ElfTest, PIE) { // The first segment really needs to start at 0 for a normal PIE binary, and // thus includes the headers. - const uint64 offset = elf.phdrs[1].p_offset; + const uint64_t offset = elf.phdrs[1].p_offset; elf.phdrs[1].p_offset = 0x0; elf.phdrs[1].p_vaddr = 0x0; elf.phdrs[1].p_filesz += offset; @@ -720,7 +720,7 @@ TEST(ElfTest, PIE) { struct user_regs_struct regs; ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); - const uint64 load_addr = regs.rip & ~(kPageSize - 1); + const uint64_t load_addr = regs.rip & ~(kPageSize - 1); EXPECT_THAT(child, ContainsMappings(std::vector({ // text page. 
@@ -789,7 +789,7 @@ TEST(ElfTest, PIENonZeroStart) { struct user_regs_struct regs; ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); - const uint64 load_addr = regs.rip & ~(kPageSize - 1); + const uint64_t load_addr = regs.rip & ~(kPageSize - 1); // The ELF is loaded at an arbitrary address, not the first PT_LOAD vaddr. // @@ -859,7 +859,7 @@ TEST(ElfTest, ELFInterpreter) { // The first segment really needs to start at 0 for a normal PIE binary, and // thus includes the headers. - uint64 const offset = interpreter.phdrs[1].p_offset; + uint64_t const offset = interpreter.phdrs[1].p_offset; // N.B. Since Linux 4.10 (0036d1f7eb95b "binfmt_elf: fix calculations for bss // padding"), Linux unconditionally zeroes the remainder of the highest mapped // page in an interpreter, failing if the protections don't allow write. Thus @@ -912,7 +912,7 @@ TEST(ElfTest, ELFInterpreter) { struct user_regs_struct regs; ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); - const uint64 interp_load_addr = regs.rip & ~(kPageSize - 1); + const uint64_t interp_load_addr = regs.rip & ~(kPageSize - 1); EXPECT_THAT( child, ContainsMappings(std::vector({ @@ -1047,7 +1047,7 @@ TEST(ElfTest, ELFInterpreterRelative) { // The first segment really needs to start at 0 for a normal PIE binary, and // thus includes the headers. - uint64 const offset = interpreter.phdrs[1].p_offset; + uint64_t const offset = interpreter.phdrs[1].p_offset; // See comment in ElfTest.ELFInterpreter. interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X; interpreter.phdrs[1].p_offset = 0x0; @@ -1086,7 +1086,7 @@ TEST(ElfTest, ELFInterpreterRelative) { struct user_regs_struct regs; ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); - const uint64 interp_load_addr = regs.rip & ~(kPageSize - 1); + const uint64_t interp_load_addr = regs.rip & ~(kPageSize - 1); EXPECT_THAT( child, ContainsMappings(std::vector({ @@ -1109,7 +1109,7 @@ TEST(ElfTest, ELFInterpreterWrongArch) { // The first segment really needs to start at 0 for a normal PIE binary, and // thus includes the headers. - uint64 const offset = interpreter.phdrs[1].p_offset; + uint64_t const offset = interpreter.phdrs[1].p_offset; // See comment in ElfTest.ELFInterpreter. interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X; interpreter.phdrs[1].p_offset = 0x0; @@ -1190,7 +1190,7 @@ TEST(ElfTest, ElfInterpreterNoExecute) { // The first segment really needs to start at 0 for a normal PIE binary, and // thus includes the headers. - uint64 const offset = interpreter.phdrs[1].p_offset; + uint64_t const offset = interpreter.phdrs[1].p_offset; // See comment in ElfTest.ELFInterpreter. 
interpreter.phdrs[1].p_flags = PF_R | PF_W | PF_X; interpreter.phdrs[1].p_offset = 0x0; diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 6eb597eae..4f3aa81d6 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -46,9 +46,9 @@ ABSL_FLAG(bool, blocking, false, "Whether to set a blocking lock (otherwise non-blocking)."); ABSL_FLAG(bool, retry_eintr, false, "Whether to retry in the subprocess on EINTR."); -ABSL_FLAG(uint64, child_setlock_start, 0, "The value of struct flock start"); -ABSL_FLAG(uint64, child_setlock_len, 0, "The value of struct flock len"); -ABSL_FLAG(int32, socket_fd, -1, +ABSL_FLAG(uint64_t, child_setlock_start, 0, "The value of struct flock start"); +ABSL_FLAG(uint64_t, child_setlock_len, 0, "The value of struct flock len"); +ABSL_FLAG(int32_t, socket_fd, -1, "A socket to use for communicating more state back " "to the parent."); @@ -71,8 +71,8 @@ class FcntlLockTest : public ::testing::Test { EXPECT_THAT(close(fds_[1]), SyscallSucceeds()); } - int64 GetSubprocessFcntlTimeInUsec() { - int64 ret = 0; + int64_t GetSubprocessFcntlTimeInUsec() { + int64_t ret = 0; EXPECT_THAT(ReadFd(fds_[0], reinterpret_cast(&ret), sizeof(ret)), SyscallSucceedsWithValue(sizeof(ret))); return ret; @@ -676,7 +676,7 @@ TEST_F(FcntlLockTest, SetWriteLockThenBlockingWriteLock) { // We will wait kHoldLockForSec before we release our lock allowing the // subprocess to obtain it. constexpr absl::Duration kHoldLockFor = absl::Seconds(5); - const int64 kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); absl::SleepFor(kHoldLockFor); @@ -685,7 +685,7 @@ TEST_F(FcntlLockTest, SetWriteLockThenBlockingWriteLock) { ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); // Read the blocked time from the subprocess socket. - int64 subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); // We must have been waiting at least kMinBlockTime. EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); @@ -729,7 +729,7 @@ TEST_F(FcntlLockTest, SetReadLockThenBlockingWriteLock) { // subprocess to obtain it. constexpr absl::Duration kHoldLockFor = absl::Seconds(5); - const int64 kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); absl::SleepFor(kHoldLockFor); @@ -738,7 +738,7 @@ TEST_F(FcntlLockTest, SetReadLockThenBlockingWriteLock) { ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); // Read the blocked time from the subprocess socket. - int64 subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); // We must have been waiting at least kMinBlockTime. EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); @@ -782,7 +782,7 @@ TEST_F(FcntlLockTest, SetWriteLockThenBlockingReadLock) { // subprocess to obtain it. constexpr absl::Duration kHoldLockFor = absl::Seconds(5); - const int64 kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); + const int64_t kMinBlockTimeUsec = absl::ToInt64Microseconds(absl::Seconds(1)); absl::SleepFor(kHoldLockFor); @@ -791,7 +791,7 @@ TEST_F(FcntlLockTest, SetWriteLockThenBlockingReadLock) { ASSERT_THAT(fcntl(fd.get(), F_SETLKW, &fl), SyscallSucceeds()); // Read the blocked time from the subprocess socket. 
- int64 subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); + int64_t subprocess_blocked_time_usec = GetSubprocessFcntlTimeInUsec(); // We must have been waiting at least kMinBlockTime. EXPECT_GT(subprocess_blocked_time_usec, kMinBlockTimeUsec); diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index 486189697..371890110 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -270,7 +270,7 @@ TEST_F(ForkTest, Alarm) { // Child cannot affect parent private memory. TEST_F(ForkTest, PrivateMemory) { - std::atomic local(0); + std::atomic local(0); pid_t child1 = Fork(); if (child1 == 0) { diff --git a/test/syscalls/linux/futex.cc b/test/syscalls/linux/futex.cc index b4a7cc8d6..40c80a6e1 100644 --- a/test/syscalls/linux/futex.cc +++ b/test/syscalls/linux/futex.cc @@ -112,7 +112,7 @@ int futex_wake_bitset(bool priv, std::atomic* uaddr, int count, } int futex_wake_op(bool priv, std::atomic* uaddr1, std::atomic* uaddr2, - int nwake1, int nwake2, uint32 sub_op) { + int nwake1, int nwake2, uint32_t sub_op) { int op = FUTEX_WAKE_OP; if (priv) { op |= FUTEX_PRIVATE_FLAG; diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 182d676d5..fdef646eb 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -48,26 +48,26 @@ constexpr int kBufSize = 1024; // C++-friendly version of struct inotify_event. struct Event { - int32 wd; - uint32 mask; - uint32 cookie; - uint32 len; + int32_t wd; + uint32_t mask; + uint32_t cookie; + uint32_t len; std::string name; - Event(uint32 mask, int32 wd, absl::string_view name, uint32 cookie) + Event(uint32_t mask, int32_t wd, absl::string_view name, uint32_t cookie) : wd(wd), mask(mask), cookie(cookie), len(name.size()), name(std::string(name)) {} - Event(uint32 mask, int32 wd, absl::string_view name) + Event(uint32_t mask, int32_t wd, absl::string_view name) : Event(mask, wd, name, 0) {} - Event(uint32 mask, int32 wd) : Event(mask, wd, "", 0) {} + Event(uint32_t mask, int32_t wd) : Event(mask, wd, "", 0) {} Event() : Event(0, 0, "", 0) {} }; // Prints the symbolic name for a struct inotify_event's 'mask' field. -std::string FlagString(uint32 flags) { +std::string FlagString(uint32_t flags) { std::vector names; #define EMIT(target) \ @@ -320,7 +320,7 @@ PosixErrorOr InotifyInit1(int flags) { } PosixErrorOr InotifyAddWatch(int fd, const std::string& path, - uint32 mask) { + uint32_t mask) { int wd; EXPECT_THAT(wd = inotify_add_watch(fd, path.c_str(), mask), SyscallSucceeds()); @@ -647,7 +647,7 @@ TEST(Inotify, MoveGeneratesEvents) { Event(IN_MOVED_TO, root_wd, Basename(newpath), events[1].cookie)})); EXPECT_NE(events[0].cookie, 0); EXPECT_EQ(events[0].cookie, events[1].cookie); - uint32 last_cookie = events[0].cookie; + uint32_t last_cookie = events[0].cookie; // Test move from root -> root/dir1. newpath = NewTempAbsPathInDir(dir1.path()); @@ -841,7 +841,7 @@ TEST(Inotify, ConcurrentThreadsGeneratingEvents) { } auto test_thread = [&files]() { - uint32 seed = time(nullptr); + uint32_t seed = time(nullptr); for (int i = 0; i < 20; i++) { const TempPath& file = files[rand_r(&seed) % files.size()]; const FileDescriptor file_fd = @@ -960,7 +960,7 @@ TEST(Inotify, BlockingReadOnInotifyFd) { t.Join(); // Make sure the event we got back is sane. 
- uint32 event_mask; + uint32_t event_mask; memcpy(&event_mask, buf.data() + offsetof(struct inotify_event, mask), sizeof(event_mask)); EXPECT_EQ(event_mask, IN_ACCESS); diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index f694a6360..6b472eb2f 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -24,12 +24,12 @@ namespace gvisor { namespace testing { -uint32 IPFromInetSockaddr(const struct sockaddr* addr) { +uint32_t IPFromInetSockaddr(const struct sockaddr* addr) { auto* in_addr = reinterpret_cast(addr); return in_addr->sin_addr.s_addr; } -uint16 PortFromInetSockaddr(const struct sockaddr* addr) { +uint16_t PortFromInetSockaddr(const struct sockaddr* addr) { auto* in_addr = reinterpret_cast(addr); return ntohs(in_addr->sin_port); } diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h index 0eeca30dd..0f58e0f77 100644 --- a/test/syscalls/linux/ip_socket_test_util.h +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -27,10 +27,10 @@ namespace gvisor { namespace testing { // Extracts the IP address from an inet sockaddr in network byte order. -uint32 IPFromInetSockaddr(const struct sockaddr* addr); +uint32_t IPFromInetSockaddr(const struct sockaddr* addr); // Extracts the port from an inet sockaddr in host byte order. -uint16 PortFromInetSockaddr(const struct sockaddr* addr); +uint16_t PortFromInetSockaddr(const struct sockaddr* addr); // InterfaceIndex returns the index of the named interface. PosixErrorOr InterfaceIndex(std::string name); diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc index 52ffbe89d..b77e4cbd1 100644 --- a/test/syscalls/linux/itimer.cc +++ b/test/syscalls/linux/itimer.cc @@ -177,8 +177,8 @@ SignalTestResult ItimerSignalTest(int id, clock_t main_clock, SignalTestResult result; // Wait for the workers to be done and collect their sample counts. - result.worker_samples.push_back(reinterpret_cast(th1.Join())); - result.worker_samples.push_back(reinterpret_cast(th2.Join())); + result.worker_samples.push_back(reinterpret_cast(th1.Join())); + result.worker_samples.push_back(reinterpret_cast(th2.Join())); cleanup_itimer.Release()(); result.expected_total = (Now(main_clock) - start) / kPeriod; result.main_thread_samples = signal_test_num_samples.load(); diff --git a/test/syscalls/linux/kill.cc b/test/syscalls/linux/kill.cc index a2247fdeb..db29bd59c 100644 --- a/test/syscalls/linux/kill.cc +++ b/test/syscalls/linux/kill.cc @@ -32,8 +32,8 @@ #include "test/util/test_util.h" #include "test/util/thread_util.h" -ABSL_FLAG(int32, scratch_uid, 65534, "scratch UID"); -ABSL_FLAG(int32, scratch_gid, 65534, "scratch GID"); +ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID"); +ABSL_FLAG(int32_t, scratch_gid, 65534, "scratch GID"); using ::testing::Ge; diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc index 108a0c23e..e74fa2ed5 100644 --- a/test/syscalls/linux/link.cc +++ b/test/syscalls/linux/link.cc @@ -32,7 +32,7 @@ #include "test/util/test_util.h" #include "test/util/thread_util.h" -ABSL_FLAG(int32, scratch_uid, 65534, "scratch UID"); +ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID"); namespace gvisor { namespace testing { @@ -55,7 +55,8 @@ TEST(LinkTest, CanCreateLinkFile) { const std::string newname = NewTempAbsPath(); // Get the initial link count. 
- uint64 initial_link_count = ASSERT_NO_ERRNO_AND_VALUE(Links(oldfile.path())); + uint64_t initial_link_count = + ASSERT_NO_ERRNO_AND_VALUE(Links(oldfile.path())); EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), SyscallSucceeds()); diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc index e10f250d1..e57b49a4a 100644 --- a/test/syscalls/linux/memfd.cc +++ b/test/syscalls/linux/memfd.cc @@ -61,7 +61,7 @@ int memfd_create(const std::string& name, unsigned int flags) { } PosixErrorOr MemfdCreate(const std::string& name, - uint32 flags) { + uint32_t flags) { int fd = memfd_create(name, flags); if (fd < 0) { return PosixError( diff --git a/test/syscalls/linux/memory_accounting.cc b/test/syscalls/linux/memory_accounting.cc index 987dbd151..94aea4077 100644 --- a/test/syscalls/linux/memory_accounting.cc +++ b/test/syscalls/linux/memory_accounting.cc @@ -33,7 +33,7 @@ using ::absl::StrFormat; // AnonUsageFromMeminfo scrapes the current anonymous memory usage from // /proc/meminfo and returns it in bytes. -PosixErrorOr AnonUsageFromMeminfo() { +PosixErrorOr AnonUsageFromMeminfo() { ASSIGN_OR_RETURN_ERRNO(auto meminfo, GetContents("/proc/meminfo")); std::vector lines(absl::StrSplit(meminfo, '\n')); @@ -47,7 +47,7 @@ PosixErrorOr AnonUsageFromMeminfo() { absl::StrSplit(line, ' ', absl::SkipEmpty())); if (parts.size() == 3) { // The size is the second field, let's try to parse it as a number. - ASSIGN_OR_RETURN_ERRNO(auto anon_kb, Atoi(parts[1])); + ASSIGN_OR_RETURN_ERRNO(auto anon_kb, Atoi(parts[1])); return anon_kb * 1024; } @@ -65,10 +65,10 @@ TEST(MemoryAccounting, AnonAccountingPreservedOnSaveRestore) { // the test. SKIP_IF(!IsRunningOnGvisor()); - uint64 anon_initial = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + uint64_t anon_initial = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); // Cause some anonymous memory usage. - uint64 map_bytes = Megabytes(512); + uint64_t map_bytes = Megabytes(512); char* mem = static_cast(mmap(nullptr, map_bytes, PROT_READ | PROT_WRITE, MAP_POPULATE | MAP_ANON | MAP_PRIVATE, -1, 0)); @@ -77,11 +77,11 @@ TEST(MemoryAccounting, AnonAccountingPreservedOnSaveRestore) { // Write something to each page to prevent them from being decommited on // S/R. Zero pages are dropped on save. - for (uint64 i = 0; i < map_bytes; i += kPageSize) { + for (uint64_t i = 0; i < map_bytes; i += kPageSize) { mem[i] = 'a'; } - uint64 anon_after_alloc = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + uint64_t anon_after_alloc = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); EXPECT_THAT(anon_after_alloc, EquivalentWithin(anon_initial + map_bytes, 0.03)); @@ -90,7 +90,7 @@ TEST(MemoryAccounting, AnonAccountingPreservedOnSaveRestore) { MaybeSave(); // Usage should remain the same across S/R. 
- uint64 anon_after_sr = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); + uint64_t anon_after_sr = ASSERT_NO_ERRNO_AND_VALUE(AnonUsageFromMeminfo()); EXPECT_THAT(anon_after_sr, EquivalentWithin(anon_after_alloc, 0.03)); } diff --git a/test/syscalls/linux/mempolicy.cc b/test/syscalls/linux/mempolicy.cc index 46bbbc923..9d5f47651 100644 --- a/test/syscalls/linux/mempolicy.cc +++ b/test/syscalls/linux/mempolicy.cc @@ -43,12 +43,12 @@ namespace { #define MPOL_MF_MOVE (1 << 1) #define MPOL_MF_MOVE_ALL (1 << 2) -int get_mempolicy(int *policy, uint64 *nmask, uint64 maxnode, void *addr, +int get_mempolicy(int *policy, uint64_t *nmask, uint64_t maxnode, void *addr, int flags) { return syscall(SYS_get_mempolicy, policy, nmask, maxnode, addr, flags); } -int set_mempolicy(int mode, uint64 *nmask, uint64 maxnode) { +int set_mempolicy(int mode, uint64_t *nmask, uint64_t maxnode) { return syscall(SYS_set_mempolicy, mode, nmask, maxnode); } @@ -68,8 +68,8 @@ Cleanup ScopedMempolicy() { // Temporarily change the memory policy for the calling thread within the // caller's scope. -PosixErrorOr ScopedSetMempolicy(int mode, uint64 *nmask, - uint64 maxnode) { +PosixErrorOr ScopedSetMempolicy(int mode, uint64_t *nmask, + uint64_t maxnode) { if (set_mempolicy(mode, nmask, maxnode)) { return PosixError(errno, "set_mempolicy"); } @@ -78,7 +78,7 @@ PosixErrorOr ScopedSetMempolicy(int mode, uint64 *nmask, TEST(MempolicyTest, CheckDefaultPolicy) { int mode = 0; - uint64 nodemask = 0; + uint64_t nodemask = 0; ASSERT_THAT(get_mempolicy(&mode, &nodemask, sizeof(nodemask) * BITS_PER_BYTE, nullptr, 0), SyscallSucceeds()); @@ -88,12 +88,12 @@ TEST(MempolicyTest, CheckDefaultPolicy) { } TEST(MempolicyTest, PolicyPreservedAfterSetMempolicy) { - uint64 nodemask = 0x1; + uint64_t nodemask = 0x1; auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy( MPOL_BIND, &nodemask, sizeof(nodemask) * BITS_PER_BYTE)); int mode = 0; - uint64 nodemask_after = 0x0; + uint64_t nodemask_after = 0x0; ASSERT_THAT(get_mempolicy(&mode, &nodemask_after, sizeof(nodemask_after) * BITS_PER_BYTE, nullptr, 0), SyscallSucceeds()); @@ -118,7 +118,7 @@ TEST(MempolicyTest, PolicyPreservedAfterSetMempolicy) { TEST(MempolicyTest, SetMempolicyRejectsInvalidInputs) { auto cleanup = ScopedMempolicy(); - uint64 nodemask; + uint64_t nodemask; if (IsRunningOnGvisor()) { // Invalid nodemask, we only support a single node on gvisor. 
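// A usage sketch for the raw mempolicy wrappers shown above: install a
// one-node MPOL_BIND policy for the calling thread and read it back.
// kMpolBind follows the Linux UAPI value for MPOL_BIND (assumed here, since
// the mode defines are outside the visible hunk); error handling is reduced
// to a -1 sentinel.
#include <sys/syscall.h>
#include <unistd.h>

#include <cstdint>

int BindToNodeZeroSketch() {
  constexpr int kMpolBind = 2;  // MPOL_BIND per the Linux UAPI.
  constexpr int kBitsPerByte = 8;
  uint64_t nodemask = 0x1;  // Only node 0 set.
  if (syscall(SYS_set_mempolicy, kMpolBind, &nodemask,
              sizeof(nodemask) * kBitsPerByte) != 0) {
    return -1;
  }
  int mode = 0;
  uint64_t nodemask_after = 0;
  if (syscall(SYS_get_mempolicy, &mode, &nodemask_after,
              sizeof(nodemask_after) * kBitsPerByte, nullptr, 0) != 0) {
    return -1;
  }
  return (mode == kMpolBind && nodemask_after == 0x1) ? 0 : -1;
}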
@@ -165,7 +165,7 @@ TEST(MempolicyTest, EmptyNodemaskOnSet) { SyscallFailsWithErrno(EINVAL)); EXPECT_THAT(set_mempolicy(MPOL_PREFERRED, nullptr, 1), SyscallSucceeds()); - uint64 nodemask = 0x1; + uint64_t nodemask = 0x1; EXPECT_THAT(set_mempolicy(MPOL_DEFAULT, &nodemask, 0), SyscallFailsWithErrno(EINVAL)); EXPECT_THAT(set_mempolicy(MPOL_BIND, &nodemask, 0), @@ -175,7 +175,7 @@ TEST(MempolicyTest, EmptyNodemaskOnSet) { } TEST(MempolicyTest, QueryAvailableNodes) { - uint64 nodemask = 0; + uint64_t nodemask = 0; ASSERT_THAT( get_mempolicy(nullptr, &nodemask, sizeof(nodemask) * BITS_PER_BYTE, nullptr, MPOL_F_MEMS_ALLOWED), @@ -197,8 +197,8 @@ TEST(MempolicyTest, QueryAvailableNodes) { } TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) { - uint64 dummy_stack_address; - auto dummy_heap_address = absl::make_unique(); + uint64_t dummy_stack_address; + auto dummy_heap_address = absl::make_unique(); int mode; for (auto ptr : {&dummy_stack_address, dummy_heap_address.get()}) { @@ -228,7 +228,7 @@ TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) { TEST(MempolicyTest, GetMempolicyCanOmitPointers) { int mode; - uint64 nodemask; + uint64_t nodemask; // Omit nodemask pointer. ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, nullptr, 0), SyscallSucceeds()); @@ -249,7 +249,7 @@ TEST(MempolicyTest, GetMempolicyNextInterleaveNode) { SyscallFailsWithErrno(EINVAL)); // Set default policy for thread to MPOL_INTERLEAVE. - uint64 nodemask = 0x1; + uint64_t nodemask = 0x1; auto cleanup = ASSERT_NO_ERRNO_AND_VALUE(ScopedSetMempolicy( MPOL_INTERLEAVE, &nodemask, sizeof(nodemask) * BITS_PER_BYTE)); diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index 9b2270c8d..1c4d9f1c7 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -50,13 +50,13 @@ namespace testing { namespace { -PosixErrorOr VirtualMemorySize() { +PosixErrorOr VirtualMemorySize() { ASSIGN_OR_RETURN_ERRNO(auto contents, GetContents("/proc/self/statm")); std::vector parts = absl::StrSplit(contents, ' '); if (parts.empty()) { return PosixError(EINVAL, "Unable to parse /proc/self/statm"); } - ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi(parts[0])); + ASSIGN_OR_RETURN_ERRNO(auto pages, Atoi(parts[0])); return pages * getpagesize(); } @@ -245,7 +245,7 @@ TEST_F(MMapTest, MapDevZeroSharedFdNoPersistence) { // Create a second mapping via the same fd. void* psec_map = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, dev_zero.get(), 0); - ASSERT_THAT(reinterpret_cast(psec_map), SyscallSucceeds()); + ASSERT_THAT(reinterpret_cast(psec_map), SyscallSucceeds()); // Always unmap. auto cleanup_psec_map = Cleanup( @@ -690,10 +690,10 @@ TEST_F(MMapTest, ExceedLimitDataPrlimitPID) { } TEST_F(MMapTest, NoExceedLimitAS) { - constexpr uint64 kAllocBytes = 200 << 20; + constexpr uint64_t kAllocBytes = 200 << 20; // Add some headroom to the AS limit in case of e.g. unexpected stack // expansion. - constexpr uint64 kExtraASBytes = kAllocBytes + (20 << 20); + constexpr uint64_t kExtraASBytes = kAllocBytes + (20 << 20); static_assert(kAllocBytes < kExtraASBytes, "test depends on allocation not exceeding AS limit"); @@ -708,10 +708,10 @@ TEST_F(MMapTest, NoExceedLimitAS) { } TEST_F(MMapTest, ExceedLimitAS) { - constexpr uint64 kAllocBytes = 200 << 20; + constexpr uint64_t kAllocBytes = 200 << 20; // Add some headroom to the AS limit in case of e.g. unexpected stack // expansion. 
- constexpr uint64 kExtraASBytes = 20 << 20; + constexpr uint64_t kExtraASBytes = 20 << 20; static_assert(kAllocBytes > kExtraASBytes, "test depends on allocation exceeding AS limit"); @@ -1469,7 +1469,7 @@ TEST_F(MMapFileTest, InternalSigBusZeroing) { SyscallFailsWithErrno(EFAULT)); } -// Checks that mmaps with a length of uint64(-PAGE_SIZE + 1) or greater do not +// Checks that mmaps with a length of uint64_t(-PAGE_SIZE + 1) or greater do not // induce a sentry panic (due to "rounding up" to 0). TEST_F(MMapTest, HugeLength) { EXPECT_THAT(Map(0, static_cast(-kPageSize + 1), PROT_NONE, diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index a5e790729..267ae19f6 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -193,7 +193,7 @@ TEST_F(OpenTest, Fault) { TEST_F(OpenTest, AppendOnly) { // First write some data to the fresh file. - const int64 kBufSize = 1024; + const int64_t kBufSize = 1024; std::vector buf(kBufSize, 'a'); FileDescriptor fd0 = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR)); diff --git a/test/syscalls/linux/partial_bad_buffer.cc b/test/syscalls/linux/partial_bad_buffer.cc index 55eb9361f..df7129acc 100644 --- a/test/syscalls/linux/partial_bad_buffer.cc +++ b/test/syscalls/linux/partial_bad_buffer.cc @@ -363,7 +363,7 @@ TEST_F(PartialBadBufferTest, SendMsgTCP) { // byte past the valid page and check that it triggers an EFAULT // correctly. Otherwise in gVisor the sendmsg call will just return with no // error with kPageSize bytes written successfully. - const uint32 buf_size = kPageSize + 1; + const uint32_t buf_size = kPageSize + 1; ASSERT_THAT(setsockopt(send_socket.get(), SOL_SOCKET, SO_SNDBUF, &buf_size, sizeof(buf_size)), SyscallSucceedsWithValue(0)); diff --git a/test/syscalls/linux/prctl_setuid.cc b/test/syscalls/linux/prctl_setuid.cc index ad39a8463..30f0d75b3 100644 --- a/test/syscalls/linux/prctl_setuid.cc +++ b/test/syscalls/linux/prctl_setuid.cc @@ -26,7 +26,7 @@ #include "test/util/test_util.h" #include "test/util/thread_util.h" -ABSL_FLAG(int32, scratch_uid, 65534, "scratch UID"); +ABSL_FLAG(int32_t, scratch_uid, 65534, "scratch UID"); // This flag is used to verify that after an exec PR_GET_KEEPCAPS // returns 0, the return code will be offset by kPrGetKeepCapsExitBase. ABSL_FLAG(bool, prctl_pr_get_keepcaps, false, diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 0d5899ec9..bf9bb45d3 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -463,12 +463,12 @@ std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) { return AnonymousMapsEntry(m.addr(), m.len(), prot); } -PosixErrorOr> ReadProcSelfAuxv() { +PosixErrorOr> ReadProcSelfAuxv() { std::string auxv_file; RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file)); const Elf64_auxv_t* auxv_data = reinterpret_cast(auxv_file.data()); - std::map auxv_entries; + std::map auxv_entries; for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) { auto a_type = auxv_data[i].a_type; EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type; @@ -877,7 +877,7 @@ TEST(ProcStat, Fields) { // All fields besides itime are valid base 10 numbers. for (size_t i = 1; i < fields.size(); i++) { - uint64 val; + uint64_t val; EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat; } } @@ -904,7 +904,7 @@ TEST(ProcLoadavg, Fields) { EXPECT_EQ(fields.size(), 6) << proc_loadvg; double val; - uint64 val2; + uint64_t val2; // First three fields are floating point numbers. 
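// Sketch of the /proc/self/auxv walk performed by ReadProcSelfAuxv above,
// assuming a 64-bit target: the file is a flat array of Elf64_auxv_t records
// terminated by an AT_NULL entry, so it can be read whole and iterated in
// place.
#include <elf.h>

#include <cstdint>
#include <fstream>
#include <iterator>
#include <map>
#include <vector>

std::map<uint64_t, uint64_t> ReadAuxvSketch() {
  std::ifstream f("/proc/self/auxv", std::ios::binary);
  std::vector<char> raw((std::istreambuf_iterator<char>(f)),
                        std::istreambuf_iterator<char>());
  std::map<uint64_t, uint64_t> entries;
  if (raw.empty()) {
    return entries;
  }
  const auto* auxv = reinterpret_cast<const Elf64_auxv_t*>(raw.data());
  for (size_t i = 0; auxv[i].a_type != AT_NULL; i++) {
    entries[auxv[i].a_type] = auxv[i].a_un.a_val;
  }
  return entries;
}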
EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg; EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg; @@ -936,19 +936,19 @@ TEST_P(ProcPidStatTest, HasBasicFields) { // boot time will be very close, and the proc starttime field (which is the // delta of the two times) will be 0. For that unfortunate reason, we can // only check that starttime >= 0, and not that it is strictly > 0. - uint64 starttime; + uint64_t starttime; ASSERT_TRUE(absl::SimpleAtoi(fields[21], &starttime)); EXPECT_GE(starttime, 0); - uint64 vss; + uint64_t vss; ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss)); EXPECT_GT(vss, 0); - uint64 rss; + uint64_t rss; ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss)); EXPECT_GT(rss, 0); - uint64 rsslim; + uint64_t rsslim; ASSERT_TRUE(absl::SimpleAtoi(fields[24], &rsslim)); EXPECT_GT(rsslim, 0); } @@ -965,11 +965,11 @@ TEST_P(ProcPidStatmTest, HasBasicFields) { std::vector fields = absl::StrSplit(proc_pid_statm, ' '); ASSERT_GE(fields.size(), 7); - uint64 vss; + uint64_t vss; ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss)); EXPECT_GT(vss, 0); - uint64 rss; + uint64_t rss; ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss)); EXPECT_GT(rss, 0); } @@ -977,7 +977,7 @@ TEST_P(ProcPidStatmTest, HasBasicFields) { INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatmTest, ::testing::Values("self", absl::StrCat(getpid()))); -PosixErrorOr CurrentRSS() { +PosixErrorOr CurrentRSS() { ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat")); if (proc_self_stat.empty()) { return PosixError(EINVAL, "empty /proc/self/stat"); @@ -990,7 +990,7 @@ PosixErrorOr CurrentRSS() { absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat)); } - uint64 rss; + uint64_t rss; if (!absl::SimpleAtoi(fields[23], &rss)) { return PosixError( EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ", @@ -1002,14 +1002,14 @@ PosixErrorOr CurrentRSS() { } // The size of mapping created by MapPopulateRSS. -constexpr uint64 kMappingSize = 100 << 20; +constexpr uint64_t kMappingSize = 100 << 20; // Tolerance on RSS comparisons to account for background thread mappings, // reclaimed pages, newly faulted pages, etc. -constexpr uint64 kRSSTolerance = 5 << 20; +constexpr uint64_t kRSSTolerance = 5 << 20; // Capture RSS before and after an anonymous mapping with passed prot. -void MapPopulateRSS(int prot, uint64* before, uint64* after) { +void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) { *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS()); // N.B. The kernel asynchronously accumulates per-task RSS counters into the @@ -1040,7 +1040,7 @@ void MapPopulateRSS(int prot, uint64* before, uint64* after) { // PROT_WRITE + MAP_POPULATE anonymous mappings are always committed. TEST(ProcSelfStat, PopulateWriteRSS) { - uint64 before, after; + uint64_t before, after; MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after); // Mapping is committed. @@ -1049,7 +1049,7 @@ TEST(ProcSelfStat, PopulateWriteRSS) { // PROT_NONE + MAP_POPULATE anonymous mappings are never committed. TEST(ProcSelfStat, PopulateNoneRSS) { - uint64 before, after; + uint64_t before, after; MapPopulateRSS(PROT_NONE, &before, &after); // Mapping not committed. @@ -1766,7 +1766,7 @@ TEST(ProcTask, VerifyTaskDirNlinks) { // Once we reach the test body, we can count on the thread count being stable // unless we spawn a new one. 
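// Sketch of the CurrentRSS helper above: /proc/self/stat is split on spaces
// and field index 23 (the 24th field) holds the resident set size in pages,
// which is then scaled by the page size. std::stoull stands in for the
// SimpleAtoi/PosixError plumbing of the real helper.
#include <unistd.h>

#include <cstdint>
#include <fstream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>

int64_t CurrentRSSBytesSketch() {
  std::ifstream f("/proc/self/stat");
  std::string stat((std::istreambuf_iterator<char>(f)),
                   std::istreambuf_iterator<char>());
  std::istringstream in(stat);
  std::vector<std::string> fields{std::istream_iterator<std::string>(in),
                                  std::istream_iterator<std::string>()};
  if (fields.size() < 24) {
    return -1;
  }
  return static_cast<int64_t>(std::stoull(fields[23])) * getpagesize();
}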
- uint64 initial_links = ASSERT_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); + uint64_t initial_links = ASSERT_NO_ERRNO_AND_VALUE(Links("/proc/self/task")); ASSERT_GE(initial_links, 3); // For each new subtask, we should gain a new link. @@ -1864,9 +1864,9 @@ TEST(ProcFilesystems, Bug65172365) { } TEST(ProcFilesystems, PresenceOfShmMaxMniAll) { - uint64 shmmax = 0; - uint64 shmall = 0; - uint64 shmmni = 0; + uint64_t shmmax = 0; + uint64_t shmall = 0; + uint64_t shmmni = 0; std::string proc_file; proc_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/shmmax")); ASSERT_FALSE(proc_file.empty()); diff --git a/test/syscalls/linux/proc_net_tcp.cc b/test/syscalls/linux/proc_net_tcp.cc index 77183420b..5b6e3e3cd 100644 --- a/test/syscalls/linux/proc_net_tcp.cc +++ b/test/syscalls/linux/proc_net_tcp.cc @@ -40,15 +40,15 @@ constexpr char kProcNetTCPHeader[] = // TCPEntry represents a single entry from /proc/net/tcp. struct TCPEntry { - uint32 local_addr; - uint16 local_port; + uint32_t local_addr; + uint16_t local_port; - uint32 remote_addr; - uint16 remote_port; + uint32_t remote_addr; + uint16_t remote_port; - uint64 state; - uint64 uid; - uint64 inode; + uint64_t state; + uint64_t uid; + uint64_t inode; }; // Finds the first entry in 'entries' for which 'predicate' returns true. @@ -69,8 +69,8 @@ bool FindBy(const std::vector& entries, TCPEntry* match, bool FindByLocalAddr(const std::vector& entries, TCPEntry* match, const struct sockaddr* addr) { - uint32 host = IPFromInetSockaddr(addr); - uint16 port = PortFromInetSockaddr(addr); + uint32_t host = IPFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); return FindBy(entries, match, [host, port](const TCPEntry& e) { return (e.local_addr == host && e.local_port == port); }); @@ -78,8 +78,8 @@ bool FindByLocalAddr(const std::vector& entries, TCPEntry* match, bool FindByRemoteAddr(const std::vector& entries, TCPEntry* match, const struct sockaddr* addr) { - uint32 host = IPFromInetSockaddr(addr); - uint16 port = PortFromInetSockaddr(addr); + uint32_t host = IPFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); return FindBy(entries, match, [host, port](const TCPEntry& e) { return (e.remote_addr == host && e.remote_port == port); }); @@ -131,8 +131,8 @@ PosixErrorOr> ProcNetTCPEntries() { ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16)); ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16)); - ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi(fields[11])); - ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi(fields[13])); + ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi(fields[11])); + ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi(fields[13])); entries.push_back(entry); } @@ -234,8 +234,8 @@ TEST(ProcNetTCP, State) { FileDescriptor accepted = ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr)); - const uint32 accepted_local_host = IPFromInetSockaddr(&addr); - const uint16 accepted_local_port = PortFromInetSockaddr(&addr); + const uint32_t accepted_local_host = IPFromInetSockaddr(&addr); + const uint16_t accepted_local_port = PortFromInetSockaddr(&addr); entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCPEntries()); TCPEntry accepted_entry; @@ -258,14 +258,14 @@ constexpr char kProcNetTCP6Header[] = // TCP6Entry represents a single entry from /proc/net/tcp6. 
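// Sketch of how the address columns consumed by ProcNetTCPEntries above are
// laid out: each is "hhhhhhhh:pppp", with the IPv4 address printed as the raw
// 32-bit s_addr value and the port in host byte order, both hexadecimal, so
// the parsed numbers compare directly against IPFromInetSockaddr and
// PortFromInetSockaddr results. The helper name is illustrative.
#include <cstdint>
#include <cstdio>

bool ParseTCPAddrColumnSketch(const char* column, uint32_t* addr,
                              uint16_t* port) {
  unsigned int a = 0;
  unsigned int p = 0;
  if (sscanf(column, "%8X:%4X", &a, &p) != 2) {
    return false;
  }
  *addr = static_cast<uint32_t>(a);
  *port = static_cast<uint16_t>(p);
  return true;
}

// Example: on a little-endian host, "0100007F:0050" parses to the raw s_addr
// of 127.0.0.1 and port 80.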
struct TCP6Entry { struct in6_addr local_addr; - uint16 local_port; + uint16_t local_port; struct in6_addr remote_addr; - uint16 remote_port; + uint16_t remote_port; - uint64 state; - uint64 uid; - uint64 inode; + uint64_t state; + uint64_t uid; + uint64_t inode; }; bool IPv6AddrEqual(const struct in6_addr* a1, const struct in6_addr* a2) { @@ -296,7 +296,7 @@ const struct in6_addr* IP6FromInetSockaddr(const struct sockaddr* addr) { bool FindByLocalAddr6(const std::vector& entries, TCP6Entry* match, const struct sockaddr* addr) { const struct in6_addr* local = IP6FromInetSockaddr(addr); - uint16 port = PortFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); return FindBy6(entries, match, [local, port](const TCP6Entry& e) { return (IPv6AddrEqual(&e.local_addr, local) && e.local_port == port); }); @@ -305,22 +305,22 @@ bool FindByLocalAddr6(const std::vector& entries, TCP6Entry* match, bool FindByRemoteAddr6(const std::vector& entries, TCP6Entry* match, const struct sockaddr* addr) { const struct in6_addr* remote = IP6FromInetSockaddr(addr); - uint16 port = PortFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); return FindBy6(entries, match, [remote, port](const TCP6Entry& e) { return (IPv6AddrEqual(&e.remote_addr, remote) && e.remote_port == port); }); } void ReadIPv6Address(std::string s, struct in6_addr* addr) { - uint32 a0, a1, a2, a3; + uint32_t a0, a1, a2, a3; const char* fmt = "%08X%08X%08X%08X"; EXPECT_EQ(sscanf(s.c_str(), fmt, &a0, &a1, &a2, &a3), 4); - uint8* b = addr->s6_addr; - *((uint32*)&b[0]) = a0; - *((uint32*)&b[4]) = a1; - *((uint32*)&b[8]) = a2; - *((uint32*)&b[12]) = a3; + uint8_t* b = addr->s6_addr; + *((uint32_t*)&b[0]) = a0; + *((uint32_t*)&b[4]) = a1; + *((uint32_t*)&b[8]) = a2; + *((uint32_t*)&b[12]) = a3; } // Returns a parsed representation of /proc/net/tcp6 entries. @@ -367,8 +367,8 @@ PosixErrorOr> ProcNetTCP6Entries() { ReadIPv6Address(fields[3], &entry.remote_addr); ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16)); ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16)); - ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi(fields[11])); - ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi(fields[13])); + ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi(fields[11])); + ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi(fields[13])); entries.push_back(entry); } @@ -476,7 +476,7 @@ TEST(ProcNetTCP6, State) { ASSERT_NO_ERRNO_AND_VALUE(Accept(server->get(), nullptr, nullptr)); const struct in6_addr* local = IP6FromInetSockaddr(addr); - const uint16 accepted_local_port = PortFromInetSockaddr(addr); + const uint16_t accepted_local_port = PortFromInetSockaddr(addr); entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetTCP6Entries()); TCP6Entry accepted_entry; diff --git a/test/syscalls/linux/proc_net_udp.cc b/test/syscalls/linux/proc_net_udp.cc index 98c1e0cf1..786b4b4af 100644 --- a/test/syscalls/linux/proc_net_udp.cc +++ b/test/syscalls/linux/proc_net_udp.cc @@ -40,15 +40,15 @@ constexpr char kProcNetUDPHeader[] = // UDPEntry represents a single entry from /proc/net/udp. 
struct UDPEntry { - uint32 local_addr; - uint16 local_port; + uint32_t local_addr; + uint16_t local_port; - uint32 remote_addr; - uint16 remote_port; + uint32_t remote_addr; + uint16_t remote_port; - uint64 state; - uint64 uid; - uint64 inode; + uint64_t state; + uint64_t uid; + uint64_t inode; }; std::string DescribeFirstInetSocket(const SocketPair& sockets) { @@ -81,8 +81,8 @@ bool FindBy(const std::vector& entries, UDPEntry* match, bool FindByLocalAddr(const std::vector& entries, UDPEntry* match, const struct sockaddr* addr) { - uint32 host = IPFromInetSockaddr(addr); - uint16 port = PortFromInetSockaddr(addr); + uint32_t host = IPFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); return FindBy(entries, match, [host, port](const UDPEntry& e) { return (e.local_addr == host && e.local_port == port); }); @@ -90,14 +90,14 @@ bool FindByLocalAddr(const std::vector& entries, UDPEntry* match, bool FindByRemoteAddr(const std::vector& entries, UDPEntry* match, const struct sockaddr* addr) { - uint32 host = IPFromInetSockaddr(addr); - uint16 port = PortFromInetSockaddr(addr); + uint32_t host = IPFromInetSockaddr(addr); + uint16_t port = PortFromInetSockaddr(addr); return FindBy(entries, match, [host, port](const UDPEntry& e) { return (e.remote_addr == host && e.remote_port == port); }); } -PosixErrorOr InodeFromSocketFD(int fd) { +PosixErrorOr InodeFromSocketFD(int fd) { ASSIGN_OR_RETURN_ERRNO(struct stat s, Fstat(fd)); if (!S_ISSOCK(s.st_mode)) { return PosixError(EINVAL, StrFormat("FD %d is not a socket", fd)); @@ -107,7 +107,7 @@ PosixErrorOr InodeFromSocketFD(int fd) { PosixErrorOr FindByFD(const std::vector& entries, UDPEntry* match, int fd) { - ASSIGN_OR_RETURN_ERRNO(uint64 inode, InodeFromSocketFD(fd)); + ASSIGN_OR_RETURN_ERRNO(uint64_t inode, InodeFromSocketFD(fd)); return FindBy(entries, match, [inode](const UDPEntry& e) { return (e.inode == inode); }); } @@ -158,8 +158,8 @@ PosixErrorOr> ProcNetUDPEntries() { ASSIGN_OR_RETURN_ERRNO(entry.remote_port, AtoiBase(fields[4], 16)); ASSIGN_OR_RETURN_ERRNO(entry.state, AtoiBase(fields[5], 16)); - ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi(fields[11])); - ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi(fields[13])); + ASSIGN_OR_RETURN_ERRNO(entry.uid, Atoi(fields[11])); + ASSIGN_OR_RETURN_ERRNO(entry.inode, Atoi(fields[13])); // Linux shares internal data structures between TCP and UDP sockets. The // proc entries for UDP sockets share some fields with TCP sockets, but @@ -267,7 +267,7 @@ TEST(ProcNetUDP, BoundEntry) { struct sockaddr addr; socklen_t len = sizeof(addr); ASSERT_THAT(getsockname(socket->get(), &addr, &len), SyscallSucceeds()); - uint16 port = PortFromInetSockaddr(&addr); + uint16_t port = PortFromInetSockaddr(&addr); std::vector entries = ASSERT_NO_ERRNO_AND_VALUE(ProcNetUDPEntries()); diff --git a/test/syscalls/linux/proc_net_unix.cc b/test/syscalls/linux/proc_net_unix.cc index 2fe63f215..66db0acaa 100644 --- a/test/syscalls/linux/proc_net_unix.cc +++ b/test/syscalls/linux/proc_net_unix.cc @@ -46,12 +46,12 @@ enum { // UnixEntry represents a single entry from /proc/net/unix. 
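// Sketch of the InodeFromSocketFD helper above: fstat the descriptor, confirm
// it refers to a socket, and report st_ino, which is the inode number that
// /proc/net/udp prints for that socket. -1 stands in for the PosixError path.
#include <sys/stat.h>

#include <cstdint>

int64_t InodeFromSocketFDSketch(int fd) {
  struct stat s;
  if (fstat(fd, &s) != 0 || !S_ISSOCK(s.st_mode)) {
    return -1;
  }
  return static_cast<int64_t>(s.st_ino);
}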
struct UnixEntry { uintptr_t addr; - uint64 refs; - uint64 protocol; - uint64 flags; - uint64 type; - uint64 state; - uint64 inode; + uint64_t refs; + uint64_t protocol; + uint64_t flags; + uint64_t type; + uint64_t state; + uint64_t inode; std::string path; }; diff --git a/test/syscalls/linux/proc_pid_uid_gid_map.cc b/test/syscalls/linux/proc_pid_uid_gid_map.cc index 8e268ebd1..748f7be58 100644 --- a/test/syscalls/linux/proc_pid_uid_gid_map.cc +++ b/test/syscalls/linux/proc_pid_uid_gid_map.cc @@ -117,13 +117,13 @@ void DenyPidSetgroups(pid_t pid) { } // Returns a valid UID/GID that isn't id. -uint32 another_id(uint32 id) { return (id + 1) % 65535; } +uint32_t another_id(uint32_t id) { return (id + 1) % 65535; } struct TestParam { std::string desc; int cap; std::function get_map_filename; - std::function get_current_id; + std::function get_current_id; }; std::string DescribeTestParam(const ::testing::TestParamInfo& info) { @@ -135,17 +135,17 @@ std::vector UidGidMapTestParams() { [](absl::string_view pid) { return absl::StrCat("/proc/", pid, "/uid_map"); }, - []() -> uint32 { return getuid(); }}, + []() -> uint32_t { return getuid(); }}, TestParam{"GID", CAP_SETGID, [](absl::string_view pid) { return absl::StrCat("/proc/", pid, "/gid_map"); }, - []() -> uint32 { return getgid(); }}}; + []() -> uint32_t { return getgid(); }}}; } class ProcUidGidMapTest : public ::testing::TestWithParam { protected: - uint32 CurrentID() { return GetParam().get_current_id(); } + uint32_t CurrentID() { return GetParam().get_current_id(); } }; class ProcSelfUidGidMapTest : public ProcUidGidMapTest { @@ -198,7 +198,7 @@ TEST_P(ProcSelfUidGidMapTest, IsInitiallyEmpty) { TEST_P(ProcSelfUidGidMapTest, IdentityMapOwnID) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); - uint32 id = CurrentID(); + uint32_t id = CurrentID(); std::string line = absl::StrCat(id, " ", id, " 1"); EXPECT_THAT( InNewUserNamespaceWithMapFD([&](int fd) { @@ -213,7 +213,7 @@ TEST_P(ProcSelfUidGidMapTest, TrailingNewlineAndNULIgnored) { // and an invalid (incomplete) map entry are appended to the valid entry. The // newline should be accepted, and everything after the NUL should be ignored. SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); - uint32 id = CurrentID(); + uint32_t id = CurrentID(); std::string line = absl::StrCat(id, " ", id, " 1\n\0 4 3"); EXPECT_THAT( InNewUserNamespaceWithMapFD([&](int fd) { @@ -227,8 +227,8 @@ TEST_P(ProcSelfUidGidMapTest, TrailingNewlineAndNULIgnored) { TEST_P(ProcSelfUidGidMapTest, NonIdentityMapOwnID) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); - uint32 id = CurrentID(); - uint32 id2 = another_id(id); + uint32_t id = CurrentID(); + uint32_t id2 = another_id(id); std::string line = absl::StrCat(id2, " ", id, " 1"); EXPECT_THAT( InNewUserNamespaceWithMapFD([&](int fd) { @@ -243,8 +243,8 @@ TEST_P(ProcSelfUidGidMapTest, MapOtherID) { // Whether or not we have CAP_SET*ID is irrelevant: the process running in the // new (child) user namespace won't have any capabilities in the current // (parent) user namespace, which is needed. 
- uint32 id = CurrentID(); - uint32 id2 = another_id(id); + uint32_t id = CurrentID(); + uint32_t id2 = another_id(id); std::string line = absl::StrCat(id, " ", id2, " 1"); EXPECT_THAT(InNewUserNamespaceWithMapFD([&](int fd) { DenySelfSetgroups(); @@ -270,8 +270,8 @@ TEST_P(ProcPidUidGidMapTest, MapOtherIDPrivileged) { std::tie(child_pid, cleanup_child) = ASSERT_NO_ERRNO_AND_VALUE(CreateProcessInNewUserNamespace()); - uint32 id = CurrentID(); - uint32 id2 = another_id(id); + uint32_t id = CurrentID(); + uint32_t id2 = another_id(id); std::string line = absl::StrCat(id, " ", id2, " 1"); DenyPidSetgroups(child_pid); auto fd = ASSERT_NO_ERRNO_AND_VALUE(OpenMapFile(child_pid)); diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc index 37dabb1ad..8f3800380 100644 --- a/test/syscalls/linux/ptrace.cc +++ b/test/syscalls/linux/ptrace.cc @@ -574,7 +574,7 @@ TEST_P(PtraceExecveTest, Execve_GetRegs_PeekUser_SIGKILL_TraceClone_TraceExit) { #ifdef __x86_64__ { // CS should be 0x33, indicating an 64-bit binary. - constexpr uint64 kAMD64UserCS = 0x33; + constexpr uint64_t kAMD64UserCS = 0x33; EXPECT_THAT(ptrace(PTRACE_PEEKUSER, leader_tid, offsetof(struct user_regs_struct, cs), 0), SyscallSucceedsWithValue(kAMD64UserCS)); @@ -862,7 +862,7 @@ TEST(PtraceTest, Int3) { TEST(PtraceTest, Sysemu_PokeUser) { constexpr int kSysemuHelperFirstExitCode = 126; - constexpr uint64 kSysemuInjectedExitGroupReturn = 42; + constexpr uint64_t kSysemuInjectedExitGroupReturn = 42; pid_t const child_pid = fork(); if (child_pid == 0) { diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index 5020372c1..dafe64d20 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -109,13 +109,13 @@ constexpr bool IsControlCharacter(char c) { return c <= 31; } struct Field { const char* name; - uint64 mask; - uint64 value; + uint64_t mask; + uint64_t value; }; // ParseFields returns a string representation of value, using the names in // fields. 
-std::string ParseFields(const Field* fields, size_t len, uint64 value) { +std::string ParseFields(const Field* fields, size_t len, uint64_t value) { bool first = true; std::string s; for (size_t i = 0; i < len; i++) { @@ -1213,8 +1213,8 @@ TEST_F(PtyTest, GetWindowSize) { } TEST_F(PtyTest, SetSlaveWindowSize) { - constexpr uint16 kRows = 343; - constexpr uint16 kCols = 2401; + constexpr uint16_t kRows = 343; + constexpr uint16_t kCols = 2401; struct winsize ws = {.ws_row = kRows, .ws_col = kCols}; ASSERT_THAT(ioctl(slave_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); @@ -1226,8 +1226,8 @@ TEST_F(PtyTest, SetSlaveWindowSize) { } TEST_F(PtyTest, SetMasterWindowSize) { - constexpr uint16 kRows = 343; - constexpr uint16 kCols = 2401; + constexpr uint16_t kRows = 343; + constexpr uint16_t kCols = 2401; struct winsize ws = {.ws_row = kRows, .ws_col = kCols}; ASSERT_THAT(ioctl(master_.get(), TIOCSWINSZ, &ws), SyscallSucceeds()); diff --git a/test/syscalls/linux/pwrite64.cc b/test/syscalls/linux/pwrite64.cc index 18f847929..b48fe540d 100644 --- a/test/syscalls/linux/pwrite64.cc +++ b/test/syscalls/linux/pwrite64.cc @@ -52,7 +52,7 @@ class Pwrite64 : public ::testing::Test { TEST_F(Pwrite64, AppendOnly) { int fd; ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds()); - constexpr int64 kBufSize = 1024; + constexpr int64_t kBufSize = 1024; std::vector buf(kBufSize); std::fill(buf.begin(), buf.end(), 'a'); EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 0), @@ -64,7 +64,7 @@ TEST_F(Pwrite64, AppendOnly) { TEST_F(Pwrite64, InvalidArgs) { int fd; ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds()); - constexpr int64 kBufSize = 1024; + constexpr int64_t kBufSize = 1024; std::vector buf(kBufSize); std::fill(buf.begin(), buf.end(), 'a'); EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), -1), diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc index 0c04b974e..0a27506aa 100644 --- a/test/syscalls/linux/raw_socket_hdrincl.cc +++ b/test/syscalls/linux/raw_socket_hdrincl.cc @@ -53,7 +53,7 @@ class RawHDRINCL : public ::testing::Test { // Fills in buf with an IP header, UDP header, and payload. Returns false if // buf_size isn't large enough to hold everything. bool FillPacket(char* buf, size_t buf_size, int port, const char* payload, - uint16 payload_size); + uint16_t payload_size); // The socket used for both reading and writing. int socket_; @@ -104,7 +104,7 @@ struct iphdr RawHDRINCL::LoopbackHeader() { } bool RawHDRINCL::FillPacket(char* buf, size_t buf_size, int port, - const char* payload, uint16 payload_size) { + const char* payload, uint16_t payload_size) { if (buf_size < sizeof(struct iphdr) + sizeof(struct udphdr) + payload_size) { return false; } diff --git a/test/syscalls/linux/rseq.cc b/test/syscalls/linux/rseq.cc index 9b2a76b91..106c045e3 100644 --- a/test/syscalls/linux/rseq.cc +++ b/test/syscalls/linux/rseq.cc @@ -43,7 +43,7 @@ namespace { // only be cleared by execve (or knowing the old rseq address), and glibc (based // on the current unmerged patches) register rseq before calling main()). 
-int RSeq(struct rseq* rseq, uint32 rseq_len, int flags, uint32 sig) { +int RSeq(struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) { return syscall(kRseqSyscall, rseq, rseq_len, flags, sig); } diff --git a/test/syscalls/linux/rseq/critical.h b/test/syscalls/linux/rseq/critical.h index 238143fd0..ac987a25e 100644 --- a/test/syscalls/linux/rseq/critical.h +++ b/test/syscalls/linux/rseq/critical.h @@ -18,7 +18,7 @@ #include "test/syscalls/linux/rseq/types.h" #include "test/syscalls/linux/rseq/uapi.h" -constexpr uint32 kRseqSignature = 0x90909090; +constexpr uint32_t kRseqSignature = 0x90909090; extern "C" { diff --git a/test/syscalls/linux/rseq/rseq.cc b/test/syscalls/linux/rseq/rseq.cc index 4fe7c5ecf..f036db26d 100644 --- a/test/syscalls/linux/rseq/rseq.cc +++ b/test/syscalls/linux/rseq/rseq.cc @@ -49,7 +49,7 @@ int strcmp(const char* s1, const char* s2) { return static_cast(*p1) - static_cast(*p2); } -int sys_rseq(struct rseq* rseq, uint32 rseq_len, int flags, uint32 sig) { +int sys_rseq(struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) { return raw_syscall(kRseqSyscall, rseq, rseq_len, flags, sig); } @@ -176,10 +176,10 @@ int TestAbort() { struct rseq_cs cs = {}; cs.version = 0; cs.flags = 0; - cs.start_ip = reinterpret_cast(&rseq_loop_start); - cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - - reinterpret_cast(&rseq_loop_start); - cs.abort_ip = reinterpret_cast(&rseq_loop_abort); + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_abort); // Loops until abort. If this returns then abort occurred. rseq_loop(&r, &cs); @@ -198,10 +198,10 @@ int TestAbortBefore() { struct rseq_cs cs = {}; cs.version = 0; cs.flags = 0; - cs.start_ip = reinterpret_cast(&rseq_loop_start); - cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - - reinterpret_cast(&rseq_loop_start); - cs.abort_ip = reinterpret_cast(&rseq_loop_early_abort); + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_early_abort); // Loops until abort. If this returns then abort occurred. rseq_loop(&r, &cs); @@ -220,10 +220,10 @@ int TestAbortSignature() { struct rseq_cs cs = {}; cs.version = 0; cs.flags = 0; - cs.start_ip = reinterpret_cast(&rseq_loop_start); - cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - - reinterpret_cast(&rseq_loop_start); - cs.abort_ip = reinterpret_cast(&rseq_loop_abort); + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_abort); // Loops until abort. This should SIGSEGV on abort. rseq_loop(&r, &cs); @@ -242,10 +242,10 @@ int TestAbortPreCommit() { struct rseq_cs cs = {}; cs.version = 0; cs.flags = 0; - cs.start_ip = reinterpret_cast(&rseq_loop_start); - cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - - reinterpret_cast(&rseq_loop_start); - cs.abort_ip = reinterpret_cast(&rseq_loop_pre_commit); + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_pre_commit); // Loops until abort. This should SIGSEGV on abort. 
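// Sketch of the critical-section descriptor built repeatedly in the rseq
// hunks above, with the label addresses passed in as plain function pointers
// so the sketch stands alone. The field layout follows the rseq UAPI: the
// post-commit point is stored as an offset from start_ip, while the abort
// target is an absolute address.
#include <cstdint>

struct RseqCsSketch {
  uint32_t version;
  uint32_t flags;
  uint64_t start_ip;
  uint64_t post_commit_offset;
  uint64_t abort_ip;
};

RseqCsSketch MakeRseqCsSketch(void (*start)(), void (*post_commit)(),
                              void (*abort_target)()) {
  RseqCsSketch cs = {};
  cs.start_ip = reinterpret_cast<uint64_t>(start);
  cs.post_commit_offset = reinterpret_cast<uint64_t>(post_commit) -
                          reinterpret_cast<uint64_t>(start);
  cs.abort_ip = reinterpret_cast<uint64_t>(abort_target);
  return cs;
}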
rseq_loop(&r, &cs); @@ -264,10 +264,10 @@ int TestAbortClearsCS() { struct rseq_cs cs = {}; cs.version = 0; cs.flags = 0; - cs.start_ip = reinterpret_cast(&rseq_loop_start); - cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - - reinterpret_cast(&rseq_loop_start); - cs.abort_ip = reinterpret_cast(&rseq_loop_abort); + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_abort); // Loops until abort. If this returns then abort occurred. rseq_loop(&r, &cs); @@ -290,10 +290,10 @@ int TestInvalidAbortClearsCS() { struct rseq_cs cs = {}; cs.version = 0; cs.flags = 0; - cs.start_ip = reinterpret_cast(&rseq_loop_start); - cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - - reinterpret_cast(&rseq_loop_start); - cs.abort_ip = reinterpret_cast(&rseq_loop_abort); + cs.start_ip = reinterpret_cast(&rseq_loop_start); + cs.post_commit_offset = reinterpret_cast(&rseq_loop_post_commit) - + reinterpret_cast(&rseq_loop_start); + cs.abort_ip = reinterpret_cast(&rseq_loop_abort); __atomic_store_n(&r.rseq_cs, &cs, __ATOMIC_RELAXED); diff --git a/test/syscalls/linux/rseq/types.h b/test/syscalls/linux/rseq/types.h index 7f1e0c5c2..b6afe9817 100644 --- a/test/syscalls/linux/rseq/types.h +++ b/test/syscalls/linux/rseq/types.h @@ -18,14 +18,14 @@ using size_t = __SIZE_TYPE__; using uintptr_t = __UINTPTR_TYPE__; -using uint8 = __UINT8_TYPE__; -using uint16 = __UINT16_TYPE__; -using uint32 = __UINT32_TYPE__; -using uint64 = __UINT64_TYPE__; +using uint8_t = __UINT8_TYPE__; +using uint16_t = __UINT16_TYPE__; +using uint32_t = __UINT32_TYPE__; +using uint64_t = __UINT64_TYPE__; -using int8 = __INT8_TYPE__; -using int16 = __INT16_TYPE__; -using int32 = __INT32_TYPE__; -using int64 = __INT64_TYPE__; +using int8_t = __INT8_TYPE__; +using int16_t = __INT16_TYPE__; +using int32_t = __INT32_TYPE__; +using int64_t = __INT64_TYPE__; #endif // GVISOR_TEST_SYSCALLS_LINUX_RSEQ_TYPES_H_ diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index 7a2c1191a..7e41fe7d8 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -49,12 +49,12 @@ namespace testing { namespace { // A syscall not implemented by Linux that we don't expect to be called. -constexpr uint32 kFilteredSyscall = SYS_vserver; +constexpr uint32_t kFilteredSyscall = SYS_vserver; // Applies a seccomp-bpf filter that returns `filtered_result` for // `sysno` and allows all other syscalls. Async-signal-safe. -void ApplySeccompFilter(uint32 sysno, uint32 filtered_result, - uint32 flags = 0) { +void ApplySeccompFilter(uint32_t sysno, uint32_t filtered_result, + uint32_t flags = 0) { // "Prior to [PR_SET_SECCOMP], the task must call prctl(PR_SET_NO_NEW_PRIVS, // 1) or run with CAP_SYS_ADMIN privileges in its namespace." 
- // Documentation/prctl/seccomp_filter.txt @@ -162,7 +162,7 @@ TEST(SeccompTest, RetKillOnlyKillsOneThread) { TEST(SeccompTest, RetTrapCausesSIGSYS) { pid_t const pid = fork(); if (pid == 0) { - constexpr uint16 kTrapValue = 0xdead; + constexpr uint16_t kTrapValue = 0xdead; RegisterSignalHandler( SIGSYS, +[](int signo, siginfo_t* info, void* ucv) { ucontext_t* uc = static_cast(ucv); @@ -191,7 +191,7 @@ TEST(SeccompTest, RetTrapCausesSIGSYS) { #ifdef __x86_64__ -constexpr uint64 kVsyscallTimeEntry = 0xffffffffff600400; +constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; time_t vsyscall_time(time_t* t) { return reinterpret_cast(kVsyscallTimeEntry)(t); @@ -202,7 +202,7 @@ TEST(SeccompTest, SeccompAppliesToVsyscall) { pid_t const pid = fork(); if (pid == 0) { - constexpr uint16 kTrapValue = 0xdead; + constexpr uint16_t kTrapValue = 0xdead; RegisterSignalHandler( SIGSYS, +[](int signo, siginfo_t* info, void* ucv) { ucontext_t* uc = static_cast(ucv); @@ -335,7 +335,7 @@ TEST(SeccompTest, TsyncAppliesToAllThreads) { // This test will validate that seccomp(2) rejects unsupported flags. TEST(SeccompTest, SeccompRejectsUnknownFlags) { - constexpr uint32 kInvalidFlag = 123; + constexpr uint32_t kInvalidFlag = 123; ASSERT_THAT( syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, kInvalidFlag, nullptr), SyscallFailsWithErrno(EINVAL)); diff --git a/test/syscalls/linux/semaphore.cc b/test/syscalls/linux/semaphore.cc index a9e8a44c1..e9b131ca9 100644 --- a/test/syscalls/linux/semaphore.cc +++ b/test/syscalls/linux/semaphore.cc @@ -274,7 +274,7 @@ TEST(SemaphoreTest, SemOpRandom) { // Protects the seed below. absl::Mutex mutex; - uint32 seed = time(nullptr); + uint32_t seed = time(nullptr); int count = 0; // Tracks semaphore value. bool done = false; // Tells waiters to stop after signal threads are done. @@ -284,7 +284,7 @@ TEST(SemaphoreTest, SemOpRandom) { for (auto& dec : decs) { dec = absl::make_unique([&sem, &mutex, &count, &seed, &done] { for (size_t i = 0; i < 500; ++i) { - int16 val; + int16_t val; { absl::MutexLock l(&mutex); if (done) { @@ -325,7 +325,7 @@ TEST(SemaphoreTest, SemOpRandom) { for (auto& inc : incs) { inc = absl::make_unique([&sem, &mutex, &count, &seed] { for (size_t i = 0; i < 500; ++i) { - int16 val; + int16_t val; { absl::MutexLock l(&mutex); val = (rand_r(&seed) % 10 + 1); // Rand between 1 and 10. @@ -415,14 +415,14 @@ TEST(SemaphoreTest, SemCtlValAll) { ASSERT_THAT(sem.get(), SyscallSucceeds()); // Semaphores must start with 0. - uint16 get[3] = {10, 10, 10}; + uint16_t get[3] = {10, 10, 10}; EXPECT_THAT(semctl(sem.get(), 1, GETALL, get), SyscallSucceedsWithValue(0)); for (auto v : get) { EXPECT_EQ(v, 0); } // SetAll and check that they were set. 
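// A minimal sketch of the kind of filter ApplySeccompFilter installs above:
// compare the syscall number against one target and return a caller-chosen
// result for it, allowing everything else. Unlike the real helper it is not
// async-signal-safe, always uses prctl rather than seccomp(2) with flags, and
// omits the architecture check a production filter would also perform.
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <stddef.h>
#include <sys/prctl.h>

#include <cstdint>

int ApplyFilterSketch(uint32_t sysno, uint32_t filtered_result) {
  struct sock_filter filter[] = {
      // Load the syscall number out of struct seccomp_data.
      BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),
      // On a match, fall through to the filtered result; otherwise skip one
      // instruction to SECCOMP_RET_ALLOW.
      BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, sysno, 0, 1),
      BPF_STMT(BPF_RET | BPF_K, filtered_result),
      BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
  };
  struct sock_fprog prog = {};
  prog.len = sizeof(filter) / sizeof(filter[0]);
  prog.filter = filter;
  // PR_SET_NO_NEW_PRIVS is required before installing a filter without
  // CAP_SYS_ADMIN.
  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) {
    return -1;
  }
  return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}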
- uint16 vals[3] = {0, 10, 20}; + uint16_t vals[3] = {0, 10, 20}; EXPECT_THAT(semctl(sem.get(), 1, SETALL, vals), SyscallSucceedsWithValue(0)); EXPECT_THAT(semctl(sem.get(), 1, GETALL, get), SyscallSucceedsWithValue(0)); for (size_t i = 0; i < ABSL_ARRAYSIZE(vals); ++i) { diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc index 80700615f..7ba752599 100644 --- a/test/syscalls/linux/shm.cc +++ b/test/syscalls/linux/shm.cc @@ -30,7 +30,7 @@ namespace { using ::testing::_; -const uint64 kAllocSize = kPageSize * 128ULL; +const uint64_t kAllocSize = kPageSize * 128ULL; PosixErrorOr Shmat(int shmid, const void* shmaddr, int shmflg) { const intptr_t addr = @@ -320,11 +320,11 @@ TEST(ShmTest, RemovedSegmentsAreDestroyed) { Shmget(IPC_PRIVATE, kAllocSize, IPC_CREAT | 0777)); const char* addr = ASSERT_NO_ERRNO_AND_VALUE(Shmat(shm.id(), nullptr, 0)); - const uint64 alloc_pages = kAllocSize / kPageSize; + const uint64_t alloc_pages = kAllocSize / kPageSize; struct shm_info info; ASSERT_NO_ERRNO(Shmctl(0 /*ignored*/, SHM_INFO, &info)); - const uint64 before = info.shm_tot; + const uint64_t before = info.shm_tot; ASSERT_NO_ERRNO(shm.Rmid()); ASSERT_NO_ERRNO(Shmdt(addr)); @@ -400,7 +400,7 @@ TEST(ShmDeathTest, SegmentNotAccessibleAfterDetach) { TEST(ShmTest, RequestingSegmentSmallerThanSHMMINFails) { struct shminfo info; ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info)); - const uint64 size = info.shmmin - 1; + const uint64_t size = info.shmmin - 1; EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777), PosixErrorIs(EINVAL, _)); } @@ -408,7 +408,7 @@ TEST(ShmTest, RequestingSegmentSmallerThanSHMMINFails) { TEST(ShmTest, RequestingSegmentLargerThanSHMMAXFails) { struct shminfo info; ASSERT_NO_ERRNO(Shmctl(0, IPC_INFO, &info)); - const uint64 size = info.shmmax + kPageSize; + const uint64_t size = info.shmmax + kPageSize; EXPECT_THAT(Shmget(IPC_PRIVATE, size, IPC_CREAT | 0777), PosixErrorIs(EINVAL, _)); } diff --git a/test/syscalls/linux/sigaltstack.cc b/test/syscalls/linux/sigaltstack.cc index 9a0816e10..62b04ef1d 100644 --- a/test/syscalls/linux/sigaltstack.cc +++ b/test/syscalls/linux/sigaltstack.cc @@ -114,7 +114,7 @@ TEST(SigaltstackTest, ResetByExecve) { volatile bool badhandler_on_sigaltstack = true; // Set by the handler. char* volatile badhandler_low_water_mark = nullptr; // Set by the handler. -volatile uint8 badhandler_recursive_faults = 0; // Consumed by the handler. +volatile uint8_t badhandler_recursive_faults = 0; // Consumed by the handler. void badhandler(int sig, siginfo_t* siginfo, void* arg) { char stack_var = 0; diff --git a/test/syscalls/linux/sigiret.cc b/test/syscalls/linux/sigiret.cc index 207506569..a47c781ea 100644 --- a/test/syscalls/linux/sigiret.cc +++ b/test/syscalls/linux/sigiret.cc @@ -28,8 +28,8 @@ namespace testing { namespace { -constexpr uint64 kOrigRcx = 0xdeadbeeffacefeed; -constexpr uint64 kOrigR11 = 0xfacefeedbaad1dea; +constexpr uint64_t kOrigRcx = 0xdeadbeeffacefeed; +constexpr uint64_t kOrigR11 = 0xfacefeedbaad1dea; volatile int gotvtalrm, ready; @@ -40,8 +40,8 @@ void sigvtalrm(int sig, siginfo_t* siginfo, void* _uc) { // - test is in the busy-wait loop waiting for signal. // - %rcx and %r11 values in mcontext_t match kOrigRcx and kOrigR11. if (ready && - static_cast(uc->uc_mcontext.gregs[REG_RCX]) == kOrigRcx && - static_cast(uc->uc_mcontext.gregs[REG_R11]) == kOrigR11) { + static_cast(uc->uc_mcontext.gregs[REG_RCX]) == kOrigRcx && + static_cast(uc->uc_mcontext.gregs[REG_R11]) == kOrigR11) { // Modify the values %rcx and %r11 in the ucontext. 
These are the // values seen by the application after the signal handler returns. uc->uc_mcontext.gregs[REG_RCX] = ~kOrigRcx; @@ -69,8 +69,8 @@ TEST(SigIretTest, CheckRcxR11) { ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_VIRTUAL, itimer)); // Initialize %rcx and %r11 and spin until the signal handler returns. - uint64 rcx = kOrigRcx; - uint64 r11 = kOrigR11; + uint64_t rcx = kOrigRcx; + uint64_t r11 = kOrigR11; asm volatile( "movq %[rcx], %%rcx;" // %rcx = rcx "movq %[r11], %%r11;" // %r11 = r11 @@ -91,7 +91,7 @@ TEST(SigIretTest, CheckRcxR11) { EXPECT_EQ(r11, ~kOrigR11); } -constexpr uint64 kNonCanonicalRip = 0xCCCC000000000000; +constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000; // Test that a non-canonical signal handler faults as expected. TEST(SigIretTest, BadHandler) { diff --git a/test/syscalls/linux/socket_bind_to_device_distribution.cc b/test/syscalls/linux/socket_bind_to_device_distribution.cc index c705da1b4..5ed57625c 100644 --- a/test/syscalls/linux/socket_bind_to_device_distribution.cc +++ b/test/syscalls/linux/socket_bind_to_device_distribution.cc @@ -77,13 +77,13 @@ class BindToDeviceDistributionTest } }; -PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { +PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { switch (family) { case AF_INET: - return static_cast( + return static_cast( reinterpret_cast(&addr)->sin_port); case AF_INET6: - return static_cast( + return static_cast( reinterpret_cast(&addr)->sin6_port); default: return PosixError(EINVAL, @@ -91,7 +91,7 @@ PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { } } -PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16 port) { +PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { switch (family) { case AF_INET: reinterpret_cast(addr)->sin_port = port; @@ -157,7 +157,7 @@ TEST_P(BindToDeviceDistributionTest, Tcp) { getsockname(listener_fds[0].get(), reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); } @@ -190,7 +190,7 @@ TEST_P(BindToDeviceDistributionTest, Tcp) { // cause the test to use absurd amounts of memory. 
// // See: https://tools.ietf.org/html/rfc2525#page-50 section 2.17 - uint16 data; + uint16_t data; EXPECT_THAT( RetryEINTR(recv)(fd.ValueOrDie().get(), &data, sizeof(data), 0), SyscallSucceedsWithValue(sizeof(data))); @@ -296,7 +296,7 @@ TEST_P(BindToDeviceDistributionTest, Udp) { getsockname(listener_fds[0].get(), reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port)); ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc index ee9856f7f..e8f24a59e 100644 --- a/test/syscalls/linux/socket_generic.cc +++ b/test/syscalls/linux/socket_generic.cc @@ -507,7 +507,7 @@ TEST_P(AllSocketPairTest, SoRcvTimeoIsSetLargerArg) { struct timeval_with_extra { struct timeval tv; - int64 extra_data; + int64_t extra_data; } ABSL_ATTRIBUTE_PACKED; timeval_with_extra tv_extra; diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 12df2b35a..2f9821555 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -47,13 +47,13 @@ namespace { using ::testing::Gt; -PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { +PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { switch (family) { case AF_INET: - return static_cast( + return static_cast( reinterpret_cast(&addr)->sin_port); case AF_INET6: - return static_cast( + return static_cast( reinterpret_cast(&addr)->sin6_port); default: return PosixError(EINVAL, @@ -61,7 +61,7 @@ PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { } } -PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16 port) { +PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { switch (family) { case AF_INET: reinterpret_cast(addr)->sin_port = port; @@ -276,7 +276,7 @@ void tcpSimpleConnectTest(TestAddress const& listener, ASSERT_THAT(getsockname(listen_fd.get(), reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); // Connect to the listening socket. @@ -339,7 +339,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { ASSERT_THAT(getsockname(listen_fd.get(), reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); DisableSave ds; // Too many system calls. @@ -400,7 +400,7 @@ TEST_P(SocketInetLoopbackTest, TCPbacklog) { ASSERT_THAT(getsockname(listen_fd.get(), reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); int i = 0; while (1) { @@ -468,7 +468,7 @@ TEST_P(SocketInetLoopbackTest, TCPFinWait2Test_NoRandomSave) { reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); // Connect to the listening socket. 
@@ -576,7 +576,7 @@ TEST_P(SocketInetLoopbackTest, TCPLinger2TimeoutAfterClose_NoRandomSave) { reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); // Connect to the listening socket. @@ -650,7 +650,7 @@ TEST_P(SocketInetLoopbackTest, TCPResetAfterClose) { reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); // Connect to the listening socket. @@ -717,7 +717,7 @@ TEST_P(SocketInetLoopbackTest, TCPTimeWaitTest_NoRandomSave) { reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); // Connect to the listening socket. @@ -794,7 +794,7 @@ TEST_P(SocketInetLoopbackTest, AcceptedInheritsTCPUserTimeout) { reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - const uint16 port = + const uint16_t port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); // Set the userTimeout on the listening socket. @@ -898,7 +898,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { getsockname(listener_fds[0].get(), reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port)); ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); @@ -935,7 +935,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { // cause the test to use absurd amounts of memory. 
// // See: https://tools.ietf.org/html/rfc2525#page-50 section 2.17 - uint16 data; + uint16_t data; EXPECT_THAT( RetryEINTR(recv)(fd.ValueOrDie().get(), &data, sizeof(data), 0), SyscallSucceedsWithValue(sizeof(data))); @@ -1022,7 +1022,7 @@ TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThread) { getsockname(listener_fds[0].get(), reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port)); ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); @@ -1138,7 +1138,7 @@ TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort) { getsockname(listener_fds[0].get(), reinterpret_cast(&listen_addr), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); ASSERT_NO_ERRNO(SetAddrPort(listener.family(), &listen_addr, port)); ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); @@ -1174,7 +1174,7 @@ TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort) { pollfds[i].events = POLLIN; } - std::map portToFD; + std::map portToFD; int received = 0; while (received < kConnectAttempts * 2) { @@ -1196,7 +1196,7 @@ TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort) { fd, &data, sizeof(data), 0, reinterpret_cast(&addr), &addrlen), SyscallSucceedsWithValue(sizeof(data))); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(connector.family(), addr)); auto prev_port = portToFD.find(port); // Check that all packets from one client have been delivered to the @@ -1257,7 +1257,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedLoopbackOnlyReservesV4) { ASSERT_THAT(getsockname(fd_dual.get(), reinterpret_cast(&addr_dual), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); // Verify that we can still bind the v6 loopback on the same port. @@ -1309,7 +1309,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedAnyOnlyReservesV4) { ASSERT_THAT(getsockname(fd_dual.get(), reinterpret_cast(&addr_dual), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); // Verify that we can still bind the v6 loopback on the same port. @@ -1360,7 +1360,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, DualStackV6AnyReservesEverything) { ASSERT_THAT(getsockname(fd_dual.get(), reinterpret_cast(&addr_dual), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); // Verify that binding the v6 loopback with the same port fails. @@ -1419,7 +1419,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) { ASSERT_THAT(getsockname(fd_dual.get(), reinterpret_cast(&addr_dual), &addrlen), SyscallSucceeds()); - uint16 const port = + uint16_t const port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); // Verify that binding the v6 loopback with the same port fails. 
@@ -1498,7 +1498,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) { reinterpret_cast(&connected_addr), &connected_addr_len), SyscallSucceeds()); - uint16 const ephemeral_port = + uint16_t const ephemeral_port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); // Verify that we actually got an ephemeral port. @@ -1603,7 +1603,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReservedReuseAddr) { reinterpret_cast(&connected_addr), &connected_addr_len), SyscallSucceeds()); - uint16 const ephemeral_port = + uint16_t const ephemeral_port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); // Verify that we actually got an ephemeral port. @@ -1665,7 +1665,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) { reinterpret_cast(&connected_addr), &connected_addr_len), SyscallSucceeds()); - uint16 const ephemeral_port = + uint16_t const ephemeral_port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); // Verify that we actually got an ephemeral port. @@ -1794,7 +1794,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, reinterpret_cast(&connected_addr), &connected_addr_len), SyscallSucceeds()); - uint16 const ephemeral_port = + uint16_t const ephemeral_port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); // Verify that we actually got an ephemeral port. @@ -1856,7 +1856,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { reinterpret_cast(&connected_addr), &connected_addr_len), SyscallSucceeds()); - uint16 const ephemeral_port = + uint16_t const ephemeral_port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); // Verify that we actually got an ephemeral port. @@ -1988,7 +1988,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReservedReuseAddr) { reinterpret_cast(&connected_addr), &connected_addr_len), SyscallSucceeds()); - uint16 const ephemeral_port = + uint16_t const ephemeral_port = ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr.family(), connected_addr)); // Verify that we actually got an ephemeral port. 
diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc index 4a8337159..ca597e267 100644 --- a/test/syscalls/linux/socket_ip_unbound.cc +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -223,7 +223,7 @@ TEST_P(IPUnboundSocketTest, CheckSkipECN) { TOSOption t = GetTOSOption(GetParam().domain); EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), SyscallSucceedsWithValue(0)); - int expect = static_cast(set); + int expect = static_cast(set); if (GetParam().protocol == IPPROTO_TCP) { expect &= ~INET_ECN_MASK; } @@ -267,7 +267,7 @@ TEST_P(IPUnboundSocketTest, SmallTOSOptionSize) { EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, i), SyscallSucceedsWithValue(0)); expect_tos = set; - expect_sz = sizeof(uint8); + expect_sz = sizeof(uint8_t); } else { EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, i), SyscallFailsWithErrno(EINVAL)); @@ -314,7 +314,7 @@ TEST_P(IPUnboundSocketTest, NegativeTOS) { SyscallSucceedsWithValue(0)); int expect; if (GetParam().domain == AF_INET) { - expect = static_cast(set); + expect = static_cast(set); if (GetParam().protocol == IPPROTO_TCP) { expect &= ~INET_ECN_MASK; } @@ -340,7 +340,7 @@ TEST_P(IPUnboundSocketTest, InvalidNegativeTOS) { if (GetParam().domain == AF_INET) { EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), SyscallSucceedsWithValue(0)); - expect = static_cast(set); + expect = static_cast(set); if (GetParam().protocol == IPPROTO_TCP) { expect &= ~INET_ECN_MASK; } diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc index 689014a59..405dbbd73 100644 --- a/test/syscalls/linux/socket_netdevice.cc +++ b/test/syscalls/linux/socket_netdevice.cc @@ -70,14 +70,14 @@ TEST(NetdeviceTest, Netmask) { // netmask obtained via ioctl. FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); - uint32 port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { struct nlmsghdr hdr; struct rtgenmsg rgm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req; req.hdr.nlmsg_len = sizeof(req); @@ -109,7 +109,7 @@ TEST(NetdeviceTest, Netmask) { struct ifaddrmsg *ifaddrmsg = reinterpret_cast(NLMSG_DATA(hdr)); - if (ifaddrmsg->ifa_index == static_cast(ifr.ifr_ifindex) && + if (ifaddrmsg->ifa_index == static_cast(ifr.ifr_ifindex) && ifaddrmsg->ifa_family == AF_INET) { prefixlen = ifaddrmsg->ifa_prefixlen; } @@ -120,7 +120,7 @@ TEST(NetdeviceTest, Netmask) { // Netmask is stored big endian in struct sockaddr_in, so we do the same for // comparison. - uint32 mask = 0xffffffff << (32 - prefixlen); + uint32_t mask = 0xffffffff << (32 - prefixlen); mask = absl::gbswap_32(mask); // Check that the loopback interface has the correct subnet mask. 
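// Sketch of the prefix-length-to-netmask conversion at the end of the hunk
// above: build the mask in host order, then convert to network byte order so
// it compares against the big-endian value stored in struct sockaddr_in
// (htonl is used here in place of absl::gbswap_32, which amounts to the same
// swap on little-endian hosts).
#include <arpa/inet.h>

#include <cstdint>

uint32_t NetmaskFromPrefixLenSketch(int prefixlen) {
  if (prefixlen <= 0) {
    return 0;
  }
  const uint32_t host_order_mask =
      prefixlen >= 32 ? 0xffffffffu : 0xffffffffu << (32 - prefixlen);
  return htonl(host_order_mask);
}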
diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index 5612f1a13..ef567f512 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -116,14 +116,14 @@ void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) { TEST(NetlinkRouteTest, GetLinkDump) { FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); - uint32 port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { struct nlmsghdr hdr; struct ifinfomsg ifm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req = {}; req.hdr.nlmsg_len = sizeof(req); @@ -164,7 +164,7 @@ TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { struct ifinfomsg ifm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req = {}; req.hdr.nlmsg_len = sizeof(req); @@ -198,7 +198,7 @@ TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { struct ifinfomsg ifm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req = {}; req.hdr.nlmsg_len = sizeof(req); @@ -238,7 +238,7 @@ TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) { struct ifinfomsg ifm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req = {}; req.hdr.nlmsg_len = sizeof(req); @@ -274,7 +274,7 @@ TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) { TEST(NetlinkRouteTest, ControlMessageIgnored) { FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); - uint32 port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { struct nlmsghdr control_hdr; @@ -282,7 +282,7 @@ TEST(NetlinkRouteTest, ControlMessageIgnored) { struct ifinfomsg ifm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req = {}; @@ -310,14 +310,14 @@ TEST(NetlinkRouteTest, ControlMessageIgnored) { TEST(NetlinkRouteTest, GetAddrDump) { FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); - uint32 port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { struct nlmsghdr hdr; struct rtgenmsg rgm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req; req.hdr.nlmsg_len = sizeof(req); @@ -371,14 +371,14 @@ TEST(NetlinkRouteTest, LookupAll) { TEST(NetlinkRouteTest, GetRouteDump) { FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); - uint32 port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); struct request { struct nlmsghdr hdr; struct rtmsg rtm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req = {}; req.hdr.nlmsg_len = sizeof(req); @@ -454,7 +454,7 @@ TEST(NetlinkRouteTest, RecvmsgTrunc) { struct rtgenmsg rgm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req; req.hdr.nlmsg_len = sizeof(req); @@ -531,7 +531,7 @@ TEST(NetlinkRouteTest, RecvmsgTruncPeek) { struct rtgenmsg rgm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req; req.hdr.nlmsg_len = sizeof(req); @@ -611,7 +611,7 @@ TEST(NetlinkRouteTest, NoPasscredNoCreds) { struct rtgenmsg rgm; }; - constexpr uint32 kSeq = 12345; + 
constexpr uint32_t kSeq = 12345; struct request req; req.hdr.nlmsg_len = sizeof(req); @@ -659,7 +659,7 @@ TEST(NetlinkRouteTest, PasscredCreds) { struct rtgenmsg rgm; }; - constexpr uint32 kSeq = 12345; + constexpr uint32_t kSeq = 12345; struct request req; req.hdr.nlmsg_len = sizeof(req); diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index 17f99c238..723f5d728 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -40,7 +40,7 @@ PosixErrorOr NetlinkBoundSocket(int protocol) { return std::move(fd); } -PosixErrorOr NetlinkPortID(int fd) { +PosixErrorOr NetlinkPortID(int fd) { struct sockaddr_nl addr; socklen_t addrlen = sizeof(addr); @@ -48,7 +48,7 @@ PosixErrorOr NetlinkPortID(int fd) { getsockname(fd, reinterpret_cast(&addr), &addrlen)); MaybeSave(); - return static_cast(addr.nl_pid); + return static_cast(addr.nl_pid); } PosixError NetlinkRequestResponse( diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h index bd0c1d79b..76e772c48 100644 --- a/test/syscalls/linux/socket_netlink_util.h +++ b/test/syscalls/linux/socket_netlink_util.h @@ -30,7 +30,7 @@ namespace testing { PosixErrorOr NetlinkBoundSocket(int protocol); // Returns the port ID of the passed socket. -PosixErrorOr NetlinkPortID(int fd); +PosixErrorOr NetlinkPortID(int fd); // Send the passed request and call fn will all response netlink messages. PosixError NetlinkRequestResponse( diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc index 2169ff1c6..eff7d577e 100644 --- a/test/syscalls/linux/socket_test_util.cc +++ b/test/syscalls/linux/socket_test_util.cc @@ -507,7 +507,7 @@ void TransferTest(int fd1, int fd2) { // Initializes the given buffer with random data. void RandomizeBuffer(char* ptr, size_t len) { - uint32 seed = time(nullptr); + uint32_t seed = time(nullptr); for (size_t i = 0; i < len; ++i) { ptr[i] = static_cast(rand_r(&seed)); } diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index 562b6a8d4..85232cb1f 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -139,7 +139,7 @@ TEST(SpliceTest, PipeOffsets) { // Event FDs may be used with splice without an offset. TEST(SpliceTest, FromEventFD) { // Open the input eventfd with an initial value so that it is readable. - constexpr uint64 kEventFDValue = 1; + constexpr uint64_t kEventFDValue = 1; int efd; ASSERT_THAT(efd = eventfd(kEventFDValue, 0), SyscallSucceeds()); const FileDescriptor in_fd(efd); diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index 6b259cb89..30de2f8ff 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -568,35 +568,35 @@ TEST(SimpleStatTest, AnonDeviceAllocatesUniqueInodesAcrossSaveRestore) { // struct kernel_statx_timestamp is a Linux statx_timestamp struct. struct kernel_statx_timestamp { - int64 tv_sec; - uint32 tv_nsec; - int32 __reserved; + int64_t tv_sec; + uint32_t tv_nsec; + int32_t __reserved; }; // struct kernel_statx is a Linux statx struct. Old versions of glibc do not // expose it. 
See include/uapi/linux/stat.h struct kernel_statx { - uint32 stx_mask; - uint32 stx_blksize; - uint64 stx_attributes; - uint32 stx_nlink; - uint32 stx_uid; - uint32 stx_gid; - uint16 stx_mode; - uint16 __spare0[1]; - uint64 stx_ino; - uint64 stx_size; - uint64 stx_blocks; - uint64 stx_attributes_mask; + uint32_t stx_mask; + uint32_t stx_blksize; + uint64_t stx_attributes; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t __spare0[1]; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_attributes_mask; struct kernel_statx_timestamp stx_atime; struct kernel_statx_timestamp stx_btime; struct kernel_statx_timestamp stx_ctime; struct kernel_statx_timestamp stx_mtime; - uint32 stx_rdev_major; - uint32 stx_rdev_minor; - uint32 stx_dev_major; - uint32 stx_dev_minor; - uint64 __spare2[14]; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t __spare2[14]; }; int statx(int dirfd, const char *pathname, int flags, unsigned int mask, diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc index abcabaffb..7e73325bf 100644 --- a/test/syscalls/linux/sticky.cc +++ b/test/syscalls/linux/sticky.cc @@ -29,8 +29,8 @@ #include "test/util/test_util.h" #include "test/util/thread_util.h" -ABSL_FLAG(int32, scratch_uid, 65534, "first scratch UID"); -ABSL_FLAG(int32, scratch_gid, 65534, "first scratch GID"); +ABSL_FLAG(int32_t, scratch_uid, 65534, "first scratch UID"); +ABSL_FLAG(int32_t, scratch_gid, 65534, "first scratch GID"); namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/sysret.cc b/test/syscalls/linux/sysret.cc index d98d6be91..819fa655a 100644 --- a/test/syscalls/linux/sysret.cc +++ b/test/syscalls/linux/sysret.cc @@ -26,8 +26,8 @@ namespace testing { namespace { -constexpr uint64 kNonCanonicalRip = 0xCCCC000000000000; -constexpr uint64 kNonCanonicalRsp = 0xFFFF000000000000; +constexpr uint64_t kNonCanonicalRip = 0xCCCC000000000000; +constexpr uint64_t kNonCanonicalRsp = 0xFFFF000000000000; class SysretTest : public ::testing::Test { protected: @@ -60,12 +60,12 @@ class SysretTest : public ::testing::Test { ASSERT_THAT(ptrace(PTRACE_DETACH, child_, 0, 0), SyscallSucceeds()); } - void SetRip(uint64 newrip) { + void SetRip(uint64_t newrip) { regs_.rip = newrip; ASSERT_THAT(ptrace(PTRACE_SETREGS, child_, 0, ®s_), SyscallSucceeds()); } - void SetRsp(uint64 newrsp) { + void SetRsp(uint64_t newrsp) { regs_.rsp = newrsp; ASSERT_THAT(ptrace(PTRACE_SETREGS, child_, 0, ®s_), SyscallSucceeds()); } diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index cb304d6f5..33a5ac66c 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -640,7 +640,7 @@ TEST_P(TcpSocketTest, Tiocinq) { size_t size = sizeof(buf); ASSERT_THAT(RetryEINTR(write)(s_, buf, size), SyscallSucceedsWithValue(size)); - uint32 seed = time(nullptr); + uint32_t seed = time(nullptr); const size_t max_chunk = size / 10; while (size > 0) { size_t chunk = (rand_r(&seed) % max_chunk) + 1; diff --git a/test/syscalls/linux/time.cc b/test/syscalls/linux/time.cc index 03e028f50..c7eead17e 100644 --- a/test/syscalls/linux/time.cc +++ b/test/syscalls/linux/time.cc @@ -28,7 +28,7 @@ constexpr long kFudgeSeconds = 5; // Mimics the time(2) wrapper from glibc prior to 2.15. 
time_t vsyscall_time(time_t* t) { - constexpr uint64 kVsyscallTimeEntry = 0xffffffffff600400; + constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; return reinterpret_cast(kVsyscallTimeEntry)(t); } @@ -63,7 +63,7 @@ TEST(TimeTest, VsyscallTime_InvalidAddressSIGSEGV) { } int vsyscall_gettimeofday(struct timeval* tv, struct timezone* tz) { - constexpr uint64 kVsyscallGettimeofdayEntry = 0xffffffffff600000; + constexpr uint64_t kVsyscallGettimeofdayEntry = 0xffffffffff600000; return reinterpret_cast( kVsyscallGettimeofdayEntry)(tv, tz); } diff --git a/test/syscalls/linux/timerfd.cc b/test/syscalls/linux/timerfd.cc index d87dbc666..86ed87b7c 100644 --- a/test/syscalls/linux/timerfd.cc +++ b/test/syscalls/linux/timerfd.cc @@ -69,9 +69,9 @@ TEST_P(TimerfdTest, SingleShot) { // The timer should fire exactly once since the interval is zero. absl::SleepFor(kDelay + TimerSlack()); - uint64 val = 0; - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), - SyscallSucceedsWithValue(sizeof(uint64))); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); EXPECT_EQ(1, val); } @@ -89,9 +89,9 @@ TEST_P(TimerfdTest, Periodic) { // Expect to see at least kPeriods expirations. More may occur due to the // timer slack, or due to delays from scheduling or save/restore. absl::SleepFor(kPeriods * kDelay + TimerSlack()); - uint64 val = 0; - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), - SyscallSucceedsWithValue(sizeof(uint64))); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); EXPECT_GE(val, kPeriods); } @@ -106,9 +106,9 @@ TEST_P(TimerfdTest, BlockingRead) { SyscallSucceeds()); // read should block until the timer fires. - uint64 val = 0; - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), - SyscallSucceedsWithValue(sizeof(uint64))); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); auto const end_time = absl::Now(); EXPECT_EQ(1, val); EXPECT_GE((end_time - start_time) + TimerSlack(), kDelay); @@ -122,8 +122,8 @@ TEST_P(TimerfdTest, NonblockingRead_NoRandomSave) { // Since the timer is initially disabled and has never fired, read should // return EAGAIN. - uint64 val = 0; - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), SyscallFailsWithErrno(EAGAIN)); DisableSave ds; // Timing-sensitive. @@ -135,19 +135,19 @@ TEST_P(TimerfdTest, NonblockingRead_NoRandomSave) { SyscallSucceeds()); // Since the timer has not yet fired, read should return EAGAIN. - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), SyscallFailsWithErrno(EAGAIN)); ds.reset(); // No longer timing-sensitive. // After the timer fires, read should indicate 1 expiration. absl::SleepFor(kDelay + TimerSlack()); - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), - SyscallSucceedsWithValue(sizeof(uint64))); + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); EXPECT_EQ(1, val); // The successful read should have reset the number of expirations. 
- ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), SyscallFailsWithErrno(EAGAIN)); } @@ -179,8 +179,8 @@ TEST_P(TimerfdTest, BlockingPoll_SetTimeResetsExpirations) { its.it_value.tv_sec = 0; ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), SyscallSucceeds()); - uint64 val = 0; - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), SyscallFailsWithErrno(EAGAIN)); } @@ -198,16 +198,16 @@ TEST_P(TimerfdTest, SetAbsoluteTime) { SyscallSucceeds()); absl::SleepFor(kDelay + TimerSlack()); - uint64 val = 0; - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), - SyscallSucceedsWithValue(sizeof(uint64))); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); EXPECT_EQ(1, val); } TEST_P(TimerfdTest, IllegalReadWrite) { auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK)); - uint64 val = 0; + uint64_t val = 0; EXPECT_THAT(PreadFd(tfd.get(), &val, sizeof(val), 0), SyscallFailsWithErrno(ESPIPE)); EXPECT_THAT(WriteFd(tfd.get(), &val, sizeof(val)), @@ -244,9 +244,9 @@ TEST(TimerfdClockRealtimeTest, ClockRealtime) { ASSERT_THAT(timerfd_settime(tfd.get(), /* flags = */ 0, &its, nullptr), SyscallSucceeds()); - uint64 val = 0; - ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64)), - SyscallSucceedsWithValue(sizeof(uint64))); + uint64_t val = 0; + ASSERT_THAT(ReadFd(tfd.get(), &val, sizeof(uint64_t)), + SyscallSucceedsWithValue(sizeof(uint64_t))); EXPECT_EQ(1, val); } diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index af94d7baa..a2f6ef8cc 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -34,7 +34,7 @@ namespace gvisor { namespace testing { // Gets a pointer to the port component of the given address. -uint16* Port(struct sockaddr_storage* addr) { +uint16_t* Port(struct sockaddr_storage* addr) { switch (addr->ss_family) { case AF_INET: { auto sin = reinterpret_cast(addr); @@ -331,7 +331,7 @@ TEST_P(UdpSocketTest, Connect) { EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); } -void ConnectAny(AddressFamily family, int sockfd, uint16 port) { +void ConnectAny(AddressFamily family, int sockfd, uint16_t port) { struct sockaddr_storage addr = {}; // Precondition check. 
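The udp_socket_test_cases.cc hunk above moves the Port() helper and ConnectAny() to uint16_t ports. ConnectAny's body is not shown in this diff; as a hedged sketch of the kind of address setup such a helper needs (MakeAnyAddr, its int family parameter, and its return convention are invented here), this builds a wildcard IPv4/IPv6 address carrying a uint16_t port in network byte order:

#include <arpa/inet.h>   // htonl, htons
#include <netinet/in.h>  // sockaddr_in, sockaddr_in6, in6addr_any
#include <sys/socket.h>  // sockaddr_storage
#include <cstdint>

// Fills *addr with the wildcard ("any") address for the given family and
// port, returning the socklen_t to pass to bind() or connect().
static socklen_t MakeAnyAddr(int family, uint16_t port,
                             sockaddr_storage* addr) {
  *addr = {};  // Zero the full storage before filling in one family's view.
  if (family == AF_INET) {
    auto* sin = reinterpret_cast<sockaddr_in*>(addr);
    sin->sin_family = AF_INET;
    sin->sin_addr.s_addr = htonl(INADDR_ANY);
    sin->sin_port = htons(port);
    return sizeof(sockaddr_in);
  }
  auto* sin6 = reinterpret_cast<sockaddr_in6*>(addr);
  sin6->sin6_family = AF_INET6;
  sin6->sin6_addr = in6addr_any;
  sin6->sin6_port = htons(port);
  return sizeof(sockaddr_in6);
}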
@@ -1398,7 +1398,7 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { received_iov.iov_len = kDataLength; received_msg.msg_iov = &received_iov; received_msg.msg_iovlen = 1; - size_t cmsg_data_len = sizeof(int8); + size_t cmsg_data_len = sizeof(int8_t); if (sent_type == IPV6_TCLASS) { cmsg_data_len = sizeof(int); } @@ -1413,7 +1413,7 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); EXPECT_EQ(cmsg->cmsg_level, sent_level); EXPECT_EQ(cmsg->cmsg_type, sent_type); - int8 received_tos = 0; + int8_t received_tos = 0; memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos)); EXPECT_EQ(received_tos, sent_tos); } @@ -1453,7 +1453,7 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { sent_iov.iov_len = kDataLength; sent_msg.msg_iov = &sent_iov; sent_msg.msg_iovlen = 1; - size_t cmsg_data_len = sizeof(int8); + size_t cmsg_data_len = sizeof(int8_t); if (sent_level == SOL_IPV6) { sent_type = IPV6_TCLASS; cmsg_data_len = sizeof(int); @@ -1467,7 +1467,7 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { sent_cmsg->cmsg_len = CMSG_LEN(cmsg_data_len); sent_cmsg->cmsg_level = sent_level; sent_cmsg->cmsg_type = sent_type; - *(int8*)CMSG_DATA(sent_cmsg) = sent_tos; + *(int8_t*)CMSG_DATA(sent_cmsg) = sent_tos; ASSERT_THAT(RetryEINTR(sendmsg)(t_, &sent_msg, 0), SyscallSucceedsWithValue(kDataLength)); @@ -1491,7 +1491,7 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); EXPECT_EQ(cmsg->cmsg_level, sent_level); EXPECT_EQ(cmsg->cmsg_type, sent_type); - int8 received_tos = 0; + int8_t received_tos = 0; memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos)); EXPECT_EQ(received_tos, sent_tos); } diff --git a/test/syscalls/linux/uidgid.cc b/test/syscalls/linux/uidgid.cc index e0e39e5e3..6218fbce1 100644 --- a/test/syscalls/linux/uidgid.cc +++ b/test/syscalls/linux/uidgid.cc @@ -27,10 +27,10 @@ #include "test/util/thread_util.h" #include "test/util/uid_util.h" -ABSL_FLAG(int32, scratch_uid1, 65534, "first scratch UID"); -ABSL_FLAG(int32, scratch_uid2, 65533, "second scratch UID"); -ABSL_FLAG(int32, scratch_gid1, 65534, "first scratch GID"); -ABSL_FLAG(int32, scratch_gid2, 65533, "second scratch GID"); +ABSL_FLAG(int32_t, scratch_uid1, 65534, "first scratch UID"); +ABSL_FLAG(int32_t, scratch_uid2, 65533, "second scratch UID"); +ABSL_FLAG(int32_t, scratch_gid1, 65534, "first scratch GID"); +ABSL_FLAG(int32_t, scratch_gid2, 65533, "second scratch GID"); using ::testing::UnorderedElementsAreArray; diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc index e7bae9c07..3a927a430 100644 --- a/test/syscalls/linux/utimes.cc +++ b/test/syscalls/linux/utimes.cc @@ -163,12 +163,12 @@ TEST(FutimesatTest, OnRelPath) { TEST(FutimesatTest, InvalidNsec) { auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); struct timeval times[4][2] = {{ - {0, 1}, // Valid - {1, static_cast(1e7)} // Invalid + {0, 1}, // Valid + {1, static_cast(1e7)} // Invalid }, { - {1, static_cast(1e7)}, // Invalid - {0, 1} // Valid + {1, static_cast(1e7)}, // Invalid + {0, 1} // Valid }, { {0, 1}, // Valid @@ -288,14 +288,15 @@ TEST(UtimeTest, ZeroAtimeandMtime) { TEST(UtimensatTest, InvalidNsec) { auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); - struct timespec times[2][2] = {{ - {0, UTIME_OMIT}, // Valid - {2, static_cast(1e10)} // Invalid - }, - { - {2, static_cast(1e10)}, // Invalid - {0, UTIME_OMIT} // Valid - }}; + struct timespec times[2][2] = { + { + {0, UTIME_OMIT}, // Valid + {2, static_cast(1e10)} // Invalid + }, + { + {2, 
static_cast(1e10)}, // Invalid + {0, UTIME_OMIT} // Valid + }}; for (unsigned int i = 0; i < sizeof(times) / sizeof(times[0]); i++) { std::cout << "test:" << i << "\n"; diff --git a/test/syscalls/linux/vfork.cc b/test/syscalls/linux/vfork.cc index 153b3bd69..0aaba482d 100644 --- a/test/syscalls/linux/vfork.cc +++ b/test/syscalls/linux/vfork.cc @@ -51,7 +51,7 @@ constexpr absl::Duration kChildDelay = absl::Seconds(10); // errno, so kChildExitCode is chosen to be an unlikely errno: constexpr int kChildExitCode = 118; // ENOTNAM: Not a XENIX named type file -int64 MonotonicNow() { +int64_t MonotonicNow() { struct timespec now; TEST_PCHECK(clock_gettime(CLOCK_MONOTONIC, &now) == 0); return now.tv_sec * 1000000000ll + now.tv_nsec; @@ -62,7 +62,7 @@ TEST(VforkTest, ParentStopsUntilChildExits) { // N.B. Run the test in a single-threaded subprocess because // vfork is not safe in a multi-threaded process. - const int64 start = MonotonicNow(); + const int64_t start = MonotonicNow(); pid_t pid = vfork(); if (pid == 0) { @@ -72,7 +72,7 @@ TEST(VforkTest, ParentStopsUntilChildExits) { TEST_PCHECK_MSG(pid > 0, "vfork failed"); MaybeSave(); - const int64 end = MonotonicNow(); + const int64_t end = MonotonicNow(); absl::Duration dur = absl::Nanoseconds(end - start); @@ -92,7 +92,7 @@ TEST(VforkTest, ParentStopsUntilChildExecves_NoRandomSave) { char* const* const child_argv = owned_child_argv.get(); const auto test = [&] { - const int64 start = MonotonicNow(); + const int64_t start = MonotonicNow(); pid_t pid = vfork(); if (pid == 0) { @@ -104,7 +104,7 @@ TEST(VforkTest, ParentStopsUntilChildExecves_NoRandomSave) { // since the test expects an upper bound on the time spent // stopped. int saved_errno = errno; - const int64 end = MonotonicNow(); + const int64_t end = MonotonicNow(); errno = saved_errno; TEST_PCHECK_MSG(pid > 0, "vfork failed"); MaybeSave(); @@ -143,7 +143,7 @@ TEST(VforkTest, ExecedChildExitDoesntUnstopParent_NoRandomSave) { // pid1 exec'd and is now sleeping. SleepSafe(kChildDelay / 2); - const int64 start = MonotonicNow(); + const int64_t start = MonotonicNow(); pid_t pid2 = vfork(); if (pid2 == 0) { @@ -153,7 +153,7 @@ TEST(VforkTest, ExecedChildExitDoesntUnstopParent_NoRandomSave) { TEST_PCHECK_MSG(pid2 > 0, "vfork failed"); MaybeSave(); - const int64 end = MonotonicNow(); + const int64_t end = MonotonicNow(); absl::Duration dur = absl::Nanoseconds(end - start); diff --git a/test/syscalls/linux/vsyscall.cc b/test/syscalls/linux/vsyscall.cc index 99e8c6cea..2c2303358 100644 --- a/test/syscalls/linux/vsyscall.cc +++ b/test/syscalls/linux/vsyscall.cc @@ -25,7 +25,7 @@ namespace testing { namespace { time_t vsyscall_time(time_t* t) { - constexpr uint64 kVsyscallTimeEntry = 0xffffffffff600400; + constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; return reinterpret_cast(kVsyscallTimeEntry)(t); } diff --git a/test/syscalls/linux/wait.cc b/test/syscalls/linux/wait.cc index 709b87a21..944149d5e 100644 --- a/test/syscalls/linux/wait.cc +++ b/test/syscalls/linux/wait.cc @@ -64,7 +64,7 @@ static const size_t kStackSize = 2 * kPageSize; // The child thread created in CloneAndExit runs this function. // This child does not have the TLS setup, so it must not use glibc functions. 
int CloneChild(void* priv) { - int64 sleep = reinterpret_cast(priv); + int64_t sleep = reinterpret_cast(priv); SleepSafe(absl::Seconds(sleep)); // glibc's _exit(2) function wrapper will helpfully call exit_group(2), @@ -75,7 +75,7 @@ int CloneChild(void* priv) { // ForkAndExit forks a child process which exits with exit_code, after // sleeping for the specified duration (seconds). -pid_t ForkAndExit(int exit_code, int64 sleep) { +pid_t ForkAndExit(int exit_code, int64_t sleep) { pid_t child = fork(); if (child == 0) { SleepSafe(absl::Seconds(sleep)); @@ -84,16 +84,16 @@ pid_t ForkAndExit(int exit_code, int64 sleep) { return child; } -int64 clock_gettime_nsecs(clockid_t id) { +int64_t clock_gettime_nsecs(clockid_t id) { struct timespec ts; TEST_PCHECK(clock_gettime(id, &ts) == 0); return (ts.tv_sec * 1000000000 + ts.tv_nsec); } -void spin(int64 sec) { - int64 ns = sec * 1000000000; - int64 start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID); - int64 end = start + ns; +void spin(int64_t sec) { + int64_t ns = sec * 1000000000; + int64_t start = clock_gettime_nsecs(CLOCK_THREAD_CPUTIME_ID); + int64_t end = start + ns; do { constexpr int kLoopCount = 1000000; // large and arbitrary @@ -105,7 +105,7 @@ void spin(int64 sec) { // ForkSpinAndExit forks a child process which exits with exit_code, after // spinning for the specified duration (seconds). -pid_t ForkSpinAndExit(int exit_code, int64 spintime) { +pid_t ForkSpinAndExit(int exit_code, int64_t spintime) { pid_t child = fork(); if (child == 0) { spin(spintime); @@ -141,7 +141,7 @@ int FreeStack(uintptr_t addr) { // CloneAndExit clones a child thread, which exits with 0 after sleeping for // the specified duration (must be in seconds). extra_flags are ORed against // the standard clone(2) flags. -int CloneAndExit(int64 sleep, uintptr_t stack, int extra_flags) { +int CloneAndExit(int64_t sleep, uintptr_t stack, int extra_flags) { return clone(CloneChild, reinterpret_cast(stack), CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_VM | extra_flags, reinterpret_cast(sleep)); diff --git a/test/util/mount_util.h b/test/util/mount_util.h index 51119f22f..23eea51a2 100644 --- a/test/util/mount_util.h +++ b/test/util/mount_util.h @@ -33,9 +33,9 @@ namespace testing { // destroyed. inline PosixErrorOr Mount(const std::string &source, const std::string &target, - const std::string &fstype, uint64 mountflags, - const std::string &data, - uint64 umountflags) { + const std::string &fstype, + uint64_t mountflags, const std::string &data, + uint64_t umountflags) { if (mount(source.c_str(), target.c_str(), fstype.c_str(), mountflags, data.c_str()) == -1) { return PosixError(errno, "mount failed"); diff --git a/test/util/multiprocess_util.cc b/test/util/multiprocess_util.cc index ba601f300..8b676751b 100644 --- a/test/util/multiprocess_util.cc +++ b/test/util/multiprocess_util.cc @@ -135,7 +135,7 @@ PosixErrorOr ForkAndExec(const std::string& filename, return ForkAndExecHelper(exec_fn, fn, child, execve_errno); } -PosixErrorOr ForkAndExecveat(const int32 dirfd, +PosixErrorOr ForkAndExecveat(const int32_t dirfd, const std::string& pathname, const ExecveArray& argv, const ExecveArray& envv, const int flags, diff --git a/test/util/multiprocess_util.h b/test/util/multiprocess_util.h index 342e73a52..3e736261b 100644 --- a/test/util/multiprocess_util.h +++ b/test/util/multiprocess_util.h @@ -103,13 +103,14 @@ inline PosixErrorOr ForkAndExec(const std::string& filename, } // Equivalent to ForkAndExec, except using dirfd and flags with execveat. 
-PosixErrorOr ForkAndExecveat(int32 dirfd, const std::string& pathname, +PosixErrorOr ForkAndExecveat(int32_t dirfd, + const std::string& pathname, const ExecveArray& argv, const ExecveArray& envv, int flags, const std::function& fn, pid_t* child, int* execve_errno); -inline PosixErrorOr ForkAndExecveat(int32 dirfd, +inline PosixErrorOr ForkAndExecveat(int32_t dirfd, const std::string& pathname, const ExecveArray& argv, const ExecveArray& envv, int flags, diff --git a/test/util/proc_util.cc b/test/util/proc_util.cc index c81f363ef..34d636ba9 100644 --- a/test/util/proc_util.cc +++ b/test/util/proc_util.cc @@ -72,7 +72,7 @@ PosixErrorOr ParseProcMapsLine(absl::string_view line) { ASSIGN_OR_RETURN_ERRNO(map_entry.major, AtoiBase(device[0], 16)); ASSIGN_OR_RETURN_ERRNO(map_entry.minor, AtoiBase(device[1], 16)); - ASSIGN_OR_RETURN_ERRNO(map_entry.inode, Atoi(parts[4])); + ASSIGN_OR_RETURN_ERRNO(map_entry.inode, Atoi(parts[4])); if (parts.size() == 6) { // A filename is present. However, absl::StrSplit retained the whitespace // between the inode number and the filename. diff --git a/test/util/temp_path.cc b/test/util/temp_path.cc index f5096dd53..35aacb172 100644 --- a/test/util/temp_path.cc +++ b/test/util/temp_path.cc @@ -32,7 +32,7 @@ namespace testing { namespace { -std::atomic global_temp_file_number = ATOMIC_VAR_INIT(1); +std::atomic global_temp_file_number = ATOMIC_VAR_INIT(1); // Return a new temp filename, intended to be unique system-wide. // diff --git a/test/util/test_util.cc b/test/util/test_util.cc index 51f4b4539..848504c88 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -79,7 +79,7 @@ bool IsRunningWithHostinet() { #endif // defined(__x86_64__) CPUVendor GetCPUVendor() { - uint32 eax, ebx, ecx, edx; + uint32_t eax, ebx, ecx, edx; std::string vendor_str; // Get vendor string (issue CPUID with eax = 0) GETCPUID(eax, ebx, ecx, edx, 0, 0); @@ -179,36 +179,36 @@ PosixErrorOr> GetOpenFDs() { return ret_fds; } -PosixErrorOr Links(const std::string& path) { +PosixErrorOr Links(const std::string& path) { struct stat st; if (stat(path.c_str(), &st)) { return PosixError(errno, absl::StrCat("Failed to stat ", path)); } - return static_cast(st.st_nlink); + return static_cast(st.st_nlink); } void RandomizeBuffer(void* buffer, size_t len) { struct timespec ts = {}; clock_gettime(CLOCK_MONOTONIC, &ts); - uint32 seed = static_cast(ts.tv_nsec); + uint32_t seed = static_cast(ts.tv_nsec); char* const buf = static_cast(buffer); for (size_t i = 0; i < len; i++) { buf[i] = rand_r(&seed) % 255; } } -std::vector> GenerateIovecs(uint64 total_size, +std::vector> GenerateIovecs(uint64_t total_size, void* buf, size_t buflen) { std::vector> result; - for (uint64 offset = 0; offset < total_size;) { + for (uint64_t offset = 0; offset < total_size;) { auto& iovec_array = *result.emplace(result.end()); for (; offset < total_size && iovec_array.size() < IOV_MAX; offset += buflen) { struct iovec iov = {}; iov.iov_base = buf; - iov.iov_len = std::min(total_size - offset, buflen); + iov.iov_len = std::min(total_size - offset, buflen); iovec_array.push_back(iov); } } @@ -216,15 +216,15 @@ std::vector> GenerateIovecs(uint64 total_size, return result; } -uint64 Megabytes(uint64 n) { +uint64_t Megabytes(uint64_t n) { // Overflow check, upper 20 bits in n shouldn't be set. TEST_CHECK(!(0xfffff00000000000 & n)); return n << 20; } -bool Equivalent(uint64 current, uint64 target, double tolerance) { +bool Equivalent(uint64_t current, uint64_t target, double tolerance) { auto abs_diff = target > current ? 
target - current : current - target; - return abs_diff <= static_cast(tolerance * target); + return abs_diff <= static_cast(tolerance * target); } } // namespace testing diff --git a/test/util/test_util.h b/test/util/test_util.h index 6eb46ac76..b3235c7e3 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -264,7 +264,7 @@ std::ostream& operator<<(std::ostream& out, OpenFd const& ofd); PosixErrorOr> GetOpenFDs(); // Returns the number of hard links to a path. -PosixErrorOr Links(const std::string& path); +PosixErrorOr Links(const std::string& path); namespace internal { @@ -706,7 +706,7 @@ inline PosixErrorOr Atoi(absl::string_view str) { return ret; } -inline PosixErrorOr AtoiBase(absl::string_view str, int base) { +inline PosixErrorOr AtoiBase(absl::string_view str, int base) { if (base > 255 || base < 2) { return PosixError(EINVAL, "Invalid Base"); } @@ -737,16 +737,16 @@ inline PosixErrorOr Atof(absl::string_view str) { // Return the smallest number of iovec arrays that can be used to write // "total_bytes" number of bytes, each iovec writing one "buf". -std::vector> GenerateIovecs(uint64 total_size, +std::vector> GenerateIovecs(uint64_t total_size, void* buf, size_t buflen); // Returns bytes in 'n' megabytes. Used for readability. -uint64 Megabytes(uint64 n); +uint64_t Megabytes(uint64_t n); // Predicate for checking that a value is within some tolerance of another // value. Returns true iff current is in the range [target * (1 - tolerance), // target * (1 + tolerance)]. -bool Equivalent(uint64 current, uint64 target, double tolerance); +bool Equivalent(uint64_t current, uint64_t target, double tolerance); // Matcher wrapping the Equivalent predicate. MATCHER_P2(EquivalentWithin, target, tolerance, @@ -756,7 +756,7 @@ MATCHER_P2(EquivalentWithin, target, tolerance, if (target == 0) { *result_listener << ::absl::StreamFormat("difference of infinity%%"); } else { - int64 delta = static_cast(arg) - static_cast(target); + int64_t delta = static_cast(arg) - static_cast(target); double delta_percent = static_cast(delta) / static_cast(target) * 100; *result_listener << ::absl::StreamFormat("difference of %.2f%%", diff --git a/test/util/test_util_test.cc b/test/util/test_util_test.cc index 024304535..f42100374 100644 --- a/test/util/test_util_test.cc +++ b/test/util/test_util_test.cc @@ -171,7 +171,7 @@ MATCHER_P(IovecsListEq, expected, "") { return false; } - for (uint64 i = 0; i < expected.size(); ++i) { + for (uint64_t i = 0; i < expected.size(); ++i) { const std::vector& actual_iovecs = arg[i]; const std::vector& expected_iovecs = expected[i]; if (actual_iovecs.size() != expected_iovecs.size()) { @@ -181,7 +181,7 @@ MATCHER_P(IovecsListEq, expected, "") { return false; } - for (uint64 j = 0; j < expected_iovecs.size(); ++j) { + for (uint64_t j = 0; j < expected_iovecs.size(); ++j) { const struct iovec& actual_iov = actual_iovecs[j]; const struct iovec& expected_iov = expected_iovecs[j]; if (actual_iov.iov_base != expected_iov.iov_base) { -- cgit v1.2.3 From d59a3cc959cb14b0bed14b62e33ee4178b89b346 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Wed, 22 Jan 2020 05:51:57 +0000 Subject: Enable fault() syscall test on arm64. 
Signed-off-by: Haibo Xu Change-Id: I9b2b2e0d84946c10cf136abeef6c60642fa3b6ec --- test/syscalls/linux/fault.cc | 3 +++ 1 file changed, 3 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/fault.cc b/test/syscalls/linux/fault.cc index f6e19026f..a85750382 100644 --- a/test/syscalls/linux/fault.cc +++ b/test/syscalls/linux/fault.cc @@ -37,6 +37,9 @@ int GetPcFromUcontext(ucontext_t* uc, uintptr_t* pc) { #elif defined(__i386__) *pc = uc->uc_mcontext.gregs[REG_EIP]; return 1; +#elif defined(__aarch64__) + *pc = uc->uc_mcontext.pc; + return 1; #else return 0; #endif -- cgit v1.2.3 From 159992300ddb2924cfbf1de57591a78ea27a3a4b Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 22 Jan 2020 11:50:53 -0800 Subject: Toolchain version bumps. - bazel_toolchain to 2.0.2 - rules_go to 0.21.0 - Go toolchain to 1.13.6 - Use new proto lib archive. PiperOrigin-RevId: 290999410 --- WORKSPACE | 33 ++++++++++++++++----------------- test/BUILD | 4 ++-- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/WORKSPACE b/WORKSPACE index e2afc073c..5d2fc36f9 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -4,10 +4,10 @@ load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") # Load go bazel rules and gazelle. http_archive( name = "io_bazel_rules_go", - sha256 = "b9aa86ec08a292b97ec4591cf578e020b35f98e12173bbd4a921f84f583aebd9", + sha256 = "b27e55d2dcc9e6020e17614ae6e0374818a3e3ce6f2024036e688ada24110444", urls = [ - "https://storage.googleapis.com/bazel-mirror/github.com/bazelbuild/rules_go/releases/download/v0.20.2/rules_go-v0.20.2.tar.gz", - "https://github.com/bazelbuild/rules_go/releases/download/v0.20.2/rules_go-v0.20.2.tar.gz", + "https://storage.googleapis.com/bazel-mirror/github.com/bazelbuild/rules_go/releases/download/v0.21.0/rules_go-v0.21.0.tar.gz", + "https://github.com/bazelbuild/rules_go/releases/download/v0.21.0/rules_go-v0.21.0.tar.gz", ], ) @@ -25,7 +25,7 @@ load("@io_bazel_rules_go//go:deps.bzl", "go_rules_dependencies", "go_register_to go_rules_dependencies() go_register_toolchains( - go_version = "1.13.4", + go_version = "1.13.6", nogo = "@//:nogo", ) @@ -46,18 +46,17 @@ http_archive( # Load protobuf dependencies. http_archive( - name = "com_google_protobuf", - sha256 = "532d2575d8c0992065bb19ec5fba13aa3683499726f6055c11b474f91a00bb0c", - strip_prefix = "protobuf-7f520092d9050d96fb4b707ad11a51701af4ce49", + name = "rules_proto", + sha256 = "602e7161d9195e50246177e7c55b2f39950a9cf7366f74ed5f22fd45750cd208", + strip_prefix = "rules_proto-97d8af4dc474595af3900dd85cb3a29ad28cc313", urls = [ - "https://mirror.bazel.build/github.com/protocolbuffers/protobuf/archive/7f520092d9050d96fb4b707ad11a51701af4ce49.zip", - "https://github.com/protocolbuffers/protobuf/archive/7f520092d9050d96fb4b707ad11a51701af4ce49.zip", + "https://mirror.bazel.build/github.com/bazelbuild/rules_proto/archive/97d8af4dc474595af3900dd85cb3a29ad28cc313.tar.gz", + "https://github.com/bazelbuild/rules_proto/archive/97d8af4dc474595af3900dd85cb3a29ad28cc313.tar.gz", ], ) - -load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") - -protobuf_deps() +load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies", "rules_proto_toolchains") +rules_proto_dependencies() +rules_proto_toolchains() # Load python dependencies. 
git_repository( @@ -83,11 +82,11 @@ pip_install() # See releases at https://releases.bazel.build/bazel-toolchains.html http_archive( name = "bazel_toolchains", - sha256 = "a019fbd579ce5aed0239de865b2d8281dbb809efd537bf42e0d366783e8dec65", - strip_prefix = "bazel-toolchains-0.29.2", + sha256 = "a653c9d318e42b14c0ccd7ac50c4a2a276c0db1e39743ab88b5aa2f0bc9cf607", + strip_prefix = "bazel-toolchains-2.0.2", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/0.29.2.tar.gz", - "https://github.com/bazelbuild/bazel-toolchains/archive/0.29.2.tar.gz", + "https://github.com/bazelbuild/bazel-toolchains/releases/download/2.0.2/bazel-toolchains-2.0.2.tar.gz", + "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/2.0.2.tar.gz", ], ) diff --git a/test/BUILD b/test/BUILD index 01fa01f2e..bf834d994 100644 --- a/test/BUILD +++ b/test/BUILD @@ -20,7 +20,7 @@ platform( remote_execution_properties = """ properties: { name: "container-image" - value:"docker://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04@sha256:69c9f1652941d64a46f6f7358a44c1718f25caa5cb1ced4a58ccc5281cd183b5" + value:"docker://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04@sha256:93f7e127196b9b653d39830c50f8b05d49ef6fd8739a9b5b8ab16e1df5399e50" } properties: { name: "dockerAddCapabilities" @@ -39,6 +39,6 @@ toolchain( ], target_compatible_with = [ ], - toolchain = "@bazel_toolchains//configs/ubuntu16_04_clang/9.0.0/bazel_0.28.0/cc:cc-compiler-k8", + toolchain = "@bazel_toolchains//configs/ubuntu16_04_clang/10.0.0/bazel_2.0.0/cc:cc-compiler-k8", toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", ) -- cgit v1.2.3 From 49e84b10e5ed7f94e6cbe003b9f7268e8235bb08 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Wed, 22 Jan 2020 06:22:18 +0000 Subject: Unify the kOLargeFile definition in syscall tests. Signed-off-by: Haibo Xu Change-Id: Id9d6ae98305a4057d55d622ea4c3ac2228fea212 --- test/syscalls/linux/fcntl.cc | 5 +---- test/syscalls/linux/pipe.cc | 6 +++--- test/syscalls/linux/proc.cc | 12 ------------ test/util/fs_util.h | 11 +++++++++++ 4 files changed, 15 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 4f3aa81d6..421c15b87 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -31,6 +31,7 @@ #include "test/syscalls/linux/socket_test_util.h" #include "test/util/cleanup.h" #include "test/util/eventfd_util.h" +#include "test/util/fs_util.h" #include "test/util/multiprocess_util.h" #include "test/util/posix_error.h" #include "test/util/save_util.h" @@ -55,10 +56,6 @@ ABSL_FLAG(int32_t, socket_fd, -1, namespace gvisor { namespace testing { -// O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0 -// because "it isn't needed", even though Linux can return it via F_GETFL. -constexpr int kOLargeFile = 00100000; - class FcntlLockTest : public ::testing::Test { public: void SetUp() override { diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc index ac9b21b24..d8e19e910 100644 --- a/test/syscalls/linux/pipe.cc +++ b/test/syscalls/linux/pipe.cc @@ -25,6 +25,7 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -144,11 +145,10 @@ TEST_P(PipeTest, Flags) { if (IsNamedPipe()) { // May be stubbed to zero; define locally. 
- constexpr int kLargefile = 0100000; EXPECT_THAT(fcntl(rfd_.get(), F_GETFL), - SyscallSucceedsWithValue(kLargefile | O_RDONLY)); + SyscallSucceedsWithValue(kOLargeFile | O_RDONLY)); EXPECT_THAT(fcntl(wfd_.get(), F_GETFL), - SyscallSucceedsWithValue(kLargefile | O_WRONLY)); + SyscallSucceedsWithValue(kOLargeFile | O_WRONLY)); } else { EXPECT_THAT(fcntl(rfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_RDONLY)); EXPECT_THAT(fcntl(wfd_.get(), F_GETFL), SyscallSucceedsWithValue(O_WRONLY)); diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index bf9bb45d3..a03c1e43d 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -100,18 +100,6 @@ namespace { #define SUID_DUMP_ROOT 2 #endif /* SUID_DUMP_ROOT */ -// O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0 -// because "it isn't needed", even though Linux can return it via F_GETFL. -#if defined(__x86_64__) || defined(__i386__) -constexpr int kOLargeFile = 00100000; -#elif __aarch64__ -// The value originate from the Linux -// kernel's arch/arm64/include/uapi/asm/fcntl.h. -constexpr int kOLargeFile = 00400000; -#else -#error "Unknown architecture" -#endif - #if defined(__x86_64__) || defined(__i386__) // This list of "required" fields is taken from reading the file // arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally diff --git a/test/util/fs_util.h b/test/util/fs_util.h index ee1b341d7..caf19b24d 100644 --- a/test/util/fs_util.h +++ b/test/util/fs_util.h @@ -26,6 +26,17 @@ namespace gvisor { namespace testing { + +// O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0 +// because "it isn't needed", even though Linux can return it via F_GETFL. +#if defined(__x86_64__) +constexpr int kOLargeFile = 00100000; +#elif defined(__aarch64__) +constexpr int kOLargeFile = 00400000; +#else +#error "Unknown architecture" +#endif + // Returns a status or the current working directory. PosixErrorOr GetCWD(); -- cgit v1.2.3 From d29e59af9fbd420e34378bcbf7ae543134070217 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 27 Jan 2020 10:04:07 -0800 Subject: Standardize on tools directory. 
PiperOrigin-RevId: 291745021 --- .bazelrc | 8 +- BUILD | 49 ++++++- benchmarks/defs.bzl | 18 --- benchmarks/harness/BUILD | 74 +++++----- benchmarks/harness/machine_producers/BUILD | 4 +- benchmarks/runner/BUILD | 24 ++-- benchmarks/tcp/BUILD | 3 +- benchmarks/workloads/ab/BUILD | 19 ++- benchmarks/workloads/absl/BUILD | 19 ++- benchmarks/workloads/curl/BUILD | 2 +- benchmarks/workloads/ffmpeg/BUILD | 2 +- benchmarks/workloads/fio/BUILD | 19 ++- benchmarks/workloads/httpd/BUILD | 2 +- benchmarks/workloads/iperf/BUILD | 19 ++- benchmarks/workloads/netcat/BUILD | 2 +- benchmarks/workloads/nginx/BUILD | 2 +- benchmarks/workloads/node/BUILD | 2 +- benchmarks/workloads/node_template/BUILD | 2 +- benchmarks/workloads/redis/BUILD | 2 +- benchmarks/workloads/redisbenchmark/BUILD | 19 ++- benchmarks/workloads/ruby/BUILD | 2 +- benchmarks/workloads/ruby_template/BUILD | 2 +- benchmarks/workloads/sleep/BUILD | 2 +- benchmarks/workloads/sysbench/BUILD | 19 ++- benchmarks/workloads/syscall/BUILD | 19 ++- benchmarks/workloads/tensorflow/BUILD | 2 +- benchmarks/workloads/true/BUILD | 2 +- pkg/abi/BUILD | 3 +- pkg/abi/linux/BUILD | 6 +- pkg/amutex/BUILD | 6 +- pkg/atomicbitops/BUILD | 6 +- pkg/binary/BUILD | 6 +- pkg/bits/BUILD | 6 +- pkg/bpf/BUILD | 6 +- pkg/compressio/BUILD | 6 +- pkg/control/client/BUILD | 3 +- pkg/control/server/BUILD | 3 +- pkg/cpuid/BUILD | 8 +- pkg/eventchannel/BUILD | 16 +-- pkg/fd/BUILD | 6 +- pkg/fdchannel/BUILD | 8 +- pkg/fdnotifier/BUILD | 3 +- pkg/flipcall/BUILD | 8 +- pkg/fspath/BUILD | 13 +- pkg/gate/BUILD | 4 +- pkg/goid/BUILD | 6 +- pkg/ilist/BUILD | 6 +- pkg/linewriter/BUILD | 6 +- pkg/log/BUILD | 6 +- pkg/memutil/BUILD | 3 +- pkg/metric/BUILD | 23 +-- pkg/p9/BUILD | 6 +- pkg/p9/p9test/BUILD | 6 +- pkg/procid/BUILD | 8 +- pkg/rand/BUILD | 3 +- pkg/refs/BUILD | 6 +- pkg/seccomp/BUILD | 6 +- pkg/secio/BUILD | 6 +- pkg/segment/test/BUILD | 6 +- pkg/sentry/BUILD | 2 + pkg/sentry/arch/BUILD | 20 +-- pkg/sentry/context/BUILD | 3 +- pkg/sentry/context/contexttest/BUILD | 3 +- pkg/sentry/control/BUILD | 8 +- pkg/sentry/device/BUILD | 6 +- pkg/sentry/fs/BUILD | 6 +- pkg/sentry/fs/anon/BUILD | 3 +- pkg/sentry/fs/dev/BUILD | 3 +- pkg/sentry/fs/fdpipe/BUILD | 6 +- pkg/sentry/fs/filetest/BUILD | 3 +- pkg/sentry/fs/fsutil/BUILD | 6 +- pkg/sentry/fs/gofer/BUILD | 6 +- pkg/sentry/fs/host/BUILD | 6 +- pkg/sentry/fs/lock/BUILD | 6 +- pkg/sentry/fs/proc/BUILD | 6 +- pkg/sentry/fs/proc/device/BUILD | 3 +- pkg/sentry/fs/proc/seqfile/BUILD | 6 +- pkg/sentry/fs/ramfs/BUILD | 6 +- pkg/sentry/fs/sys/BUILD | 3 +- pkg/sentry/fs/timerfd/BUILD | 3 +- pkg/sentry/fs/tmpfs/BUILD | 6 +- pkg/sentry/fs/tty/BUILD | 6 +- pkg/sentry/fsimpl/ext/BUILD | 6 +- pkg/sentry/fsimpl/ext/benchmark/BUILD | 2 +- pkg/sentry/fsimpl/ext/disklayout/BUILD | 6 +- pkg/sentry/fsimpl/kernfs/BUILD | 6 +- pkg/sentry/fsimpl/proc/BUILD | 8 +- pkg/sentry/fsimpl/sys/BUILD | 6 +- pkg/sentry/fsimpl/testutil/BUILD | 5 +- pkg/sentry/fsimpl/tmpfs/BUILD | 8 +- pkg/sentry/hostcpu/BUILD | 6 +- pkg/sentry/hostmm/BUILD | 3 +- pkg/sentry/inet/BUILD | 3 +- pkg/sentry/kernel/BUILD | 24 +--- pkg/sentry/kernel/auth/BUILD | 3 +- pkg/sentry/kernel/contexttest/BUILD | 3 +- pkg/sentry/kernel/epoll/BUILD | 6 +- pkg/sentry/kernel/eventfd/BUILD | 6 +- pkg/sentry/kernel/fasync/BUILD | 3 +- pkg/sentry/kernel/futex/BUILD | 6 +- pkg/sentry/kernel/memevent/BUILD | 20 +-- pkg/sentry/kernel/pipe/BUILD | 6 +- pkg/sentry/kernel/sched/BUILD | 6 +- pkg/sentry/kernel/semaphore/BUILD | 6 +- pkg/sentry/kernel/shm/BUILD | 3 +- pkg/sentry/kernel/signalfd/BUILD | 5 +- 
pkg/sentry/kernel/time/BUILD | 3 +- pkg/sentry/limits/BUILD | 6 +- pkg/sentry/loader/BUILD | 4 +- pkg/sentry/memmap/BUILD | 6 +- pkg/sentry/mm/BUILD | 6 +- pkg/sentry/pgalloc/BUILD | 6 +- pkg/sentry/platform/BUILD | 3 +- pkg/sentry/platform/interrupt/BUILD | 6 +- pkg/sentry/platform/kvm/BUILD | 6 +- pkg/sentry/platform/kvm/testutil/BUILD | 3 +- pkg/sentry/platform/ptrace/BUILD | 3 +- pkg/sentry/platform/ring0/BUILD | 3 +- pkg/sentry/platform/ring0/gen_offsets/BUILD | 2 +- pkg/sentry/platform/ring0/pagetables/BUILD | 16 +-- pkg/sentry/platform/safecopy/BUILD | 6 +- pkg/sentry/safemem/BUILD | 6 +- pkg/sentry/sighandling/BUILD | 3 +- pkg/sentry/socket/BUILD | 3 +- pkg/sentry/socket/control/BUILD | 3 +- pkg/sentry/socket/hostinet/BUILD | 3 +- pkg/sentry/socket/netfilter/BUILD | 3 +- pkg/sentry/socket/netlink/BUILD | 3 +- pkg/sentry/socket/netlink/port/BUILD | 6 +- pkg/sentry/socket/netlink/route/BUILD | 3 +- pkg/sentry/socket/netlink/uevent/BUILD | 3 +- pkg/sentry/socket/netstack/BUILD | 3 +- pkg/sentry/socket/unix/BUILD | 3 +- pkg/sentry/socket/unix/transport/BUILD | 3 +- pkg/sentry/state/BUILD | 3 +- pkg/sentry/strace/BUILD | 20 +-- pkg/sentry/syscalls/BUILD | 3 +- pkg/sentry/syscalls/linux/BUILD | 3 +- pkg/sentry/time/BUILD | 6 +- pkg/sentry/unimpl/BUILD | 21 +-- pkg/sentry/uniqueid/BUILD | 3 +- pkg/sentry/usage/BUILD | 5 +- pkg/sentry/usermem/BUILD | 7 +- pkg/sentry/vfs/BUILD | 8 +- pkg/sentry/watchdog/BUILD | 3 +- pkg/sleep/BUILD | 6 +- pkg/state/BUILD | 17 +-- pkg/state/statefile/BUILD | 6 +- pkg/sync/BUILD | 6 +- pkg/sync/atomicptrtest/BUILD | 6 +- pkg/sync/seqatomictest/BUILD | 6 +- pkg/syserr/BUILD | 3 +- pkg/syserror/BUILD | 4 +- pkg/tcpip/BUILD | 6 +- pkg/tcpip/adapters/gonet/BUILD | 6 +- pkg/tcpip/buffer/BUILD | 6 +- pkg/tcpip/checker/BUILD | 3 +- pkg/tcpip/hash/jenkins/BUILD | 6 +- pkg/tcpip/header/BUILD | 6 +- pkg/tcpip/iptables/BUILD | 3 +- pkg/tcpip/link/channel/BUILD | 3 +- pkg/tcpip/link/fdbased/BUILD | 6 +- pkg/tcpip/link/loopback/BUILD | 3 +- pkg/tcpip/link/muxed/BUILD | 6 +- pkg/tcpip/link/rawfile/BUILD | 3 +- pkg/tcpip/link/sharedmem/BUILD | 6 +- pkg/tcpip/link/sharedmem/pipe/BUILD | 6 +- pkg/tcpip/link/sharedmem/queue/BUILD | 6 +- pkg/tcpip/link/sniffer/BUILD | 3 +- pkg/tcpip/link/tun/BUILD | 3 +- pkg/tcpip/link/waitable/BUILD | 6 +- pkg/tcpip/network/BUILD | 2 +- pkg/tcpip/network/arp/BUILD | 4 +- pkg/tcpip/network/fragmentation/BUILD | 6 +- pkg/tcpip/network/hash/BUILD | 3 +- pkg/tcpip/network/ipv4/BUILD | 4 +- pkg/tcpip/network/ipv6/BUILD | 6 +- pkg/tcpip/ports/BUILD | 6 +- pkg/tcpip/sample/tun_tcp_connect/BUILD | 2 +- pkg/tcpip/sample/tun_tcp_echo/BUILD | 2 +- pkg/tcpip/seqnum/BUILD | 3 +- pkg/tcpip/stack/BUILD | 6 +- pkg/tcpip/transport/icmp/BUILD | 3 +- pkg/tcpip/transport/packet/BUILD | 3 +- pkg/tcpip/transport/raw/BUILD | 3 +- pkg/tcpip/transport/tcp/BUILD | 4 +- pkg/tcpip/transport/tcp/testing/context/BUILD | 3 +- pkg/tcpip/transport/tcpconntrack/BUILD | 4 +- pkg/tcpip/transport/udp/BUILD | 4 +- pkg/tmutex/BUILD | 6 +- pkg/unet/BUILD | 6 +- pkg/urpc/BUILD | 6 +- pkg/waiter/BUILD | 6 +- runsc/BUILD | 27 ++-- runsc/boot/BUILD | 5 +- runsc/boot/filter/BUILD | 3 +- runsc/boot/platforms/BUILD | 3 +- runsc/cgroup/BUILD | 5 +- runsc/cmd/BUILD | 5 +- runsc/console/BUILD | 3 +- runsc/container/BUILD | 5 +- runsc/container/test_app/BUILD | 4 +- runsc/criutil/BUILD | 3 +- runsc/dockerutil/BUILD | 3 +- runsc/fsgofer/BUILD | 9 +- runsc/fsgofer/filter/BUILD | 3 +- runsc/sandbox/BUILD | 3 +- runsc/specutils/BUILD | 5 +- runsc/testutil/BUILD | 3 +- runsc/version_test.sh | 2 +- 
scripts/common.sh | 6 +- scripts/common_bazel.sh | 99 ------------- scripts/common_build.sh | 99 +++++++++++++ test/BUILD | 45 +----- test/e2e/BUILD | 5 +- test/image/BUILD | 5 +- test/iptables/BUILD | 5 +- test/iptables/runner/BUILD | 12 +- test/root/BUILD | 5 +- test/root/testdata/BUILD | 3 +- test/runtimes/BUILD | 4 +- test/runtimes/build_defs.bzl | 5 +- test/runtimes/images/proctor/BUILD | 4 +- test/syscalls/BUILD | 2 +- test/syscalls/build_defs.bzl | 6 +- test/syscalls/gtest/BUILD | 7 +- test/syscalls/linux/BUILD | 23 ++- test/syscalls/linux/arch_prctl.cc | 2 + test/syscalls/linux/rseq/BUILD | 5 +- .../linux/udp_socket_errqueue_test_case.cc | 4 + test/uds/BUILD | 3 +- test/util/BUILD | 27 ++-- test/util/save_util_linux.cc | 4 + test/util/save_util_other.cc | 4 + test/util/test_util_runfiles.cc | 4 + tools/BUILD | 3 + tools/build/BUILD | 10 ++ tools/build/defs.bzl | 91 ++++++++++++ tools/checkunsafe/BUILD | 3 +- tools/defs.bzl | 154 +++++++++++++++++++++ tools/go_generics/BUILD | 2 +- tools/go_generics/globals/BUILD | 4 +- tools/go_generics/go_merge/BUILD | 2 +- tools/go_generics/rules_tests/BUILD | 2 +- tools/go_marshal/BUILD | 4 +- tools/go_marshal/README.md | 52 +------ tools/go_marshal/analysis/BUILD | 5 +- tools/go_marshal/defs.bzl | 112 ++------------- tools/go_marshal/gomarshal/BUILD | 6 +- tools/go_marshal/gomarshal/generator.go | 20 ++- tools/go_marshal/gomarshal/generator_tests.go | 6 +- tools/go_marshal/main.go | 11 +- tools/go_marshal/marshal/BUILD | 5 +- tools/go_marshal/test/BUILD | 7 +- tools/go_marshal/test/external/BUILD | 6 +- tools/go_stateify/BUILD | 2 +- tools/go_stateify/defs.bzl | 79 +---------- tools/images/BUILD | 2 +- tools/images/defs.bzl | 6 +- tools/issue_reviver/BUILD | 2 +- tools/issue_reviver/github/BUILD | 3 +- tools/issue_reviver/reviver/BUILD | 5 +- tools/workspace_status.sh | 2 +- vdso/BUILD | 33 ++--- 264 files changed, 1012 insertions(+), 1380 deletions(-) delete mode 100644 benchmarks/defs.bzl delete mode 100755 scripts/common_bazel.sh create mode 100755 scripts/common_build.sh create mode 100644 tools/BUILD create mode 100644 tools/build/BUILD create mode 100644 tools/build/defs.bzl create mode 100644 tools/defs.bzl (limited to 'test') diff --git a/.bazelrc b/.bazelrc index 9c35c5e7b..ef214bcfa 100644 --- a/.bazelrc +++ b/.bazelrc @@ -30,10 +30,10 @@ build:remote --auth_scope="https://www.googleapis.com/auth/cloud-source-tools" # Add a custom platform and toolchain that builds in a privileged docker # container, which is required by our syscall tests. -build:remote --host_platform=//test:rbe_ubuntu1604 -build:remote --extra_toolchains=//test:cc-toolchain-clang-x86_64-default -build:remote --extra_execution_platforms=//test:rbe_ubuntu1604 -build:remote --platforms=//test:rbe_ubuntu1604 +build:remote --host_platform=//:rbe_ubuntu1604 +build:remote --extra_toolchains=//:cc-toolchain-clang-x86_64-default +build:remote --extra_execution_platforms=//:rbe_ubuntu1604 +build:remote --platforms=//:rbe_ubuntu1604 build:remote --crosstool_top=@rbe_default//cc:toolchain build:remote --jobs=50 build:remote --remote_timeout=3600 diff --git a/BUILD b/BUILD index 76286174f..5fd929378 100644 --- a/BUILD +++ b/BUILD @@ -1,8 +1,8 @@ -package(licenses = ["notice"]) # Apache 2.0 - load("@io_bazel_rules_go//go:def.bzl", "go_path", "nogo") load("@bazel_gazelle//:def.bzl", "gazelle") +package(licenses = ["notice"]) + # The sandbox filegroup is used for sandbox-internal dependencies. 
package_group( name = "sandbox", @@ -49,9 +49,52 @@ gazelle(name = "gazelle") # live in the tools subdirectory (unless they are standard). nogo( name = "nogo", - config = "tools/nogo.js", + config = "//tools:nogo.js", visibility = ["//visibility:public"], deps = [ "//tools/checkunsafe", ], ) + +# We need to define a bazel platform and toolchain to specify dockerPrivileged +# and dockerRunAsRoot options, they are required to run tests on the RBE +# cluster in Kokoro. +alias( + name = "rbe_ubuntu1604", + actual = ":rbe_ubuntu1604_r346485", +) + +platform( + name = "rbe_ubuntu1604_r346485", + constraint_values = [ + "@bazel_tools//platforms:x86_64", + "@bazel_tools//platforms:linux", + "@bazel_tools//tools/cpp:clang", + "@bazel_toolchains//constraints:xenial", + "@bazel_toolchains//constraints/sanitizers:support_msan", + ], + remote_execution_properties = """ + properties: { + name: "container-image" + value:"docker://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04@sha256:93f7e127196b9b653d39830c50f8b05d49ef6fd8739a9b5b8ab16e1df5399e50" + } + properties: { + name: "dockerAddCapabilities" + value: "SYS_ADMIN" + } + properties: { + name: "dockerPrivileged" + value: "true" + } + """, +) + +toolchain( + name = "cc-toolchain-clang-x86_64-default", + exec_compatible_with = [ + ], + target_compatible_with = [ + ], + toolchain = "@bazel_toolchains//configs/ubuntu16_04_clang/10.0.0/bazel_2.0.0/cc:cc-compiler-k8", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) diff --git a/benchmarks/defs.bzl b/benchmarks/defs.bzl deleted file mode 100644 index 79e6cdbc8..000000000 --- a/benchmarks/defs.bzl +++ /dev/null @@ -1,18 +0,0 @@ -"""Provides python helper functions.""" - -load("@pydeps//:requirements.bzl", _requirement = "requirement") - -def filter_deps(deps = None): - if deps == None: - deps = [] - return [dep for dep in deps if dep] - -def py_library(deps = None, **kwargs): - return native.py_library(deps = filter_deps(deps), **kwargs) - -def py_test(deps = None, **kwargs): - return native.py_test(deps = filter_deps(deps), **kwargs) - -def requirement(name, direct = True): - """ requirement returns the required dependency. 
""" - return _requirement(name) diff --git a/benchmarks/harness/BUILD b/benchmarks/harness/BUILD index 081a74243..52d4e42f8 100644 --- a/benchmarks/harness/BUILD +++ b/benchmarks/harness/BUILD @@ -1,4 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "requirement") +load("//tools:defs.bzl", "py_library", "py_requirement") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -25,16 +25,16 @@ py_library( srcs = ["container.py"], deps = [ "//benchmarks/workloads", - requirement("asn1crypto", False), - requirement("chardet", False), - requirement("certifi", False), - requirement("docker", True), - requirement("docker-pycreds", False), - requirement("idna", False), - requirement("ptyprocess", False), - requirement("requests", False), - requirement("urllib3", False), - requirement("websocket-client", False), + py_requirement("asn1crypto", False), + py_requirement("chardet", False), + py_requirement("certifi", False), + py_requirement("docker", True), + py_requirement("docker-pycreds", False), + py_requirement("idna", False), + py_requirement("ptyprocess", False), + py_requirement("requests", False), + py_requirement("urllib3", False), + py_requirement("websocket-client", False), ], ) @@ -47,17 +47,17 @@ py_library( "//benchmarks/harness:ssh_connection", "//benchmarks/harness:tunnel_dispatcher", "//benchmarks/harness/machine_mocks", - requirement("asn1crypto", False), - requirement("chardet", False), - requirement("certifi", False), - requirement("docker", True), - requirement("docker-pycreds", False), - requirement("idna", False), - requirement("ptyprocess", False), - requirement("requests", False), - requirement("six", False), - requirement("urllib3", False), - requirement("websocket-client", False), + py_requirement("asn1crypto", False), + py_requirement("chardet", False), + py_requirement("certifi", False), + py_requirement("docker", True), + py_requirement("docker-pycreds", False), + py_requirement("idna", False), + py_requirement("ptyprocess", False), + py_requirement("requests", False), + py_requirement("six", False), + py_requirement("urllib3", False), + py_requirement("websocket-client", False), ], ) @@ -66,10 +66,10 @@ py_library( srcs = ["ssh_connection.py"], deps = [ "//benchmarks/harness", - requirement("bcrypt", False), - requirement("cffi", True), - requirement("paramiko", True), - requirement("cryptography", False), + py_requirement("bcrypt", False), + py_requirement("cffi", True), + py_requirement("paramiko", True), + py_requirement("cryptography", False), ], ) @@ -77,16 +77,16 @@ py_library( name = "tunnel_dispatcher", srcs = ["tunnel_dispatcher.py"], deps = [ - requirement("asn1crypto", False), - requirement("chardet", False), - requirement("certifi", False), - requirement("docker", True), - requirement("docker-pycreds", False), - requirement("idna", False), - requirement("pexpect", True), - requirement("ptyprocess", False), - requirement("requests", False), - requirement("urllib3", False), - requirement("websocket-client", False), + py_requirement("asn1crypto", False), + py_requirement("chardet", False), + py_requirement("certifi", False), + py_requirement("docker", True), + py_requirement("docker-pycreds", False), + py_requirement("idna", False), + py_requirement("pexpect", True), + py_requirement("ptyprocess", False), + py_requirement("requests", False), + py_requirement("urllib3", False), + py_requirement("websocket-client", False), ], ) diff --git a/benchmarks/harness/machine_producers/BUILD b/benchmarks/harness/machine_producers/BUILD index 
c4e943882..48ea0ef39 100644 --- a/benchmarks/harness/machine_producers/BUILD +++ b/benchmarks/harness/machine_producers/BUILD @@ -1,4 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "requirement") +load("//tools:defs.bzl", "py_library", "py_requirement") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -31,7 +31,7 @@ py_library( deps = [ "//benchmarks/harness:machine", "//benchmarks/harness/machine_producers:machine_producer", - requirement("PyYAML", False), + py_requirement("PyYAML", False), ], ) diff --git a/benchmarks/runner/BUILD b/benchmarks/runner/BUILD index e1b2ea550..fae0ca800 100644 --- a/benchmarks/runner/BUILD +++ b/benchmarks/runner/BUILD @@ -1,4 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") +load("//tools:defs.bzl", "py_library", "py_requirement", "py_test") package(licenses = ["notice"]) @@ -28,7 +28,7 @@ py_library( "//benchmarks/suites:startup", "//benchmarks/suites:sysbench", "//benchmarks/suites:syscall", - requirement("click", True), + py_requirement("click", True), ], ) @@ -36,7 +36,7 @@ py_library( name = "commands", srcs = ["commands.py"], deps = [ - requirement("click", True), + py_requirement("click", True), ], ) @@ -50,14 +50,14 @@ py_test( ], deps = [ ":runner", - requirement("click", True), - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("click", True), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/tcp/BUILD b/benchmarks/tcp/BUILD index 735d7127f..d5e401acc 100644 --- a/benchmarks/tcp/BUILD +++ b/benchmarks/tcp/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") -load("@rules_cc//cc:defs.bzl", "cc_binary") +load("//tools:defs.bzl", "cc_binary", "go_binary") package(licenses = ["notice"]) diff --git a/benchmarks/workloads/ab/BUILD b/benchmarks/workloads/ab/BUILD index 4fc0ab735..4dd91ceb3 100644 --- a/benchmarks/workloads/ab/BUILD +++ b/benchmarks/workloads/ab/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":ab", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/absl/BUILD b/benchmarks/workloads/absl/BUILD index 61e010096..55dae3baa 100644 --- a/benchmarks/workloads/absl/BUILD +++ b/benchmarks/workloads/absl/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", 
"py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":absl", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/curl/BUILD b/benchmarks/workloads/curl/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/curl/BUILD +++ b/benchmarks/workloads/curl/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/ffmpeg/BUILD b/benchmarks/workloads/ffmpeg/BUILD index be472dfb2..7c41ba631 100644 --- a/benchmarks/workloads/ffmpeg/BUILD +++ b/benchmarks/workloads/ffmpeg/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/fio/BUILD b/benchmarks/workloads/fio/BUILD index de257adad..7b78e8e75 100644 --- a/benchmarks/workloads/fio/BUILD +++ b/benchmarks/workloads/fio/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":fio", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/httpd/BUILD b/benchmarks/workloads/httpd/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/httpd/BUILD +++ b/benchmarks/workloads/httpd/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/iperf/BUILD b/benchmarks/workloads/iperf/BUILD index 8832a996c..570f40148 100644 --- a/benchmarks/workloads/iperf/BUILD +++ b/benchmarks/workloads/iperf/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":iperf", - requirement("attrs", False), - 
requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/netcat/BUILD b/benchmarks/workloads/netcat/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/netcat/BUILD +++ b/benchmarks/workloads/netcat/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/nginx/BUILD b/benchmarks/workloads/nginx/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/nginx/BUILD +++ b/benchmarks/workloads/nginx/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/node/BUILD b/benchmarks/workloads/node/BUILD index 71cd9f519..bfcf78cf9 100644 --- a/benchmarks/workloads/node/BUILD +++ b/benchmarks/workloads/node/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/node_template/BUILD b/benchmarks/workloads/node_template/BUILD index ca996f068..e142f082a 100644 --- a/benchmarks/workloads/node_template/BUILD +++ b/benchmarks/workloads/node_template/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/redis/BUILD b/benchmarks/workloads/redis/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/redis/BUILD +++ b/benchmarks/workloads/redis/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/redisbenchmark/BUILD b/benchmarks/workloads/redisbenchmark/BUILD index f5994a815..f472a4443 100644 --- a/benchmarks/workloads/redisbenchmark/BUILD +++ b/benchmarks/workloads/redisbenchmark/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":redisbenchmark", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/ruby/BUILD b/benchmarks/workloads/ruby/BUILD index e37d77804..a3be4fe92 
100644 --- a/benchmarks/workloads/ruby/BUILD +++ b/benchmarks/workloads/ruby/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/ruby_template/BUILD b/benchmarks/workloads/ruby_template/BUILD index 27f7c0c46..59443b14a 100644 --- a/benchmarks/workloads/ruby_template/BUILD +++ b/benchmarks/workloads/ruby_template/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/sleep/BUILD b/benchmarks/workloads/sleep/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/sleep/BUILD +++ b/benchmarks/workloads/sleep/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/sysbench/BUILD b/benchmarks/workloads/sysbench/BUILD index fd2f8f03d..3834af7ed 100644 --- a/benchmarks/workloads/sysbench/BUILD +++ b/benchmarks/workloads/sysbench/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":sysbench", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/syscall/BUILD b/benchmarks/workloads/syscall/BUILD index 5100cbb21..dba4bb1e7 100644 --- a/benchmarks/workloads/syscall/BUILD +++ b/benchmarks/workloads/syscall/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":syscall", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/tensorflow/BUILD b/benchmarks/workloads/tensorflow/BUILD index 026c3b316..a7b7742f4 100644 --- a/benchmarks/workloads/tensorflow/BUILD +++ b/benchmarks/workloads/tensorflow/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], 
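The hunks in this change all apply the same mechanical rewrite: BUILD files load their rules from a single //tools:defs.bzl instead of @io_bazel_rules_go, @rules_pkg, @rules_cc and //tools/go_stateify; the benchmarks' requirement helper becomes py_requirement; go_test targets use a library attribute in place of rules_go's embed; and the per-target importpath attributes are dropped because the wrapper can derive them. The wrapper file itself is not shown in this diff, so the following is only a sketch of what such a //tools:defs.bzl could look like, inferred from the call sites; the aliased load names and the gvisor.dev/gvisor import-path prefix are assumptions, not code taken from this patch.

# tools/defs.bzl -- hypothetical sketch, not part of this patch.
load("@io_bazel_rules_go//go:def.bzl", _go_library = "go_library", _go_test = "go_test")
load("@rules_pkg//:pkg.bzl", _pkg_tar = "pkg_tar")
load("@pydeps//:requirements.bzl", _requirement = "requirement")

pkg_tar = _pkg_tar

def go_library(name, srcs, importpath = None, **kwargs):
    # Derive the Go import path from the Bazel package so that individual
    # BUILD files no longer need an explicit importpath attribute.
    _go_library(
        name = name,
        srcs = srcs,
        importpath = importpath or ("gvisor.dev/gvisor/" + native.package_name()),
        **kwargs
    )

def go_test(name, srcs, library = None, **kwargs):
    # "library" replaces rules_go's "embed" at the call sites in this change.
    _go_test(
        name = name,
        srcs = srcs,
        embed = [library] if library else [],
        **kwargs
    )

def py_requirement(name, direct = True):
    # Same behavior as the deleted benchmarks/defs.bzl requirement helper.
    return _requirement(name)

Under the same assumption, the proto_library wrapper would be expected to emit both <name>_proto and <name>_go_proto targets, which is why deps such as :metric_go_proto and :registers_go_proto remain in the hunks below even though the explicit go_proto_library and cc_proto_library rules are deleted.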
diff --git a/benchmarks/workloads/true/BUILD b/benchmarks/workloads/true/BUILD index 221c4b9a7..eba23d325 100644 --- a/benchmarks/workloads/true/BUILD +++ b/benchmarks/workloads/true/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/pkg/abi/BUILD b/pkg/abi/BUILD index f5c08ea06..839f822eb 100644 --- a/pkg/abi/BUILD +++ b/pkg/abi/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,6 +9,5 @@ go_library( "abi_linux.go", "flag.go", ], - importpath = "gvisor.dev/gvisor/pkg/abi", visibility = ["//:sandbox"], ) diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index 716ff22d2..1f3c0c687 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") # Package linux contains the constants and types needed to interface with a # Linux kernel. It should be used instead of syscall or golang.org/x/sys/unix @@ -60,7 +59,6 @@ go_library( "wait.go", "xattr.go", ], - importpath = "gvisor.dev/gvisor/pkg/abi/linux", visibility = ["//visibility:public"], deps = [ "//pkg/abi", @@ -73,7 +71,7 @@ go_test( name = "linux_test", size = "small", srcs = ["netfilter_test.go"], - embed = [":linux"], + library = ":linux", deps = [ "//pkg/binary", ], diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD index d99e37b40..9612f072e 100644 --- a/pkg/amutex/BUILD +++ b/pkg/amutex/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "amutex", srcs = ["amutex.go"], - importpath = "gvisor.dev/gvisor/pkg/amutex", visibility = ["//:sandbox"], ) @@ -14,6 +12,6 @@ go_test( name = "amutex_test", size = "small", srcs = ["amutex_test.go"], - embed = [":amutex"], + library = ":amutex", deps = ["//pkg/sync"], ) diff --git a/pkg/atomicbitops/BUILD b/pkg/atomicbitops/BUILD index 6403c60c2..3948074ba 100644 --- a/pkg/atomicbitops/BUILD +++ b/pkg/atomicbitops/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "atomic_bitops_arm64.s", "atomic_bitops_common.go", ], - importpath = "gvisor.dev/gvisor/pkg/atomicbitops", visibility = ["//:sandbox"], ) @@ -19,6 +17,6 @@ go_test( name = "atomicbitops_test", size = "small", srcs = ["atomic_bitops_test.go"], - embed = [":atomicbitops"], + library = ":atomicbitops", deps = ["//pkg/sync"], ) diff --git a/pkg/binary/BUILD b/pkg/binary/BUILD index 543fb54bf..7ca2fda90 100644 --- a/pkg/binary/BUILD +++ b/pkg/binary/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "binary", srcs = ["binary.go"], - importpath = "gvisor.dev/gvisor/pkg/binary", visibility = ["//:sandbox"], ) @@ -14,5 +12,5 @@ go_test( name = "binary_test", size = "small", srcs = ["binary_test.go"], - embed = [":binary"], + library = ":binary", ) diff --git a/pkg/bits/BUILD b/pkg/bits/BUILD index 93b88a29a..63f4670d7 100644 --- 
a/pkg/bits/BUILD +++ b/pkg/bits/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) @@ -15,7 +14,6 @@ go_library( "uint64_arch_arm64_asm.s", "uint64_arch_generic.go", ], - importpath = "gvisor.dev/gvisor/pkg/bits", visibility = ["//:sandbox"], ) @@ -53,5 +51,5 @@ go_test( name = "bits_test", size = "small", srcs = ["uint64_test.go"], - embed = [":bits"], + library = ":bits", ) diff --git a/pkg/bpf/BUILD b/pkg/bpf/BUILD index fba5643e8..2a6977f85 100644 --- a/pkg/bpf/BUILD +++ b/pkg/bpf/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "interpreter.go", "program_builder.go", ], - importpath = "gvisor.dev/gvisor/pkg/bpf", visibility = ["//visibility:public"], deps = ["//pkg/abi/linux"], ) @@ -25,7 +23,7 @@ go_test( "interpreter_test.go", "program_builder_test.go", ], - embed = [":bpf"], + library = ":bpf", deps = [ "//pkg/abi/linux", "//pkg/binary", diff --git a/pkg/compressio/BUILD b/pkg/compressio/BUILD index 2bb581b18..1f75319a7 100644 --- a/pkg/compressio/BUILD +++ b/pkg/compressio/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "compressio", srcs = ["compressio.go"], - importpath = "gvisor.dev/gvisor/pkg/compressio", visibility = ["//:sandbox"], deps = [ "//pkg/binary", @@ -18,5 +16,5 @@ go_test( name = "compressio_test", size = "medium", srcs = ["compressio_test.go"], - embed = [":compressio"], + library = ":compressio", ) diff --git a/pkg/control/client/BUILD b/pkg/control/client/BUILD index 066d7b1a1..1b9e10ee7 100644 --- a/pkg/control/client/BUILD +++ b/pkg/control/client/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -7,7 +7,6 @@ go_library( srcs = [ "client.go", ], - importpath = "gvisor.dev/gvisor/pkg/control/client", visibility = ["//:sandbox"], deps = [ "//pkg/unet", diff --git a/pkg/control/server/BUILD b/pkg/control/server/BUILD index adbd1e3f8..002d2ef44 100644 --- a/pkg/control/server/BUILD +++ b/pkg/control/server/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "server", srcs = ["server.go"], - importpath = "gvisor.dev/gvisor/pkg/control/server", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/cpuid/BUILD b/pkg/cpuid/BUILD index ed111fd2a..43a432190 100644 --- a/pkg/cpuid/BUILD +++ b/pkg/cpuid/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "cpu_amd64.s", "cpuid.go", ], - importpath = "gvisor.dev/gvisor/pkg/cpuid", visibility = ["//:sandbox"], deps = ["//pkg/log"], ) @@ -18,7 +16,7 @@ go_test( name = "cpuid_test", size = "small", srcs = ["cpuid_test.go"], - embed = [":cpuid"], + library = ":cpuid", ) go_test( @@ -27,6 +25,6 @@ go_test( srcs = [ "cpuid_parse_test.go", 
], - embed = [":cpuid"], + library = ":cpuid", tags = ["manual"], ) diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD index 9d68682c7..bee28b68d 100644 --- a/pkg/eventchannel/BUILD +++ b/pkg/eventchannel/BUILD @@ -1,6 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") package(licenses = ["notice"]) @@ -10,7 +8,6 @@ go_library( "event.go", "rate.go", ], - importpath = "gvisor.dev/gvisor/pkg/eventchannel", visibility = ["//:sandbox"], deps = [ ":eventchannel_go_proto", @@ -24,22 +21,15 @@ go_library( ) proto_library( - name = "eventchannel_proto", + name = "eventchannel", srcs = ["event.proto"], visibility = ["//:sandbox"], ) -go_proto_library( - name = "eventchannel_go_proto", - importpath = "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto", - proto = ":eventchannel_proto", - visibility = ["//:sandbox"], -) - go_test( name = "eventchannel_test", srcs = ["event_test.go"], - embed = [":eventchannel"], + library = ":eventchannel", deps = [ "//pkg/sync", "@com_github_golang_protobuf//proto:go_default_library", diff --git a/pkg/fd/BUILD b/pkg/fd/BUILD index afa8f7659..872361546 100644 --- a/pkg/fd/BUILD +++ b/pkg/fd/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "fd", srcs = ["fd.go"], - importpath = "gvisor.dev/gvisor/pkg/fd", visibility = ["//visibility:public"], ) @@ -14,5 +12,5 @@ go_test( name = "fd_test", size = "small", srcs = ["fd_test.go"], - embed = [":fd"], + library = ":fd", ) diff --git a/pkg/fdchannel/BUILD b/pkg/fdchannel/BUILD index b0478c672..d9104ef02 100644 --- a/pkg/fdchannel/BUILD +++ b/pkg/fdchannel/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "fdchannel", srcs = ["fdchannel_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/fdchannel", visibility = ["//visibility:public"], ) @@ -14,6 +12,6 @@ go_test( name = "fdchannel_test", size = "small", srcs = ["fdchannel_test.go"], - embed = [":fdchannel"], + library = ":fdchannel", deps = ["//pkg/sync"], ) diff --git a/pkg/fdnotifier/BUILD b/pkg/fdnotifier/BUILD index 91a202a30..235dcc490 100644 --- a/pkg/fdnotifier/BUILD +++ b/pkg/fdnotifier/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "fdnotifier.go", "poll_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/fdnotifier", visibility = ["//:sandbox"], deps = [ "//pkg/sync", diff --git a/pkg/flipcall/BUILD b/pkg/flipcall/BUILD index 85bd83af1..9c5ad500b 100644 --- a/pkg/flipcall/BUILD +++ b/pkg/flipcall/BUILD @@ -1,7 +1,6 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "flipcall", @@ -13,7 +12,6 @@ go_library( "io.go", "packet_window_allocator.go", ], - importpath = "gvisor.dev/gvisor/pkg/flipcall", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", @@ -30,6 +28,6 @@ go_test( 
"flipcall_example_test.go", "flipcall_test.go", ], - embed = [":flipcall"], + library = ":flipcall", deps = ["//pkg/sync"], ) diff --git a/pkg/fspath/BUILD b/pkg/fspath/BUILD index ca540363c..ee84471b2 100644 --- a/pkg/fspath/BUILD +++ b/pkg/fspath/BUILD @@ -1,10 +1,8 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package( - default_visibility = ["//visibility:public"], - licenses = ["notice"], -) +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) go_library( name = "fspath", @@ -13,7 +11,6 @@ go_library( "builder_unsafe.go", "fspath.go", ], - importpath = "gvisor.dev/gvisor/pkg/fspath", ) go_test( @@ -23,5 +20,5 @@ go_test( "builder_test.go", "fspath_test.go", ], - embed = [":fspath"], + library = ":fspath", ) diff --git a/pkg/gate/BUILD b/pkg/gate/BUILD index f22bd070d..dd3141143 100644 --- a/pkg/gate/BUILD +++ b/pkg/gate/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -8,7 +7,6 @@ go_library( srcs = [ "gate.go", ], - importpath = "gvisor.dev/gvisor/pkg/gate", visibility = ["//visibility:public"], ) diff --git a/pkg/goid/BUILD b/pkg/goid/BUILD index 5d31e5366..ea8d2422c 100644 --- a/pkg/goid/BUILD +++ b/pkg/goid/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "goid_race.go", "goid_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/goid", visibility = ["//visibility:public"], ) @@ -22,5 +20,5 @@ go_test( "empty_test.go", "goid_test.go", ], - embed = [":goid"], + library = ":goid", ) diff --git a/pkg/ilist/BUILD b/pkg/ilist/BUILD index 34d2673ef..3f6eb07df 100644 --- a/pkg/ilist/BUILD +++ b/pkg/ilist/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( srcs = [ "interface_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/ilist", visibility = ["//visibility:public"], ) @@ -41,7 +39,7 @@ go_test( "list_test.go", "test_list.go", ], - embed = [":ilist"], + library = ":ilist", ) go_template( diff --git a/pkg/linewriter/BUILD b/pkg/linewriter/BUILD index bcde6d308..41bf104d0 100644 --- a/pkg/linewriter/BUILD +++ b/pkg/linewriter/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "linewriter", srcs = ["linewriter.go"], - importpath = "gvisor.dev/gvisor/pkg/linewriter", visibility = ["//visibility:public"], deps = ["//pkg/sync"], ) @@ -14,5 +12,5 @@ go_library( go_test( name = "linewriter_test", srcs = ["linewriter_test.go"], - embed = [":linewriter"], + library = ":linewriter", ) diff --git a/pkg/log/BUILD b/pkg/log/BUILD index 0df0f2849..935d06963 100644 --- a/pkg/log/BUILD +++ b/pkg/log/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = 
["notice"]) @@ -12,7 +11,6 @@ go_library( "json_k8s.go", "log.go", ], - importpath = "gvisor.dev/gvisor/pkg/log", visibility = [ "//visibility:public", ], @@ -29,5 +27,5 @@ go_test( "json_test.go", "log_test.go", ], - embed = [":log"], + library = ":log", ) diff --git a/pkg/memutil/BUILD b/pkg/memutil/BUILD index 7b50e2b28..9d07d98b4 100644 --- a/pkg/memutil/BUILD +++ b/pkg/memutil/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "memutil", srcs = ["memutil_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/memutil", visibility = ["//visibility:public"], deps = ["@org_golang_x_sys//unix:go_default_library"], ) diff --git a/pkg/metric/BUILD b/pkg/metric/BUILD index 9145f3233..58305009d 100644 --- a/pkg/metric/BUILD +++ b/pkg/metric/BUILD @@ -1,14 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") package(licenses = ["notice"]) go_library( name = "metric", srcs = ["metric.go"], - importpath = "gvisor.dev/gvisor/pkg/metric", visibility = ["//:sandbox"], deps = [ ":metric_go_proto", @@ -19,28 +15,15 @@ go_library( ) proto_library( - name = "metric_proto", + name = "metric", srcs = ["metric.proto"], visibility = ["//:sandbox"], ) -cc_proto_library( - name = "metric_cc_proto", - visibility = ["//:sandbox"], - deps = [":metric_proto"], -) - -go_proto_library( - name = "metric_go_proto", - importpath = "gvisor.dev/gvisor/pkg/metric/metric_go_proto", - proto = ":metric_proto", - visibility = ["//:sandbox"], -) - go_test( name = "metric_test", srcs = ["metric_test.go"], - embed = [":metric"], + library = ":metric", deps = [ ":metric_go_proto", "//pkg/eventchannel", diff --git a/pkg/p9/BUILD b/pkg/p9/BUILD index a3e05c96d..4ccc1de86 100644 --- a/pkg/p9/BUILD +++ b/pkg/p9/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package( default_visibility = ["//visibility:public"], @@ -23,7 +22,6 @@ go_library( "transport_flipcall.go", "version.go", ], - importpath = "gvisor.dev/gvisor/pkg/p9", deps = [ "//pkg/fd", "//pkg/fdchannel", @@ -47,7 +45,7 @@ go_test( "transport_test.go", "version_test.go", ], - embed = [":p9"], + library = ":p9", deps = [ "//pkg/fd", "//pkg/unet", diff --git a/pkg/p9/p9test/BUILD b/pkg/p9/p9test/BUILD index f4edd68b2..7ca67cb19 100644 --- a/pkg/p9/p9test/BUILD +++ b/pkg/p9/p9test/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_test") +load("//tools:defs.bzl", "go_binary", "go_library", "go_test") package(licenses = ["notice"]) @@ -64,7 +63,6 @@ go_library( "mocks.go", "p9test.go", ], - importpath = "gvisor.dev/gvisor/pkg/p9/p9test", visibility = ["//:sandbox"], deps = [ "//pkg/fd", @@ -80,7 +78,7 @@ go_test( name = "client_test", size = "medium", srcs = ["client_test.go"], - embed = [":p9test"], + library = ":p9test", deps = [ "//pkg/fd", "//pkg/p9", diff --git a/pkg/procid/BUILD b/pkg/procid/BUILD index b506813f0..aa3e3ac0b 100644 --- a/pkg/procid/BUILD +++ b/pkg/procid/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", 
"go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +9,6 @@ go_library( "procid_amd64.s", "procid_arm64.s", ], - importpath = "gvisor.dev/gvisor/pkg/procid", visibility = ["//visibility:public"], ) @@ -20,7 +18,7 @@ go_test( srcs = [ "procid_test.go", ], - embed = [":procid"], + library = ":procid", deps = ["//pkg/sync"], ) @@ -31,6 +29,6 @@ go_test( "procid_net_test.go", "procid_test.go", ], - embed = [":procid"], + library = ":procid", deps = ["//pkg/sync"], ) diff --git a/pkg/rand/BUILD b/pkg/rand/BUILD index 9d5b4859b..80b8ceb02 100644 --- a/pkg/rand/BUILD +++ b/pkg/rand/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "rand.go", "rand_linux.go", ], - importpath = "gvisor.dev/gvisor/pkg/rand", visibility = ["//:sandbox"], deps = [ "//pkg/sync", diff --git a/pkg/refs/BUILD b/pkg/refs/BUILD index 974d9af9b..74affc887 100644 --- a/pkg/refs/BUILD +++ b/pkg/refs/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -23,7 +22,6 @@ go_library( "refcounter_state.go", "weak_ref_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/refs", visibility = ["//:sandbox"], deps = [ "//pkg/log", @@ -35,6 +33,6 @@ go_test( name = "refs_test", size = "small", srcs = ["refcounter_test.go"], - embed = [":refs"], + library = ":refs", deps = ["//pkg/sync"], ) diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index af94e944d..742c8b79b 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_embed_data", "go_test") +load("//tools:defs.bzl", "go_binary", "go_embed_data", "go_library", "go_test") package(licenses = ["notice"]) @@ -27,7 +26,6 @@ go_library( "seccomp_rules.go", "seccomp_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/seccomp", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", @@ -43,7 +41,7 @@ go_test( "seccomp_test.go", ":victim_data", ], - embed = [":seccomp"], + library = ":seccomp", deps = [ "//pkg/abi/linux", "//pkg/binary", diff --git a/pkg/secio/BUILD b/pkg/secio/BUILD index 22abdc69f..60f63c7a6 100644 --- a/pkg/secio/BUILD +++ b/pkg/secio/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "full_reader.go", "secio.go", ], - importpath = "gvisor.dev/gvisor/pkg/secio", visibility = ["//pkg/sentry:internal"], ) @@ -17,5 +15,5 @@ go_test( name = "secio_test", size = "small", srcs = ["secio_test.go"], - embed = [":secio"], + library = ":secio", ) diff --git a/pkg/segment/test/BUILD b/pkg/segment/test/BUILD index a27c35e21..f2d8462d8 100644 --- a/pkg/segment/test/BUILD +++ b/pkg/segment/test/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package( @@ -38,7 +37,6 @@ go_library( "int_set.go", "set_functions.go", ], - importpath = "gvisor.dev/gvisor/pkg/segment/segment", deps = [ "//pkg/state", ], @@ -48,5 +46,5 @@ go_test( name = "segment_test", 
size = "small", srcs = ["segment_test.go"], - embed = [":segment"], + library = ":segment", ) diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD index 2d6379c86..e8b794179 100644 --- a/pkg/sentry/BUILD +++ b/pkg/sentry/BUILD @@ -6,6 +6,8 @@ package(licenses = ["notice"]) package_group( name = "internal", packages = [ + "//cloud/gvisor/gopkg/sentry/...", + "//cloud/gvisor/sentry/...", "//pkg/sentry/...", "//runsc/...", # Code generated by go_marshal relies on go_marshal libraries. diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 65f22af2b..51ca09b24 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -1,6 +1,4 @@ -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "proto_library") package(licenses = ["notice"]) @@ -27,7 +25,6 @@ go_library( "syscalls_amd64.go", "syscalls_arm64.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/arch", visibility = ["//:sandbox"], deps = [ ":registers_go_proto", @@ -44,20 +41,7 @@ go_library( ) proto_library( - name = "registers_proto", + name = "registers", srcs = ["registers.proto"], visibility = ["//visibility:public"], ) - -cc_proto_library( - name = "registers_cc_proto", - visibility = ["//visibility:public"], - deps = [":registers_proto"], -) - -go_proto_library( - name = "registers_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto", - proto = ":registers_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/context/BUILD b/pkg/sentry/context/BUILD index 8dc1a77b1..e13a9ce20 100644 --- a/pkg/sentry/context/BUILD +++ b/pkg/sentry/context/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "context", srcs = ["context.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/context", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/amutex", diff --git a/pkg/sentry/context/contexttest/BUILD b/pkg/sentry/context/contexttest/BUILD index 581e7aa96..f91a6d4ed 100644 --- a/pkg/sentry/context/contexttest/BUILD +++ b/pkg/sentry/context/contexttest/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "contexttest", testonly = 1, srcs = ["contexttest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/context/contexttest", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/memutil", diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index 2561a6109..e69496477 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,9 +11,8 @@ go_library( "proc.go", "state.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/control", visibility = [ - "//pkg/sentry:internal", + "//:sandbox", ], deps = [ "//pkg/abi/linux", @@ -40,7 +38,7 @@ go_test( name = "control_test", size = "small", srcs = ["proc_test.go"], - embed = [":control"], + library = ":control", deps = [ "//pkg/log", "//pkg/sentry/kernel/time", diff --git a/pkg/sentry/device/BUILD b/pkg/sentry/device/BUILD index 97fa1512c..e403cbd8b 100644 --- a/pkg/sentry/device/BUILD +++ b/pkg/sentry/device/BUILD @@ -1,12 +1,10 @@ 
-load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "device", srcs = ["device.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/device", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -18,5 +16,5 @@ go_test( name = "device_test", size = "small", srcs = ["device_test.go"], - embed = [":device"], + library = ":device", ) diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 7d5d72d5a..605d61dbe 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -44,7 +43,6 @@ go_library( "splice.go", "sync.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -129,7 +127,7 @@ go_test( "mount_test.go", "path_test.go", ], - embed = [":fs"], + library = ":fs", deps = [ "//pkg/sentry/context", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD index ae1c9cf76..c14e5405e 100644 --- a/pkg/sentry/fs/anon/BUILD +++ b/pkg/sentry/fs/anon/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "anon.go", "device.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/anon", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index a0d9e8496..0c7247bd7 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -13,7 +13,6 @@ go_library( "random.go", "tty.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/dev", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD index cc43de69d..25ef96299 100644 --- a/pkg/sentry/fs/fdpipe/BUILD +++ b/pkg/sentry/fs/fdpipe/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +9,6 @@ go_library( "pipe_opener.go", "pipe_state.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fdpipe", imports = ["gvisor.dev/gvisor/pkg/sentry/fs"], visibility = ["//pkg/sentry:internal"], deps = [ @@ -36,7 +34,7 @@ go_test( "pipe_opener_test.go", "pipe_test.go", ], - embed = [":fdpipe"], + library = ":fdpipe", deps = [ "//pkg/fd", "//pkg/fdnotifier", diff --git a/pkg/sentry/fs/filetest/BUILD b/pkg/sentry/fs/filetest/BUILD index 358dc2be3..9a7608cae 100644 --- a/pkg/sentry/fs/filetest/BUILD +++ b/pkg/sentry/fs/filetest/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "filetest", testonly = 1, srcs = ["filetest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/filetest", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 
945b6270d..9142f5bdf 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -75,7 +74,6 @@ go_library( "inode.go", "inode_cached.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fsutil", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -106,7 +104,7 @@ go_test( "dirty_set_test.go", "inode_cached_test.go", ], - embed = [":fsutil"], + library = ":fsutil", deps = [ "//pkg/sentry/context", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index fd870e8e1..cf48e7c03 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -22,7 +21,6 @@ go_library( "socket.go", "util.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/gofer", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -56,7 +54,7 @@ go_test( name = "gofer_test", size = "small", srcs = ["gofer_test.go"], - embed = [":gofer"], + library = ":gofer", deps = [ "//pkg/p9", "//pkg/p9/p9test", diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 2b581aa69..f586f47c1 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -25,7 +24,6 @@ go_library( "util_arm64_unsafe.go", "util_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/host", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -69,7 +67,7 @@ go_test( "socket_test.go", "wait_test.go", ], - embed = [":host"], + library = ":host", deps = [ "//pkg/fd", "//pkg/fdnotifier", diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD index 2c332a82a..ae3331737 100644 --- a/pkg/sentry/fs/lock/BUILD +++ b/pkg/sentry/fs/lock/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -40,7 +39,6 @@ go_library( "lock_set.go", "lock_set_functions.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/lock", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", @@ -56,5 +54,5 @@ go_test( "lock_range_test.go", "lock_test.go", ], - embed = [":lock"], + library = ":lock", ) diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index cb37c6c6b..b06bead41 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -27,7 +26,6 @@ go_library( "uptime.go", "version.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -63,7 +61,7 @@ go_test( "net_test.go", "sys_net_test.go", ], - embed = [":proc"], + library = ":proc", deps = [ 
"//pkg/abi/linux", "//pkg/sentry/context", diff --git a/pkg/sentry/fs/proc/device/BUILD b/pkg/sentry/fs/proc/device/BUILD index 0394451d4..52c9aa93d 100644 --- a/pkg/sentry/fs/proc/device/BUILD +++ b/pkg/sentry/fs/proc/device/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "device", srcs = ["device.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/device", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/sentry/device"], ) diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD index 38b246dff..310d8dd52 100644 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ b/pkg/sentry/fs/proc/seqfile/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "seqfile", srcs = ["seqfile.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -26,7 +24,7 @@ go_test( name = "seqfile_test", size = "small", srcs = ["seqfile_test.go"], - embed = [":seqfile"], + library = ":seqfile", deps = [ "//pkg/sentry/context", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD index 3fb7b0633..39c4b84f8 100644 --- a/pkg/sentry/fs/ramfs/BUILD +++ b/pkg/sentry/fs/ramfs/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "symlink.go", "tree.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ramfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -31,7 +29,7 @@ go_test( name = "ramfs_test", size = "small", srcs = ["tree_test.go"], - embed = [":ramfs"], + library = ":ramfs", deps = [ "//pkg/sentry/context/contexttest", "//pkg/sentry/fs", diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD index 25f0f124e..cc6b3bfbf 100644 --- a/pkg/sentry/fs/sys/BUILD +++ b/pkg/sentry/fs/sys/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -10,7 +10,6 @@ go_library( "fs.go", "sys.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/sys", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD index a215c1b95..092668e8d 100644 --- a/pkg/sentry/fs/timerfd/BUILD +++ b/pkg/sentry/fs/timerfd/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "timerfd", srcs = ["timerfd.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/timerfd", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD index 3400b940c..04776555f 100644 --- a/pkg/sentry/fs/tmpfs/BUILD +++ b/pkg/sentry/fs/tmpfs/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "inode_file.go", "tmpfs.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs", 
visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -41,7 +39,7 @@ go_test( name = "tmpfs_test", size = "small", srcs = ["file_test.go"], - embed = [":tmpfs"], + library = ":tmpfs", deps = [ "//pkg/sentry/context", "//pkg/sentry/fs", diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index f6f60d0cf..29f804c6c 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -14,7 +13,6 @@ go_library( "slave.go", "terminal.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tty", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -40,7 +38,7 @@ go_test( name = "tty_test", size = "small", srcs = ["tty_test.go"], - embed = [":tty"], + library = ":tty", deps = [ "//pkg/abi/linux", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index 903874141..a718920d5 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -32,7 +31,6 @@ go_library( "symlink.go", "utils.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -71,7 +69,7 @@ go_test( "//pkg/sentry/fsimpl/ext:assets/tiny.ext3", "//pkg/sentry/fsimpl/ext:assets/tiny.ext4", ], - embed = [":ext"], + library = ":ext", deps = [ "//pkg/abi/linux", "//pkg/binary", diff --git a/pkg/sentry/fsimpl/ext/benchmark/BUILD b/pkg/sentry/fsimpl/ext/benchmark/BUILD index 4fc8296ef..12f3990c1 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/BUILD +++ b/pkg/sentry/fsimpl/ext/benchmark/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_test") package(licenses = ["notice"]) diff --git a/pkg/sentry/fsimpl/ext/disklayout/BUILD b/pkg/sentry/fsimpl/ext/disklayout/BUILD index fcfaf5c3e..9bd9c76c0 100644 --- a/pkg/sentry/fsimpl/ext/disklayout/BUILD +++ b/pkg/sentry/fsimpl/ext/disklayout/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -23,7 +22,6 @@ go_library( "superblock_old.go", "test_utils.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -44,6 +42,6 @@ go_test( "inode_test.go", "superblock_test.go", ], - embed = [":disklayout"], + library = ":disklayout", deps = ["//pkg/sentry/kernel/time"], ) diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 66d409785..7bf83ccba 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -1,8 +1,7 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -package(licenses = ["notice"]) +licenses(["notice"]) go_template_instance( name = "slot_list", @@ -27,7 +26,6 @@ go_library( "slot_list.go", "symlink.go", ], - importpath = 
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index c5b79fb38..3768f55b2 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -1,7 +1,6 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "proc", @@ -15,7 +14,6 @@ go_library( "tasks_net.go", "tasks_sys.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc", deps = [ "//pkg/abi/linux", "//pkg/log", @@ -47,7 +45,7 @@ go_test( "tasks_sys_test.go", "tasks_test.go", ], - embed = [":proc"], + library = ":proc", deps = [ "//pkg/abi/linux", "//pkg/fspath", diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index ee3c842bd..beda141f1 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -1,14 +1,12 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "sys", srcs = [ "sys.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys", deps = [ "//pkg/abi/linux", "//pkg/sentry/context", diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD index 4e70d84a7..12053a5b6 100644 --- a/pkg/sentry/fsimpl/testutil/BUILD +++ b/pkg/sentry/fsimpl/testutil/BUILD @@ -1,6 +1,6 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "testutil", @@ -9,7 +9,6 @@ go_library( "kernel.go", "testutil.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 691476b4f..857e98bc5 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -1,8 +1,7 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -package(licenses = ["notice"]) +licenses(["notice"]) go_template_instance( name = "dentry_list", @@ -28,7 +27,6 @@ go_library( "symlink.go", "tmpfs.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs", deps = [ "//pkg/abi/linux", "//pkg/amutex", @@ -81,7 +79,7 @@ go_test( "regular_file_test.go", "stat_test.go", ], - embed = [":tmpfs"], + library = ":tmpfs", deps = [ "//pkg/abi/linux", "//pkg/fspath", diff --git a/pkg/sentry/hostcpu/BUILD b/pkg/sentry/hostcpu/BUILD index 359468ccc..e6933aa70 100644 --- a/pkg/sentry/hostcpu/BUILD +++ b/pkg/sentry/hostcpu/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +9,6 @@ go_library( "getcpu_arm64.s", "hostcpu.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/hostcpu", visibility = ["//:sandbox"], ) @@ -18,5 +16,5 @@ go_test( name = "hostcpu_test", size = "small", srcs = ["hostcpu_test.go"], - embed = [":hostcpu"], + library = ":hostcpu", ) diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD index 
67831d5a1..a145a5ca3 100644 --- a/pkg/sentry/hostmm/BUILD +++ b/pkg/sentry/hostmm/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "cgroup.go", "hostmm.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/hostmm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/fd", diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD index 8d60ad4ad..aa621b724 100644 --- a/pkg/sentry/inet/BUILD +++ b/pkg/sentry/inet/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package( default_visibility = ["//:sandbox"], @@ -12,7 +12,6 @@ go_library( "inet.go", "test_stack.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/inet", deps = [ "//pkg/sentry/context", "//pkg/tcpip/stack", diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index ac85ba0c8..cebaccd92 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -1,8 +1,5 @@ -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -78,26 +75,12 @@ go_template_instance( ) proto_library( - name = "uncaught_signal_proto", + name = "uncaught_signal", srcs = ["uncaught_signal.proto"], visibility = ["//visibility:public"], deps = ["//pkg/sentry/arch:registers_proto"], ) -cc_proto_library( - name = "uncaught_signal_cc_proto", - visibility = ["//visibility:public"], - deps = [":uncaught_signal_proto"], -) - -go_proto_library( - name = "uncaught_signal_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto", - proto = ":uncaught_signal_proto", - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_go_proto"], -) - go_library( name = "kernel", srcs = [ @@ -156,7 +139,6 @@ go_library( "vdso.go", "version.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel", imports = [ "gvisor.dev/gvisor/pkg/bpf", "gvisor.dev/gvisor/pkg/sentry/device", @@ -227,7 +209,7 @@ go_test( "task_test.go", "timekeeper_test.go", ], - embed = [":kernel"], + library = ":kernel", deps = [ "//pkg/abi", "//pkg/sentry/arch", diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 1aa72fa47..64537c9be 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -57,7 +57,6 @@ go_library( "id_map_set.go", "user_namespace.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/auth", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD index 3a88a585c..daff608d7 100644 --- a/pkg/sentry/kernel/contexttest/BUILD +++ b/pkg/sentry/kernel/contexttest/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "contexttest", testonly = 1, srcs = ["contexttest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest", visibility = 
["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD index c47f6b6fc..19e16ab3a 100644 --- a/pkg/sentry/kernel/epoll/BUILD +++ b/pkg/sentry/kernel/epoll/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -23,7 +22,6 @@ go_library( "epoll_list.go", "epoll_state.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/epoll", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/refs", @@ -43,7 +41,7 @@ go_test( srcs = [ "epoll_test.go", ], - embed = [":epoll"], + library = ":epoll", deps = [ "//pkg/sentry/context/contexttest", "//pkg/sentry/fs/filetest", diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD index c831fbab2..ee2d74864 100644 --- a/pkg/sentry/kernel/eventfd/BUILD +++ b/pkg/sentry/kernel/eventfd/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "eventfd", srcs = ["eventfd.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -26,7 +24,7 @@ go_test( name = "eventfd_test", size = "small", srcs = ["eventfd_test.go"], - embed = [":eventfd"], + library = ":eventfd", deps = [ "//pkg/sentry/context/contexttest", "//pkg/sentry/usermem", diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD index 6b36bc63e..b9126e946 100644 --- a/pkg/sentry/kernel/fasync/BUILD +++ b/pkg/sentry/kernel/fasync/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "fasync", srcs = ["fasync.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/fasync", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index 50db443ce..f413d8ae2 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -34,7 +33,6 @@ go_library( "futex.go", "waiter_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/futex", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -51,7 +49,7 @@ go_test( name = "futex_test", size = "small", srcs = ["futex_test.go"], - embed = [":futex"], + library = ":futex", deps = [ "//pkg/sentry/usermem", "//pkg/sync", diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD index 7f36252a9..4486848d2 100644 --- a/pkg/sentry/kernel/memevent/BUILD +++ b/pkg/sentry/kernel/memevent/BUILD @@ -1,13 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "proto_library") package(licenses = ["notice"]) go_library( name = "memevent", srcs = ["memory_events.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent", 
visibility = ["//:sandbox"], deps = [ ":memory_events_go_proto", @@ -21,20 +18,7 @@ go_library( ) proto_library( - name = "memory_events_proto", + name = "memory_events", srcs = ["memory_events.proto"], visibility = ["//visibility:public"], ) - -cc_proto_library( - name = "memory_events_cc_proto", - visibility = ["//visibility:public"], - deps = [":memory_events_proto"], -) - -go_proto_library( - name = "memory_events_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto", - proto = ":memory_events_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 5eeaeff66..2c7b6206f 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -30,7 +29,6 @@ go_library( "vfs.go", "writer.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/pipe", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -57,7 +55,7 @@ go_test( "node_test.go", "pipe_test.go", ], - embed = [":pipe"], + library = ":pipe", deps = [ "//pkg/sentry/context", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/kernel/sched/BUILD b/pkg/sentry/kernel/sched/BUILD index 98ea7a0d8..1b82e087b 100644 --- a/pkg/sentry/kernel/sched/BUILD +++ b/pkg/sentry/kernel/sched/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "cpuset.go", "sched.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/sched", visibility = ["//pkg/sentry:internal"], ) @@ -17,5 +15,5 @@ go_test( name = "sched_test", size = "small", srcs = ["cpuset_test.go"], - embed = [":sched"], + library = ":sched", ) diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index 13a961594..76e19b551 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -22,7 +21,6 @@ go_library( "semaphore.go", "waiter_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -40,7 +38,7 @@ go_test( name = "semaphore_test", size = "small", srcs = ["semaphore_test.go"], - embed = [":semaphore"], + library = ":semaphore", deps = [ "//pkg/abi/linux", "//pkg/sentry/context", diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 7321b22ed..5547c5abf 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "device.go", "shm.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/shm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 89e4d84b1..5d44773d4 100644 --- 
a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "signalfd", srcs = ["signalfd.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/signalfd", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD index 4e4de0512..d49594d9f 100644 --- a/pkg/sentry/kernel/time/BUILD +++ b/pkg/sentry/kernel/time/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "context.go", "time.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/time", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/limits/BUILD b/pkg/sentry/limits/BUILD index 9fa841e8b..67869757f 100644 --- a/pkg/sentry/limits/BUILD +++ b/pkg/sentry/limits/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +9,6 @@ go_library( "limits.go", "linux.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/limits", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", @@ -25,5 +23,5 @@ go_test( srcs = [ "limits_test.go", ], - embed = [":limits"], + library = ":limits", ) diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD index 2890393bd..d4ad2bd6c 100644 --- a/pkg/sentry/loader/BUILD +++ b/pkg/sentry/loader/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_embed_data") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_embed_data", "go_library") package(licenses = ["notice"]) @@ -20,7 +19,6 @@ go_library( "vdso_state.go", ":vdso_bin", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/loader", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi", diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index 112794e9c..f9a65f086 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -37,7 +36,6 @@ go_library( "mapping_set_impl.go", "memmap.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/memmap", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", @@ -52,6 +50,6 @@ go_test( name = "memmap_test", size = "small", srcs = ["mapping_set_test.go"], - embed = [":memmap"], + library = ":memmap", deps = ["//pkg/sentry/usermem"], ) diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index 83e248431..bd6399fa2 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -96,7 +95,6 @@ go_library( "vma.go", "vma_set.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/mm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -128,7 +126,7 @@ go_test( name = "mm_test", size = "small", srcs = ["mm_test.go"], - 
embed = [":mm"], + library = ":mm", deps = [ "//pkg/sentry/arch", "//pkg/sentry/context", diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index a9a2642c5..02385a3ce 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -60,7 +59,6 @@ go_library( "save_restore.go", "usage_set.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/pgalloc", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", @@ -82,6 +80,6 @@ go_test( name = "pgalloc_test", size = "small", srcs = ["pgalloc_test.go"], - embed = [":pgalloc"], + library = ":pgalloc", deps = ["//pkg/sentry/usermem"], ) diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD index 157bffa81..006450b2d 100644 --- a/pkg/sentry/platform/BUILD +++ b/pkg/sentry/platform/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -22,7 +22,6 @@ go_library( "mmap_min_addr.go", "platform.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/platform/interrupt/BUILD b/pkg/sentry/platform/interrupt/BUILD index 85e882df9..83b385f14 100644 --- a/pkg/sentry/platform/interrupt/BUILD +++ b/pkg/sentry/platform/interrupt/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -8,7 +7,6 @@ go_library( srcs = [ "interrupt.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/interrupt", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/sync"], ) @@ -17,5 +15,5 @@ go_test( name = "interrupt_test", size = "small", srcs = ["interrupt_test.go"], - embed = [":interrupt"], + library = ":interrupt", ) diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 6a358d1d4..a4532a766 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -38,7 +37,6 @@ go_library( "physical_map_arm64.go", "virtual_map.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -65,7 +63,7 @@ go_test( "kvm_test.go", "virtual_map_test.go", ], - embed = [":kvm"], + library = ":kvm", tags = [ "manual", "nogotsan", diff --git a/pkg/sentry/platform/kvm/testutil/BUILD b/pkg/sentry/platform/kvm/testutil/BUILD index b0e45f159..f7605df8a 100644 --- a/pkg/sentry/platform/kvm/testutil/BUILD +++ b/pkg/sentry/platform/kvm/testutil/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -12,6 +12,5 @@ go_library( "testutil_arm64.go", "testutil_arm64.s", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil", visibility = ["//pkg/sentry/platform/kvm:__pkg__"], ) diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index 
cd13390c3..3bcc5e040 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -20,7 +20,6 @@ go_library( "subprocess_linux_unsafe.go", "subprocess_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ptrace", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD index 87f4552b5..6dee8fcc5 100644 --- a/pkg/sentry/platform/ring0/BUILD +++ b/pkg/sentry/platform/ring0/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) @@ -74,7 +74,6 @@ go_library( "lib_arm64.s", "ring0.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/cpuid", diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD index 42076fb04..147311ed3 100644 --- a/pkg/sentry/platform/ring0/gen_offsets/BUILD +++ b/pkg/sentry/platform/ring0/gen_offsets/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 387a7f6c3..8b5cdd6c1 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -1,17 +1,14 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test", "select_arch") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) -config_setting( - name = "aarch64", - constraint_values = ["@bazel_tools//platforms:aarch64"], -) - go_template( name = "generic_walker", - srcs = ["walker_amd64.go"], + srcs = select_arch( + amd64 = ["walker_amd64.go"], + arm64 = ["walker_amd64.go"], + ), opt_types = [ "Visitor", ], @@ -91,7 +88,6 @@ go_library( "walker_map.go", "walker_unmap.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables", visibility = [ "//pkg/sentry/platform/kvm:__subpackages__", "//pkg/sentry/platform/ring0:__subpackages__", @@ -111,6 +107,6 @@ go_test( "pagetables_test.go", "walker_check.go", ], - embed = [":pagetables"], + library = ":pagetables", deps = ["//pkg/sentry/usermem"], ) diff --git a/pkg/sentry/platform/safecopy/BUILD b/pkg/sentry/platform/safecopy/BUILD index 6769cd0a5..b8747585b 100644 --- a/pkg/sentry/platform/safecopy/BUILD +++ b/pkg/sentry/platform/safecopy/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -17,7 +16,6 @@ go_library( "sighandler_amd64.s", "sighandler_arm64.s", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/safecopy", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/syserror"], ) @@ -27,5 +25,5 @@ go_test( srcs = [ "safecopy_test.go", ], - embed = [":safecopy"], + library = ":safecopy", ) diff --git a/pkg/sentry/safemem/BUILD b/pkg/sentry/safemem/BUILD index 884020f7b..3ab76da97 100644 --- 
a/pkg/sentry/safemem/BUILD +++ b/pkg/sentry/safemem/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "safemem.go", "seq_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/safemem", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/platform/safecopy", @@ -25,5 +23,5 @@ go_test( "io_test.go", "seq_test.go", ], - embed = [":safemem"], + library = ":safemem", ) diff --git a/pkg/sentry/sighandling/BUILD b/pkg/sentry/sighandling/BUILD index f561670c7..6c38a3f44 100644 --- a/pkg/sentry/sighandling/BUILD +++ b/pkg/sentry/sighandling/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "sighandling.go", "sighandling_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/sighandling", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/abi/linux"], ) diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD index 26176b10d..8e2b97afb 100644 --- a/pkg/sentry/socket/BUILD +++ b/pkg/sentry/socket/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "socket", srcs = ["socket.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD index 357517ed4..3850f6345 100644 --- a/pkg/sentry/socket/control/BUILD +++ b/pkg/sentry/socket/control/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "control", srcs = ["control.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/control", imports = [ "gvisor.dev/gvisor/pkg/sentry/fs", ], diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 4c44c7c0f..42bf7be6a 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -12,7 +12,6 @@ go_library( "socket_unsafe.go", "stack.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/hostinet", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index b70047d81..ed34a8308 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -7,7 +7,6 @@ go_library( srcs = [ "netfilter.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netfilter", # This target depends on netstack and should only be used by epsocket, # which is allowed to depend on netstack. 
visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 103933144..baaac13c6 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "provider.go", "socket.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD index 2d9f4ba9b..3a22923d8 100644 --- a/pkg/sentry/socket/netlink/port/BUILD +++ b/pkg/sentry/socket/netlink/port/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "port", srcs = ["port.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/sync"], ) @@ -14,5 +12,5 @@ go_library( go_test( name = "port_test", srcs = ["port_test.go"], - embed = [":port"], + library = ":port", ) diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD index 1d4912753..2137c7aeb 100644 --- a/pkg/sentry/socket/netlink/route/BUILD +++ b/pkg/sentry/socket/netlink/route/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "route", srcs = ["protocol.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netlink/uevent/BUILD b/pkg/sentry/socket/netlink/uevent/BUILD index 0777f3baf..73fbdf1eb 100644 --- a/pkg/sentry/socket/netlink/uevent/BUILD +++ b/pkg/sentry/socket/netlink/uevent/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "uevent", srcs = ["protocol.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/uevent", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index f78784569..e3d1f90cb 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -11,7 +11,6 @@ go_library( "save_restore.go", "stack.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netstack", visibility = [ "//pkg/sentry:internal", ], diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index 5b6a154f6..bade18686 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "io.go", "unix.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD index d7ba95dff..4bdfc9208 100644 --- a/pkg/sentry/socket/unix/transport/BUILD +++ 
b/pkg/sentry/socket/unix/transport/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -25,7 +25,6 @@ go_library( "transport_message_list.go", "unix.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/state/BUILD b/pkg/sentry/state/BUILD index 88765f4d6..0ea4aab8b 100644 --- a/pkg/sentry/state/BUILD +++ b/pkg/sentry/state/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "state_metadata.go", "state_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/state", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD index aa1ac720c..ff6fafa63 100644 --- a/pkg/sentry/strace/BUILD +++ b/pkg/sentry/strace/BUILD @@ -1,6 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "proto_library") package(licenses = ["notice"]) @@ -21,7 +19,6 @@ go_library( "strace.go", "syscalls.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/strace", visibility = ["//:sandbox"], deps = [ ":strace_go_proto", @@ -42,20 +39,7 @@ go_library( ) proto_library( - name = "strace_proto", + name = "strace", srcs = ["strace.proto"], visibility = ["//visibility:public"], ) - -cc_proto_library( - name = "strace_cc_proto", - visibility = ["//visibility:public"], - deps = [":strace_proto"], -) - -go_proto_library( - name = "strace_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto", - proto = ":strace_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD index 79d972202..b8d1bd415 100644 --- a/pkg/sentry/syscalls/BUILD +++ b/pkg/sentry/syscalls/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "epoll.go", "syscalls.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 917f74e07..7d74e0f70 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -57,7 +57,6 @@ go_library( "sys_xattr.go", "timespec.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls/linux", visibility = ["//:sandbox"], deps = [ "//pkg/abi", diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD index 3cde3a0be..04f81a35b 100644 --- a/pkg/sentry/time/BUILD +++ b/pkg/sentry/time/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -31,7 +30,6 @@ go_library( "tsc_amd64.s", "tsc_arm64.s", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/time", visibility = ["//:sandbox"], deps = [ "//pkg/log", @@ 
-48,5 +46,5 @@ go_test( "parameters_test.go", "sampler_test.go", ], - embed = [":time"], + library = ":time", ) diff --git a/pkg/sentry/unimpl/BUILD b/pkg/sentry/unimpl/BUILD index fc7614fff..370fa6ec5 100644 --- a/pkg/sentry/unimpl/BUILD +++ b/pkg/sentry/unimpl/BUILD @@ -1,34 +1,17 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "proto_library") package(licenses = ["notice"]) proto_library( - name = "unimplemented_syscall_proto", + name = "unimplemented_syscall", srcs = ["unimplemented_syscall.proto"], visibility = ["//visibility:public"], deps = ["//pkg/sentry/arch:registers_proto"], ) -cc_proto_library( - name = "unimplemented_syscall_cc_proto", - visibility = ["//visibility:public"], - deps = [":unimplemented_syscall_proto"], -) - -go_proto_library( - name = "unimplemented_syscall_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto", - proto = ":unimplemented_syscall_proto", - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_go_proto"], -) - go_library( name = "unimpl", srcs = ["events.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/sentry/uniqueid/BUILD b/pkg/sentry/uniqueid/BUILD index 86a87edd4..e9c18f170 100644 --- a/pkg/sentry/uniqueid/BUILD +++ b/pkg/sentry/uniqueid/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "uniqueid", srcs = ["context.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/uniqueid", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/usage/BUILD b/pkg/sentry/usage/BUILD index 5518ac3d0..099315613 100644 --- a/pkg/sentry/usage/BUILD +++ b/pkg/sentry/usage/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -11,9 +11,8 @@ go_library( "memory_unsafe.go", "usage.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/usage", visibility = [ - "//pkg/sentry:internal", + "//:sandbox", ], deps = [ "//pkg/bits", diff --git a/pkg/sentry/usermem/BUILD b/pkg/sentry/usermem/BUILD index 684f59a6b..c8322e29e 100644 --- a/pkg/sentry/usermem/BUILD +++ b/pkg/sentry/usermem/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -29,7 +28,6 @@ go_library( "usermem_unsafe.go", "usermem_x86.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/usermem", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/atomicbitops", @@ -38,7 +36,6 @@ go_library( "//pkg/sentry/context", "//pkg/sentry/safemem", "//pkg/syserror", - "//pkg/tcpip/buffer", ], ) @@ -49,7 +46,7 @@ go_test( "addr_range_seq_test.go", "usermem_test.go", ], - embed = [":usermem"], + library = ":usermem", deps = [ "//pkg/sentry/context", "//pkg/sentry/safemem", diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 35c7be259..51acdc4e9 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -1,7 +1,6 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", 
"go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "vfs", @@ -24,7 +23,6 @@ go_library( "testutil.go", "vfs.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/vfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -47,7 +45,7 @@ go_test( "file_description_impl_util_test.go", "mount_test.go", ], - embed = [":vfs"], + library = ":vfs", deps = [ "//pkg/abi/linux", "//pkg/sentry/context", diff --git a/pkg/sentry/watchdog/BUILD b/pkg/sentry/watchdog/BUILD index 28f21f13d..1c5a1c9b6 100644 --- a/pkg/sentry/watchdog/BUILD +++ b/pkg/sentry/watchdog/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "watchdog", srcs = ["watchdog.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/watchdog", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sleep/BUILD b/pkg/sleep/BUILD index a23c86fb1..e131455f7 100644 --- a/pkg/sleep/BUILD +++ b/pkg/sleep/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "commit_noasm.go", "sleep_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sleep", visibility = ["//:sandbox"], ) @@ -22,5 +20,5 @@ go_test( srcs = [ "sleep_test.go", ], - embed = [":sleep"], + library = ":sleep", ) diff --git a/pkg/state/BUILD b/pkg/state/BUILD index be93750bf..921af9d63 100644 --- a/pkg/state/BUILD +++ b/pkg/state/BUILD @@ -1,6 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -49,7 +47,7 @@ go_library( "state.go", "stats.go", ], - importpath = "gvisor.dev/gvisor/pkg/state", + stateify = False, visibility = ["//:sandbox"], deps = [ ":object_go_proto", @@ -58,21 +56,14 @@ go_library( ) proto_library( - name = "object_proto", + name = "object", srcs = ["object.proto"], visibility = ["//:sandbox"], ) -go_proto_library( - name = "object_go_proto", - importpath = "gvisor.dev/gvisor/pkg/state/object_go_proto", - proto = ":object_proto", - visibility = ["//:sandbox"], -) - go_test( name = "state_test", timeout = "long", srcs = ["state_test.go"], - embed = [":state"], + library = ":state", ) diff --git a/pkg/state/statefile/BUILD b/pkg/state/statefile/BUILD index 8a865d229..e7581c09b 100644 --- a/pkg/state/statefile/BUILD +++ b/pkg/state/statefile/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "statefile", srcs = ["statefile.go"], - importpath = "gvisor.dev/gvisor/pkg/state/statefile", visibility = ["//:sandbox"], deps = [ "//pkg/binary", @@ -18,6 +16,6 @@ go_test( name = "statefile_test", size = "small", srcs = ["statefile_test.go"], - embed = [":statefile"], + library = ":statefile", deps = ["//pkg/compressio"], ) diff --git a/pkg/sync/BUILD b/pkg/sync/BUILD index 97c4b3b1e..5340cf0d6 100644 --- a/pkg/sync/BUILD +++ b/pkg/sync/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") 
+load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template") package( @@ -40,7 +39,6 @@ go_library( "syncutil.go", "tmutex_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sync", ) go_test( @@ -51,5 +49,5 @@ go_test( "seqcount_test.go", "tmutex_test.go", ], - embed = [":sync"], + library = ":sync", ) diff --git a/pkg/sync/atomicptrtest/BUILD b/pkg/sync/atomicptrtest/BUILD index 418eda29c..e97553254 100644 --- a/pkg/sync/atomicptrtest/BUILD +++ b/pkg/sync/atomicptrtest/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -18,12 +17,11 @@ go_template_instance( go_library( name = "atomicptr", srcs = ["atomicptr_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/sync/atomicptr", ) go_test( name = "atomicptr_test", size = "small", srcs = ["atomicptr_test.go"], - embed = [":atomicptr"], + library = ":atomicptr", ) diff --git a/pkg/sync/seqatomictest/BUILD b/pkg/sync/seqatomictest/BUILD index eba21518d..5c38c783e 100644 --- a/pkg/sync/seqatomictest/BUILD +++ b/pkg/sync/seqatomictest/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -18,7 +17,6 @@ go_template_instance( go_library( name = "seqatomic", srcs = ["seqatomic_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/sync/seqatomic", deps = [ "//pkg/sync", ], @@ -28,6 +26,6 @@ go_test( name = "seqatomic_test", size = "small", srcs = ["seqatomic_test.go"], - embed = [":seqatomic"], + library = ":seqatomic", deps = ["//pkg/sync"], ) diff --git a/pkg/syserr/BUILD b/pkg/syserr/BUILD index 5665ad4ee..7d760344a 100644 --- a/pkg/syserr/BUILD +++ b/pkg/syserr/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "netstack.go", "syserr.go", ], - importpath = "gvisor.dev/gvisor/pkg/syserr", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", diff --git a/pkg/syserror/BUILD b/pkg/syserror/BUILD index bd3f9fd28..b13c15d9b 100644 --- a/pkg/syserror/BUILD +++ b/pkg/syserror/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "syserror", srcs = ["syserror.go"], - importpath = "gvisor.dev/gvisor/pkg/syserror", visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index 23e4b09e7..26f7ba86b 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "time_unsafe.go", "timer.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip", visibility = ["//visibility:public"], deps = [ "//pkg/sync", @@ -25,7 +23,7 @@ go_test( name = "tcpip_test", size = "small", srcs = ["tcpip_test.go"], - embed = [":tcpip"], + library = ":tcpip", ) go_test( diff --git a/pkg/tcpip/adapters/gonet/BUILD b/pkg/tcpip/adapters/gonet/BUILD index 
3df7d18d3..a984f1712 100644 --- a/pkg/tcpip/adapters/gonet/BUILD +++ b/pkg/tcpip/adapters/gonet/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "gonet", srcs = ["gonet.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet", visibility = ["//visibility:public"], deps = [ "//pkg/sync", @@ -23,7 +21,7 @@ go_test( name = "gonet_test", size = "small", srcs = ["gonet_test.go"], - embed = [":gonet"], + library = ":gonet", deps = [ "//pkg/tcpip", "//pkg/tcpip/header", diff --git a/pkg/tcpip/buffer/BUILD b/pkg/tcpip/buffer/BUILD index d6c31bfa2..563bc78ea 100644 --- a/pkg/tcpip/buffer/BUILD +++ b/pkg/tcpip/buffer/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "prependable.go", "view.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/buffer", visibility = ["//visibility:public"], ) @@ -17,5 +15,5 @@ go_test( name = "buffer_test", size = "small", srcs = ["view_test.go"], - embed = [":buffer"], + library = ":buffer", ) diff --git a/pkg/tcpip/checker/BUILD b/pkg/tcpip/checker/BUILD index b6fa6fc37..ed434807f 100644 --- a/pkg/tcpip/checker/BUILD +++ b/pkg/tcpip/checker/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "checker", testonly = 1, srcs = ["checker.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/checker", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/hash/jenkins/BUILD b/pkg/tcpip/hash/jenkins/BUILD index e648efa71..ff2719291 100644 --- a/pkg/tcpip/hash/jenkins/BUILD +++ b/pkg/tcpip/hash/jenkins/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "jenkins", srcs = ["jenkins.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins", visibility = ["//visibility:public"], ) @@ -16,5 +14,5 @@ go_test( srcs = [ "jenkins_test.go", ], - embed = [":jenkins"], + library = ":jenkins", ) diff --git a/pkg/tcpip/header/BUILD b/pkg/tcpip/header/BUILD index cd747d100..9da0d71f8 100644 --- a/pkg/tcpip/header/BUILD +++ b/pkg/tcpip/header/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -24,7 +23,6 @@ go_library( "tcp.go", "udp.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/header", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", @@ -59,7 +57,7 @@ go_test( "eth_test.go", "ndp_test.go", ], - embed = [":header"], + library = ":header", deps = [ "//pkg/tcpip", "@com_github_google_go-cmp//cmp:go_default_library", diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD index 297eaccaf..d1b73cfdf 100644 --- a/pkg/tcpip/iptables/BUILD +++ b/pkg/tcpip/iptables/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "targets.go", "types.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/iptables", 
visibility = ["//visibility:public"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD index 7dbc05754..3974c464e 100644 --- a/pkg/tcpip/link/channel/BUILD +++ b/pkg/tcpip/link/channel/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "channel", srcs = ["channel.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/channel", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD index 66cc53ed4..abe725548 100644 --- a/pkg/tcpip/link/fdbased/BUILD +++ b/pkg/tcpip/link/fdbased/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -13,7 +12,6 @@ go_library( "mmap_unsafe.go", "packet_dispatchers.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/fdbased", visibility = ["//visibility:public"], deps = [ "//pkg/sync", @@ -30,7 +28,7 @@ go_test( name = "fdbased_test", size = "small", srcs = ["endpoint_test.go"], - embed = [":fdbased"], + library = ":fdbased", deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/link/loopback/BUILD b/pkg/tcpip/link/loopback/BUILD index f35fcdff4..6bf3805b7 100644 --- a/pkg/tcpip/link/loopback/BUILD +++ b/pkg/tcpip/link/loopback/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "loopback", srcs = ["loopback.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/loopback", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD index 1ac7948b6..82b441b79 100644 --- a/pkg/tcpip/link/muxed/BUILD +++ b/pkg/tcpip/link/muxed/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "muxed", srcs = ["injectable.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/muxed", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", @@ -19,7 +17,7 @@ go_test( name = "muxed_test", size = "small", srcs = ["injectable_test.go"], - embed = [":muxed"], + library = ":muxed", deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD index d8211e93d..14b527bc2 100644 --- a/pkg/tcpip/link/rawfile/BUILD +++ b/pkg/tcpip/link/rawfile/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -12,7 +12,6 @@ go_library( "errors.go", "rawfile_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/rawfile", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/sharedmem/BUILD b/pkg/tcpip/link/sharedmem/BUILD index 09165dd4c..13243ebbb 100644 --- a/pkg/tcpip/link/sharedmem/BUILD +++ b/pkg/tcpip/link/sharedmem/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "sharedmem_unsafe.go", "tx.go", ], - importpath = 
"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem", visibility = ["//visibility:public"], deps = [ "//pkg/log", @@ -30,7 +28,7 @@ go_test( srcs = [ "sharedmem_test.go", ], - embed = [":sharedmem"], + library = ":sharedmem", deps = [ "//pkg/sync", "//pkg/tcpip", diff --git a/pkg/tcpip/link/sharedmem/pipe/BUILD b/pkg/tcpip/link/sharedmem/pipe/BUILD index a0d4ad0be..87020ec08 100644 --- a/pkg/tcpip/link/sharedmem/pipe/BUILD +++ b/pkg/tcpip/link/sharedmem/pipe/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "rx.go", "tx.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe", visibility = ["//visibility:public"], ) @@ -20,6 +18,6 @@ go_test( srcs = [ "pipe_test.go", ], - embed = [":pipe"], + library = ":pipe", deps = ["//pkg/sync"], ) diff --git a/pkg/tcpip/link/sharedmem/queue/BUILD b/pkg/tcpip/link/sharedmem/queue/BUILD index 8c9234d54..3ba06af73 100644 --- a/pkg/tcpip/link/sharedmem/queue/BUILD +++ b/pkg/tcpip/link/sharedmem/queue/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "rx.go", "tx.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue", visibility = ["//visibility:public"], deps = [ "//pkg/log", @@ -22,7 +20,7 @@ go_test( srcs = [ "queue_test.go", ], - embed = [":queue"], + library = ":queue", deps = [ "//pkg/tcpip/link/sharedmem/pipe", ], diff --git a/pkg/tcpip/link/sniffer/BUILD b/pkg/tcpip/link/sniffer/BUILD index d6ae0368a..230a8d53a 100644 --- a/pkg/tcpip/link/sniffer/BUILD +++ b/pkg/tcpip/link/sniffer/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "pcap.go", "sniffer.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sniffer", visibility = ["//visibility:public"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index a71a493fc..e5096ea38 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -1,10 +1,9 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "tun", srcs = ["tun_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/tun", visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD index 134837943..0956d2c65 100644 --- a/pkg/tcpip/link/waitable/BUILD +++ b/pkg/tcpip/link/waitable/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -8,7 +7,6 @@ go_library( srcs = [ "waitable.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/waitable", visibility = ["//visibility:public"], deps = [ "//pkg/gate", @@ -23,7 +21,7 @@ go_test( srcs = [ "waitable_test.go", ], - embed = [":waitable"], + library = ":waitable", deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/network/BUILD b/pkg/tcpip/network/BUILD index 9d16ff8c9..6a4839fb8 100644 --- a/pkg/tcpip/network/BUILD +++ b/pkg/tcpip/network/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") 
+load("//tools:defs.bzl", "go_test") package(licenses = ["notice"]) diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD index e7617229b..eddf7b725 100644 --- a/pkg/tcpip/network/arp/BUILD +++ b/pkg/tcpip/network/arp/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "arp", srcs = ["arp.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/arp", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD index ed16076fd..d1c728ccf 100644 --- a/pkg/tcpip/network/fragmentation/BUILD +++ b/pkg/tcpip/network/fragmentation/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -24,7 +23,6 @@ go_library( "reassembler.go", "reassembler_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation", visibility = ["//visibility:public"], deps = [ "//pkg/log", @@ -42,6 +40,6 @@ go_test( "fragmentation_test.go", "reassembler_test.go", ], - embed = [":fragmentation"], + library = ":fragmentation", deps = ["//pkg/tcpip/buffer"], ) diff --git a/pkg/tcpip/network/hash/BUILD b/pkg/tcpip/network/hash/BUILD index e6db5c0b0..872165866 100644 --- a/pkg/tcpip/network/hash/BUILD +++ b/pkg/tcpip/network/hash/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "hash", srcs = ["hash.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/hash", visibility = ["//visibility:public"], deps = [ "//pkg/rand", diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD index 4e2aae9a3..0fef2b1f1 100644 --- a/pkg/tcpip/network/ipv4/BUILD +++ b/pkg/tcpip/network/ipv4/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "icmp.go", "ipv4.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv4", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/network/ipv6/BUILD b/pkg/tcpip/network/ipv6/BUILD index e4e273460..fb11874c6 100644 --- a/pkg/tcpip/network/ipv6/BUILD +++ b/pkg/tcpip/network/ipv6/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "icmp.go", "ipv6.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv6", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", @@ -27,7 +25,7 @@ go_test( "ipv6_test.go", "ndp_test.go", ], - embed = [":ipv6"], + library = ":ipv6", deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD index a6ef3bdcc..2bad05a2e 100644 --- a/pkg/tcpip/ports/BUILD +++ b/pkg/tcpip/ports/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) 
go_library( name = "ports", srcs = ["ports.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/ports", visibility = ["//visibility:public"], deps = [ "//pkg/sync", @@ -17,7 +15,7 @@ go_library( go_test( name = "ports_test", srcs = ["ports_test.go"], - embed = [":ports"], + library = ":ports", deps = [ "//pkg/tcpip", ], diff --git a/pkg/tcpip/sample/tun_tcp_connect/BUILD b/pkg/tcpip/sample/tun_tcp_connect/BUILD index d7496fde6..cf0a5fefe 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/BUILD +++ b/pkg/tcpip/sample/tun_tcp_connect/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/pkg/tcpip/sample/tun_tcp_echo/BUILD b/pkg/tcpip/sample/tun_tcp_echo/BUILD index 875561566..43264b76d 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/BUILD +++ b/pkg/tcpip/sample/tun_tcp_echo/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/pkg/tcpip/seqnum/BUILD b/pkg/tcpip/seqnum/BUILD index b31ddba2f..45f503845 100644 --- a/pkg/tcpip/seqnum/BUILD +++ b/pkg/tcpip/seqnum/BUILD @@ -1,10 +1,9 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "seqnum", srcs = ["seqnum.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/seqnum", visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index 783351a69..f5b750046 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -30,7 +29,6 @@ go_library( "stack_global_state.go", "transport_demuxer.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/stack", visibility = ["//visibility:public"], deps = [ "//pkg/ilist", @@ -81,7 +79,7 @@ go_test( name = "stack_test", size = "small", srcs = ["linkaddrcache_test.go"], - embed = [":stack"], + library = ":stack", deps = [ "//pkg/sleep", "//pkg/sync", diff --git a/pkg/tcpip/transport/icmp/BUILD b/pkg/tcpip/transport/icmp/BUILD index 3aa23d529..ac18ec5b1 100644 --- a/pkg/tcpip/transport/icmp/BUILD +++ b/pkg/tcpip/transport/icmp/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -23,7 +23,6 @@ go_library( "icmp_packet_list.go", "protocol.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/icmp", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tcpip/transport/packet/BUILD b/pkg/tcpip/transport/packet/BUILD index 4858d150c..d22de6b26 100644 --- a/pkg/tcpip/transport/packet/BUILD +++ b/pkg/tcpip/transport/packet/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -22,7 +22,6 @@ go_library( "endpoint_state.go", "packet_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/packet", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tcpip/transport/raw/BUILD 
b/pkg/tcpip/transport/raw/BUILD index 2f2131ff7..c9baf4600 100644 --- a/pkg/tcpip/transport/raw/BUILD +++ b/pkg/tcpip/transport/raw/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -23,7 +23,6 @@ go_library( "protocol.go", "raw_packet_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/raw", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 0e3ab05ad..4acd9fb9a 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -55,7 +54,6 @@ go_library( "tcp_segment_list.go", "timer.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tcpip/transport/tcp/testing/context/BUILD b/pkg/tcpip/transport/tcp/testing/context/BUILD index b33ec2087..ce6a2c31d 100644 --- a/pkg/tcpip/transport/tcp/testing/context/BUILD +++ b/pkg/tcpip/transport/tcp/testing/context/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "context", testonly = 1, srcs = ["context.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/transport/tcpconntrack/BUILD b/pkg/tcpip/transport/tcpconntrack/BUILD index 43fcc27f0..3ad6994a7 100644 --- a/pkg/tcpip/transport/tcpconntrack/BUILD +++ b/pkg/tcpip/transport/tcpconntrack/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "tcpconntrack", srcs = ["tcp_conntrack.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip/header", diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD index 57ff123e3..adc908e24 100644 --- a/pkg/tcpip/transport/udp/BUILD +++ b/pkg/tcpip/transport/udp/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -25,7 +24,6 @@ go_library( "protocol.go", "udp_packet_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/udp", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tmutex/BUILD b/pkg/tmutex/BUILD index 07778e4f7..2dcba84ae 100644 --- a/pkg/tmutex/BUILD +++ b/pkg/tmutex/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "tmutex", srcs = ["tmutex.go"], - importpath = "gvisor.dev/gvisor/pkg/tmutex", visibility = 
["//:sandbox"], ) @@ -14,6 +12,6 @@ go_test( name = "tmutex_test", size = "medium", srcs = ["tmutex_test.go"], - embed = [":tmutex"], + library = ":tmutex", deps = ["//pkg/sync"], ) diff --git a/pkg/unet/BUILD b/pkg/unet/BUILD index d1885ae66..a86501fa2 100644 --- a/pkg/unet/BUILD +++ b/pkg/unet/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "unet.go", "unet_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/unet", visibility = ["//visibility:public"], deps = [ "//pkg/gate", @@ -23,6 +21,6 @@ go_test( srcs = [ "unet_test.go", ], - embed = [":unet"], + library = ":unet", deps = ["//pkg/sync"], ) diff --git a/pkg/urpc/BUILD b/pkg/urpc/BUILD index b8fdc3125..850c34ed0 100644 --- a/pkg/urpc/BUILD +++ b/pkg/urpc/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "urpc", srcs = ["urpc.go"], - importpath = "gvisor.dev/gvisor/pkg/urpc", visibility = ["//:sandbox"], deps = [ "//pkg/fd", @@ -20,6 +18,6 @@ go_test( name = "urpc_test", size = "small", srcs = ["urpc_test.go"], - embed = [":urpc"], + library = ":urpc", deps = ["//pkg/unet"], ) diff --git a/pkg/waiter/BUILD b/pkg/waiter/BUILD index 1c6890e52..852480a09 100644 --- a/pkg/waiter/BUILD +++ b/pkg/waiter/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -22,7 +21,6 @@ go_library( "waiter.go", "waiter_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/waiter", visibility = ["//visibility:public"], deps = ["//pkg/sync"], ) @@ -33,5 +31,5 @@ go_test( srcs = [ "waiter_test.go", ], - embed = [":waiter"], + library = ":waiter", ) diff --git a/runsc/BUILD b/runsc/BUILD index e5587421d..b35b41d81 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -1,7 +1,6 @@ -package(licenses = ["notice"]) # Apache 2.0 +load("//tools:defs.bzl", "go_binary", "pkg_deb", "pkg_tar") -load("@io_bazel_rules_go//go:def.bzl", "go_binary") -load("@rules_pkg//:pkg.bzl", "pkg_deb", "pkg_tar") +package(licenses = ["notice"]) go_binary( name = "runsc", @@ -9,7 +8,7 @@ go_binary( "main.go", "version.go", ], - pure = "on", + pure = True, visibility = [ "//visibility:public", ], @@ -26,10 +25,12 @@ go_binary( ) # The runsc-race target is a race-compatible BUILD target. This must be built -# via "bazel build --features=race //runsc:runsc-race", since the race feature -# must apply to all dependencies due a bug in gazelle file selection. The pure -# attribute must be off because the race detector requires linking with non-Go -# components, although we still require a static binary. +# via: bazel build --features=race //runsc:runsc-race +# +# This is neccessary because the race feature must apply to all dependencies +# due a bug in gazelle file selection. The pure attribute must be off because +# the race detector requires linking with non-Go components, although we still +# require a static binary. 
# # Note that in the future this might be convertible to a compatible target by # using the pure and static attributes within a select function, but select is @@ -42,7 +43,7 @@ go_binary( "main.go", "version.go", ], - static = "on", + static = True, visibility = [ "//visibility:public", ], @@ -82,7 +83,12 @@ genrule( # because they are assumes to be hermetic). srcs = [":runsc"], outs = ["version.txt"], - cmd = "$(location :runsc) -version | grep 'runsc version' | sed 's/^[^0-9]*//' > $@", + # Note that the little dance here is necessary because files in the $(SRCS) + # attribute are not executable by default, and we can't touch in place. + cmd = "cp $(location :runsc) $(@D)/runsc && \ + chmod a+x $(@D)/runsc && \ + $(@D)/runsc -version | grep version | sed 's/^[^0-9]*//' > $@ && \ + rm -f $(@D)/runsc", stamp = 1, ) @@ -109,5 +115,6 @@ sh_test( name = "version_test", size = "small", srcs = ["version_test.sh"], + args = ["$(location :runsc)"], data = [":runsc"], ) diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 3e20f8f2f..f3ebc0231 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -23,7 +23,6 @@ go_library( "strace.go", "user.go", ], - importpath = "gvisor.dev/gvisor/runsc/boot", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -107,7 +106,7 @@ go_test( "loader_test.go", "user_test.go", ], - embed = [":boot"], + library = ":boot", deps = [ "//pkg/control/server", "//pkg/log", diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index 3a9dcfc04..ce30f6c53 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -13,7 +13,6 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.dev/gvisor/runsc/boot/filter", visibility = [ "//runsc/boot:__subpackages__", ], diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD index 03391cdca..77774f43c 100644 --- a/runsc/boot/platforms/BUILD +++ b/runsc/boot/platforms/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "platforms", srcs = ["platforms.go"], - importpath = "gvisor.dev/gvisor/runsc/boot/platforms", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index d6165f9e5..d4c7bdfbb 100644 --- a/runsc/cgroup/BUILD +++ b/runsc/cgroup/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "cgroup", srcs = ["cgroup.go"], - importpath = "gvisor.dev/gvisor/runsc/cgroup", visibility = ["//:sandbox"], deps = [ "//pkg/log", @@ -19,6 +18,6 @@ go_test( name = "cgroup_test", size = "small", srcs = ["cgroup_test.go"], - embed = [":cgroup"], + library = ":cgroup", tags = ["local"], ) diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index b94bc4fa0..09aa46434 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -34,7 +34,6 @@ go_library( "syscalls.go", "wait.go", ], - importpath = "gvisor.dev/gvisor/runsc/cmd", visibility 
= [ "//runsc:__subpackages__", ], @@ -73,7 +72,7 @@ go_test( data = [ "//runsc", ], - embed = [":cmd"], + library = ":cmd", deps = [ "//pkg/abi/linux", "//pkg/log", diff --git a/runsc/console/BUILD b/runsc/console/BUILD index e623c1a0f..06924bccd 100644 --- a/runsc/console/BUILD +++ b/runsc/console/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -7,7 +7,6 @@ go_library( srcs = [ "console.go", ], - importpath = "gvisor.dev/gvisor/runsc/console", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 6dea179e4..e21431e4c 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +10,6 @@ go_library( "state_file.go", "status.go", ], - importpath = "gvisor.dev/gvisor/runsc/container", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -42,7 +41,7 @@ go_test( "//runsc", "//runsc/container/test_app", ], - embed = [":container"], + library = ":container", shard_count = 5, tags = [ "requires-kvm", diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD index bfd338bb6..e200bafd9 100644 --- a/runsc/container/test_app/BUILD +++ b/runsc/container/test_app/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) @@ -9,7 +9,7 @@ go_binary( "fds.go", "test_app.go", ], - pure = "on", + pure = True, visibility = ["//runsc/container:__pkg__"], deps = [ "//pkg/unet", diff --git a/runsc/criutil/BUILD b/runsc/criutil/BUILD index 558133a0e..8a571a000 100644 --- a/runsc/criutil/BUILD +++ b/runsc/criutil/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "criutil", testonly = 1, srcs = ["criutil.go"], - importpath = "gvisor.dev/gvisor/runsc/criutil", visibility = ["//:sandbox"], deps = ["//runsc/testutil"], ) diff --git a/runsc/dockerutil/BUILD b/runsc/dockerutil/BUILD index 0e0423504..8621af901 100644 --- a/runsc/dockerutil/BUILD +++ b/runsc/dockerutil/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "dockerutil", testonly = 1, srcs = ["dockerutil.go"], - importpath = "gvisor.dev/gvisor/runsc/dockerutil", visibility = ["//:sandbox"], deps = [ "//runsc/testutil", diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index a9582d92b..64a406ae2 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,10 +10,7 @@ go_library( "fsgofer_arm64_unsafe.go", "fsgofer_unsafe.go", ], - importpath = "gvisor.dev/gvisor/runsc/fsgofer", - visibility = [ - "//runsc:__subpackages__", - ], + visibility = ["//runsc:__subpackages__"], deps = [ "//pkg/abi/linux", "//pkg/fd", @@ -30,7 +27,7 @@ go_test( name = "fsgofer_test", size = "small", srcs = ["fsgofer_test.go"], - embed = [":fsgofer"], + library = ":fsgofer", deps = [ "//pkg/log", "//pkg/p9", diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD index bac73f89d..82b48ef32 100644 --- 
a/runsc/fsgofer/filter/BUILD +++ b/runsc/fsgofer/filter/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -13,7 +13,6 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index ddbc37456..c95d50294 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "network_unsafe.go", "sandbox.go", ], - importpath = "gvisor.dev/gvisor/runsc/sandbox", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 205638803..4ccd77f63 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +10,6 @@ go_library( "namespace.go", "specutils.go", ], - importpath = "gvisor.dev/gvisor/runsc/specutils", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", @@ -28,6 +27,6 @@ go_test( name = "specutils_test", size = "small", srcs = ["specutils_test.go"], - embed = [":specutils"], + library = ":specutils", deps = ["@com_github_opencontainers_runtime-spec//specs-go:go_default_library"], ) diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD index 3c3027cb5..f845120b0 100644 --- a/runsc/testutil/BUILD +++ b/runsc/testutil/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "testutil", testonly = 1, srcs = ["testutil.go"], - importpath = "gvisor.dev/gvisor/runsc/testutil", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/runsc/version_test.sh b/runsc/version_test.sh index cc0ca3f05..747350654 100755 --- a/runsc/version_test.sh +++ b/runsc/version_test.sh @@ -16,7 +16,7 @@ set -euf -x -o pipefail -readonly runsc="${TEST_SRCDIR}/__main__/runsc/linux_amd64_pure_stripped/runsc" +readonly runsc="$1" readonly version=$($runsc --version) # Version should should not match VERSION, which is the default and which will diff --git a/scripts/common.sh b/scripts/common.sh index fdb1aa142..cd91b9f8e 100755 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -16,11 +16,7 @@ set -xeou pipefail -if [[ -f $(dirname $0)/common_google.sh ]]; then - source $(dirname $0)/common_google.sh -else - source $(dirname $0)/common_bazel.sh -fi +source $(dirname $0)/common_build.sh # Ensure it attempts to collect logs in all cases. trap collect_logs EXIT diff --git a/scripts/common_bazel.sh b/scripts/common_bazel.sh deleted file mode 100755 index a473a88a4..000000000 --- a/scripts/common_bazel.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# Install the latest version of Bazel and log the version. -(which use_bazel.sh && use_bazel.sh latest) || which bazel -bazel version - -# Switch into the workspace; only necessary if run with kokoro. -if [[ -v KOKORO_GIT_COMMIT ]] && [[ -d git/repo ]]; then - cd git/repo -elif [[ -v KOKORO_GIT_COMMIT ]] && [[ -d github/repo ]]; then - cd github/repo -fi - -# Set the standard bazel flags. -declare -r BAZEL_FLAGS=( - "--show_timestamps" - "--test_output=errors" - "--keep_going" - "--verbose_failures=true" -) -if [[ -v KOKORO_BAZEL_AUTH_CREDENTIAL ]]; then - declare -r BAZEL_RBE_AUTH_FLAGS=( - "--auth_credentials=${KOKORO_BAZEL_AUTH_CREDENTIAL}" - ) - declare -r BAZEL_RBE_FLAGS=("--config=remote") -fi - -# Wrap bazel. -function build() { - bazel build "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" 2>&1 | - tee /dev/fd/2 | grep -E '^ bazel-bin/' | awk '{ print $1; }' -} - -function test() { - bazel test "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" -} - -function run() { - local binary=$1 - shift - bazel run "${binary}" -- "$@" -} - -function run_as_root() { - local binary=$1 - shift - bazel run --run_under="sudo" "${binary}" -- "$@" -} - -function collect_logs() { - # Zip out everything into a convenient form. - if [[ -v KOKORO_ARTIFACTS_DIR ]] && [[ -e bazel-testlogs ]]; then - # Merge results files of all shards for each test suite. - for d in `find -L "bazel-testlogs" -name 'shard_*_of_*' | xargs dirname | sort | uniq`; do - junitparser merge `find $d -name test.xml` $d/test.xml - cat $d/shard_*_of_*/test.log > $d/test.log - ls -l $d/shard_*_of_*/test.outputs/outputs.zip && zip -r -1 $d/outputs.zip $d/shard_*_of_*/test.outputs/outputs.zip - done - find -L "bazel-testlogs" -name 'shard_*_of_*' | xargs rm -rf - # Move test logs to Kokoro directory. tar is used to conveniently perform - # renames while moving files. - find -L "bazel-testlogs" -name "test.xml" -o -name "test.log" -o -name "outputs.zip" | - tar --create --files-from - --transform 's/test\./sponge_log./' | - tar --extract --directory ${KOKORO_ARTIFACTS_DIR} - - # Collect sentry logs, if any. - if [[ -v RUNSC_LOGS_DIR ]] && [[ -d "${RUNSC_LOGS_DIR}" ]]; then - # Check if the directory is empty or not (only the first line it needed). - local -r logs=$(ls "${RUNSC_LOGS_DIR}" | head -n1) - if [[ "${logs}" ]]; then - local -r archive=runsc_logs_"${RUNTIME}".tar.gz - if [[ -v KOKORO_BUILD_ARTIFACTS_SUBDIR ]]; then - echo "runsc logs will be uploaded to:" - echo " gsutil cp gs://gvisor/logs/${KOKORO_BUILD_ARTIFACTS_SUBDIR}/${archive} /tmp" - echo " https://storage.cloud.google.com/gvisor/logs/${KOKORO_BUILD_ARTIFACTS_SUBDIR}/${archive}" - fi - tar --create --gzip --file="${KOKORO_ARTIFACTS_DIR}/${archive}" -C "${RUNSC_LOGS_DIR}" . - fi - fi - fi -} - -function find_branch_name() { - git branch --show-current || git rev-parse HEAD || bazel info workspace | xargs basename -} diff --git a/scripts/common_build.sh b/scripts/common_build.sh new file mode 100755 index 000000000..a473a88a4 --- /dev/null +++ b/scripts/common_build.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Install the latest version of Bazel and log the version. +(which use_bazel.sh && use_bazel.sh latest) || which bazel +bazel version + +# Switch into the workspace; only necessary if run with kokoro. +if [[ -v KOKORO_GIT_COMMIT ]] && [[ -d git/repo ]]; then + cd git/repo +elif [[ -v KOKORO_GIT_COMMIT ]] && [[ -d github/repo ]]; then + cd github/repo +fi + +# Set the standard bazel flags. +declare -r BAZEL_FLAGS=( + "--show_timestamps" + "--test_output=errors" + "--keep_going" + "--verbose_failures=true" +) +if [[ -v KOKORO_BAZEL_AUTH_CREDENTIAL ]]; then + declare -r BAZEL_RBE_AUTH_FLAGS=( + "--auth_credentials=${KOKORO_BAZEL_AUTH_CREDENTIAL}" + ) + declare -r BAZEL_RBE_FLAGS=("--config=remote") +fi + +# Wrap bazel. +function build() { + bazel build "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" 2>&1 | + tee /dev/fd/2 | grep -E '^ bazel-bin/' | awk '{ print $1; }' +} + +function test() { + bazel test "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" +} + +function run() { + local binary=$1 + shift + bazel run "${binary}" -- "$@" +} + +function run_as_root() { + local binary=$1 + shift + bazel run --run_under="sudo" "${binary}" -- "$@" +} + +function collect_logs() { + # Zip out everything into a convenient form. + if [[ -v KOKORO_ARTIFACTS_DIR ]] && [[ -e bazel-testlogs ]]; then + # Merge results files of all shards for each test suite. + for d in `find -L "bazel-testlogs" -name 'shard_*_of_*' | xargs dirname | sort | uniq`; do + junitparser merge `find $d -name test.xml` $d/test.xml + cat $d/shard_*_of_*/test.log > $d/test.log + ls -l $d/shard_*_of_*/test.outputs/outputs.zip && zip -r -1 $d/outputs.zip $d/shard_*_of_*/test.outputs/outputs.zip + done + find -L "bazel-testlogs" -name 'shard_*_of_*' | xargs rm -rf + # Move test logs to Kokoro directory. tar is used to conveniently perform + # renames while moving files. + find -L "bazel-testlogs" -name "test.xml" -o -name "test.log" -o -name "outputs.zip" | + tar --create --files-from - --transform 's/test\./sponge_log./' | + tar --extract --directory ${KOKORO_ARTIFACTS_DIR} + + # Collect sentry logs, if any. + if [[ -v RUNSC_LOGS_DIR ]] && [[ -d "${RUNSC_LOGS_DIR}" ]]; then + # Check if the directory is empty or not (only the first line it needed). + local -r logs=$(ls "${RUNSC_LOGS_DIR}" | head -n1) + if [[ "${logs}" ]]; then + local -r archive=runsc_logs_"${RUNTIME}".tar.gz + if [[ -v KOKORO_BUILD_ARTIFACTS_SUBDIR ]]; then + echo "runsc logs will be uploaded to:" + echo " gsutil cp gs://gvisor/logs/${KOKORO_BUILD_ARTIFACTS_SUBDIR}/${archive} /tmp" + echo " https://storage.cloud.google.com/gvisor/logs/${KOKORO_BUILD_ARTIFACTS_SUBDIR}/${archive}" + fi + tar --create --gzip --file="${KOKORO_ARTIFACTS_DIR}/${archive}" -C "${RUNSC_LOGS_DIR}" . 
+ fi + fi + fi +} + +function find_branch_name() { + git branch --show-current || git rev-parse HEAD || bazel info workspace | xargs basename +} diff --git a/test/BUILD b/test/BUILD index bf834d994..34b950644 100644 --- a/test/BUILD +++ b/test/BUILD @@ -1,44 +1 @@ -package(licenses = ["notice"]) # Apache 2.0 - -# We need to define a bazel platform and toolchain to specify dockerPrivileged -# and dockerRunAsRoot options, they are required to run tests on the RBE -# cluster in Kokoro. -alias( - name = "rbe_ubuntu1604", - actual = ":rbe_ubuntu1604_r346485", -) - -platform( - name = "rbe_ubuntu1604_r346485", - constraint_values = [ - "@bazel_tools//platforms:x86_64", - "@bazel_tools//platforms:linux", - "@bazel_tools//tools/cpp:clang", - "@bazel_toolchains//constraints:xenial", - "@bazel_toolchains//constraints/sanitizers:support_msan", - ], - remote_execution_properties = """ - properties: { - name: "container-image" - value:"docker://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04@sha256:93f7e127196b9b653d39830c50f8b05d49ef6fd8739a9b5b8ab16e1df5399e50" - } - properties: { - name: "dockerAddCapabilities" - value: "SYS_ADMIN" - } - properties: { - name: "dockerPrivileged" - value: "true" - } - """, -) - -toolchain( - name = "cc-toolchain-clang-x86_64-default", - exec_compatible_with = [ - ], - target_compatible_with = [ - ], - toolchain = "@bazel_toolchains//configs/ubuntu16_04_clang/10.0.0/bazel_2.0.0/cc:cc-compiler-k8", - toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", -) +package(licenses = ["notice"]) diff --git a/test/e2e/BUILD b/test/e2e/BUILD index 4fe03a220..76e04f878 100644 --- a/test/e2e/BUILD +++ b/test/e2e/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +10,7 @@ go_test( "integration_test.go", "regression_test.go", ], - embed = [":integration"], + library = ":integration", tags = [ # Requires docker and runsc to be configured before the test runs. "manual", @@ -29,5 +29,4 @@ go_test( go_library( name = "integration", srcs = ["integration.go"], - importpath = "gvisor.dev/gvisor/test/integration", ) diff --git a/test/image/BUILD b/test/image/BUILD index 09b0a0ad5..7392ac54e 100644 --- a/test/image/BUILD +++ b/test/image/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -14,7 +14,7 @@ go_test( "ruby.rb", "ruby.sh", ], - embed = [":image"], + library = ":image", tags = [ # Requires docker and runsc to be configured before the test runs. 
"manual", @@ -30,5 +30,4 @@ go_test( go_library( name = "image", srcs = ["image.go"], - importpath = "gvisor.dev/gvisor/test/image", ) diff --git a/test/iptables/BUILD b/test/iptables/BUILD index 22f470092..6bb3b82b5 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +12,6 @@ go_library( "iptables_util.go", "nat.go", ], - importpath = "gvisor.dev/gvisor/test/iptables", visibility = ["//test/iptables:__subpackages__"], deps = [ "//runsc/testutil", @@ -24,7 +23,7 @@ go_test( srcs = [ "iptables_test.go", ], - embed = [":iptables"], + library = ":iptables", tags = [ "local", "manual", diff --git a/test/iptables/runner/BUILD b/test/iptables/runner/BUILD index a5b6f082c..b9199387a 100644 --- a/test/iptables/runner/BUILD +++ b/test/iptables/runner/BUILD @@ -1,15 +1,21 @@ -load("@io_bazel_rules_docker//go:image.bzl", "go_image") -load("@io_bazel_rules_docker//container:container.bzl", "container_image") +load("//tools:defs.bzl", "container_image", "go_binary", "go_image") package(licenses = ["notice"]) +go_binary( + name = "runner", + testonly = 1, + srcs = ["main.go"], + deps = ["//test/iptables"], +) + container_image( name = "iptables-base", base = "@iptables-test//image", ) go_image( - name = "runner", + name = "runner-image", testonly = 1, srcs = ["main.go"], base = ":iptables-base", diff --git a/test/root/BUILD b/test/root/BUILD index d5dd9bca2..23ce2a70f 100644 --- a/test/root/BUILD +++ b/test/root/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "root", srcs = ["root.go"], - importpath = "gvisor.dev/gvisor/test/root", ) go_test( @@ -21,7 +20,7 @@ go_test( data = [ "//runsc", ], - embed = [":root"], + library = ":root", tags = [ # Requires docker and runsc to be configured before the test runs. # Also test only runs as root. diff --git a/test/root/testdata/BUILD b/test/root/testdata/BUILD index 125633680..bca5f9cab 100644 --- a/test/root/testdata/BUILD +++ b/test/root/testdata/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -12,7 +12,6 @@ go_library( "sandbox.go", "simple.go", ], - importpath = "gvisor.dev/gvisor/test/root/testdata", visibility = [ "//visibility:public", ], diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 367295206..2c472bf8d 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -1,6 +1,6 @@ # These packages are used to run language runtime tests inside gVisor sandboxes. 
-load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_test") +load("//tools:defs.bzl", "go_binary", "go_test") load("//test/runtimes:build_defs.bzl", "runtime_test") package(licenses = ["notice"]) @@ -49,5 +49,5 @@ go_test( name = "blacklist_test", size = "small", srcs = ["blacklist_test.go"], - embed = [":runner"], + library = ":runner", ) diff --git a/test/runtimes/build_defs.bzl b/test/runtimes/build_defs.bzl index 6f84ca852..92e275a76 100644 --- a/test/runtimes/build_defs.bzl +++ b/test/runtimes/build_defs.bzl @@ -1,6 +1,6 @@ """Defines a rule for runtime test targets.""" -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_test", "loopback") def runtime_test( name, @@ -34,6 +34,7 @@ def runtime_test( ] data = [ ":runner", + loopback, ] if blacklist_file: args += ["--blacklist_file", "test/runtimes/" + blacklist_file] @@ -61,7 +62,7 @@ def blacklist_test(name, blacklist_file): """Test that a blacklist parses correctly.""" go_test( name = name + "_blacklist_test", - embed = [":runner"], + library = ":runner", srcs = ["blacklist_test.go"], args = ["--blacklist_file", "test/runtimes/" + blacklist_file], data = [blacklist_file], diff --git a/test/runtimes/images/proctor/BUILD b/test/runtimes/images/proctor/BUILD index 09dc6c42f..85e004c45 100644 --- a/test/runtimes/images/proctor/BUILD +++ b/test/runtimes/images/proctor/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_test") +load("//tools:defs.bzl", "go_binary", "go_test") package(licenses = ["notice"]) @@ -19,7 +19,7 @@ go_test( name = "proctor_test", size = "small", srcs = ["proctor_test.go"], - embed = [":proctor"], + library = ":proctor", deps = [ "//runsc/testutil", ], diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 90d52e73b..40e974314 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") load("//test/syscalls:build_defs.bzl", "syscall_test") package(licenses = ["notice"]) diff --git a/test/syscalls/build_defs.bzl b/test/syscalls/build_defs.bzl index aaf77c65b..1df761dd0 100644 --- a/test/syscalls/build_defs.bzl +++ b/test/syscalls/build_defs.bzl @@ -1,5 +1,7 @@ """Defines a rule for syscall test targets.""" +load("//tools:defs.bzl", "loopback") + # syscall_test is a macro that will create targets to run the given test target # on the host (native) and runsc. 
def syscall_test( @@ -135,6 +137,7 @@ def _syscall_test( name = name, data = [ ":syscall_test_runner", + loopback, test, ], args = args, @@ -148,6 +151,3 @@ def sh_test(**kwargs): native.sh_test( **kwargs ) - -def select_for_linux(for_linux, for_others = []): - return for_linux diff --git a/test/syscalls/gtest/BUILD b/test/syscalls/gtest/BUILD index 9293f25cb..de4b2727c 100644 --- a/test/syscalls/gtest/BUILD +++ b/test/syscalls/gtest/BUILD @@ -1,12 +1,9 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "gtest", srcs = ["gtest.go"], - importpath = "gvisor.dev/gvisor/test/syscalls/gtest", - visibility = [ - "//test:__subpackages__", - ], + visibility = ["//:sandbox"], ) diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 4c7ec3f06..c2ef50c1d 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1,5 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") -load("//test/syscalls:build_defs.bzl", "select_for_linux") +load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "select_system") package( default_visibility = ["//:sandbox"], @@ -126,13 +125,11 @@ cc_library( testonly = 1, srcs = [ "socket_test_util.cc", - ] + select_for_linux( - [ - "socket_test_util_impl.cc", - ], - ), + "socket_test_util_impl.cc", + ], hdrs = ["socket_test_util.h"], - deps = [ + defines = select_system(), + deps = default_net_util() + [ "@com_google_googletest//:gtest", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -143,8 +140,7 @@ cc_library( "//test/util:temp_path", "//test/util:test_util", "//test/util:thread_util", - ] + select_for_linux([ - ]), + ], ) cc_library( @@ -1443,6 +1439,7 @@ cc_binary( srcs = ["arch_prctl.cc"], linkstatic = 1, deps = [ + "//test/util:file_descriptor", "//test/util:test_main", "//test/util:test_util", "@com_google_googletest//:gtest", @@ -3383,11 +3380,11 @@ cc_library( name = "udp_socket_test_cases", testonly = 1, srcs = [ - "udp_socket_test_cases.cc", - ] + select_for_linux([ "udp_socket_errqueue_test_case.cc", - ]), + "udp_socket_test_cases.cc", + ], hdrs = ["udp_socket_test_cases.h"], + defines = select_system(), deps = [ ":socket_test_util", ":unix_domain_socket_test_util", diff --git a/test/syscalls/linux/arch_prctl.cc b/test/syscalls/linux/arch_prctl.cc index 81bf5a775..3a901faf5 100644 --- a/test/syscalls/linux/arch_prctl.cc +++ b/test/syscalls/linux/arch_prctl.cc @@ -14,8 +14,10 @@ #include #include +#include #include "gtest/gtest.h" +#include "test/util/file_descriptor.h" #include "test/util/test_util.h" // glibc does not provide a prototype for arch_prctl() so declare it here. diff --git a/test/syscalls/linux/rseq/BUILD b/test/syscalls/linux/rseq/BUILD index 5cfe4e56f..ed488dbc2 100644 --- a/test/syscalls/linux/rseq/BUILD +++ b/test/syscalls/linux/rseq/BUILD @@ -1,8 +1,7 @@ # This package contains a standalone rseq test binary. This binary must not # depend on libc, which might use rseq itself. 
-load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", "cc_flags_supplier") -load("@rules_cc//cc:defs.bzl", "cc_library") +load("//tools:defs.bzl", "cc_flags_supplier", "cc_library", "cc_toolchain") package(licenses = ["notice"]) @@ -37,8 +36,8 @@ genrule( "$(location start.S)", ]), toolchains = [ + cc_toolchain, ":no_pie_cc_flags", - "@bazel_tools//tools/cpp:current_cc_toolchain", ], visibility = ["//:sandbox"], ) diff --git a/test/syscalls/linux/udp_socket_errqueue_test_case.cc b/test/syscalls/linux/udp_socket_errqueue_test_case.cc index 147978f46..9a24e1df0 100644 --- a/test/syscalls/linux/udp_socket_errqueue_test_case.cc +++ b/test/syscalls/linux/udp_socket_errqueue_test_case.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifndef __fuchsia__ + #include "test/syscalls/linux/udp_socket_test_cases.h" #include @@ -52,3 +54,5 @@ TEST_P(UdpSocketTest, ErrorQueue) { } // namespace testing } // namespace gvisor + +#endif // __fuchsia__ diff --git a/test/uds/BUILD b/test/uds/BUILD index a3843e699..51e2c7ce8 100644 --- a/test/uds/BUILD +++ b/test/uds/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package( default_visibility = ["//:sandbox"], @@ -9,7 +9,6 @@ go_library( name = "uds", testonly = 1, srcs = ["uds.go"], - importpath = "gvisor.dev/gvisor/test/uds", deps = [ "//pkg/log", "//pkg/unet", diff --git a/test/util/BUILD b/test/util/BUILD index cbc728159..3c732be62 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -1,5 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") -load("//test/syscalls:build_defs.bzl", "select_for_linux") +load("//tools:defs.bzl", "cc_library", "cc_test", "select_system") package( default_visibility = ["//:sandbox"], @@ -142,12 +141,13 @@ cc_library( cc_library( name = "save_util", testonly = 1, - srcs = ["save_util.cc"] + - select_for_linux( - ["save_util_linux.cc"], - ["save_util_other.cc"], - ), + srcs = [ + "save_util.cc", + "save_util_linux.cc", + "save_util_other.cc", + ], hdrs = ["save_util.h"], + defines = select_system(), ) cc_library( @@ -234,13 +234,16 @@ cc_library( testonly = 1, srcs = [ "test_util.cc", - ] + select_for_linux( - [ - "test_util_impl.cc", - "test_util_runfiles.cc", + "test_util_impl.cc", + "test_util_runfiles.cc", + ], + hdrs = ["test_util.h"], + defines = select_system( + fuchsia = [ + "__opensource__", + "__fuchsia__", ], ), - hdrs = ["test_util.h"], deps = [ ":fs_util", ":logging", diff --git a/test/util/save_util_linux.cc b/test/util/save_util_linux.cc index cd56118c0..d0aea8e6a 100644 --- a/test/util/save_util_linux.cc +++ b/test/util/save_util_linux.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef __linux__ + #include #include #include @@ -43,3 +45,5 @@ void MaybeSave() { } // namespace testing } // namespace gvisor + +#endif diff --git a/test/util/save_util_other.cc b/test/util/save_util_other.cc index 1aca663b7..931af2c29 100644 --- a/test/util/save_util_other.cc +++ b/test/util/save_util_other.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#ifndef __linux__ + namespace gvisor { namespace testing { @@ -21,3 +23,5 @@ void MaybeSave() { } // namespace testing } // namespace gvisor + +#endif diff --git a/test/util/test_util_runfiles.cc b/test/util/test_util_runfiles.cc index 7210094eb..694d21692 100644 --- a/test/util/test_util_runfiles.cc +++ b/test/util/test_util_runfiles.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifndef __fuchsia__ + #include #include @@ -44,3 +46,5 @@ std::string RunfilePath(std::string path) { } // namespace testing } // namespace gvisor + +#endif // __fuchsia__ diff --git a/tools/BUILD b/tools/BUILD new file mode 100644 index 000000000..e73a9c885 --- /dev/null +++ b/tools/BUILD @@ -0,0 +1,3 @@ +package(licenses = ["notice"]) + +exports_files(["nogo.js"]) diff --git a/tools/build/BUILD b/tools/build/BUILD new file mode 100644 index 000000000..0c0ce3f4d --- /dev/null +++ b/tools/build/BUILD @@ -0,0 +1,10 @@ +package(licenses = ["notice"]) + +# In bazel, no special support is required for loopback networking. This is +# just a dummy data target that does not change the test environment. +genrule( + name = "loopback", + outs = ["loopback.txt"], + cmd = "touch $@", + visibility = ["//visibility:public"], +) diff --git a/tools/build/defs.bzl b/tools/build/defs.bzl new file mode 100644 index 000000000..d0556abd1 --- /dev/null +++ b/tools/build/defs.bzl @@ -0,0 +1,91 @@ +"""Bazel implementations of standard rules.""" + +load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", _cc_flags_supplier = "cc_flags_supplier") +load("@io_bazel_rules_go//go:def.bzl", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_library = "go_library", _go_test = "go_test", _go_tool_library = "go_tool_library") +load("@io_bazel_rules_go//proto:def.bzl", _go_proto_library = "go_proto_library") +load("@rules_cc//cc:defs.bzl", _cc_binary = "cc_binary", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test") +load("@rules_pkg//:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar") +load("@io_bazel_rules_docker//go:image.bzl", _go_image = "go_image") +load("@io_bazel_rules_docker//container:container.bzl", _container_image = "container_image") +load("@pydeps//:requirements.bzl", _py_requirement = "requirement") + +container_image = _container_image +cc_binary = _cc_binary +cc_library = _cc_library +cc_flags_supplier = _cc_flags_supplier +cc_proto_library = _cc_proto_library +cc_test = _cc_test +cc_toolchain = "@bazel_tools//tools/cpp:current_cc_toolchain" +go_image = _go_image +go_embed_data = _go_embed_data +loopback = "//tools/build:loopback" +proto_library = native.proto_library +pkg_deb = _pkg_deb +pkg_tar = _pkg_tar +py_library = native.py_library +py_binary = native.py_binary +py_test = native.py_test + +def go_binary(name, static = False, pure = False, **kwargs): + if static: + kwargs["static"] = "on" + if pure: + kwargs["pure"] = "on" + _go_binary( + name = name, + **kwargs + ) + +def go_library(name, **kwargs): + _go_library( + name = name, + importpath = "gvisor.dev/gvisor/" + native.package_name(), + **kwargs + ) + +def go_tool_library(name, **kwargs): + _go_tool_library( + name = name, + importpath = "gvisor.dev/gvisor/" + native.package_name(), + **kwargs + ) + +def go_proto_library(name, proto, **kwargs): + deps = kwargs.pop("deps", []) + _go_proto_library( + name = name, + importpath = "gvisor.dev/gvisor/" + native.package_name() + "/" + name, + proto = proto, + deps = [dep.replace("_proto", 
"_go_proto") for dep in deps], + **kwargs + ) + +def go_test(name, **kwargs): + library = kwargs.pop("library", None) + if library: + kwargs["embed"] = [library] + _go_test( + name = name, + **kwargs + ) + +def py_requirement(name, direct = False): + return _py_requirement(name) + +def select_arch(amd64 = "amd64", arm64 = "arm64", default = None, **kwargs): + values = { + "@bazel_tools//src/conditions:linux_x86_64": amd64, + "@bazel_tools//src/conditions:linux_aarch64": arm64, + } + if default: + values["//conditions:default"] = default + return select(values, **kwargs) + +def select_system(linux = ["__linux__"], **kwargs): + return linux # Only Linux supported. + +def default_installer(): + return None + +def default_net_util(): + return [] # Nothing needed. diff --git a/tools/checkunsafe/BUILD b/tools/checkunsafe/BUILD index d85c56131..92ba8ab06 100644 --- a/tools/checkunsafe/BUILD +++ b/tools/checkunsafe/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_tool_library") +load("//tools:defs.bzl", "go_tool_library") package(licenses = ["notice"]) go_tool_library( name = "checkunsafe", srcs = ["check_unsafe.go"], - importpath = "checkunsafe", visibility = ["//visibility:public"], deps = [ "@org_golang_x_tools//go/analysis:go_tool_library", diff --git a/tools/defs.bzl b/tools/defs.bzl new file mode 100644 index 000000000..819f12b0d --- /dev/null +++ b/tools/defs.bzl @@ -0,0 +1,154 @@ +"""Wrappers for common build rules. + +These wrappers apply common BUILD configurations (e.g., proto_library +automagically creating cc_ and go_ proto targets) and act as a single point of +change for Google-internal and bazel-compatible rules. +""" + +load("//tools/go_stateify:defs.bzl", "go_stateify") +load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") +load("//tools/build:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") + +# Delegate directly. +cc_binary = _cc_binary +cc_library = _cc_library +cc_test = _cc_test +cc_toolchain = _cc_toolchain +cc_flags_supplier = _cc_flags_supplier +container_image = _container_image +go_embed_data = _go_embed_data +go_image = _go_image +go_test = _go_test +go_tool_library = _go_tool_library +pkg_deb = _pkg_deb +pkg_tar = _pkg_tar +py_library = _py_library +py_binary = _py_binary +py_test = _py_test +py_requirement = _py_requirement +select_arch = _select_arch +select_system = _select_system +loopback = _loopback +default_installer = _default_installer +default_net_util = _default_net_util + +def go_binary(name, **kwargs): + """Wraps the standard go_binary. + + Args: + name: the rule name. + **kwargs: standard go_binary arguments. 
+ """ + _go_binary( + name = name, + **kwargs + ) + +def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = False, **kwargs): + """Wraps the standard go_library and does stateification and marshalling. + + The recommended way is to use this rule with mostly identical configuration as the native + go_library rule. + + These definitions provide additional flags (stateify, marshal) that can be used + with the generators to automatically supplement the library code. + + load("//tools:defs.bzl", "go_library") + + go_library( + name = "foo", + srcs = ["foo.go"], + ) + + Args: + name: the rule name. + srcs: the library sources. + deps: the library dependencies. + imports: imports required for stateify. + stateify: whether statify is enabled (default: true). + marshal: whether marshal is enabled (default: false). + **kwargs: standard go_library arguments. + """ + if stateify: + # Only do stateification for non-state packages without manual autogen. + go_stateify( + name = name + "_state_autogen", + srcs = [src for src in srcs if src.endswith(".go")], + imports = imports, + package = name, + arch = select_arch(), + out = name + "_state_autogen.go", + ) + all_srcs = srcs + [name + "_state_autogen.go"] + if "//pkg/state" not in deps: + all_deps = deps + ["//pkg/state"] + else: + all_deps = deps + else: + all_deps = deps + all_srcs = srcs + if marshal: + go_marshal( + name = name + "_abi_autogen", + srcs = [src for src in srcs if src.endswith(".go")], + debug = False, + imports = imports, + package = name, + ) + extra_deps = [ + dep + for dep in marshal_deps + if not dep in all_deps + ] + all_deps = all_deps + extra_deps + all_srcs = srcs + [name + "_abi_autogen_unsafe.go"] + + _go_library( + name = name, + srcs = all_srcs, + deps = all_deps, + **kwargs + ) + + if marshal: + # Ignore importpath for go_test. + kwargs.pop("importpath", None) + + _go_test( + name = name + "_abi_autogen_test", + srcs = [name + "_abi_autogen_test.go"], + library = ":" + name, + deps = marshal_test_deps, + **kwargs + ) + +def proto_library(name, srcs, **kwargs): + """Wraps the standard proto_library. + + Given a proto_library named "foo", this produces three different targets: + - foo_proto: proto_library rule. + - foo_go_proto: go_proto_library rule. + - foo_cc_proto: cc_proto_library rule. + + Args: + srcs: the proto sources. + **kwargs: standard proto_library arguments. 
+ """ + deps = kwargs.pop("deps", []) + _proto_library( + name = name + "_proto", + srcs = srcs, + deps = deps, + **kwargs + ) + _go_proto_library( + name = name + "_go_proto", + proto = ":" + name + "_proto", + deps = deps, + **kwargs + ) + _cc_proto_library( + name = name + "_cc_proto", + deps = [":" + name + "_proto"], + **kwargs + ) diff --git a/tools/go_generics/BUILD b/tools/go_generics/BUILD index 39318b877..069df3856 100644 --- a/tools/go_generics/BUILD +++ b/tools/go_generics/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/tools/go_generics/globals/BUILD b/tools/go_generics/globals/BUILD index 74853c7d2..38caa3ce7 100644 --- a/tools/go_generics/globals/BUILD +++ b/tools/go_generics/globals/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,6 +8,6 @@ go_library( "globals_visitor.go", "scope.go", ], - importpath = "gvisor.dev/gvisor/tools/go_generics/globals", + stateify = False, visibility = ["//tools/go_generics:__pkg__"], ) diff --git a/tools/go_generics/go_merge/BUILD b/tools/go_generics/go_merge/BUILD index 02b09120e..b7d35e272 100644 --- a/tools/go_generics/go_merge/BUILD +++ b/tools/go_generics/go_merge/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/tools/go_generics/rules_tests/BUILD b/tools/go_generics/rules_tests/BUILD index 9d26a88b7..8a329dfc6 100644 --- a/tools/go_generics/rules_tests/BUILD +++ b/tools/go_generics/rules_tests/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_test") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) diff --git a/tools/go_marshal/BUILD b/tools/go_marshal/BUILD index c862b277c..80d9c0504 100644 --- a/tools/go_marshal/BUILD +++ b/tools/go_marshal/BUILD @@ -1,6 +1,6 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") -package(licenses = ["notice"]) +licenses(["notice"]) go_binary( name = "go_marshal", diff --git a/tools/go_marshal/README.md b/tools/go_marshal/README.md index 481575bd3..4886efddf 100644 --- a/tools/go_marshal/README.md +++ b/tools/go_marshal/README.md @@ -20,19 +20,7 @@ comment `// +marshal`. # Usage -See `defs.bzl`: two new rules are provided, `go_marshal` and `go_library`. - -The recommended way to generate a go library with marshalling is to use the -`go_library` with mostly identical configuration as the native go_library rule. - -``` -load("/gvisor/tools/go_marshal:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], -) -``` +See `defs.bzl`: a new rule is provided, `go_marshal`. Under the hood, the `go_marshal` rule is used to generate a file that will appear in a Go target; the output file should appear explicitly in a srcs list. @@ -54,11 +42,7 @@ go_library( "foo.go", "foo_abi.go", ], - deps = [ - "/gvisor/pkg/abi", - "/gvisor/pkg/sentry/safemem/safemem", - "/gvisor/pkg/sentry/usermem/usermem", - ], + ... ) ``` @@ -69,22 +53,6 @@ These tests use reflection to verify properties of the ABI struct, and should be considered part of the generated interfaces (but are too expensive to execute at runtime). Ensure these tests run at some point. 
-``` -$ cat BUILD -load("/gvisor/tools/go_marshal:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], -) -$ blaze build :foo -$ blaze query ... -:foo_abi_autogen -:foo_abi_autogen_test -$ blaze test :foo_abi_autogen_test - -``` - # Restrictions Not all valid go type definitions can be used with `go_marshal`. `go_marshal` is @@ -131,22 +99,6 @@ for embedded structs that are not aligned. Because of this, it's generally best to avoid using `marshal:"unaligned"` and insert explicit padding fields instead. -## Debugging go_marshal - -To enable debugging output from the go marshal tool, pass the `-debug` flag to -the tool. When using the build rules from above, add a `debug = True` field to -the build rule like this: - -``` -load("/gvisor/tools/go_marshal:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], - debug = True, -) -``` - ## Modifying the `go_marshal` Tool The following are some guidelines for modifying the `go_marshal` tool: diff --git a/tools/go_marshal/analysis/BUILD b/tools/go_marshal/analysis/BUILD index c859ced77..c2a4d45c4 100644 --- a/tools/go_marshal/analysis/BUILD +++ b/tools/go_marshal/analysis/BUILD @@ -1,12 +1,11 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "analysis", testonly = 1, srcs = ["analysis_unsafe.go"], - importpath = "gvisor.dev/gvisor/tools/go_marshal/analysis", visibility = [ "//:sandbox", ], diff --git a/tools/go_marshal/defs.bzl b/tools/go_marshal/defs.bzl index c32eb559f..2918ceffe 100644 --- a/tools/go_marshal/defs.bzl +++ b/tools/go_marshal/defs.bzl @@ -1,57 +1,14 @@ -"""Marshal is a tool for generating marshalling interfaces for Go types. - -The recommended way is to use the go_library rule defined below with mostly -identical configuration as the native go_library rule. - -load("//tools/go_marshal:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], -) - -Under the hood, the go_marshal rule is used to generate a file that will -appear in a Go target; the output file should appear explicitly in a srcs list. -For example (the above is still the preferred way): - -load("//tools/go_marshal:defs.bzl", "go_marshal") - -go_marshal( - name = "foo_abi", - srcs = ["foo.go"], - out = "foo_abi.go", - package = "foo", -) - -go_library( - name = "foo", - srcs = [ - "foo.go", - "foo_abi.go", - ], - deps = [ - "//tools/go_marshal:marshal", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/usermem", - ], -) -""" - -load("@io_bazel_rules_go//go:def.bzl", _go_library = "go_library", _go_test = "go_test") +"""Marshal is a tool for generating marshalling interfaces for Go types.""" def _go_marshal_impl(ctx): """Execute the go_marshal tool.""" output = ctx.outputs.lib output_test = ctx.outputs.test - (build_dir, _, _) = ctx.build_file_path.rpartition("/BUILD") - - decl = "/".join(["gvisor.dev/gvisor", build_dir]) # Run the marshal command. 
args = ["-output=%s" % output.path] args += ["-pkg=%s" % ctx.attr.package] args += ["-output_test=%s" % output_test.path] - args += ["-declarationPkg=%s" % decl] if ctx.attr.debug: args += ["-debug"] @@ -83,7 +40,6 @@ go_marshal = rule( implementation = _go_marshal_impl, attrs = { "srcs": attr.label_list(mandatory = True, allow_files = True), - "libname": attr.string(mandatory = True), "imports": attr.string_list(mandatory = False), "package": attr.string(mandatory = True), "debug": attr.bool(doc = "enable debugging output from the go_marshal tool"), @@ -95,58 +51,14 @@ go_marshal = rule( }, ) -def go_library(name, srcs, deps = [], imports = [], debug = False, **kwargs): - """wraps the standard go_library and does mashalling interface generation. - - Args: - name: Same as native go_library. - srcs: Same as native go_library. - deps: Same as native go_library. - imports: Extra import paths to pass to the go_marshal tool. - debug: Enables debugging output from the go_marshal tool. - **kwargs: Remaining args to pass to the native go_library rule unmodified. - """ - go_marshal( - name = name + "_abi_autogen", - libname = name, - srcs = [src for src in srcs if src.endswith(".go")], - debug = debug, - imports = imports, - package = name, - ) - - extra_deps = [ - "//tools/go_marshal/marshal", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/usermem", - ] - - all_srcs = srcs + [name + "_abi_autogen_unsafe.go"] - all_deps = deps + [] # + extra_deps - - for extra in extra_deps: - if extra not in deps: - all_deps.append(extra) - - _go_library( - name = name, - srcs = all_srcs, - deps = all_deps, - **kwargs - ) - - # Don't pass importpath arg to go_test. - kwargs.pop("importpath", "") - - _go_test( - name = name + "_abi_autogen_test", - srcs = [name + "_abi_autogen_test.go"], - # Generated test has a fixed set of dependencies since we generate these - # tests. They should only depend on the library generated above, and the - # Marshallable interface. - deps = [ - ":" + name, - "//tools/go_marshal/analysis", - ], - **kwargs - ) +# marshal_deps are the dependencies requied by generated code. +marshal_deps = [ + "//tools/go_marshal/marshal", + "//pkg/sentry/platform/safecopy", + "//pkg/sentry/usermem", +] + +# marshal_test_deps are required by test targets. +marshal_test_deps = [ + "//tools/go_marshal/analysis", +] diff --git a/tools/go_marshal/gomarshal/BUILD b/tools/go_marshal/gomarshal/BUILD index a0eae6492..c92b59dd6 100644 --- a/tools/go_marshal/gomarshal/BUILD +++ b/tools/go_marshal/gomarshal/BUILD @@ -1,6 +1,6 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "gomarshal", @@ -10,7 +10,7 @@ go_library( "generator_tests.go", "util.go", ], - importpath = "gvisor.dev/gvisor/tools/go_marshal/gomarshal", + stateify = False, visibility = [ "//:sandbox", ], diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go index 641ccd938..8392f3f6d 100644 --- a/tools/go_marshal/gomarshal/generator.go +++ b/tools/go_marshal/gomarshal/generator.go @@ -62,15 +62,12 @@ type Generator struct { outputTest *os.File // Package name for the generated file. pkg string - // Go import path for package we're processing. This package should directly - // declare the type we're generating code for. - declaration string // Set of extra packages to import in the generated file. imports *importTable } // NewGenerator creates a new code Generator. 
-func NewGenerator(srcs []string, out, outTest, pkg, declaration string, imports []string) (*Generator, error) { +func NewGenerator(srcs []string, out, outTest, pkg string, imports []string) (*Generator, error) { f, err := os.OpenFile(out, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) if err != nil { return nil, fmt.Errorf("Couldn't open output file %q: %v", out, err) @@ -80,12 +77,11 @@ func NewGenerator(srcs []string, out, outTest, pkg, declaration string, imports return nil, fmt.Errorf("Couldn't open test output file %q: %v", out, err) } g := Generator{ - inputs: srcs, - output: f, - outputTest: fTest, - pkg: pkg, - declaration: declaration, - imports: newImportTable(), + inputs: srcs, + output: f, + outputTest: fTest, + pkg: pkg, + imports: newImportTable(), } for _, i := range imports { // All imports on the extra imports list are unconditionally marked as @@ -264,7 +260,7 @@ func (g *Generator) generateOne(t *ast.TypeSpec, fset *token.FileSet) *interface // generateOneTestSuite generates a test suite for the automatically generated // implementations type t. func (g *Generator) generateOneTestSuite(t *ast.TypeSpec) *testGenerator { - i := newTestGenerator(t, g.declaration) + i := newTestGenerator(t) i.emitTests() return i } @@ -359,7 +355,7 @@ func (g *Generator) Run() error { // source file. func (g *Generator) writeTests(ts []*testGenerator) error { var b sourceBuffer - b.emit("package %s_test\n\n", g.pkg) + b.emit("package %s\n\n", g.pkg) if err := b.write(g.outputTest); err != nil { return err } diff --git a/tools/go_marshal/gomarshal/generator_tests.go b/tools/go_marshal/gomarshal/generator_tests.go index df25cb5b2..bcda17c3b 100644 --- a/tools/go_marshal/gomarshal/generator_tests.go +++ b/tools/go_marshal/gomarshal/generator_tests.go @@ -46,7 +46,7 @@ type testGenerator struct { decl *importStmt } -func newTestGenerator(t *ast.TypeSpec, declaration string) *testGenerator { +func newTestGenerator(t *ast.TypeSpec) *testGenerator { if _, ok := t.Type.(*ast.StructType); !ok { panic(fmt.Sprintf("Attempting to generate code for a not struct type %v", t)) } @@ -59,14 +59,12 @@ func newTestGenerator(t *ast.TypeSpec, declaration string) *testGenerator { for _, i := range standardImports { g.imports.add(i).markUsed() } - g.decl = g.imports.add(declaration) - g.decl.markUsed() return g } func (g *testGenerator) typeName() string { - return fmt.Sprintf("%s.%s", g.decl.name, g.t.Name.Name) + return g.t.Name.Name } func (g *testGenerator) forEachField(fn func(f *ast.Field)) { diff --git a/tools/go_marshal/main.go b/tools/go_marshal/main.go index 3d12eb93c..e1a97b311 100644 --- a/tools/go_marshal/main.go +++ b/tools/go_marshal/main.go @@ -31,11 +31,10 @@ import ( ) var ( - pkg = flag.String("pkg", "", "output package") - output = flag.String("output", "", "output file") - outputTest = flag.String("output_test", "", "output file for tests") - imports = flag.String("imports", "", "comma-separated list of extra packages to import in generated code") - declarationPkg = flag.String("declarationPkg", "", "import path of target declaring the types we're generating on") + pkg = flag.String("pkg", "", "output package") + output = flag.String("output", "", "output file") + outputTest = flag.String("output_test", "", "output file for tests") + imports = flag.String("imports", "", "comma-separated list of extra packages to import in generated code") ) func main() { @@ -62,7 +61,7 @@ func main() { // as an import. 
extraImports = strings.Split(*imports, ",") } - g, err := gomarshal.NewGenerator(flag.Args(), *output, *outputTest, *pkg, *declarationPkg, extraImports) + g, err := gomarshal.NewGenerator(flag.Args(), *output, *outputTest, *pkg, extraImports) if err != nil { panic(err) } diff --git a/tools/go_marshal/marshal/BUILD b/tools/go_marshal/marshal/BUILD index 47dda97a1..ad508c72f 100644 --- a/tools/go_marshal/marshal/BUILD +++ b/tools/go_marshal/marshal/BUILD @@ -1,13 +1,12 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "marshal", srcs = [ "marshal.go", ], - importpath = "gvisor.dev/gvisor/tools/go_marshal/marshal", visibility = [ "//:sandbox", ], diff --git a/tools/go_marshal/test/BUILD b/tools/go_marshal/test/BUILD index d412e1ccf..38ba49fed 100644 --- a/tools/go_marshal/test/BUILD +++ b/tools/go_marshal/test/BUILD @@ -1,7 +1,6 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_marshal:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) package_group( name = "gomarshal_test", @@ -25,6 +24,6 @@ go_library( name = "test", testonly = 1, srcs = ["test.go"], - importpath = "gvisor.dev/gvisor/tools/go_marshal/test", + marshal = True, deps = ["//tools/go_marshal/test/external"], ) diff --git a/tools/go_marshal/test/external/BUILD b/tools/go_marshal/test/external/BUILD index 9bb89e1da..0cf6da603 100644 --- a/tools/go_marshal/test/external/BUILD +++ b/tools/go_marshal/test/external/BUILD @@ -1,11 +1,11 @@ -load("//tools/go_marshal:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "external", testonly = 1, srcs = ["external.go"], - importpath = "gvisor.dev/gvisor/tools/go_marshal/test/external", + marshal = True, visibility = ["//tools/go_marshal/test:gomarshal_test"], ) diff --git a/tools/go_stateify/BUILD b/tools/go_stateify/BUILD index bb53f8ae9..a133d6f8b 100644 --- a/tools/go_stateify/BUILD +++ b/tools/go_stateify/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/tools/go_stateify/defs.bzl b/tools/go_stateify/defs.bzl index 33267c074..0f261d89f 100644 --- a/tools/go_stateify/defs.bzl +++ b/tools/go_stateify/defs.bzl @@ -1,41 +1,4 @@ -"""Stateify is a tool for generating state wrappers for Go types. - -The recommended way is to use the go_library rule defined below with mostly -identical configuration as the native go_library rule. - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], -) - -Under the hood, the go_stateify rule is used to generate a file that will -appear in a Go target; the output file should appear explicitly in a srcs list. 
-For example (the above is still the preferred way): - -load("//tools/go_stateify:defs.bzl", "go_stateify") - -go_stateify( - name = "foo_state", - srcs = ["foo.go"], - out = "foo_state.go", - package = "foo", -) - -go_library( - name = "foo", - srcs = [ - "foo.go", - "foo_state.go", - ], - deps = [ - "//pkg/state", - ], -) -""" - -load("@io_bazel_rules_go//go:def.bzl", _go_library = "go_library") +"""Stateify is a tool for generating state wrappers for Go types.""" def _go_stateify_impl(ctx): """Implementation for the stateify tool.""" @@ -103,43 +66,3 @@ files and must be added to the srcs of the relevant go_library. "_statepkg": attr.string(default = "gvisor.dev/gvisor/pkg/state"), }, ) - -def go_library(name, srcs, deps = [], imports = [], **kwargs): - """Standard go_library wrapped which generates state source files. - - Args: - name: the name of the go_library rule. - srcs: sources of the go_library. Each will be processed for stateify - annotations. - deps: dependencies for the go_library. - imports: an optional list of extra non-aliased, Go-style absolute import - paths required for stateified types. - **kwargs: passed to go_library. - """ - if "encode_unsafe.go" not in srcs and (name + "_state_autogen.go") not in srcs: - # Only do stateification for non-state packages without manual autogen. - go_stateify( - name = name + "_state_autogen", - srcs = [src for src in srcs if src.endswith(".go")], - imports = imports, - package = name, - arch = select({ - "@bazel_tools//src/conditions:linux_aarch64": "arm64", - "//conditions:default": "amd64", - }), - out = name + "_state_autogen.go", - ) - all_srcs = srcs + [name + "_state_autogen.go"] - if "//pkg/state" not in deps: - all_deps = deps + ["//pkg/state"] - else: - all_deps = deps - else: - all_deps = deps - all_srcs = srcs - _go_library( - name = name, - srcs = all_srcs, - deps = all_deps, - **kwargs - ) diff --git a/tools/images/BUILD b/tools/images/BUILD index 2b77c2737..f1699b184 100644 --- a/tools/images/BUILD +++ b/tools/images/BUILD @@ -1,4 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_binary") +load("//tools:defs.bzl", "cc_binary") load("//tools/images:defs.bzl", "vm_image", "vm_test") package( diff --git a/tools/images/defs.bzl b/tools/images/defs.bzl index d8e422a5d..32235813a 100644 --- a/tools/images/defs.bzl +++ b/tools/images/defs.bzl @@ -28,6 +28,8 @@ The vm_test rule can be used to execute a command remotely. 
For example, ) """ +load("//tools:defs.bzl", "default_installer") + def _vm_image_impl(ctx): script_paths = [] for script in ctx.files.scripts: @@ -165,8 +167,8 @@ def vm_test( targets = kwargs.pop("targets", []) if installer: targets = [installer] + targets - targets = [ - ] + targets + if default_installer(): + targets = [default_installer()] + targets _vm_test( tags = [ "local", diff --git a/tools/issue_reviver/BUILD b/tools/issue_reviver/BUILD index ee7ea11fd..4ef1a3124 100644 --- a/tools/issue_reviver/BUILD +++ b/tools/issue_reviver/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/tools/issue_reviver/github/BUILD b/tools/issue_reviver/github/BUILD index 6da22ba1c..da4133472 100644 --- a/tools/issue_reviver/github/BUILD +++ b/tools/issue_reviver/github/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "github", srcs = ["github.go"], - importpath = "gvisor.dev/gvisor/tools/issue_reviver/github", visibility = [ "//tools/issue_reviver:__subpackages__", ], diff --git a/tools/issue_reviver/reviver/BUILD b/tools/issue_reviver/reviver/BUILD index 2c3675977..d262932bd 100644 --- a/tools/issue_reviver/reviver/BUILD +++ b/tools/issue_reviver/reviver/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "reviver", srcs = ["reviver.go"], - importpath = "gvisor.dev/gvisor/tools/issue_reviver/reviver", visibility = [ "//tools/issue_reviver:__subpackages__", ], @@ -15,5 +14,5 @@ go_test( name = "reviver_test", size = "small", srcs = ["reviver_test.go"], - embed = [":reviver"], + library = ":reviver", ) diff --git a/tools/workspace_status.sh b/tools/workspace_status.sh index fb09ff331..a22c8c9f2 100755 --- a/tools/workspace_status.sh +++ b/tools/workspace_status.sh @@ -15,4 +15,4 @@ # limitations under the License. # The STABLE_ prefix will trigger a re-link if it changes. -echo STABLE_VERSION $(git describe --always --tags --abbrev=12 --dirty) +echo STABLE_VERSION $(git describe --always --tags --abbrev=12 --dirty || echo 0.0.0) diff --git a/vdso/BUILD b/vdso/BUILD index 2b6744c26..d37d4266d 100644 --- a/vdso/BUILD +++ b/vdso/BUILD @@ -3,20 +3,10 @@ # normal system VDSO (time, gettimeofday, clock_gettimeofday) but which uses # timekeeping parameters managed by the sandbox kernel. -load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", "cc_flags_supplier") +load("//tools:defs.bzl", "cc_flags_supplier", "cc_toolchain", "select_arch") package(licenses = ["notice"]) -config_setting( - name = "x86_64", - constraint_values = ["@bazel_tools//platforms:x86_64"], -) - -config_setting( - name = "aarch64", - constraint_values = ["@bazel_tools//platforms:aarch64"], -) - genrule( name = "vdso", srcs = [ @@ -39,14 +29,15 @@ genrule( "-O2 " + "-std=c++11 " + "-fPIC " + + "-fno-sanitize=all " + # Some toolchains enable stack protector by default. Disable it, the # VDSO has no hooks to handle failures. 
"-fno-stack-protector " + "-fuse-ld=gold " + - select({ - ":x86_64": "-m64 ", - "//conditions:default": "", - }) + + select_arch( + amd64 = "-m64 ", + arm64 = "", + ) + "-shared " + "-nostdlib " + "-Wl,-soname=linux-vdso.so.1 " + @@ -55,12 +46,10 @@ genrule( "-Wl,-Bsymbolic " + "-Wl,-z,max-page-size=4096 " + "-Wl,-z,common-page-size=4096 " + - select( - { - ":x86_64": "-Wl,-T$(location vdso_amd64.lds) ", - ":aarch64": "-Wl,-T$(location vdso_arm64.lds) ", - }, - no_match_error = "Unsupported architecture", + select_arch( + amd64 = "-Wl,-T$(location vdso_amd64.lds) ", + arm64 = "-Wl,-T$(location vdso_arm64.lds) ", + no_match_error = "unsupported architecture", ) + "-o $(location vdso.so) " + "$(location vdso.cc) " + @@ -73,7 +62,7 @@ genrule( ], features = ["-pie"], toolchains = [ - "@bazel_tools//tools/cpp:current_cc_toolchain", + cc_toolchain, ":no_pie_cc_flags", ], visibility = ["//:sandbox"], -- cgit v1.2.3 From 29316e66adfc49c158425554761e34c12338f1d9 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Mon, 27 Jan 2020 12:27:04 -0800 Subject: Cleanup for GH review. --- pkg/abi/linux/netfilter.go | 14 +-- pkg/sentry/socket/netfilter/netfilter.go | 144 ++++++++++++------------------- pkg/tcpip/iptables/types.go | 15 ---- pkg/tcpip/iptables/udp_matcher.go | 62 ++++++------- test/iptables/filter_input.go | 6 +- 5 files changed, 88 insertions(+), 153 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index effed7976..8e40bcc62 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -198,6 +198,8 @@ type XTEntryMatch struct { // SizeOfXTEntryMatch is the size of an XTEntryMatch. const SizeOfXTEntryMatch = 32 +// KernelXTEntryMatch is identical to XTEntryMatch, but contains +// variable-length Data field. type KernelXTEntryMatch struct { XTEntryMatch Data []byte @@ -349,19 +351,19 @@ func goString(cstring []byte) string { // XTUDP holds data for matching UDP packets. It corresponds to struct xt_udp // in include/uapi/linux/netfilter/xt_tcpudp.h. type XTUDP struct { - // SourcePortStart specifies the inclusive start of the range of source - // ports to which the matcher applies. + // SourcePortStart is the inclusive start of the range of source ports + // to which the matcher applies. SourcePortStart uint16 - // SourcePortEnd specifies the inclusive end of the range of source ports - // to which the matcher applies. + // SourcePortEnd is the inclusive end of the range of source ports to + // which the matcher applies. SourcePortEnd uint16 - // DestinationPortStart specifies the start of the destination port + // DestinationPortStart is the inclusive start of the destination port // range to which the matcher applies. DestinationPortStart uint16 - // DestinationPortEnd specifies the start of the destination port + // DestinationPortEnd is the inclusive end of the destination port // range to which the matcher applies. DestinationPortEnd uint16 diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 6c88a50a6..b8848f08a 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -34,9 +34,16 @@ import ( // shouldn't be reached - an error has occurred if we fall through to one. const errorTargetName = "ERROR" -// metadata is opaque to netstack. It holds data that we need to translate -// between Linux's and netstack's iptables representations. -// TODO(gvisor.dev/issue/170): Use metadata to check correctness. 
+const ( + matcherNameUDP = "udp" +) + +// Metadata is used to verify that we are correctly serializing and +// deserializing iptables into structs consumable by the iptables tool. We save +// a metadata struct when the tables are written, and when they are read out we +// verify that certain fields are the same. +// +// metadata is opaque to netstack. type metadata struct { HookEntry [linux.NF_INET_NUMHOOKS]uint32 Underflow [linux.NF_INET_NUMHOOKS]uint32 @@ -44,10 +51,12 @@ type metadata struct { Size uint32 } -const enableDebugLog = true +const enableDebug = false +// nflog logs messages related to the writing and reading of iptables, but only +// when enableDebug is true. func nflog(format string, args ...interface{}) { - if enableDebugLog { + if enableDebug { log.Infof("netfilter: "+format, args...) } } @@ -80,7 +89,7 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT info.NumEntries = metadata.NumEntries info.Size = metadata.Size - nflog("GetInfo returning info: %+v", info) + nflog("returning info: %+v", info) return info, nil } @@ -163,19 +172,19 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern copy(entries.Name[:], tablename) for ruleIdx, rule := range table.Rules { - nflog("Current offset: %d", entries.Size) + nflog("convert to binary: current offset: %d", entries.Size) // Is this a chain entry point? for hook, hookRuleIdx := range table.BuiltinChains { if hookRuleIdx == ruleIdx { - nflog("Found hook %d at offset %d", hook, entries.Size) + nflog("convert to binary: found hook %d at offset %d", hook, entries.Size) meta.HookEntry[hook] = entries.Size } } // Is this a chain underflow point? for underflow, underflowRuleIdx := range table.Underflows { if underflowRuleIdx == ruleIdx { - nflog("Found underflow %d at offset %d", underflow, entries.Size) + nflog("convert to binary: found underflow %d at offset %d", underflow, entries.Size) meta.Underflow[underflow] = entries.Size } } @@ -195,7 +204,7 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern // Serialize the matcher and add it to the // entry. serialized := marshalMatcher(matcher) - nflog("matcher serialized as: %v", serialized) + nflog("convert to binary: matcher serialized as: %v", serialized) if len(serialized)%8 != 0 { panic(fmt.Sprintf("matcher %T is not 64-bit aligned", matcher)) } @@ -212,14 +221,14 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern entry.Elems = append(entry.Elems, serialized...) entry.NextOffset += uint16(len(serialized)) - nflog("Adding entry: %+v", entry) + nflog("convert to binary: adding entry: %+v", entry) entries.Size += uint32(entry.NextOffset) entries.Entrytable = append(entries.Entrytable, entry) meta.NumEntries++ } - nflog("Finished with an marshalled size of %d", meta.Size) + nflog("convert to binary: finished with an marshalled size of %d", meta.Size) meta.Size = entries.Size return entries, meta, nil } @@ -237,16 +246,18 @@ func marshalMatcher(matcher iptables.Matcher) []byte { } func marshalUDPMatcher(matcher *iptables.UDPMatcher) []byte { - nflog("Marshalling UDP matcher: %+v", matcher) + nflog("convert to binary: marshalling UDP matcher: %+v", matcher) + + // We have to pad this struct size to a multiple of 8 bytes. 
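// Aside (illustrative sketch, not taken from the patch): where the "+ 6" in
// the constant just below comes from. SizeOfXTEntryMatch is 32 (defined in
// pkg/abi/linux/netfilter.go above), and struct xt_udp works out to 10 bytes
// (four uint16 ports plus the uint8 inverse flags, rounded up for alignment;
// this size is an assumption based on the Linux layout, not stated in the
// patch). The unpadded match is therefore 42 bytes and needs 6 bytes of tail
// padding to reach 48, the next multiple of 8 required by iptables. In general:

// padTo8 rounds n up to the next multiple of 8 (hypothetical helper).
func padTo8(n int) int {
        return (n + 7) &^ 7
}

// padTo8(32+10) == 48, i.e. SizeOfXTEntryMatch + SizeOfXTUDP + 6.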
+ const size = linux.SizeOfXTEntryMatch + linux.SizeOfXTUDP + 6 linuxMatcher := linux.KernelXTEntryMatch{ XTEntryMatch: linux.XTEntryMatch{ - MatchSize: linux.SizeOfXTEntryMatch + linux.SizeOfXTUDP + 6, - // Name: "udp", + MatchSize: size, }, Data: make([]byte, 0, linux.SizeOfXTUDP), } - copy(linuxMatcher.Name[:], "udp") + copy(linuxMatcher.Name[:], matcherNameUDP) xtudp := linux.XTUDP{ SourcePortStart: matcher.Data.SourcePortStart, @@ -255,17 +266,12 @@ func marshalUDPMatcher(matcher *iptables.UDPMatcher) []byte { DestinationPortEnd: matcher.Data.DestinationPortEnd, InverseFlags: matcher.Data.InverseFlags, } - nflog("marshalUDPMatcher: xtudp: %+v", xtudp) linuxMatcher.Data = binary.Marshal(linuxMatcher.Data, usermem.ByteOrder, xtudp) - nflog("marshalUDPMatcher: linuxMatcher: %+v", linuxMatcher) - // We have to pad this struct size to a multiple of 8 bytes, so we make - // this a little longer than it needs to be. - buf := make([]byte, 0, linux.SizeOfXTEntryMatch+linux.SizeOfXTUDP+6) + buf := make([]byte, 0, size) buf = binary.Marshal(buf, usermem.ByteOrder, linuxMatcher) buf = append(buf, []byte{0, 0, 0, 0, 0, 0}...) - nflog("Marshalled into matcher of size %d", len(buf)) - nflog("marshalUDPMatcher: buf is: %v", buf) + nflog("convert to binary: marshalled UDP matcher into %v", buf) return buf[:] } @@ -283,9 +289,8 @@ func marshalTarget(target iptables.Target) []byte { } func marshalStandardTarget(verdict iptables.Verdict) []byte { - nflog("Marshalling standard target with size %d", linux.SizeOfXTStandardTarget) + nflog("convert to binary: marshalling standard target with size %d", linux.SizeOfXTStandardTarget) - // TODO: Must be aligned. // The target's name will be the empty string. target := linux.XTStandardTarget{ Target: linux.XTEntryTarget{ @@ -353,8 +358,6 @@ func translateToStandardVerdict(val int32) (iptables.Verdict, *syserr.Error) { // SetEntries sets iptables rules for a single table. See // net/ipv4/netfilter/ip_tables.c:translate_table for reference. func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { - // printReplace(optVal) - // Get the basic rules data (struct ipt_replace). if len(optVal) < linux.SizeOfIPTReplace { log.Warningf("netfilter.SetEntries: optVal has insufficient size for replace %d", len(optVal)) @@ -375,13 +378,13 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { return syserr.ErrInvalidArgument } - nflog("Setting entries in table %q", replace.Name.String()) + nflog("set entries: setting entries in table %q", replace.Name.String()) // Convert input into a list of rules and their offsets. var offset uint32 var offsets []uint32 for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { - nflog("Processing entry at offset %d", offset) + nflog("set entries: processing entry at offset %d", offset) // Get the struct ipt_entry. if len(optVal) < linux.SizeOfIPTEntry { @@ -406,11 +409,13 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { return err } - // TODO: Matchers (and maybe targets) can specify that they only work for certiain protocols, hooks, tables. + // TODO(gvisor.dev/issue/170): Matchers and targets can specify + // that they only work for certiain protocols, hooks, tables. // Get matchers. 
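// Aside (illustrative sketch, not taken from the patch): the wire format this
// loop walks. Each rule in the replace blob is laid out as
//
//      [ struct ipt_entry ][ zero or more xt_entry_match ][ xt_entry_target ]
//      offset 0             SizeOfIPTEntry                 entry.TargetOffset
//
// with entry.NextOffset marking where the next rule starts. The matcher region
// is therefore TargetOffset - SizeOfIPTEntry bytes long and the target region
// is NextOffset - TargetOffset bytes long; those are exactly the two sizes
// computed just below. A hypothetical helper in the same spirit:

func entryRegionSizes(e linux.IPTEntry) (matchersLen, targetLen uint16) {
        return e.TargetOffset - linux.SizeOfIPTEntry, e.NextOffset - e.TargetOffset
}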
matchersSize := entry.TargetOffset - linux.SizeOfIPTEntry if len(optVal) < int(matchersSize) { log.Warningf("netfilter: entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal)) + return syserr.ErrInvalidArgument } matchers, err := parseMatchers(filter, optVal[:matchersSize]) if err != nil { @@ -423,6 +428,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { targetSize := entry.NextOffset - entry.TargetOffset if len(optVal) < int(targetSize) { log.Warningf("netfilter: entry doesn't have enough room for its target (only %d bytes remain)", len(optVal)) + return syserr.ErrInvalidArgument } target, err := parseTarget(optVal[:targetSize]) if err != nil { @@ -500,10 +506,11 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { // parseMatchers parses 0 or more matchers from optVal. optVal should contain // only the matchers. func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Matcher, *syserr.Error) { - nflog("Parsing matchers of size %d", len(optVal)) + nflog("set entries: parsing matchers of size %d", len(optVal)) var matchers []iptables.Matcher for len(optVal) > 0 { - nflog("parseMatchers: optVal has len %d", len(optVal)) + nflog("set entries: optVal has len %d", len(optVal)) + // Get the XTEntryMatch. if len(optVal) < linux.SizeOfXTEntryMatch { log.Warningf("netfilter: optVal has insufficient size for entry match: %d", len(optVal)) @@ -512,7 +519,7 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma var match linux.XTEntryMatch buf := optVal[:linux.SizeOfXTEntryMatch] binary.Unmarshal(buf, usermem.ByteOrder, &match) - nflog("parseMatchers: parsed entry match %q: %+v", match.Name.String(), match) + nflog("set entries: parsed entry match %q: %+v", match.Name.String(), match) // Check some invariants. if match.MatchSize < linux.SizeOfXTEntryMatch { @@ -528,17 +535,17 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma var matcher iptables.Matcher var err error switch match.Name.String() { - case "udp": + case matcherNameUDP: if len(buf) < linux.SizeOfXTUDP { log.Warningf("netfilter: optVal has insufficient size for UDP match: %d", len(optVal)) return nil, syserr.ErrInvalidArgument } + // For alignment reasons, the match's total size may + // exceed what's strictly necessary to hold matchData. var matchData linux.XTUDP - // For alignment reasons, the match's total size may exceed what's - // strictly necessary to hold matchData. binary.Unmarshal(buf[:linux.SizeOfXTUDP], usermem.ByteOrder, &matchData) log.Infof("parseMatchers: parsed XTUDP: %+v", matchData) - matcher, err = iptables.NewUDPMatcher(filter, iptables.UDPMatcherData{ + matcher, err = iptables.NewUDPMatcher(filter, iptables.UDPMatcherParams{ SourcePortStart: matchData.SourcePortStart, SourcePortEnd: matchData.SourcePortEnd, DestinationPortStart: matchData.DestinationPortStart, @@ -557,19 +564,22 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma matchers = append(matchers, matcher) - // TODO: Support revision. - // TODO: Support proto -- matchers usually specify which proto(s) they work with. + // TODO(gvisor.dev/issue/170): Check the revision field. optVal = optVal[match.MatchSize:] } - // TODO: Check that optVal is exhausted. + if len(optVal) != 0 { + log.Warningf("netfilter: optVal should be exhausted after parsing matchers") + return nil, syserr.ErrInvalidArgument + } + return matchers, nil } // parseTarget parses a target from optVal. 
optVal should contain only the // target. func parseTarget(optVal []byte) (iptables.Target, *syserr.Error) { - nflog("Parsing target of size %d", len(optVal)) + nflog("set entries: parsing target of size %d", len(optVal)) if len(optVal) < linux.SizeOfXTEntryTarget { log.Warningf("netfilter: optVal has insufficient size for entry target %d", len(optVal)) return nil, syserr.ErrInvalidArgument @@ -598,7 +608,8 @@ func parseTarget(optVal []byte) (iptables.Target, *syserr.Error) { case iptables.Drop: return iptables.UnconditionalDropTarget{}, nil default: - panic(fmt.Sprintf("Unknown verdict: %v", verdict)) + log.Warningf("Unknown verdict: %v", verdict) + return nil, syserr.ErrInvalidArgument } case errorTargetName: @@ -673,52 +684,3 @@ func hookFromLinux(hook int) iptables.Hook { } panic(fmt.Sprintf("Unknown hook %d does not correspond to a builtin chain", hook)) } - -// printReplace prints information about the struct ipt_replace in optVal. It -// is only for debugging. -func printReplace(optVal []byte) { - // Basic replace info. - var replace linux.IPTReplace - replaceBuf := optVal[:linux.SizeOfIPTReplace] - optVal = optVal[linux.SizeOfIPTReplace:] - binary.Unmarshal(replaceBuf, usermem.ByteOrder, &replace) - log.Infof("Replacing table %q: %+v", replace.Name.String(), replace) - - // Read in the list of entries at the end of replace. - var totalOffset uint16 - for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { - var entry linux.IPTEntry - entryBuf := optVal[:linux.SizeOfIPTEntry] - binary.Unmarshal(entryBuf, usermem.ByteOrder, &entry) - log.Infof("Entry %d (total offset %d): %+v", entryIdx, totalOffset, entry) - - totalOffset += entry.NextOffset - if entry.TargetOffset == linux.SizeOfIPTEntry { - log.Infof("Entry has no matches.") - } else { - log.Infof("Entry has matches.") - } - - var target linux.XTEntryTarget - targetBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTEntryTarget] - binary.Unmarshal(targetBuf, usermem.ByteOrder, &target) - log.Infof("Target named %q: %+v", target.Name.String(), target) - - switch target.Name.String() { - case "": - var standardTarget linux.XTStandardTarget - stBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTStandardTarget] - binary.Unmarshal(stBuf, usermem.ByteOrder, &standardTarget) - log.Infof("Standard target with verdict %q (%d).", linux.VerdictStrings[standardTarget.Verdict], standardTarget.Verdict) - case errorTargetName: - var errorTarget linux.XTErrorTarget - etBuf := optVal[entry.TargetOffset : entry.TargetOffset+linux.SizeOfXTErrorTarget] - binary.Unmarshal(etBuf, usermem.ByteOrder, &errorTarget) - log.Infof("Error target with name %q.", errorTarget.Name.String()) - default: - log.Infof("Unknown target type.") - } - - optVal = optVal[entry.NextOffset:] - } -} diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index d47447d40..ba5ed75b4 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -169,8 +169,6 @@ type IPHeaderFilter struct { Protocol tcpip.TransportProtocolNumber } -// TODO: Should these be able to marshal/unmarshal themselves? -// TODO: Something has to map the name to the matcher. // A Matcher is the interface for matching packets. type Matcher interface { // Match returns whether the packet matches and whether the packet @@ -179,19 +177,6 @@ type Matcher interface { // // Precondition: packet.NetworkHeader is set. 
Match(hook Hook, packet tcpip.PacketBuffer, interfaceName string) (matches bool, hotdrop bool) - - // TODO: Make this typesafe by having each Matcher have their own, typed CheckEntry? - // CheckEntry(params MatchCheckEntryParams) bool -} - -// TODO: Unused? -type MatchCheckEntryParams struct { - Table string // TODO: Tables should be an enum... - Filter IPHeaderFilter - Info interface{} // TODO: Type unsafe. - // HookMask uint8 - // Family uint8 - // NFTCompat bool } // A Target is the interface for taking an action for a packet. diff --git a/pkg/tcpip/iptables/udp_matcher.go b/pkg/tcpip/iptables/udp_matcher.go index 65ae7f9e0..f59ca2027 100644 --- a/pkg/tcpip/iptables/udp_matcher.go +++ b/pkg/tcpip/iptables/udp_matcher.go @@ -16,33 +16,28 @@ package iptables import ( "fmt" - "runtime/debug" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" ) +// TODO(gvisor.dev/issue/170): The following per-matcher params should be +// supported: +// - Table name +// - Match size +// - User size +// - Hooks +// - Proto +// - Family + +// UDPMatcher matches UDP packets and their headers. It implements Matcher. type UDPMatcher struct { - Data UDPMatcherData - - // tablename string - // unsigned int matchsize; - // unsigned int usersize; - // #ifdef CONFIG_COMPAT - // unsigned int compatsize; - // #endif - // unsigned int hooks; - // unsigned short proto; - // unsigned short family; + Data UDPMatcherParams } -// TODO: Delete? -// MatchCheckEntryParams - -type UDPMatcherData struct { - // Filter IPHeaderFilter - +// UDPMatcherParams are the parameters used to create a UDPMatcher. +type UDPMatcherParams struct { SourcePortStart uint16 SourcePortEnd uint16 DestinationPortStart uint16 @@ -50,12 +45,12 @@ type UDPMatcherData struct { InverseFlags uint8 } -func NewUDPMatcher(filter IPHeaderFilter, data UDPMatcherData) (Matcher, error) { - // TODO: We currently only support source port and destination port. - log.Infof("Adding rule with UDPMatcherData: %+v", data) +// NewUDPMatcher returns a new instance of UDPMatcher. +func NewUDPMatcher(filter IPHeaderFilter, data UDPMatcherParams) (Matcher, error) { + log.Infof("Adding rule with UDPMatcherParams: %+v", data) if data.InverseFlags != 0 { - return nil, fmt.Errorf("unsupported UDP matcher flags set") + return nil, fmt.Errorf("unsupported UDP matcher inverse flags set") } if filter.Protocol != header.UDPProtocolNumber { @@ -65,21 +60,18 @@ func NewUDPMatcher(filter IPHeaderFilter, data UDPMatcherData) (Matcher, error) return &UDPMatcher{Data: data}, nil } -// TODO: Check xt_tcpudp.c. Need to check for same things (e.g. fragments). +// Match implements Matcher.Match. func (um *UDPMatcher) Match(hook Hook, pkt tcpip.PacketBuffer, interfaceName string) (bool, bool) { - log.Infof("UDPMatcher called from: %s", string(debug.Stack())) netHeader := header.IPv4(pkt.NetworkHeader) - // TODO: Do we check proto here or elsewhere? I think elsewhere (check - // codesearch). + // TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved + // into the iptables.Check codepath as matchers are added. if netHeader.TransportProtocol() != header.UDPProtocolNumber { - log.Infof("UDPMatcher: wrong protocol number") return false, false } // We dont't match fragments. if frag := netHeader.FragmentOffset(); frag != 0 { - log.Infof("UDPMatcher: it's a fragment") if frag == 1 { return false, true } @@ -89,20 +81,18 @@ func (um *UDPMatcher) Match(hook Hook, pkt tcpip.PacketBuffer, interfaceName str // Now we need the transport header. 
However, this may not have been set // yet. - // TODO + // TODO(gvisor.dev/issue/170): Parsing the transport header should + // ultimately be moved into the iptables.Check codepath as matchers are + // added. var udpHeader header.UDP if pkt.TransportHeader != nil { - log.Infof("UDPMatcher: transport header is not nil") udpHeader = header.UDP(pkt.TransportHeader) } else { - log.Infof("UDPMatcher: transport header is nil") - log.Infof("UDPMatcher: is network header nil: %t", pkt.NetworkHeader == nil) // The UDP header hasn't been parsed yet. We have to do it here. if len(pkt.Data.First()) < header.UDPMinimumSize { // There's no valid UDP header here, so we hotdrop the // packet. - // TODO: Stats. - log.Warningf("Dropping UDP packet: size to small.") + log.Warningf("Dropping UDP packet: size too small.") return false, true } udpHeader = header.UDP(pkt.Data.First()) @@ -112,10 +102,6 @@ func (um *UDPMatcher) Match(hook Hook, pkt tcpip.PacketBuffer, interfaceName str // matching range. sourcePort := udpHeader.SourcePort() destinationPort := udpHeader.DestinationPort() - log.Infof("UDPMatcher: sport and dport are %d and %d. sports and dport start and end are (%d, %d) and (%d, %d)", - udpHeader.SourcePort(), udpHeader.DestinationPort(), - um.Data.SourcePortStart, um.Data.SourcePortEnd, - um.Data.DestinationPortStart, um.Data.DestinationPortEnd) if sourcePort < um.Data.SourcePortStart || um.Data.SourcePortEnd < sourcePort { return false, false } diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index bc963d40e..e9f0978eb 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -264,9 +264,9 @@ func (FilterInputMultiUDPRules) ContainerAction(ip net.IP) error { if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } - // if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", acceptPort), "-j", "ACCEPT"); err != nil { - // return err - // } + if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", acceptPort), "-j", "ACCEPT"); err != nil { + return err + } return filterTable("-L") } -- cgit v1.2.3 From 13c1f38dfa215ab3e3cc70642721f55ab226d5b7 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 27 Jan 2020 12:19:20 -0800 Subject: Update bug number for supporting extended attribute namespaces. PiperOrigin-RevId: 291774815 --- pkg/sentry/syscalls/linux/sys_xattr.go | 1 + test/syscalls/linux/xattr.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index e35c077d6..77deb8980 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -103,6 +103,7 @@ func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr, size uint64) return 0, "", err } + // TODO(b/148380782): Support xattrs in namespaces other than "user". if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { return 0, "", syserror.EOPNOTSUPP } diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index e77c355d7..ab21d68c6 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -131,7 +131,7 @@ TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { } TEST_F(XattrTest, XattrTrustedWithNonadmin) { - // TODO(b/127675828): Support setxattr and getxattr with "trusted" prefix. 
+ // TODO(b/148380782): Support setxattr and getxattr with "trusted" prefix. SKIP_IF(IsRunningOnGvisor()); SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); -- cgit v1.2.3 From 5776a7b6f6b52faf6e0735c3f4a892639c1bd773 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 27 Jan 2020 18:26:26 -0800 Subject: Fix header ordering and format all C++ code. PiperOrigin-RevId: 291844200 --- CONTRIBUTING.md | 3 +- test/syscalls/linux/32bit.cc | 2 +- test/syscalls/linux/fpsig_fork.cc | 4 +- test/syscalls/linux/fpsig_nested.cc | 8 +-- test/syscalls/linux/madvise.cc | 4 +- test/syscalls/linux/mempolicy.cc | 6 +- test/syscalls/linux/mlock.cc | 1 - test/syscalls/linux/msync.cc | 4 +- test/syscalls/linux/ptrace.cc | 3 +- test/syscalls/linux/seccomp.cc | 9 ++- test/syscalls/linux/sigaltstack.cc | 4 +- test/syscalls/linux/sigiret.cc | 4 +- test/syscalls/linux/socket_stream_blocking.cc | 64 +++++++++++----------- test/syscalls/linux/stat.cc | 2 +- .../linux/udp_socket_errqueue_test_case.cc | 3 +- test/util/capability_util.cc | 8 +-- test/util/fs_util.cc | 2 +- test/util/multiprocess_util.h | 3 +- 18 files changed, 68 insertions(+), 66 deletions(-) (limited to 'test') diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5d46168bc..55a1ad0d9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -36,7 +36,8 @@ directory tree. All Go code should conform to the [Go style guidelines][gostyle]. C++ code should conform to the [Google C++ Style Guide][cppstyle] and the guidelines -described for [tests][teststyle]. +described for [tests][teststyle]. Note that code may be automatically formatted +per the guidelines when merged. As a secure runtime, we need to maintain the safety of all of code included in gVisor. The following rules help mitigate issues. diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc index a7cbee06b..6a15d47e1 100644 --- a/test/syscalls/linux/32bit.cc +++ b/test/syscalls/linux/32bit.cc @@ -71,7 +71,7 @@ void ExitGroup32(const char instruction[2], int code) { "iretl\n" "int $3\n" : - : [code] "m"(code), [ip] "d"(m.ptr()) + : [ code ] "m"(code), [ ip ] "d"(m.ptr()) : "rax", "rbx", "rsp"); } diff --git a/test/syscalls/linux/fpsig_fork.cc b/test/syscalls/linux/fpsig_fork.cc index e7e9f06a1..a346f1f00 100644 --- a/test/syscalls/linux/fpsig_fork.cc +++ b/test/syscalls/linux/fpsig_fork.cc @@ -76,8 +76,8 @@ TEST(FPSigTest, Fork) { "movl %[sig], %%edx;" "syscall;" : - : [killnr] "i"(__NR_tgkill), [parent] "rm"(parent), - [tid] "rm"(parent_tid), [sig] "i"(SIGUSR1) + : [ killnr ] "i"(__NR_tgkill), [ parent ] "rm"(parent), + [ tid ] "rm"(parent_tid), [ sig ] "i"(SIGUSR1) : "rax", "rdi", "rsi", "rdx", // Clobbered by syscall. "rcx", "r11"); diff --git a/test/syscalls/linux/fpsig_nested.cc b/test/syscalls/linux/fpsig_nested.cc index 395463aed..c476a8e7a 100644 --- a/test/syscalls/linux/fpsig_nested.cc +++ b/test/syscalls/linux/fpsig_nested.cc @@ -61,8 +61,8 @@ void sigusr1(int s, siginfo_t* siginfo, void* _uc) { "movl %[sig], %%edx;" "syscall;" : - : [killnr] "i"(__NR_tgkill), [pid] "rm"(pid), [tid] "rm"(tid), - [sig] "i"(SIGUSR2) + : [ killnr ] "i"(__NR_tgkill), [ pid ] "rm"(pid), [ tid ] "rm"(tid), + [ sig ] "i"(SIGUSR2) : "rax", "rdi", "rsi", "rdx", // Clobbered by syscall. 
"rcx", "r11"); @@ -107,8 +107,8 @@ TEST(FPSigTest, NestedSignals) { "movl %[sig], %%edx;" "syscall;" : - : [killnr] "i"(__NR_tgkill), [pid] "rm"(pid), [tid] "rm"(tid), - [sig] "i"(SIGUSR1) + : [ killnr ] "i"(__NR_tgkill), [ pid ] "rm"(pid), [ tid ] "rm"(tid), + [ sig ] "i"(SIGUSR1) : "rax", "rdi", "rsi", "rdx", // Clobbered by syscall. "rcx", "r11"); diff --git a/test/syscalls/linux/madvise.cc b/test/syscalls/linux/madvise.cc index 7fd0ea20c..dbd54ff2a 100644 --- a/test/syscalls/linux/madvise.cc +++ b/test/syscalls/linux/madvise.cc @@ -38,7 +38,7 @@ namespace testing { namespace { -void ExpectAllMappingBytes(Mapping const& m, char c) { +void ExpectAllMappingBytes(Mapping const &m, char c) { auto const v = m.view(); for (size_t i = 0; i < kPageSize; i++) { ASSERT_EQ(v[i], c) << "at offset " << i; @@ -47,7 +47,7 @@ void ExpectAllMappingBytes(Mapping const& m, char c) { // Equivalent to ExpectAllMappingBytes but async-signal-safe and with less // helpful failure messages. -void CheckAllMappingBytes(Mapping const& m, char c) { +void CheckAllMappingBytes(Mapping const &m, char c) { auto const v = m.view(); for (size_t i = 0; i < kPageSize; i++) { TEST_CHECK_MSG(v[i] == c, "mapping contains wrong value"); diff --git a/test/syscalls/linux/mempolicy.cc b/test/syscalls/linux/mempolicy.cc index 9d5f47651..d21093899 100644 --- a/test/syscalls/linux/mempolicy.cc +++ b/test/syscalls/linux/mempolicy.cc @@ -213,7 +213,7 @@ TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) { } } - void* invalid_address = reinterpret_cast(-1); + void *invalid_address = reinterpret_cast(-1); // Invalid address. ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, invalid_address, @@ -221,8 +221,8 @@ TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) { SyscallFailsWithErrno(EFAULT)); // Invalid mode pointer. - ASSERT_THAT(get_mempolicy(reinterpret_cast(invalid_address), nullptr, 0, - &dummy_stack_address, MPOL_F_ADDR | MPOL_F_NODE), + ASSERT_THAT(get_mempolicy(reinterpret_cast(invalid_address), nullptr, + 0, &dummy_stack_address, MPOL_F_ADDR | MPOL_F_NODE), SyscallFailsWithErrno(EFAULT)); } diff --git a/test/syscalls/linux/mlock.cc b/test/syscalls/linux/mlock.cc index 620b4f8b4..367a90fe1 100644 --- a/test/syscalls/linux/mlock.cc +++ b/test/syscalls/linux/mlock.cc @@ -60,7 +60,6 @@ bool IsPageMlocked(uintptr_t addr) { return true; } - TEST(MlockTest, Basic) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( diff --git a/test/syscalls/linux/msync.cc b/test/syscalls/linux/msync.cc index ac7146017..2b2b6aef9 100644 --- a/test/syscalls/linux/msync.cc +++ b/test/syscalls/linux/msync.cc @@ -60,9 +60,7 @@ std::vector()>> SyncableMappings() { for (int const mflags : {MAP_PRIVATE, MAP_SHARED}) { int const prot = PROT_READ | (writable ? PROT_WRITE : 0); int const oflags = O_CREAT | (writable ? O_RDWR : O_RDONLY); - funcs.push_back([=] { - return MmapAnon(kPageSize, prot, mflags); - }); + funcs.push_back([=] { return MmapAnon(kPageSize, prot, mflags); }); funcs.push_back([=]() -> PosixErrorOr { std::string const path = NewTempAbsPath(); ASSIGN_OR_RETURN_ERRNO(auto fd, Open(path, oflags, 0644)); diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc index 8f3800380..ef67b747b 100644 --- a/test/syscalls/linux/ptrace.cc +++ b/test/syscalls/linux/ptrace.cc @@ -178,7 +178,8 @@ TEST(PtraceTest, GetSigMask) { // Install a signal handler for kBlockSignal to avoid termination and block // it. 
- TEST_PCHECK(signal(kBlockSignal, +[](int signo) {}) != SIG_ERR); + TEST_PCHECK(signal( + kBlockSignal, +[](int signo) {}) != SIG_ERR); MaybeSave(); TEST_PCHECK(sigprocmask(SIG_SETMASK, &blocked, nullptr) == 0); MaybeSave(); diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index 7e41fe7d8..294ee6808 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -113,7 +113,8 @@ TEST(SeccompTest, RetKillCausesDeathBySIGSYS) { pid_t const pid = fork(); if (pid == 0) { // Register a signal handler for SIGSYS that we don't expect to be invoked. - RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + RegisterSignalHandler( + SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); syscall(kFilteredSyscall); TEST_CHECK_MSG(false, "Survived invocation of test syscall"); @@ -132,7 +133,8 @@ TEST(SeccompTest, RetKillOnlyKillsOneThread) { pid_t const pid = fork(); if (pid == 0) { // Register a signal handler for SIGSYS that we don't expect to be invoked. - RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + RegisterSignalHandler( + SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); // Pass CLONE_VFORK to block the original thread in the child process until // the clone thread exits with SIGSYS. @@ -346,7 +348,8 @@ TEST(SeccompTest, LeastPermissiveFilterReturnValueApplies) { // one that causes the kill that should be ignored. pid_t const pid = fork(); if (pid == 0) { - RegisterSignalHandler(SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + RegisterSignalHandler( + SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_TRACE); ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_KILL); ApplySeccompFilter(kFilteredSyscall, SECCOMP_RET_ERRNO | ENOTNAM); diff --git a/test/syscalls/linux/sigaltstack.cc b/test/syscalls/linux/sigaltstack.cc index 62b04ef1d..24e7c4960 100644 --- a/test/syscalls/linux/sigaltstack.cc +++ b/test/syscalls/linux/sigaltstack.cc @@ -168,8 +168,8 @@ TEST(SigaltstackTest, WalksOffBottom) { // Trigger a single fault. badhandler_low_water_mark = - static_cast(stack.ss_sp) + SIGSTKSZ; // Expected top. - badhandler_recursive_faults = 0; // Disable refault. + static_cast(stack.ss_sp) + SIGSTKSZ; // Expected top. + badhandler_recursive_faults = 0; // Disable refault. 
Fault(); EXPECT_TRUE(badhandler_on_sigaltstack); EXPECT_THAT(sigaltstack(nullptr, &stack), SyscallSucceeds()); diff --git a/test/syscalls/linux/sigiret.cc b/test/syscalls/linux/sigiret.cc index a47c781ea..4deb1ae95 100644 --- a/test/syscalls/linux/sigiret.cc +++ b/test/syscalls/linux/sigiret.cc @@ -78,8 +78,8 @@ TEST(SigIretTest, CheckRcxR11) { "1: pause; cmpl $0, %[gotvtalrm]; je 1b;" // while (!gotvtalrm); "movq %%rcx, %[rcx];" // rcx = %rcx "movq %%r11, %[r11];" // r11 = %r11 - : [ready] "=m"(ready), [rcx] "+m"(rcx), [r11] "+m"(r11) - : [gotvtalrm] "m"(gotvtalrm) + : [ ready ] "=m"(ready), [ rcx ] "+m"(rcx), [ r11 ] "+m"(r11) + : [ gotvtalrm ] "m"(gotvtalrm) : "cc", "memory", "rcx", "r11"); // If sigreturn(2) returns via 'sysret' then %rcx and %r11 will be diff --git a/test/syscalls/linux/socket_stream_blocking.cc b/test/syscalls/linux/socket_stream_blocking.cc index e9cc082bf..538ee2268 100644 --- a/test/syscalls/linux/socket_stream_blocking.cc +++ b/test/syscalls/linux/socket_stream_blocking.cc @@ -32,38 +32,38 @@ namespace gvisor { namespace testing { TEST_P(BlockingStreamSocketPairTest, BlockPartialWriteClosed) { - // FIXME(b/35921550): gVisor doesn't support SO_SNDBUF on UDS, nor does it - // enforce any limit; it will write arbitrary amounts of data without - // blocking. - SKIP_IF(IsRunningOnGvisor()); - - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - - int buffer_size; - socklen_t length = sizeof(buffer_size); - ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, - &buffer_size, &length), - SyscallSucceeds()); - - int wfd = sockets->first_fd(); - ScopedThread t([wfd, buffer_size]() { - std::vector buf(2 * buffer_size); - // Write more than fits in the buffer. Blocks then returns partial write - // when the other end is closed. The next call returns EPIPE. - // - // N.B. writes occur in chunks, so we may see less than buffer_size from - // the first call. - ASSERT_THAT(write(wfd, buf.data(), buf.size()), - SyscallSucceedsWithValue(::testing::Gt(0))); - ASSERT_THAT(write(wfd, buf.data(), buf.size()), - ::testing::AnyOf(SyscallFailsWithErrno(EPIPE), - SyscallFailsWithErrno(ECONNRESET))); - }); - - // Leave time for write to become blocked. - absl::SleepFor(absl::Seconds(1)); - - ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + // FIXME(b/35921550): gVisor doesn't support SO_SNDBUF on UDS, nor does it + // enforce any limit; it will write arbitrary amounts of data without + // blocking. + SKIP_IF(IsRunningOnGvisor()); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int buffer_size; + socklen_t length = sizeof(buffer_size); + ASSERT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDBUF, + &buffer_size, &length), + SyscallSucceeds()); + + int wfd = sockets->first_fd(); + ScopedThread t([wfd, buffer_size]() { + std::vector buf(2 * buffer_size); + // Write more than fits in the buffer. Blocks then returns partial write + // when the other end is closed. The next call returns EPIPE. + // + // N.B. writes occur in chunks, so we may see less than buffer_size from + // the first call. + ASSERT_THAT(write(wfd, buf.data(), buf.size()), + SyscallSucceedsWithValue(::testing::Gt(0))); + ASSERT_THAT(write(wfd, buf.data(), buf.size()), + ::testing::AnyOf(SyscallFailsWithErrno(EPIPE), + SyscallFailsWithErrno(ECONNRESET))); + }); + + // Leave time for write to become blocked. 
+ absl::SleepFor(absl::Seconds(1)); + + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); } // Random save may interrupt the call to sendmsg() in SendLargeSendMsg(), diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index 30de2f8ff..c1e45e10a 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -377,7 +377,7 @@ TEST_F(StatTest, ZeroLinksOpenFdRegularFileChild_NoRandomSave) { // // We need to support this because when a file is unlinked and we forward // the stat to the gofer it would return ENOENT. - const char* uncached_gofer = getenv("GVISOR_GOFER_UNCACHED"); + const char *uncached_gofer = getenv("GVISOR_GOFER_UNCACHED"); SKIP_IF(uncached_gofer != nullptr); // We don't support saving unlinked files. diff --git a/test/syscalls/linux/udp_socket_errqueue_test_case.cc b/test/syscalls/linux/udp_socket_errqueue_test_case.cc index 9a24e1df0..fcdba7279 100644 --- a/test/syscalls/linux/udp_socket_errqueue_test_case.cc +++ b/test/syscalls/linux/udp_socket_errqueue_test_case.cc @@ -14,8 +14,6 @@ #ifndef __fuchsia__ -#include "test/syscalls/linux/udp_socket_test_cases.h" - #include #include #include @@ -29,6 +27,7 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/udp_socket_test_cases.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" diff --git a/test/util/capability_util.cc b/test/util/capability_util.cc index 5d733887b..9fee52fbb 100644 --- a/test/util/capability_util.cc +++ b/test/util/capability_util.cc @@ -36,10 +36,10 @@ PosixErrorOr CanCreateUserNamespace() { ASSIGN_OR_RETURN_ERRNO( auto child_stack, MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); - int const child_pid = - clone(+[](void*) { return 0; }, - reinterpret_cast(child_stack.addr() + kPageSize), - CLONE_NEWUSER | SIGCHLD, /* arg = */ nullptr); + int const child_pid = clone( + +[](void*) { return 0; }, + reinterpret_cast(child_stack.addr() + kPageSize), + CLONE_NEWUSER | SIGCHLD, /* arg = */ nullptr); if (child_pid > 0) { int status; int const ret = waitpid(child_pid, &status, /* options = */ 0); diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc index 042cec94a..052781445 100644 --- a/test/util/fs_util.cc +++ b/test/util/fs_util.cc @@ -452,7 +452,7 @@ PosixErrorOr MakeAbsolute(absl::string_view filename, std::string CleanPath(const absl::string_view unclean_path) { std::string path = std::string(unclean_path); - const char *src = path.c_str(); + const char* src = path.c_str(); std::string::iterator dst = path.begin(); // Check for absolute path and determine initial backtrack limit. diff --git a/test/util/multiprocess_util.h b/test/util/multiprocess_util.h index 3e736261b..2f3bf4a6f 100644 --- a/test/util/multiprocess_util.h +++ b/test/util/multiprocess_util.h @@ -99,7 +99,8 @@ inline PosixErrorOr ForkAndExec(const std::string& filename, const ExecveArray& argv, const ExecveArray& envv, pid_t* child, int* execve_errno) { - return ForkAndExec(filename, argv, envv, [] {}, child, execve_errno); + return ForkAndExec( + filename, argv, envv, [] {}, child, execve_errno); } // Equivalent to ForkAndExec, except using dirfd and flags with execveat. -- cgit v1.2.3 From 5d569408ef94c753b7aae9392b5e4ebf7e5ea50d Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 27 Jan 2020 22:27:57 -0800 Subject: Create platform_util for tests. 
PiperOrigin-RevId: 291869423 --- test/syscalls/linux/32bit.cc | 130 +++++++++++++++++++++---------------- test/syscalls/linux/BUILD | 5 ++ test/syscalls/linux/arch_prctl.cc | 2 - test/syscalls/linux/concurrency.cc | 2 + test/syscalls/linux/exceptions.cc | 2 + test/syscalls/linux/ptrace.cc | 10 +-- test/util/BUILD | 15 +++-- test/util/platform_util.cc | 49 ++++++++++++++ test/util/platform_util.h | 56 ++++++++++++++++ test/util/test_util.cc | 11 +--- test/util/test_util.h | 27 ++++---- 11 files changed, 216 insertions(+), 93 deletions(-) create mode 100644 test/util/platform_util.cc create mode 100644 test/util/platform_util.h (limited to 'test') diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc index 6a15d47e1..2751fb4e7 100644 --- a/test/syscalls/linux/32bit.cc +++ b/test/syscalls/linux/32bit.cc @@ -15,10 +15,12 @@ #include #include +#include "gtest/gtest.h" +#include "absl/base/macros.h" #include "test/util/memory_util.h" +#include "test/util/platform_util.h" #include "test/util/posix_error.h" #include "test/util/test_util.h" -#include "gtest/gtest.h" #ifndef __x86_64__ #error "This test is x86-64 specific." @@ -30,7 +32,6 @@ namespace testing { namespace { constexpr char kInt3 = '\xcc'; - constexpr char kInt80[2] = {'\xcd', '\x80'}; constexpr char kSyscall[2] = {'\x0f', '\x05'}; constexpr char kSysenter[2] = {'\x0f', '\x34'}; @@ -43,6 +44,7 @@ void ExitGroup32(const char instruction[2], int code) { // Fill with INT 3 in case we execute too far. memset(m.ptr(), kInt3, m.len()); + // Copy in the actual instruction. memcpy(m.ptr(), instruction, 2); // We're playing *extremely* fast-and-loose with the various syscall ABIs @@ -78,70 +80,87 @@ void ExitGroup32(const char instruction[2], int code) { constexpr int kExitCode = 42; TEST(Syscall32Bit, Int80) { - switch (GvisorPlatform()) { - case Platform::kKVM: - // TODO(b/111805002): 32-bit segments are broken (but not explictly - // disabled). - return; - case Platform::kPtrace: - // TODO(gvisor.dev/issue/167): The ptrace platform does not have a - // consistent story here. - return; - case Platform::kNative: + switch (PlatformSupport32Bit()) { + case PlatformSupport::NotSupported: + break; + case PlatformSupport::Segfault: + EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); break; - } - // Upstream Linux. 32-bit syscalls allowed. - EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), ::testing::ExitedWithCode(42), - ""); -} + case PlatformSupport::Ignored: + // Since the call is ignored, we'll hit the int3 trap. + EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), + ::testing::KilledBySignal(SIGTRAP), ""); + break; -TEST(Syscall32Bit, Sysenter) { - switch (GvisorPlatform()) { - case Platform::kKVM: - // TODO(b/111805002): See above. - return; - case Platform::kPtrace: - // TODO(gvisor.dev/issue/167): See above. - return; - case Platform::kNative: + case PlatformSupport::Allowed: + EXPECT_EXIT(ExitGroup32(kInt80, kExitCode), ::testing::ExitedWithCode(42), + ""); break; } +} - if (GetCPUVendor() == CPUVendor::kAMD) { +TEST(Syscall32Bit, Sysenter) { + if (PlatformSupport32Bit() == PlatformSupport::Allowed && + GetCPUVendor() == CPUVendor::kAMD) { // SYSENTER is an illegal instruction in compatibility mode on AMD. EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), ::testing::KilledBySignal(SIGILL), ""); return; } - // Upstream Linux on !AMD, 32-bit syscalls allowed. 
- EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), ::testing::ExitedWithCode(42), - ""); -} + switch (PlatformSupport32Bit()) { + case PlatformSupport::NotSupported: + break; -TEST(Syscall32Bit, Syscall) { - switch (GvisorPlatform()) { - case Platform::kKVM: - // TODO(b/111805002): See above. - return; - case Platform::kPtrace: - // TODO(gvisor.dev/issue/167): See above. - return; - case Platform::kNative: + case PlatformSupport::Segfault: + EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Ignored: + // See above, except expected code is SIGSEGV. + EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Allowed: + EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), + ::testing::ExitedWithCode(42), ""); break; } +} - if (GetCPUVendor() == CPUVendor::kIntel) { +TEST(Syscall32Bit, Syscall) { + if (PlatformSupport32Bit() == PlatformSupport::Allowed && + GetCPUVendor() == CPUVendor::kIntel) { // SYSCALL is an illegal instruction in compatibility mode on Intel. EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), ::testing::KilledBySignal(SIGILL), ""); return; } - // Upstream Linux on !Intel, 32-bit syscalls allowed. - EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), ::testing::ExitedWithCode(42), - ""); + switch (PlatformSupport32Bit()) { + case PlatformSupport::NotSupported: + break; + + case PlatformSupport::Segfault: + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), + ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Ignored: + // See above. + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), + ::testing::KilledBySignal(SIGILL), ""); + break; + + case PlatformSupport::Allowed: + EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), + ::testing::ExitedWithCode(42), ""); + break; + } } // Far call code called below. @@ -205,19 +224,20 @@ void FarCall32() { } TEST(Call32Bit, Disallowed) { - switch (GvisorPlatform()) { - case Platform::kKVM: - // TODO(b/111805002): See above. - return; - case Platform::kPtrace: - // The ptrace platform cannot prevent switching to compatibility mode. - ABSL_FALLTHROUGH_INTENDED; - case Platform::kNative: + switch (PlatformSupport32Bit()) { + case PlatformSupport::NotSupported: break; - } - // Shouldn't crash. - FarCall32(); + case PlatformSupport::Segfault: + EXPECT_EXIT(FarCall32(), ::testing::KilledBySignal(SIGSEGV), ""); + break; + + case PlatformSupport::Ignored: + ABSL_FALLTHROUGH_INTENDED; + case PlatformSupport::Allowed: + // Shouldn't crash. 
+ FarCall32(); + } } } // namespace diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index c2ef50c1d..74bf068ec 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -197,9 +197,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:memory_util", + "//test/util:platform_util", "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", + "@com_google_absl//absl/base:core_headers", "@com_google_googletest//:gtest", ], ) @@ -479,6 +481,7 @@ cc_binary( srcs = ["concurrency.cc"], linkstatic = 1, deps = [ + "//test/util:platform_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", @@ -584,6 +587,7 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:logging", + "//test/util:platform_util", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", @@ -1658,6 +1662,7 @@ cc_binary( deps = [ "//test/util:logging", "//test/util:multiprocess_util", + "//test/util:platform_util", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", diff --git a/test/syscalls/linux/arch_prctl.cc b/test/syscalls/linux/arch_prctl.cc index 3a901faf5..81bf5a775 100644 --- a/test/syscalls/linux/arch_prctl.cc +++ b/test/syscalls/linux/arch_prctl.cc @@ -14,10 +14,8 @@ #include #include -#include #include "gtest/gtest.h" -#include "test/util/file_descriptor.h" #include "test/util/test_util.h" // glibc does not provide a prototype for arch_prctl() so declare it here. diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc index 00b96b34a..f41f99900 100644 --- a/test/syscalls/linux/concurrency.cc +++ b/test/syscalls/linux/concurrency.cc @@ -20,6 +20,7 @@ #include "absl/strings/string_view.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "test/util/platform_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -99,6 +100,7 @@ TEST(ConcurrencyTest, MultiProcessMultithreaded) { // Test that multiple processes can execute concurrently, even if one process // never yields. TEST(ConcurrencyTest, MultiProcessConcurrency) { + SKIP_IF(PlatformSupportMultiProcess() == PlatformSupport::NotSupported); pid_t child_pid = fork(); if (child_pid == 0) { diff --git a/test/syscalls/linux/exceptions.cc b/test/syscalls/linux/exceptions.cc index 3d564e720..420b9543f 100644 --- a/test/syscalls/linux/exceptions.cc +++ b/test/syscalls/linux/exceptions.cc @@ -16,6 +16,7 @@ #include "gtest/gtest.h" #include "test/util/logging.h" +#include "test/util/platform_util.h" #include "test/util/signal_util.h" #include "test/util/test_util.h" @@ -324,6 +325,7 @@ TEST(ExceptionTest, AlignmentHalt) { } TEST(ExceptionTest, AlignmentCheck) { + SKIP_IF(PlatformSupportAlignmentCheck() != PlatformSupport::Allowed); // See above. struct sigaction sa = {}; diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc index ef67b747b..4dd5cf27b 100644 --- a/test/syscalls/linux/ptrace.cc +++ b/test/syscalls/linux/ptrace.cc @@ -32,6 +32,7 @@ #include "absl/time/time.h" #include "test/util/logging.h" #include "test/util/multiprocess_util.h" +#include "test/util/platform_util.h" #include "test/util/signal_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -824,13 +825,8 @@ TEST(PtraceTest, // These tests requires knowledge of architecture-specific syscall convention. #ifdef __x86_64__ TEST(PtraceTest, Int3) { - switch (GvisorPlatform()) { - case Platform::kKVM: - // TODO(b/124248694): int3 isn't handled properly. 
- return; - default: - break; - } + SKIP_IF(PlatformSupportInt3() == PlatformSupport::NotSupported); + pid_t const child_pid = fork(); if (child_pid == 0) { // In child process. diff --git a/test/util/BUILD b/test/util/BUILD index 3c732be62..1ac8b3fd6 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -165,6 +165,14 @@ cc_library( ], ) +cc_library( + name = "platform_util", + testonly = 1, + srcs = ["platform_util.cc"], + hdrs = ["platform_util.h"], + deps = [":test_util"], +) + cc_library( name = "posix_error", testonly = 1, @@ -238,12 +246,7 @@ cc_library( "test_util_runfiles.cc", ], hdrs = ["test_util.h"], - defines = select_system( - fuchsia = [ - "__opensource__", - "__fuchsia__", - ], - ), + defines = select_system(), deps = [ ":fs_util", ":logging", diff --git a/test/util/platform_util.cc b/test/util/platform_util.cc new file mode 100644 index 000000000..2724e63f3 --- /dev/null +++ b/test/util/platform_util.cc @@ -0,0 +1,49 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/util/platform_util.h" + +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +PlatformSupport PlatformSupport32Bit() { + if (GvisorPlatform() == Platform::kPtrace) { + return PlatformSupport::NotSupported; + } else if (GvisorPlatform() == Platform::kKVM) { + return PlatformSupport::Segfault; + } else { + return PlatformSupport::Allowed; + } +} + +PlatformSupport PlatformSupportAlignmentCheck() { + return PlatformSupport::Allowed; +} + +PlatformSupport PlatformSupportMultiProcess() { + return PlatformSupport::Allowed; +} + +PlatformSupport PlatformSupportInt3() { + if (GvisorPlatform() == Platform::kKVM) { + return PlatformSupport::NotSupported; + } else { + return PlatformSupport::Allowed; + } +} + +} // namespace testing +} // namespace gvisor diff --git a/test/util/platform_util.h b/test/util/platform_util.h new file mode 100644 index 000000000..28cc92371 --- /dev/null +++ b/test/util/platform_util.h @@ -0,0 +1,56 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_UTIL_PLATFORM_UTIL_H_ +#define GVISOR_TEST_UTIL_PLATFORM_UTIL_H_ + +namespace gvisor { +namespace testing { + +// PlatformSupport is a generic enumeration of classes of support. +// +// It is up to the individual functions and callers to agree on the precise +// definition for each case. The document here generally refers to 32-bit +// as an example. 
Many cases will use only NotSupported and Allowed. +enum class PlatformSupport { + // The feature is not supported on the current platform. + // + // In the case of 32-bit, this means that calls will generally be interpreted + // as 64-bit calls, and there is no support for 32-bit binaries, long calls, + // etc. This usually means that the underlying implementation just pretends + // that 32-bit doesn't exist. + NotSupported, + + // Calls will be ignored by the kernel with a fixed error. + Ignored, + + // Calls will result in a SIGSEGV or similar fault. + Segfault, + + // The feature is supported as expected. + // + // In the case of 32-bit, this means that the system call or far call will be + // handled properly. + Allowed, +}; + +PlatformSupport PlatformSupport32Bit(); +PlatformSupport PlatformSupportAlignmentCheck(); +PlatformSupport PlatformSupportMultiProcess(); +PlatformSupport PlatformSupportInt3(); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_UTIL_PLATFORM_UTL_H_ diff --git a/test/util/test_util.cc b/test/util/test_util.cc index 848504c88..15cbc6da6 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -45,20 +45,13 @@ namespace testing { bool IsRunningOnGvisor() { return GvisorPlatform() != Platform::kNative; } -Platform GvisorPlatform() { +const std::string GvisorPlatform() { // Set by runner.go. char* env = getenv(TEST_ON_GVISOR); if (!env) { return Platform::kNative; } - if (strcmp(env, "ptrace") == 0) { - return Platform::kPtrace; - } - if (strcmp(env, "kvm") == 0) { - return Platform::kKVM; - } - std::cerr << "unknown platform " << env; - abort(); + return std::string(env); } bool IsRunningWithHostinet() { diff --git a/test/util/test_util.h b/test/util/test_util.h index b3235c7e3..2d22b0eb8 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -26,16 +26,13 @@ // IsRunningOnGvisor returns true if the test is known to be running on gVisor. // GvisorPlatform can be used to get more detail: // -// switch (GvisorPlatform()) { -// case Platform::kNative: -// case Platform::kGvisor: -// EXPECT_THAT(mmap(...), SyscallSucceeds()); -// break; -// case Platform::kPtrace: -// EXPECT_THAT(mmap(...), SyscallFailsWithErrno(ENOSYS)); -// break; +// if (GvisorPlatform() == Platform::kPtrace) { +// ... // } // +// SetupGvisorDeathTest ensures that signal handling does not interfere with +/// tests that rely on fatal signals. +// // Matchers // ======== // @@ -213,13 +210,15 @@ void TestInit(int* argc, char*** argv); if (expr) GTEST_SKIP() << #expr; \ } while (0) -enum class Platform { - kNative, - kKVM, - kPtrace, -}; +// Platform contains platform names. +namespace Platform { +constexpr char kNative[] = "native"; +constexpr char kPtrace[] = "ptrace"; +constexpr char kKVM[] = "kvm"; +} // namespace Platform + bool IsRunningOnGvisor(); -Platform GvisorPlatform(); +const std::string GvisorPlatform(); bool IsRunningWithHostinet(); #ifdef __linux__ -- cgit v1.2.3 From 74e04506a430535b7f3461eb35f36c9398db735a Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 28 Jan 2020 11:06:24 -0800 Subject: Prefer Type& over Type & And Type* over Type *. This is basically a whitespace only change. gVisor code already prefers left-alignment of pointers and references, but clang-format formats for consistency with the majority of a file, and some files leaned the wrong way. This is a one-time pass to make us completely conforming. Autogenerated with: $ find . 
\( -name "*.cc" -or -name "*.c" -or -name "*.h" \) \ | xargs clang-format -i -style="{BasedOnStyle: Google, \ DerivePointerAlignment: false, PointerAlignment: Left}" PiperOrigin-RevId: 291972421 --- test/syscalls/linux/connect_external.cc | 12 ++++++------ test/syscalls/linux/getrusage.cc | 2 +- test/syscalls/linux/iptables.h | 2 +- test/syscalls/linux/madvise.cc | 16 ++++++++-------- test/syscalls/linux/mempolicy.cc | 16 ++++++++-------- test/syscalls/linux/proc_net.cc | 20 ++++++++++---------- test/syscalls/linux/sendfile_socket.cc | 8 ++++---- .../syscalls/linux/socket_bind_to_device_sequence.cc | 10 +++++----- test/syscalls/linux/socket_netdevice.cc | 10 +++++----- test/syscalls/linux/stat.cc | 6 +++--- test/util/mount_util.h | 8 ++++---- vdso/syscalls.h | 8 ++++---- 12 files changed, 59 insertions(+), 59 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/connect_external.cc b/test/syscalls/linux/connect_external.cc index bfe1da82e..1edb50e47 100644 --- a/test/syscalls/linux/connect_external.cc +++ b/test/syscalls/linux/connect_external.cc @@ -56,7 +56,7 @@ TEST_P(GoferStreamSeqpacketTest, Echo) { ProtocolSocket proto; std::tie(env, proto) = GetParam(); - char *val = getenv(env.c_str()); + char* val = getenv(env.c_str()); ASSERT_NE(val, nullptr); std::string root(val); @@ -69,7 +69,7 @@ TEST_P(GoferStreamSeqpacketTest, Echo) { addr.sun_family = AF_UNIX; memcpy(addr.sun_path, socket_path.c_str(), socket_path.length()); - ASSERT_THAT(connect(sock.get(), reinterpret_cast(&addr), + ASSERT_THAT(connect(sock.get(), reinterpret_cast(&addr), sizeof(addr)), SyscallSucceeds()); @@ -92,7 +92,7 @@ TEST_P(GoferStreamSeqpacketTest, NonListening) { ProtocolSocket proto; std::tie(env, proto) = GetParam(); - char *val = getenv(env.c_str()); + char* val = getenv(env.c_str()); ASSERT_NE(val, nullptr); std::string root(val); @@ -105,7 +105,7 @@ TEST_P(GoferStreamSeqpacketTest, NonListening) { addr.sun_family = AF_UNIX; memcpy(addr.sun_path, socket_path.c_str(), socket_path.length()); - ASSERT_THAT(connect(sock.get(), reinterpret_cast(&addr), + ASSERT_THAT(connect(sock.get(), reinterpret_cast(&addr), sizeof(addr)), SyscallFailsWithErrno(ECONNREFUSED)); } @@ -127,7 +127,7 @@ using GoferDgramTest = ::testing::TestWithParam; // unnamed. The server thus has no way to reply to us. 
TEST_P(GoferDgramTest, Null) { std::string env = GetParam(); - char *val = getenv(env.c_str()); + char* val = getenv(env.c_str()); ASSERT_NE(val, nullptr); std::string root(val); @@ -140,7 +140,7 @@ TEST_P(GoferDgramTest, Null) { addr.sun_family = AF_UNIX; memcpy(addr.sun_path, socket_path.c_str(), socket_path.length()); - ASSERT_THAT(connect(sock.get(), reinterpret_cast(&addr), + ASSERT_THAT(connect(sock.get(), reinterpret_cast(&addr), sizeof(addr)), SyscallSucceeds()); diff --git a/test/syscalls/linux/getrusage.cc b/test/syscalls/linux/getrusage.cc index 9bdb1e4cd..0e51d42a8 100644 --- a/test/syscalls/linux/getrusage.cc +++ b/test/syscalls/linux/getrusage.cc @@ -67,7 +67,7 @@ TEST(GetrusageTest, Grandchild) { pid = fork(); if (pid == 0) { int flags = MAP_ANONYMOUS | MAP_POPULATE | MAP_PRIVATE; - void *addr = + void* addr = mmap(nullptr, kGrandchildSizeKb * 1024, PROT_WRITE, flags, -1, 0); TEST_PCHECK(addr != MAP_FAILED); } else { diff --git a/test/syscalls/linux/iptables.h b/test/syscalls/linux/iptables.h index 616bea550..0719c60a4 100644 --- a/test/syscalls/linux/iptables.h +++ b/test/syscalls/linux/iptables.h @@ -188,7 +188,7 @@ struct ipt_replace { unsigned int num_counters; // The unchanged values from each ipt_entry's counters. - struct xt_counters *counters; + struct xt_counters* counters; // The entries to write to the table. This will run past the size defined by // sizeof(srtuct ipt_replace); diff --git a/test/syscalls/linux/madvise.cc b/test/syscalls/linux/madvise.cc index dbd54ff2a..5a1973f60 100644 --- a/test/syscalls/linux/madvise.cc +++ b/test/syscalls/linux/madvise.cc @@ -38,7 +38,7 @@ namespace testing { namespace { -void ExpectAllMappingBytes(Mapping const &m, char c) { +void ExpectAllMappingBytes(Mapping const& m, char c) { auto const v = m.view(); for (size_t i = 0; i < kPageSize; i++) { ASSERT_EQ(v[i], c) << "at offset " << i; @@ -47,7 +47,7 @@ void ExpectAllMappingBytes(Mapping const &m, char c) { // Equivalent to ExpectAllMappingBytes but async-signal-safe and with less // helpful failure messages. -void CheckAllMappingBytes(Mapping const &m, char c) { +void CheckAllMappingBytes(Mapping const& m, char c) { auto const v = m.view(); for (size_t i = 0; i < kPageSize; i++) { TEST_CHECK_MSG(v[i] == c, "mapping contains wrong value"); @@ -139,7 +139,7 @@ TEST(MadviseDontneedTest, IgnoresPermissions) { TEST(MadviseDontforkTest, AddressLength) { auto m = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE)); - char *addr = static_cast(m.ptr()); + char* addr = static_cast(m.ptr()); // Address must be page aligned. EXPECT_THAT(madvise(addr + 1, kPageSize, MADV_DONTFORK), @@ -168,9 +168,9 @@ TEST(MadviseDontforkTest, DontforkShared) { Mapping m = ASSERT_NO_ERRNO_AND_VALUE(Mmap( nullptr, kPageSize * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd.get(), 0)); - const Mapping ms1 = Mapping(reinterpret_cast(m.addr()), kPageSize); + const Mapping ms1 = Mapping(reinterpret_cast(m.addr()), kPageSize); const Mapping ms2 = - Mapping(reinterpret_cast(m.addr() + kPageSize), kPageSize); + Mapping(reinterpret_cast(m.addr() + kPageSize), kPageSize); m.release(); ASSERT_THAT(madvise(ms2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds()); @@ -197,11 +197,11 @@ TEST(MadviseDontforkTest, DontforkAnonPrivate) { // Mmap three anonymous pages and MADV_DONTFORK the middle page. 
Mapping m = ASSERT_NO_ERRNO_AND_VALUE( MmapAnon(kPageSize * 3, PROT_READ | PROT_WRITE, MAP_PRIVATE)); - const Mapping mp1 = Mapping(reinterpret_cast(m.addr()), kPageSize); + const Mapping mp1 = Mapping(reinterpret_cast(m.addr()), kPageSize); const Mapping mp2 = - Mapping(reinterpret_cast(m.addr() + kPageSize), kPageSize); + Mapping(reinterpret_cast(m.addr() + kPageSize), kPageSize); const Mapping mp3 = - Mapping(reinterpret_cast(m.addr() + 2 * kPageSize), kPageSize); + Mapping(reinterpret_cast(m.addr() + 2 * kPageSize), kPageSize); m.release(); ASSERT_THAT(madvise(mp2.ptr(), kPageSize, MADV_DONTFORK), SyscallSucceeds()); diff --git a/test/syscalls/linux/mempolicy.cc b/test/syscalls/linux/mempolicy.cc index d21093899..059fad598 100644 --- a/test/syscalls/linux/mempolicy.cc +++ b/test/syscalls/linux/mempolicy.cc @@ -43,17 +43,17 @@ namespace { #define MPOL_MF_MOVE (1 << 1) #define MPOL_MF_MOVE_ALL (1 << 2) -int get_mempolicy(int *policy, uint64_t *nmask, uint64_t maxnode, void *addr, +int get_mempolicy(int* policy, uint64_t* nmask, uint64_t maxnode, void* addr, int flags) { return syscall(SYS_get_mempolicy, policy, nmask, maxnode, addr, flags); } -int set_mempolicy(int mode, uint64_t *nmask, uint64_t maxnode) { +int set_mempolicy(int mode, uint64_t* nmask, uint64_t maxnode) { return syscall(SYS_set_mempolicy, mode, nmask, maxnode); } -int mbind(void *addr, unsigned long len, int mode, - const unsigned long *nodemask, unsigned long maxnode, +int mbind(void* addr, unsigned long len, int mode, + const unsigned long* nodemask, unsigned long maxnode, unsigned flags) { return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags); } @@ -68,7 +68,7 @@ Cleanup ScopedMempolicy() { // Temporarily change the memory policy for the calling thread within the // caller's scope. -PosixErrorOr ScopedSetMempolicy(int mode, uint64_t *nmask, +PosixErrorOr ScopedSetMempolicy(int mode, uint64_t* nmask, uint64_t maxnode) { if (set_mempolicy(mode, nmask, maxnode)) { return PosixError(errno, "set_mempolicy"); @@ -213,7 +213,7 @@ TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) { } } - void *invalid_address = reinterpret_cast(-1); + void* invalid_address = reinterpret_cast(-1); // Invalid address. ASSERT_THAT(get_mempolicy(&mode, nullptr, 0, invalid_address, @@ -221,8 +221,8 @@ TEST(MempolicyTest, GetMempolicyQueryNodeForAddress) { SyscallFailsWithErrno(EFAULT)); // Invalid mode pointer. - ASSERT_THAT(get_mempolicy(reinterpret_cast(invalid_address), nullptr, - 0, &dummy_stack_address, MPOL_F_ADDR | MPOL_F_NODE), + ASSERT_THAT(get_mempolicy(reinterpret_cast(invalid_address), nullptr, 0, + &dummy_stack_address, MPOL_F_ADDR | MPOL_F_NODE), SyscallFailsWithErrno(EFAULT)); } diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index 65bad06d4..3a611a86f 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -68,8 +68,8 @@ TEST(ProcSysNetIpv4Sack, CanReadAndWrite) { } PosixErrorOr GetSNMPMetricFromProc(const std::string snmp, - const std::string &type, - const std::string &item) { + const std::string& type, + const std::string& item) { std::vector snmp_vec = absl::StrSplit(snmp, '\n'); // /proc/net/snmp prints a line of headers followed by a line of metrics. 
@@ -127,7 +127,7 @@ TEST(ProcNetSnmp, TcpReset_NoRandomSave) { }; ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1); - ASSERT_THAT(connect(s.get(), (struct sockaddr *)&sin, sizeof(sin)), + ASSERT_THAT(connect(s.get(), (struct sockaddr*)&sin, sizeof(sin)), SyscallFailsWithErrno(ECONNREFUSED)); uint64_t newAttemptFails; @@ -172,19 +172,19 @@ TEST(ProcNetSnmp, TcpEstab_NoRandomSave) { }; ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1); - ASSERT_THAT(bind(s_listen.get(), (struct sockaddr *)&sin, sizeof(sin)), + ASSERT_THAT(bind(s_listen.get(), (struct sockaddr*)&sin, sizeof(sin)), SyscallSucceeds()); ASSERT_THAT(listen(s_listen.get(), 1), SyscallSucceeds()); // Get the port bound by the listening socket. socklen_t addrlen = sizeof(sin); ASSERT_THAT( - getsockname(s_listen.get(), reinterpret_cast(&sin), &addrlen), + getsockname(s_listen.get(), reinterpret_cast(&sin), &addrlen), SyscallSucceeds()); FileDescriptor s_connect = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, 0)); - ASSERT_THAT(connect(s_connect.get(), (struct sockaddr *)&sin, sizeof(sin)), + ASSERT_THAT(connect(s_connect.get(), (struct sockaddr*)&sin, sizeof(sin)), SyscallSucceeds()); auto s_accept = @@ -260,7 +260,7 @@ TEST(ProcNetSnmp, UdpNoPorts_NoRandomSave) { .sin_port = htons(4444), }; ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1); - ASSERT_THAT(sendto(s.get(), "a", 1, 0, (struct sockaddr *)&sin, sizeof(sin)), + ASSERT_THAT(sendto(s.get(), "a", 1, 0, (struct sockaddr*)&sin, sizeof(sin)), SyscallSucceedsWithValue(1)); uint64_t newOutDatagrams; @@ -295,18 +295,18 @@ TEST(ProcNetSnmp, UdpIn) { .sin_port = htons(0), }; ASSERT_EQ(inet_pton(AF_INET, "127.0.0.1", &(sin.sin_addr)), 1); - ASSERT_THAT(bind(server.get(), (struct sockaddr *)&sin, sizeof(sin)), + ASSERT_THAT(bind(server.get(), (struct sockaddr*)&sin, sizeof(sin)), SyscallSucceeds()); // Get the port bound by the server socket. 
socklen_t addrlen = sizeof(sin); ASSERT_THAT( - getsockname(server.get(), reinterpret_cast(&sin), &addrlen), + getsockname(server.get(), reinterpret_cast(&sin), &addrlen), SyscallSucceeds()); FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); ASSERT_THAT( - sendto(client.get(), "a", 1, 0, (struct sockaddr *)&sin, sizeof(sin)), + sendto(client.get(), "a", 1, 0, (struct sockaddr*)&sin, sizeof(sin)), SyscallSucceedsWithValue(1)); char buf[128]; diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc index 3331288b7..8f7ee4163 100644 --- a/test/syscalls/linux/sendfile_socket.cc +++ b/test/syscalls/linux/sendfile_socket.cc @@ -41,15 +41,15 @@ class SendFileTest : public ::testing::TestWithParam { struct sockaddr server_addr = {}; switch (family) { case AF_INET: { - struct sockaddr_in *server_addr_in = - reinterpret_cast(&server_addr); + struct sockaddr_in* server_addr_in = + reinterpret_cast(&server_addr); server_addr_in->sin_family = family; server_addr_in->sin_addr.s_addr = INADDR_ANY; break; } case AF_UNIX: { - struct sockaddr_un *server_addr_un = - reinterpret_cast(&server_addr); + struct sockaddr_un* server_addr_un = + reinterpret_cast(&server_addr); server_addr_un->sun_family = family; server_addr_un->sun_path[0] = '\0'; break; diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc index 34b1058a9..637d1151a 100644 --- a/test/syscalls/linux/socket_bind_to_device_sequence.cc +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -66,7 +66,7 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { // Gets a device by device_id. If the device_id has been seen before, returns // the previously returned device. If not, finds or creates a new device. // Returns an empty string on failure. - void GetDevice(int device_id, string *device_name) { + void GetDevice(int device_id, string* device_name) { auto device = devices_.find(device_id); if (device != devices_.end()) { *device_name = device->second; @@ -112,7 +112,7 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { // Sets the socket_id to uniquely identify the socket bound if it is not // nullptr. void BindSocket(bool reuse_port, bool reuse_addr, int device_id = 0, - int want = 0, int *socket_id = nullptr) { + int want = 0, int* socket_id = nullptr) { next_socket_id_++; sockets_to_close_[next_socket_id_] = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket_fd = sockets_to_close_[next_socket_id_]->get(); @@ -154,12 +154,12 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { addr.sin_port = port_; if (want == 0) { ASSERT_THAT( - bind(socket_fd, reinterpret_cast(&addr), + bind(socket_fd, reinterpret_cast(&addr), sizeof(addr)), SyscallSucceeds()); } else { ASSERT_THAT( - bind(socket_fd, reinterpret_cast(&addr), + bind(socket_fd, reinterpret_cast(&addr), sizeof(addr)), SyscallFailsWithErrno(want)); } @@ -169,7 +169,7 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { // remember it for future commands. 
socklen_t addr_size = sizeof(addr); ASSERT_THAT( - getsockname(socket_fd, reinterpret_cast(&addr), + getsockname(socket_fd, reinterpret_cast(&addr), &addr_size), SyscallSucceeds()); port_ = addr.sin_port; diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc index 405dbbd73..15d4b85a7 100644 --- a/test/syscalls/linux/socket_netdevice.cc +++ b/test/syscalls/linux/socket_netdevice.cc @@ -91,7 +91,7 @@ TEST(NetdeviceTest, Netmask) { int prefixlen = -1; ASSERT_NO_ERRNO(NetlinkRequestResponse( fd, &req, sizeof(req), - [&](const struct nlmsghdr *hdr) { + [&](const struct nlmsghdr* hdr) { EXPECT_THAT(hdr->nlmsg_type, AnyOf(Eq(RTM_NEWADDR), Eq(NLMSG_DONE))); EXPECT_TRUE((hdr->nlmsg_flags & NLM_F_MULTI) == NLM_F_MULTI) @@ -107,8 +107,8 @@ TEST(NetdeviceTest, Netmask) { // RTM_NEWADDR contains at least the header and ifaddrmsg. EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct ifaddrmsg)); - struct ifaddrmsg *ifaddrmsg = - reinterpret_cast(NLMSG_DATA(hdr)); + struct ifaddrmsg* ifaddrmsg = + reinterpret_cast(NLMSG_DATA(hdr)); if (ifaddrmsg->ifa_index == static_cast(ifr.ifr_ifindex) && ifaddrmsg->ifa_family == AF_INET) { prefixlen = ifaddrmsg->ifa_prefixlen; @@ -127,8 +127,8 @@ TEST(NetdeviceTest, Netmask) { snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); ASSERT_THAT(ioctl(sock.get(), SIOCGIFNETMASK, &ifr), SyscallSucceeds()); EXPECT_EQ(ifr.ifr_netmask.sa_family, AF_INET); - struct sockaddr_in *sin = - reinterpret_cast(&ifr.ifr_netmask); + struct sockaddr_in* sin = + reinterpret_cast(&ifr.ifr_netmask); EXPECT_EQ(sin->sin_addr.s_addr, mask); } diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index c1e45e10a..388d75835 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -377,7 +377,7 @@ TEST_F(StatTest, ZeroLinksOpenFdRegularFileChild_NoRandomSave) { // // We need to support this because when a file is unlinked and we forward // the stat to the gofer it would return ENOENT. - const char *uncached_gofer = getenv("GVISOR_GOFER_UNCACHED"); + const char* uncached_gofer = getenv("GVISOR_GOFER_UNCACHED"); SKIP_IF(uncached_gofer != nullptr); // We don't support saving unlinked files. @@ -599,8 +599,8 @@ struct kernel_statx { uint64_t __spare2[14]; }; -int statx(int dirfd, const char *pathname, int flags, unsigned int mask, - struct kernel_statx *statxbuf) { +int statx(int dirfd, const char* pathname, int flags, unsigned int mask, + struct kernel_statx* statxbuf) { return syscall(SYS_statx, dirfd, pathname, flags, mask, statxbuf); } diff --git a/test/util/mount_util.h b/test/util/mount_util.h index 23eea51a2..09e2281eb 100644 --- a/test/util/mount_util.h +++ b/test/util/mount_util.h @@ -31,10 +31,10 @@ namespace testing { // Mount mounts the filesystem, and unmounts when the returned reference is // destroyed. 
-inline PosixErrorOr Mount(const std::string &source, - const std::string &target, - const std::string &fstype, - uint64_t mountflags, const std::string &data, +inline PosixErrorOr Mount(const std::string& source, + const std::string& target, + const std::string& fstype, + uint64_t mountflags, const std::string& data, uint64_t umountflags) { if (mount(source.c_str(), target.c_str(), fstype.c_str(), mountflags, data.c_str()) == -1) { diff --git a/vdso/syscalls.h b/vdso/syscalls.h index f5865bb72..b6d15a7d3 100644 --- a/vdso/syscalls.h +++ b/vdso/syscalls.h @@ -65,8 +65,8 @@ static inline int sys_rt_sigreturn(void) { return num; } -static inline int sys_clock_gettime(clockid_t _clkid, struct timespec *_ts) { - register struct timespec *ts asm("x1") = _ts; +static inline int sys_clock_gettime(clockid_t _clkid, struct timespec* _ts) { + register struct timespec* ts asm("x1") = _ts; register clockid_t clkid asm("x0") = _clkid; register long ret asm("x0"); register long nr asm("x8") = __NR_clock_gettime; @@ -78,8 +78,8 @@ static inline int sys_clock_gettime(clockid_t _clkid, struct timespec *_ts) { return ret; } -static inline int sys_clock_getres(clockid_t _clkid, struct timespec *_ts) { - register struct timespec *ts asm("x1") = _ts; +static inline int sys_clock_getres(clockid_t _clkid, struct timespec* _ts) { + register struct timespec* ts asm("x1") = _ts; register clockid_t clkid asm("x0") = _clkid; register long ret asm("x0"); register long nr asm("x8") = __NR_clock_getres; -- cgit v1.2.3 From 76483b8b1ec4ee1fb6b6efb6bdcfaf6dba7be4ce Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 28 Jan 2020 11:12:01 -0800 Subject: Check sigsetsize in rt_sigaction This isn't in the libc wrapper, but it is in the syscall itself. Discovered by @xiaobo55x in #1625. 
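For background on the check this commit adds, here is a minimal, illustrative userspace sketch (not part of this change or its tests): the glibc sigaction() wrapper supplies the sigsetsize argument itself, so only callers of the raw syscall ever see the value that the sentry now validates. The exact program below is an assumption-laden example, not project code.

    // Illustrative only -- not taken from this patch. On Linux x86-64 the raw
    // rt_sigaction syscall takes an explicit sigsetsize; glibc passes
    // sizeof(sigset_t) (8) behind the scenes.
    #include <sys/syscall.h>
    #include <unistd.h>

    #include <cerrno>
    #include <csignal>
    #include <cstdio>
    #include <cstring>

    int main() {
      // Correct size: accepted (both sigaction pointers may be null, which
      // neither queries nor changes the handler).
      long ok = syscall(SYS_rt_sigaction, SIGTERM, nullptr, nullptr, 8);
      std::printf("sigsetsize=8  -> %ld\n", ok);  // expected: 0

      // Any other size is rejected with EINVAL before the handler structs are
      // even read -- the behavior the sentry change above mirrors.
      long bad = syscall(SYS_rt_sigaction, SIGTERM, nullptr, nullptr, 12);
      std::printf("sigsetsize=12 -> %ld (%s)\n", bad, std::strerror(errno));
      return 0;
    }
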
PiperOrigin-RevId: 291973931 --- pkg/sentry/strace/linux64_amd64.go | 2 +- pkg/sentry/strace/linux64_arm64.go | 2 +- pkg/sentry/syscalls/linux/sys_signal.go | 5 ++++ test/syscalls/linux/sigaction.cc | 53 +++++++++++++++++++-------------- 4 files changed, 38 insertions(+), 24 deletions(-) (limited to 'test') diff --git a/pkg/sentry/strace/linux64_amd64.go b/pkg/sentry/strace/linux64_amd64.go index 1e823b685..85ec66fd3 100644 --- a/pkg/sentry/strace/linux64_amd64.go +++ b/pkg/sentry/strace/linux64_amd64.go @@ -37,7 +37,7 @@ var linuxAMD64 = SyscallMap{ 10: makeSyscallInfo("mprotect", Hex, Hex, Hex), 11: makeSyscallInfo("munmap", Hex, Hex), 12: makeSyscallInfo("brk", Hex), - 13: makeSyscallInfo("rt_sigaction", Signal, SigAction, PostSigAction), + 13: makeSyscallInfo("rt_sigaction", Signal, SigAction, PostSigAction, Hex), 14: makeSyscallInfo("rt_sigprocmask", SignalMaskAction, SigSet, PostSigSet, Hex), 15: makeSyscallInfo("rt_sigreturn"), 16: makeSyscallInfo("ioctl", FD, Hex, Hex), diff --git a/pkg/sentry/strace/linux64_arm64.go b/pkg/sentry/strace/linux64_arm64.go index c3ac5248d..8bc38545f 100644 --- a/pkg/sentry/strace/linux64_arm64.go +++ b/pkg/sentry/strace/linux64_arm64.go @@ -158,7 +158,7 @@ var linuxARM64 = SyscallMap{ 131: makeSyscallInfo("tgkill", Hex, Hex, Signal), 132: makeSyscallInfo("sigaltstack", Hex, Hex), 133: makeSyscallInfo("rt_sigsuspend", Hex), - 134: makeSyscallInfo("rt_sigaction", Signal, SigAction, PostSigAction), + 134: makeSyscallInfo("rt_sigaction", Signal, SigAction, PostSigAction, Hex), 135: makeSyscallInfo("rt_sigprocmask", SignalMaskAction, SigSet, PostSigSet, Hex), 136: makeSyscallInfo("rt_sigpending", Hex), 137: makeSyscallInfo("rt_sigtimedwait", SigSet, Hex, Timespec, Hex), diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go index 209be2990..7e1747a0c 100644 --- a/pkg/sentry/syscalls/linux/sys_signal.go +++ b/pkg/sentry/syscalls/linux/sys_signal.go @@ -245,6 +245,11 @@ func RtSigaction(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S sig := linux.Signal(args[0].Int()) newactarg := args[1].Pointer() oldactarg := args[2].Pointer() + sigsetsize := args[3].SizeT() + + if sigsetsize != linux.SignalSetSize { + return 0, nil, syserror.EINVAL + } var newactptr *arch.SignalAct if newactarg != 0 { diff --git a/test/syscalls/linux/sigaction.cc b/test/syscalls/linux/sigaction.cc index 9a53fd3e0..9d9dd57a8 100644 --- a/test/syscalls/linux/sigaction.cc +++ b/test/syscalls/linux/sigaction.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include "gtest/gtest.h" #include "test/util/test_util.h" @@ -23,45 +24,53 @@ namespace testing { namespace { TEST(SigactionTest, GetLessThanOrEqualToZeroFails) { - struct sigaction act; - memset(&act, 0, sizeof(act)); - ASSERT_THAT(sigaction(-1, NULL, &act), SyscallFailsWithErrno(EINVAL)); - ASSERT_THAT(sigaction(0, NULL, &act), SyscallFailsWithErrno(EINVAL)); + struct sigaction act = {}; + ASSERT_THAT(sigaction(-1, nullptr, &act), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(sigaction(0, nullptr, &act), SyscallFailsWithErrno(EINVAL)); } TEST(SigactionTest, SetLessThanOrEqualToZeroFails) { - struct sigaction act; - memset(&act, 0, sizeof(act)); - ASSERT_THAT(sigaction(0, &act, NULL), SyscallFailsWithErrno(EINVAL)); - ASSERT_THAT(sigaction(0, &act, NULL), SyscallFailsWithErrno(EINVAL)); + struct sigaction act = {}; + ASSERT_THAT(sigaction(0, &act, nullptr), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(sigaction(0, &act, nullptr), SyscallFailsWithErrno(EINVAL)); } TEST(SigactionTest, GetGreaterThanMaxFails) { - struct sigaction act; - memset(&act, 0, sizeof(act)); - ASSERT_THAT(sigaction(SIGRTMAX + 1, NULL, &act), + struct sigaction act = {}; + ASSERT_THAT(sigaction(SIGRTMAX + 1, nullptr, &act), SyscallFailsWithErrno(EINVAL)); } TEST(SigactionTest, SetGreaterThanMaxFails) { - struct sigaction act; - memset(&act, 0, sizeof(act)); - ASSERT_THAT(sigaction(SIGRTMAX + 1, &act, NULL), + struct sigaction act = {}; + ASSERT_THAT(sigaction(SIGRTMAX + 1, &act, nullptr), SyscallFailsWithErrno(EINVAL)); } TEST(SigactionTest, SetSigkillFails) { - struct sigaction act; - memset(&act, 0, sizeof(act)); - ASSERT_THAT(sigaction(SIGKILL, NULL, &act), SyscallSucceeds()); - ASSERT_THAT(sigaction(SIGKILL, &act, NULL), SyscallFailsWithErrno(EINVAL)); + struct sigaction act = {}; + ASSERT_THAT(sigaction(SIGKILL, nullptr, &act), SyscallSucceeds()); + ASSERT_THAT(sigaction(SIGKILL, &act, nullptr), SyscallFailsWithErrno(EINVAL)); } TEST(SigactionTest, SetSigstopFails) { - struct sigaction act; - memset(&act, 0, sizeof(act)); - ASSERT_THAT(sigaction(SIGSTOP, NULL, &act), SyscallSucceeds()); - ASSERT_THAT(sigaction(SIGSTOP, &act, NULL), SyscallFailsWithErrno(EINVAL)); + struct sigaction act = {}; + ASSERT_THAT(sigaction(SIGSTOP, nullptr, &act), SyscallSucceeds()); + ASSERT_THAT(sigaction(SIGSTOP, &act, nullptr), SyscallFailsWithErrno(EINVAL)); +} + +TEST(SigactionTest, BadSigsetFails) { + constexpr size_t kWrongSigSetSize = 43; + + struct sigaction act = {}; + + // The syscall itself (rather than the libc wrapper) takes the sigset_t size. 
+ ASSERT_THAT( + syscall(SYS_rt_sigaction, SIGTERM, nullptr, &act, kWrongSigSetSize), + SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT( + syscall(SYS_rt_sigaction, SIGTERM, &act, nullptr, kWrongSigSetSize), + SyscallFailsWithErrno(EINVAL)); } } // namespace -- cgit v1.2.3 From d99329e58492ef91b44a0bac346f757e8af2a7ec Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Tue, 28 Jan 2020 12:31:58 -0800 Subject: netlink: add support for RTM_F_LOOKUP_TABLE Test command: $ ip route get 1.1.1.1 Fixes: #1099 Signed-off-by: Jianfeng Tan COPYBARA_INTEGRATE_REVIEW=https://github.com/google/gvisor/pull/1121 from tanjianfeng:fix-1099 e6919f3d4ede5aa51a48b3d2be0d7a4b482dd53d PiperOrigin-RevId: 291990716 --- pkg/abi/linux/netlink_route.go | 13 +++ pkg/sentry/socket/netlink/route/BUILD | 6 +- pkg/sentry/socket/netlink/route/protocol.go | 158 +++++++++++++++++++++++++--- test/syscalls/linux/socket_netlink_route.cc | 84 +++++++++++++++ test/syscalls/linux/socket_netlink_util.cc | 38 +++++++ test/syscalls/linux/socket_netlink_util.h | 11 +- 6 files changed, 295 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go index 0e3582ab6..40bec566c 100644 --- a/pkg/abi/linux/netlink_route.go +++ b/pkg/abi/linux/netlink_route.go @@ -205,6 +205,9 @@ type RouteMessage struct { Flags uint32 } +// SizeOfRouteMessage is the size of RouteMessage. +const SizeOfRouteMessage = 12 + // Route types, from uapi/linux/rtnetlink.h. const ( // RTN_UNSPEC represents an unspecified route type. @@ -331,3 +334,13 @@ const ( RTF_GATEWAY = 0x2 RTF_UP = 0x1 ) + +// RtAttr is the header of optional addition route information, as a netlink +// attribute. From include/uapi/linux/rtnetlink.h. +type RtAttr struct { + Len uint16 + Type uint16 +} + +// SizeOfRtAttr is the size of RtAttr. +const SizeOfRtAttr = 4 diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD index 0234aadde..622a1eafc 100644 --- a/pkg/sentry/socket/netlink/route/BUILD +++ b/pkg/sentry/socket/netlink/route/BUILD @@ -4,15 +4,19 @@ package(licenses = ["notice"]) go_library( name = "route", - srcs = ["protocol.go"], + srcs = [ + "protocol.go", + ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/binary", "//pkg/context", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/socket/netlink", "//pkg/syserr", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go index 80a15d6cb..2b3c7f5b3 100644 --- a/pkg/sentry/socket/netlink/route/protocol.go +++ b/pkg/sentry/socket/netlink/route/protocol.go @@ -19,12 +19,14 @@ import ( "bytes" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/netlink" "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/usermem" ) // commandKind describes the operational class of a message type. @@ -66,8 +68,14 @@ func (p *Protocol) CanSend() bool { return true } -// dumpLinks handles RTM_GETLINK + NLM_F_DUMP requests. +// dumpLinks handles RTM_GETLINK dump requests. func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { + // TODO(b/68878065): Only the dump variant of the types below are + // supported. 
+ if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP { + return syserr.ErrNotSupported + } + // NLM_F_DUMP + RTM_GETLINK messages are supposed to include an // ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some // userspace applications (including glibc) still include rtgenmsg. @@ -121,8 +129,14 @@ func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader return nil } -// dumpAddrs handles RTM_GETADDR + NLM_F_DUMP requests. +// dumpAddrs handles RTM_GETADDR dump requests. func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { + // TODO(b/68878065): Only the dump variant of the types below are + // supported. + if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP { + return syserr.ErrNotSupported + } + // RTM_GETADDR dump requests need not contain anything more than the // netlink header and 1 byte protocol family common to all // NETLINK_ROUTE requests. @@ -163,22 +177,146 @@ func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader return nil } -// dumpRoutes handles RTM_GETROUTE + NLM_F_DUMP requests. +// commonPrefixLen reports the length of the longest IP address prefix. +// This is a simplied version from Golang's src/net/addrselect.go. +func commonPrefixLen(a, b []byte) (cpl int) { + for len(a) > 0 { + if a[0] == b[0] { + cpl += 8 + a = a[1:] + b = b[1:] + continue + } + bits := 8 + ab, bb := a[0], b[0] + for { + ab >>= 1 + bb >>= 1 + bits-- + if ab == bb { + cpl += bits + return + } + } + } + return +} + +// fillRoute returns the Route using LPM algorithm. Refer to Linux's +// net/ipv4/route.c:rt_fill_info(). +func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) { + family := uint8(linux.AF_INET) + if len(addr) != 4 { + family = linux.AF_INET6 + } + + idx := -1 // Index of the Route rule to be returned. + idxDef := -1 // Index of the default route rule. + prefix := 0 // Current longest prefix. + for i, route := range routes { + if route.Family != family { + continue + } + + if len(route.GatewayAddr) > 0 && route.DstLen == 0 { + idxDef = i + continue + } + + cpl := commonPrefixLen(addr, route.DstAddr) + if cpl < int(route.DstLen) { + continue + } + cpl = int(route.DstLen) + if cpl > prefix { + idx = i + prefix = cpl + } + } + if idx == -1 { + idx = idxDef + } + if idx == -1 { + return inet.Route{}, syserr.ErrNoRoute + } + + route := routes[idx] + if family == linux.AF_INET { + route.DstLen = 32 + } else { + route.DstLen = 128 + } + route.DstAddr = addr + route.Flags |= linux.RTM_F_CLONED // This route is cloned. + return route, nil +} + +// parseForDestination parses a message as format of RouteMessage-RtAttr-dst. +func parseForDestination(data []byte) ([]byte, *syserr.Error) { + var rtMsg linux.RouteMessage + if len(data) < linux.SizeOfRouteMessage { + return nil, syserr.ErrInvalidArgument + } + binary.Unmarshal(data[:linux.SizeOfRouteMessage], usermem.ByteOrder, &rtMsg) + // iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See + // commit bc234301af12. Note we don't check this flag for backward + // compatibility. + if rtMsg.Flags != 0 && rtMsg.Flags != linux.RTM_F_LOOKUP_TABLE { + return nil, syserr.ErrNotSupported + } + + data = data[linux.SizeOfRouteMessage:] + + // TODO(gvisor.dev/issue/1611): Add generic attribute parsing. 
+ var rtAttr linux.RtAttr + if len(data) < linux.SizeOfRtAttr { + return nil, syserr.ErrInvalidArgument + } + binary.Unmarshal(data[:linux.SizeOfRtAttr], usermem.ByteOrder, &rtAttr) + if rtAttr.Type != linux.RTA_DST { + return nil, syserr.ErrInvalidArgument + } + + if len(data) < int(rtAttr.Len) { + return nil, syserr.ErrInvalidArgument + } + return data[linux.SizeOfRtAttr:rtAttr.Len], nil +} + +// dumpRoutes handles RTM_GETROUTE requests. func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { // RTM_GETROUTE dump requests need not contain anything more than the // netlink header and 1 byte protocol family common to all // NETLINK_ROUTE requests. - // We always send back an NLMSG_DONE. - ms.Multi = true - stack := inet.StackFromContext(ctx) if stack == nil { // No network routes. return nil } - for _, rt := range stack.RouteTable() { + routeTables := stack.RouteTable() + + if hdr.Flags == linux.NLM_F_REQUEST { + dst, err := parseForDestination(data) + if err != nil { + return err + } + route, err := fillRoute(routeTables, dst) + if err != nil { + // TODO(gvisor.dev/issue/1237): return NLMSG_ERROR with ENETUNREACH. + return syserr.ErrNotSupported + } + routeTables = append([]inet.Route{}, route) + } else if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { + // We always send back an NLMSG_DONE. + ms.Multi = true + } else { + // TODO(b/68878065): Only above cases are supported. + return syserr.ErrNotSupported + } + + for _, rt := range routeTables { m := ms.AddMessage(linux.NetlinkMessageHeader{ Type: linux.RTM_NEWROUTE, }) @@ -236,12 +374,6 @@ func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageH } } - // TODO(b/68878065): Only the dump variant of the types below are - // supported. - if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP { - return syserr.ErrNotSupported - } - switch hdr.Type { case linux.RTM_GETLINK: return p.dumpLinks(ctx, hdr, data, ms) diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index ef567f512..1e28e658d 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -442,6 +442,90 @@ TEST(NetlinkRouteTest, GetRouteDump) { EXPECT_TRUE(dstFound); } +// GetRouteRequest tests a RTM_GETROUTE request with RTM_F_LOOKUP_TABLE flag. 
+TEST(NetlinkRouteTest, GetRouteRequest) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + + struct __attribute__((__packed__)) request { + struct nlmsghdr hdr; + struct rtmsg rtm; + struct nlattr nla; + struct in_addr sin_addr; + }; + + constexpr uint32_t kSeq = 12345; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETROUTE; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + + req.rtm.rtm_family = AF_INET; + req.rtm.rtm_dst_len = 32; + req.rtm.rtm_src_len = 0; + req.rtm.rtm_tos = 0; + req.rtm.rtm_table = RT_TABLE_UNSPEC; + req.rtm.rtm_protocol = RTPROT_UNSPEC; + req.rtm.rtm_scope = RT_SCOPE_UNIVERSE; + req.rtm.rtm_type = RTN_UNSPEC; + req.rtm.rtm_flags = RTM_F_LOOKUP_TABLE; + + req.nla.nla_len = 8; + req.nla.nla_type = RTA_DST; + inet_aton("127.0.0.2", &req.sin_addr); + + bool rtDstFound = false; + ASSERT_NO_ERRNO(NetlinkRequestResponseSingle( + fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) { + // Validate the reponse to RTM_GETROUTE request with RTM_F_LOOKUP_TABLE + // flag. + EXPECT_THAT(hdr->nlmsg_type, RTM_NEWROUTE); + + EXPECT_TRUE(hdr->nlmsg_flags == 0) << std::hex << hdr->nlmsg_flags; + + EXPECT_EQ(hdr->nlmsg_seq, kSeq); + EXPECT_EQ(hdr->nlmsg_pid, port); + + // RTM_NEWROUTE contains at least the header and rtmsg. + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct rtmsg))); + const struct rtmsg* msg = + reinterpret_cast(NLMSG_DATA(hdr)); + + // NOTE: rtmsg fields are char fields. + std::cout << "Found route table=" << static_cast(msg->rtm_table) + << ", protocol=" << static_cast(msg->rtm_protocol) + << ", scope=" << static_cast(msg->rtm_scope) + << ", type=" << static_cast(msg->rtm_type); + + EXPECT_EQ(msg->rtm_family, AF_INET); + EXPECT_EQ(msg->rtm_dst_len, 32); + EXPECT_TRUE((msg->rtm_flags & RTM_F_CLONED) == RTM_F_CLONED) + << std::hex << msg->rtm_flags; + + int len = RTM_PAYLOAD(hdr); + std::cout << ", len=" << len; + for (struct rtattr* attr = RTM_RTA(msg); RTA_OK(attr, len); + attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == RTA_DST) { + char address[INET_ADDRSTRLEN] = {}; + inet_ntop(AF_INET, RTA_DATA(attr), address, sizeof(address)); + std::cout << ", dst=" << address; + rtDstFound = true; + } else if (attr->rta_type == RTA_OIF) { + const char* oif = reinterpret_cast(RTA_DATA(attr)); + std::cout << ", oif=" << oif; + } + } + + std::cout << std::endl; + })); + // Found RTA_DST for RTM_F_LOOKUP_TABLE. + EXPECT_TRUE(rtDstFound); +} + // RecvmsgTrunc tests the recvmsg MSG_TRUNC flag with zero length output // buffer. MSG_TRUNC with a zero length buffer should consume subsequent // messages off the socket. diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index 723f5d728..cd2212a1a 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -108,5 +108,43 @@ PosixError NetlinkRequestResponse( return NoError(); } +PosixError NetlinkRequestResponseSingle( + const FileDescriptor& fd, void* request, size_t len, + const std::function& fn) { + struct iovec iov = {}; + iov.iov_base = request; + iov.iov_len = len; + + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + // No destination required; it defaults to pid 0, the kernel. 
+ + RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(sendmsg)(fd.get(), &msg, 0)); + + constexpr size_t kBufferSize = 4096; + std::vector buf(kBufferSize); + iov.iov_base = buf.data(); + iov.iov_len = buf.size(); + + int ret; + RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(recvmsg)(fd.get(), &msg, 0)); + + // We don't bother with the complexity of dealing with truncated messages. + // We must allocate a large enough buffer up front. + if ((msg.msg_flags & MSG_TRUNC) == MSG_TRUNC) { + return PosixError( + EIO, + absl::StrCat("Received truncated message with flags: ", msg.msg_flags)); + } + + for (struct nlmsghdr* hdr = reinterpret_cast(buf.data()); + NLMSG_OK(hdr, ret); hdr = NLMSG_NEXT(hdr, ret)) { + fn(hdr); + } + + return NoError(); +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h index 76e772c48..3678c0599 100644 --- a/test/syscalls/linux/socket_netlink_util.h +++ b/test/syscalls/linux/socket_netlink_util.h @@ -32,12 +32,21 @@ PosixErrorOr NetlinkBoundSocket(int protocol); // Returns the port ID of the passed socket. PosixErrorOr NetlinkPortID(int fd); -// Send the passed request and call fn will all response netlink messages. +// Send the passed request and call fn on all response netlink messages. +// +// To be used on requests with NLM_F_MULTI reponses. PosixError NetlinkRequestResponse( const FileDescriptor& fd, void* request, size_t len, const std::function& fn, bool expect_nlmsgerr); +// Send the passed request and call fn on all response netlink messages. +// +// To be used on requests without NLM_F_MULTI reponses. +PosixError NetlinkRequestResponseSingle( + const FileDescriptor& fd, void* request, size_t len, + const std::function& fn); + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From f263801a74d4ccac042b068d0928c8738e40af5b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 28 Jan 2020 13:36:16 -0800 Subject: fs/splice: don't report partial errors for special files Special files can have additional requirements for granularity. For example, read from eventfd returns EINVAL if a size is less 8 bytes. Reported-by: syzbot+3905f5493bec08eb7b02@syzkaller.appspotmail.com PiperOrigin-RevId: 292002926 --- pkg/sentry/fs/attr.go | 5 +++++ pkg/sentry/fs/file.go | 7 ------- pkg/sentry/fs/splice.go | 5 ----- pkg/sentry/syscalls/linux/sys_splice.go | 19 +++++++++++++++---- test/syscalls/linux/eventfd.cc | 25 +++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 16 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go index fa9e7d517..f60bd423d 100644 --- a/pkg/sentry/fs/attr.go +++ b/pkg/sentry/fs/attr.go @@ -206,6 +206,11 @@ func IsPipe(s StableAttr) bool { return s.Type == Pipe } +// IsAnonymous returns true if StableAttr.Type matches any type of anonymous. +func IsAnonymous(s StableAttr) bool { + return s.Type == Anonymous +} + // IsSocket returns true if StableAttr.Type matches any type of socket. func IsSocket(s StableAttr) bool { return s.Type == Socket diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index ca3466f4f..78100e448 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -555,10 +555,6 @@ type lockedWriter struct { // // This applies only to Write, not WriteAt. Offset int64 - - // Err contains the first error encountered while copying. This is - // useful to determine whether Writer or Reader failed during io.Copy. - Err error } // Write implements io.Writer.Write. 
@@ -594,8 +590,5 @@ func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) { break } } - if w.Err == nil { - w.Err = err - } return written, err } diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 791d1526c..33da82868 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -167,11 +167,6 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, if !srcPipe && !opts.SrcOffset { atomic.StoreInt64(&src.offset, src.offset+n) } - - // Don't report any errors if we have some progress without data loss. - if w.Err == nil { - err = nil - } } // Drop locks. diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index dd3a5807f..f43d6c155 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -211,8 +211,10 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal opts := fs.SpliceOpts{ Length: count, } + inFileAttr := inFile.Dirent.Inode.StableAttr + outFileAttr := outFile.Dirent.Inode.StableAttr switch { - case fs.IsPipe(inFile.Dirent.Inode.StableAttr) && !fs.IsPipe(outFile.Dirent.Inode.StableAttr): + case fs.IsPipe(inFileAttr) && !fs.IsPipe(outFileAttr): if inOffset != 0 { return 0, nil, syserror.ESPIPE } @@ -229,7 +231,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal opts.DstOffset = true opts.DstStart = offset } - case !fs.IsPipe(inFile.Dirent.Inode.StableAttr) && fs.IsPipe(outFile.Dirent.Inode.StableAttr): + case !fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr): if outOffset != 0 { return 0, nil, syserror.ESPIPE } @@ -246,13 +248,13 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal opts.SrcOffset = true opts.SrcStart = offset } - case fs.IsPipe(inFile.Dirent.Inode.StableAttr) && fs.IsPipe(outFile.Dirent.Inode.StableAttr): + case fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr): if inOffset != 0 || outOffset != 0 { return 0, nil, syserror.ESPIPE } // We may not refer to the same pipe; otherwise it's a continuous loop. - if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID { + if inFileAttr.InodeID == outFileAttr.InodeID { return 0, nil, syserror.EINVAL } default: @@ -262,6 +264,15 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Splice data. n, err := doSplice(t, outFile, inFile, opts, nonBlock) + // Special files can have additional requirements for granularity. For + // example, read from eventfd returns EINVAL if a size is less 8 bytes. + // Inotify is another example. read will return EINVAL is a buffer is + // too small to return the next event, but a size of an event isn't + // fixed, it is sizeof(struct inotify_event) + {NAME_LEN} + 1. + if n != 0 && err != nil && (fs.IsAnonymous(inFileAttr) || fs.IsAnonymous(outFileAttr)) { + err = nil + } + // See above; inFile is chosen arbitrarily here. 
return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "splice", inFile) } diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc index 367682c3d..927001eee 100644 --- a/test/syscalls/linux/eventfd.cc +++ b/test/syscalls/linux/eventfd.cc @@ -132,6 +132,31 @@ TEST(EventfdTest, BigWriteBigRead) { EXPECT_EQ(l[0], 1); } +TEST(EventfdTest, SpliceFromPipePartialSucceeds) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + const FileDescriptor pipe_rfd(pipes[0]); + const FileDescriptor pipe_wfd(pipes[1]); + constexpr uint64_t kVal{1}; + + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK)); + + uint64_t event_array[2]; + event_array[0] = kVal; + event_array[1] = kVal; + ASSERT_THAT(write(pipe_wfd.get(), event_array, sizeof(event_array)), + SyscallSucceedsWithValue(sizeof(event_array))); + EXPECT_THAT(splice(pipe_rfd.get(), /*__offin=*/nullptr, efd.get(), + /*__offout=*/nullptr, sizeof(event_array[0]) + 1, + SPLICE_F_NONBLOCK), + SyscallSucceedsWithValue(sizeof(event_array[0]))); + + uint64_t val; + ASSERT_THAT(read(efd.get(), &val, sizeof(val)), + SyscallSucceedsWithValue(sizeof(val))); + EXPECT_EQ(val, kVal); +} + // NotifyNonZero is inherently racy, so random save is disabled. TEST(EventfdTest, NotifyNonZero_NoRandomSave) { // Waits will time out at 10 seconds. -- cgit v1.2.3 From 51b783505b1ec164b02b48a0fd234509fba01a73 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 29 Jan 2020 15:41:51 -0800 Subject: Add support for TCP_DEFER_ACCEPT. PiperOrigin-RevId: 292233574 --- pkg/sentry/socket/netstack/netstack.go | 22 ++++ pkg/tcpip/tcpip.go | 6 ++ pkg/tcpip/transport/tcp/BUILD | 1 + pkg/tcpip/transport/tcp/accept.go | 25 ++--- pkg/tcpip/transport/tcp/connect.go | 53 +++++++++- pkg/tcpip/transport/tcp/endpoint.go | 26 ++++- pkg/tcpip/transport/tcp/forwarder.go | 4 +- pkg/tcpip/transport/tcp/tcp_test.go | 126 ++++++++++++++++++++++ test/syscalls/linux/socket_inet_loopback.cc | 158 ++++++++++++++++++++++++++++ test/syscalls/linux/tcp_socket.cc | 53 ++++++++++ 10 files changed, 451 insertions(+), 23 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 8619cc506..049d04bf2 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1260,6 +1260,18 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa return int32(time.Duration(v) / time.Second), nil + case linux.TCP_DEFER_ACCEPT: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + var v tcpip.TCPDeferAcceptOption + if err := ep.GetSockOpt(&v); err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(time.Duration(v) / time.Second), nil + default: emitUnimplementedEventTCP(t, name) } @@ -1713,6 +1725,16 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * v := usermem.ByteOrder.Uint32(optVal) return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPLingerTimeoutOption(time.Second * time.Duration(v)))) + case linux.TCP_DEFER_ACCEPT: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + v := int32(usermem.ByteOrder.Uint32(optVal)) + if v < 0 { + v = 0 + } + return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v)))) + case linux.TCP_REPAIR_OPTIONS: t.Kernel().EmitUnimplementedEvent(t) diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 
59c9b3fb0..0fa141d58 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -626,6 +626,12 @@ type TCPLingerTimeoutOption time.Duration // before being marked closed. type TCPTimeWaitTimeoutOption time.Duration +// TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow a +// accept to return a completed connection only when there is data to be +// read. This usually means the listening socket will drop the final ACK +// for a handshake till the specified timeout until a segment with data arrives. +type TCPDeferAcceptOption time.Duration + // MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default // TTL value for multicast messages. The default is 1. type MulticastTTLOption uint8 diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 4acd9fb9a..7b4a87a2d 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -57,6 +57,7 @@ go_library( imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ + "//pkg/log", "//pkg/rand", "//pkg/sleep", "//pkg/sync", diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index d469758eb..6101f2945 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -222,13 +222,13 @@ func (l *listenContext) isCookieValid(id stack.TransportEndpointID, cookie seqnu // createConnectingEndpoint creates a new endpoint in a connecting state, with // the connection parameters given by the arguments. -func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions) (*endpoint, *tcpip.Error) { +func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, *tcpip.Error) { // Create a new endpoint. netProto := l.netProto if netProto == 0 { netProto = s.route.NetProto } - n := newEndpoint(l.stack, netProto, nil) + n := newEndpoint(l.stack, netProto, queue) n.v6only = l.v6only n.ID = s.id n.boundNICID = s.route.NICID() @@ -273,16 +273,17 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i // createEndpoint creates a new endpoint in connected state and then performs // the TCP 3-way handshake. -func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *header.TCPSynOptions) (*endpoint, *tcpip.Error) { +func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, *tcpip.Error) { // Create new endpoint. irs := s.sequenceNumber isn := generateSecureISN(s.id, l.stack.Seed()) - ep, err := l.createConnectingEndpoint(s, isn, irs, opts) + ep, err := l.createConnectingEndpoint(s, isn, irs, opts, queue) if err != nil { return nil, err } // listenEP is nil when listenContext is used by tcp.Forwarder. + deferAccept := time.Duration(0) if l.listenEP != nil { l.listenEP.mu.Lock() if l.listenEP.EndpointState() != StateListen { @@ -290,13 +291,12 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head return nil, tcpip.ErrConnectionAborted } l.addPendingEndpoint(ep) + deferAccept = l.listenEP.deferAccept l.listenEP.mu.Unlock() } // Perform the 3-way handshake. 
- h := newHandshake(ep, seqnum.Size(ep.initialReceiveWindow())) - - h.resetToSynRcvd(isn, irs, opts) + h := newPassiveHandshake(ep, seqnum.Size(ep.initialReceiveWindow()), isn, irs, opts, deferAccept) if err := h.execute(); err != nil { ep.Close() if l.listenEP != nil { @@ -377,16 +377,14 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header defer e.decSynRcvdCount() defer s.decRef() - n, err := ctx.createEndpointAndPerformHandshake(s, opts) + n, err := ctx.createEndpointAndPerformHandshake(s, opts, &waiter.Queue{}) if err != nil { e.stack.Stats().TCP.FailedConnectionAttempts.Increment() e.stats.FailedConnectionAttempts.Increment() return } ctx.removePendingEndpoint(n) - // Start the protocol goroutine. - wq := &waiter.Queue{} - n.startAcceptedLoop(wq) + n.startAcceptedLoop() e.stack.Stats().TCP.PassiveConnectionOpenings.Increment() e.deliverAccepted(n) @@ -546,7 +544,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { rcvdSynOptions.TSEcr = s.parsedOptions.TSEcr } - n, err := ctx.createConnectingEndpoint(s, s.ackNumber-1, s.sequenceNumber-1, rcvdSynOptions) + n, err := ctx.createConnectingEndpoint(s, s.ackNumber-1, s.sequenceNumber-1, rcvdSynOptions, &waiter.Queue{}) if err != nil { e.stack.Stats().TCP.FailedConnectionAttempts.Increment() e.stats.FailedConnectionAttempts.Increment() @@ -576,8 +574,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { // space available in the backlog. // Start the protocol goroutine. - wq := &waiter.Queue{} - n.startAcceptedLoop(wq) + n.startAcceptedLoop() e.stack.Stats().TCP.PassiveConnectionOpenings.Increment() go e.deliverAccepted(n) } diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 4e3c5419c..9ff7ac261 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -86,6 +86,19 @@ type handshake struct { // rcvWndScale is the receive window scale, as defined in RFC 1323. rcvWndScale int + + // startTime is the time at which the first SYN/SYN-ACK was sent. + startTime time.Time + + // deferAccept if non-zero will drop the final ACK for a passive + // handshake till an ACK segment with data is received or the timeout is + // hit. + deferAccept time.Duration + + // acked is true if the the final ACK for a 3-way handshake has + // been received. This is required to stop retransmitting the + // original SYN-ACK when deferAccept is enabled. + acked bool } func newHandshake(ep *endpoint, rcvWnd seqnum.Size) handshake { @@ -112,6 +125,12 @@ func newHandshake(ep *endpoint, rcvWnd seqnum.Size) handshake { return h } +func newPassiveHandshake(ep *endpoint, rcvWnd seqnum.Size, isn, irs seqnum.Value, opts *header.TCPSynOptions, deferAccept time.Duration) handshake { + h := newHandshake(ep, rcvWnd) + h.resetToSynRcvd(isn, irs, opts, deferAccept) + return h +} + // FindWndScale determines the window scale to use for the given maximum window // size. func FindWndScale(wnd seqnum.Size) int { @@ -181,7 +200,7 @@ func (h *handshake) effectiveRcvWndScale() uint8 { // resetToSynRcvd resets the state of the handshake object to the SYN-RCVD // state. 
-func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *header.TCPSynOptions) { +func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *header.TCPSynOptions, deferAccept time.Duration) { h.active = false h.state = handshakeSynRcvd h.flags = header.TCPFlagSyn | header.TCPFlagAck @@ -189,6 +208,7 @@ func (h *handshake) resetToSynRcvd(iss seqnum.Value, irs seqnum.Value, opts *hea h.ackNum = irs + 1 h.mss = opts.MSS h.sndWndScale = opts.WS + h.deferAccept = deferAccept h.ep.mu.Lock() h.ep.setEndpointState(StateSynRecv) h.ep.mu.Unlock() @@ -352,6 +372,14 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { // We have previously received (and acknowledged) the peer's SYN. If the // peer acknowledges our SYN, the handshake is completed. if s.flagIsSet(header.TCPFlagAck) { + // If deferAccept is not zero and this is a bare ACK and the + // timeout is not hit then drop the ACK. + if h.deferAccept != 0 && s.data.Size() == 0 && time.Since(h.startTime) < h.deferAccept { + h.acked = true + h.ep.stack.Stats().DroppedPackets.Increment() + return nil + } + // If the timestamp option is negotiated and the segment does // not carry a timestamp option then the segment must be dropped // as per https://tools.ietf.org/html/rfc7323#section-3.2. @@ -365,10 +393,16 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { h.ep.updateRecentTimestamp(s.parsedOptions.TSVal, h.ackNum, s.sequenceNumber) } h.state = handshakeCompleted + h.ep.mu.Lock() h.ep.transitionToStateEstablishedLocked(h) + // If the segment has data then requeue it for the receiver + // to process it again once main loop is started. + if s.data.Size() > 0 { + s.incRef() + h.ep.enqueueSegment(s) + } h.ep.mu.Unlock() - return nil } @@ -471,6 +505,7 @@ func (h *handshake) execute() *tcpip.Error { } } + h.startTime = time.Now() // Initialize the resend timer. resendWaker := sleep.Waker{} timeOut := time.Duration(time.Second) @@ -524,11 +559,21 @@ func (h *handshake) execute() *tcpip.Error { switch index, _ := s.Fetch(true); index { case wakerForResend: timeOut *= 2 - if timeOut > 60*time.Second { + if timeOut > MaxRTO { return tcpip.ErrTimeout } rt.Reset(timeOut) - h.ep.sendSynTCP(&h.ep.route, h.ep.ID, h.ep.ttl, h.ep.sendTOS, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) + // Resend the SYN/SYN-ACK only if the following conditions hold. + // - It's an active handshake (deferAccept does not apply) + // - It's a passive handshake and we have not yet got the final-ACK. + // - It's a passive handshake and we got an ACK but deferAccept is + // enabled and we are now past the deferAccept duration. + // The last is required to provide a way for the peer to complete + // the connection with another ACK or data (as ACKs are never + // retransmitted on their own). + if h.active || !h.acked || h.deferAccept != 0 && time.Since(h.startTime) > h.deferAccept { + h.ep.sendSynTCP(&h.ep.route, h.ep.ID, h.ep.ttl, h.ep.sendTOS, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) + } case wakerForNotification: n := h.ep.fetchNotifications() diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 13718ff55..8d52414b7 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -498,6 +498,13 @@ type endpoint struct { // without any data being acked. userTimeout time.Duration + // deferAccept if non-zero specifies a user specified time during + // which the final ACK of a handshake will be dropped provided the + // ACK is a bare ACK and carries no data. 
If the timeout is crossed then + // the bare ACK is accepted and the connection is delivered to the + // listener. + deferAccept time.Duration + // pendingAccepted is a synchronization primitive used to track number // of connections that are queued up to be delivered to the accepted // channel. We use this to ensure that all goroutines blocked on writing @@ -1574,6 +1581,15 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.mu.Unlock() return nil + case tcpip.TCPDeferAcceptOption: + e.mu.Lock() + if time.Duration(v) > MaxRTO { + v = tcpip.TCPDeferAcceptOption(MaxRTO) + } + e.deferAccept = time.Duration(v) + e.mu.Unlock() + return nil + default: return nil } @@ -1798,6 +1814,12 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.mu.Unlock() return nil + case *tcpip.TCPDeferAcceptOption: + e.mu.Lock() + *o = tcpip.TCPDeferAcceptOption(e.deferAccept) + e.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -2149,9 +2171,8 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { // startAcceptedLoop sets up required state and starts a goroutine with the // main loop for accepted connections. -func (e *endpoint) startAcceptedLoop(waiterQueue *waiter.Queue) { +func (e *endpoint) startAcceptedLoop() { e.mu.Lock() - e.waiterQueue = waiterQueue e.workerRunning = true e.mu.Unlock() wakerInitDone := make(chan struct{}) @@ -2177,7 +2198,6 @@ func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { default: return nil, nil, tcpip.ErrWouldBlock } - return n, n.waiterQueue, nil } diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index 7eb613be5..c9ee5bf06 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -157,13 +157,13 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, TSVal: r.synOptions.TSVal, TSEcr: r.synOptions.TSEcr, SACKPermitted: r.synOptions.SACKPermitted, - }) + }, queue) if err != nil { return nil, err } // Start the protocol goroutine. - ep.startAcceptedLoop(queue) + ep.startAcceptedLoop() return ep, nil } diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index df2fb1071..a12336d47 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -6787,3 +6787,129 @@ func TestIncreaseWindowOnBufferResize(t *testing.T) { ), ) } + +func TestTCPDeferAccept(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + c.Create(-1) + + if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatal("Bind failed:", err) + } + + if err := c.EP.Listen(10); err != nil { + t.Fatal("Listen failed:", err) + } + + const tcpDeferAccept = 1 * time.Second + if err := c.EP.SetSockOpt(tcpip.TCPDeferAcceptOption(tcpDeferAccept)); err != nil { + t.Fatalf("c.EP.SetSockOpt(TCPDeferAcceptOption(%s) failed: %v", tcpDeferAccept, err) + } + + irs, iss := executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */) + + if _, _, err := c.EP.Accept(); err != tcpip.ErrWouldBlock { + t.Fatalf("c.EP.Accept() returned unexpected error got: %v, want: %s", err, tcpip.ErrWouldBlock) + } + + // Send data. This should result in an acceptable endpoint. + c.SendPacket([]byte{1, 2, 3, 4}, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: irs + 1, + AckNum: iss + 1, + }) + + // Receive ACK for the data we sent. 
+ checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck), + checker.SeqNum(uint32(iss+1)), + checker.AckNum(uint32(irs+5)))) + + // Give a bit of time for the socket to be delivered to the accept queue. + time.Sleep(50 * time.Millisecond) + aep, _, err := c.EP.Accept() + if err != nil { + t.Fatalf("c.EP.Accept() returned unexpected error got: %v, want: nil", err) + } + + aep.Close() + // Closing aep without reading the data should trigger a RST. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck), + checker.SeqNum(uint32(iss+1)), + checker.AckNum(uint32(irs+5)))) +} + +func TestTCPDeferAcceptTimeout(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + c.Create(-1) + + if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatal("Bind failed:", err) + } + + if err := c.EP.Listen(10); err != nil { + t.Fatal("Listen failed:", err) + } + + const tcpDeferAccept = 1 * time.Second + if err := c.EP.SetSockOpt(tcpip.TCPDeferAcceptOption(tcpDeferAccept)); err != nil { + t.Fatalf("c.EP.SetSockOpt(TCPDeferAcceptOption(%s) failed: %v", tcpDeferAccept, err) + } + + irs, iss := executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */) + + if _, _, err := c.EP.Accept(); err != tcpip.ErrWouldBlock { + t.Fatalf("c.EP.Accept() returned unexpected error got: %v, want: %s", err, tcpip.ErrWouldBlock) + } + + // Sleep for a little of the tcpDeferAccept timeout. + time.Sleep(tcpDeferAccept + 100*time.Millisecond) + + // On timeout expiry we should get a SYN-ACK retransmission. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck|header.TCPFlagSyn), + checker.AckNum(uint32(irs)+1))) + + // Send data. This should result in an acceptable endpoint. + c.SendPacket([]byte{1, 2, 3, 4}, &context.Headers{ + SrcPort: context.TestPort, + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: irs + 1, + AckNum: iss + 1, + }) + + // Receive ACK for the data we sent. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck), + checker.SeqNum(uint32(iss+1)), + checker.AckNum(uint32(irs+5)))) + + // Give sometime for the endpoint to be delivered to the accept queue. + time.Sleep(50 * time.Millisecond) + aep, _, err := c.EP.Accept() + if err != nil { + t.Fatalf("c.EP.Accept() returned unexpected error got: %v, want: nil", err) + } + + aep.Close() + // Closing aep without reading the data should trigger a RST. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.SrcPort(context.StackPort), + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagRst|header.TCPFlagAck), + checker.SeqNum(uint32(iss+1)), + checker.AckNum(uint32(irs+5)))) +} diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 2f9821555..3bf7081b9 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -828,6 +828,164 @@ TEST_P(SocketInetLoopbackTest, AcceptedInheritsTCPUserTimeout) { EXPECT_EQ(get, kUserTimeout); } +// TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not +// saved. Enable S/R once issue is fixed. 
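// Note (informal): the TCP_DEFER_ACCEPT value passed to setsockopt below is in
// seconds, and Linux internally converts it to a retransmission count and
// back, so only certain values round-trip exactly; 3 is one such value (see
// the comment in tcp_socket.cc later in this patch).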
+TEST_P(SocketInetLoopbackTest, TCPDeferAccept_NoRandomSave) { + // TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not + // saved. Enable S/R issue is fixed. + DisableSave ds; + + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + + const uint16_t port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Set the TCP_DEFER_ACCEPT on the listening socket. + constexpr int kTCPDeferAccept = 3; + ASSERT_THAT(setsockopt(listen_fd.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, + &kTCPDeferAccept, sizeof(kTCPDeferAccept)), + SyscallSucceeds()); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Set the listening socket to nonblock so that we can verify that there is no + // connection in queue despite the connect above succeeding since the peer has + // sent no data and TCP_DEFER_ACCEPT is set on the listening socket. Set the + // FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(listen_fd.get(), F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds()); + + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Set FD back to blocking. + opts &= ~O_NONBLOCK; + ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds()); + + // Now write some data to the socket. + int data = 0; + ASSERT_THAT(RetryEINTR(write)(conn_fd.get(), &data, sizeof(data)), + SyscallSucceedsWithValue(sizeof(data))); + + // This should now cause the connection to complete and be delivered to the + // accept socket. + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // Verify that the accepted socket returns the data written. + int get = -1; + ASSERT_THAT(RetryEINTR(recv)(accepted.get(), &get, sizeof(get), 0), + SyscallSucceedsWithValue(sizeof(get))); + + EXPECT_EQ(get, data); +} + +// TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not +// saved. Enable S/R once issue is fixed. +TEST_P(SocketInetLoopbackTest, TCPDeferAcceptTimeout_NoRandomSave) { + // TODO(gvisor.dev/issue/1688): Partially completed passive endpoints are not + // saved. Enable S/R once issue is fixed. + DisableSave ds; + + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. 
+ const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + + const uint16_t port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Set the TCP_DEFER_ACCEPT on the listening socket. + constexpr int kTCPDeferAccept = 3; + ASSERT_THAT(setsockopt(listen_fd.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, + &kTCPDeferAccept, sizeof(kTCPDeferAccept)), + SyscallSucceeds()); + + // Connect to the listening socket. + FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Set the listening socket to nonblock so that we can verify that there is no + // connection in queue despite the connect above succeeding since the peer has + // sent no data and TCP_DEFER_ACCEPT is set on the listening socket. Set the + // FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(listen_fd.get(), F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds()); + + // Verify that there is no acceptable connection before TCP_DEFER_ACCEPT + // timeout is hit. + absl::SleepFor(absl::Seconds(kTCPDeferAccept - 1)); + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), + SyscallFailsWithErrno(EWOULDBLOCK)); + + // Set FD back to blocking. + opts &= ~O_NONBLOCK; + ASSERT_THAT(fcntl(listen_fd.get(), F_SETFL, opts), SyscallSucceeds()); + + // Now sleep for a little over the TCP_DEFER_ACCEPT duration. When the timeout + // is hit a SYN-ACK should be retransmitted by the listener as a last ditch + // attempt to complete the connection with or without data. + absl::SleepFor(absl::Seconds(2)); + + // Verify that we have a connection that can be accepted even though no + // data was written. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); +} + INSTANTIATE_TEST_SUITE_P( All, SocketInetLoopbackTest, ::testing::Values( diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 33a5ac66c..525ccbd88 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -1286,6 +1286,59 @@ TEST_P(SimpleTcpSocketTest, SetTCPUserTimeout) { EXPECT_EQ(get, kTCPUserTimeout); } +TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // -ve TCP_DEFER_ACCEPT is same as setting it to zero. 
+ constexpr int kNeg = -1; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &kNeg, sizeof(kNeg)), + SyscallSucceeds()); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); +} + +TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 0); +} + +TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptGreaterThanZero) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + // kTCPDeferAccept is in seconds. + // NOTE: linux translates seconds to # of retries and back from + // #of retries to seconds. Which means only certain values + // translate back exactly. That's why we use 3 here, a value of + // 5 will result in us getting back 7 instead of 5 in the + // getsockopt. + constexpr int kTCPDeferAccept = 3; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, + &kTCPDeferAccept, sizeof(kTCPDeferAccept)), + SyscallSucceeds()); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kTCPDeferAccept); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); -- cgit v1.2.3 From 0ade523f061d25c2b4abeba9c74e879aae2ce376 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 29 Jan 2020 16:26:28 -0800 Subject: Fix iptables tests that were broken by rename. The name of the runner binary target changed from "runner" to "runner-image", causing iptables tests to fail. PiperOrigin-RevId: 292242263 --- scripts/iptables_tests.sh | 4 ++-- test/iptables/README.md | 2 +- test/iptables/iptables_test.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/scripts/iptables_tests.sh b/scripts/iptables_tests.sh index c47cbd675..3069d8628 100755 --- a/scripts/iptables_tests.sh +++ b/scripts/iptables_tests.sh @@ -19,9 +19,9 @@ source $(dirname $0)/common.sh install_runsc_for_test iptables # Build the docker image for the test. -run //test/iptables/runner --norun +run //test/iptables/runner-image --norun # TODO(gvisor.dev/issue/170): Also test this on runsc once iptables are better # supported test //test/iptables:iptables_test "--test_arg=--runtime=runc" \ - "--test_arg=--image=bazel/test/iptables/runner:runner" + "--test_arg=--image=bazel/test/iptables/runner:runner-image" diff --git a/test/iptables/README.md b/test/iptables/README.md index 9f8e34420..8f61b4c41 100644 --- a/test/iptables/README.md +++ b/test/iptables/README.md @@ -28,7 +28,7 @@ Your test is now runnable with bazel! 
Build the testing Docker container: ```bash -$ bazel run //test/iptables/runner -- --norun +$ bazel run //test/iptables/runner-image -- --norun ``` Run an individual test via: diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 679a29bef..41909582a 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -30,7 +30,7 @@ import ( const timeout = 18 * time.Second -var image = flag.String("image", "bazel/test/iptables/runner:runner", "image to run tests in") +var image = flag.String("image", "bazel/test/iptables/runner:runner-image", "image to run tests in") type result struct { output string -- cgit v1.2.3 From ede8dfab3760afc8063c3418f217e52f7ec70d42 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 30 Jan 2020 09:13:36 -0800 Subject: Enforce splice offset limits Splice must not allow negative offsets. Writes also must not allow offset + size to overflow int64. Reads are similarly broken, but not just in splice (b/148095030). Reported-by: syzbot+0e1ff0b95fb2859b4190@syzkaller.appspotmail.com PiperOrigin-RevId: 292361208 --- pkg/sentry/fs/tmpfs/inode_file.go | 10 ++++-- pkg/sentry/syscalls/linux/sys_splice.go | 16 ++++------ test/syscalls/linux/splice.cc | 56 +++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 12 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index dabc10662..25abbc151 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -17,6 +17,7 @@ package tmpfs import ( "fmt" "io" + "math" "time" "gvisor.dev/gvisor/pkg/abi/linux" @@ -444,10 +445,15 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) defer rw.f.dataMu.Unlock() // Compute the range to write. - end := fs.WriteEndOffset(rw.offset, int64(srcs.NumBytes())) - if end == rw.offset { // srcs.NumBytes() == 0? + if srcs.NumBytes() == 0 { + // Nothing to do. return 0, nil } + end := fs.WriteEndOffset(rw.offset, int64(srcs.NumBytes())) + if end == math.MaxInt64 { + // Overflow. + return 0, syserror.EINVAL + } // Check if seals prevent either file growth or all writes. switch { diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index f43d6c155..fd642834b 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -25,6 +25,10 @@ import ( // doSplice implements a blocking splice operation. func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) { + if opts.Length < 0 || opts.SrcStart < 0 || opts.DstStart < 0 { + return 0, syserror.EINVAL + } + var ( total int64 n int64 @@ -82,11 +86,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc offsetAddr := args[2].Pointer() count := int64(args[3].SizeT()) - // Don't send a negative number of bytes. - if count < 0 { - return 0, nil, syserror.EINVAL - } - // Get files. inFile := t.GetFile(inFD) if inFile == nil { @@ -136,11 +135,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, err } - // The offset must be valid. - if offset < 0 { - return 0, nil, syserror.EINVAL - } - // Do the splice. n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{ Length: count, @@ -227,6 +221,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if _, err := t.CopyIn(outOffset, &offset); err != nil { return 0, nil, err } + // Use the destination offset. 
opts.DstOffset = true opts.DstStart = offset @@ -244,6 +239,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if _, err := t.CopyIn(inOffset, &offset); err != nil { return 0, nil, err } + // Use the source offset. opts.SrcOffset = true opts.SrcStart = offset diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index 85232cb1f..faa1247f6 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -60,6 +60,62 @@ TEST(SpliceTest, TwoRegularFiles) { SyscallFailsWithErrno(EINVAL)); } +int memfd_create(const std::string& name, unsigned int flags) { + return syscall(__NR_memfd_create, name.c_str(), flags); +} + +TEST(SpliceTest, NegativeOffset) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill the pipe. + std::vector buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the output file as write only. + int fd; + EXPECT_THAT(fd = memfd_create("negative", 0), SyscallSucceeds()); + const FileDescriptor out_fd(fd); + + loff_t out_offset = 0xffffffffffffffffull; + constexpr int kSize = 2; + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Write offset + size overflows int64. +// +// This is a regression test for b/148041624. +TEST(SpliceTest, WriteOverflow) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill the pipe. + std::vector buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the output file. + int fd; + EXPECT_THAT(fd = memfd_create("overflow", 0), SyscallSucceeds()); + const FileDescriptor out_fd(fd); + + // out_offset + kSize overflows INT64_MAX. + loff_t out_offset = 0x7ffffffffffffffeull; + constexpr int kSize = 3; + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + TEST(SpliceTest, SamePipe) { // Create a new pipe. int fds[2]; -- cgit v1.2.3 From 4ee64a248ec16fcc9e526a457a66648546611bfb Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 30 Jan 2020 11:48:36 -0800 Subject: Fix for panic in endpoint.Close(). When sending a RST on shutdown we need to double check the state after acquiring the work mutex as the endpoint could have transitioned out of a connected state from the time we checked it and we acquired the workMutex. I added two tests but sadly neither reproduce the panic. I am going to leave the tests in as they are good to have anyway. 
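For reference, a minimal standalone sketch of the pattern this fix relies on, using a plain mutex and a bool rather than gVisor's actual endpoint types (names are illustrative only): the connection state is checked again after the lock is finally acquired, since it may have changed while we were waiting for it.

```go
package main

import (
	"fmt"
	"sync"
)

type conn struct {
	mu        sync.Mutex
	connected bool // stands in for the endpoint being in a connected state
}

// close sends a reset only if the connection is still connected *after*
// acquiring the lock; another goroutine (the worker) may have moved it out
// of a connected state while close was blocked on mu.
func (c *conn) close() {
	c.mu.Lock()
	defer c.mu.Unlock()
	if !c.connected {
		return // state changed while waiting; nothing to reset
	}
	c.connected = false
	fmt.Println("sending RST")
}

func main() {
	c := &conn{connected: true}
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.close()
		}()
	}
	wg.Wait()
}
```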
PiperOrigin-RevId: 292393800 --- pkg/tcpip/transport/tcp/BUILD | 1 + pkg/tcpip/transport/tcp/endpoint.go | 10 ++++- pkg/tcpip/transport/tcp/tcp_test.go | 55 ++++++++++++++++++++++++++++ test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket_ip_tcp_generic.cc | 33 +++++++++++++++++ 5 files changed, 98 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 7b4a87a2d..272e8f570 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -91,6 +91,7 @@ go_test( tags = ["flaky"], deps = [ ":tcp", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/checker", diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 8d52414b7..b5a8e15ee 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2047,8 +2047,14 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { // work mutex is available. if e.workMu.TryLock() { e.mu.Lock() - e.resetConnectionLocked(tcpip.ErrConnectionAborted) - e.notifyProtocolGoroutine(notifyTickleWorker) + // We need to double check here to make + // sure worker has not transitioned the + // endpoint out of a connected state + // before trying to send a reset. + if e.EndpointState().connected() { + e.resetConnectionLocked(tcpip.ErrConnectionAborted) + e.notifyProtocolGoroutine(notifyTickleWorker) + } e.mu.Unlock() e.workMu.Unlock() } else { diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index a12336d47..2c1505067 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/checker" @@ -6913,3 +6914,57 @@ func TestTCPDeferAcceptTimeout(t *testing.T) { checker.SeqNum(uint32(iss+1)), checker.AckNum(uint32(irs+5)))) } + +func TestResetDuringClose(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + iss := seqnum.Value(789) + c.CreateConnected(iss, 30000, -1 /* epRecvBuf */) + // Send some data to make sure there is some unread + // data to trigger a reset on c.Close. + irs := c.IRS + c.SendPacket([]byte{1, 2, 3, 4}, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagAck, + SeqNum: iss.Add(1), + AckNum: irs.Add(1), + RcvWnd: 30000, + }) + + // Receive ACK for the data we sent. + checker.IPv4(t, c.GetPacket(), checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck), + checker.SeqNum(uint32(irs.Add(1))), + checker.AckNum(uint32(iss.Add(5))))) + + // Close in a separate goroutine so that we can trigger + // a race with the RST we send below. This should not + // panic due to the route being released depeding on + // whether Close() sends an active RST or the RST sent + // below is processed by the worker first. 
+ var wg sync.WaitGroup + + wg.Add(1) + go func() { + defer wg.Done() + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + SeqNum: iss.Add(5), + AckNum: c.IRS.Add(5), + RcvWnd: 30000, + Flags: header.TCPFlagRst, + }) + }() + + wg.Add(1) + go func() { + defer wg.Done() + c.EP.Close() + }() + + wg.Wait() +} diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 74bf068ec..7958fd0d7 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2173,6 +2173,7 @@ cc_library( ":socket_test_util", "//test/util:test_util", "//test/util:thread_util", + "@com_google_absl//absl/memory", "@com_google_absl//absl/time", "@com_google_googletest//:gtest", ], diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index 57ce8e169..27779e47c 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -24,6 +24,7 @@ #include #include "gtest/gtest.h" +#include "absl/memory/memory.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/socket_test_util.h" @@ -875,5 +876,37 @@ TEST_P(TCPSocketPairTest, SetTCPUserTimeoutAboveZero) { EXPECT_EQ(get, kAbove); } +TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) { + DisableSave ds; // Too many syscalls. + constexpr int kThreadCount = 1000; + std::unique_ptr instances[kThreadCount]; + for (int i = 0; i < kThreadCount; i++) { + instances[i] = absl::make_unique([&]() { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + ScopedThread t([&]() { + // Close one end to trigger sending of a FIN. + struct pollfd poll_fd = {sockets->second_fd(), POLLIN | POLLHUP, 0}; + // Wait up to 20 seconds for the data. + constexpr int kPollTimeoutMs = 20000; + ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), + SyscallSucceedsWithValue(1)); + ASSERT_THAT(close(sockets->release_second_fd()), SyscallSucceeds()); + }); + + // Send some data then close. + constexpr char kStr[] = "abc"; + ASSERT_THAT(write(sockets->first_fd(), kStr, 3), + SyscallSucceedsWithValue(3)); + absl::SleepFor(absl::Milliseconds(10)); + ASSERT_THAT(close(sockets->release_first_fd()), SyscallSucceeds()); + t.Join(); + }); + } + for (int i = 0; i < kThreadCount; i++) { + instances[i]->Join(); + } +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 9988cf2eeff596ce519046d80c54d09166f7d84b Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Thu, 30 Jan 2020 14:06:54 -0800 Subject: Wrap all GetSocketPairs() in unnamed namespaces This avoids conflicting definitions of GetSocketPairs() in outer namespace when multiple such cc files are complied for one binary. 
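To illustrate the conflict being fixed (file and function names below are hypothetical, not the actual test sources): when two translation units linked into one binary each define a namespace-scope function with the same signature and external linkage, the definitions collide; an unnamed namespace gives each definition internal linkage, so every file keeps its own copy.

```cpp
// pairs_a.cc (hypothetical)
#include <cstdio>

namespace {
// Internal linkage: visible only inside pairs_a.cc, so a different
// GetPairs() defined in pairs_b.cc does not conflict with this one.
int GetPairs() { return 1; }
}  // namespace

void UseA() { std::printf("a: %d\n", GetPairs()); }

// pairs_b.cc (hypothetical) would look the same but return 2; without the
// unnamed namespace the two external-linkage definitions of GetPairs()
// would violate the one-definition rule when linked together.
```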
PiperOrigin-RevId: 292420885 --- test/syscalls/linux/socket_abstract.cc | 2 ++ test/syscalls/linux/socket_filesystem.cc | 2 ++ test/syscalls/linux/socket_ip_tcp_generic_loopback.cc | 2 ++ test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc | 2 ++ test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc | 2 ++ test/syscalls/linux/socket_ip_udp_loopback.cc | 2 ++ test/syscalls/linux/socket_ip_udp_loopback_blocking.cc | 2 ++ test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc | 2 ++ test/syscalls/linux/socket_unix_abstract_nonblock.cc | 2 ++ test/syscalls/linux/socket_unix_blocking_local.cc | 2 ++ test/syscalls/linux/socket_unix_dgram_local.cc | 2 ++ test/syscalls/linux/socket_unix_domain.cc | 2 ++ test/syscalls/linux/socket_unix_filesystem_nonblock.cc | 2 ++ test/syscalls/linux/socket_unix_non_stream_blocking_local.cc | 2 ++ test/syscalls/linux/socket_unix_pair.cc | 2 ++ test/syscalls/linux/socket_unix_pair_nonblock.cc | 2 ++ test/syscalls/linux/socket_unix_seqpacket_local.cc | 2 ++ test/syscalls/linux/socket_unix_stream_blocking_local.cc | 2 ++ test/syscalls/linux/socket_unix_stream_local.cc | 2 ++ test/syscalls/linux/socket_unix_stream_nonblock_local.cc | 2 ++ 20 files changed, 40 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/socket_abstract.cc b/test/syscalls/linux/socket_abstract.cc index 715d87b76..00999f192 100644 --- a/test/syscalls/linux/socket_abstract.cc +++ b/test/syscalls/linux/socket_abstract.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVec( @@ -43,5 +44,6 @@ INSTANTIATE_TEST_SUITE_P( AbstractUnixSockets, UnixSocketPairCmsgTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_filesystem.cc b/test/syscalls/linux/socket_filesystem.cc index 74e262959..287359363 100644 --- a/test/syscalls/linux/socket_filesystem.cc +++ b/test/syscalls/linux/socket_filesystem.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVec( @@ -43,5 +44,6 @@ INSTANTIATE_TEST_SUITE_P( FilesystemUnixSockets, UnixSocketPairCmsgTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc index d11f7cc23..4e79d21f4 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVecToVec( @@ -39,5 +40,6 @@ INSTANTIATE_TEST_SUITE_P( AllTCPSockets, TCPSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc index fcd20102f..f996b93d2 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVecToVec( @@ -39,5 +40,6 @@ INSTANTIATE_TEST_SUITE_P( BlockingTCPSockets, BlockingStreamSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff 
--git a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc index 63a05b799..ffa377210 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVecToVec( @@ -38,5 +39,6 @@ INSTANTIATE_TEST_SUITE_P( NonBlockingTCPSockets, NonBlockingSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_loopback.cc b/test/syscalls/linux/socket_ip_udp_loopback.cc index 1df74a348..c7fa44884 100644 --- a/test/syscalls/linux/socket_ip_udp_loopback.cc +++ b/test/syscalls/linux/socket_ip_udp_loopback.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return { @@ -44,5 +45,6 @@ INSTANTIATE_TEST_SUITE_P( AllUDPSockets, UDPSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc index 1e259efa7..d6925a8df 100644 --- a/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_udp_loopback_blocking.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return { @@ -33,5 +34,6 @@ INSTANTIATE_TEST_SUITE_P( BlockingUDPSockets, BlockingNonStreamSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc index 74cbd326d..d675eddc6 100644 --- a/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc +++ b/test/syscalls/linux/socket_ip_udp_loopback_nonblock.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return { @@ -33,5 +34,6 @@ INSTANTIATE_TEST_SUITE_P( NonBlockingUDPSockets, NonBlockingSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_abstract_nonblock.cc b/test/syscalls/linux/socket_unix_abstract_nonblock.cc index be31ab2a7..8bef76b67 100644 --- a/test/syscalls/linux/socket_unix_abstract_nonblock.cc +++ b/test/syscalls/linux/socket_unix_abstract_nonblock.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVec( @@ -33,5 +34,6 @@ INSTANTIATE_TEST_SUITE_P( NonBlockingAbstractUnixSockets, NonBlockingSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_blocking_local.cc b/test/syscalls/linux/socket_unix_blocking_local.cc index 6f84221b2..77cb8c6d6 100644 --- a/test/syscalls/linux/socket_unix_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_blocking_local.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return VecCat( @@ -39,5 +40,6 @@ INSTANTIATE_TEST_SUITE_P( NonBlockingUnixDomainSockets, BlockingSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace 
testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_dgram_local.cc b/test/syscalls/linux/socket_unix_dgram_local.cc index 9134fcdf7..31d2d5216 100644 --- a/test/syscalls/linux/socket_unix_dgram_local.cc +++ b/test/syscalls/linux/socket_unix_dgram_local.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return VecCat(VecCat( @@ -52,5 +53,6 @@ INSTANTIATE_TEST_SUITE_P( DgramUnixSockets, NonStreamSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_domain.cc b/test/syscalls/linux/socket_unix_domain.cc index fa3efc7f8..f7dff8b4d 100644 --- a/test/syscalls/linux/socket_unix_domain.cc +++ b/test/syscalls/linux/socket_unix_domain.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVec( @@ -33,5 +34,6 @@ INSTANTIATE_TEST_SUITE_P( AllUnixDomainSockets, AllSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_filesystem_nonblock.cc b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc index 8ba7af971..6700b4d90 100644 --- a/test/syscalls/linux/socket_unix_filesystem_nonblock.cc +++ b/test/syscalls/linux/socket_unix_filesystem_nonblock.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVec( @@ -33,5 +34,6 @@ INSTANTIATE_TEST_SUITE_P( NonBlockingFilesystemUnixSockets, NonBlockingSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc index 8855d5001..fddcdf1c5 100644 --- a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return VecCat( @@ -36,5 +37,6 @@ INSTANTIATE_TEST_SUITE_P( BlockingNonStreamUnixSockets, BlockingNonStreamSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_pair.cc b/test/syscalls/linux/socket_unix_pair.cc index 411fb4518..85999db04 100644 --- a/test/syscalls/linux/socket_unix_pair.cc +++ b/test/syscalls/linux/socket_unix_pair.cc @@ -22,6 +22,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return VecCat(ApplyVec( @@ -38,5 +39,6 @@ INSTANTIATE_TEST_SUITE_P( AllUnixDomainSockets, UnixSocketPairCmsgTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_pair_nonblock.cc b/test/syscalls/linux/socket_unix_pair_nonblock.cc index 3135d325f..281410a9a 100644 --- a/test/syscalls/linux/socket_unix_pair_nonblock.cc +++ b/test/syscalls/linux/socket_unix_pair_nonblock.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return ApplyVec( @@ -33,5 +34,6 @@ INSTANTIATE_TEST_SUITE_P( NonBlockingUnixSockets, NonBlockingSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } 
// namespace gvisor diff --git a/test/syscalls/linux/socket_unix_seqpacket_local.cc b/test/syscalls/linux/socket_unix_seqpacket_local.cc index dff75a532..69a5f150d 100644 --- a/test/syscalls/linux/socket_unix_seqpacket_local.cc +++ b/test/syscalls/linux/socket_unix_seqpacket_local.cc @@ -23,6 +23,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return VecCat(VecCat( @@ -52,5 +53,6 @@ INSTANTIATE_TEST_SUITE_P( SeqpacketUnixSockets, UnixNonStreamSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_stream_blocking_local.cc index 08e579ba7..8429bd429 100644 --- a/test/syscalls/linux/socket_unix_stream_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_stream_blocking_local.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return { @@ -34,5 +35,6 @@ INSTANTIATE_TEST_SUITE_P( BlockingStreamUnixSockets, BlockingStreamSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_local.cc b/test/syscalls/linux/socket_unix_stream_local.cc index 65eef1a81..a7e3449a9 100644 --- a/test/syscalls/linux/socket_unix_stream_local.cc +++ b/test/syscalls/linux/socket_unix_stream_local.cc @@ -21,6 +21,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return VecCat( @@ -42,5 +43,6 @@ INSTANTIATE_TEST_SUITE_P( StreamUnixSockets, StreamSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc index 1936aa135..4b763c8e2 100644 --- a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc +++ b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc @@ -20,6 +20,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSocketPairs() { return { @@ -33,5 +34,6 @@ INSTANTIATE_TEST_SUITE_P( NonBlockingStreamUnixSockets, NonBlockingStreamSocketPairTest, ::testing::ValuesIn(IncludeReversals(GetSocketPairs()))); +} // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 14959250feb71df74dea13f3cb15dcbe8ce6b3f3 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 30 Jan 2020 17:37:17 -0800 Subject: Simplify testing link rules. 
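The BUILD changes below replace the literal "@com_google_googletest//:gtest" dependency with a gtest symbol loaded from //tools:defs.bzl. The contents of defs.bzl are not part of this hunk, but a minimal definition consistent with how the symbol is used might look like the following (hypothetical sketch; the real file may add per-build-system indirection):

```python
# tools/defs.bzl (hypothetical sketch)
gtest = "@com_google_googletest//:gtest"
```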
PiperOrigin-RevId: 292458933 --- test/syscalls/linux/BUILD | 688 +++++++++++++++++++++++----------------------- test/util/BUILD | 30 +- tools/build/defs.bzl | 1 + tools/defs.bzl | 3 +- tools/images/BUILD | 4 +- 5 files changed, 363 insertions(+), 363 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ee7a8a673..e4ca5b6db 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "select_arch", "select_system") +load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "gtest", "select_arch", "select_system") package( default_visibility = ["//:sandbox"], @@ -82,14 +82,14 @@ cc_library( srcs = ["base_poll_test.cc"], hdrs = ["base_poll_test.h"], deps = [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -99,11 +99,11 @@ cc_library( hdrs = ["file_base.h"], deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -130,7 +130,7 @@ cc_library( hdrs = ["socket_test_util.h"], defines = select_system(), deps = default_net_util() + [ - "@com_google_googletest//:gtest", + gtest, "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -155,9 +155,9 @@ cc_library( hdrs = ["unix_domain_socket_test_util.h"], deps = [ ":socket_test_util", - "//test/util:test_util", "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", ], ) @@ -179,14 +179,14 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -199,13 +199,13 @@ cc_binary( ), linkstatic = 1, deps = [ + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:memory_util", "//test/util:platform_util", "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -218,9 +218,9 @@ cc_binary( ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -233,9 +233,9 @@ cc_binary( ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -247,10 +247,10 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -262,12 +262,12 @@ cc_binary( deps = [ "//test/util:cleanup", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:test_main", 
"//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -280,12 +280,11 @@ cc_binary( ], linkstatic = 1, deps = [ - # The heapchecker doesn't recognize that io_destroy munmaps. - "@com_google_googletest//:gtest", - "@com_google_absl//absl/strings", "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:memory_util", "//test/util:posix_error", "//test/util:proc_util", @@ -302,12 +301,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -320,9 +319,9 @@ cc_binary( "//:sandbox", ], deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -334,9 +333,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -348,9 +347,9 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -372,10 +371,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -388,10 +387,10 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -404,14 +403,14 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/synchronization", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/synchronization", - "@com_google_googletest//:gtest", ], ) @@ -424,12 +423,12 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_googletest//:gtest", ], ) @@ -443,12 +442,12 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:mount_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -458,9 +457,9 @@ cc_binary( srcs = ["clock_getres.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -470,11 +469,11 @@ cc_binary( srcs = ["clock_gettime.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -484,13 +483,13 @@ cc_binary( srcs = ["concurrency.cc"], linkstatic = 1, deps = [ + 
"@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:platform_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -503,9 +502,9 @@ cc_binary( ":socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -516,10 +515,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -530,9 +529,9 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -544,11 +543,11 @@ cc_binary( deps = [ "//test/util:eventfd_util", "//test/util:file_descriptor", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -561,10 +560,10 @@ cc_binary( "//test/util:epoll_util", "//test/util:eventfd_util", "//test/util:file_descriptor", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -576,10 +575,10 @@ cc_binary( deps = [ "//test/util:epoll_util", "//test/util:eventfd_util", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -589,12 +588,12 @@ cc_binary( srcs = ["exceptions.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:platform_util", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -604,10 +603,10 @@ cc_binary( srcs = ["getcpu.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -617,10 +616,10 @@ cc_binary( srcs = ["getcpu.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -630,13 +629,13 @@ cc_binary( srcs = ["getrusage.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -652,14 +651,14 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:proc_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -682,15 +681,15 @@ cc_binary( deps = [ "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:optional", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - 
"@com_google_absl//absl/types:optional", - "@com_google_googletest//:gtest", ], ) @@ -701,11 +700,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:time_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -718,10 +717,10 @@ cc_binary( ":file_base", "//test/util:cleanup", "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -731,9 +730,9 @@ cc_binary( srcs = ["fault.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -744,10 +743,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -761,18 +760,18 @@ cc_binary( "//test/util:cleanup", "//test/util:eventfd_util", "//test/util:fs_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:save_util", "//test/util:temp_path", "//test/util:test_util", "//test/util:timer_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -786,15 +785,15 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -805,13 +804,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -824,11 +823,11 @@ cc_binary( ), linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -841,10 +840,10 @@ cc_binary( ), linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -855,10 +854,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -869,10 +868,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -884,6 +883,9 @@ cc_binary( deps = [ "//test/util:cleanup", "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/time", + gtest, "//test/util:memory_util", "//test/util:save_util", "//test/util:temp_path", @@ -892,9 +894,6 @@ cc_binary( 
"//test/util:thread_util", "//test/util:time_util", "//test/util:timer_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -907,12 +906,12 @@ cc_binary( "//test/util:eventfd_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -922,9 +921,9 @@ cc_binary( srcs = ["getrandom.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -957,10 +956,10 @@ cc_binary( ":socket_test_util", ":unix_domain_socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -984,9 +983,9 @@ cc_binary( ":socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -997,6 +996,9 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", @@ -1004,9 +1006,6 @@ cc_binary( "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1018,15 +1017,15 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1039,14 +1038,14 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1057,10 +1056,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1071,6 +1070,7 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:multiprocess_util", @@ -1078,7 +1078,6 @@ cc_binary( "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1089,12 +1088,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + "@com_google_absl//absl/memory", + gtest, "//test/util:memory_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/memory", - "@com_google_googletest//:gtest", ], ) @@ -1104,11 +1103,11 @@ cc_binary( srcs = ["mincore.cc"], linkstatic = 1, deps = [ + gtest, 
"//test/util:memory_util", "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1121,10 +1120,10 @@ cc_binary( ":temp_umask", "//test/util:capability_util", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1135,11 +1134,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -1151,12 +1150,12 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:cleanup", + gtest, "//test/util:memory_util", "//test/util:multiprocess_util", "//test/util:rlimit_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1169,13 +1168,13 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:memory_util", "//test/util:multiprocess_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1188,6 +1187,9 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:mount_util", "//test/util:multiprocess_util", "//test/util:posix_error", @@ -1195,9 +1197,6 @@ cc_binary( "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1207,10 +1206,9 @@ cc_binary( srcs = ["mremap.cc"], linkstatic = 1, deps = [ - # The heap check fails due to MremapDeathTest - "@com_google_googletest//:gtest", - "@com_google_absl//absl/strings", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:multiprocess_util", @@ -1242,9 +1240,9 @@ cc_binary( srcs = ["munmap.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1261,14 +1259,14 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1282,10 +1280,10 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1299,11 +1297,11 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -1317,11 +1315,11 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", - "//test/util:test_main", - "//test/util:test_util", 
"@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -1333,16 +1331,16 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:pty_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1354,12 +1352,12 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:posix_error", "//test/util:pty_util", "//test/util:test_main", "//test/util:thread_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -1372,12 +1370,12 @@ cc_binary( "//test/syscalls/linux:socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1387,13 +1385,13 @@ cc_binary( srcs = ["pause.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1405,15 +1403,15 @@ cc_binary( deps = [ "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1426,13 +1424,13 @@ cc_binary( ":base_poll_test", "//test/util:eventfd_util", "//test/util:file_descriptor", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1443,11 +1441,11 @@ cc_binary( linkstatic = 1, deps = [ ":base_poll_test", + "@com_google_absl//absl/time", + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1458,9 +1456,9 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1472,12 +1470,12 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:cleanup", + "@com_google_absl//absl/flags:flag", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", 
- "@com_google_googletest//:gtest", ], ) @@ -1488,13 +1486,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "@com_google_absl//absl/flags:flag", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_googletest//:gtest", ], ) @@ -1505,10 +1503,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1519,6 +1517,8 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:temp_path", @@ -1526,8 +1526,6 @@ cc_binary( "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1541,13 +1539,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1559,11 +1557,11 @@ cc_binary( deps = [ "//test/util:capability_util", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1577,6 +1575,10 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:memory_util", "//test/util:posix_error", "//test/util:temp_path", @@ -1584,10 +1586,6 @@ cc_binary( "//test/util:thread_util", "//test/util:time_util", "//test/util:timer_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1601,11 +1599,11 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -1617,17 +1615,17 @@ cc_binary( deps = [ "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:optional", + gtest, "//test/util:memory_util", "//test/util:posix_error", "//test/util:proc_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/types:optional", - "@com_google_googletest//:gtest", ], ) @@ -1641,6 +1639,8 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", @@ -1648,8 +1648,6 @@ cc_binary( 
"//test/util:test_main", "//test/util:test_util", "//test/util:time_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1660,11 +1658,11 @@ cc_binary( linkstatic = 1, deps = [ ":base_poll_test", + "@com_google_absl//absl/time", + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1674,6 +1672,9 @@ cc_binary( srcs = ["ptrace.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:platform_util", @@ -1681,9 +1682,6 @@ cc_binary( "//test/util:test_util", "//test/util:thread_util", "//test/util:time_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1693,10 +1691,10 @@ cc_binary( srcs = ["pwrite64.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1710,12 +1708,12 @@ cc_binary( deps = [ ":file_base", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1729,11 +1727,11 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:endian", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -1747,10 +1745,10 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -1764,10 +1762,10 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:capability_util", "//test/util:file_descriptor", + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -1778,10 +1776,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1792,10 +1790,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1811,13 +1809,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:timer_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1832,12 +1830,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1851,11 +1849,11 @@ cc_binary( 
"//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -1879,11 +1877,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/syscalls/linux/rseq:lib", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1894,11 +1892,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + gtest, "//test/util:logging", "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1908,9 +1906,9 @@ cc_binary( srcs = ["sched.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1920,9 +1918,9 @@ cc_binary( srcs = ["sched_yield.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -1932,6 +1930,8 @@ cc_binary( srcs = ["seccomp.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/base:core_headers", + gtest, "//test/util:logging", "//test/util:memory_util", "//test/util:multiprocess_util", @@ -1939,8 +1939,6 @@ cc_binary( "//test/util:proc_util", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_googletest//:gtest", ], ) @@ -1952,14 +1950,14 @@ cc_binary( deps = [ ":base_poll_test", "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:rlimit_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1971,13 +1969,13 @@ cc_binary( deps = [ "//test/util:eventfd_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -1989,12 +1987,12 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -2005,13 +2003,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -2021,9 +2019,9 @@ cc_binary( srcs = ["sigaction.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2038,13 +2036,13 @@ cc_binary( deps = [ "//test/util:cleanup", "//test/util:fs_util", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -2057,7 +2055,7 @@ cc_binary( ), linkstatic = 1, deps = [ - 
"@com_google_googletest//:gtest", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", @@ -2075,14 +2073,14 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/synchronization", + gtest, "//test/util:logging", "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/synchronization", - "@com_google_googletest//:gtest", ], ) @@ -2092,10 +2090,10 @@ cc_binary( srcs = ["sigprocmask.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2105,13 +2103,13 @@ cc_binary( srcs = ["sigstop.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -2122,13 +2120,13 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -2144,10 +2142,10 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", - "//test/util:test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", ], alwayslink = 1, ) @@ -2160,8 +2158,8 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2174,8 +2172,8 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2191,11 +2189,11 @@ cc_library( ], deps = [ ":socket_test_util", - "//test/util:test_util", - "//test/util:thread_util", "@com_google_absl//absl/memory", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", + "//test/util:thread_util", ], alwayslink = 1, ) @@ -2212,8 +2210,8 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2230,9 +2228,9 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:memory_util", "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2250,8 +2248,8 @@ cc_library( ":ip_socket_test_util", ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2268,8 +2266,8 @@ cc_library( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2286,9 +2284,9 @@ cc_library( deps = [ ":ip_socket_test_util", ":socket_test_util", - "//test/util:test_util", "@com_google_absl//absl/memory", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", ], alwayslink = 1, ) @@ -2305,8 +2303,8 @@ cc_library( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, 
"//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2323,8 +2321,8 @@ cc_library( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2387,9 +2385,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2419,9 +2417,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2451,9 +2449,9 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2551,10 +2549,10 @@ cc_binary( ":socket_bind_to_device_util", ":socket_test_util", "//test/util:capability_util", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -2570,10 +2568,10 @@ cc_binary( ":socket_bind_to_device_util", ":socket_test_util", "//test/util:capability_util", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -2589,10 +2587,10 @@ cc_binary( ":socket_bind_to_device_util", ":socket_test_util", "//test/util:capability_util", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -2638,9 +2636,9 @@ cc_binary( deps = [ ":ip_socket_test_util", ":socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2719,15 +2717,15 @@ cc_binary( ":ip_socket_test_util", ":socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:save_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -2739,9 +2737,9 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2755,10 +2753,10 @@ cc_binary( ":socket_test_util", "//test/util:cleanup", "//test/util:file_descriptor", + "@com_google_absl//absl/strings:str_format", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", ], ) @@ -2771,9 +2769,9 @@ cc_binary( ":socket_netlink_util", ":socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -2791,9 +2789,9 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", - "//test/util:test_util", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_util", ], alwayslink = 1, ) @@ -2810,11 +2808,11 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2831,10 +2829,10 @@ 
cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2851,10 +2849,10 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2871,11 +2869,11 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, "//test/util:test_util", "//test/util:thread_util", "//test/util:timer_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2892,8 +2890,8 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_util", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -2910,10 +2908,10 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/time", + gtest, "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -3007,9 +3005,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3021,9 +3019,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3035,9 +3033,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3052,9 +3050,9 @@ cc_binary( ":socket_blocking_test_cases", ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3069,9 +3067,9 @@ cc_binary( ":ip_socket_test_util", ":socket_blocking_test_cases", ":socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3086,9 +3084,9 @@ cc_binary( ":socket_non_stream_blocking_test_cases", ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3103,9 +3101,9 @@ cc_binary( ":ip_socket_test_util", ":socket_non_stream_blocking_test_cases", ":socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3121,9 +3119,9 @@ cc_binary( ":socket_unix_cmsg_test_cases", ":socket_unix_test_cases", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3135,9 +3133,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3149,9 +3147,9 @@ cc_binary( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3164,10 +3162,10 @@ cc_binary( ":socket_netlink_util", ":socket_test_util", "//test/util:file_descriptor", + 
"@com_google_absl//absl/base:endian", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/base:endian", - "@com_google_googletest//:gtest", ], ) @@ -3183,12 +3181,12 @@ cc_binary( "//test/util:cleanup", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3199,11 +3197,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3217,12 +3215,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3235,10 +3233,10 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3248,10 +3246,10 @@ cc_binary( srcs = ["sync.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3261,10 +3259,10 @@ cc_binary( srcs = ["sysinfo.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3274,9 +3272,9 @@ cc_binary( srcs = ["syslog.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3286,10 +3284,10 @@ cc_binary( srcs = ["sysret.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3301,12 +3299,12 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/time", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3316,11 +3314,11 @@ cc_binary( srcs = ["tgkill.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:signal_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -3330,10 +3328,10 @@ cc_binary( srcs = ["time.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:proc_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3358,15 +3356,15 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:cleanup", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:signal_util", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3376,11 +3374,11 @@ cc_binary( srcs = ["tkill.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:logging", "//test/util:test_main", "//test/util:test_util", 
"//test/util:thread_util", - "@com_google_googletest//:gtest", ], ) @@ -3394,11 +3392,11 @@ cc_binary( "//test/util:capability_util", "//test/util:cleanup", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3414,12 +3412,12 @@ cc_library( deps = [ ":socket_test_util", ":unix_domain_socket_test_util", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/time", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], alwayslink = 1, ) @@ -3442,9 +3440,9 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:file_descriptor", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3455,14 +3453,14 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", "//test/util:uid_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3473,11 +3471,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3490,11 +3488,11 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3504,11 +3502,11 @@ cc_binary( srcs = ["unshare.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/synchronization", + gtest, "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/synchronization", - "@com_google_googletest//:gtest", ], ) @@ -3534,11 +3532,11 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:fs_util", + gtest, "//test/util:posix_error", "//test/util:proc_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3548,13 +3546,13 @@ cc_binary( srcs = ["vfork.cc"], linkstatic = 1, deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:test_util", "//test/util:time_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3566,6 +3564,10 @@ cc_binary( deps = [ "//test/util:cleanup", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + gtest, "//test/util:logging", "//test/util:multiprocess_util", "//test/util:posix_error", @@ -3574,10 +3576,6 @@ cc_binary( "//test/util:test_util", "//test/util:thread_util", "//test/util:time_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", ], ) @@ -3588,10 +3586,10 @@ cc_binary( linkstatic = 1, deps = [ 
"//test/util:cleanup", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3602,12 +3600,12 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:fs_util", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + gtest, "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", ], ) @@ -3618,14 +3616,14 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", - "//test/util:test_main", - "//test/util:test_util", - "//test/util:thread_util", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", ], ) @@ -3651,10 +3649,10 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:file_descriptor", + gtest, "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3664,11 +3662,11 @@ cc_binary( srcs = ["vdso_clock_gettime.cc"], linkstatic = 1, deps = [ - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -3678,10 +3676,10 @@ cc_binary( srcs = ["vsyscall.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:proc_util", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3694,11 +3692,11 @@ cc_binary( ":unix_domain_socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", - "//test/util:test_main", - "//test/util:test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", + gtest, + "//test/util:test_main", + "//test/util:test_util", ], ) @@ -3710,12 +3708,12 @@ cc_binary( deps = [ "//test/util:file_descriptor", "//test/util:fs_util", + gtest, "//test/util:memory_util", "//test/util:multiprocess_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_googletest//:gtest", ], ) @@ -3727,10 +3725,10 @@ cc_binary( deps = [ ":ip_socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3742,10 +3740,10 @@ cc_binary( deps = [ ":ip_socket_test_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + gtest, "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) @@ -3761,11 +3759,11 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/strings", + gtest, "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", ], ) diff --git a/test/util/BUILD b/test/util/BUILD index 1ac8b3fd6..1f22ebe29 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_library", "cc_test", "select_system") +load("//tools:defs.bzl", "cc_library", "cc_test", "gtest", "select_system") package( 
default_visibility = ["//:sandbox"], @@ -41,7 +41,7 @@ cc_library( ":save_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -55,7 +55,7 @@ cc_library( ":posix_error", ":test_util", "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -67,7 +67,7 @@ cc_test( ":proc_util", ":test_main", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -87,7 +87,7 @@ cc_library( ":file_descriptor", ":posix_error", "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -101,7 +101,7 @@ cc_test( ":temp_path", ":test_main", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -134,7 +134,7 @@ cc_library( ":cleanup", ":posix_error", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -183,7 +183,7 @@ cc_library( "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:variant", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -194,7 +194,7 @@ cc_test( deps = [ ":posix_error", ":test_main", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -218,7 +218,7 @@ cc_library( ":cleanup", ":posix_error", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -233,7 +233,7 @@ cc_library( ":test_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -259,7 +259,7 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -291,7 +291,7 @@ cc_library( ":posix_error", ":test_util", "@com_google_absl//absl/time", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -302,7 +302,7 @@ cc_test( deps = [ ":test_main", ":test_util", - "@com_google_googletest//:gtest", + gtest, ], ) @@ -322,7 +322,7 @@ cc_library( ":file_descriptor", ":posix_error", ":save_util", - "@com_google_googletest//:gtest", + gtest, ], ) diff --git a/tools/build/defs.bzl b/tools/build/defs.bzl index d0556abd1..967c1f900 100644 --- a/tools/build/defs.bzl +++ b/tools/build/defs.bzl @@ -18,6 +18,7 @@ cc_test = _cc_test cc_toolchain = "@bazel_tools//tools/cpp:current_cc_toolchain" go_image = _go_image go_embed_data = _go_embed_data +gtest = "@com_google_googletest//:gtest" loopback = "//tools/build:loopback" proto_library = native.proto_library pkg_deb = _pkg_deb diff --git a/tools/defs.bzl b/tools/defs.bzl index 819f12b0d..ce677cbbf 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -7,7 +7,7 @@ change for Google-internal and bazel-compatible rules. 
load("//tools/go_stateify:defs.bzl", "go_stateify") load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") -load("//tools/build:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") +load("//tools/build:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") # Delegate directly. cc_binary = _cc_binary @@ -20,6 +20,7 @@ go_embed_data = _go_embed_data go_image = _go_image go_test = _go_test go_tool_library = _go_tool_library +gtest = _gtest pkg_deb = _pkg_deb pkg_tar = _pkg_tar py_library = _py_library diff --git a/tools/images/BUILD b/tools/images/BUILD index f1699b184..fe11f08a3 100644 --- a/tools/images/BUILD +++ b/tools/images/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_binary") +load("//tools:defs.bzl", "cc_binary", "gtest") load("//tools/images:defs.bzl", "vm_image", "vm_test") package( @@ -32,8 +32,8 @@ cc_binary( srcs = ["test.cc"], linkstatic = 1, deps = [ + gtest, "//test/util:test_main", - "@com_google_googletest//:gtest", ], ) -- cgit v1.2.3 From 7c118f7e192d403e716807c0f75f3f6d077a31ba Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Fri, 31 Jan 2020 09:55:51 -0800 Subject: KVM platform does not support 32bit. 
Fixes: //test/syscalls:32bit_test_runsc_kvm Ref change: 5d569408ef94c753b7aae9392b5e4ebf7e5ea50d PiperOrigin-RevId: 292563926 --- test/util/platform_util.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/util/platform_util.cc b/test/util/platform_util.cc index 2724e63f3..c9200d381 100644 --- a/test/util/platform_util.cc +++ b/test/util/platform_util.cc @@ -20,10 +20,9 @@ namespace gvisor { namespace testing { PlatformSupport PlatformSupport32Bit() { - if (GvisorPlatform() == Platform::kPtrace) { + if (GvisorPlatform() == Platform::kPtrace || + GvisorPlatform() == Platform::kKVM) { return PlatformSupport::NotSupported; - } else if (GvisorPlatform() == Platform::kKVM) { - return PlatformSupport::Segfault; } else { return PlatformSupport::Allowed; } -- cgit v1.2.3 From 04cccaaeeed22a28a42fc4c1406b43a966a5d886 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Fri, 31 Jan 2020 14:44:50 -0800 Subject: Fix logic around AMD/Intel cases. If the support is Ignored, then the call is still executed. We simply rely on it to fall through to the int3. Therefore, we must also bail on the vendor check. PiperOrigin-RevId: 292620558 --- test/syscalls/linux/32bit.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc index 2751fb4e7..9883aef61 100644 --- a/test/syscalls/linux/32bit.cc +++ b/test/syscalls/linux/32bit.cc @@ -102,7 +102,8 @@ TEST(Syscall32Bit, Int80) { } TEST(Syscall32Bit, Sysenter) { - if (PlatformSupport32Bit() == PlatformSupport::Allowed && + if ((PlatformSupport32Bit() == PlatformSupport::Allowed || + PlatformSupport32Bit() == PlatformSupport::Ignored) && GetCPUVendor() == CPUVendor::kAMD) { // SYSENTER is an illegal instruction in compatibility mode on AMD. EXPECT_EXIT(ExitGroup32(kSysenter, kExitCode), @@ -133,7 +134,8 @@ TEST(Syscall32Bit, Sysenter) { } TEST(Syscall32Bit, Syscall) { - if (PlatformSupport32Bit() == PlatformSupport::Allowed && + if ((PlatformSupport32Bit() == PlatformSupport::Allowed || + PlatformSupport32Bit() == PlatformSupport::Ignored) && GetCPUVendor() == CPUVendor::kIntel) { // SYSCALL is an illegal instruction in compatibility mode on Intel. EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), -- cgit v1.2.3 From 9742daf3c201771d257c0d043347f4eebf3088e0 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Mon, 3 Feb 2020 12:03:38 -0800 Subject: Add packetdrill tests that use docker. 
PiperOrigin-RevId: 292973224 --- test/packetdrill/BUILD | 8 ++ test/packetdrill/Dockerfile | 9 ++ test/packetdrill/defs.bzl | 85 +++++++++++++ test/packetdrill/fin_wait2_timeout.pkt | 23 ++++ test/packetdrill/packetdrill_setup.sh | 26 ++++ test/packetdrill/packetdrill_test.sh | 213 +++++++++++++++++++++++++++++++++ 6 files changed, 364 insertions(+) create mode 100644 test/packetdrill/BUILD create mode 100644 test/packetdrill/Dockerfile create mode 100644 test/packetdrill/defs.bzl create mode 100644 test/packetdrill/fin_wait2_timeout.pkt create mode 100755 test/packetdrill/packetdrill_setup.sh create mode 100755 test/packetdrill/packetdrill_test.sh (limited to 'test') diff --git a/test/packetdrill/BUILD b/test/packetdrill/BUILD new file mode 100644 index 000000000..d113555b1 --- /dev/null +++ b/test/packetdrill/BUILD @@ -0,0 +1,8 @@ +load("defs.bzl", "packetdrill_test") + +package(licenses = ["notice"]) + +packetdrill_test( + name = "fin_wait2_timeout", + scripts = ["fin_wait2_timeout.pkt"], +) diff --git a/test/packetdrill/Dockerfile b/test/packetdrill/Dockerfile new file mode 100644 index 000000000..bd4451355 --- /dev/null +++ b/test/packetdrill/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:bionic + +RUN apt-get update +RUN apt-get install -y net-tools git iptables iputils-ping netcat tcpdump jq tar +RUN hash -r +RUN git clone --branch packetdrill-v2.0 \ + https://github.com/google/packetdrill.git +RUN cd packetdrill/gtests/net/packetdrill && ./configure && \ + apt-get install -y bison flex make && make diff --git a/test/packetdrill/defs.bzl b/test/packetdrill/defs.bzl new file mode 100644 index 000000000..582f97e0c --- /dev/null +++ b/test/packetdrill/defs.bzl @@ -0,0 +1,85 @@ +"""Defines a rule for packetdrill test targets.""" + +def _packetdrill_test_impl(ctx): + test_runner = ctx.executable._test_runner + runner = ctx.actions.declare_file("%s-runner" % ctx.label.name) + + script_paths = [] + for script in ctx.files.scripts: + script_paths.append(script.short_path) + runner_content = "\n".join([ + "#!/bin/bash", + # This test will run part in a distinct user namespace. This can cause + # permission problems, because all runfiles may not be owned by the + # current user, and no other users will be mapped in that namespace. + # Make sure that everything is readable here. + "find . -type f -exec chmod a+rx {} \\;", + "find . 
-type d -exec chmod a+rx {} \\;", + "%s %s --init_script %s -- %s\n" % ( + test_runner.short_path, + " ".join(ctx.attr.flags), + ctx.files._init_script[0].short_path, + " ".join(script_paths), + ), + ]) + ctx.actions.write(runner, runner_content, is_executable = True) + + transitive_files = depset() + if hasattr(ctx.attr._test_runner, "data_runfiles"): + transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + runfiles = ctx.runfiles( + files = [test_runner] + ctx.files._init_script + ctx.files.scripts, + transitive_files = transitive_files, + collect_default = True, + collect_data = True, + ) + return [DefaultInfo(executable = runner, runfiles = runfiles)] + +_packetdrill_test = rule( + attrs = { + "_test_runner": attr.label( + executable = True, + cfg = "host", + allow_files = True, + default = "packetdrill_test.sh", + ), + "_init_script": attr.label( + allow_single_file = True, + default = "packetdrill_setup.sh", + ), + "flags": attr.string_list( + mandatory = False, + default = [], + ), + "scripts": attr.label_list( + mandatory = True, + allow_files = True, + ), + }, + test = True, + implementation = _packetdrill_test_impl, +) + +_PACKETDRILL_TAGS = ["local", "manual"] + +def packetdrill_linux_test(name, **kwargs): + if "tags" not in kwargs: + kwargs["tags"] = _PACKETDRILL_TAGS + _packetdrill_test( + name = name + "_linux_test", + flags = ["--dut_platform", "linux"], + **kwargs + ) + +def packetdrill_netstack_test(name, **kwargs): + if "tags" not in kwargs: + kwargs["tags"] = _PACKETDRILL_TAGS + _packetdrill_test( + name = name + "_netstack_test", + flags = ["--dut_platform", "netstack"], + **kwargs + ) + +def packetdrill_test(**kwargs): + packetdrill_linux_test(**kwargs) + packetdrill_netstack_test(**kwargs) diff --git a/test/packetdrill/fin_wait2_timeout.pkt b/test/packetdrill/fin_wait2_timeout.pkt new file mode 100644 index 000000000..613f0bec9 --- /dev/null +++ b/test/packetdrill/fin_wait2_timeout.pkt @@ -0,0 +1,23 @@ +// Test that a socket in FIN_WAIT_2 eventually times out and a subsequent +// packet generates a RST. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 bind(3, ..., ...) = 0 + ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0 < S 0:0(0) win 32792 ++0 > S. 0:0(0) ack 1 <...> ++0 < P. 1:1(0) ack 1 win 257 + ++0.100 accept(3, ..., ...) = 4 +// set FIN_WAIT2 timeout to 1 seconds. ++0.100 setsockopt(4, SOL_TCP, TCP_LINGER2, [1], 4) = 0 ++0 close(4) = 0 + ++0 > F. 1:1(0) ack 1 <...> ++0 < . 1:1(0) ack 2 win 257 + ++1.1 < . 1:1(0) ack 2 win 257 ++0 > R 2:2(0) win 0 diff --git a/test/packetdrill/packetdrill_setup.sh b/test/packetdrill/packetdrill_setup.sh new file mode 100755 index 000000000..b858072f0 --- /dev/null +++ b/test/packetdrill/packetdrill_setup.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright 2018 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script runs both within the sentry context and natively. It should tweak +# TCP parameters to match expectations found in the script files. 
+sysctl -q net.ipv4.tcp_sack=1 +sysctl -q net.ipv4.tcp_rmem="4096 2097152 $((8*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 2097152 $((8*1024*1024))" + +# There may be errors from the above, but they will show up in the test logs and +# we always want to proceed from this point. It's possible that values were +# already set correctly and the nodes were not available in the namespace. +exit 0 diff --git a/test/packetdrill/packetdrill_test.sh b/test/packetdrill/packetdrill_test.sh new file mode 100755 index 000000000..614d94d74 --- /dev/null +++ b/test/packetdrill/packetdrill_test.sh @@ -0,0 +1,213 @@ +#!/bin/bash + +# Copyright 2020 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run a packetdrill test. Two docker containers are made, one for the +# Device-Under-Test (DUT) and one for the test runner. Each is attached with +# two networks, one for control packets that aid the test and one for test +# packets which are sent as part of the test and observed for correctness. + +set -euxo pipefail + +function failure() { + local lineno=$1 + local msg=$2 + local filename="$0" + echo "FAIL: $filename:$lineno: $msg" +} +trap 'failure ${LINENO} "$BASH_COMMAND"' ERR + +declare -r LONGOPTS="dut_platform:,init_script:" + +# Don't use declare below so that the error from getopt will end the script. +PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") + +eval set -- "$PARSED" + +while true; do + case "$1" in + --dut_platform) + declare -r DUT_PLATFORM="$2" + shift 2 + ;; + --init_script) + declare -r INIT_SCRIPT="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Programming error" + exit 3 + esac +done + +# All the other arguments are scripts. +declare -r scripts="$@" + +# Check that the required flags are defined in a way that is safe for "set -u". +if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then + declare -r RUNTIME="--runtime runsc-d" +elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then + declare -r RUNTIME="" +else + echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}" + exit 2 +fi +if [[ ! -x "${INIT_SCRIPT-}" ]]; then + echo "FAIL: Bad or missing --init_script: ${INIT_SCRIPT-}" + exit 2 +fi + +# Variables specific to the control network and interface start with CTRL_. +# Variables specific to the test network and interface start with TEST_. +# Variables specific to the DUT start with DUT_. +# Variables specific to the test runner start with TEST_RUNNER_. +declare -r PACKETDRILL="/packetdrill/gtests/net/packetdrill/packetdrill" +# Use random numbers so that test networks don't collide. +declare -r CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" +declare -r TEST_NET="test_net-${RANDOM}${RANDOM}" +declare -r tolerance_usecs=100000 +# On both DUT and test runner, testing packets are on the eth2 interface. +declare -r TEST_DEVICE="eth2" +# Number of bits in the *_NET_PREFIX variables. +declare -r NET_MASK="24" +function new_net_prefix() { + # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. 
+ echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" +} +# Last bits of the DUT's IP address. +declare -r DUT_NET_SUFFIX=".10" +# Control port. +declare -r CTRL_PORT="40000" +# Last bits of the test runner's IP address. +declare -r TEST_RUNNER_NET_SUFFIX=".20" +declare -r TIMEOUT="60" +declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetdrill" + +# Make sure that docker is installed. +docker --version + +function finish { + local cleanup_success=1 + for net in "${CTRL_NET}" "${TEST_NET}"; do + # Kill all processes attached to ${net}. + for docker_command in "kill" "rm"; do + (docker network inspect "${net}" \ + --format '{{range $key, $value := .Containers}}{{$key}} {{end}}' \ + | xargs -r docker "${docker_command}") || \ + cleanup_success=0 + done + # Remove the network. + docker network rm "${net}" || \ + cleanup_success=0 + done + + if ((!$cleanup_success)); then + echo "FAIL: Cleanup command failed" + exit 4 + fi +} +trap finish EXIT + +# Subnet for control packets between test runner and DUT. +declare CTRL_NET_PREFIX=$(new_net_prefix) +while ! docker network create \ + "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do + sleep 0.1 + declare CTRL_NET_PREFIX=$(new_net_prefix) +done + +# Subnet for the packets that are part of the test. +declare TEST_NET_PREFIX=$(new_net_prefix) +while ! docker network create \ + "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do + sleep 0.1 + declare TEST_NET_PREFIX=$(new_net_prefix) +done + +docker pull "${IMAGE_TAG}" + +# Create the DUT container and connect to network. +DUT=$(docker create ${RUNTIME} --privileged --rm \ + --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) +docker network connect "${CTRL_NET}" \ + --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ + || (docker kill ${DUT}; docker rm ${DUT}; false) +docker network connect "${TEST_NET}" \ + --ip "${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ + || (docker kill ${DUT}; docker rm ${DUT}; false) +docker start "${DUT}" + +# Create the test runner container and connect to network. +TEST_RUNNER=$(docker create --privileged --rm \ + --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) +docker network connect "${CTRL_NET}" \ + --ip "${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${TEST_RUNNER}" \ + || (docker kill ${TEST_RUNNER}; docker rm ${REST_RUNNER}; false) +docker network connect "${TEST_NET}" \ + --ip "${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${TEST_RUNNER}" \ + || (docker kill ${TEST_RUNNER}; docker rm ${REST_RUNNER}; false) +docker start "${TEST_RUNNER}" + +# Run tcpdump in the test runner unbuffered, without dns resolution, just on the +# interface with the test packets. +docker exec -t ${TEST_RUNNER} tcpdump -U -n -i "${TEST_DEVICE}" & + +# Start a packetdrill server on the test_runner. The packetdrill server sends +# packets and asserts that they are received. +docker exec -d "${TEST_RUNNER}" \ + ${PACKETDRILL} --wire_server --wire_server_dev="${TEST_DEVICE}" \ + --wire_server_ip="${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \ + --wire_server_port="${CTRL_PORT}" \ + --local_ip="${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \ + --remote_ip="${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" + +# Because the Linux kernel receives the SYN-ACK but didn't send the SYN it will +# issue a RST. To prevent this IPtables can be used to filter those out. +docker exec "${TEST_RUNNER}" \ + iptables -A OUTPUT -p tcp --tcp-flags RST RST -j DROP + +# Wait for the packetdrill server on the test runner to come. 
Attempt to +# connect to it from the DUT every 100 milliseconds until success. +while ! docker exec "${DUT}" \ + nc -zv "${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" "${CTRL_PORT}"; do + sleep 0.1 +done + +# Copy the packetdrill setup script to the DUT. +docker cp -L "${INIT_SCRIPT}" "${DUT}:packetdrill_setup.sh" + +# Copy the packetdrill scripts to the DUT. +declare -a dut_scripts +for script in $scripts; do + docker cp -L "${script}" "${DUT}:$(basename ${script})" + dut_scripts+=("/$(basename ${script})") +done + +# Start a packetdrill client on the DUT. The packetdrill client runs POSIX +# socket commands and also sends instructions to the server. +docker exec -t "${DUT}" \ + ${PACKETDRILL} --wire_client --wire_client_dev="${TEST_DEVICE}" \ + --wire_server_ip="${CTRL_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \ + --wire_server_port="${CTRL_PORT}" \ + --local_ip="${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" \ + --remote_ip="${TEST_NET_PREFIX}${TEST_RUNNER_NET_SUFFIX}" \ + --init_scripts=/packetdrill_setup.sh \ + --tolerance_usecs="${tolerance_usecs}" "${dut_scripts[@]}" + +echo PASS: No errors. -- cgit v1.2.3 From e7846e50f2df070a15dd33235b334e2223f715f3 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Mon, 3 Feb 2020 15:30:28 -0800 Subject: Reduce run time for //test/syscalls:socket_inet_loopback_test_runsc_ptrace. * Tests are picked for a shard differently. It now picks one test from each block, instead of picking the whole block. This makes the same kind of tests spreads across different shards. * Reduce the number of connect() calls in TCPListenClose. PiperOrigin-RevId: 293019281 --- runsc/testutil/testutil.go | 61 ++++++++++++++--------------- test/runtimes/runner.go | 9 ++--- test/syscalls/linux/socket_inet_loopback.cc | 13 +++--- test/syscalls/syscall_test_runner.go | 7 ++-- 4 files changed, 43 insertions(+), 47 deletions(-) (limited to 'test') diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go index fb22eae39..5d0b0ae54 100644 --- a/runsc/testutil/testutil.go +++ b/runsc/testutil/testutil.go @@ -434,43 +434,40 @@ func IsStatic(filename string) (bool, error) { return true, nil } -// TestBoundsForShard calculates the beginning and end indices for the test -// based on the TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. The -// returned ints are the beginning (inclusive) and end (exclusive) of the -// subslice corresponding to the shard. If either of the env vars are not -// present, then the function will return bounds that include all tests. If -// there are more shards than there are tests, then the returned list may be -// empty. -func TestBoundsForShard(numTests int) (int, int, error) { +// TestIndicesForShard returns indices for this test shard based on the +// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. +// +// If either of the env vars are not present, then the function will return all +// tests. If there are more shards than there are tests, then the returned list +// may be empty. +func TestIndicesForShard(numTests int) ([]int, error) { var ( - begin = 0 - end = numTests + shardIndex = 0 + shardTotal = 1 ) - indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS") - if indexStr == "" || totalStr == "" { - return begin, end, nil - } - // Parse index and total to ints. 
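The round-robin selection that replaces the contiguous bounds here can be sanity-checked with a short standalone Go program (the test and shard counts below are made up for illustration, not part of the change):

package main

import "fmt"

// pickIndices mirrors TestIndicesForShard: shard i runs tests i, i+total,
// i+2*total, ..., so similar tests spread across shards instead of landing
// in one contiguous block.
func pickIndices(numTests, shardIndex, shardTotal int) []int {
	var indices []int
	for pick := shardIndex; pick < numTests; pick += shardTotal {
		indices = append(indices, pick)
	}
	return indices
}

func main() {
	const numTests, shardTotal = 10, 4
	for shard := 0; shard < shardTotal; shard++ {
		fmt.Println(shard, pickIndices(numTests, shard, shardTotal))
	}
	// Output:
	// 0 [0 4 8]
	// 1 [1 5 9]
	// 2 [2 6]
	// 3 [3 7]
}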
- shardIndex, err := strconv.Atoi(indexStr) - if err != nil { - return 0, 0, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err) - } - shardTotal, err := strconv.Atoi(totalStr) - if err != nil { - return 0, 0, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err) + indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS") + if indexStr != "" && totalStr != "" { + // Parse index and total to ints. + var err error + shardIndex, err = strconv.Atoi(indexStr) + if err != nil { + return nil, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err) + } + shardTotal, err = strconv.Atoi(totalStr) + if err != nil { + return nil, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err) + } } // Calculate! - shardSize := int(math.Ceil(float64(numTests) / float64(shardTotal))) - begin = shardIndex * shardSize - end = ((shardIndex + 1) * shardSize) - if begin > numTests { - // Nothing to run. - return 0, 0, nil - } - if end > numTests { - end = numTests + var indices []int + numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal))) + for i := 0; i < numBlocks; i++ { + pick := i*shardTotal + shardIndex + if pick < numTests { + indices = append(indices, pick) + } } - return begin, end, nil + return indices, nil } diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go index bec37c69d..ddb890dbc 100644 --- a/test/runtimes/runner.go +++ b/test/runtimes/runner.go @@ -20,7 +20,6 @@ import ( "flag" "fmt" "io" - "log" "os" "sort" "strings" @@ -101,17 +100,15 @@ func getTests(d dockerutil.Docker, blacklist map[string]struct{}) ([]testing.Int // shard. tests := strings.Fields(list) sort.Strings(tests) - begin, end, err := testutil.TestBoundsForShard(len(tests)) + indices, err := testutil.TestIndicesForShard(len(tests)) if err != nil { return nil, fmt.Errorf("TestsForShard() failed: %v", err) } - log.Printf("Got bounds [%d:%d) for shard out of %d total tests", begin, end, len(tests)) - tests = tests[begin:end] var itests []testing.InternalTest - for _, tc := range tests { + for _, tci := range indices { // Capture tc in this scope. - tc := tc + tc := tests[tci] itests = append(itests, testing.InternalTest{ Name: tc, F: func(t *testing.T) { diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 3bf7081b9..b24618a88 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -325,6 +325,12 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { TestAddress const& listener = param.listener; TestAddress const& connector = param.connector; + constexpr int kAcceptCount = 32; + constexpr int kBacklog = kAcceptCount * 2; + constexpr int kFDs = 128; + constexpr int kThreadCount = 4; + constexpr int kFDsPerThread = kFDs / kThreadCount; + // Create the listening socket. FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); @@ -332,7 +338,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), listener.addr_len), SyscallSucceeds()); - ASSERT_THAT(listen(listen_fd.get(), 1001), SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); // Get the port bound by the listening socket. socklen_t addrlen = listener.addr_len; @@ -345,9 +351,6 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { DisableSave ds; // Too many system calls. 
sockaddr_storage conn_addr = connector.addr; ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); - constexpr int kFDs = 2048; - constexpr int kThreadCount = 4; - constexpr int kFDsPerThread = kFDs / kThreadCount; FileDescriptor clients[kFDs]; std::unique_ptr threads[kThreadCount]; for (int i = 0; i < kFDs; i++) { @@ -371,7 +374,7 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { for (int i = 0; i < kThreadCount; i++) { threads[i]->Join(); } - for (int i = 0; i < 32; i++) { + for (int i = 0; i < kAcceptCount; i++) { auto accepted = ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); } diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go index b9fd885ff..ae342b68c 100644 --- a/test/syscalls/syscall_test_runner.go +++ b/test/syscalls/syscall_test_runner.go @@ -450,17 +450,16 @@ func main() { } // Get subset of tests corresponding to shard. - begin, end, err := testutil.TestBoundsForShard(len(testCases)) + indices, err := testutil.TestIndicesForShard(len(testCases)) if err != nil { fatalf("TestsForShard() failed: %v", err) } - testCases = testCases[begin:end] // Run the tests. var tests []testing.InternalTest - for _, tc := range testCases { + for _, tci := range indices { // Capture tc. - tc := tc + tc := testCases[tci] testName := fmt.Sprintf("%s_%s", tc.Suite, tc.Name) tests = append(tests, testing.InternalTest{ Name: testName, -- cgit v1.2.3 From d7cd484091543827678f1548b8e5668a7a86e13f Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 4 Feb 2020 08:20:10 -0800 Subject: Add support for sentry internal pipe for gofer mounts Internal pipes are supported similarly to how internal UDS is done. It is also controlled by the same flag. Fixes #1102 PiperOrigin-RevId: 293150045 --- pkg/sentry/fs/gofer/BUILD | 2 + pkg/sentry/fs/gofer/cache_policy.go | 3 + pkg/sentry/fs/gofer/fifo.go | 40 ++++++++ pkg/sentry/fs/gofer/gofer_test.go | 2 +- pkg/sentry/fs/gofer/path.go | 183 +++++++++++++++++++++++------------ pkg/sentry/fs/gofer/session.go | 183 ++++++++++++++++++++++------------- pkg/sentry/fs/gofer/session_state.go | 12 +-- pkg/sentry/fs/gofer/socket.go | 8 +- test/syscalls/BUILD | 1 - 9 files changed, 293 insertions(+), 141 deletions(-) create mode 100644 pkg/sentry/fs/gofer/fifo.go (limited to 'test') diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index 971d3718e..fea135eea 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -9,6 +9,7 @@ go_library( "cache_policy.go", "context_file.go", "device.go", + "fifo.go", "file.go", "file_state.go", "fs.go", @@ -38,6 +39,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/host", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go index ebea03c42..07a564e92 100644 --- a/pkg/sentry/fs/gofer/cache_policy.go +++ b/pkg/sentry/fs/gofer/cache_policy.go @@ -127,6 +127,9 @@ func (cp cachePolicy) revalidate(ctx context.Context, name string, parent, child childIops, ok := child.InodeOperations.(*inodeOperations) if !ok { + if _, ok := child.InodeOperations.(*fifo); ok { + return false + } panic(fmt.Sprintf("revalidating inode operations of unknown type %T", child.InodeOperations)) } parentIops, ok := parent.InodeOperations.(*inodeOperations) diff --git a/pkg/sentry/fs/gofer/fifo.go b/pkg/sentry/fs/gofer/fifo.go new file mode 100644 index 000000000..456557058 --- 
/dev/null +++ b/pkg/sentry/fs/gofer/fifo.go @@ -0,0 +1,40 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +// +stateify savable +type fifo struct { + fs.InodeOperations + fileIops *inodeOperations +} + +var _ fs.InodeOperations = (*fifo)(nil) + +// Rename implements fs.InodeOperations. It forwards the call to the underlying +// file inode to handle the file rename. Note that file key remains the same +// after the rename to keep the endpoint mapping. +func (i *fifo) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { + return i.fileIops.Rename(ctx, inode, oldParent, oldName, newParent, newName, replacement) +} + +// StatFS implements fs.InodeOperations. +func (i *fifo) StatFS(ctx context.Context) (fs.Info, error) { + return i.fileIops.StatFS(ctx) +} diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 0c2f89ae8..2df2fe889 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -61,7 +61,7 @@ func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context ctx := contexttest.Context(t) sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{ file: rootFile, - }, root.QID, p9.AttrMaskAll(), root.Attr, false /* socket */) + }, root.QID, p9.AttrMaskAll(), root.Attr) m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{}) rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr) diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index 0c1be05ef..a35c3a23d 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -23,14 +23,24 @@ import ( "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // maxFilenameLen is the maximum length of a filename. This is dictated by 9P's // encoding of strings, which uses 2 bytes for the length prefix. const maxFilenameLen = (1 << 16) - 1 +func changeType(mode p9.FileMode, newType p9.FileMode) p9.FileMode { + if newType&^p9.FileModeMask != 0 { + panic(fmt.Sprintf("newType contained more bits than just file mode: %x", newType)) + } + clear := mode &^ p9.FileModeMask + return clear | newType +} + // Lookup loads an Inode at name into a Dirent based on the session's cache // policy. func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { @@ -69,8 +79,25 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string return nil, err } + if i.session().overrides != nil { + // Check if file belongs to a internal named pipe. 
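The changeType helper added above only swaps the file-type bits of a mode while keeping the permission bits. The same idea with plain Linux mode constants, as a standalone sketch (not the p9 package API):

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// changeType keeps the permission bits of mode and replaces its file-type
// bits, which is how a gofer-backed regular file can be presented as a FIFO
// or socket to the rest of the sentry.
func changeType(mode, newType uint32) uint32 {
	return (mode &^ unix.S_IFMT) | newType
}

func main() {
	old := uint32(unix.S_IFREG | 0644)
	fmt.Printf("%#o -> %#o\n", old, changeType(old, unix.S_IFIFO))
	// Output: 0100644 -> 010644
}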
Note that it doesn't need + // to check for sockets because it's done in newInodeOperations below. + deviceKey := device.MultiDeviceKey{ + Device: p9attr.RDev, + SecondaryDevice: i.session().connID, + Inode: qids[0].Path, + } + unlock := i.session().overrides.lock() + if pipeInode := i.session().overrides.getPipe(deviceKey); pipeInode != nil { + unlock() + pipeInode.IncRef() + return fs.NewDirent(ctx, pipeInode, name), nil + } + unlock() + } + // Construct the Inode operations. - sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false) + sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr) // Construct a positive Dirent. return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil @@ -138,7 +165,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string qid := qids[0] // Construct the InodeOperations. - sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false) + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr) // Construct the positive Dirent. d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) @@ -223,82 +250,115 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, return nil, syserror.ENAMETOOLONG } - if i.session().endpoints == nil { + if i.session().overrides == nil { return nil, syscall.EOPNOTSUPP } - // Create replaces the directory fid with the newly created/opened - // file, so clone this directory so it doesn't change out from under - // this node. - _, newFile, err := i.fileState.file.walk(ctx, nil) + // Stabilize the override map while creation is in progress. + unlock := i.session().overrides.lock() + defer unlock() + + sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket) if err != nil { return nil, err } - // We're not going to use newFile after return. - defer newFile.close(ctx) - // Stabilize the endpoint map while creation is in progress. - unlock := i.session().endpoints.lock() - defer unlock() + // Construct the positive Dirent. + childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) + i.session().overrides.addBoundEndpoint(iops.fileState.key, childDir, ep) + return childDir, nil +} - // Create a regular file in the gofer and then mark it as a socket by - // adding this inode key in the 'endpoints' map. - owner := fs.FileOwnerFromContext(ctx) - hostFile, err := newFile.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) - if err != nil { - return nil, err +// CreateFifo implements fs.InodeOperations.CreateFifo. +func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { + if len(name) > maxFilenameLen { + return syserror.ENAMETOOLONG } - // We're not going to use this file. - hostFile.Close() - i.touchModificationAndStatusChangeTime(ctx, dir) + owner := fs.FileOwnerFromContext(ctx) + mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe - // Get the attributes of the file to create inode key. - qid, mask, attr, err := getattr(ctx, newFile) - if err != nil { - return nil, err + // N.B. FIFOs use major/minor numbers 0. 
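From user space, the visible effect of the CreateFifo path being added here is simply that named pipes can be created on gofer-backed mounts. A small standalone Go check, with a hypothetical path chosen for illustration:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	// Hypothetical path on a gofer-backed mount inside the sandbox.
	const path = "/tmp/fifo-demo"
	if err := unix.Mkfifo(path, 0600); err != nil {
		panic(err)
	}
	defer unix.Unlink(path)

	var st unix.Stat_t
	if err := unix.Stat(path, &st); err != nil {
		panic(err)
	}
	fmt.Println("is fifo:", st.Mode&unix.S_IFMT == unix.S_IFIFO) // true
}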
+ if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { + if i.session().overrides == nil || err != syscall.EPERM { + return err + } + // If gofer doesn't support mknod, check if we can create an internal fifo. + return i.createInternalFifo(ctx, dir, name, owner, perm) } - key := device.MultiDeviceKey{ - Device: attr.RDev, - SecondaryDevice: i.session().connID, - Inode: qid.Path, + i.touchModificationAndStatusChangeTime(ctx, dir) + return nil +} + +func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error { + if i.session().overrides == nil { + return syserror.EPERM } - // Create child dirent. + // Stabilize the override map while creation is in progress. + unlock := i.session().overrides.lock() + defer unlock() - // Get an unopened p9.File for the file we created so that it can be - // cloned and re-opened multiple times after creation. - _, unopened, err := i.fileState.file.walk(ctx, []string{name}) + sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe) if err != nil { - return nil, err + return err } - // Construct the InodeOperations. - sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true) + // First create a pipe. + p := pipe.NewPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize) + + // Wrap the fileOps with our Fifo. + iops := &fifo{ + InodeOperations: pipe.NewInodeOperations(ctx, perm, p), + fileIops: fileOps, + } + inode := fs.NewInode(ctx, iops, dir.MountSource, sattr) // Construct the positive Dirent. childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) - i.session().endpoints.add(key, childDir, ep) - return childDir, nil + i.session().overrides.addPipe(fileOps.fileState.key, childDir, inode) + return nil } -// CreateFifo implements fs.InodeOperations.CreateFifo. -func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { - if len(name) > maxFilenameLen { - return syserror.ENAMETOOLONG +// Caller must hold Session.endpoint lock. +func (i *inodeOperations) createEndpointFile(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions, fileType p9.FileMode) (fs.StableAttr, *inodeOperations, error) { + _, dirClone, err := i.fileState.file.walk(ctx, nil) + if err != nil { + return fs.StableAttr{}, nil, err } + // We're not going to use dirClone after return. + defer dirClone.close(ctx) + // Create a regular file in the gofer and then mark it as a socket by + // adding this inode key in the 'overrides' map. owner := fs.FileOwnerFromContext(ctx) - mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe - - // N.B. FIFOs use major/minor numbers 0. - if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { - return err + hostFile, err := dirClone.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) + if err != nil { + return fs.StableAttr{}, nil, err } + // We're not going to use this file. + hostFile.Close() i.touchModificationAndStatusChangeTime(ctx, dir) - return nil + + // Get the attributes of the file to create inode key. + qid, mask, attr, err := getattr(ctx, dirClone) + if err != nil { + return fs.StableAttr{}, nil, err + } + + // Get an unopened p9.File for the file we created so that it can be + // cloned and re-opened multiple times after creation. 
+ _, unopened, err := i.fileState.file.walk(ctx, []string{name}) + if err != nil { + return fs.StableAttr{}, nil, err + } + + // Construct new inode with file type overridden. + attr.Mode = changeType(attr.Mode, fileType) + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr) + return sattr, iops, nil } // Remove implements InodeOperations.Remove. @@ -307,20 +367,23 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string return syserror.ENAMETOOLONG } - var key device.MultiDeviceKey - removeSocket := false - if i.session().endpoints != nil { - // Find out if file being deleted is a socket that needs to be + var key *device.MultiDeviceKey + if i.session().overrides != nil { + // Find out if file being deleted is a socket or pipe that needs to be // removed from endpoint map. if d, err := i.Lookup(ctx, dir, name); err == nil { defer d.DecRef() - if fs.IsSocket(d.Inode.StableAttr) { - child := d.Inode.InodeOperations.(*inodeOperations) - key = child.fileState.key - removeSocket = true - // Stabilize the endpoint map while deletion is in progress. - unlock := i.session().endpoints.lock() + if fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr) { + switch iops := d.Inode.InodeOperations.(type) { + case *inodeOperations: + key = &iops.fileState.key + case *fifo: + key = &iops.fileIops.fileState.key + } + + // Stabilize the override map while deletion is in progress. + unlock := i.session().overrides.lock() defer unlock() } } @@ -329,8 +392,8 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string if err := i.fileState.file.unlinkAt(ctx, name, 0); err != nil { return err } - if removeSocket { - i.session().endpoints.remove(key) + if key != nil { + i.session().overrides.remove(*key) } i.touchModificationAndStatusChangeTime(ctx, dir) diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 498c4645a..f6b3ef178 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -33,60 +33,107 @@ import ( var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize // +stateify savable -type endpointMaps struct { - // mu protexts the direntMap, the keyMap, and the pathMap below. - mu sync.RWMutex `state:"nosave"` +type overrideInfo struct { + dirent *fs.Dirent + + // endpoint is set when dirent points to a socket. inode must not be set. + endpoint transport.BoundEndpoint + + // inode is set when dirent points to a pipe. endpoint must not be set. + inode *fs.Inode +} - // direntMap links sockets to their dirents. - // It is filled concurrently with the keyMap and is stored upon save. - // Before saving, this map is used to populate the pathMap. - direntMap map[transport.BoundEndpoint]*fs.Dirent +func (l *overrideInfo) inodeType() fs.InodeType { + switch { + case l.endpoint != nil: + return fs.Socket + case l.inode != nil: + return fs.Pipe + } + panic("endpoint or node must be set") +} - // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets. +// +stateify savable +type overrideMaps struct { + // mu protexts the keyMap, and the pathMap below. + mu sync.RWMutex `state:"nosave"` + + // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets/pipes. // It is not stored during save because the inode ID may change upon restore. - keyMap map[device.MultiDeviceKey]transport.BoundEndpoint `state:"nosave"` + keyMap map[device.MultiDeviceKey]*overrideInfo `state:"nosave"` - // pathMap links the sockets to their paths. 
+ // pathMap links the sockets/pipes to their paths. // It is filled before saving from the direntMap and is stored upon save. // Upon restore, this map is used to re-populate the keyMap. - pathMap map[transport.BoundEndpoint]string + pathMap map[*overrideInfo]string +} + +// addBoundEndpoint adds the bound endpoint to the map. +// A reference is taken on the dirent argument. +// +// Precondition: maps must have been locked with 'lock'. +func (e *overrideMaps) addBoundEndpoint(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) { + d.IncRef() + e.keyMap[key] = &overrideInfo{dirent: d, endpoint: ep} } -// add adds the endpoint to the maps. +// addPipe adds the pipe inode to the map. // A reference is taken on the dirent argument. // // Precondition: maps must have been locked with 'lock'. -func (e *endpointMaps) add(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) { - e.keyMap[key] = ep +func (e *overrideMaps) addPipe(key device.MultiDeviceKey, d *fs.Dirent, inode *fs.Inode) { d.IncRef() - e.direntMap[ep] = d + e.keyMap[key] = &overrideInfo{dirent: d, inode: inode} } // remove deletes the key from the maps. // // Precondition: maps must have been locked with 'lock'. -func (e *endpointMaps) remove(key device.MultiDeviceKey) { - endpoint := e.get(key) +func (e *overrideMaps) remove(key device.MultiDeviceKey) { + endpoint := e.keyMap[key] delete(e.keyMap, key) - - d := e.direntMap[endpoint] - d.DecRef() - delete(e.direntMap, endpoint) + endpoint.dirent.DecRef() } // lock blocks other addition and removal operations from happening while // the backing file is being created or deleted. Returns a function that unlocks // the endpoint map. -func (e *endpointMaps) lock() func() { +func (e *overrideMaps) lock() func() { e.mu.Lock() return func() { e.mu.Unlock() } } -// get returns the endpoint mapped to the given key. +// getBoundEndpoint returns the bound endpoint mapped to the given key. // -// Precondition: maps must have been locked for reading. -func (e *endpointMaps) get(key device.MultiDeviceKey) transport.BoundEndpoint { - return e.keyMap[key] +// Precondition: maps must have been locked. +func (e *overrideMaps) getBoundEndpoint(key device.MultiDeviceKey) transport.BoundEndpoint { + if v := e.keyMap[key]; v != nil { + return v.endpoint + } + return nil +} + +// getPipe returns the pipe inode mapped to the given key. +// +// Precondition: maps must have been locked. +func (e *overrideMaps) getPipe(key device.MultiDeviceKey) *fs.Inode { + if v := e.keyMap[key]; v != nil { + return v.inode + } + return nil +} + +// getType returns the inode type if there is a corresponding endpoint for the +// given key. Returns false otherwise. +func (e *overrideMaps) getType(key device.MultiDeviceKey) (fs.InodeType, bool) { + e.mu.Lock() + v := e.keyMap[key] + e.mu.Unlock() + + if v != nil { + return v.inodeType(), true + } + return 0, false } // session holds state for each 9p session established during sys_mount. @@ -137,16 +184,16 @@ type session struct { // mounter is the EUID/EGID that mounted this file system. mounter fs.FileOwner `state:"wait"` - // endpoints is used to map inodes that represent socket files to their - // corresponding endpoint. Socket files are created as regular files in the - // gofer and their presence in this map indicate that they should indeed be - // socket files. This allows unix domain sockets to be used with paths that - // belong to a gofer. 
+ // overrides is used to map inodes that represent socket/pipes files to their + // corresponding endpoint/iops. These files are created as regular files in + // the gofer and their presence in this map indicate that they should indeed + // be socket/pipe files. This allows unix domain sockets and named pipes to + // be used with paths that belong to a gofer. // // TODO(gvisor.dev/issue/1200): there are few possible races with someone // stat'ing the file and another deleting it concurrently, where the file // will not be reported as socket file. - endpoints *endpointMaps `state:"wait"` + overrides *overrideMaps `state:"wait"` } // Destroy tears down the session. @@ -179,15 +226,21 @@ func (s *session) SaveInodeMapping(inode *fs.Inode, path string) { // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs, // because overlay copyUp may have changed them out from under us. // So much for "immutable". - sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr - s.inodeMappings[sattr.InodeID] = path + switch iops := inode.InodeOperations.(type) { + case *inodeOperations: + s.inodeMappings[iops.fileState.sattr.InodeID] = path + case *fifo: + s.inodeMappings[iops.fileIops.fileState.sattr.InodeID] = path + default: + panic(fmt.Sprintf("Invalid type: %T", iops)) + } } -// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File and attributes -// (p9.QID, p9.AttrMask, p9.Attr). +// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File +// and attributes (p9.QID, p9.AttrMask, p9.Attr). // // Endpoints lock must not be held if socket == false. -func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr, socket bool) (fs.StableAttr, *inodeOperations) { +func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) { deviceKey := device.MultiDeviceKey{ Device: attr.RDev, SecondaryDevice: s.connID, @@ -201,17 +254,11 @@ func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p BlockSize: bsize(attr), } - if s.endpoints != nil { - if socket { - sattr.Type = fs.Socket - } else { - // If unix sockets are allowed on this filesystem, check if this file is - // supposed to be a socket file. - unlock := s.endpoints.lock() - if s.endpoints.get(deviceKey) != nil { - sattr.Type = fs.Socket - } - unlock() + if s.overrides != nil && sattr.Type == fs.RegularFile { + // If overrides are allowed on this filesystem, check if this file is + // supposed to be of a different type, e.g. socket. + if t, ok := s.overrides.getType(deviceKey); ok { + sattr.Type = t } } @@ -267,7 +314,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF s.EnableLeakCheck("gofer.session") if o.privateunixsocket { - s.endpoints = newEndpointMaps() + s.overrides = newOverrideMaps() } // Construct the MountSource with the session and superBlockFlags. @@ -305,26 +352,24 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF return nil, err } - sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr, false) + sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr) return fs.NewInode(ctx, iops, m, sattr), nil } -// newEndpointMaps creates a new endpointMaps. 
-func newEndpointMaps() *endpointMaps { - return &endpointMaps{ - direntMap: make(map[transport.BoundEndpoint]*fs.Dirent), - keyMap: make(map[device.MultiDeviceKey]transport.BoundEndpoint), - pathMap: make(map[transport.BoundEndpoint]string), +// newOverrideMaps creates a new overrideMaps. +func newOverrideMaps() *overrideMaps { + return &overrideMaps{ + keyMap: make(map[device.MultiDeviceKey]*overrideInfo), + pathMap: make(map[*overrideInfo]string), } } -// fillKeyMap populates key and dirent maps upon restore from saved -// pathmap. +// fillKeyMap populates key and dirent maps upon restore from saved pathmap. func (s *session) fillKeyMap(ctx context.Context) error { - unlock := s.endpoints.lock() + unlock := s.overrides.lock() defer unlock() - for ep, dirPath := range s.endpoints.pathMap { + for ep, dirPath := range s.overrides.pathMap { _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath)) if err != nil { return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err) @@ -341,25 +386,25 @@ func (s *session) fillKeyMap(ctx context.Context) error { Inode: qid.Path, } - s.endpoints.keyMap[key] = ep + s.overrides.keyMap[key] = ep } return nil } -// fillPathMap populates paths for endpoints from dirents in direntMap +// fillPathMap populates paths for overrides from dirents in direntMap // before save. func (s *session) fillPathMap() error { - unlock := s.endpoints.lock() + unlock := s.overrides.lock() defer unlock() - for ep, dir := range s.endpoints.direntMap { - mountRoot := dir.MountRoot() + for _, endpoint := range s.overrides.keyMap { + mountRoot := endpoint.dirent.MountRoot() defer mountRoot.DecRef() - dirPath, _ := dir.FullName(mountRoot) + dirPath, _ := endpoint.dirent.FullName(mountRoot) if dirPath == "" { return fmt.Errorf("error getting path from dirent") } - s.endpoints.pathMap[ep] = dirPath + s.overrides.pathMap[endpoint] = dirPath } return nil } @@ -368,7 +413,7 @@ func (s *session) fillPathMap() error { func (s *session) restoreEndpointMaps(ctx context.Context) error { // When restoring, only need to create the keyMap because the dirent and path // maps got stored through the save. - s.endpoints.keyMap = make(map[device.MultiDeviceKey]transport.BoundEndpoint) + s.overrides.keyMap = make(map[device.MultiDeviceKey]*overrideInfo) if err := s.fillKeyMap(ctx); err != nil { return fmt.Errorf("failed to insert sockets into endpoint map: %v", err) } @@ -376,6 +421,6 @@ func (s *session) restoreEndpointMaps(ctx context.Context) error { // Re-create pathMap because it can no longer be trusted as socket paths can // change while process continues to run. Empty pathMap will be re-filled upon // next save. - s.endpoints.pathMap = make(map[transport.BoundEndpoint]string) + s.overrides.pathMap = make(map[*overrideInfo]string) return nil } diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 0285c5361..111da59f9 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -25,9 +25,9 @@ import ( // beforeSave is invoked by stateify. 
func (s *session) beforeSave() { - if s.endpoints != nil { + if s.overrides != nil { if err := s.fillPathMap(); err != nil { - panic("failed to save paths to endpoint map before saving" + err.Error()) + panic("failed to save paths to override map before saving" + err.Error()) } } } @@ -74,10 +74,10 @@ func (s *session) afterLoad() { panic(fmt.Sprintf("new attach name %v, want %v", opts.aname, s.aname)) } - // Check if endpointMaps exist when uds sockets are enabled - // (only pathmap will actualy have been saved). - if opts.privateunixsocket != (s.endpoints != nil) { - panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.endpoints != nil)) + // Check if overrideMaps exist when uds sockets are enabled (only pathmaps + // will actually have been saved). + if opts.privateunixsocket != (s.overrides != nil) { + panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.overrides != nil)) } if args.Flags != s.superBlockFlags { panic(fmt.Sprintf("new mount flags %v, want %v", args.Flags, s.superBlockFlags)) diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 376cfce2c..10ba2f5f0 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -32,15 +32,15 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport. return nil } - if i.session().endpoints != nil { - unlock := i.session().endpoints.lock() + if i.session().overrides != nil { + unlock := i.session().overrides.lock() defer unlock() - ep := i.session().endpoints.get(i.fileState.key) + ep := i.session().overrides.getBoundEndpoint(i.fileState.key) if ep != nil { return ep } - // Not found in endpoints map, it may be a gofer backed unix socket... + // Not found in overrides map, it may be a gofer backed unix socket... } inode.IncRef() diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 40e974314..8f2b75a1c 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -225,7 +225,6 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:mknod_test", - use_tmpfs = True, # mknod is not supported over gofer. ) syscall_test( -- cgit v1.2.3 From c5d4041623ac6405135e966af6d06c178a86870d Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Tue, 4 Feb 2020 12:53:10 -0800 Subject: Include socket_ip_udp_loopback.cc in exportes_files So it can be included in fuchsia's syscall tests PiperOrigin-RevId: 293208306 --- test/syscalls/linux/BUILD | 1 + 1 file changed, 1 insertion(+) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index e4ca5b6db..737e2329f 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -11,6 +11,7 @@ exports_files( "socket_inet_loopback.cc", "socket_ip_loopback_blocking.cc", "socket_ip_tcp_loopback.cc", + "socket_ip_udp_loopback.cc", "socket_ipv4_udp_unbound_loopback.cc", "tcp_socket.cc", "udp_socket.cc", -- cgit v1.2.3 From 6823b5e244a5748032130574ae3a25a0a36bbbf5 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 4 Feb 2020 13:05:30 -0800 Subject: timer_create(2) should return 0 on success The timer ID is copied out to the argument. 
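A rough user-space illustration of that contract (Go via golang.org/x/sys/unix; a sketch, not part of the change): the raw syscall returns 0 on success and the new timer ID arrives through the output pointer.

package main

import (
	"fmt"
	"unsafe"

	"golang.org/x/sys/unix"
)

func main() {
	// A nil sigevent selects the default notification (SIGALRM). On success
	// the syscall returns 0 and writes the new timer ID through &id.
	var id int32
	ret, _, errno := unix.Syscall(unix.SYS_TIMER_CREATE,
		uintptr(unix.CLOCK_MONOTONIC), 0, uintptr(unsafe.Pointer(&id)))
	if errno != 0 {
		panic(errno)
	}
	fmt.Println("return value:", ret, "timer id:", id) // return value: 0
}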
Fixes #1738 PiperOrigin-RevId: 293210801 --- pkg/sentry/syscalls/linux/sys_timer.go | 2 +- test/syscalls/linux/timers.cc | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go index 432351917..a4c400f87 100644 --- a/pkg/sentry/syscalls/linux/sys_timer.go +++ b/pkg/sentry/syscalls/linux/sys_timer.go @@ -146,7 +146,7 @@ func TimerCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S return 0, nil, err } - return uintptr(id), nil, nil + return 0, nil, nil } // TimerSettime implements linux syscall timer_settime(2). diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc index 3db18d7ac..2f92c27da 100644 --- a/test/syscalls/linux/timers.cc +++ b/test/syscalls/linux/timers.cc @@ -297,9 +297,13 @@ class IntervalTimer { PosixErrorOr TimerCreate(clockid_t clockid, const struct sigevent& sev) { int timerid; - if (syscall(SYS_timer_create, clockid, &sev, &timerid) < 0) { + int ret = syscall(SYS_timer_create, clockid, &sev, &timerid); + if (ret < 0) { return PosixError(errno, "timer_create"); } + if (ret > 0) { + return PosixError(EINVAL, "timer_create should never return positive"); + } MaybeSave(); return IntervalTimer(timerid); } @@ -317,6 +321,18 @@ TEST(IntervalTimerTest, IsInitiallyStopped) { EXPECT_EQ(0, its.it_value.tv_nsec); } +// Kernel can create multiple timers without issue. +// +// Regression test for gvisor.dev/issue/1738. +TEST(IntervalTimerTest, MultipleTimers) { + struct sigevent sev = {}; + sev.sigev_notify = SIGEV_NONE; + const auto timer1 = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); + const auto timer2 = + ASSERT_NO_ERRNO_AND_VALUE(TimerCreate(CLOCK_MONOTONIC, sev)); +} + TEST(IntervalTimerTest, SingleShotSilent) { struct sigevent sev = {}; sev.sigev_notify = SIGEV_NONE; -- cgit v1.2.3 From a26a954946ad2e7910d3ad7578960a93b73a1f9b Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Tue, 4 Feb 2020 15:20:30 -0800 Subject: Add socket connection stress test. Tests 65k connection attempts on common types of sockets to check for port leaks. Also fixes a bug where dual-stack sockets wouldn't properly re-queue segments received while closing. PiperOrigin-RevId: 293241166 --- pkg/tcpip/transport/tcp/connect.go | 4 ++ test/syscalls/BUILD | 9 +++ test/syscalls/linux/BUILD | 17 +++++ test/syscalls/linux/ip_socket_test_util.cc | 27 +++++++ test/syscalls/linux/ip_socket_test_util.h | 15 ++++ test/syscalls/linux/socket_generic_stress.cc | 83 ++++++++++++++++++++++ test/syscalls/linux/socket_test_util.cc | 101 ++++++++++++++++++++++++--- test/syscalls/linux/socket_test_util.h | 6 ++ 8 files changed, 251 insertions(+), 11 deletions(-) create mode 100644 test/syscalls/linux/socket_generic_stress.cc (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 9ff7ac261..5c5397823 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -989,6 +989,10 @@ func (e *endpoint) transitionToStateCloseLocked() { // to any other listening endpoint. We reply with RST if we cannot find one. func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) { ep := e.stack.FindTransportEndpoint(e.NetProto, e.TransProto, e.ID, &s.route) + if ep == nil && e.NetProto == header.IPv6ProtocolNumber && e.EndpointInfo.TransportEndpointInfo.ID.LocalAddress.To4() != "" { + // Dual-stack socket, try IPv4. 
+ ep = e.stack.FindTransportEndpoint(header.IPv4ProtocolNumber, e.TransProto, e.ID, &s.route) + } if ep == nil { replyWithReset(s) s.decRef() diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 8f2b75a1c..31d239c0e 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -45,6 +45,15 @@ syscall_test(test = "//test/syscalls/linux:brk_test") syscall_test(test = "//test/syscalls/linux:socket_test") +syscall_test( + size = "large", + shard_count = 50, + # Takes too long for TSAN. Since this is kind of a stress test that doesn't + # involve much concurrency, TSAN's usefulness here is limited anyway. + tags = ["nogotsan"], + test = "//test/syscalls/linux:socket_stress_test", +) + syscall_test( add_overlay = True, test = "//test/syscalls/linux:chdir_test", diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 737e2329f..273b014d6 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -136,6 +136,7 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", "//test/util:file_descriptor", "//test/util:posix_error", "//test/util:temp_path", @@ -2151,6 +2152,22 @@ cc_library( alwayslink = 1, ) +cc_binary( + name = "socket_stress_test", + testonly = 1, + srcs = [ + "socket_generic_stress.cc", + ], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_library( name = "socket_unix_dgram_test_cases", testonly = 1, diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index 6b472eb2f..bba022a41 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -79,6 +79,33 @@ SocketPairKind DualStackTCPAcceptBindSocketPair(int type) { /* dual_stack = */ true)}; } +SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv6 TCP socket"); + return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected IPv4 TCP socket"); + return SocketPairKind{description, AF_INET, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET, type | SOCK_STREAM, 0, + /* dual_stack = */ false)}; +} + +SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type) { + std::string description = + absl::StrCat(DescribeSocketType(type), "connected dual stack TCP socket"); + return SocketPairKind{description, AF_INET6, type | SOCK_STREAM, IPPROTO_TCP, + TCPAcceptBindPersistentListenerSocketPairCreator( + AF_INET6, type | SOCK_STREAM, 0, + /* dual_stack = */ true)}; +} + SocketPairKind IPv6UDPBidirectionalBindSocketPair(int type) { std::string description = absl::StrCat(DescribeSocketType(type), "connected IPv6 UDP socket"); diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h index 0f58e0f77..083ebbcf0 100644 --- a/test/syscalls/linux/ip_socket_test_util.h +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -50,6 +50,21 @@ SocketPairKind IPv4TCPAcceptBindSocketPair(int type); // given type bound to the IPv4 
loopback. SocketPairKind DualStackTCPAcceptBindSocketPair(int type); +// IPv6TCPAcceptBindPersistentListenerSocketPair is like +// IPv6TCPAcceptBindSocketPair except it uses a persistent listening socket to +// create all socket pairs. +SocketPairKind IPv6TCPAcceptBindPersistentListenerSocketPair(int type); + +// IPv4TCPAcceptBindPersistentListenerSocketPair is like +// IPv4TCPAcceptBindSocketPair except it uses a persistent listening socket to +// create all socket pairs. +SocketPairKind IPv4TCPAcceptBindPersistentListenerSocketPair(int type); + +// DualStackTCPAcceptBindPersistentListenerSocketPair is like +// DualStackTCPAcceptBindSocketPair except it uses a persistent listening socket +// to create all socket pairs. +SocketPairKind DualStackTCPAcceptBindPersistentListenerSocketPair(int type); + // IPv6UDPBidirectionalBindSocketPair returns a SocketPairKind that represents // SocketPairs created with bind() and connect() syscalls with AF_INET6 and the // given type bound to the IPv6 loopback. diff --git a/test/syscalls/linux/socket_generic_stress.cc b/test/syscalls/linux/socket_generic_stress.cc new file mode 100644 index 000000000..6a232238d --- /dev/null +++ b/test/syscalls/linux/socket_generic_stress.cc @@ -0,0 +1,83 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +// Test fixture for tests that apply to pairs of connected sockets. +using ConnectStressTest = SocketPairTest; + +TEST_P(ConnectStressTest, Reset65kTimes) { + for (int i = 0; i < 1 << 16; ++i) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + // Send some data to ensure that the connection gets reset and the port gets + // released immediately. This avoids either end entering TIME-WAIT. + char sent_data[100] = {}; + ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), + SyscallSucceedsWithValue(sizeof(sent_data))); + } +} + +INSTANTIATE_TEST_SUITE_P( + AllConnectedSockets, ConnectStressTest, + ::testing::Values(IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + DualStackUDPBidirectionalBindSocketPair(0), + + // Without REUSEADDR, we get port exhaustion on Linux. + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn)(IPv6TCPAcceptBindSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn)(IPv4TCPAcceptBindSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + DualStackTCPAcceptBindSocketPair(0)))); + +// Test fixture for tests that apply to pairs of connected sockets created with +// a persistent listener (if applicable). 
+using PersistentListenerConnectStressTest = SocketPairTest; + +TEST_P(PersistentListenerConnectStressTest, 65kTimes) { + for (int i = 0; i < 1 << 16; ++i) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + } +} + +INSTANTIATE_TEST_SUITE_P( + AllConnectedSockets, PersistentListenerConnectStressTest, + ::testing::Values( + IPv6UDPBidirectionalBindSocketPair(0), + IPv4UDPBidirectionalBindSocketPair(0), + DualStackUDPBidirectionalBindSocketPair(0), + + // Without REUSEADDR, we get port exhaustion on Linux. + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + IPv6TCPAcceptBindPersistentListenerSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + IPv4TCPAcceptBindPersistentListenerSocketPair(0)), + SetSockOpt(SOL_SOCKET, SO_REUSEADDR, &kSockOptOn)( + DualStackTCPAcceptBindPersistentListenerSocketPair(0)))); + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc index eff7d577e..c0c5ab3fe 100644 --- a/test/syscalls/linux/socket_test_util.cc +++ b/test/syscalls/linux/socket_test_util.cc @@ -18,10 +18,13 @@ #include #include +#include + #include "gtest/gtest.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/time/clock.h" +#include "absl/types/optional.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" @@ -109,7 +112,10 @@ Creator AcceptBindSocketPairCreator(bool abstract, int domain, MaybeSave(); // Unlinked path. } - return absl::make_unique(connected, accepted, bind_addr, + // accepted is before connected to destruct connected before accepted. + // Destructors for nonstatic member objects are called in the reverse order + // in which they appear in the class declaration. + return absl::make_unique(accepted, connected, bind_addr, extra_addr); }; } @@ -311,11 +317,16 @@ PosixErrorOr BindIP(int fd, bool dual_stack) { } template -PosixErrorOr> CreateTCPAcceptBindSocketPair( - int bound, int connected, int type, bool dual_stack) { - ASSIGN_OR_RETURN_ERRNO(T bind_addr, BindIP(bound, dual_stack)); - RETURN_ERROR_IF_SYSCALL_FAIL(listen(bound, /* backlog = */ 5)); +PosixErrorOr TCPBindAndListen(int fd, bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T addr, BindIP(fd, dual_stack)); + RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd, /* backlog = */ 5)); + return addr; +} +template +PosixErrorOr> +CreateTCPConnectAcceptSocketPair(int bound, int connected, int type, + bool dual_stack, T bind_addr) { int connect_result = 0; RETURN_ERROR_IF_SYSCALL_FAIL( (connect_result = RetryEINTR(connect)( @@ -358,16 +369,27 @@ PosixErrorOr> CreateTCPAcceptBindSocketPair( absl::SleepFor(absl::Seconds(1)); } - // Cleanup no longer needed resources. - RETURN_ERROR_IF_SYSCALL_FAIL(close(bound)); - MaybeSave(); // Successful close. - T extra_addr = {}; LocalhostAddr(&extra_addr, dual_stack); return absl::make_unique(connected, accepted, bind_addr, extra_addr); } +template +PosixErrorOr> CreateTCPAcceptBindSocketPair( + int bound, int connected, int type, bool dual_stack) { + ASSIGN_OR_RETURN_ERRNO(T bind_addr, TCPBindAndListen(bound, dual_stack)); + + auto result = CreateTCPConnectAcceptSocketPair(bound, connected, type, + dual_stack, bind_addr); + + // Cleanup no longer needed resources. + RETURN_ERROR_IF_SYSCALL_FAIL(close(bound)); + MaybeSave(); // Successful close. 
+ + return result; +} + Creator TCPAcceptBindSocketPairCreator(int domain, int type, int protocol, bool dual_stack) { @@ -389,6 +411,63 @@ Creator TCPAcceptBindSocketPairCreator(int domain, int type, }; } +Creator TCPAcceptBindPersistentListenerSocketPairCreator( + int domain, int type, int protocol, bool dual_stack) { + // These are lazily initialized below, on the first call to the returned + // lambda. These values are private to each returned lambda, but shared across + // invocations of a specific lambda. + // + // The sharing allows pairs created with the same parameters to share a + // listener. This prevents future connects from failing if the connecting + // socket selects a port which had previously been used by a listening socket + // that still has some connections in TIME-WAIT. + // + // The lazy initialization is to avoid creating sockets during parameter + // enumeration. This is important because parameters are enumerated during the + // build process where networking may not be available. + auto listener = std::make_shared>(absl::optional()); + auto addr4 = std::make_shared>( + absl::optional()); + auto addr6 = std::make_shared>( + absl::optional()); + + return [=]() -> PosixErrorOr> { + int connected; + RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol)); + MaybeSave(); // Successful socket creation. + + // Share the listener across invocations. + if (!listener->has_value()) { + int fd = socket(domain, type, protocol); + if (fd < 0) { + return PosixError(errno, absl::StrCat("socket(", domain, ", ", type, + ", ", protocol, ")")); + } + listener->emplace(fd); + MaybeSave(); // Successful socket creation. + } + + // Bind the listener once, but create a new connect/accept pair each + // time. + if (domain == AF_INET) { + if (!addr4->has_value()) { + addr4->emplace( + TCPBindAndListen(listener->value(), dual_stack) + .ValueOrDie()); + } + return CreateTCPConnectAcceptSocketPair(listener->value(), connected, + type, dual_stack, addr4->value()); + } + if (!addr6->has_value()) { + addr6->emplace( + TCPBindAndListen(listener->value(), dual_stack) + .ValueOrDie()); + } + return CreateTCPConnectAcceptSocketPair(listener->value(), connected, type, + dual_stack, addr6->value()); + }; +} + template PosixErrorOr> CreateUDPBoundSocketPair( int sock1, int sock2, int type, bool dual_stack) { @@ -518,8 +597,8 @@ size_t CalculateUnixSockAddrLen(const char* sun_path) { if (sun_path[0] == 0) { return sizeof(sockaddr_un); } - // Filesystem addresses use the address length plus the 2 byte sun_family and - // null terminator. + // Filesystem addresses use the address length plus the 2 byte sun_family + // and null terminator. return strlen(sun_path) + 3; } diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h index 2dbb8bed3..bfaa6e397 100644 --- a/test/syscalls/linux/socket_test_util.h +++ b/test/syscalls/linux/socket_test_util.h @@ -273,6 +273,12 @@ Creator TCPAcceptBindSocketPairCreator(int domain, int type, int protocol, bool dual_stack); +// TCPAcceptBindPersistentListenerSocketPairCreator is like +// TCPAcceptBindSocketPairCreator, except it uses the same listening socket to +// create all SocketPairs. +Creator TCPAcceptBindPersistentListenerSocketPairCreator( + int domain, int type, int protocol, bool dual_stack); + // UDPBidirectionalBindSocketPairCreator returns a Creator that // obtains file descriptors by invoking the bind() and connect() syscalls on UDP // sockets. 
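The persistent-listener creators above exist so that 65k pair creations share one listening socket rather than binding a fresh listener per iteration, avoiding collisions with ports a previous listener left in TIME-WAIT. A rough standalone Go sketch of the same pattern (all names here are invented for illustration):

package main

import (
	"fmt"
	"net"
	"sync"
)

var (
	listenerOnce sync.Once
	listener     net.Listener
)

// connectedPair returns a connected (client, server) pair. The shared
// listener is created lazily on first use and reused by every later call.
func connectedPair() (net.Conn, net.Conn, error) {
	listenerOnce.Do(func() {
		var err error
		listener, err = net.Listen("tcp", "127.0.0.1:0")
		if err != nil {
			panic(err)
		}
	})
	client, err := net.Dial("tcp", listener.Addr().String())
	if err != nil {
		return nil, nil, err
	}
	server, err := listener.Accept()
	if err != nil {
		client.Close()
		return nil, nil, err
	}
	return client, server, nil
}

func main() {
	for i := 0; i < 3; i++ { // the real stress test loops 1<<16 times
		c, s, err := connectedPair()
		if err != nil {
			panic(err)
		}
		c.Close()
		s.Close()
	}
	fmt.Println("done")
}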
-- cgit v1.2.3 From 665b614e4a6e715bac25bea15c5c29184016e549 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Tue, 4 Feb 2020 18:04:26 -0800 Subject: Support RTM_NEWADDR and RTM_GETLINK in (rt)netlink. PiperOrigin-RevId: 293271055 --- pkg/sentry/inet/inet.go | 4 + pkg/sentry/inet/test_stack.go | 6 + pkg/sentry/socket/hostinet/stack.go | 5 + pkg/sentry/socket/netlink/BUILD | 14 +- pkg/sentry/socket/netlink/message.go | 129 +++++++++++ pkg/sentry/socket/netlink/message_test.go | 312 +++++++++++++++++++++++++++ pkg/sentry/socket/netlink/provider.go | 2 +- pkg/sentry/socket/netlink/route/BUILD | 2 - pkg/sentry/socket/netlink/route/protocol.go | 238 ++++++++++++++------ pkg/sentry/socket/netlink/socket.go | 54 ++--- pkg/sentry/socket/netlink/uevent/protocol.go | 2 +- pkg/sentry/socket/netstack/stack.go | 55 +++++ pkg/tcpip/stack/stack.go | 9 + test/syscalls/linux/BUILD | 2 + test/syscalls/linux/socket_netlink_route.cc | 296 ++++++++++++++++++++----- test/syscalls/linux/socket_netlink_util.cc | 45 +++- test/syscalls/linux/socket_netlink_util.h | 9 + 17 files changed, 1022 insertions(+), 162 deletions(-) create mode 100644 pkg/sentry/socket/netlink/message_test.go (limited to 'test') diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go index a7dfb78a7..2916a0644 100644 --- a/pkg/sentry/inet/inet.go +++ b/pkg/sentry/inet/inet.go @@ -28,6 +28,10 @@ type Stack interface { // interface indexes to a slice of associated interface address properties. InterfaceAddrs() map[int32][]InterfaceAddr + // AddInterfaceAddr adds an address to the network interface identified by + // index. + AddInterfaceAddr(idx int32, addr InterfaceAddr) error + // SupportsIPv6 returns true if the stack supports IPv6 connectivity. SupportsIPv6() bool diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go index dcfcbd97e..d8961fc94 100644 --- a/pkg/sentry/inet/test_stack.go +++ b/pkg/sentry/inet/test_stack.go @@ -47,6 +47,12 @@ func (s *TestStack) InterfaceAddrs() map[int32][]InterfaceAddr { return s.InterfaceAddrsMap } +// AddInterfaceAddr implements Stack.AddInterfaceAddr. +func (s *TestStack) AddInterfaceAddr(idx int32, addr InterfaceAddr) error { + s.InterfaceAddrsMap[idx] = append(s.InterfaceAddrsMap[idx], addr) + return nil +} + // SupportsIPv6 implements Stack.SupportsIPv6. func (s *TestStack) SupportsIPv6() bool { return s.SupportsIPv6Flag diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index 034eca676..a48082631 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -310,6 +310,11 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { return addrs } +// AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. +func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { + return syserror.EACCES +} + // SupportsIPv6 implements inet.Stack.SupportsIPv6. 
func (s *Stack) SupportsIPv6() bool { return s.supportsIPv6 diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index f8b8e467d..1911cd9b8 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -33,3 +33,15 @@ go_library( "//pkg/waiter", ], ) + +go_test( + name = "netlink_test", + size = "small", + srcs = [ + "message_test.go", + ], + deps = [ + ":netlink", + "//pkg/abi/linux", + ], +) diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go index b21e0ca4b..4ea252ccb 100644 --- a/pkg/sentry/socket/netlink/message.go +++ b/pkg/sentry/socket/netlink/message.go @@ -30,8 +30,16 @@ func alignUp(length int, align uint) int { return (length + int(align) - 1) &^ (int(align) - 1) } +// alignPad returns the length of padding required for alignment. +// +// Preconditions: align is a power of two. +func alignPad(length int, align uint) int { + return alignUp(length, align) - length +} + // Message contains a complete serialized netlink message. type Message struct { + hdr linux.NetlinkMessageHeader buf []byte } @@ -40,10 +48,86 @@ type Message struct { // The header length will be updated by Finalize. func NewMessage(hdr linux.NetlinkMessageHeader) *Message { return &Message{ + hdr: hdr, buf: binary.Marshal(nil, usermem.ByteOrder, hdr), } } +// ParseMessage parses the first message seen at buf, returning the rest of the +// buffer. If message is malformed, ok of false is returned. For last message, +// padding check is loose, if there isn't enought padding, whole buf is consumed +// and ok is set to true. +func ParseMessage(buf []byte) (msg *Message, rest []byte, ok bool) { + b := BytesView(buf) + + hdrBytes, ok := b.Extract(linux.NetlinkMessageHeaderSize) + if !ok { + return + } + var hdr linux.NetlinkMessageHeader + binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr) + + // Msg portion. + totalMsgLen := int(hdr.Length) + _, ok = b.Extract(totalMsgLen - linux.NetlinkMessageHeaderSize) + if !ok { + return + } + + // Padding. + numPad := alignPad(totalMsgLen, linux.NLMSG_ALIGNTO) + // Linux permits the last message not being aligned, just consume all of it. + // Ref: net/netlink/af_netlink.c:netlink_rcv_skb + if numPad > len(b) { + numPad = len(b) + } + _, ok = b.Extract(numPad) + if !ok { + return + } + + return &Message{ + hdr: hdr, + buf: buf[:totalMsgLen], + }, []byte(b), true +} + +// Header returns the header of this message. +func (m *Message) Header() linux.NetlinkMessageHeader { + return m.hdr +} + +// GetData unmarshals the payload message header from this netlink message, and +// returns the attributes portion. +func (m *Message) GetData(msg interface{}) (AttrsView, bool) { + b := BytesView(m.buf) + + _, ok := b.Extract(linux.NetlinkMessageHeaderSize) + if !ok { + return nil, false + } + + size := int(binary.Size(msg)) + msgBytes, ok := b.Extract(size) + if !ok { + return nil, false + } + binary.Unmarshal(msgBytes, usermem.ByteOrder, msg) + + numPad := alignPad(linux.NetlinkMessageHeaderSize+size, linux.NLMSG_ALIGNTO) + // Linux permits the last message not being aligned, just consume all of it. 
+ // Ref: net/netlink/af_netlink.c:netlink_rcv_skb + if numPad > len(b) { + numPad = len(b) + } + _, ok = b.Extract(numPad) + if !ok { + return nil, false + } + + return AttrsView(b), true +} + // Finalize returns the []byte containing the entire message, with the total // length set in the message header. The Message must not be modified after // calling Finalize. @@ -157,3 +241,48 @@ func (ms *MessageSet) AddMessage(hdr linux.NetlinkMessageHeader) *Message { ms.Messages = append(ms.Messages, m) return m } + +// AttrsView is a view into the attributes portion of a netlink message. +type AttrsView []byte + +// Empty returns whether there is no attribute left in v. +func (v AttrsView) Empty() bool { + return len(v) == 0 +} + +// ParseFirst parses first netlink attribute at the beginning of v. +func (v AttrsView) ParseFirst() (hdr linux.NetlinkAttrHeader, value []byte, rest AttrsView, ok bool) { + b := BytesView(v) + + hdrBytes, ok := b.Extract(linux.NetlinkAttrHeaderSize) + if !ok { + return + } + binary.Unmarshal(hdrBytes, usermem.ByteOrder, &hdr) + + value, ok = b.Extract(int(hdr.Length) - linux.NetlinkAttrHeaderSize) + if !ok { + return + } + + _, ok = b.Extract(alignPad(int(hdr.Length), linux.NLA_ALIGNTO)) + if !ok { + return + } + + return hdr, value, AttrsView(b), ok +} + +// BytesView supports extracting data from a byte slice with bounds checking. +type BytesView []byte + +// Extract removes the first n bytes from v and returns it. If n is out of +// bounds, it returns false. +func (v *BytesView) Extract(n int) ([]byte, bool) { + if n < 0 || n > len(*v) { + return nil, false + } + extracted := (*v)[:n] + *v = (*v)[n:] + return extracted, true +} diff --git a/pkg/sentry/socket/netlink/message_test.go b/pkg/sentry/socket/netlink/message_test.go new file mode 100644 index 000000000..ef13d9386 --- /dev/null +++ b/pkg/sentry/socket/netlink/message_test.go @@ -0,0 +1,312 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
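For readers following the new parsing code above: a netlink message is a 16-byte nlmsghdr, a payload, and padding up to the 4-byte NLMSG_ALIGNTO boundary, and ParseMessage/GetData/AttrsView walk exactly that layout with bounds checks. The same buffers can be produced from userspace with the standard <linux/netlink.h> macros; a small sketch for illustration only (not taken from the patch) that builds one well-formed message of the kind the tests below decode:

    #include <linux/netlink.h>

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Builds one netlink message carrying a 2-byte payload. NLMSG_SPACE rounds
    // the buffer up to NLMSG_ALIGNTO, so the result is 20 bytes: a 16-byte
    // header, 2 payload bytes, and 2 bytes of alignment padding.
    std::vector<unsigned char> BuildNetlinkMessage(uint16_t type, uint16_t flags,
                                                   uint32_t seq,
                                                   uint16_t payload) {
      std::vector<unsigned char> buf(NLMSG_SPACE(sizeof(payload)), 0);
      auto* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
      // NLMSG_LENGTH(2) == 18: header plus payload, padding excluded. A parser
      // like ParseMessage above treats the remaining 2 bytes as padding.
      hdr->nlmsg_len = NLMSG_LENGTH(sizeof(payload));
      hdr->nlmsg_type = type;
      hdr->nlmsg_flags = flags;
      hdr->nlmsg_seq = seq;
      std::memcpy(NLMSG_DATA(hdr), &payload, sizeof(payload));
      return buf;
    }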
+ +package message_test + +import ( + "bytes" + "reflect" + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/socket/netlink" +) + +type dummyNetlinkMsg struct { + Foo uint16 +} + +func TestParseMessage(t *testing.T) { + tests := []struct { + desc string + input []byte + + header linux.NetlinkMessageHeader + dataMsg *dummyNetlinkMsg + restLen int + ok bool + }{ + { + desc: "valid", + input: []byte{ + 0x14, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding + }, + header: linux.NetlinkMessageHeader{ + Length: 20, + Type: 1, + Flags: 2, + Seq: 3, + PortID: 4, + }, + dataMsg: &dummyNetlinkMsg{ + Foo: 0x3130, + }, + restLen: 0, + ok: true, + }, + { + desc: "valid with next message", + input: []byte{ + 0x14, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding + 0xFF, // Next message (rest) + }, + header: linux.NetlinkMessageHeader{ + Length: 20, + Type: 1, + Flags: 2, + Seq: 3, + PortID: 4, + }, + dataMsg: &dummyNetlinkMsg{ + Foo: 0x3130, + }, + restLen: 1, + ok: true, + }, + { + desc: "valid for last message without padding", + input: []byte{ + 0x12, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, // Data message + }, + header: linux.NetlinkMessageHeader{ + Length: 18, + Type: 1, + Flags: 2, + Seq: 3, + PortID: 4, + }, + dataMsg: &dummyNetlinkMsg{ + Foo: 0x3130, + }, + restLen: 0, + ok: true, + }, + { + desc: "valid for last message not to be aligned", + input: []byte{ + 0x13, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, // Data message + 0x00, // Excessive 1 byte permitted at end + }, + header: linux.NetlinkMessageHeader{ + Length: 19, + Type: 1, + Flags: 2, + Seq: 3, + PortID: 4, + }, + dataMsg: &dummyNetlinkMsg{ + Foo: 0x3130, + }, + restLen: 0, + ok: true, + }, + { + desc: "header.Length too short", + input: []byte{ + 0x04, 0x00, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding + }, + ok: false, + }, + { + desc: "header.Length too long", + input: []byte{ + 0xFF, 0xFF, 0x00, 0x00, // Length + 0x01, 0x00, // Type + 0x02, 0x00, // Flags + 0x03, 0x00, 0x00, 0x00, // Seq + 0x04, 0x00, 0x00, 0x00, // PortID + 0x30, 0x31, 0x00, 0x00, // Data message with 2 bytes padding + }, + ok: false, + }, + { + desc: "header incomplete", + input: []byte{ + 0x04, 0x00, 0x00, 0x00, // Length + }, + ok: false, + }, + { + desc: "empty message", + input: []byte{}, + ok: false, + }, + } + for _, test := range tests { + msg, rest, ok := netlink.ParseMessage(test.input) + if ok != test.ok { + t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok) + continue + } + if !test.ok { + continue + } + if !reflect.DeepEqual(msg.Header(), test.header) { + t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, msg.Header(), test.header) + } + + dataMsg := &dummyNetlinkMsg{} + _, dataOk := msg.GetData(dataMsg) + if !dataOk { + t.Errorf("%v: GetData.ok = %v, want = true", test.desc, dataOk) + } else if !reflect.DeepEqual(dataMsg, 
test.dataMsg) { + t.Errorf("%v: GetData.msg = %+v, want = %+v", test.desc, dataMsg, test.dataMsg) + } + + if got, want := rest, test.input[len(test.input)-test.restLen:]; !bytes.Equal(got, want) { + t.Errorf("%v: got rest = %v, want = %v", test.desc, got, want) + } + } +} + +func TestAttrView(t *testing.T) { + tests := []struct { + desc string + input []byte + + // Outputs for ParseFirst. + hdr linux.NetlinkAttrHeader + value []byte + restLen int + ok bool + + // Outputs for Empty. + isEmpty bool + }{ + { + desc: "valid", + input: []byte{ + 0x06, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x00, 0x00, // Data with 2 bytes padding + }, + hdr: linux.NetlinkAttrHeader{ + Length: 6, + Type: 1, + }, + value: []byte{0x30, 0x31}, + restLen: 0, + ok: true, + isEmpty: false, + }, + { + desc: "at alignment", + input: []byte{ + 0x08, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x32, 0x33, // Data + }, + hdr: linux.NetlinkAttrHeader{ + Length: 8, + Type: 1, + }, + value: []byte{0x30, 0x31, 0x32, 0x33}, + restLen: 0, + ok: true, + isEmpty: false, + }, + { + desc: "at alignment with rest data", + input: []byte{ + 0x08, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x32, 0x33, // Data + 0xFF, 0xFE, // Rest data + }, + hdr: linux.NetlinkAttrHeader{ + Length: 8, + Type: 1, + }, + value: []byte{0x30, 0x31, 0x32, 0x33}, + restLen: 2, + ok: true, + isEmpty: false, + }, + { + desc: "hdr.Length too long", + input: []byte{ + 0xFF, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x32, 0x33, // Data + }, + ok: false, + isEmpty: false, + }, + { + desc: "hdr.Length too short", + input: []byte{ + 0x01, 0x00, // Length + 0x01, 0x00, // Type + 0x30, 0x31, 0x32, 0x33, // Data + }, + ok: false, + isEmpty: false, + }, + { + desc: "empty", + input: []byte{}, + ok: false, + isEmpty: true, + }, + } + for _, test := range tests { + attrs := netlink.AttrsView(test.input) + + // Test ParseFirst(). + hdr, value, rest, ok := attrs.ParseFirst() + if ok != test.ok { + t.Errorf("%v: got ok = %v, want = %v", test.desc, ok, test.ok) + } else if test.ok { + if !reflect.DeepEqual(hdr, test.hdr) { + t.Errorf("%v: got hdr = %+v, want = %+v", test.desc, hdr, test.hdr) + } + if !bytes.Equal(value, test.value) { + t.Errorf("%v: got value = %v, want = %v", test.desc, value, test.value) + } + if wantRest := test.input[len(test.input)-test.restLen:]; !bytes.Equal(rest, wantRest) { + t.Errorf("%v: got rest = %v, want = %v", test.desc, rest, wantRest) + } + } + + // Test Empty(). + if got, want := attrs.Empty(), test.isEmpty; got != want { + t.Errorf("%v: got empty = %v, want = %v", test.desc, got, want) + } + } +} diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go index 07f860a49..b0dc70e5c 100644 --- a/pkg/sentry/socket/netlink/provider.go +++ b/pkg/sentry/socket/netlink/provider.go @@ -42,7 +42,7 @@ type Protocol interface { // If err == nil, any messages added to ms will be sent back to the // other end of the socket. Setting ms.Multi will cause an NLMSG_DONE // message to be sent even if ms contains no messages. 
- ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *MessageSet) *syserr.Error + ProcessMessage(ctx context.Context, msg *Message, ms *MessageSet) *syserr.Error } // Provider is a function that creates a new Protocol for a specific netlink diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD index 622a1eafc..93127398d 100644 --- a/pkg/sentry/socket/netlink/route/BUILD +++ b/pkg/sentry/socket/netlink/route/BUILD @@ -10,13 +10,11 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/binary", "//pkg/context", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/socket/netlink", "//pkg/syserr", - "//pkg/usermem", ], ) diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go index 2b3c7f5b3..c84d8bd7c 100644 --- a/pkg/sentry/socket/netlink/route/protocol.go +++ b/pkg/sentry/socket/netlink/route/protocol.go @@ -17,16 +17,15 @@ package route import ( "bytes" + "syscall" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/netlink" "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/usermem" ) // commandKind describes the operational class of a message type. @@ -69,13 +68,7 @@ func (p *Protocol) CanSend() bool { } // dumpLinks handles RTM_GETLINK dump requests. -func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { - // TODO(b/68878065): Only the dump variant of the types below are - // supported. - if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP { - return syserr.ErrNotSupported - } - +func (p *Protocol) dumpLinks(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { // NLM_F_DUMP + RTM_GETLINK messages are supposed to include an // ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some // userspace applications (including glibc) still include rtgenmsg. @@ -99,44 +92,105 @@ func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader return nil } - for id, i := range stack.Interfaces() { - m := ms.AddMessage(linux.NetlinkMessageHeader{ - Type: linux.RTM_NEWLINK, - }) + for idx, i := range stack.Interfaces() { + addNewLinkMessage(ms, idx, i) + } - m.Put(linux.InterfaceInfoMessage{ - Family: linux.AF_UNSPEC, - Type: i.DeviceType, - Index: id, - Flags: i.Flags, - }) + return nil +} - m.PutAttrString(linux.IFLA_IFNAME, i.Name) - m.PutAttr(linux.IFLA_MTU, i.MTU) +// getLinks handles RTM_GETLINK requests. +func (p *Protocol) getLink(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { + stack := inet.StackFromContext(ctx) + if stack == nil { + // No network devices. + return nil + } - mac := make([]byte, 6) - brd := mac - if len(i.Addr) > 0 { - mac = i.Addr - brd = bytes.Repeat([]byte{0xff}, len(i.Addr)) + // Parse message. + var ifi linux.InterfaceInfoMessage + attrs, ok := msg.GetData(&ifi) + if !ok { + return syserr.ErrInvalidArgument + } + + // Parse attributes. 
+ var byName []byte + for !attrs.Empty() { + ahdr, value, rest, ok := attrs.ParseFirst() + if !ok { + return syserr.ErrInvalidArgument } - m.PutAttr(linux.IFLA_ADDRESS, mac) - m.PutAttr(linux.IFLA_BROADCAST, brd) + attrs = rest - // TODO(gvisor.dev/issue/578): There are many more attributes. + switch ahdr.Type { + case linux.IFLA_IFNAME: + if len(value) < 1 { + return syserr.ErrInvalidArgument + } + byName = value[:len(value)-1] + + // TODO(gvisor.dev/issue/578): Support IFLA_EXT_MASK. + } } + found := false + for idx, i := range stack.Interfaces() { + switch { + case ifi.Index > 0: + if idx != ifi.Index { + continue + } + case byName != nil: + if string(byName) != i.Name { + continue + } + default: + // Criteria not specified. + return syserr.ErrInvalidArgument + } + + addNewLinkMessage(ms, idx, i) + found = true + break + } + if !found { + return syserr.ErrNoDevice + } return nil } -// dumpAddrs handles RTM_GETADDR dump requests. -func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { - // TODO(b/68878065): Only the dump variant of the types below are - // supported. - if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP { - return syserr.ErrNotSupported +// addNewLinkMessage appends RTM_NEWLINK message for the given interface into +// the message set. +func addNewLinkMessage(ms *netlink.MessageSet, idx int32, i inet.Interface) { + m := ms.AddMessage(linux.NetlinkMessageHeader{ + Type: linux.RTM_NEWLINK, + }) + + m.Put(linux.InterfaceInfoMessage{ + Family: linux.AF_UNSPEC, + Type: i.DeviceType, + Index: idx, + Flags: i.Flags, + }) + + m.PutAttrString(linux.IFLA_IFNAME, i.Name) + m.PutAttr(linux.IFLA_MTU, i.MTU) + + mac := make([]byte, 6) + brd := mac + if len(i.Addr) > 0 { + mac = i.Addr + brd = bytes.Repeat([]byte{0xff}, len(i.Addr)) } + m.PutAttr(linux.IFLA_ADDRESS, mac) + m.PutAttr(linux.IFLA_BROADCAST, brd) + + // TODO(gvisor.dev/issue/578): There are many more attributes. +} +// dumpAddrs handles RTM_GETADDR dump requests. +func (p *Protocol) dumpAddrs(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { // RTM_GETADDR dump requests need not contain anything more than the // netlink header and 1 byte protocol family common to all // NETLINK_ROUTE requests. @@ -168,6 +222,7 @@ func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader Index: uint32(id), }) + m.PutAttr(linux.IFA_LOCAL, []byte(a.Addr)) m.PutAttr(linux.IFA_ADDRESS, []byte(a.Addr)) // TODO(gvisor.dev/issue/578): There are many more attributes. @@ -252,12 +307,12 @@ func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) { } // parseForDestination parses a message as format of RouteMessage-RtAttr-dst. -func parseForDestination(data []byte) ([]byte, *syserr.Error) { +func parseForDestination(msg *netlink.Message) ([]byte, *syserr.Error) { var rtMsg linux.RouteMessage - if len(data) < linux.SizeOfRouteMessage { + attrs, ok := msg.GetData(&rtMsg) + if !ok { return nil, syserr.ErrInvalidArgument } - binary.Unmarshal(data[:linux.SizeOfRouteMessage], usermem.ByteOrder, &rtMsg) // iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See // commit bc234301af12. Note we don't check this flag for backward // compatibility. @@ -265,26 +320,15 @@ func parseForDestination(data []byte) ([]byte, *syserr.Error) { return nil, syserr.ErrNotSupported } - data = data[linux.SizeOfRouteMessage:] - - // TODO(gvisor.dev/issue/1611): Add generic attribute parsing. 
- var rtAttr linux.RtAttr - if len(data) < linux.SizeOfRtAttr { - return nil, syserr.ErrInvalidArgument + // Expect first attribute is RTA_DST. + if hdr, value, _, ok := attrs.ParseFirst(); ok && hdr.Type == linux.RTA_DST { + return value, nil } - binary.Unmarshal(data[:linux.SizeOfRtAttr], usermem.ByteOrder, &rtAttr) - if rtAttr.Type != linux.RTA_DST { - return nil, syserr.ErrInvalidArgument - } - - if len(data) < int(rtAttr.Len) { - return nil, syserr.ErrInvalidArgument - } - return data[linux.SizeOfRtAttr:rtAttr.Len], nil + return nil, syserr.ErrInvalidArgument } // dumpRoutes handles RTM_GETROUTE requests. -func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { +func (p *Protocol) dumpRoutes(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { // RTM_GETROUTE dump requests need not contain anything more than the // netlink header and 1 byte protocol family common to all // NETLINK_ROUTE requests. @@ -295,10 +339,11 @@ func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeade return nil } + hdr := msg.Header() routeTables := stack.RouteTable() if hdr.Flags == linux.NLM_F_REQUEST { - dst, err := parseForDestination(data) + dst, err := parseForDestination(msg) if err != nil { return err } @@ -357,10 +402,55 @@ func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeade return nil } +// newAddr handles RTM_NEWADDR requests. +func (p *Protocol) newAddr(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { + stack := inet.StackFromContext(ctx) + if stack == nil { + // No network stack. + return syserr.ErrProtocolNotSupported + } + + var ifa linux.InterfaceAddrMessage + attrs, ok := msg.GetData(&ifa) + if !ok { + return syserr.ErrInvalidArgument + } + + for !attrs.Empty() { + ahdr, value, rest, ok := attrs.ParseFirst() + if !ok { + return syserr.ErrInvalidArgument + } + attrs = rest + + switch ahdr.Type { + case linux.IFA_LOCAL: + err := stack.AddInterfaceAddr(int32(ifa.Index), inet.InterfaceAddr{ + Family: ifa.Family, + PrefixLen: ifa.PrefixLen, + Flags: ifa.Flags, + Addr: value, + }) + if err == syscall.EEXIST { + flags := msg.Header().Flags + if flags&linux.NLM_F_EXCL != 0 { + return syserr.ErrExists + } + } else if err != nil { + return syserr.ErrInvalidArgument + } + } + } + return nil +} + // ProcessMessage implements netlink.Protocol.ProcessMessage. -func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { +func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { + hdr := msg.Header() + // All messages start with a 1 byte protocol family. - if len(data) < 1 { + var family uint8 + if _, ok := msg.GetData(&family); !ok { // Linux ignores messages missing the protocol family. See // net/core/rtnetlink.c:rtnetlink_rcv_msg. return nil @@ -374,16 +464,32 @@ func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageH } } - switch hdr.Type { - case linux.RTM_GETLINK: - return p.dumpLinks(ctx, hdr, data, ms) - case linux.RTM_GETADDR: - return p.dumpAddrs(ctx, hdr, data, ms) - case linux.RTM_GETROUTE: - return p.dumpRoutes(ctx, hdr, data, ms) - default: - return syserr.ErrNotSupported + if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP { + // TODO(b/68878065): Only the dump variant of the types below are + // supported. 
+ switch hdr.Type { + case linux.RTM_GETLINK: + return p.dumpLinks(ctx, msg, ms) + case linux.RTM_GETADDR: + return p.dumpAddrs(ctx, msg, ms) + case linux.RTM_GETROUTE: + return p.dumpRoutes(ctx, msg, ms) + default: + return syserr.ErrNotSupported + } + } else if hdr.Flags&linux.NLM_F_REQUEST == linux.NLM_F_REQUEST { + switch hdr.Type { + case linux.RTM_GETLINK: + return p.getLink(ctx, msg, ms) + case linux.RTM_GETROUTE: + return p.dumpRoutes(ctx, msg, ms) + case linux.RTM_NEWADDR: + return p.newAddr(ctx, msg, ms) + default: + return syserr.ErrNotSupported + } } + return syserr.ErrNotSupported } // init registers the NETLINK_ROUTE provider. diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index c4b95debb..2ca02567d 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -644,47 +644,38 @@ func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error return nil } -func (s *Socket) dumpErrorMesage(ctx context.Context, hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) *syserr.Error { +func dumpErrorMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) { m := ms.AddMessage(linux.NetlinkMessageHeader{ Type: linux.NLMSG_ERROR, }) - m.Put(linux.NetlinkErrorMessage{ Error: int32(-err.ToLinux().Number()), Header: hdr, }) - return nil +} +func dumpAckMesage(hdr linux.NetlinkMessageHeader, ms *MessageSet) { + m := ms.AddMessage(linux.NetlinkMessageHeader{ + Type: linux.NLMSG_ERROR, + }) + m.Put(linux.NetlinkErrorMessage{ + Error: 0, + Header: hdr, + }) } // processMessages handles each message in buf, passing it to the protocol // handler for final handling. func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error { for len(buf) > 0 { - if len(buf) < linux.NetlinkMessageHeaderSize { + msg, rest, ok := ParseMessage(buf) + if !ok { // Linux ignores messages that are too short. See // net/netlink/af_netlink.c:netlink_rcv_skb. break } - - var hdr linux.NetlinkMessageHeader - binary.Unmarshal(buf[:linux.NetlinkMessageHeaderSize], usermem.ByteOrder, &hdr) - - if hdr.Length < linux.NetlinkMessageHeaderSize || uint64(hdr.Length) > uint64(len(buf)) { - // Linux ignores malformed messages. See - // net/netlink/af_netlink.c:netlink_rcv_skb. - break - } - - // Data from this message. - data := buf[linux.NetlinkMessageHeaderSize:hdr.Length] - - // Advance to the next message. - next := alignUp(int(hdr.Length), linux.NLMSG_ALIGNTO) - if next >= len(buf)-1 { - next = len(buf) - 1 - } - buf = buf[next:] + buf = rest + hdr := msg.Header() // Ignore control messages. if hdr.Type < linux.NLMSG_MIN_TYPE { @@ -692,19 +683,10 @@ func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error } ms := NewMessageSet(s.portID, hdr.Seq) - var err *syserr.Error - // TODO(b/68877377): ACKs not supported yet. 
- if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK { - err = syserr.ErrNotSupported - } else { - - err = s.protocol.ProcessMessage(ctx, hdr, data, ms) - } - if err != nil { - ms = NewMessageSet(s.portID, hdr.Seq) - if err := s.dumpErrorMesage(ctx, hdr, ms, err); err != nil { - return err - } + if err := s.protocol.ProcessMessage(ctx, msg, ms); err != nil { + dumpErrorMesage(hdr, ms, err) + } else if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK { + dumpAckMesage(hdr, ms) } if err := s.sendResponse(ctx, ms); err != nil { diff --git a/pkg/sentry/socket/netlink/uevent/protocol.go b/pkg/sentry/socket/netlink/uevent/protocol.go index 1ee4296bc..029ba21b5 100644 --- a/pkg/sentry/socket/netlink/uevent/protocol.go +++ b/pkg/sentry/socket/netlink/uevent/protocol.go @@ -49,7 +49,7 @@ func (p *Protocol) CanSend() bool { } // ProcessMessage implements netlink.Protocol.ProcessMessage. -func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error { +func (p *Protocol) ProcessMessage(ctx context.Context, msg *netlink.Message, ms *netlink.MessageSet) *syserr.Error { // Silently ignore all messages. return nil } diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 31ea66eca..0692482e9 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -20,6 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/iptables" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" @@ -88,6 +90,59 @@ func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { return nicAddrs } +// AddInterfaceAddr implements inet.Stack.AddInterfaceAddr. +func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { + var ( + protocol tcpip.NetworkProtocolNumber + address tcpip.Address + ) + switch addr.Family { + case linux.AF_INET: + if len(addr.Addr) < header.IPv4AddressSize { + return syserror.EINVAL + } + if addr.PrefixLen > header.IPv4AddressSize*8 { + return syserror.EINVAL + } + protocol = ipv4.ProtocolNumber + address = tcpip.Address(addr.Addr[:header.IPv4AddressSize]) + + case linux.AF_INET6: + if len(addr.Addr) < header.IPv6AddressSize { + return syserror.EINVAL + } + if addr.PrefixLen > header.IPv6AddressSize*8 { + return syserror.EINVAL + } + protocol = ipv6.ProtocolNumber + address = tcpip.Address(addr.Addr[:header.IPv6AddressSize]) + + default: + return syserror.ENOTSUP + } + + protocolAddress := tcpip.ProtocolAddress{ + Protocol: protocol, + AddressWithPrefix: tcpip.AddressWithPrefix{ + Address: address, + PrefixLen: int(addr.PrefixLen), + }, + } + + // Attach address to interface. + if err := s.Stack.AddProtocolAddressWithOptions(tcpip.NICID(idx), protocolAddress, stack.CanBePrimaryEndpoint); err != nil { + return syserr.TranslateNetstackError(err).ToError() + } + + // Add route for local network. + s.Stack.AddRoute(tcpip.Route{ + Destination: protocolAddress.AddressWithPrefix.Subnet(), + Gateway: "", // No gateway for local network. + NIC: tcpip.NICID(idx), + }) + return nil +} + // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize. 
func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { var rs tcp.ReceiveBufferSizeOption diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 7057b110e..b793f1d74 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -795,6 +795,8 @@ func (s *Stack) Forwarding() bool { // SetRouteTable assigns the route table to be used by this stack. It // specifies which NIC to use for given destination address ranges. +// +// This method takes ownership of the table. func (s *Stack) SetRouteTable(table []tcpip.Route) { s.mu.Lock() defer s.mu.Unlock() @@ -809,6 +811,13 @@ func (s *Stack) GetRouteTable() []tcpip.Route { return append([]tcpip.Route(nil), s.routeTable...) } +// AddRoute appends a route to the route table. +func (s *Stack) AddRoute(route tcpip.Route) { + s.mu.Lock() + defer s.mu.Unlock() + s.routeTable = append(s.routeTable, route) +} + // NewEndpoint creates a new transport layer endpoint of the given protocol. func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { t, ok := s.transportProtocols[transport] diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 273b014d6..f2e3c7072 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2769,9 +2769,11 @@ cc_binary( deps = [ ":socket_netlink_util", ":socket_test_util", + "//test/util:capability_util", "//test/util:cleanup", "//test/util:file_descriptor", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:optional", gtest, "//test/util:test_main", "//test/util:test_util", diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index 1e28e658d..e5aed1eec 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -25,8 +26,10 @@ #include "gtest/gtest.h" #include "absl/strings/str_format.h" +#include "absl/types/optional.h" #include "test/syscalls/linux/socket_netlink_util.h" #include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" #include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/test_util.h" @@ -38,6 +41,8 @@ namespace testing { namespace { +constexpr uint32_t kSeq = 12345; + using ::testing::AnyOf; using ::testing::Eq; @@ -113,58 +118,224 @@ void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) { // TODO(mpratt): Check ifinfomsg contents and following attrs. } +PosixError DumpLinks( + const FileDescriptor& fd, uint32_t seq, + const std::function& fn) { + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = seq; + req.ifm.ifi_family = AF_UNSPEC; + + return NetlinkRequestResponse(fd, &req, sizeof(req), fn, false); +} + TEST(NetlinkRouteTest, GetLinkDump) { FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); + // Loopback is common among all tests, check that it's found. 
+ bool loopbackFound = false; + ASSERT_NO_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { + CheckGetLinkResponse(hdr, kSeq, port); + if (hdr->nlmsg_type != RTM_NEWLINK) { + return; + } + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); + const struct ifinfomsg* msg = + reinterpret_cast(NLMSG_DATA(hdr)); + std::cout << "Found interface idx=" << msg->ifi_index + << ", type=" << std::hex << msg->ifi_type; + if (msg->ifi_type == ARPHRD_LOOPBACK) { + loopbackFound = true; + EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0); + } + })); + EXPECT_TRUE(loopbackFound); +} + +struct Link { + int index; + std::string name; +}; + +PosixErrorOr> FindLoopbackLink() { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + absl::optional link; + RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { + if (hdr->nlmsg_type != RTM_NEWLINK || + hdr->nlmsg_len < NLMSG_SPACE(sizeof(struct ifinfomsg))) { + return; + } + const struct ifinfomsg* msg = + reinterpret_cast(NLMSG_DATA(hdr)); + if (msg->ifi_type == ARPHRD_LOOPBACK) { + const auto* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); + if (rta == nullptr) { + // Ignore links that do not have a name. + return; + } + + link = Link(); + link->index = msg->ifi_index; + link->name = std::string(reinterpret_cast(RTA_DATA(rta))); + } + })); + return link; +} + +// CheckLinkMsg checks a netlink message against an expected link. +void CheckLinkMsg(const struct nlmsghdr* hdr, const Link& link) { + ASSERT_THAT(hdr->nlmsg_type, Eq(RTM_NEWLINK)); + ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); + const struct ifinfomsg* msg = + reinterpret_cast(NLMSG_DATA(hdr)); + EXPECT_EQ(msg->ifi_index, link.index); + + const struct rtattr* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); + EXPECT_NE(nullptr, rta) << "IFLA_IFNAME not found in message."; + if (rta != nullptr) { + std::string name(reinterpret_cast(RTA_DATA(rta))); + EXPECT_EQ(name, link.name); + } +} + +TEST(NetlinkRouteTest, GetLinkByIndex) { + absl::optional loopback_link = + ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); + ASSERT_TRUE(loopback_link.has_value()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + struct request { struct nlmsghdr hdr; struct ifinfomsg ifm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETLINK; - req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_flags = NLM_F_REQUEST; req.hdr.nlmsg_seq = kSeq; req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = loopback_link->index; - // Loopback is common among all tests, check that it's found. 
- bool loopbackFound = false; + bool found = false; ASSERT_NO_ERRNO(NetlinkRequestResponse( fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) { - CheckGetLinkResponse(hdr, kSeq, port); - if (hdr->nlmsg_type != RTM_NEWLINK) { - return; - } - ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct ifinfomsg))); - const struct ifinfomsg* msg = - reinterpret_cast(NLMSG_DATA(hdr)); - std::cout << "Found interface idx=" << msg->ifi_index - << ", type=" << std::hex << msg->ifi_type; - if (msg->ifi_type == ARPHRD_LOOPBACK) { - loopbackFound = true; - EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0); - } + CheckLinkMsg(hdr, *loopback_link); + found = true; }, false)); - EXPECT_TRUE(loopbackFound); + EXPECT_TRUE(found) << "Netlink response does not contain any links."; } -TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { +TEST(NetlinkRouteTest, GetLinkByName) { + absl::optional loopback_link = + ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); + ASSERT_TRUE(loopback_link.has_value()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); struct request { struct nlmsghdr hdr; struct ifinfomsg ifm; + struct rtattr rtattr; + char ifname[IFNAMSIZ]; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; }; - constexpr uint32_t kSeq = 12345; + struct request req = {}; + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.rtattr.rta_type = IFLA_IFNAME; + req.rtattr.rta_len = RTA_LENGTH(loopback_link->name.size() + 1); + strncpy(req.ifname, loopback_link->name.c_str(), sizeof(req.ifname)); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); + + bool found = false; + ASSERT_NO_ERRNO(NetlinkRequestResponse( + fd, &req, sizeof(req), + [&](const struct nlmsghdr* hdr) { + CheckLinkMsg(hdr, *loopback_link); + found = true; + }, + false)); + EXPECT_TRUE(found) << "Netlink response does not contain any links."; +} + +TEST(NetlinkRouteTest, GetLinkByIndexNotFound) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = 1234590; + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENODEV, ::testing::_)); +} + +TEST(NetlinkRouteTest, GetLinkByNameNotFound) { + const std::string name = "nodevice?!"; + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + struct rtattr rtattr; + char ifname[IFNAMSIZ]; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST; + req.hdr.nlmsg_seq = kSeq; + req.ifm.ifi_family = AF_UNSPEC; + req.rtattr.rta_type = IFLA_IFNAME; + req.rtattr.rta_len = RTA_LENGTH(name.size() + 1); + strncpy(req.ifname, name.c_str(), sizeof(req.ifname)); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); + + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(ENODEV, ::testing::_)); +} + +TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct 
ifinfomsg ifm; + }; struct request req = {}; req.hdr.nlmsg_len = sizeof(req); @@ -175,18 +346,8 @@ TEST(NetlinkRouteTest, MsgHdrMsgUnsuppType) { req.hdr.nlmsg_seq = kSeq; req.ifm.ifi_family = AF_UNSPEC; - ASSERT_NO_ERRNO(NetlinkRequestResponse( - fd, &req, sizeof(req), - [&](const struct nlmsghdr* hdr) { - EXPECT_THAT(hdr->nlmsg_type, Eq(NLMSG_ERROR)); - EXPECT_EQ(hdr->nlmsg_seq, kSeq); - EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct nlmsgerr)); - - const struct nlmsgerr* msg = - reinterpret_cast(NLMSG_DATA(hdr)); - EXPECT_EQ(msg->error, -EOPNOTSUPP); - }, - true)); + EXPECT_THAT(NetlinkRequestAckOrError(fd, kSeq, &req, sizeof(req)), + PosixErrorIs(EOPNOTSUPP, ::testing::_)); } TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { @@ -198,8 +359,6 @@ TEST(NetlinkRouteTest, MsgHdrMsgTrunc) { struct ifinfomsg ifm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETLINK; @@ -238,8 +397,6 @@ TEST(NetlinkRouteTest, MsgTruncMsgHdrMsgTrunc) { struct ifinfomsg ifm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETLINK; @@ -282,8 +439,6 @@ TEST(NetlinkRouteTest, ControlMessageIgnored) { struct ifinfomsg ifm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; // This control message is ignored. We still receive a response for the @@ -317,8 +472,6 @@ TEST(NetlinkRouteTest, GetAddrDump) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; @@ -367,6 +520,57 @@ TEST(NetlinkRouteTest, LookupAll) { ASSERT_GT(count, 0); } +TEST(NetlinkRouteTest, AddAddr) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + absl::optional loopback_link = + ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); + ASSERT_TRUE(loopback_link.has_value()); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifaddrmsg ifa; + struct rtattr rtattr; + struct in_addr addr; + char pad[NLMSG_ALIGNTO + RTA_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_type = RTM_NEWADDR; + req.hdr.nlmsg_seq = kSeq; + req.ifa.ifa_family = AF_INET; + req.ifa.ifa_prefixlen = 24; + req.ifa.ifa_flags = 0; + req.ifa.ifa_scope = 0; + req.ifa.ifa_index = loopback_link->index; + req.rtattr.rta_type = IFA_LOCAL; + req.rtattr.rta_len = RTA_LENGTH(sizeof(req.addr)); + inet_pton(AF_INET, "10.0.0.1", &req.addr); + req.hdr.nlmsg_len = + NLMSG_LENGTH(sizeof(req.ifa)) + NLMSG_ALIGN(req.rtattr.rta_len); + + // Create should succeed, as no such address in kernel. + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK; + EXPECT_NO_ERRNO( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len)); + + // Replace an existing address should succeed. + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_ACK; + req.hdr.nlmsg_seq++; + EXPECT_NO_ERRNO( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len)); + + // Create exclusive should fail, as we created the address above. + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK; + req.hdr.nlmsg_seq++; + EXPECT_THAT( + NetlinkRequestAckOrError(fd, req.hdr.nlmsg_seq, &req, req.hdr.nlmsg_len), + PosixErrorIs(EEXIST, ::testing::_)); +} + // GetRouteDump tests a RTM_GETROUTE + NLM_F_DUMP request. 
TEST(NetlinkRouteTest, GetRouteDump) { FileDescriptor fd = @@ -378,8 +582,6 @@ TEST(NetlinkRouteTest, GetRouteDump) { struct rtmsg rtm; }; - constexpr uint32_t kSeq = 12345; - struct request req = {}; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETROUTE; @@ -538,8 +740,6 @@ TEST(NetlinkRouteTest, RecvmsgTrunc) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; @@ -615,8 +815,6 @@ TEST(NetlinkRouteTest, RecvmsgTruncPeek) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; @@ -695,8 +893,6 @@ TEST(NetlinkRouteTest, NoPasscredNoCreds) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; @@ -743,8 +939,6 @@ TEST(NetlinkRouteTest, PasscredCreds) { struct rtgenmsg rgm; }; - constexpr uint32_t kSeq = 12345; - struct request req; req.hdr.nlmsg_len = sizeof(req); req.hdr.nlmsg_type = RTM_GETADDR; diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index cd2212a1a..952eecfe8 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -71,9 +72,10 @@ PosixError NetlinkRequestResponse( iov.iov_base = buf.data(); iov.iov_len = buf.size(); - // Response is a series of NLM_F_MULTI messages, ending with a NLMSG_DONE - // message. + // If NLM_F_MULTI is set, response is a series of messages that ends with a + // NLMSG_DONE message. int type = -1; + int flags = 0; do { int len; RETURN_ERROR_IF_SYSCALL_FAIL(len = RetryEINTR(recvmsg)(fd.get(), &msg, 0)); @@ -89,6 +91,7 @@ PosixError NetlinkRequestResponse( for (struct nlmsghdr* hdr = reinterpret_cast(buf.data()); NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) { fn(hdr); + flags = hdr->nlmsg_flags; type = hdr->nlmsg_type; // Done should include an integer payload for dump_done_errno. // See net/netlink/af_netlink.c:netlink_dump @@ -98,11 +101,11 @@ PosixError NetlinkRequestResponse( EXPECT_GE(hdr->nlmsg_len, NLMSG_LENGTH(sizeof(int))); } } - } while (type != NLMSG_DONE && type != NLMSG_ERROR); + } while ((flags & NLM_F_MULTI) && type != NLMSG_DONE && type != NLMSG_ERROR); if (expect_nlmsgerr) { EXPECT_EQ(type, NLMSG_ERROR); - } else { + } else if (flags & NLM_F_MULTI) { EXPECT_EQ(type, NLMSG_DONE); } return NoError(); @@ -146,5 +149,39 @@ PosixError NetlinkRequestResponseSingle( return NoError(); } +PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq, + void* request, size_t len) { + // Dummy negative number for no error message received. + // We won't get a negative error number so there will be no confusion. 
+ int err = -42; + RETURN_IF_ERRNO(NetlinkRequestResponse( + fd, request, len, + [&](const struct nlmsghdr* hdr) { + EXPECT_EQ(NLMSG_ERROR, hdr->nlmsg_type); + EXPECT_EQ(hdr->nlmsg_seq, seq); + EXPECT_GE(hdr->nlmsg_len, sizeof(*hdr) + sizeof(struct nlmsgerr)); + + const struct nlmsgerr* msg = + reinterpret_cast(NLMSG_DATA(hdr)); + err = -msg->error; + }, + true)); + return PosixError(err); +} + +const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr, + const struct ifinfomsg* msg, int16_t attr) { + const int ifi_space = NLMSG_SPACE(sizeof(*msg)); + int attrlen = hdr->nlmsg_len - ifi_space; + const struct rtattr* rta = reinterpret_cast( + reinterpret_cast(hdr) + NLMSG_ALIGN(ifi_space)); + for (; RTA_OK(rta, attrlen); rta = RTA_NEXT(rta, attrlen)) { + if (rta->rta_type == attr) { + return rta; + } + } + return nullptr; +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h index 3678c0599..e13ead406 100644 --- a/test/syscalls/linux/socket_netlink_util.h +++ b/test/syscalls/linux/socket_netlink_util.h @@ -19,6 +19,7 @@ // socket.h has to be included before if_arp.h. #include #include +#include #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" @@ -47,6 +48,14 @@ PosixError NetlinkRequestResponseSingle( const FileDescriptor& fd, void* request, size_t len, const std::function& fn); +// Send the passed request then expect and return an ack or error. +PosixError NetlinkRequestAckOrError(const FileDescriptor& fd, uint32_t seq, + void* request, size_t len); + +// Find rtnetlink attribute in message. +const struct rtattr* FindRtAttr(const struct nlmsghdr* hdr, + const struct ifinfomsg* msg, int16_t attr); + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 37abbbc547d9d78e0bf42f192403c8eca30593d8 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 5 Feb 2020 11:17:14 -0800 Subject: Add packetdrill tests to presubmit and CI testing PiperOrigin-RevId: 293409718 --- kokoro/packetdrill_tests.cfg | 9 +++++++++ scripts/packetdrill_tests.sh | 20 ++++++++++++++++++++ test/packetdrill/defs.bzl | 6 ++++-- test/packetdrill/packetdrill_test.sh | 20 ++++++++++++++++---- 4 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 kokoro/packetdrill_tests.cfg create mode 100755 scripts/packetdrill_tests.sh (limited to 'test') diff --git a/kokoro/packetdrill_tests.cfg b/kokoro/packetdrill_tests.cfg new file mode 100644 index 000000000..258d7deb4 --- /dev/null +++ b/kokoro/packetdrill_tests.cfg @@ -0,0 +1,9 @@ +build_file: "repo/scripts/packetdrill_tests.sh" + +action { + define_artifacts { + regex: "**/sponge_log.xml" + regex: "**/sponge_log.log" + regex: "**/outputs.zip" + } +} diff --git a/scripts/packetdrill_tests.sh b/scripts/packetdrill_tests.sh new file mode 100755 index 000000000..fc6bef79c --- /dev/null +++ b/scripts/packetdrill_tests.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
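The FindRtAttr helper added to socket_netlink_util above is what the new GetLink tests use to pull IFLA_IFNAME out of RTM_NEWLINK replies, and the same pattern works for any rtattr. A short usage sketch for illustration only (the IFLA_MTU choice and the function name are hypothetical, not taken from the patch), assuming the test utility header declared above:

    #include <linux/rtnetlink.h>

    #include <cstdint>
    #include <cstring>

    #include "test/syscalls/linux/socket_netlink_util.h"

    namespace gvisor {
    namespace testing {

    // Returns the IFLA_MTU attribute of an RTM_NEWLINK message, or -1 if the
    // attribute is missing or too short.
    int32_t MtuFromNewLinkMessage(const struct nlmsghdr* hdr) {
      const auto* msg =
          reinterpret_cast<const struct ifinfomsg*>(NLMSG_DATA(hdr));
      const struct rtattr* rta = FindRtAttr(hdr, msg, IFLA_MTU);
      if (rta == nullptr ||
          RTA_PAYLOAD(rta) < static_cast<int>(sizeof(int32_t))) {
        return -1;
      }
      int32_t mtu;
      std::memcpy(&mtu, RTA_DATA(rta), sizeof(mtu));
      return mtu;
    }

    }  // namespace testing
    }  // namespace gvisor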
+ +source $(dirname $0)/common.sh + +install_runsc_for_test runsc-d +test_runsc $(bazel query "attr(tags, manual, tests(//test/packetdrill/...))") diff --git a/test/packetdrill/defs.bzl b/test/packetdrill/defs.bzl index 582f97e0c..8623ce7b1 100644 --- a/test/packetdrill/defs.bzl +++ b/test/packetdrill/defs.bzl @@ -15,7 +15,7 @@ def _packetdrill_test_impl(ctx): # Make sure that everything is readable here. "find . -type f -exec chmod a+rx {} \\;", "find . -type d -exec chmod a+rx {} \\;", - "%s %s --init_script %s -- %s\n" % ( + "%s %s --init_script %s $@ -- %s\n" % ( test_runner.short_path, " ".join(ctx.attr.flags), ctx.files._init_script[0].short_path, @@ -76,7 +76,9 @@ def packetdrill_netstack_test(name, **kwargs): kwargs["tags"] = _PACKETDRILL_TAGS _packetdrill_test( name = name + "_netstack_test", - flags = ["--dut_platform", "netstack"], + # This is the default runtime unless + # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. + flags = ["--dut_platform", "netstack", "--runtime", "runsc-d"], **kwargs ) diff --git a/test/packetdrill/packetdrill_test.sh b/test/packetdrill/packetdrill_test.sh index 614d94d74..0b22dfd5c 100755 --- a/test/packetdrill/packetdrill_test.sh +++ b/test/packetdrill/packetdrill_test.sh @@ -29,7 +29,7 @@ function failure() { } trap 'failure ${LINENO} "$BASH_COMMAND"' ERR -declare -r LONGOPTS="dut_platform:,init_script:" +declare -r LONGOPTS="dut_platform:,init_script:,runtime:" # Don't use declare below so that the error from getopt will end the script. PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") @@ -39,6 +39,7 @@ eval set -- "$PARSED" while true; do case "$1" in --dut_platform) + # Either "linux" or "netstack". declare -r DUT_PLATFORM="$2" shift 2 ;; @@ -46,6 +47,13 @@ while true; do declare -r INIT_SCRIPT="$2" shift 2 ;; + --runtime) + # Not readonly because there might be multiple --runtime arguments and we + # want to use just the last one. Only used if --dut_platform is + # "netstack". + declare RUNTIME="$2" + shift 2 + ;; --) shift break @@ -61,9 +69,13 @@ declare -r scripts="$@" # Check that the required flags are defined in a way that is safe for "set -u". if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then - declare -r RUNTIME="--runtime runsc-d" + if [[ -z "${RUNTIME-}" ]]; then + echo "FAIL: Missing --runtime argument: ${RUNTIME-}" + exit 2 + fi + declare -r RUNTIME_ARG="--runtime ${RUNTIME}" elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then - declare -r RUNTIME="" + declare -r RUNTIME_ARG="" else echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}" exit 2 @@ -143,7 +155,7 @@ done docker pull "${IMAGE_TAG}" # Create the DUT container and connect to network. -DUT=$(docker create ${RUNTIME} --privileged --rm \ +DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \ --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) docker network connect "${CTRL_NET}" \ --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ -- cgit v1.2.3 From eea0eeee933ba8406ae688fce4348271f9513514 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 5 Feb 2020 11:25:10 -0800 Subject: Disable get/set xattrs until list/remove exist too. 
PiperOrigin-RevId: 293411655 --- pkg/sentry/syscalls/linux/linux64_amd64.go | 27 ++++--- pkg/sentry/syscalls/linux/linux64_arm64.go | 37 +++++---- test/syscalls/linux/xattr.cc | 124 +++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+), 29 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 7435b50bf..588f8b087 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -228,18 +228,21 @@ var AMD64 = &kernel.SyscallTable{ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), - 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), - 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), - 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), - 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), - 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), - 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), - 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + // TODO(b/148303075): Enable set/getxattr (in their various + // forms) once we also have list and removexattr. The JVM + // assumes that if get/set exist, then list and remove do too. 
+ 188: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 189: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 190: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), 200: syscalls.Supported("tkill", Tkill), 201: syscalls.Supported("time", Time), 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index 03a39fe65..06e5ee401 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -36,23 +36,26 @@ var ARM64 = &kernel.SyscallTable{ }, AuditNumber: linux.AUDIT_ARCH_AARCH64, Table: map[uintptr]kernel.Syscall{ - 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), - 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), - 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), - 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), - 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), - 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), - 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 12: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + // TODO(b/148303075): Enable set/getxattr (in their various + // forms) once we also have list and removexattr. The JVM + // assumes that if get/set exist, then list and remove do too. 
+ 5: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 6: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 7: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 13: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), 17: syscalls.Supported("getcwd", Getcwd), 18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), 19: syscalls.Supported("eventfd2", Eventfd2), diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index ab21d68c6..85eb31847 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -39,6 +39,10 @@ namespace { class XattrTest : public FileTest {}; TEST_F(XattrTest, XattrNullName) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), @@ -48,6 +52,10 @@ TEST_F(XattrTest, XattrNullName) { } TEST_F(XattrTest, XattrEmptyName) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0), @@ -56,6 +64,10 @@ TEST_F(XattrTest, XattrEmptyName) { } TEST_F(XattrTest, XattrLargeName) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); @@ -77,6 +89,10 @@ TEST_F(XattrTest, XattrLargeName) { } TEST_F(XattrTest, XattrInvalidPrefix) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. 
+ SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); std::string name(XATTR_NAME_MAX, 'a'); EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), @@ -88,6 +104,10 @@ TEST_F(XattrTest, XattrInvalidPrefix) { // Do not allow save/restore cycles after making the test file read-only, as // the restore will fail to open it with r/w permissions. TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -113,6 +133,10 @@ TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { // Do not allow save/restore cycles after making the test file write-only, as // the restore will fail to open it with r/w permissions. TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -143,6 +167,10 @@ TEST_F(XattrTest, XattrTrustedWithNonadmin) { } TEST_F(XattrTest, XattrOnDirectory) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const char name[] = "user.test"; EXPECT_THAT(setxattr(dir.path().c_str(), name, NULL, 0, /*flags=*/0), @@ -152,6 +180,10 @@ TEST_F(XattrTest, XattrOnDirectory) { } TEST_F(XattrTest, XattrOnSymlink) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); @@ -163,6 +195,10 @@ TEST_F(XattrTest, XattrOnSymlink) { } TEST_F(XattrTest, XattrOnInvalidFileTypes) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char name[] = "user.test"; char char_device[] = "/dev/zero"; @@ -181,6 +217,10 @@ TEST_F(XattrTest, XattrOnInvalidFileTypes) { } TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -196,6 +236,10 @@ TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, SetxattrZeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. 
+ SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -208,6 +252,10 @@ TEST_F(XattrTest, SetxattrZeroSize) { } TEST_F(XattrTest, SetxattrSizeTooLarge) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; @@ -223,6 +271,10 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) { } TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), @@ -232,6 +284,10 @@ TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { } TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); @@ -240,6 +296,10 @@ TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val(XATTR_SIZE_MAX + 1); @@ -256,6 +316,10 @@ TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { } TEST_F(XattrTest, SetxattrReplaceWithSmaller) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -271,6 +335,10 @@ TEST_F(XattrTest, SetxattrReplaceWithSmaller) { } TEST_F(XattrTest, SetxattrReplaceWithLarger) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -285,6 +353,10 @@ TEST_F(XattrTest, SetxattrReplaceWithLarger) { } TEST_F(XattrTest, SetxattrCreateFlag) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), @@ -296,6 +368,10 @@ TEST_F(XattrTest, SetxattrCreateFlag) { } TEST_F(XattrTest, SetxattrReplaceFlag) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. 
+ SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), @@ -308,6 +384,10 @@ TEST_F(XattrTest, SetxattrReplaceFlag) { } TEST_F(XattrTest, SetxattrInvalidFlags) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); int invalid_flags = 0xff; EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags), @@ -315,6 +395,10 @@ TEST_F(XattrTest, SetxattrInvalidFlags) { } TEST_F(XattrTest, Getxattr) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; int val = 1234; @@ -327,6 +411,10 @@ TEST_F(XattrTest, Getxattr) { } TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -339,6 +427,10 @@ TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, GetxattrSizeLargerThanValue) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -354,6 +446,10 @@ TEST_F(XattrTest, GetxattrSizeLargerThanValue) { } TEST_F(XattrTest, GetxattrZeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -367,6 +463,10 @@ TEST_F(XattrTest, GetxattrZeroSize) { } TEST_F(XattrTest, GetxattrSizeTooLarge) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -383,6 +483,10 @@ TEST_F(XattrTest, GetxattrSizeTooLarge) { } TEST_F(XattrTest, GetxattrNullValue) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -394,6 +498,10 @@ TEST_F(XattrTest, GetxattrNullValue) { } TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -410,12 +518,20 @@ TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, GetxattrNonexistentName) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. 
+ SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } TEST_F(XattrTest, LGetSetxattrOnSymlink) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); @@ -427,6 +543,10 @@ TEST_F(XattrTest, LGetSetxattrOnSymlink) { } TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); const char name[] = "user.test"; int val = 1234; @@ -441,6 +561,10 @@ TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { } TEST_F(XattrTest, FGetSetxattr) { + // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are + // supported, and get/set have been added pack to the syscall table. + SKIP_IF(IsRunningOnGvisor()); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_.c_str(), 0)); const char name[] = "user.test"; -- cgit v1.2.3 From f3d95607036b8a502c65aa7b3e8145227274dbbc Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 5 Feb 2020 17:56:00 -0800 Subject: recv() on a closed TCP socket returns ENOTCONN From RFC 793 s3.9 p58 Event Processing: If RECEIVE Call arrives in CLOSED state and the user has access to such a connection, the return should be "error: connection does not exist" Fixes #1598 PiperOrigin-RevId: 293494287 --- pkg/sentry/socket/netstack/netstack.go | 7 ++++++- pkg/tcpip/tcpip.go | 4 ++++ pkg/tcpip/transport/tcp/endpoint.go | 4 ++-- pkg/tcpip/transport/tcp/tcp_test.go | 9 ++++----- test/syscalls/linux/tcp_socket.cc | 9 +++++++++ 5 files changed, 25 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 049d04bf2..ed2fbcceb 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -2229,11 +2229,16 @@ func (s *SocketOperations) coalescingRead(ctx context.Context, dst usermem.IOSeq var copied int // Copy as many views as possible into the user-provided buffer. - for dst.NumBytes() != 0 { + for { + // Always do at least one fetchReadView, even if the number of bytes to + // read is 0. err = s.fetchReadView() if err != nil { break } + if dst.NumBytes() == 0 { + break + } var n int var e error diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 0fa141d58..d29d9a704 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -1124,6 +1124,10 @@ type ReadErrors struct { // InvalidEndpointState is the number of times we found the endpoint state // to be unexpected. InvalidEndpointState StatCounter + + // NotConnected is the number of times we tried to read but found that the + // endpoint was not connected. + NotConnected StatCounter } // WriteErrors collects packet write errors from an endpoint write call. 
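For illustration only (this helper is not part of the change and its name is made up): with the new counter in place, a netstack user that reads from a TCP endpoint which is neither connected nor in an error state now sees tcpip.ErrNotConnected — matching the ENOTCONN that Linux returns from recv(2) — and the endpoint records the event under ReadErrors.NotConnected rather than InvalidEndpointState.

package example

import (
	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
)

// notConnectedReads reads once from an unconnected endpoint and returns how
// many such reads the endpoint has counted so far.
func notConnectedReads(ep tcpip.Endpoint) uint64 {
	if _, _, err := ep.Read(nil); err == tcpip.ErrNotConnected {
		return ep.Stats().(*tcp.Stats).ReadErrors.NotConnected.Value()
	}
	// The endpoint was connected (or in an error state) after all.
	return 0
}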
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index b5a8e15ee..e4a6b1b8b 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1003,8 +1003,8 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, if s == StateError { return buffer.View{}, tcpip.ControlMessages{}, he } - e.stats.ReadErrors.InvalidEndpointState.Increment() - return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidEndpointState + e.stats.ReadErrors.NotConnected.Increment() + return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrNotConnected } v, err := e.readLocked() diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 2c1505067..cc118c993 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -5405,12 +5405,11 @@ func TestEndpointBindListenAcceptState(t *testing.T) { t.Errorf("Unexpected endpoint state: want %v, got %v", want, got) } - // Expect InvalidEndpointState errors on a read at this point. - if _, _, err := ep.Read(nil); err != tcpip.ErrInvalidEndpointState { - t.Fatalf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrInvalidEndpointState) + if _, _, err := ep.Read(nil); err != tcpip.ErrNotConnected { + t.Errorf("got c.EP.Read(nil) = %v, want = %v", err, tcpip.ErrNotConnected) } - if got := ep.Stats().(*tcp.Stats).ReadErrors.InvalidEndpointState.Value(); got != 1 { - t.Fatalf("got EP stats Stats.ReadErrors.InvalidEndpointState got %v want %v", got, 1) + if got := ep.Stats().(*tcp.Stats).ReadErrors.NotConnected.Value(); got != 1 { + t.Errorf("got EP stats Stats.ReadErrors.NotConnected got %v want %v", got, 1) } if err := ep.Listen(10); err != nil { diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 525ccbd88..8a8b68e75 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -1339,6 +1339,15 @@ TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptGreaterThanZero) { EXPECT_EQ(get, kTCPDeferAccept); } +TEST_P(SimpleTcpSocketTest, RecvOnClosedSocket) { + auto s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + char buf[1]; + EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallFailsWithErrno(ENOTCONN)); + EXPECT_THAT(recv(s.get(), buf, sizeof(buf), 0), + SyscallFailsWithErrno(ENOTCONN)); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); -- cgit v1.2.3 From 1b6a12a768216a99a5e0428c42ea4faf79cf3b50 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Wed, 5 Feb 2020 22:45:44 -0800 Subject: Add notes to relevant tests. These were out-of-band notes that can help provide additional context and simplify automated imports. 
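For illustration only (the bug identifier and function below are invented): the imported notes are short comments of this form, tying a regression test back to the report it guards against so that the context survives automated syncs.

package example

import "testing"

// safeDiv is a stand-in for the code under test.
func safeDiv(a, b int) int {
	if b == 0 {
		return 0
	}
	return a / b
}

// Regression test for b/123456789: safeDiv used to crash on a zero divisor.
func TestSafeDivZeroDivisor(t *testing.T) {
	if got := safeDiv(1, 0); got != 0 {
		t.Errorf("safeDiv(1, 0) = %d, want 0", got)
	}
}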
PiperOrigin-RevId: 293525915 --- pkg/metric/metric.go | 1 - pkg/sentry/arch/arch_x86.go | 4 ++ pkg/sentry/arch/signal_amd64.go | 2 +- pkg/sentry/fs/file_overlay_test.go | 1 + pkg/sentry/fs/proc/README.md | 4 ++ pkg/sentry/kernel/BUILD | 1 + pkg/sentry/kernel/kernel.go | 3 ++ pkg/sentry/kernel/kernel_opts.go | 20 +++++++ pkg/sentry/socket/hostinet/BUILD | 1 + pkg/sentry/socket/hostinet/socket.go | 5 +- pkg/sentry/socket/hostinet/sockopt_impl.go | 27 ++++++++++ pkg/tcpip/transport/tcp/endpoint.go | 3 ++ runsc/boot/filter/BUILD | 1 + runsc/boot/filter/config.go | 13 ----- runsc/boot/filter/config_profile.go | 34 ++++++++++++ runsc/container/console_test.go | 5 +- runsc/dockerutil/dockerutil.go | 11 ++-- runsc/testutil/BUILD | 5 +- runsc/testutil/testutil.go | 54 ------------------- runsc/testutil/testutil_runfiles.go | 75 +++++++++++++++++++++++++++ test/image/image_test.go | 8 +-- test/syscalls/build_defs.bzl | 35 +++++++++++-- test/syscalls/linux/chroot.cc | 2 +- test/syscalls/linux/concurrency.cc | 3 +- test/syscalls/linux/exec_proc_exe_workload.cc | 6 +++ test/syscalls/linux/fork.cc | 5 +- test/syscalls/linux/mmap.cc | 8 +-- test/syscalls/linux/open_create.cc | 1 + test/syscalls/linux/preadv.cc | 1 + test/syscalls/linux/proc.cc | 46 +++++++++++++--- test/syscalls/linux/readv.cc | 4 +- test/syscalls/linux/rseq.cc | 2 +- test/syscalls/linux/select.cc | 2 +- test/syscalls/linux/shm.cc | 2 +- test/syscalls/linux/sigprocmask.cc | 2 +- test/syscalls/linux/socket_unix_non_stream.cc | 4 +- test/syscalls/linux/symlink.cc | 2 +- test/syscalls/linux/tcp_socket.cc | 3 +- test/syscalls/linux/time.cc | 1 + test/syscalls/linux/tkill.cc | 2 +- test/util/temp_path.cc | 1 + tools/build/tags.bzl | 4 ++ tools/defs.bzl | 17 +++++- 43 files changed, 318 insertions(+), 113 deletions(-) create mode 100644 pkg/sentry/kernel/kernel_opts.go create mode 100644 pkg/sentry/socket/hostinet/sockopt_impl.go create mode 100644 runsc/boot/filter/config_profile.go create mode 100644 runsc/testutil/testutil_runfiles.go (limited to 'test') diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go index 93d4f2b8c..006fcd9ab 100644 --- a/pkg/metric/metric.go +++ b/pkg/metric/metric.go @@ -46,7 +46,6 @@ var ( // // TODO(b/67298402): Support non-cumulative metrics. // TODO(b/67298427): Support metric fields. -// type Uint64Metric struct { // value is the actual value of the metric. It must be accessed // atomically. diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index a18093155..3db8bd34b 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -114,6 +114,10 @@ func newX86FPStateSlice() []byte { size, align := cpuid.HostFeatureSet().ExtendedStateSize() capacity := size // Always use at least 4096 bytes. + // + // For the KVM platform, this state is a fixed 4096 bytes, so make sure + // that the underlying array is at _least_ that size otherwise we will + // corrupt random memory. This is not a pleasant thing to debug. if capacity < 4096 { capacity = 4096 } diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index 81b92bb43..6fb756f0e 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -55,7 +55,7 @@ type SignalContext64 struct { Trapno uint64 Oldmask linux.SignalSet Cr2 uint64 - // Pointer to a struct _fpstate. + // Pointer to a struct _fpstate. See b/33003106#comment8. 
Fpstate uint64 Reserved [8]uint64 } diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go index 02538bb4f..a76d87e3a 100644 --- a/pkg/sentry/fs/file_overlay_test.go +++ b/pkg/sentry/fs/file_overlay_test.go @@ -177,6 +177,7 @@ func TestReaddirRevalidation(t *testing.T) { // TestReaddirOverlayFrozen tests that calling Readdir on an overlay file with // a frozen dirent tree does not make Readdir calls to the underlying files. +// This is a regression test for b/114808269. func TestReaddirOverlayFrozen(t *testing.T) { ctx := contexttest.Context(t) diff --git a/pkg/sentry/fs/proc/README.md b/pkg/sentry/fs/proc/README.md index 5d4ec6c7b..6667a0916 100644 --- a/pkg/sentry/fs/proc/README.md +++ b/pkg/sentry/fs/proc/README.md @@ -11,6 +11,8 @@ inconsistency, please file a bug. The following files are implemented: + + | File /proc/ | Content | | :------------------------ | :---------------------------------------------------- | | [cpuinfo](#cpuinfo) | Info about the CPU | @@ -22,6 +24,8 @@ The following files are implemented: | [uptime](#uptime) | Wall clock since boot, combined idle time of all cpus | | [version](#version) | Kernel version | + + ### cpuinfo ```bash diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index a27628c0a..2231d6973 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -91,6 +91,7 @@ go_library( "fs_context.go", "ipc_namespace.go", "kernel.go", + "kernel_opts.go", "kernel_state.go", "pending_signals.go", "pending_signals_list.go", diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index dcd6e91c4..3ee760ba2 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -235,6 +235,9 @@ type Kernel struct { // events. This is initialized lazily on the first unimplemented // syscall. unimplementedSyscallEmitter eventchannel.Emitter `state:"nosave"` + + // SpecialOpts contains special kernel options. + SpecialOpts } // InitKernelArgs holds arguments to Init. diff --git a/pkg/sentry/kernel/kernel_opts.go b/pkg/sentry/kernel/kernel_opts.go new file mode 100644 index 000000000..2e66ec587 --- /dev/null +++ b/pkg/sentry/kernel/kernel_opts.go @@ -0,0 +1,20 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +// SpecialOpts contains non-standard options for the kernel. 
+// +// +stateify savable +type SpecialOpts struct{} diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 5a07d5d0e..023bad156 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -10,6 +10,7 @@ go_library( "save_restore.go", "socket.go", "socket_unsafe.go", + "sockopt_impl.go", "stack.go", ], visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index 34f63986f..de76388ac 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -285,7 +285,7 @@ func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPt } // Whitelist options and constrain option length. - var optlen int + optlen := getSockOptLen(t, level, name) switch level { case linux.SOL_IP: switch name { @@ -330,7 +330,7 @@ func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPt // SetSockOpt implements socket.Socket.SetSockOpt. func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error { // Whitelist options and constrain option length. - var optlen int + optlen := setSockOptLen(t, level, name) switch level { case linux.SOL_IP: switch name { @@ -353,6 +353,7 @@ func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt [ optlen = sizeofInt32 } } + if optlen == 0 { // Pretend to accept socket options we don't understand. This seems // dangerous, but it's what netstack does... diff --git a/pkg/sentry/socket/hostinet/sockopt_impl.go b/pkg/sentry/socket/hostinet/sockopt_impl.go new file mode 100644 index 000000000..8a783712e --- /dev/null +++ b/pkg/sentry/socket/hostinet/sockopt_impl.go @@ -0,0 +1,27 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hostinet + +import ( + "gvisor.dev/gvisor/pkg/sentry/kernel" +) + +func getSockOptLen(t *kernel.Task, level, name int) int { + return 0 // No custom options. +} + +func setSockOptLen(t *kernel.Task, level, name int) int { + return 0 // No custom options. +} diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index e4a6b1b8b..f2be0e651 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2166,6 +2166,9 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { e.isRegistered = true e.setEndpointState(StateListen) + // The channel may be non-nil when we're restoring the endpoint, and it + // may be pre-populated with some previously accepted (but not Accepted) + // endpoints. 
if e.acceptedChan == nil { e.acceptedChan = make(chan *endpoint, backlog) } diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index ce30f6c53..ed18f0047 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -8,6 +8,7 @@ go_library( "config.go", "config_amd64.go", "config_arm64.go", + "config_profile.go", "extra_filters.go", "extra_filters_msan.go", "extra_filters_race.go", diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index f8d351c7b..c69f4c602 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -536,16 +536,3 @@ func controlServerFilters(fd int) seccomp.SyscallRules { }, } } - -// profileFilters returns extra syscalls made by runtime/pprof package. -func profileFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - syscall.SYS_OPENAT: []seccomp.Rule{ - { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), - }, - }, - } -} diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go new file mode 100644 index 000000000..194952a7b --- /dev/null +++ b/runsc/boot/filter/config_profile.go @@ -0,0 +1,34 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/seccomp" +) + +// profileFilters returns extra syscalls made by runtime/pprof package. +func profileFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_OPENAT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), + }, + }, + } +} diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 060b63bf3..c2518d52b 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -196,7 +196,10 @@ func TestJobControlSignalExec(t *testing.T) { defer ptyMaster.Close() defer ptySlave.Close() - // Exec bash and attach a terminal. + // Exec bash and attach a terminal. Note that occasionally /bin/sh + // may be a different shell or have a different configuration (such + // as disabling interactive mode and job control). Since we want to + // explicitly test interactive mode, use /bin/bash. See b/116981926. 
execArgs := &control.ExecArgs{ Filename: "/bin/bash", // Don't let bash execute from profile or rc files, otherwise diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go index 9b6346ca2..1ff5e8cc3 100644 --- a/runsc/dockerutil/dockerutil.go +++ b/runsc/dockerutil/dockerutil.go @@ -143,8 +143,11 @@ func PrepareFiles(names ...string) (string, error) { return "", fmt.Errorf("os.Chmod(%q, 0777) failed: %v", dir, err) } for _, name := range names { - src := getLocalPath(name) - dst := path.Join(dir, name) + src, err := testutil.FindFile(name) + if err != nil { + return "", fmt.Errorf("testutil.Preparefiles(%q) failed: %v", name, err) + } + dst := path.Join(dir, path.Base(name)) if err := testutil.Copy(src, dst); err != nil { return "", fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err) } @@ -152,10 +155,6 @@ func PrepareFiles(names ...string) (string, error) { return dir, nil } -func getLocalPath(file string) string { - return path.Join(".", file) -} - // do executes docker command. func do(args ...string) (string, error) { log.Printf("Running: docker %s\n", args) diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD index f845120b0..945405303 100644 --- a/runsc/testutil/BUILD +++ b/runsc/testutil/BUILD @@ -5,7 +5,10 @@ package(licenses = ["notice"]) go_library( name = "testutil", testonly = 1, - srcs = ["testutil.go"], + srcs = [ + "testutil.go", + "testutil_runfiles.go", + ], visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go index edf2e809a..80c2c9680 100644 --- a/runsc/testutil/testutil.go +++ b/runsc/testutil/testutil.go @@ -79,60 +79,6 @@ func ConfigureExePath() error { return nil } -// FindFile searchs for a file inside the test run environment. It returns the -// full path to the file. It fails if none or more than one file is found. -func FindFile(path string) (string, error) { - wd, err := os.Getwd() - if err != nil { - return "", err - } - - // The test root is demarcated by a path element called "__main__". Search for - // it backwards from the working directory. - root := wd - for { - dir, name := filepath.Split(root) - if name == "__main__" { - break - } - if len(dir) == 0 { - return "", fmt.Errorf("directory __main__ not found in %q", wd) - } - // Remove ending slash to loop around. - root = dir[:len(dir)-1] - } - - // Annoyingly, bazel adds the build type to the directory path for go - // binaries, but not for c++ binaries. We use two different patterns to - // to find our file. - patterns := []string{ - // Try the obvious path first. - filepath.Join(root, path), - // If it was a go binary, use a wildcard to match the build - // type. The pattern is: /test-path/__main__/directories/*/file. - filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)), - } - - for _, p := range patterns { - matches, err := filepath.Glob(p) - if err != nil { - // "The only possible returned error is ErrBadPattern, - // when pattern is malformed." -godoc - return "", fmt.Errorf("error globbing %q: %v", p, err) - } - switch len(matches) { - case 0: - // Try the next pattern. - case 1: - // We found it. - return matches[0], nil - default: - return "", fmt.Errorf("more than one match found for %q: %s", path, matches) - } - } - return "", fmt.Errorf("file %q not found", path) -} - // TestConfig returns the default configuration to use in tests. Note that // 'RootDir' must be set by caller if required. 
func TestConfig() *boot.Config { diff --git a/runsc/testutil/testutil_runfiles.go b/runsc/testutil/testutil_runfiles.go new file mode 100644 index 000000000..ece9ea9a1 --- /dev/null +++ b/runsc/testutil/testutil_runfiles.go @@ -0,0 +1,75 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "fmt" + "os" + "path/filepath" +) + +// FindFile searchs for a file inside the test run environment. It returns the +// full path to the file. It fails if none or more than one file is found. +func FindFile(path string) (string, error) { + wd, err := os.Getwd() + if err != nil { + return "", err + } + + // The test root is demarcated by a path element called "__main__". Search for + // it backwards from the working directory. + root := wd + for { + dir, name := filepath.Split(root) + if name == "__main__" { + break + } + if len(dir) == 0 { + return "", fmt.Errorf("directory __main__ not found in %q", wd) + } + // Remove ending slash to loop around. + root = dir[:len(dir)-1] + } + + // Annoyingly, bazel adds the build type to the directory path for go + // binaries, but not for c++ binaries. We use two different patterns to + // to find our file. + patterns := []string{ + // Try the obvious path first. + filepath.Join(root, path), + // If it was a go binary, use a wildcard to match the build + // type. The pattern is: /test-path/__main__/directories/*/file. + filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)), + } + + for _, p := range patterns { + matches, err := filepath.Glob(p) + if err != nil { + // "The only possible returned error is ErrBadPattern, + // when pattern is malformed." -godoc + return "", fmt.Errorf("error globbing %q: %v", p, err) + } + switch len(matches) { + case 0: + // Try the next pattern. + case 1: + // We found it. 
+ return matches[0], nil + default: + return "", fmt.Errorf("more than one match found for %q: %s", path, matches) + } + } + return "", fmt.Errorf("file %q not found", path) +} diff --git a/test/image/image_test.go b/test/image/image_test.go index d0dcb1861..0a1e19d6f 100644 --- a/test/image/image_test.go +++ b/test/image/image_test.go @@ -107,7 +107,7 @@ func TestHttpd(t *testing.T) { } d := dockerutil.MakeDocker("http-test") - dir, err := dockerutil.PrepareFiles("latin10k.txt") + dir, err := dockerutil.PrepareFiles("test/image/latin10k.txt") if err != nil { t.Fatalf("PrepareFiles() failed: %v", err) } @@ -139,7 +139,7 @@ func TestNginx(t *testing.T) { } d := dockerutil.MakeDocker("net-test") - dir, err := dockerutil.PrepareFiles("latin10k.txt") + dir, err := dockerutil.PrepareFiles("test/image/latin10k.txt") if err != nil { t.Fatalf("PrepareFiles() failed: %v", err) } @@ -183,7 +183,7 @@ func TestMysql(t *testing.T) { } client := dockerutil.MakeDocker("mysql-client-test") - dir, err := dockerutil.PrepareFiles("mysql.sql") + dir, err := dockerutil.PrepareFiles("test/image/mysql.sql") if err != nil { t.Fatalf("PrepareFiles() failed: %v", err) } @@ -283,7 +283,7 @@ func TestRuby(t *testing.T) { } d := dockerutil.MakeDocker("ruby-test") - dir, err := dockerutil.PrepareFiles("ruby.rb", "ruby.sh") + dir, err := dockerutil.PrepareFiles("test/image/ruby.rb", "test/image/ruby.sh") if err != nil { t.Fatalf("PrepareFiles() failed: %v", err) } diff --git a/test/syscalls/build_defs.bzl b/test/syscalls/build_defs.bzl index 1df761dd0..cbab85ef7 100644 --- a/test/syscalls/build_defs.bzl +++ b/test/syscalls/build_defs.bzl @@ -2,8 +2,6 @@ load("//tools:defs.bzl", "loopback") -# syscall_test is a macro that will create targets to run the given test target -# on the host (native) and runsc. def syscall_test( test, shard_count = 5, @@ -13,6 +11,19 @@ def syscall_test( add_uds_tree = False, add_hostinet = False, tags = None): + """syscall_test is a macro that will create targets for all platforms. + + Args: + test: the test target. + shard_count: shards for defined tests. + size: the defined test size. + use_tmpfs: use tmpfs in the defined tests. + add_overlay: add an overlay test. + add_uds_tree: add a UDS test. + add_hostinet: add a hostinet test. + tags: starting test tags. + """ + _syscall_test( test = test, shard_count = shard_count, @@ -111,6 +122,19 @@ def _syscall_test( # all the tests on a specific flavor. Use --test_tag_filters=ptrace,file_shared. tags += [full_platform, "file_" + file_access] + # Hash this target into one of 15 buckets. This can be used to + # randomly split targets between different workflows. + hash15 = hash(native.package_name() + name) % 15 + tags.append("hash15:" + str(hash15)) + + # TODO(b/139838000): Tests using hostinet must be disabled on Guitar until + # we figure out how to request ipv4 sockets on Guitar machines. + if network == "host": + tags.append("noguitar") + + # Disable off-host networking. + tags.append("requires-net:loopback") + # Add tag to prevent the tests from running in a Bazel sandbox. # TODO(b/120560048): Make the tests run without this tag. tags.append("no-sandbox") @@ -118,8 +142,11 @@ def _syscall_test( # TODO(b/112165693): KVM tests are tagged "manual" to until the platform is # more stable. if platform == "kvm": - tags += ["manual"] - tags += ["requires-kvm"] + tags.append("manual") + tags.append("requires-kvm") + + # TODO(b/112165693): Remove when tests pass reliably. 
+ tags.append("notap") args = [ # Arguments are passed directly to syscall_test_runner binary. diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc index 0a2d44a2c..85ec013d5 100644 --- a/test/syscalls/linux/chroot.cc +++ b/test/syscalls/linux/chroot.cc @@ -167,7 +167,7 @@ TEST(ChrootTest, DotDotFromOpenFD) { } // Test that link resolution in a chroot can escape the root by following an -// open proc fd. +// open proc fd. Regression test for b/32316719. TEST(ChrootTest, ProcFdLinkResolutionInChroot) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc index f41f99900..7cd6a75bd 100644 --- a/test/syscalls/linux/concurrency.cc +++ b/test/syscalls/linux/concurrency.cc @@ -46,7 +46,8 @@ TEST(ConcurrencyTest, SingleProcessMultithreaded) { } // Test that multiple threads in this process continue to execute in parallel, -// even if an unrelated second process is spawned. +// even if an unrelated second process is spawned. Regression test for +// b/32119508. TEST(ConcurrencyTest, MultiProcessMultithreaded) { // In PID 1, start TIDs 1 and 2, and put both to sleep. // diff --git a/test/syscalls/linux/exec_proc_exe_workload.cc b/test/syscalls/linux/exec_proc_exe_workload.cc index b790fe5be..2989379b7 100644 --- a/test/syscalls/linux/exec_proc_exe_workload.cc +++ b/test/syscalls/linux/exec_proc_exe_workload.cc @@ -21,6 +21,12 @@ #include "test/util/posix_error.h" int main(int argc, char** argv, char** envp) { + // This is annoying. Because remote build systems may put these binaries + // in a content-addressable-store, you may wind up with /proc/self/exe + // pointing to some random path (but with a sensible argv[0]). + // + // Therefore, this test simply checks that the /proc/self/exe + // is absolute and *doesn't* match argv[1]. std::string exe = gvisor::testing::ProcessExePath(getpid()).ValueOrDie(); if (exe[0] != '/') { diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index 906f3358d..ff8bdfeb0 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -271,7 +271,7 @@ TEST_F(ForkTest, Alarm) { EXPECT_EQ(0, alarmed); } -// Child cannot affect parent private memory. +// Child cannot affect parent private memory. Regression test for b/24137240. TEST_F(ForkTest, PrivateMemory) { std::atomic local(0); @@ -298,6 +298,9 @@ TEST_F(ForkTest, PrivateMemory) { } // Kernel-accessed buffers should remain coherent across COW. +// +// The buffer must be >= usermem.ZeroCopyMinBytes, as UnsafeAccess operates +// differently. Regression test for b/33811887. TEST_F(ForkTest, COWSegment) { constexpr int kBufSize = 1024; char* read_buf = private_; diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index 1c4d9f1c7..11fb1b457 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -1418,7 +1418,7 @@ TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) { // // On most platforms this is trivial, but when the file is mapped via the sentry // page cache (which does not yet support writing to shared mappings), a bug -// caused reads to fail unnecessarily on such mappings. +// caused reads to fail unnecessarily on such mappings. See b/28913513. 
TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { uintptr_t addr; size_t len = strlen(kFileContents); @@ -1435,7 +1435,7 @@ TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { // Tests that EFAULT is returned when invoking a syscall that requires the OS to // read past end of file (resulting in a fault in sentry context in the gVisor -// case). +// case). See b/28913513. TEST_F(MMapFileTest, InternalSigBus) { uintptr_t addr; ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, @@ -1578,7 +1578,7 @@ TEST_F(MMapFileTest, Bug38498194) { } // Tests that reading from a file to a memory mapping of the same file does not -// deadlock. +// deadlock. See b/34813270. TEST_F(MMapFileTest, SelfRead) { uintptr_t addr; ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, @@ -1590,7 +1590,7 @@ TEST_F(MMapFileTest, SelfRead) { } // Tests that writing to a file from a memory mapping of the same file does not -// deadlock. +// deadlock. Regression test for b/34813270. TEST_F(MMapFileTest, SelfWrite) { uintptr_t addr; ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc index 431733dbe..902d0a0dc 100644 --- a/test/syscalls/linux/open_create.cc +++ b/test/syscalls/linux/open_create.cc @@ -132,6 +132,7 @@ TEST(CreateTest, CreateFailsOnDirWithoutWritePerms) { } // A file originally created RW, but opened RO can later be opened RW. +// Regression test for b/65385065. TEST(CreateTest, OpenCreateROThenRW) { TempPath file(NewTempAbsPath()); diff --git a/test/syscalls/linux/preadv.cc b/test/syscalls/linux/preadv.cc index f7ea44054..5b0743fe9 100644 --- a/test/syscalls/linux/preadv.cc +++ b/test/syscalls/linux/preadv.cc @@ -37,6 +37,7 @@ namespace testing { namespace { +// Stress copy-on-write. Attempts to reproduce b/38430174. TEST(PreadvTest, MMConcurrencyStress) { // Fill a one-page file with zeroes (the contents don't really matter). const auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 169b723eb..a23fdb58d 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1352,13 +1352,19 @@ TEST(ProcPidSymlink, SubprocessZombied) { // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. - // 4.17 & gVisor: Syscall succeeds and returns 1 + // + // ~4.3: Syscall fails with EACCES. + // 4.17 & gVisor: Syscall succeeds and returns 1. + // // EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), // SyscallFailsWithErrno(EACCES)); // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. - // 4.17 & gVisor: Syscall succeeds and returns 1. + // + // ~4.3: Syscall fails with EACCES. + // 4.17 & gVisor: Syscall succeeds and returns 1. + // // EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), // SyscallFailsWithErrno(EACCES)); } @@ -1431,8 +1437,12 @@ TEST(ProcPidFile, SubprocessRunning) { TEST(ProcPidFile, SubprocessZombie) { char buf[1]; - // 4.17: Succeeds and returns 1 - // gVisor: Succeeds and returns 0 + // FIXME(gvisor.dev/issue/164): Loosen requirement due to inconsistent + // behavior on different kernels. + // + // ~4.3: Succeds and returns 0. + // 4.17: Succeeds and returns 1. + // gVisor: Succeeds and returns 0. 
EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds()); EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)), @@ -1458,7 +1468,10 @@ TEST(ProcPidFile, SubprocessZombie) { // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. + // + // ~4.3: Fails and returns EACCES. // gVisor & 4.17: Succeeds and returns 1. + // // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)), // SyscallFailsWithErrno(EACCES)); } @@ -1467,9 +1480,12 @@ TEST(ProcPidFile, SubprocessZombie) { TEST(ProcPidFile, SubprocessExited) { char buf[1]; - // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels. + // + // ~4.3: Fails and returns ESRCH. // gVisor: Fails with ESRCH. // 4.17: Succeeds and returns 1. + // // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)), // SyscallFailsWithErrno(ESRCH)); @@ -1641,7 +1657,7 @@ TEST(ProcTask, KilledThreadsDisappear) { EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", TaskFiles(initial, {child1.Tid()}))); - // Stat child1's task file. + // Stat child1's task file. Regression test for b/32097707. struct stat statbuf; const std::string child1_task_file = absl::StrCat("/proc/self/task/", child1.Tid()); @@ -1669,7 +1685,7 @@ TEST(ProcTask, KilledThreadsDisappear) { EXPECT_NO_ERRNO(EventuallyDirContainsExactly( "/proc/self/task", TaskFiles(initial, {child3.Tid(), child5.Tid()}))); - // Stat child1's task file again. This time it should fail. + // Stat child1's task file again. This time it should fail. See b/32097707. EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallFailsWithErrno(ENOENT)); @@ -1824,7 +1840,7 @@ TEST(ProcSysVmOvercommitMemory, HasNumericValue) { } // Check that link for proc fd entries point the target node, not the -// symlink itself. +// symlink itself. Regression test for b/31155070. TEST(ProcTaskFd, FstatatFollowsSymlink) { const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const FileDescriptor fd = @@ -1883,6 +1899,20 @@ TEST(ProcMounts, IsSymlink) { EXPECT_EQ(link, "self/mounts"); } +TEST(ProcSelfMountinfo, RequiredFieldsArePresent) { + auto mountinfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mountinfo")); + EXPECT_THAT( + mountinfo, + AllOf( + // Root mount. + ContainsRegex( + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"), + // Proc mount - always rw. + ContainsRegex( + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)"))); +} + // Check that /proc/self/mounts looks something like a real mounts file. TEST(ProcSelfMounts, RequiredFieldsArePresent) { auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts")); diff --git a/test/syscalls/linux/readv.cc b/test/syscalls/linux/readv.cc index 4069cbc7e..baaf9f757 100644 --- a/test/syscalls/linux/readv.cc +++ b/test/syscalls/linux/readv.cc @@ -254,7 +254,9 @@ TEST_F(ReadvTest, IovecOutsideTaskAddressRangeInNonemptyArray) { // This test depends on the maximum extent of a single readv() syscall, so // we can't tolerate interruption from saving. TEST(ReadvTestNoFixture, TruncatedAtMax_NoRandomSave) { - // Ensure that we won't be interrupted by ITIMER_PROF. + // Ensure that we won't be interrupted by ITIMER_PROF. This is particularly + // important in environments where automated profiling tools may start + // ITIMER_PROF automatically. 
struct itimerval itv = {}; auto const cleanup_itimer = ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_PROF, itv)); diff --git a/test/syscalls/linux/rseq.cc b/test/syscalls/linux/rseq.cc index 106c045e3..4bfb1ff56 100644 --- a/test/syscalls/linux/rseq.cc +++ b/test/syscalls/linux/rseq.cc @@ -36,7 +36,7 @@ namespace { // We must be very careful about how these tests are written. Each thread may // only have one struct rseq registration, which may be done automatically at // thread start (as of 2019-11-13, glibc does *not* support rseq and thus does -// not do so). +// not do so, but other libraries do). // // Testing of rseq is thus done primarily in a child process with no // registration. This means exec'ing a nostdlib binary, as rseq registration can diff --git a/test/syscalls/linux/select.cc b/test/syscalls/linux/select.cc index 424e2a67f..be2364fb8 100644 --- a/test/syscalls/linux/select.cc +++ b/test/syscalls/linux/select.cc @@ -146,7 +146,7 @@ TEST_F(SelectTest, IgnoreBitsAboveNfds) { // This test illustrates Linux's behavior of 'select' calls passing after // setrlimit RLIMIT_NOFILE is called. In particular, versions of sshd rely on -// this behavior. +// this behavior. See b/122318458. TEST_F(SelectTest, SetrlimitCallNOFILE) { fd_set read_set; FD_ZERO(&read_set); diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc index 7ba752599..c7fdbb924 100644 --- a/test/syscalls/linux/shm.cc +++ b/test/syscalls/linux/shm.cc @@ -473,7 +473,7 @@ TEST(ShmTest, PartialUnmap) { } // Check that sentry does not panic when asked for a zero-length private shm -// segment. +// segment. Regression test for b/110694797. TEST(ShmTest, GracefullyFailOnZeroLenSegmentCreation) { EXPECT_THAT(Shmget(IPC_PRIVATE, 0, 0), PosixErrorIs(EINVAL, _)); } diff --git a/test/syscalls/linux/sigprocmask.cc b/test/syscalls/linux/sigprocmask.cc index 654c6a47f..a603fc1d1 100644 --- a/test/syscalls/linux/sigprocmask.cc +++ b/test/syscalls/linux/sigprocmask.cc @@ -237,7 +237,7 @@ TEST_F(SigProcMaskTest, SignalHandler) { } // Check that sigprocmask correctly handles aliasing of the set and oldset -// pointers. +// pointers. Regression test for b/30502311. TEST_F(SigProcMaskTest, AliasedSets) { sigset_t mask; diff --git a/test/syscalls/linux/socket_unix_non_stream.cc b/test/syscalls/linux/socket_unix_non_stream.cc index 276a94eb8..884319e1d 100644 --- a/test/syscalls/linux/socket_unix_non_stream.cc +++ b/test/syscalls/linux/socket_unix_non_stream.cc @@ -109,7 +109,7 @@ PosixErrorOr> CreateFragmentedRegion(const int size, } // A contiguous iov that is heavily fragmented in FileMem can still be sent -// successfully. +// successfully. See b/115833655. TEST_P(UnixNonStreamSocketPairTest, FragmentedSendMsg) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -165,7 +165,7 @@ TEST_P(UnixNonStreamSocketPairTest, FragmentedSendMsg) { } // A contiguous iov that is heavily fragmented in FileMem can still be received -// into successfully. +// into successfully. Regression test for b/115833655. TEST_P(UnixNonStreamSocketPairTest, FragmentedRecvMsg) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc index b249ff91f..03ee1250d 100644 --- a/test/syscalls/linux/symlink.cc +++ b/test/syscalls/linux/symlink.cc @@ -38,7 +38,7 @@ mode_t FilePermission(const std::string& path) { } // Test that name collisions are checked on the new link path, not the source -// path. +// path. Regression test for b/31782115. 
TEST(SymlinkTest, CanCreateSymlinkWithCachedSourceDirent) { const std::string srcname = NewTempAbsPath(); const std::string newname = NewTempAbsPath(); diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 8a8b68e75..c4591a3b9 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -244,7 +244,8 @@ TEST_P(TcpSocketTest, ZeroWriteAllowed) { } // Test that a non-blocking write with a buffer that is larger than the send -// buffer size will not actually write the whole thing at once. +// buffer size will not actually write the whole thing at once. Regression test +// for b/64438887. TEST_P(TcpSocketTest, NonblockingLargeWrite) { // Set the FD to O_NONBLOCK. int opts; diff --git a/test/syscalls/linux/time.cc b/test/syscalls/linux/time.cc index c7eead17e..1ccb95733 100644 --- a/test/syscalls/linux/time.cc +++ b/test/syscalls/linux/time.cc @@ -62,6 +62,7 @@ TEST(TimeTest, VsyscallTime_InvalidAddressSIGSEGV) { ::testing::KilledBySignal(SIGSEGV), ""); } +// Mimics the gettimeofday(2) wrapper from the Go runtime <= 1.2. int vsyscall_gettimeofday(struct timeval* tv, struct timezone* tz) { constexpr uint64_t kVsyscallGettimeofdayEntry = 0xffffffffff600000; return reinterpret_cast( diff --git a/test/syscalls/linux/tkill.cc b/test/syscalls/linux/tkill.cc index bae377c69..8d8ebbb24 100644 --- a/test/syscalls/linux/tkill.cc +++ b/test/syscalls/linux/tkill.cc @@ -54,7 +54,7 @@ void SigHandler(int sig, siginfo_t* info, void* context) { TEST_CHECK(info->si_code == SI_TKILL); } -// Test with a real signal. +// Test with a real signal. Regression test for b/24790092. TEST(TkillTest, ValidTIDAndRealSignal) { struct sigaction sa; sa.sa_sigaction = SigHandler; diff --git a/test/util/temp_path.cc b/test/util/temp_path.cc index 35aacb172..9c10b6674 100644 --- a/test/util/temp_path.cc +++ b/test/util/temp_path.cc @@ -77,6 +77,7 @@ std::string NewTempAbsPath() { std::string NewTempRelPath() { return NextTempBasename(); } std::string GetAbsoluteTestTmpdir() { + // Note that TEST_TMPDIR is guaranteed to be set. char* env_tmpdir = getenv("TEST_TMPDIR"); std::string tmp_dir = env_tmpdir != nullptr ? std::string(env_tmpdir) : "/tmp"; diff --git a/tools/build/tags.bzl b/tools/build/tags.bzl index e99c87f81..a6db44e47 100644 --- a/tools/build/tags.bzl +++ b/tools/build/tags.bzl @@ -33,4 +33,8 @@ go_suffixes = [ "_wasm_unsafe", "_linux", "_linux_unsafe", + "_opts", + "_opts_unsafe", + "_impl", + "_impl_unsafe", ] diff --git a/tools/defs.bzl b/tools/defs.bzl index 5d5fa134a..c03b557ae 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -73,6 +73,16 @@ def calculate_sets(srcs): result[target].append(file) return result +def go_imports(name, src, out): + """Simplify a single Go source file by eliminating unused imports.""" + native.genrule( + name = name, + srcs = [src], + outs = [out], + tools = ["@org_golang_x_tools//cmd/goimports:goimports"], + cmd = ("$(location @org_golang_x_tools//cmd/goimports:goimports) $(SRCS) > $@"), + ) + def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = False, **kwargs): """Wraps the standard go_library and does stateification and marshalling. 
@@ -107,10 +117,15 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F state_sets = calculate_sets(srcs) for (suffix, srcs) in state_sets.items(): go_stateify( - name = name + suffix + "_state_autogen", + name = name + suffix + "_state_autogen_with_imports", srcs = srcs, imports = imports, package = name, + out = name + suffix + "_state_autogen_with_imports.go", + ) + go_imports( + name = name + suffix + "_state_autogen", + src = name + suffix + "_state_autogen_with_imports.go", out = name + suffix + "_state_autogen.go", ) all_srcs = all_srcs + [ -- cgit v1.2.3 From 0e96fcafd4404e1418c84b7830b9455867e174bb Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 6 Feb 2020 10:11:15 -0800 Subject: Fix test case on AMD. When ignored, the trap should be executed which generates a SIGSEGV as in the above case. PiperOrigin-RevId: 293618489 --- test/syscalls/linux/32bit.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc index 9883aef61..c47a05181 100644 --- a/test/syscalls/linux/32bit.cc +++ b/test/syscalls/linux/32bit.cc @@ -155,7 +155,7 @@ TEST(Syscall32Bit, Syscall) { case PlatformSupport::Ignored: // See above. EXPECT_EXIT(ExitGroup32(kSyscall, kExitCode), - ::testing::KilledBySignal(SIGILL), ""); + ::testing::KilledBySignal(SIGSEGV), ""); break; case PlatformSupport::Allowed: -- cgit v1.2.3 From bfa4a235f401599492a2cf39471df62715f9f1cf Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 6 Feb 2020 14:26:41 -0800 Subject: Fix `bazel run` target in docs. PiperOrigin-RevId: 293676954 --- test/iptables/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/iptables/README.md b/test/iptables/README.md index 8f61b4c41..c2b934e1f 100644 --- a/test/iptables/README.md +++ b/test/iptables/README.md @@ -28,7 +28,7 @@ Your test is now runnable with bazel! 
Build the testing Docker container: ```bash -$ bazel run //test/iptables/runner-image -- --norun +$ bazel run //test/iptables/runner:runner-image -- --norun ``` Run an individual test via: -- cgit v1.2.3 From 6de49546cb32806896cec27d3ab76e96323ecac1 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Fri, 7 Feb 2020 13:18:19 -0800 Subject: Refactor syscall tests - Move shared helpers V4Multicast and V4Broadcast to socket_test_util - Add unnamed namespace so socket_ipv4_tcp_unbound_external_networking_test.cc and socket_ipv4_udp_unbound_external_networking_test.cc can be compiled together - Add test files to "exports_files" so they can be included by Fuchsia's syscall test setup PiperOrigin-RevId: 293880429 --- test/syscalls/linux/BUILD | 3 +++ ...ket_ipv4_tcp_unbound_external_networking_test.cc | 3 +++ test/syscalls/linux/socket_ipv4_udp_unbound.cc | 21 --------------------- .../socket_ipv4_udp_unbound_external_networking.cc | 20 -------------------- ...ket_ipv4_udp_unbound_external_networking_test.cc | 3 +++ test/syscalls/linux/socket_test_util.cc | 18 ++++++++++++++++++ test/syscalls/linux/socket_test_util.h | 5 +++++ 7 files changed, 32 insertions(+), 41 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index f2e3c7072..12d389c3e 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -12,6 +12,9 @@ exports_files( "socket_ip_loopback_blocking.cc", "socket_ip_tcp_loopback.cc", "socket_ip_udp_loopback.cc", + "socket_ip_unbound.cc", + "socket_ipv4_tcp_unbound_external_networking_test.cc", + "socket_ipv4_udp_unbound_external_networking_test.cc", "socket_ipv4_udp_unbound_loopback.cc", "tcp_socket.cc", "udp_socket.cc", diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc index 3ac790873..797c4174e 100644 --- a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc +++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc @@ -22,6 +22,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSockets() { return ApplyVec( @@ -32,5 +33,7 @@ std::vector GetSockets() { INSTANTIATE_TEST_SUITE_P(IPv4TCPUnboundSockets, IPv4TCPUnboundExternalNetworkingSocketTest, ::testing::ValuesIn(GetSockets())); + +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index aa6fb4e3f..990ccf23c 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -30,27 +30,6 @@ namespace gvisor { namespace testing { -constexpr char kMulticastAddress[] = "224.0.2.1"; -constexpr char kBroadcastAddress[] = "255.255.255.255"; - -TestAddress V4Multicast() { - TestAddress t("V4Multicast"); - t.addr.ss_family = AF_INET; - t.addr_len = sizeof(sockaddr_in); - reinterpret_cast(&t.addr)->sin_addr.s_addr = - inet_addr(kMulticastAddress); - return t; -} - -TestAddress V4Broadcast() { - TestAddress t("V4Broadcast"); - t.addr.ss_family = AF_INET; - t.addr_len = sizeof(sockaddr_in); - reinterpret_cast(&t.addr)->sin_addr.s_addr = - inet_addr(kBroadcastAddress); - return t; -} - // Check that packets are not received without a group membership. Default send // interface configured by bind. 
TEST_P(IPv4UDPUnboundSocketTest, IpMulticastLoopbackNoGroup) { diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc index 98ae414f3..40e673625 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc @@ -41,26 +41,6 @@ TestAddress V4EmptyAddress() { return t; } -constexpr char kMulticastAddress[] = "224.0.2.1"; - -TestAddress V4Multicast() { - TestAddress t("V4Multicast"); - t.addr.ss_family = AF_INET; - t.addr_len = sizeof(sockaddr_in); - reinterpret_cast(&t.addr)->sin_addr.s_addr = - inet_addr(kMulticastAddress); - return t; -} - -TestAddress V4Broadcast() { - TestAddress t("V4Broadcast"); - t.addr.ss_family = AF_INET; - t.addr_len = sizeof(sockaddr_in); - reinterpret_cast(&t.addr)->sin_addr.s_addr = - htonl(INADDR_BROADCAST); - return t; -} - void IPv4UDPUnboundExternalNetworkingSocketTest::SetUp() { got_if_infos_ = false; diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc index 8f47952b0..f6e64c157 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc @@ -22,6 +22,7 @@ namespace gvisor { namespace testing { +namespace { std::vector GetSockets() { return ApplyVec( @@ -32,5 +33,7 @@ std::vector GetSockets() { INSTANTIATE_TEST_SUITE_P(IPv4UDPUnboundSockets, IPv4UDPUnboundExternalNetworkingSocketTest, ::testing::ValuesIn(GetSockets())); + +} // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc index c0c5ab3fe..5d3a39868 100644 --- a/test/syscalls/linux/socket_test_util.cc +++ b/test/syscalls/linux/socket_test_util.cc @@ -805,6 +805,24 @@ TestAddress V4MappedLoopback() { return t; } +TestAddress V4Multicast() { + TestAddress t("V4Multicast"); + t.addr.ss_family = AF_INET; + t.addr_len = sizeof(sockaddr_in); + reinterpret_cast(&t.addr)->sin_addr.s_addr = + inet_addr(kMulticastAddress); + return t; +} + +TestAddress V4Broadcast() { + TestAddress t("V4Broadcast"); + t.addr.ss_family = AF_INET; + t.addr_len = sizeof(sockaddr_in); + reinterpret_cast(&t.addr)->sin_addr.s_addr = + htonl(INADDR_BROADCAST); + return t; +} + TestAddress V6Any() { TestAddress t("V6Any"); t.addr.ss_family = AF_INET6; diff --git a/test/syscalls/linux/socket_test_util.h b/test/syscalls/linux/socket_test_util.h index bfaa6e397..734b48b96 100644 --- a/test/syscalls/linux/socket_test_util.h +++ b/test/syscalls/linux/socket_test_util.h @@ -484,10 +484,15 @@ struct TestAddress { : description(std::move(description)), addr(), addr_len() {} }; +constexpr char kMulticastAddress[] = "224.0.2.1"; +constexpr char kBroadcastAddress[] = "255.255.255.255"; + TestAddress V4Any(); +TestAddress V4Broadcast(); TestAddress V4Loopback(); TestAddress V4MappedAny(); TestAddress V4MappedLoopback(); +TestAddress V4Multicast(); TestAddress V6Any(); TestAddress V6Loopback(); -- cgit v1.2.3 From 17b9f5e66238bde1e4ed3bd9e5fb67342c8b58ec Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 7 Feb 2020 14:46:24 -0800 Subject: Support listxattr and removexattr syscalls. Note that these are only implemented for tmpfs, and other impls will still return EOPNOTSUPP. 
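
For context, here is a minimal, self-contained sketch (not part of this patch) of how an application would exercise the syscalls wired up by this change, in the spirit of the updated xattr.cc tests further down. The path /dev/shm/xattr-demo and the attribute name user.demo are illustrative assumptions; per the description above, these calls are only expected to succeed on tmpfs and, at this stage, only for the "user." namespace. The size-0 listxattr probe mirrors the buffer-size behavior documented in the new listxattr implementation.

  // Sketch only: assumes a tmpfs mount at /dev/shm and a "user."-prefixed name.
  #include <fcntl.h>
  #include <sys/xattr.h>
  #include <unistd.h>

  #include <cstdio>
  #include <string>
  #include <vector>

  int main() {
    const char* path = "/dev/shm/xattr-demo";  // assumed tmpfs-backed path
    int fd = open(path, O_CREAT | O_RDWR, 0644);
    if (fd < 0) {
      perror("open");
      return 1;
    }
    close(fd);

    // Only "user."-prefixed attribute names are accepted at this point.
    const char name[] = "user.demo";
    const char value[] = "hello";
    if (setxattr(path, name, value, sizeof(value), /*flags=*/0) != 0) {
      perror("setxattr");
    }

    // Probe with size 0 to learn the list size, then read the NUL-separated
    // list of attribute names.
    ssize_t len = listxattr(path, nullptr, 0);
    if (len > 0) {
      std::vector<char> buf(len);
      len = listxattr(path, buf.data(), buf.size());
      for (ssize_t off = 0; off < len;
           off += static_cast<ssize_t>(std::string(&buf[off]).size()) + 1) {
        std::printf("xattr: %s\n", &buf[off]);
      }
    }

    // Remove the attribute; a second removexattr would fail with ENODATA.
    if (removexattr(path, name) != 0) {
      perror("removexattr");
    }

    unlink(path);
    return 0;
  }
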
PiperOrigin-RevId: 293899385 --- pkg/p9/client_file.go | 33 ++++ pkg/p9/file.go | 16 ++ pkg/p9/handlers.go | 33 ++++ pkg/p9/messages.go | 199 +++++++++++++++---- pkg/p9/p9.go | 4 + pkg/p9/version.go | 8 +- pkg/sentry/fs/copy_up.go | 2 +- pkg/sentry/fs/fsutil/inode.go | 20 +- pkg/sentry/fs/gofer/context_file.go | 14 ++ pkg/sentry/fs/gofer/inode.go | 13 +- pkg/sentry/fs/inode.go | 14 +- pkg/sentry/fs/inode_operations.go | 13 +- pkg/sentry/fs/inode_overlay.go | 18 +- pkg/sentry/fs/tmpfs/tmpfs.go | 9 +- pkg/sentry/syscalls/linux/linux64_amd64.go | 27 ++- pkg/sentry/syscalls/linux/linux64_arm64.go | 37 ++-- pkg/sentry/syscalls/linux/sys_xattr.go | 200 +++++++++++++++++++- runsc/fsgofer/fsgofer.go | 16 +- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/xattr.cc | 294 ++++++++++++++++------------- 20 files changed, 733 insertions(+), 238 deletions(-) (limited to 'test') diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go index 0254e4ccc..2ee07b664 100644 --- a/pkg/p9/client_file.go +++ b/pkg/p9/client_file.go @@ -194,6 +194,39 @@ func (c *clientFile) SetXattr(name, value string, flags uint32) error { return c.client.sendRecv(&Tsetxattr{FID: c.fid, Name: name, Value: value, Flags: flags}, &Rsetxattr{}) } +// ListXattr implements File.ListXattr. +func (c *clientFile) ListXattr(size uint64) (map[string]struct{}, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, syscall.EBADF + } + if !versionSupportsListRemoveXattr(c.client.version) { + return nil, syscall.EOPNOTSUPP + } + + rlistxattr := Rlistxattr{} + if err := c.client.sendRecv(&Tlistxattr{FID: c.fid, Size: size}, &rlistxattr); err != nil { + return nil, err + } + + xattrs := make(map[string]struct{}, len(rlistxattr.Xattrs)) + for _, x := range rlistxattr.Xattrs { + xattrs[x] = struct{}{} + } + return xattrs, nil +} + +// RemoveXattr implements File.RemoveXattr. +func (c *clientFile) RemoveXattr(name string) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + if !versionSupportsListRemoveXattr(c.client.version) { + return syscall.EOPNOTSUPP + } + + return c.client.sendRecv(&Tremovexattr{FID: c.fid, Name: name}, &Rremovexattr{}) +} + // Allocate implements File.Allocate. func (c *clientFile) Allocate(mode AllocateMode, offset, length uint64) error { if atomic.LoadUint32(&c.closed) != 0 { diff --git a/pkg/p9/file.go b/pkg/p9/file.go index 4607cfcdf..d4ffbc8e3 100644 --- a/pkg/p9/file.go +++ b/pkg/p9/file.go @@ -105,6 +105,22 @@ type File interface { // TODO(b/127675828): Determine concurrency guarantees once implemented. SetXattr(name, value string, flags uint32) error + // ListXattr lists the names of the extended attributes on this node. + // + // Size indicates the size of the buffer that has been allocated to hold the + // attribute list. If the list would be larger than size, implementations may + // return ERANGE to indicate that the buffer is too small, but they are also + // free to ignore the hint entirely (i.e. the value returned may be larger + // than size). All size checking is done independently at the syscall layer. + // + // TODO(b/148303075): Determine concurrency guarantees once implemented. + ListXattr(size uint64) (map[string]struct{}, error) + + // RemoveXattr removes extended attributes on this node. + // + // TODO(b/148303075): Determine concurrency guarantees once implemented. + RemoveXattr(name string) error + // Allocate allows the caller to directly manipulate the allocated disk space // for the file. See fallocate(2) for more details. 
Allocate(mode AllocateMode, offset, length uint64) error diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go index 7d6653a07..2ac45eb80 100644 --- a/pkg/p9/handlers.go +++ b/pkg/p9/handlers.go @@ -941,6 +941,39 @@ func (t *Tsetxattr) handle(cs *connState) message { return &Rsetxattr{} } +// handle implements handler.handle. +func (t *Tlistxattr) handle(cs *connState) message { + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + xattrs, err := ref.file.ListXattr(t.Size) + if err != nil { + return newErr(err) + } + xattrList := make([]string, 0, len(xattrs)) + for x := range xattrs { + xattrList = append(xattrList, x) + } + return &Rlistxattr{Xattrs: xattrList} +} + +// handle implements handler.handle. +func (t *Tremovexattr) handle(cs *connState) message { + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.file.RemoveXattr(t.Name); err != nil { + return newErr(err) + } + return &Rremovexattr{} +} + // handle implements handler.handle. func (t *Treaddir) handle(cs *connState) message { ref, ok := cs.LookupFID(t.Directory) diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go index ceb723d86..b1cede5f5 100644 --- a/pkg/p9/messages.go +++ b/pkg/p9/messages.go @@ -174,11 +174,11 @@ type Rflush struct { } // Decode implements encoder.Decode. -func (*Rflush) Decode(b *buffer) { +func (*Rflush) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rflush) Encode(b *buffer) { +func (*Rflush) Encode(*buffer) { } // Type implements message.Type. @@ -188,7 +188,7 @@ func (*Rflush) Type() MsgType { // String implements fmt.Stringer. func (r *Rflush) String() string { - return fmt.Sprintf("RFlush{}") + return "RFlush{}" } // Twalk is a walk request. @@ -300,11 +300,11 @@ type Rclunk struct { } // Decode implements encoder.Decode. -func (*Rclunk) Decode(b *buffer) { +func (*Rclunk) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rclunk) Encode(b *buffer) { +func (*Rclunk) Encode(*buffer) { } // Type implements message.Type. @@ -314,7 +314,7 @@ func (*Rclunk) Type() MsgType { // String implements fmt.Stringer. func (r *Rclunk) String() string { - return fmt.Sprintf("Rclunk{}") + return "Rclunk{}" } // Tremove is a remove request. @@ -350,11 +350,11 @@ type Rremove struct { } // Decode implements encoder.Decode. -func (*Rremove) Decode(b *buffer) { +func (*Rremove) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rremove) Encode(b *buffer) { +func (*Rremove) Encode(*buffer) { } // Type implements message.Type. @@ -364,7 +364,7 @@ func (*Rremove) Type() MsgType { // String implements fmt.Stringer. func (r *Rremove) String() string { - return fmt.Sprintf("Rremove{}") + return "Rremove{}" } // Rlerror is an error response. @@ -745,16 +745,16 @@ func (*Rlink) Type() MsgType { } // Decode implements encoder.Decode. -func (*Rlink) Decode(b *buffer) { +func (*Rlink) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rlink) Encode(b *buffer) { +func (*Rlink) Encode(*buffer) { } // String implements fmt.Stringer. func (r *Rlink) String() string { - return fmt.Sprintf("Rlink{}") + return "Rlink{}" } // Trenameat is a rename request. @@ -803,11 +803,11 @@ type Rrenameat struct { } // Decode implements encoder.Decode. -func (*Rrenameat) Decode(b *buffer) { +func (*Rrenameat) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rrenameat) Encode(b *buffer) { +func (*Rrenameat) Encode(*buffer) { } // Type implements message.Type. 
@@ -817,7 +817,7 @@ func (*Rrenameat) Type() MsgType { // String implements fmt.Stringer. func (r *Rrenameat) String() string { - return fmt.Sprintf("Rrenameat{}") + return "Rrenameat{}" } // Tunlinkat is an unlink request. @@ -861,11 +861,11 @@ type Runlinkat struct { } // Decode implements encoder.Decode. -func (*Runlinkat) Decode(b *buffer) { +func (*Runlinkat) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Runlinkat) Encode(b *buffer) { +func (*Runlinkat) Encode(*buffer) { } // Type implements message.Type. @@ -875,7 +875,7 @@ func (*Runlinkat) Type() MsgType { // String implements fmt.Stringer. func (r *Runlinkat) String() string { - return fmt.Sprintf("Runlinkat{}") + return "Runlinkat{}" } // Trename is a rename request. @@ -922,11 +922,11 @@ type Rrename struct { } // Decode implements encoder.Decode. -func (*Rrename) Decode(b *buffer) { +func (*Rrename) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rrename) Encode(b *buffer) { +func (*Rrename) Encode(*buffer) { } // Type implements message.Type. @@ -936,7 +936,7 @@ func (*Rrename) Type() MsgType { // String implements fmt.Stringer. func (r *Rrename) String() string { - return fmt.Sprintf("Rrename{}") + return "Rrename{}" } // Treadlink is a readlink request. @@ -1409,11 +1409,11 @@ type Rsetattr struct { } // Decode implements encoder.Decode. -func (*Rsetattr) Decode(b *buffer) { +func (*Rsetattr) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rsetattr) Encode(b *buffer) { +func (*Rsetattr) Encode(*buffer) { } // Type implements message.Type. @@ -1423,7 +1423,7 @@ func (*Rsetattr) Type() MsgType { // String implements fmt.Stringer. func (r *Rsetattr) String() string { - return fmt.Sprintf("Rsetattr{}") + return "Rsetattr{}" } // Tallocate is an allocate request. This is an extension to 9P protocol, not @@ -1466,11 +1466,11 @@ type Rallocate struct { } // Decode implements encoder.Decode. -func (*Rallocate) Decode(b *buffer) { +func (*Rallocate) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rallocate) Encode(b *buffer) { +func (*Rallocate) Encode(*buffer) { } // Type implements message.Type. @@ -1480,7 +1480,71 @@ func (*Rallocate) Type() MsgType { // String implements fmt.Stringer. func (r *Rallocate) String() string { - return fmt.Sprintf("Rallocate{}") + return "Rallocate{}" +} + +// Tlistxattr is a listxattr request. +type Tlistxattr struct { + // FID refers to the file on which to list xattrs. + FID FID + + // Size is the buffer size for the xattr list. + Size uint64 +} + +// Decode implements encoder.Decode. +func (t *Tlistxattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Size = b.Read64() +} + +// Encode implements encoder.Encode. +func (t *Tlistxattr) Encode(b *buffer) { + b.WriteFID(t.FID) + b.Write64(t.Size) +} + +// Type implements message.Type. +func (*Tlistxattr) Type() MsgType { + return MsgTlistxattr +} + +// String implements fmt.Stringer. +func (t *Tlistxattr) String() string { + return fmt.Sprintf("Tlistxattr{FID: %d, Size: %d}", t.FID, t.Size) +} + +// Rlistxattr is a listxattr response. +type Rlistxattr struct { + // Xattrs is a list of extended attribute names. + Xattrs []string +} + +// Decode implements encoder.Decode. +func (r *Rlistxattr) Decode(b *buffer) { + n := b.Read16() + r.Xattrs = r.Xattrs[:0] + for i := 0; i < int(n); i++ { + r.Xattrs = append(r.Xattrs, b.ReadString()) + } +} + +// Encode implements encoder.Encode. 
+func (r *Rlistxattr) Encode(b *buffer) { + b.Write16(uint16(len(r.Xattrs))) + for _, x := range r.Xattrs { + b.WriteString(x) + } +} + +// Type implements message.Type. +func (*Rlistxattr) Type() MsgType { + return MsgRlistxattr +} + +// String implements fmt.Stringer. +func (r *Rlistxattr) String() string { + return fmt.Sprintf("Rlistxattr{Xattrs: %v}", r.Xattrs) } // Txattrwalk walks extended attributes. @@ -1594,11 +1658,11 @@ type Rxattrcreate struct { } // Decode implements encoder.Decode. -func (r *Rxattrcreate) Decode(b *buffer) { +func (r *Rxattrcreate) Decode(*buffer) { } // Encode implements encoder.Encode. -func (r *Rxattrcreate) Encode(b *buffer) { +func (r *Rxattrcreate) Encode(*buffer) { } // Type implements message.Type. @@ -1608,7 +1672,7 @@ func (*Rxattrcreate) Type() MsgType { // String implements fmt.Stringer. func (r *Rxattrcreate) String() string { - return fmt.Sprintf("Rxattrcreate{}") + return "Rxattrcreate{}" } // Tgetxattr is a getxattr request. @@ -1719,11 +1783,11 @@ type Rsetxattr struct { } // Decode implements encoder.Decode. -func (r *Rsetxattr) Decode(b *buffer) { +func (r *Rsetxattr) Decode(*buffer) { } // Encode implements encoder.Encode. -func (r *Rsetxattr) Encode(b *buffer) { +func (r *Rsetxattr) Encode(*buffer) { } // Type implements message.Type. @@ -1733,7 +1797,60 @@ func (*Rsetxattr) Type() MsgType { // String implements fmt.Stringer. func (r *Rsetxattr) String() string { - return fmt.Sprintf("Rsetxattr{}") + return "Rsetxattr{}" +} + +// Tremovexattr is a removexattr request. +type Tremovexattr struct { + // FID refers to the file on which to set xattrs. + FID FID + + // Name is the attribute name. + Name string +} + +// Decode implements encoder.Decode. +func (t *Tremovexattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Name = b.ReadString() +} + +// Encode implements encoder.Encode. +func (t *Tremovexattr) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteString(t.Name) +} + +// Type implements message.Type. +func (*Tremovexattr) Type() MsgType { + return MsgTremovexattr +} + +// String implements fmt.Stringer. +func (t *Tremovexattr) String() string { + return fmt.Sprintf("Tremovexattr{FID: %d, Name: %s}", t.FID, t.Name) +} + +// Rremovexattr is a removexattr response. +type Rremovexattr struct { +} + +// Decode implements encoder.Decode. +func (r *Rremovexattr) Decode(*buffer) { +} + +// Encode implements encoder.Encode. +func (r *Rremovexattr) Encode(*buffer) { +} + +// Type implements message.Type. +func (*Rremovexattr) Type() MsgType { + return MsgRremovexattr +} + +// String implements fmt.Stringer. +func (r *Rremovexattr) String() string { + return "Rremovexattr{}" } // Treaddir is a readdir request. @@ -1880,11 +1997,11 @@ type Rfsync struct { } // Decode implements encoder.Decode. -func (*Rfsync) Decode(b *buffer) { +func (*Rfsync) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rfsync) Encode(b *buffer) { +func (*Rfsync) Encode(*buffer) { } // Type implements message.Type. @@ -1894,7 +2011,7 @@ func (*Rfsync) Type() MsgType { // String implements fmt.Stringer. func (r *Rfsync) String() string { - return fmt.Sprintf("Rfsync{}") + return "Rfsync{}" } // Tstatfs is a stat request. @@ -1980,11 +2097,11 @@ type Rflushf struct { } // Decode implements encoder.Decode. -func (*Rflushf) Decode(b *buffer) { +func (*Rflushf) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rflushf) Encode(b *buffer) { +func (*Rflushf) Encode(*buffer) { } // Type implements message.Type. 
@@ -1994,7 +2111,7 @@ func (*Rflushf) Type() MsgType { // String implements fmt.Stringer. func (*Rflushf) String() string { - return fmt.Sprintf("Rflushf{}") + return "Rflushf{}" } // Twalkgetattr is a walk request. @@ -2484,6 +2601,8 @@ func init() { msgRegistry.register(MsgRgetattr, func() message { return &Rgetattr{} }) msgRegistry.register(MsgTsetattr, func() message { return &Tsetattr{} }) msgRegistry.register(MsgRsetattr, func() message { return &Rsetattr{} }) + msgRegistry.register(MsgTlistxattr, func() message { return &Tlistxattr{} }) + msgRegistry.register(MsgRlistxattr, func() message { return &Rlistxattr{} }) msgRegistry.register(MsgTxattrwalk, func() message { return &Txattrwalk{} }) msgRegistry.register(MsgRxattrwalk, func() message { return &Rxattrwalk{} }) msgRegistry.register(MsgTxattrcreate, func() message { return &Txattrcreate{} }) @@ -2492,6 +2611,8 @@ func init() { msgRegistry.register(MsgRgetxattr, func() message { return &Rgetxattr{} }) msgRegistry.register(MsgTsetxattr, func() message { return &Tsetxattr{} }) msgRegistry.register(MsgRsetxattr, func() message { return &Rsetxattr{} }) + msgRegistry.register(MsgTremovexattr, func() message { return &Tremovexattr{} }) + msgRegistry.register(MsgRremovexattr, func() message { return &Rremovexattr{} }) msgRegistry.register(MsgTreaddir, func() message { return &Treaddir{} }) msgRegistry.register(MsgRreaddir, func() message { return &Rreaddir{} }) msgRegistry.register(MsgTfsync, func() message { return &Tfsync{} }) diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go index 5ab00d625..20ab31f7a 100644 --- a/pkg/p9/p9.go +++ b/pkg/p9/p9.go @@ -335,6 +335,8 @@ const ( MsgRgetattr = 25 MsgTsetattr = 26 MsgRsetattr = 27 + MsgTlistxattr = 28 + MsgRlistxattr = 29 MsgTxattrwalk = 30 MsgRxattrwalk = 31 MsgTxattrcreate = 32 @@ -343,6 +345,8 @@ const ( MsgRgetxattr = 35 MsgTsetxattr = 36 MsgRsetxattr = 37 + MsgTremovexattr = 38 + MsgRremovexattr = 39 MsgTreaddir = 40 MsgRreaddir = 41 MsgTfsync = 50 diff --git a/pkg/p9/version.go b/pkg/p9/version.go index 34a15eb55..09cde9f5a 100644 --- a/pkg/p9/version.go +++ b/pkg/p9/version.go @@ -26,7 +26,7 @@ const ( // // Clients are expected to start requesting this version number and // to continuously decrement it until a Tversion request succeeds. - highestSupportedVersion uint32 = 10 + highestSupportedVersion uint32 = 11 // lowestSupportedVersion is the lowest supported version X in a // version string of the format 9P2000.L.Google.X. @@ -167,3 +167,9 @@ func VersionSupportsOpenTruncateFlag(v uint32) bool { func versionSupportsGetSetXattr(v uint32) bool { return v >= 10 } + +// versionSupportsListRemoveXattr returns true if version v supports +// the Tlistxattr and Tremovexattr messages. 
+func versionSupportsListRemoveXattr(v uint32) bool { + return v >= 11 +} diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index f6c79e51b..b060a12ff 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -401,7 +401,7 @@ func copyAttributesLocked(ctx context.Context, upper *Inode, lower *Inode) error if err != nil { return err } - lowerXattr, err := lower.ListXattr(ctx) + lowerXattr, err := lower.ListXattr(ctx, linux.XATTR_SIZE_MAX) if err != nil && err != syserror.EOPNOTSUPP { return err } diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 252830572..daecc4ffe 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -247,7 +247,7 @@ func (i *InodeSimpleExtendedAttributes) SetXattr(_ context.Context, _ *fs.Inode, } // ListXattr implements fs.InodeOperations.ListXattr. -func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { +func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) { i.mu.RLock() names := make(map[string]struct{}, len(i.xattrs)) for name := range i.xattrs { @@ -257,6 +257,17 @@ func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (m return names, nil } +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (i *InodeSimpleExtendedAttributes) RemoveXattr(_ context.Context, _ *fs.Inode, name string) error { + i.mu.RLock() + defer i.mu.RUnlock() + if _, ok := i.xattrs[name]; ok { + delete(i.xattrs, name) + return nil + } + return syserror.ENOATTR +} + // staticFile is a file with static contents. It is returned by // InodeStaticFileGetter.GetFile. // @@ -460,10 +471,15 @@ func (InodeNoExtendedAttributes) SetXattr(context.Context, *fs.Inode, string, st } // ListXattr implements fs.InodeOperations.ListXattr. -func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { +func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) { return nil, syserror.EOPNOTSUPP } +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (InodeNoExtendedAttributes) RemoveXattr(context.Context, *fs.Inode, string) error { + return syserror.EOPNOTSUPP +} + // InodeNoopRelease implements fs.InodeOperations.Release as a noop. 
type InodeNoopRelease struct{} diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go index 3da818aed..125907d70 100644 --- a/pkg/sentry/fs/gofer/context_file.go +++ b/pkg/sentry/fs/gofer/context_file.go @@ -73,6 +73,20 @@ func (c *contextFile) setXattr(ctx context.Context, name, value string, flags ui return err } +func (c *contextFile) listXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { + ctx.UninterruptibleSleepStart(false) + xattrs, err := c.file.ListXattr(size) + ctx.UninterruptibleSleepFinish(false) + return xattrs, err +} + +func (c *contextFile) removeXattr(ctx context.Context, name string) error { + ctx.UninterruptibleSleepStart(false) + err := c.file.RemoveXattr(name) + ctx.UninterruptibleSleepFinish(false) + return err +} + func (c *contextFile) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { ctx.UninterruptibleSleepStart(false) err := c.file.Allocate(mode, offset, length) diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index ac28174d2..1c934981b 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -604,18 +604,23 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length } // GetXattr implements fs.InodeOperations.GetXattr. -func (i *inodeOperations) GetXattr(ctx context.Context, inode *fs.Inode, name string, size uint64) (string, error) { +func (i *inodeOperations) GetXattr(ctx context.Context, _ *fs.Inode, name string, size uint64) (string, error) { return i.fileState.file.getXattr(ctx, name, size) } // SetXattr implements fs.InodeOperations.SetXattr. -func (i *inodeOperations) SetXattr(ctx context.Context, inode *fs.Inode, name string, value string, flags uint32) error { +func (i *inodeOperations) SetXattr(ctx context.Context, _ *fs.Inode, name string, value string, flags uint32) error { return i.fileState.file.setXattr(ctx, name, value, flags) } // ListXattr implements fs.InodeOperations.ListXattr. -func (i *inodeOperations) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { - return nil, syscall.EOPNOTSUPP +func (i *inodeOperations) ListXattr(ctx context.Context, _ *fs.Inode, size uint64) (map[string]struct{}, error) { + return i.fileState.file.listXattr(ctx, size) +} + +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (i *inodeOperations) RemoveXattr(ctx context.Context, _ *fs.Inode, name string) error { + return i.fileState.file.removeXattr(ctx, name) } // Allocate implements fs.InodeOperations.Allocate. diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index b66c091ab..55fb71c16 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -278,11 +278,19 @@ func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, fla } // ListXattr calls i.InodeOperations.ListXattr with i as the Inode. -func (i *Inode) ListXattr(ctx context.Context) (map[string]struct{}, error) { +func (i *Inode) ListXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { if i.overlay != nil { - return overlayListXattr(ctx, i.overlay) + return overlayListXattr(ctx, i.overlay, size) } - return i.InodeOperations.ListXattr(ctx, i) + return i.InodeOperations.ListXattr(ctx, i, size) +} + +// RemoveXattr calls i.InodeOperations.RemoveXattr with i as the Inode. 
+func (i *Inode) RemoveXattr(ctx context.Context, d *Dirent, name string) error { + if i.overlay != nil { + return overlayRemoveXattr(ctx, i.overlay, d, name) + } + return i.InodeOperations.RemoveXattr(ctx, i, name) } // CheckPermission will check if the caller may access this file in the diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index 70f2eae96..2bbfb72ef 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -190,7 +190,18 @@ type InodeOperations interface { // ListXattr returns the set of all extended attributes names that // have values. Inodes that do not support extended attributes return // EOPNOTSUPP. - ListXattr(ctx context.Context, inode *Inode) (map[string]struct{}, error) + // + // If this is called through the listxattr(2) syscall, size indicates the + // size of the buffer that the application has allocated to hold the + // attribute list. If the list would be larger than size, implementations may + // return ERANGE to indicate that the buffer is too small, but they are also + // free to ignore the hint entirely. All size checking is done independently + // at the syscall layer. + ListXattr(ctx context.Context, inode *Inode, size uint64) (map[string]struct{}, error) + + // RemoveXattr removes an extended attribute specified by name. Inodes that + // do not support extended attributes return EOPNOTSUPP. + RemoveXattr(ctx context.Context, inode *Inode, name string) error // Check determines whether an Inode can be accessed with the // requested permission mask using the context (which gives access diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 4729b4aac..5ada33a32 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -564,15 +564,15 @@ func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, valu return o.upper.SetXattr(ctx, d, name, value, flags) } -func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}, error) { +func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[string]struct{}, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() var names map[string]struct{} var err error if o.upper != nil { - names, err = o.upper.ListXattr(ctx) + names, err = o.upper.ListXattr(ctx, size) } else { - names, err = o.lower.ListXattr(ctx) + names, err = o.lower.ListXattr(ctx, size) } for name := range names { // Same as overlayGetXattr, we shouldn't forward along @@ -584,6 +584,18 @@ func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{} return names, err } +func overlayRemoveXattr(ctx context.Context, o *overlayEntry, d *Dirent, name string) error { + // Don't allow changes to overlay xattrs through a removexattr syscall. + if strings.HasPrefix(XattrOverlayPrefix, name) { + return syserror.EPERM + } + + if err := copyUp(ctx, d); err != nil { + return err + } + return o.upper.RemoveXattr(ctx, d, name) +} + func overlayCheck(ctx context.Context, o *overlayEntry, p PermMask) error { o.copyMu.RLock() // Hot path. Avoid defers. diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index c00cef0a5..3c2b583ae 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -159,8 +159,13 @@ func (d *Dir) SetXattr(ctx context.Context, i *fs.Inode, name, value string, fla } // ListXattr implements fs.InodeOperations.ListXattr. 
-func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode) (map[string]struct{}, error) { - return d.ramfsDir.ListXattr(ctx, i) +func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode, size uint64) (map[string]struct{}, error) { + return d.ramfsDir.ListXattr(ctx, i, size) +} + +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (d *Dir) RemoveXattr(ctx context.Context, i *fs.Inode, name string) error { + return d.ramfsDir.RemoveXattr(ctx, i, name) } // Lookup implements fs.InodeOperations.Lookup. diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 588f8b087..79066ad2a 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -228,21 +228,18 @@ var AMD64 = &kernel.SyscallTable{ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), - // TODO(b/148303075): Enable set/getxattr (in their various - // forms) once we also have list and removexattr. The JVM - // assumes that if get/set exist, then list and remove do too. - 188: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 189: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 190: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), + 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), + 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), + 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), + 194: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), + 195: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), + 196: syscalls.PartiallySupported("flistxattr", FListXattr, "Only 
supported for tmpfs", nil), + 197: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), + 198: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), + 199: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), 200: syscalls.Supported("tkill", Tkill), 201: syscalls.Supported("time", Time), 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index 06e5ee401..7421619de 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -36,26 +36,23 @@ var ARM64 = &kernel.SyscallTable{ }, AuditNumber: linux.AUDIT_ARCH_AARCH64, Table: map[uintptr]kernel.Syscall{ - 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - // TODO(b/148303075): Enable set/getxattr (in their various - // forms) once we also have list and removexattr. The JVM - // assumes that if get/set exist, then list and remove do too. 
- 5: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 6: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 7: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 13: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), + 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), + 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), + 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), + 11: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), + 12: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), + 13: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), + 14: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), + 15: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), + 16: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), 17: syscalls.Supported("getcwd", Getcwd), 18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), 19: syscalls.Supported("eventfd2", Eventfd2), diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index efb95555c..342337726 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -72,7 +72,7 @@ func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink } valueLen := 0 - err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -172,7 +172,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -187,12 +187,12 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si return syserror.EINVAL } - if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { + name, err := copyInXattrName(t, nameAddr) + if err != nil { return err } - name, err := copyInXattrName(t, nameAddr) - if err != nil { + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { return err } @@ -226,12 +226,18 @@ func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { return name, nil } +// Restrict xattrs to regular files and directories. +// +// TODO(b/148380782): In Linux, this restriction technically only applies to +// xattrs in the "user.*" namespace. Make file type checks specific to the +// namespace once we allow other xattr prefixes. +func xattrFileTypeOk(i *fs.Inode) bool { + return fs.IsRegular(i.StableAttr) || fs.IsDir(i.StableAttr) +} + func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error { // Restrict xattrs to regular files and directories. 
- // - // In Linux, this restriction technically only applies to xattrs in the - // "user.*" namespace, but we don't allow any other xattr prefixes anyway. - if !fs.IsRegular(i.StableAttr) && !fs.IsDir(i.StableAttr) { + if !xattrFileTypeOk(i) { if perms.Write { return syserror.EPERM } @@ -240,3 +246,179 @@ func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error return i.CheckPermission(t, perms) } + +// ListXattr implements linux syscall listxattr(2). +func ListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return listXattrFromPath(t, args, true) +} + +// LListXattr implements linux syscall llistxattr(2). +func LListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return listXattrFromPath(t, args, false) +} + +// FListXattr implements linux syscall flistxattr(2). +func FListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + listAddr := args[1].Pointer() + size := uint64(args[2].SizeT()) + + // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. + f := t.GetFile(fd) + if f == nil { + return 0, nil, syserror.EBADF + } + defer f.DecRef() + + n, err := listXattr(t, f.Dirent, listAddr, size) + if err != nil { + return 0, nil, err + } + + return uintptr(n), nil, nil +} + +func listXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + listAddr := args[1].Pointer() + size := uint64(args[2].SizeT()) + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + n := 0 + err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + + n, err = listXattr(t, d, listAddr, size) + return err + }) + if err != nil { + return 0, nil, err + } + + return uintptr(n), nil, nil +} + +func listXattr(t *kernel.Task, d *fs.Dirent, addr usermem.Addr, size uint64) (int, error) { + if !xattrFileTypeOk(d.Inode) { + return 0, nil + } + + // If listxattr(2) is called with size 0, the buffer size needed to contain + // the xattr list will be returned successfully even if it is nonzero. In + // that case, we need to retrieve the entire list so we can compute and + // return the correct size. + requestedSize := size + if size == 0 || size > linux.XATTR_SIZE_MAX { + requestedSize = linux.XATTR_SIZE_MAX + } + xattrs, err := d.Inode.ListXattr(t, requestedSize) + if err != nil { + return 0, err + } + + // TODO(b/148380782): support namespaces other than "user". + for x := range xattrs { + if !strings.HasPrefix(x, linux.XATTR_USER_PREFIX) { + delete(xattrs, x) + } + } + + listSize := xattrListSize(xattrs) + if listSize > linux.XATTR_SIZE_MAX { + return 0, syserror.E2BIG + } + if uint64(listSize) > requestedSize { + return 0, syserror.ERANGE + } + + // Don't copy out the attributes if size is 0. + if size == 0 { + return listSize, nil + } + + buf := make([]byte, 0, listSize) + for x := range xattrs { + buf = append(buf, []byte(x)...) + buf = append(buf, 0) + } + if _, err := t.CopyOutBytes(addr, buf); err != nil { + return 0, err + } + + return len(buf), nil +} + +func xattrListSize(xattrs map[string]struct{}) int { + size := 0 + for x := range xattrs { + size += len(x) + 1 + } + return size +} + +// RemoveXattr implements linux syscall removexattr(2). 
+func RemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return removeXattrFromPath(t, args, true) +} + +// LRemoveXattr implements linux syscall lremovexattr(2). +func LRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return removeXattrFromPath(t, args, false) +} + +// FRemoveXattr implements linux syscall fremovexattr(2). +func FRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + nameAddr := args[1].Pointer() + + // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. + f := t.GetFile(fd) + if f == nil { + return 0, nil, syserror.EBADF + } + defer f.DecRef() + + return 0, nil, removeXattr(t, f.Dirent, nameAddr) +} + +func removeXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + nameAddr := args[1].Pointer() + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + + return removeXattr(t, d, nameAddr) + }) +} + +// removeXattr implements removexattr(2) from the given *fs.Dirent. +func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error { + name, err := copyInXattrName(t, nameAddr) + if err != nil { + return err + } + + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { + return err + } + + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + + return d.Inode.RemoveXattr(t, d, name) +} diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 4d84ad999..cadd83273 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -768,12 +768,22 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { } // TODO(b/127675828): support getxattr. -func (l *localFile) GetXattr(name string, size uint64) (string, error) { +func (*localFile) GetXattr(string, uint64) (string, error) { return "", syscall.EOPNOTSUPP } // TODO(b/127675828): support setxattr. -func (l *localFile) SetXattr(name, value string, flags uint32) error { +func (*localFile) SetXattr(string, string, uint32) error { + return syscall.EOPNOTSUPP +} + +// TODO(b/148303075): support listxattr. +func (*localFile) ListXattr(uint64) (map[string]struct{}, error) { + return nil, syscall.EOPNOTSUPP +} + +// TODO(b/148303075): support removexattr. +func (*localFile) RemoveXattr(string) error { return syscall.EOPNOTSUPP } @@ -790,7 +800,7 @@ func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error } // Rename implements p9.File; this should never be called. 
-func (l *localFile) Rename(p9.File, string) error { +func (*localFile) Rename(p9.File, string) error { panic("rename called directly") } diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 12d389c3e..ca1af209a 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3782,6 +3782,7 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", gtest, "//test/util:posix_error", diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 85eb31847..8b00ef44c 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -24,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" #include "test/syscalls/linux/file_base.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" @@ -38,36 +39,36 @@ namespace { class XattrTest : public FileTest {}; -TEST_F(XattrTest, XattrNullName) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); +TEST_F(XattrTest, XattrNonexistentFile) { + const char* path = "/does/not/exist"; + EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(getxattr(path, nullptr, nullptr, 0), + SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(listxattr(path, nullptr, 0), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(removexattr(path, nullptr), SyscallFailsWithErrno(ENOENT)); +} +TEST_F(XattrTest, XattrNullName) { const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EFAULT)); EXPECT_THAT(getxattr(path, nullptr, nullptr, 0), SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(removexattr(path, nullptr), SyscallFailsWithErrno(EFAULT)); } TEST_F(XattrTest, XattrEmptyName) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(ERANGE)); EXPECT_THAT(getxattr(path, "", nullptr, 0), SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(removexattr(path, ""), SyscallFailsWithErrno(ERANGE)); } TEST_F(XattrTest, XattrLargeName) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); @@ -86,28 +87,23 @@ TEST_F(XattrTest, XattrLargeName) { SyscallFailsWithErrno(ERANGE)); EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(removexattr(path, name.c_str()), SyscallFailsWithErrno(ERANGE)); } TEST_F(XattrTest, XattrInvalidPrefix) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. 
- SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); std::string name(XATTR_NAME_MAX, 'a'); EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EOPNOTSUPP)); EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), SyscallFailsWithErrno(EOPNOTSUPP)); + EXPECT_THAT(removexattr(path, name.c_str()), + SyscallFailsWithErrno(EOPNOTSUPP)); } // Do not allow save/restore cycles after making the test file read-only, as // the restore will fail to open it with r/w permissions. TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -124,19 +120,21 @@ TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EACCES)); char buf = '-'; EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); EXPECT_EQ(buf, val); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); } // Do not allow save/restore cycles after making the test file write-only, as // the restore will fail to open it with r/w permissions. TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -152,6 +150,14 @@ TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(EACCES)); + + // listxattr will succeed even without read permissions. + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(removexattr(path, name), SyscallSucceeds()); } TEST_F(XattrTest, XattrTrustedWithNonadmin) { @@ -163,64 +169,66 @@ TEST_F(XattrTest, XattrTrustedWithNonadmin) { const char name[] = "trusted.abc"; EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM)); EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } TEST_F(XattrTest, XattrOnDirectory) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. 
- SKIP_IF(IsRunningOnGvisor()); - TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const char name[] = "user.test"; - EXPECT_THAT(setxattr(dir.path().c_str(), name, NULL, 0, /*flags=*/0), + EXPECT_THAT(setxattr(dir.path().c_str(), name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); - EXPECT_THAT(getxattr(dir.path().c_str(), name, NULL, 0), + EXPECT_THAT(getxattr(dir.path().c_str(), name, nullptr, 0), SyscallSucceedsWithValue(0)); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(dir.path().c_str(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(removexattr(dir.path().c_str(), name), SyscallSucceeds()); } TEST_F(XattrTest, XattrOnSymlink) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); const char name[] = "user.test"; - EXPECT_THAT(setxattr(link.path().c_str(), name, NULL, 0, /*flags=*/0), + EXPECT_THAT(setxattr(link.path().c_str(), name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); - EXPECT_THAT(getxattr(link.path().c_str(), name, NULL, 0), + EXPECT_THAT(getxattr(link.path().c_str(), name, nullptr, 0), SyscallSucceedsWithValue(0)); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(link.path().c_str(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(removexattr(link.path().c_str(), name), SyscallSucceeds()); } TEST_F(XattrTest, XattrOnInvalidFileTypes) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char name[] = "user.test"; char char_device[] = "/dev/zero"; - EXPECT_THAT(setxattr(char_device, name, NULL, 0, /*flags=*/0), + EXPECT_THAT(setxattr(char_device, name, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(getxattr(char_device, name, NULL, 0), + EXPECT_THAT(getxattr(char_device, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(listxattr(char_device, nullptr, 0), SyscallSucceedsWithValue(0)); // Use tmpfs, where creation of named pipes is supported. const std::string fifo = NewTempAbsPathInDir("/dev/shm"); const char* path = fifo.c_str(); EXPECT_THAT(mknod(path, S_IFIFO | S_IRUSR | S_IWUSR, 0), SyscallSucceeds()); - EXPECT_THAT(setxattr(path, name, NULL, 0, /*flags=*/0), + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(getxattr(path, name, NULL, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(listxattr(path, nullptr, 0), SyscallSucceedsWithValue(0)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM)); } TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. 
- SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -236,10 +244,6 @@ TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, SetxattrZeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -252,10 +256,6 @@ TEST_F(XattrTest, SetxattrZeroSize) { } TEST_F(XattrTest, SetxattrSizeTooLarge) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; @@ -271,10 +271,6 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) { } TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), @@ -284,10 +280,6 @@ TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { } TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); @@ -296,10 +288,6 @@ TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val(XATTR_SIZE_MAX + 1); @@ -316,10 +304,6 @@ TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { } TEST_F(XattrTest, SetxattrReplaceWithSmaller) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -335,10 +319,6 @@ TEST_F(XattrTest, SetxattrReplaceWithSmaller) { } TEST_F(XattrTest, SetxattrReplaceWithLarger) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -353,10 +333,6 @@ TEST_F(XattrTest, SetxattrReplaceWithLarger) { } TEST_F(XattrTest, SetxattrCreateFlag) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. 
- SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), @@ -368,10 +344,6 @@ TEST_F(XattrTest, SetxattrCreateFlag) { } TEST_F(XattrTest, SetxattrReplaceFlag) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), @@ -384,10 +356,6 @@ TEST_F(XattrTest, SetxattrReplaceFlag) { } TEST_F(XattrTest, SetxattrInvalidFlags) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); int invalid_flags = 0xff; EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags), @@ -395,10 +363,6 @@ TEST_F(XattrTest, SetxattrInvalidFlags) { } TEST_F(XattrTest, Getxattr) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; int val = 1234; @@ -411,10 +375,6 @@ TEST_F(XattrTest, Getxattr) { } TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -427,10 +387,6 @@ TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, GetxattrSizeLargerThanValue) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -446,10 +402,6 @@ TEST_F(XattrTest, GetxattrSizeLargerThanValue) { } TEST_F(XattrTest, GetxattrZeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -463,10 +415,6 @@ TEST_F(XattrTest, GetxattrZeroSize) { } TEST_F(XattrTest, GetxattrSizeTooLarge) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -483,10 +431,6 @@ TEST_F(XattrTest, GetxattrSizeTooLarge) { } TEST_F(XattrTest, GetxattrNullValue) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -498,10 +442,6 @@ TEST_F(XattrTest, GetxattrNullValue) { } TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. 
- SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -518,35 +458,109 @@ TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, GetxattrNonexistentName) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, Listxattr) { + const char* path = test_file_name_.c_str(); + const std::string name = "user.test"; + const std::string name2 = "user.test2"; + const std::string name3 = "user.test3"; + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name2.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name3.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + std::vector list(name.size() + 1 + name2.size() + 1 + name3.size() + 1); + char* buf = list.data(); + EXPECT_THAT(listxattr(path, buf, XATTR_SIZE_MAX), + SyscallSucceedsWithValue(list.size())); + + absl::flat_hash_set got = {}; + for (char* p = buf; p < buf + list.size(); p += strlen(p) + 1) { + got.insert(std::string{p}); + } + + absl::flat_hash_set expected = {name, name2, name3}; + EXPECT_EQ(got, expected); +} + +TEST_F(XattrTest, ListxattrNoXattrs) { + const char* path = test_file_name_.c_str(); + + std::vector list, expected; + EXPECT_THAT(listxattr(path, list.data(), sizeof(list)), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(list, expected); + + // Listxattr should succeed if there are no attributes, even if the buffer + // passed in is a nullptr. + EXPECT_THAT(listxattr(path, nullptr, sizeof(list)), + SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, ListxattrNullBuffer) { const char* path = test_file_name_.c_str(); const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(listxattr(path, nullptr, sizeof(name)), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(XattrTest, ListxattrSizeTooSmall) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + + char list[sizeof(name) - 1]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallFailsWithErrno(ERANGE)); +} + +TEST_F(XattrTest, ListxattrZeroSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + EXPECT_THAT(listxattr(path, nullptr, 0), + SyscallSucceedsWithValue(sizeof(name))); +} + +TEST_F(XattrTest, RemoveXattr) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + EXPECT_THAT(removexattr(path, name), SyscallSucceeds()); EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } -TEST_F(XattrTest, LGetSetxattrOnSymlink) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. 
- SKIP_IF(IsRunningOnGvisor()); +TEST_F(XattrTest, RemoveXattrNonexistentName) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(ENODATA)); +} +TEST_F(XattrTest, LXattrOnSymlink) { + const char name[] = "user.test"; TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); - EXPECT_THAT(lsetxattr(link.path().c_str(), nullptr, nullptr, 0, 0), + EXPECT_THAT(lsetxattr(link.path().c_str(), name, nullptr, 0, 0), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(lgetxattr(link.path().c_str(), nullptr, nullptr, 0), + EXPECT_THAT(lgetxattr(link.path().c_str(), name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(llistxattr(link.path().c_str(), nullptr, 0), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(lremovexattr(link.path().c_str(), name), + SyscallFailsWithErrno(EPERM)); } -TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - +TEST_F(XattrTest, LXattrOnNonsymlink) { const char* path = test_file_name_.c_str(); const char name[] = "user.test"; int val = 1234; @@ -558,13 +572,16 @@ TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { EXPECT_THAT(lgetxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); EXPECT_EQ(buf, val); -} -TEST_F(XattrTest, FGetSetxattr) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); + char list[sizeof(name)]; + EXPECT_THAT(llistxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + EXPECT_THAT(lremovexattr(path, name), SyscallSucceeds()); +} + +TEST_F(XattrTest, XattrWithFD) { const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_.c_str(), 0)); const char name[] = "user.test"; @@ -577,6 +594,13 @@ TEST_F(XattrTest, FGetSetxattr) { EXPECT_THAT(fgetxattr(fd.get(), name, &buf, size), SyscallSucceedsWithValue(size)); EXPECT_EQ(buf, val); + + char list[sizeof(name)]; + EXPECT_THAT(flistxattr(fd.get(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds()); } } // namespace -- cgit v1.2.3 From 75412ed9f5b6b327dec05ffff99d7fe6198d25a8 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Mon, 10 Feb 2020 10:28:56 -0800 Subject: Internal change. 
PiperOrigin-RevId: 294250370 --- test/syscalls/linux/inotify.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index fdef646eb..0e13ad190 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -1055,9 +1055,9 @@ TEST(Inotify, ChmodGeneratesAttribEvent_NoRandomSave) { const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); - const FileDescriptor root_fd = + FileDescriptor root_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(root.path(), O_RDONLY)); - const FileDescriptor file1_fd = + FileDescriptor file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); @@ -1091,6 +1091,11 @@ TEST(Inotify, ChmodGeneratesAttribEvent_NoRandomSave) { ASSERT_THAT(fchmodat(root_fd.get(), file1_basename.c_str(), S_IWGRP, 0), SyscallSucceeds()); verify_chmod_events(); + + // Make sure the chmod'ed file descriptors are destroyed before DisableSave + // is destructed. + root_fd.reset(); + file1_fd.reset(); } TEST(Inotify, TruncateGeneratesModifyEvent) { -- cgit v1.2.3 From 31f2182cd3fc2a6fdb1aecf1c56f1302f16f6453 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Mon, 10 Feb 2020 11:08:24 -0800 Subject: iptables: add instructions for runsc building. The readme didn't mention that users need to: - `bazel build` when working on iptables tests - enable raw sockets in /etc/docker/daemon.json. PiperOrigin-RevId: 294260169 --- test/iptables/README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/iptables/README.md b/test/iptables/README.md index c2b934e1f..cc8a2fcac 100644 --- a/test/iptables/README.md +++ b/test/iptables/README.md @@ -2,6 +2,9 @@ iptables tests are run via `scripts/iptables_test.sh`. +iptables requires raw socket support, so you must add the `--net-raw=true` flag +to `/etc/docker/daemon.json` in order to use it. + ## Test Structure Each test implements `TestCase`, providing (1) a function to run inside the @@ -25,7 +28,14 @@ Your test is now runnable with bazel! ## Run individual tests -Build the testing Docker container: +Build and install `runsc`. Re-run this when you modify gVisor: + +```bash +$ bazel build //runsc && sudo cp bazel-bin/runsc/linux_amd64_pure_stripped/runsc $(which runsc) +``` + +Build the testing Docker container. Re-run this when you modify the test code in +this directory: ```bash $ bazel run //test/iptables/runner:runner-image -- --norun -- cgit v1.2.3 From 0efa8168c7c04ec0a4bd62e2d2eb8718b5d72ea7 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 10 Feb 2020 11:28:57 -0800 Subject: Update visibility. 
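A note on the raw-socket requirement added to test/iptables/README.md above: the README says to add `--net-raw=true` to `/etc/docker/daemon.json` but does not show the shape of that file. The snippet below is only an illustrative sketch; the `--net-raw=true` argument comes from the README, while the runsc path and the surrounding layout are assumptions about a typical runsc installation.

```json
{
  "runtimes": {
    "runsc": {
      "path": "/usr/local/bin/runsc",
      "runtimeArgs": ["--net-raw=true"]
    }
  }
}
```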
PiperOrigin-RevId: 294265019 --- pkg/seccomp/BUILD | 2 +- test/root/testdata/BUILD | 2 +- tools/build/BUILD | 2 +- tools/checkunsafe/BUILD | 2 +- tools/go_generics/BUILD | 2 +- tools/go_generics/go_merge/BUILD | 2 +- tools/go_stateify/BUILD | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index 742c8b79b..c5fca2ba3 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -26,7 +26,7 @@ go_library( "seccomp_rules.go", "seccomp_unsafe.go", ], - visibility = ["//visibility:public"], + visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", "//pkg/bpf", diff --git a/test/root/testdata/BUILD b/test/root/testdata/BUILD index bca5f9cab..6859541ad 100644 --- a/test/root/testdata/BUILD +++ b/test/root/testdata/BUILD @@ -13,6 +13,6 @@ go_library( "simple.go", ], visibility = [ - "//visibility:public", + "//:sandbox", ], ) diff --git a/tools/build/BUILD b/tools/build/BUILD index 0c0ce3f4d..00a467473 100644 --- a/tools/build/BUILD +++ b/tools/build/BUILD @@ -6,5 +6,5 @@ genrule( name = "loopback", outs = ["loopback.txt"], cmd = "touch $@", - visibility = ["//visibility:public"], + visibility = ["//:sandbox"], ) diff --git a/tools/checkunsafe/BUILD b/tools/checkunsafe/BUILD index 92ba8ab06..4f1a31a6d 100644 --- a/tools/checkunsafe/BUILD +++ b/tools/checkunsafe/BUILD @@ -5,7 +5,7 @@ package(licenses = ["notice"]) go_tool_library( name = "checkunsafe", srcs = ["check_unsafe.go"], - visibility = ["//visibility:public"], + visibility = ["//:sandbox"], deps = [ "@org_golang_x_tools//go/analysis:go_tool_library", ], diff --git a/tools/go_generics/BUILD b/tools/go_generics/BUILD index 069df3856..32a949c93 100644 --- a/tools/go_generics/BUILD +++ b/tools/go_generics/BUILD @@ -9,7 +9,7 @@ go_binary( "imports.go", "remove.go", ], - visibility = ["//visibility:public"], + visibility = ["//:sandbox"], deps = ["//tools/go_generics/globals"], ) diff --git a/tools/go_generics/go_merge/BUILD b/tools/go_generics/go_merge/BUILD index b7d35e272..2fd5a200d 100644 --- a/tools/go_generics/go_merge/BUILD +++ b/tools/go_generics/go_merge/BUILD @@ -5,5 +5,5 @@ package(licenses = ["notice"]) go_binary( name = "go_merge", srcs = ["main.go"], - visibility = ["//visibility:public"], + visibility = ["//:sandbox"], ) diff --git a/tools/go_stateify/BUILD b/tools/go_stateify/BUILD index 6036faf7b..503cdf2e5 100644 --- a/tools/go_stateify/BUILD +++ b/tools/go_stateify/BUILD @@ -5,6 +5,6 @@ package(licenses = ["notice"]) go_binary( name = "stateify", srcs = ["main.go"], - visibility = ["//visibility:public"], + visibility = ["//:sandbox"], deps = ["//tools/tags"], ) -- cgit v1.2.3 From e07eacc99f00c7318df789f24e7559a7fb941b8e Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 11 Feb 2020 13:37:40 -0800 Subject: Fix up test/runtimes/README.md. In particular, explain how to push updates to the images. 
PiperOrigin-RevId: 294508879 --- test/runtimes/README.md | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/test/runtimes/README.md b/test/runtimes/README.md index e41e78f77..42d722553 100644 --- a/test/runtimes/README.md +++ b/test/runtimes/README.md @@ -12,24 +12,39 @@ The following runtimes are currently supported: - PHP 7.3 - Python 3.7 -#### Prerequisites: +### Building and pushing the images: -1) [Install and configure Docker](https://docs.docker.com/install/) - -2) Build each Docker container from the runtimes/images directory: +The canonical source of images is the +[gvisor-presubmit container registry](https://gcr.io/gvisor-presubmit/). You can +build new images with the following command: ```bash $ cd images $ docker build -f Dockerfile_$LANG [-t $NAME] . ``` -### Testing: +To push them to our container registry, set the tag in the command above to +`gcr.io/gvisor-presubmit/$LANG`, then push them. (Note that you will need +appropriate permissions to the `gvisor-presubmit` GCP project.) + +```bash +gcloud docker -- push gcr.io/gvisor-presubmit/$LANG +``` + +#### Running in Docker locally: + +1) [Install and configure Docker](https://docs.docker.com/install/) + +2) Pull the image you want to run: + +```bash +$ docker pull gcr.io/gvisor-presubmit/$LANG +``` -If the prerequisites have been fulfilled, you can run the tests with the -following command: +3) Run docker with the image. ```bash -$ docker run --rm -it $NAME [FLAG] +$ docker run [--runtime=runsc] --rm -it $NAME [FLAG] ``` Running the command with no flags will cause all the available tests to execute. -- cgit v1.2.3 From 46a36b64d5164d1ac887aa528d23bb2f2c74489e Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Wed, 12 Feb 2020 06:35:20 -0800 Subject: Include more test files in exports_files So that they can be included by Fuchsia's syscall tests PiperOrigin-RevId: 294654890 --- test/syscalls/linux/BUILD | 3 +++ 1 file changed, 3 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ca1af209a..e7c82adfc 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -10,13 +10,16 @@ exports_files( "socket.cc", "socket_inet_loopback.cc", "socket_ip_loopback_blocking.cc", + "socket_ip_tcp_generic_loopback.cc", "socket_ip_tcp_loopback.cc", + "socket_ip_tcp_udp_generic.cc", "socket_ip_udp_loopback.cc", "socket_ip_unbound.cc", "socket_ipv4_tcp_unbound_external_networking_test.cc", "socket_ipv4_udp_unbound_external_networking_test.cc", "socket_ipv4_udp_unbound_loopback.cc", "tcp_socket.cc", + "udp_bind.cc", "udp_socket.cc", ], visibility = ["//:sandbox"], -- cgit v1.2.3 From 6fdf2c53a1d084b70602170b660242036fd8fe4f Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 7 Feb 2020 11:21:07 -0800 Subject: iptables: User chains - Adds creation of user chains via `-N ` - Adds `-j RETURN` support for built-in chains, which triggers the chain's underflow rule (usually the default policy). - Adds tests for chain creation, default policies, and `-j RETURN' from built-in chains. 
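As a rough illustration of what the features listed above look like from userspace (this is not code from the change itself, and it assumes a root shell with the iptables binary available inside the test container), the new code paths correspond to invocations like the ones driven by this small Go helper:

```go
// Hypothetical standalone sketch, not part of the test harness: it runs the
// iptables commands that exercise user chain creation (-N), a default policy,
// and -j RETURN falling through to the built-in chain's underflow rule.
package main

import (
	"fmt"
	"os"
	"os/exec"
)

// iptables runs a single iptables command and surfaces its output on error.
func iptables(args ...string) error {
	out, err := exec.Command("iptables", args...).CombinedOutput()
	if err != nil {
		return fmt.Errorf("iptables %v: %v (output: %s)", args, err, out)
	}
	return nil
}

func main() {
	steps := [][]string{
		{"-N", "foochain"},               // Create a user chain.
		{"-A", "foochain", "-j", "DROP"}, // Add a rule to the user chain.
		{"-P", "INPUT", "ACCEPT"},        // Default policy, i.e. the underflow rule.
		{"-A", "INPUT", "-j", "RETURN"},  // RETURN triggers the underflow.
	}
	for _, step := range steps {
		if err := iptables(step...); err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
	}
}
```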
--- pkg/sentry/socket/netfilter/netfilter.go | 115 +++++++++++++++++++----------- pkg/tcpip/iptables/iptables.go | 74 ++++++++++++------- pkg/tcpip/iptables/targets.go | 41 ++++++++--- pkg/tcpip/iptables/types.go | 50 +++++-------- test/iptables/filter_input.go | 117 ++++++++++++++++++++++++++++++- test/iptables/iptables_test.go | 24 +++++++ 6 files changed, 310 insertions(+), 111 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index ea02627de..3fc80e0de 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -50,7 +50,9 @@ type metadata struct { // nflog logs messages related to the writing and reading of iptables. func nflog(format string, args ...interface{}) { - log.Infof("netfilter: "+format, args...) + if log.IsLogging(log.Debug) { + log.Debugf("netfilter: "+format, args...) + } } // GetInfo returns information about iptables. @@ -227,19 +229,23 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern } func marshalTarget(target iptables.Target) []byte { - switch target.(type) { - case iptables.UnconditionalAcceptTarget: - return marshalStandardTarget(iptables.Accept) - case iptables.UnconditionalDropTarget: - return marshalStandardTarget(iptables.Drop) + switch tg := target.(type) { + case iptables.AcceptTarget: + return marshalStandardTarget(iptables.RuleAccept) + case iptables.DropTarget: + return marshalStandardTarget(iptables.RuleDrop) case iptables.ErrorTarget: - return marshalErrorTarget() + return marshalErrorTarget(errorTargetName) + case iptables.UserChainTarget: + return marshalErrorTarget(tg.Name) + case iptables.ReturnTarget: + return marshalStandardTarget(iptables.RuleReturn) default: panic(fmt.Errorf("unknown target of type %T", target)) } } -func marshalStandardTarget(verdict iptables.Verdict) []byte { +func marshalStandardTarget(verdict iptables.RuleVerdict) []byte { nflog("convert to binary: marshalling standard target with size %d", linux.SizeOfXTStandardTarget) // The target's name will be the empty string. @@ -254,14 +260,14 @@ func marshalStandardTarget(verdict iptables.Verdict) []byte { return binary.Marshal(ret, usermem.ByteOrder, target) } -func marshalErrorTarget() []byte { +func marshalErrorTarget(errorName string) []byte { // This is an error target named error target := linux.XTErrorTarget{ Target: linux.XTEntryTarget{ TargetSize: linux.SizeOfXTErrorTarget, }, } - copy(target.Name[:], errorTargetName) + copy(target.Name[:], errorName) copy(target.Target.Name[:], errorTargetName) ret := make([]byte, 0, linux.SizeOfXTErrorTarget) @@ -270,38 +276,35 @@ func marshalErrorTarget() []byte { // translateFromStandardVerdict translates verdicts the same way as the iptables // tool. -func translateFromStandardVerdict(verdict iptables.Verdict) int32 { +func translateFromStandardVerdict(verdict iptables.RuleVerdict) int32 { switch verdict { - case iptables.Accept: + case iptables.RuleAccept: return -linux.NF_ACCEPT - 1 - case iptables.Drop: + case iptables.RuleDrop: return -linux.NF_DROP - 1 - case iptables.Queue: - return -linux.NF_QUEUE - 1 - case iptables.Return: + case iptables.RuleReturn: return linux.NF_RETURN - case iptables.Jump: + default: // TODO(gvisor.dev/issue/170): Support Jump. 
- panic("Jump isn't supported yet") + panic(fmt.Sprintf("unknown standard verdict: %d", verdict)) } - panic(fmt.Sprintf("unknown standard verdict: %d", verdict)) } -// translateToStandardVerdict translates from the value in a +// translateToStandardTarget translates from the value in a // linux.XTStandardTarget to an iptables.Verdict. -func translateToStandardVerdict(val int32) (iptables.Verdict, error) { +func translateToStandardTarget(val int32) (iptables.Target, error) { // TODO(gvisor.dev/issue/170): Support other verdicts. switch val { case -linux.NF_ACCEPT - 1: - return iptables.Accept, nil + return iptables.AcceptTarget{}, nil case -linux.NF_DROP - 1: - return iptables.Drop, nil + return iptables.DropTarget{}, nil case -linux.NF_QUEUE - 1: - return iptables.Invalid, errors.New("unsupported iptables verdict QUEUE") + return nil, errors.New("unsupported iptables verdict QUEUE") case linux.NF_RETURN: - return iptables.Invalid, errors.New("unsupported iptables verdict RETURN") + return iptables.ReturnTarget{}, nil default: - return iptables.Invalid, fmt.Errorf("unknown iptables verdict %d", val) + return nil, fmt.Errorf("unknown iptables verdict %d", val) } } @@ -411,6 +414,10 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { table.BuiltinChains[hk] = ruleIdx } if offset == replace.Underflow[hook] { + if !validUnderflow(table.Rules[ruleIdx]) { + nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP.") + return syserr.ErrInvalidArgument + } table.Underflows[hk] = ruleIdx } } @@ -425,12 +432,34 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { } } + // Add the user chains. + for ruleIdx, rule := range table.Rules { + target, ok := rule.Target.(iptables.UserChainTarget) + if !ok { + continue + } + + // We found a user chain. Before inserting it into the table, + // check that: + // - There's some other rule after it. + // - There are no matchers. + if ruleIdx == len(table.Rules)-1 { + nflog("user chain must have a rule or default policy.") + return syserr.ErrInvalidArgument + } + if len(table.Rules[ruleIdx].Matchers) != 0 { + nflog("user chain's first node must have no matcheres.") + return syserr.ErrInvalidArgument + } + table.UserChains[target.Name] = ruleIdx + 1 + } + // TODO(gvisor.dev/issue/170): Support other chains. // Since we only support modifying the INPUT chain right now, make sure // all other chains point to ACCEPT rules. for hook, ruleIdx := range table.BuiltinChains { if hook != iptables.Input { - if _, ok := table.Rules[ruleIdx].Target.(iptables.UnconditionalAcceptTarget); !ok { + if _, ok := table.Rules[ruleIdx].Target.(iptables.AcceptTarget); !ok { nflog("hook %d is unsupported.", hook) return syserr.ErrInvalidArgument } @@ -519,18 +548,7 @@ func parseTarget(optVal []byte) (iptables.Target, error) { buf = optVal[:linux.SizeOfXTStandardTarget] binary.Unmarshal(buf, usermem.ByteOrder, &standardTarget) - verdict, err := translateToStandardVerdict(standardTarget.Verdict) - if err != nil { - return nil, err - } - switch verdict { - case iptables.Accept: - return iptables.UnconditionalAcceptTarget{}, nil - case iptables.Drop: - return iptables.UnconditionalDropTarget{}, nil - default: - return nil, fmt.Errorf("Unknown verdict: %v", verdict) - } + return translateToStandardTarget(standardTarget.Verdict) case errorTargetName: // Error target. @@ -548,11 +566,14 @@ func parseTarget(optVal []byte) (iptables.Target, error) { // somehow fall through every rule. // * To mark the start of a user defined chain. 
These // rules have an error with the name of the chain. - switch errorTarget.Name.String() { + switch name := errorTarget.Name.String(); name { case errorTargetName: + nflog("set entries: error target") return iptables.ErrorTarget{}, nil default: - return nil, fmt.Errorf("unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String()) + // User defined chain. + nflog("set entries: user-defined target %q", name) + return iptables.UserChainTarget{Name: name}, nil } } @@ -585,6 +606,18 @@ func containsUnsupportedFields(iptip linux.IPTIP) bool { iptip.InverseFlags != 0 } +func validUnderflow(rule iptables.Rule) bool { + if len(rule.Matchers) != 0 { + return false + } + switch rule.Target.(type) { + case iptables.AcceptTarget, iptables.DropTarget: + return true + default: + return false + } +} + func hookFromLinux(hook int) iptables.Hook { switch hook { case linux.NF_INET_PRE_ROUTING: diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index 1b9485bbd..75a433a3b 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -52,10 +52,10 @@ func DefaultTables() IPTables { Tables: map[string]Table{ TablenameNat: Table{ Rules: []Rule{ - Rule{Target: UnconditionalAcceptTarget{}}, - Rule{Target: UnconditionalAcceptTarget{}}, - Rule{Target: UnconditionalAcceptTarget{}}, - Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: AcceptTarget{}}, + Rule{Target: AcceptTarget{}}, + Rule{Target: AcceptTarget{}}, + Rule{Target: AcceptTarget{}}, Rule{Target: ErrorTarget{}}, }, BuiltinChains: map[Hook]int{ @@ -74,8 +74,8 @@ func DefaultTables() IPTables { }, TablenameMangle: Table{ Rules: []Rule{ - Rule{Target: UnconditionalAcceptTarget{}}, - Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: AcceptTarget{}}, + Rule{Target: AcceptTarget{}}, Rule{Target: ErrorTarget{}}, }, BuiltinChains: map[Hook]int{ @@ -90,9 +90,9 @@ func DefaultTables() IPTables { }, TablenameFilter: Table{ Rules: []Rule{ - Rule{Target: UnconditionalAcceptTarget{}}, - Rule{Target: UnconditionalAcceptTarget{}}, - Rule{Target: UnconditionalAcceptTarget{}}, + Rule{Target: AcceptTarget{}}, + Rule{Target: AcceptTarget{}}, + Rule{Target: AcceptTarget{}}, Rule{Target: ErrorTarget{}}, }, BuiltinChains: map[Hook]int{ @@ -149,13 +149,11 @@ func (it *IPTables) Check(hook Hook, pkt tcpip.PacketBuffer) bool { for _, tablename := range it.Priorities[hook] { switch verdict := it.checkTable(hook, pkt, tablename); verdict { // If the table returns Accept, move on to the next table. - case Accept: + case TableAccept: continue // The Drop verdict is final. - case Drop: + case TableDrop: return false - case Stolen, Queue, Repeat, None, Jump, Return, Continue: - panic(fmt.Sprintf("Unimplemented verdict %v.", verdict)) default: panic(fmt.Sprintf("Unknown verdict %v.", verdict)) } @@ -166,36 +164,58 @@ func (it *IPTables) Check(hook Hook, pkt tcpip.PacketBuffer) bool { } // Precondition: pkt.NetworkHeader is set. -func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename string) Verdict { +func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename string) TableVerdict { // Start from ruleIdx and walk the list of rules until a rule gives us // a verdict. table := it.Tables[tablename] for ruleIdx := table.BuiltinChains[hook]; ruleIdx < len(table.Rules); ruleIdx++ { switch verdict := it.checkRule(hook, pkt, table, ruleIdx); verdict { - // In either of these cases, this table is done with the packet. 
- case Accept, Drop: - return verdict - // Continue traversing the rules of the table. - case Continue: + case RuleAccept: + return TableAccept + + case RuleDrop: + return TableDrop + + case RuleContinue: continue - case Stolen, Queue, Repeat, None, Jump, Return: - panic(fmt.Sprintf("Unimplemented verdict %v.", verdict)) + + case RuleReturn: + // TODO(gvisor.dev/issue/170): We don't implement jump + // yet, so any Return is from a built-in chain. That + // means we have to to call the underflow. + underflow := table.Rules[table.Underflows[hook]] + // Underflow is guaranteed to be an unconditional + // ACCEPT or DROP. + switch v, _ := underflow.Target.Action(pkt); v { + case RuleAccept: + return TableAccept + case RuleDrop: + return TableDrop + case RuleContinue, RuleReturn: + panic("Underflows should only return RuleAccept or RuleDrop.") + default: + panic(fmt.Sprintf("Unknown verdict: %d", v)) + } + default: - panic(fmt.Sprintf("Unknown verdict %v.", verdict)) + panic(fmt.Sprintf("Unknown verdict: %d", verdict)) } + } - panic(fmt.Sprintf("Traversed past the entire list of iptables rules in table %q.", tablename)) + // We got through the entire table without a decision. Default to DROP + // for safety. + return TableDrop } // Precondition: pk.NetworkHeader is set. -func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) Verdict { +func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) RuleVerdict { rule := table.Rules[ruleIdx] // First check whether the packet matches the IP header filter. // TODO(gvisor.dev/issue/170): Support other fields of the filter. if rule.Filter.Protocol != 0 && rule.Filter.Protocol != header.IPv4(pkt.NetworkHeader).TransportProtocol() { - return Continue + return RuleContinue } // Go through each rule matcher. If they all match, run @@ -203,10 +223,10 @@ func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ru for _, matcher := range rule.Matchers { matches, hotdrop := matcher.Match(hook, pkt, "") if hotdrop { - return Drop + return RuleDrop } if !matches { - return Continue + return RuleContinue } } diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go index 4dd281371..9fc60cfad 100644 --- a/pkg/tcpip/iptables/targets.go +++ b/pkg/tcpip/iptables/targets.go @@ -21,20 +21,20 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" ) -// UnconditionalAcceptTarget accepts all packets. -type UnconditionalAcceptTarget struct{} +// AcceptTarget accepts packets. +type AcceptTarget struct{} // Action implements Target.Action. -func (UnconditionalAcceptTarget) Action(packet tcpip.PacketBuffer) (Verdict, string) { - return Accept, "" +func (AcceptTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { + return RuleAccept, "" } -// UnconditionalDropTarget denies all packets. -type UnconditionalDropTarget struct{} +// DropTarget drops packets. +type DropTarget struct{} // Action implements Target.Action. -func (UnconditionalDropTarget) Action(packet tcpip.PacketBuffer) (Verdict, string) { - return Drop, "" +func (DropTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { + return RuleDrop, "" } // ErrorTarget logs an error and drops the packet. It represents a target that @@ -42,7 +42,26 @@ func (UnconditionalDropTarget) Action(packet tcpip.PacketBuffer) (Verdict, strin type ErrorTarget struct{} // Action implements Target.Action. 
-func (ErrorTarget) Action(packet tcpip.PacketBuffer) (Verdict, string) { - log.Warningf("ErrorTarget triggered.") - return Drop, "" +func (ErrorTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { + log.Debugf("ErrorTarget triggered.") + return RuleDrop, "" +} + +// UserChainTarget marks a rule as the beginning of a user chain. +type UserChainTarget struct { + Name string +} + +// Action implements Target.Action. +func (UserChainTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { + panic("UserChainTarget should never be called.") +} + +// ReturnTarget returns from the current chain. If the chain is a built-in, the +// hook's underflow should be called. +type ReturnTarget struct{} + +// Action implements Target.Action. +func (ReturnTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { + return RuleReturn, "" } diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 7d593c35c..5735d001b 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -56,44 +56,32 @@ const ( NumHooks ) -// A Verdict is returned by a rule's target to indicate how traversal of rules -// should (or should not) continue. -type Verdict int +// A TableVerdict is what a table decides should be done with a packet. +type TableVerdict int const ( - // Invalid indicates an unkonwn or erroneous verdict. - Invalid Verdict = iota + // TableAccept indicates the packet should continue through netstack. + TableAccept TableVerdict = iota - // Accept indicates the packet should continue traversing netstack as - // normal. - Accept - - // Drop inicates the packet should be dropped, stopping traversing - // netstack. - Drop - - // Stolen indicates the packet was co-opted by the target and should - // stop traversing netstack. - Stolen - - // Queue indicates the packet should be queued for userspace processing. - Queue + // TableAccept indicates the packet should be dropped. + TableDrop +) - // Repeat indicates the packet should re-traverse the chains for the - // current hook. - Repeat +// A RuleVerdict is what a rule decides should be done with a packet. +type RuleVerdict int - // None indicates no verdict was reached. - None +const ( + // RuleAccept indicates the packet should continue through netstack. + RuleAccept RuleVerdict = iota - // Jump indicates a jump to another chain. - Jump + // RuleContinue indicates the packet should continue to the next rule. + RuleContinue - // Continue indicates that traversal should continue at the next rule. - Continue + // RuleDrop indicates the packet should be dropped. + RuleDrop - // Return indicates that traversal should return to the calling chain. - Return + // RuleReturn indicates the packet should return to the previous chain. + RuleReturn ) // IPTables holds all the tables for a netstack. @@ -187,5 +175,5 @@ type Target interface { // Action takes an action on the packet and returns a verdict on how // traversal should (or should not) continue. If the return value is // Jump, it also returns the name of the chain to jump to. 
- Action(packet tcpip.PacketBuffer) (Verdict, string) + Action(packet tcpip.PacketBuffer) (RuleVerdict, string) } diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index bd6059921..e26d6a7d2 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -36,6 +36,10 @@ func init() { RegisterTestCase(FilterInputDropTCPSrcPort{}) RegisterTestCase(FilterInputDropUDPPort{}) RegisterTestCase(FilterInputDropUDP{}) + RegisterTestCase(FilterInputCreateUserChain{}) + RegisterTestCase(FilterInputDefaultPolicyAccept{}) + RegisterTestCase(FilterInputDefaultPolicyDrop{}) + RegisterTestCase(FilterInputReturnUnderflow{}) } // FilterInputDropUDP tests that we can drop UDP traffic. @@ -295,8 +299,119 @@ func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP) error { return nil } -// LocalAction implements TestCase.LocalAction. func (FilterInputRequireProtocolUDP) LocalAction(ip net.IP) error { // No-op. return nil } + +// FilterInputCreateUserChain tests chain creation. +type FilterInputCreateUserChain struct{} + +// Name implements TestCase.Name. +func (FilterInputCreateUserChain) Name() string { + return "FilterInputCreateUserChain" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputCreateUserChain) ContainerAction(ip net.IP) error { + // Create a chain. + const chainName = "foochain" + if err := filterTable("-N", chainName); err != nil { + return err + } + + // Add a simple rule to the chain. + return filterTable("-A", chainName, "-j", "DROP") +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputCreateUserChain) LocalAction(ip net.IP) error { + // No-op. + return nil +} + +// FilterInputDefaultPolicyAccept tests the default ACCEPT policy. +type FilterInputDefaultPolicyAccept struct{} + +// Name implements TestCase.Name. +func (FilterInputDefaultPolicyAccept) Name() string { + return "FilterInputDefaultPolicyAccept" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDefaultPolicyAccept) ContainerAction(ip net.IP) error { + // Set the default policy to accept, then receive a packet. + if err := filterTable("-P", "INPUT", "ACCEPT"); err != nil { + return err + } + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputDefaultPolicyAccept) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// FilterInputDefaultPolicyDrop tests the default DROP policy. +type FilterInputDefaultPolicyDrop struct{} + +// Name implements TestCase.Name. +func (FilterInputDefaultPolicyDrop) Name() string { + return "FilterInputDefaultPolicyDrop" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDefaultPolicyDrop) ContainerAction(ip net.IP) error { + if err := filterTable("-P", "INPUT", "DROP"); err != nil { + return err + } + + // Listen for UDP packets on dropPort. + if err := listenUDP(dropPort, sendloopDuration); err == nil { + return fmt.Errorf("packets on port %d should have been dropped, but got a packet", dropPort) + } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + return fmt.Errorf("error reading: %v", err) + } + + // At this point we know that reading timed out and never received a + // packet. + return nil +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterInputDefaultPolicyDrop) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// FilterInputReturnUnderflow tests that -j RETURN in a built-in chain causes +// the underflow rule (i.e. default policy) to be executed. +type FilterInputReturnUnderflow struct{} + +// Name implements TestCase.Name. +func (FilterInputReturnUnderflow) Name() string { + return "FilterInputReturnUnderflow" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputReturnUnderflow) ContainerAction(ip net.IP) error { + // Add a RETURN rule followed by an unconditional accept, and set the + // default policy to DROP. + if err := filterTable("-A", "INPUT", "-j", "RETURN"); err != nil { + return err + } + if err := filterTable("-A", "INPUT", "-j", "DROP"); err != nil { + return err + } + if err := filterTable("-P", "INPUT", "ACCEPT"); err != nil { + return err + } + + // We should receive packets, as the RETURN rule will trigger the default + // ACCEPT policy. + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputReturnUnderflow) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 41909582a..46a7c99b0 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -214,6 +214,30 @@ func TestFilterInputDropTCPSrcPort(t *testing.T) { } } +func TestFilterInputCreateUserChain(t *testing.T) { + if err := singleTest(FilterInputCreateUserChain{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDefaultPolicyAccept(t *testing.T) { + if err := singleTest(FilterInputDefaultPolicyAccept{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputDefaultPolicyDrop(t *testing.T) { + if err := singleTest(FilterInputDefaultPolicyDrop{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterInputReturnUnderflow(t *testing.T) { + if err := singleTest(FilterInputReturnUnderflow{}); err != nil { + t.Fatal(err) + } +} + func TestFilterOutputDropTCPDestPort(t *testing.T) { if err := singleTest(FilterOutputDropTCPDestPort{}); err != nil { t.Fatal(err) -- cgit v1.2.3 From 69bf39e8a47d3b4dcbbd04d2e8df476cdfab5e74 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 13 Feb 2020 10:58:47 -0800 Subject: Internal change. PiperOrigin-RevId: 294952610 --- pkg/abi/linux/socket.go | 13 ++++ pkg/sentry/socket/control/BUILD | 1 + pkg/sentry/socket/control/control.go | 43 +++++++++++++ pkg/sentry/socket/hostinet/socket.go | 11 +++- pkg/sentry/socket/netstack/netstack.go | 37 ++++++++++-- pkg/tcpip/tcpip.go | 25 ++++++++ pkg/tcpip/transport/udp/endpoint.go | 26 ++++++++ test/syscalls/linux/socket_ip_udp_generic.cc | 44 ++++++++++++++ test/syscalls/linux/socket_ipv4_udp_unbound.cc | 84 ++++++++++++++++++++++++++ test/syscalls/linux/udp_socket_test_cases.cc | 1 - 10 files changed, 278 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go index 766ee4014..4a14ef691 100644 --- a/pkg/abi/linux/socket.go +++ b/pkg/abi/linux/socket.go @@ -411,6 +411,15 @@ type ControlMessageCredentials struct { GID uint32 } +// A ControlMessageIPPacketInfo is IP_PKTINFO socket control message. +// +// ControlMessageIPPacketInfo represents struct in_pktinfo from linux/in.h. 
+type ControlMessageIPPacketInfo struct { + NIC int32 + LocalAddr InetAddr + DestinationAddr InetAddr +} + // SizeOfControlMessageCredentials is the binary size of a // ControlMessageCredentials struct. var SizeOfControlMessageCredentials = int(binary.Size(ControlMessageCredentials{})) @@ -431,6 +440,10 @@ const SizeOfControlMessageTOS = 1 // SizeOfControlMessageTClass is the size of an IPV6_TCLASS control message. const SizeOfControlMessageTClass = 4 +// SizeOfControlMessageIPPacketInfo is the size of an IP_PKTINFO +// control message. +const SizeOfControlMessageIPPacketInfo = 12 + // SCM_MAX_FD is the maximum number of FDs accepted in a single sendmsg call. // From net/scm.h. const SCM_MAX_FD = 253 diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD index 79e16d6e8..4d42d29cb 100644 --- a/pkg/sentry/socket/control/BUILD +++ b/pkg/sentry/socket/control/BUILD @@ -19,6 +19,7 @@ go_library( "//pkg/sentry/socket", "//pkg/sentry/socket/unix/transport", "//pkg/syserror", + "//pkg/tcpip", "//pkg/usermem", ], ) diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 6145a7fc3..4667373d2 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/usermem" ) @@ -338,6 +339,22 @@ func PackTClass(t *kernel.Task, tClass int32, buf []byte) []byte { ) } +// PackIPPacketInfo packs an IP_PKTINFO socket control message. +func PackIPPacketInfo(t *kernel.Task, packetInfo tcpip.IPPacketInfo, buf []byte) []byte { + var p linux.ControlMessageIPPacketInfo + p.NIC = int32(packetInfo.NIC) + copy(p.LocalAddr[:], []byte(packetInfo.LocalAddr)) + copy(p.DestinationAddr[:], []byte(packetInfo.DestinationAddr)) + + return putCmsgStruct( + buf, + linux.SOL_IP, + linux.IP_PKTINFO, + t.Arch().Width(), + p, + ) +} + // PackControlMessages packs control messages into the given buffer. // // We skip control messages specific to Unix domain sockets. @@ -362,6 +379,10 @@ func PackControlMessages(t *kernel.Task, cmsgs socket.ControlMessages, buf []byt buf = PackTClass(t, cmsgs.IP.TClass, buf) } + if cmsgs.IP.HasIPPacketInfo { + buf = PackIPPacketInfo(t, cmsgs.IP.PacketInfo, buf) + } + return buf } @@ -394,6 +415,16 @@ func CmsgsSpace(t *kernel.Task, cmsgs socket.ControlMessages) int { return space } +// NewIPPacketInfo returns the IPPacketInfo struct. +func NewIPPacketInfo(packetInfo linux.ControlMessageIPPacketInfo) tcpip.IPPacketInfo { + var p tcpip.IPPacketInfo + p.NIC = tcpip.NICID(packetInfo.NIC) + copy([]byte(p.LocalAddr), packetInfo.LocalAddr[:]) + copy([]byte(p.DestinationAddr), packetInfo.DestinationAddr[:]) + + return p +} + // Parse parses a raw socket control message into portable objects. 
func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.ControlMessages, error) { var ( @@ -468,6 +499,18 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTOS], usermem.ByteOrder, &cmsgs.IP.TOS) i += binary.AlignUp(length, width) + case linux.IP_PKTINFO: + if length < linux.SizeOfControlMessageIPPacketInfo { + return socket.ControlMessages{}, syserror.EINVAL + } + + cmsgs.IP.HasIPPacketInfo = true + var packetInfo linux.ControlMessageIPPacketInfo + binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageIPPacketInfo], usermem.ByteOrder, &packetInfo) + + cmsgs.IP.PacketInfo = NewIPPacketInfo(packetInfo) + i += binary.AlignUp(length, width) + default: return socket.ControlMessages{}, syserror.EINVAL } diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index de76388ac..22f78d2e2 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -289,7 +289,7 @@ func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPt switch level { case linux.SOL_IP: switch name { - case linux.IP_TOS, linux.IP_RECVTOS: + case linux.IP_TOS, linux.IP_RECVTOS, linux.IP_PKTINFO: optlen = sizeofInt32 } case linux.SOL_IPV6: @@ -336,6 +336,8 @@ func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt [ switch name { case linux.IP_TOS, linux.IP_RECVTOS: optlen = sizeofInt32 + case linux.IP_PKTINFO: + optlen = linux.SizeOfControlMessageIPPacketInfo } case linux.SOL_IPV6: switch name { @@ -473,7 +475,14 @@ func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags case syscall.IP_TOS: controlMessages.IP.HasTOS = true binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTOS], usermem.ByteOrder, &controlMessages.IP.TOS) + + case syscall.IP_PKTINFO: + controlMessages.IP.HasIPPacketInfo = true + var packetInfo linux.ControlMessageIPPacketInfo + binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageIPPacketInfo], usermem.ByteOrder, &packetInfo) + controlMessages.IP.PacketInfo = control.NewIPPacketInfo(packetInfo) } + case syscall.SOL_IPV6: switch unixCmsg.Header.Type { case syscall.IPV6_TCLASS: diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index ed2fbcceb..9757fbfba 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1414,6 +1414,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in } return o, nil + case linux.IP_PKTINFO: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptBool(tcpip.ReceiveIPPacketInfoOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + var o int32 + if v { + o = 1 + } + return o, nil + default: emitUnimplementedEventIP(t, name) } @@ -1762,6 +1777,7 @@ func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) linux.IPV6_IPSEC_POLICY, linux.IPV6_JOIN_ANYCAST, linux.IPV6_LEAVE_ANYCAST, + // TODO(b/148887420): Add support for IPV6_PKTINFO. 
linux.IPV6_PKTINFO, linux.IPV6_ROUTER_ALERT, linux.IPV6_XFRM_POLICY, @@ -1949,6 +1965,16 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s } return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTOSOption, v != 0)) + case linux.IP_PKTINFO: + if len(optVal) == 0 { + return nil + } + v, err := parseIntOrChar(optVal) + if err != nil { + return err + } + return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, v != 0)) + case linux.IP_ADD_SOURCE_MEMBERSHIP, linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BLOCK_SOURCE, @@ -1964,7 +1990,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s linux.IP_NODEFRAG, linux.IP_OPTIONS, linux.IP_PASSSEC, - linux.IP_PKTINFO, linux.IP_RECVERR, linux.IP_RECVFRAGSIZE, linux.IP_RECVOPTS, @@ -2395,10 +2420,12 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe func (s *SocketOperations) controlMessages() socket.ControlMessages { return socket.ControlMessages{ IP: tcpip.ControlMessages{ - HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp, - Timestamp: s.readCM.Timestamp, - HasTOS: s.readCM.HasTOS, - TOS: s.readCM.TOS, + HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp, + Timestamp: s.readCM.Timestamp, + HasTOS: s.readCM.HasTOS, + TOS: s.readCM.TOS, + HasIPPacketInfo: s.readCM.HasIPPacketInfo, + PacketInfo: s.readCM.PacketInfo, }, } } diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 0e944712f..9ca39ce40 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -328,6 +328,12 @@ type ControlMessages struct { // Tclass is the IPv6 traffic class of the associated packet. TClass int32 + + // HasIPPacketInfo indicates whether PacketInfo is set. + HasIPPacketInfo bool + + // PacketInfo holds interface and address data on an incoming packet. + PacketInfo IPPacketInfo } // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) @@ -503,6 +509,11 @@ const ( // V6OnlyOption is used by {G,S}etSockOptBool to specify whether an IPv6 // socket is to be restricted to sending and receiving IPv6 packets only. V6OnlyOption + + // ReceiveIPPacketInfoOption is used by {G,S}etSockOptBool to specify + // if more inforamtion is provided with incoming packets such + // as interface index and address. + ReceiveIPPacketInfoOption ) // SockOptInt represents socket options which values have the int type. @@ -685,6 +696,20 @@ type IPv4TOSOption uint8 // for all subsequent outgoing IPv6 packets from the endpoint. type IPv6TrafficClassOption uint8 +// IPPacketInfo is the message struture for IP_PKTINFO. +// +// +stateify savable +type IPPacketInfo struct { + // NIC is the ID of the NIC to be used. + NIC NICID + + // LocalAddr is the local address. + LocalAddr Address + + // DestinationAddr is the destination address. + DestinationAddr Address +} + // Route is a row in the routing table. It specifies through which NIC (and // gateway) sets of packets should be routed. A row is considered viable if the // masked target address matches the destination address in the row. 
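The option and struct added above are consumed through the existing endpoint API: a caller enables ReceiveIPPacketInfoOption with SetSockOptBool and then reads the interface and address data out of the ControlMessages returned by Read. The sketch below illustrates that flow; it is not part of this change, the helper name is made up, and it assumes the endpoint is already created, bound, and has a datagram queued.

package example

import (
	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/buffer"
)

// readWithPacketInfo reads one datagram from ep and returns the packet info
// recorded for it. Illustrative only: it assumes ep is a UDP endpoint that is
// already bound and has data ready to read.
func readWithPacketInfo(ep tcpip.Endpoint) (buffer.View, tcpip.IPPacketInfo, *tcpip.Error) {
	// Equivalent of setsockopt(fd, SOL_IP, IP_PKTINFO, 1) on a host socket.
	if err := ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, true); err != nil {
		return nil, tcpip.IPPacketInfo{}, err
	}

	var sender tcpip.FullAddress
	v, cm, err := ep.Read(&sender)
	if err != nil {
		return nil, tcpip.IPPacketInfo{}, err
	}
	if !cm.HasIPPacketInfo {
		// No packet info was recorded for this datagram (e.g. it did not
		// arrive over IPv4).
		return v, tcpip.IPPacketInfo{}, nil
	}
	// PacketInfo carries the inbound NIC and the local/destination addresses.
	return v, cm.PacketInfo, nil
}
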
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index c9cbed8f4..3fe91cac2 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -29,6 +29,7 @@ import ( type udpPacket struct { udpPacketEntry senderAddress tcpip.FullAddress + packetInfo tcpip.IPPacketInfo data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 tos uint8 @@ -118,6 +119,9 @@ type endpoint struct { // as ancillary data to ControlMessages on Read. receiveTOS bool + // receiveIPPacketInfo determines if the packet info is returned by Read. + receiveIPPacketInfo bool + // shutdownFlags represent the current shutdown state of the endpoint. shutdownFlags tcpip.ShutdownFlags @@ -254,11 +258,17 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess } e.mu.RLock() receiveTOS := e.receiveTOS + receiveIPPacketInfo := e.receiveIPPacketInfo e.mu.RUnlock() if receiveTOS { cm.HasTOS = true cm.TOS = p.tos } + + if receiveIPPacketInfo { + cm.HasIPPacketInfo = true + cm.PacketInfo = p.packetInfo + } return p.data.ToView(), cm, nil } @@ -495,6 +505,13 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { } e.v6only = v + return nil + + case tcpip.ReceiveIPPacketInfoOption: + e.mu.Lock() + e.receiveIPPacketInfo = v + e.mu.Unlock() + return nil } return nil @@ -703,6 +720,12 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { e.mu.RUnlock() return v, nil + + case tcpip.ReceiveIPPacketInfoOption: + e.mu.RLock() + v := e.receiveIPPacketInfo + e.mu.RUnlock() + return v, nil } return false, tcpip.ErrUnknownProtocolOption @@ -1247,6 +1270,9 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk switch r.NetProto { case header.IPv4ProtocolNumber: packet.tos, _ = header.IPv4(pkt.NetworkHeader).TOS() + packet.packetInfo.LocalAddr = r.LocalAddress + packet.packetInfo.DestinationAddr = r.RemoteAddress + packet.packetInfo.NIC = r.NICID() } packet.timestamp = e.stack.NowNanoseconds() diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index 53290bed7..db5663ecd 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -357,5 +357,49 @@ TEST_P(UDPSocketPairTest, SetReuseAddrReusePort) { EXPECT_EQ(get, kSockOptOn); } +// Test getsockopt for a socket which is not set with IP_PKTINFO option. +TEST_P(UDPSocketPairTest, IPPKTINFODefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_IP, IP_PKTINFO, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +// Test setsockopt and getsockopt for a socket with IP_PKTINFO option. +TEST_P(UDPSocketPairTest, SetAndGetIPPKTINFO) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int level = SOL_IP; + int type = IP_PKTINFO; + + // Check getsockopt before IP_PKTINFO is set. 
+ int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + ASSERT_THAT(getsockopt(sockets->first_fd(), level, type, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOn); + EXPECT_EQ(get_len, sizeof(get)); + + ASSERT_THAT(setsockopt(sockets->first_fd(), level, type, &kSockOptOff, + sizeof(kSockOptOff)), + SyscallSucceedsWithValue(0)); + + ASSERT_THAT(getsockopt(sockets->first_fd(), level, type, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOff); + EXPECT_EQ(get_len, sizeof(get)); +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index 990ccf23c..bc4b07a62 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -15,6 +15,7 @@ #include "test/syscalls/linux/socket_ipv4_udp_unbound.h" #include +#include #include #include #include @@ -2128,5 +2129,88 @@ TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) { SyscallSucceedsWithValue(kMessageSize)); } +// Test that socket will receive packet info control message. +TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) { + // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. + SKIP_IF((IsRunningWithHostinet())); + + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto sender_addr = V4Loopback(); + int level = SOL_IP; + int type = IP_PKTINFO; + + ASSERT_THAT( + bind(receiver->get(), reinterpret_cast(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + socklen_t sender_addr_len = sender_addr.addr_len; + ASSERT_THAT(getsockname(receiver->get(), + reinterpret_cast(&sender_addr.addr), + &sender_addr_len), + SyscallSucceeds()); + EXPECT_EQ(sender_addr_len, sender_addr.addr_len); + + auto receiver_addr = V4Loopback(); + reinterpret_cast(&receiver_addr.addr)->sin_port = + reinterpret_cast(&sender_addr.addr)->sin_port; + ASSERT_THAT( + connect(sender->get(), reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + + // Allow socket to receive control message. + ASSERT_THAT( + setsockopt(receiver->get(), level, type, &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Prepare message to send. 
+ constexpr size_t kDataLength = 1024; + msghdr sent_msg = {}; + iovec sent_iov = {}; + char sent_data[kDataLength]; + sent_iov.iov_base = sent_data; + sent_iov.iov_len = kDataLength; + sent_msg.msg_iov = &sent_iov; + sent_msg.msg_iovlen = 1; + sent_msg.msg_flags = 0; + + ASSERT_THAT(RetryEINTR(sendmsg)(sender->get(), &sent_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + msghdr received_msg = {}; + iovec received_iov = {}; + char received_data[kDataLength]; + char received_cmsg_buf[CMSG_SPACE(sizeof(in_pktinfo))] = {}; + size_t cmsg_data_len = sizeof(in_pktinfo); + received_iov.iov_base = received_data; + received_iov.iov_len = kDataLength; + received_msg.msg_iov = &received_iov; + received_msg.msg_iovlen = 1; + received_msg.msg_controllen = CMSG_LEN(cmsg_data_len); + received_msg.msg_control = received_cmsg_buf; + + ASSERT_THAT(RetryEINTR(recvmsg)(receiver->get(), &received_msg, 0), + SyscallSucceedsWithValue(kDataLength)); + + cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); + EXPECT_EQ(cmsg->cmsg_level, level); + EXPECT_EQ(cmsg->cmsg_type, type); + + // Get loopback index. + ifreq ifr = {}; + absl::SNPrintF(ifr.ifr_name, IFNAMSIZ, "lo"); + ASSERT_THAT(ioctl(sender->get(), SIOCGIFINDEX, &ifr), SyscallSucceeds()); + ASSERT_NE(ifr.ifr_ifindex, 0); + + // Check the data + in_pktinfo received_pktinfo = {}; + memcpy(&received_pktinfo, CMSG_DATA(cmsg), sizeof(in_pktinfo)); + EXPECT_EQ(received_pktinfo.ipi_ifindex, ifr.ifr_ifindex); + EXPECT_EQ(received_pktinfo.ipi_spec_dst.s_addr, htonl(INADDR_LOOPBACK)); + EXPECT_EQ(received_pktinfo.ipi_addr.s_addr, htonl(INADDR_LOOPBACK)); +} } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index a2f6ef8cc..9f8de6b48 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1495,6 +1495,5 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos)); EXPECT_EQ(received_tos, sent_tos); } - } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 6ef63cd7da107d487fda7c48af50fa9802913cd9 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 12 Feb 2020 16:19:06 -0800 Subject: We can now create and jump in iptables. For example: $ iptables -N foochain $ iptables -A INPUT -j foochain --- pkg/abi/linux/netfilter.go | 9 +- pkg/sentry/socket/netfilter/BUILD | 1 + pkg/sentry/socket/netfilter/netfilter.go | 62 +++++++-- pkg/sentry/socket/netfilter/targets.go | 35 +++++ pkg/tcpip/iptables/iptables.go | 103 +++++++++------ pkg/tcpip/iptables/targets.go | 20 ++- pkg/tcpip/iptables/types.go | 21 +-- test/iptables/filter_input.go | 217 ++++++++++++++++++++++++++++--- test/iptables/iptables_test.go | 36 +++++ test/iptables/iptables_util.go | 10 ++ 10 files changed, 420 insertions(+), 94 deletions(-) create mode 100644 pkg/sentry/socket/netfilter/targets.go (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index bbc4df74c..bd2e13ba1 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -225,11 +225,14 @@ type XTEntryTarget struct { // SizeOfXTEntryTarget is the size of an XTEntryTarget. const SizeOfXTEntryTarget = 32 -// XTStandardTarget is a builtin target, one of ACCEPT, DROP, JUMP, QUEUE, or -// RETURN. 
It corresponds to struct xt_standard_target in +// XTStandardTarget is a built-in target, one of ACCEPT, DROP, JUMP, QUEUE, +// RETURN, or jump. It corresponds to struct xt_standard_target in // include/uapi/linux/netfilter/x_tables.h. type XTStandardTarget struct { - Target XTEntryTarget + Target XTEntryTarget + // A positive verdict indicates a jump, and is the offset from the + // start of the table to jump to. A negative value means one of the + // other built-in targets. Verdict int32 _ [4]byte } diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index c91ec7494..7cd2ce55b 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -7,6 +7,7 @@ go_library( srcs = [ "extensions.go", "netfilter.go", + "targets.go", "tcp_matcher.go", "udp_matcher.go", ], diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 3fc80e0de..d322e4144 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -240,13 +240,15 @@ func marshalTarget(target iptables.Target) []byte { return marshalErrorTarget(tg.Name) case iptables.ReturnTarget: return marshalStandardTarget(iptables.RuleReturn) + case JumpTarget: + return marshalJumpTarget(tg) default: panic(fmt.Errorf("unknown target of type %T", target)) } } func marshalStandardTarget(verdict iptables.RuleVerdict) []byte { - nflog("convert to binary: marshalling standard target with size %d", linux.SizeOfXTStandardTarget) + nflog("convert to binary: marshalling standard target") // The target's name will be the empty string. target := linux.XTStandardTarget{ @@ -274,6 +276,23 @@ func marshalErrorTarget(errorName string) []byte { return binary.Marshal(ret, usermem.ByteOrder, target) } +func marshalJumpTarget(jt JumpTarget) []byte { + nflog("convert to binary: marshalling jump target") + + // The target's name will be the empty string. + target := linux.XTStandardTarget{ + Target: linux.XTEntryTarget{ + TargetSize: linux.SizeOfXTStandardTarget, + }, + // Verdict is overloaded by the ABI. When positive, it holds + // the jump offset from the start of the table. + Verdict: int32(jt.Offset), + } + + ret := make([]byte, 0, linux.SizeOfXTStandardTarget) + return binary.Marshal(ret, usermem.ByteOrder, target) +} + // translateFromStandardVerdict translates verdicts the same way as the iptables // tool. func translateFromStandardVerdict(verdict iptables.RuleVerdict) int32 { @@ -335,7 +354,8 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { // Convert input into a list of rules and their offsets. var offset uint32 - var offsets []uint32 + // offsets maps rule byte offsets to their position in table.Rules. + offsets := map[uint32]int{} for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { nflog("set entries: processing entry at offset %d", offset) @@ -396,11 +416,12 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { Target: target, Matchers: matchers, }) - offsets = append(offsets, offset) + offsets[offset] = int(entryIdx) offset += uint32(entry.NextOffset) if initialOptValLen-len(optVal) != int(entry.NextOffset) { nflog("entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal)) + return syserr.ErrInvalidArgument } } @@ -409,13 +430,13 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { for hook, _ := range replace.HookEntry { if table.ValidHooks()&(1<= 0 indicates a jump. 
+ return JumpTarget{Offset: uint32(standardTarget.Verdict)}, nil + } case errorTargetName: // Error target. diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go new file mode 100644 index 000000000..c421b87cf --- /dev/null +++ b/pkg/sentry/socket/netfilter/targets.go @@ -0,0 +1,35 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package netfilter + +import ( + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/iptables" +) + +// JumpTarget implements iptables.Target. +type JumpTarget struct { + // Offset is the byte offset of the rule to jump to. It is used for + // marshaling and unmarshaling. + Offset uint32 + + // RuleNum is the rule to jump to. + RuleNum int +} + +// Action implements iptables.Target.Action. +func (jt JumpTarget) Action(tcpip.PacketBuffer) (iptables.RuleVerdict, int) { + return iptables.RuleJump, jt.RuleNum +} diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index 75a433a3b..dbaccbb36 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -135,25 +135,53 @@ func EmptyFilterTable() Table { } } +// A chainVerdict is what a table decides should be done with a packet. +type chainVerdict int + +const ( + // chainAccept indicates the packet should continue through netstack. + chainAccept chainVerdict = iota + + // chainAccept indicates the packet should be dropped. + chainDrop + + // chainReturn indicates the packet should return to the calling chain + // or the underflow rule of a builtin chain. + chainReturn +) + // Check runs pkt through the rules for hook. It returns true when the packet // should continue traversing the network stack and false when it should be // dropped. // // Precondition: pkt.NetworkHeader is set. func (it *IPTables) Check(hook Hook, pkt tcpip.PacketBuffer) bool { - // TODO(gvisor.dev/issue/170): A lot of this is uncomplicated because - // we're missing features. Jumps, the call stack, etc. aren't checked - // for yet because we're yet to support them. - // Go through each table containing the hook. for _, tablename := range it.Priorities[hook] { - switch verdict := it.checkTable(hook, pkt, tablename); verdict { + table := it.Tables[tablename] + ruleIdx := table.BuiltinChains[hook] + switch verdict := it.checkChain(hook, pkt, table, ruleIdx); verdict { // If the table returns Accept, move on to the next table. - case TableAccept: + case chainAccept: continue // The Drop verdict is final. - case TableDrop: + case chainDrop: return false + case chainReturn: + // Any Return from a built-in chain means we have to + // call the underflow. 
+ underflow := table.Rules[table.Underflows[hook]] + switch v, _ := underflow.Target.Action(pkt); v { + case RuleAccept: + continue + case RuleDrop: + return false + case RuleJump, RuleReturn: + panic("Underflows should only return RuleAccept or RuleDrop.") + default: + panic(fmt.Sprintf("Unknown verdict: %d", v)) + } + default: panic(fmt.Sprintf("Unknown verdict %v.", verdict)) } @@ -164,37 +192,37 @@ func (it *IPTables) Check(hook Hook, pkt tcpip.PacketBuffer) bool { } // Precondition: pkt.NetworkHeader is set. -func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename string) TableVerdict { +func (it *IPTables) checkChain(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) chainVerdict { // Start from ruleIdx and walk the list of rules until a rule gives us // a verdict. - table := it.Tables[tablename] - for ruleIdx := table.BuiltinChains[hook]; ruleIdx < len(table.Rules); ruleIdx++ { - switch verdict := it.checkRule(hook, pkt, table, ruleIdx); verdict { + for ruleIdx < len(table.Rules) { + switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx); verdict { case RuleAccept: - return TableAccept + return chainAccept case RuleDrop: - return TableDrop - - case RuleContinue: - continue + return chainDrop case RuleReturn: - // TODO(gvisor.dev/issue/170): We don't implement jump - // yet, so any Return is from a built-in chain. That - // means we have to to call the underflow. - underflow := table.Rules[table.Underflows[hook]] - // Underflow is guaranteed to be an unconditional - // ACCEPT or DROP. - switch v, _ := underflow.Target.Action(pkt); v { - case RuleAccept: - return TableAccept - case RuleDrop: - return TableDrop - case RuleContinue, RuleReturn: - panic("Underflows should only return RuleAccept or RuleDrop.") + return chainReturn + + case RuleJump: + // "Jumping" to the next rule just means we're + // continuing on down the list. + if jumpTo == ruleIdx+1 { + ruleIdx++ + continue + } + switch verdict := it.checkChain(hook, pkt, table, jumpTo); verdict { + case chainAccept: + return chainAccept + case chainDrop: + return chainDrop + case chainReturn: + ruleIdx++ + continue default: - panic(fmt.Sprintf("Unknown verdict: %d", v)) + panic(fmt.Sprintf("Unknown verdict: %d", verdict)) } default: @@ -205,17 +233,18 @@ func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename stri // We got through the entire table without a decision. Default to DROP // for safety. - return TableDrop + return chainDrop } // Precondition: pk.NetworkHeader is set. -func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) RuleVerdict { +func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) (RuleVerdict, int) { rule := table.Rules[ruleIdx] // First check whether the packet matches the IP header filter. // TODO(gvisor.dev/issue/170): Support other fields of the filter. if rule.Filter.Protocol != 0 && rule.Filter.Protocol != header.IPv4(pkt.NetworkHeader).TransportProtocol() { - return RuleContinue + // Continue on to the next rule. + return RuleJump, ruleIdx + 1 } // Go through each rule matcher. If they all match, run @@ -223,14 +252,14 @@ func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ru for _, matcher := range rule.Matchers { matches, hotdrop := matcher.Match(hook, pkt, "") if hotdrop { - return RuleDrop + return RuleDrop, 0 } if !matches { - return RuleContinue + // Continue on to the next rule. 
+ return RuleJump, ruleIdx + 1 } } // All the matchers matched, so run the target. - verdict, _ := rule.Target.Action(pkt) - return verdict + return rule.Target.Action(pkt) } diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go index 9fc60cfad..81a2e39a2 100644 --- a/pkg/tcpip/iptables/targets.go +++ b/pkg/tcpip/iptables/targets.go @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This file contains various Targets. - package iptables import ( @@ -25,16 +23,16 @@ import ( type AcceptTarget struct{} // Action implements Target.Action. -func (AcceptTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { - return RuleAccept, "" +func (AcceptTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, int) { + return RuleAccept, 0 } // DropTarget drops packets. type DropTarget struct{} // Action implements Target.Action. -func (DropTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { - return RuleDrop, "" +func (DropTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, int) { + return RuleDrop, 0 } // ErrorTarget logs an error and drops the packet. It represents a target that @@ -42,9 +40,9 @@ func (DropTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { type ErrorTarget struct{} // Action implements Target.Action. -func (ErrorTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { +func (ErrorTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, int) { log.Debugf("ErrorTarget triggered.") - return RuleDrop, "" + return RuleDrop, 0 } // UserChainTarget marks a rule as the beginning of a user chain. @@ -53,7 +51,7 @@ type UserChainTarget struct { } // Action implements Target.Action. -func (UserChainTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { +func (UserChainTarget) Action(tcpip.PacketBuffer) (RuleVerdict, int) { panic("UserChainTarget should never be called.") } @@ -62,6 +60,6 @@ func (UserChainTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { type ReturnTarget struct{} // Action implements Target.Action. -func (ReturnTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { - return RuleReturn, "" +func (ReturnTarget) Action(tcpip.PacketBuffer) (RuleVerdict, int) { + return RuleReturn, 0 } diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 5735d001b..7d032fd23 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -56,17 +56,6 @@ const ( NumHooks ) -// A TableVerdict is what a table decides should be done with a packet. -type TableVerdict int - -const ( - // TableAccept indicates the packet should continue through netstack. - TableAccept TableVerdict = iota - - // TableAccept indicates the packet should be dropped. - TableDrop -) - // A RuleVerdict is what a rule decides should be done with a packet. type RuleVerdict int @@ -74,12 +63,12 @@ const ( // RuleAccept indicates the packet should continue through netstack. RuleAccept RuleVerdict = iota - // RuleContinue indicates the packet should continue to the next rule. - RuleContinue - // RuleDrop indicates the packet should be dropped. RuleDrop + // RuleJump indicates the packet should jump to another chain. + RuleJump + // RuleReturn indicates the packet should return to the previous chain. RuleReturn ) @@ -174,6 +163,6 @@ type Matcher interface { type Target interface { // Action takes an action on the packet and returns a verdict on how // traversal should (or should not) continue. 
If the return value is - // Jump, it also returns the name of the chain to jump to. - Action(packet tcpip.PacketBuffer) (RuleVerdict, string) + // Jump, it also returns the index of the rule to jump to. + Action(packet tcpip.PacketBuffer) (RuleVerdict, int) } diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index e26d6a7d2..706c09cea 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -26,6 +26,7 @@ const ( acceptPort = 2402 sendloopDuration = 2 * time.Second network = "udp4" + chainName = "foochain" ) func init() { @@ -40,6 +41,12 @@ func init() { RegisterTestCase(FilterInputDefaultPolicyAccept{}) RegisterTestCase(FilterInputDefaultPolicyDrop{}) RegisterTestCase(FilterInputReturnUnderflow{}) + RegisterTestCase(FilterInputSerializeJump{}) + RegisterTestCase(FilterInputJumpBasic{}) + RegisterTestCase(FilterInputJumpReturn{}) + RegisterTestCase(FilterInputJumpReturnDrop{}) + RegisterTestCase(FilterInputJumpBuiltin{}) + RegisterTestCase(FilterInputJumpTwice{}) } // FilterInputDropUDP tests that we can drop UDP traffic. @@ -267,13 +274,12 @@ func (FilterInputMultiUDPRules) Name() string { // ContainerAction implements TestCase.ContainerAction. func (FilterInputMultiUDPRules) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { - return err - } - if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", acceptPort), "-j", "ACCEPT"); err != nil { - return err + rules := [][]string{ + {"-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"}, + {"-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", acceptPort), "-j", "ACCEPT"}, + {"-L"}, } - return filterTable("-L") + return filterTableRules(rules) } // LocalAction implements TestCase.LocalAction. @@ -314,14 +320,13 @@ func (FilterInputCreateUserChain) Name() string { // ContainerAction implements TestCase.ContainerAction. func (FilterInputCreateUserChain) ContainerAction(ip net.IP) error { - // Create a chain. - const chainName = "foochain" - if err := filterTable("-N", chainName); err != nil { - return err + rules := [][]string{ + // Create a chain. + {"-N", chainName}, + // Add a simple rule to the chain. + {"-A", chainName, "-j", "DROP"}, } - - // Add a simple rule to the chain. - return filterTable("-A", chainName, "-j", "DROP") + return filterTableRules(rules) } // LocalAction implements TestCase.LocalAction. @@ -396,13 +401,12 @@ func (FilterInputReturnUnderflow) Name() string { func (FilterInputReturnUnderflow) ContainerAction(ip net.IP) error { // Add a RETURN rule followed by an unconditional accept, and set the // default policy to DROP. - if err := filterTable("-A", "INPUT", "-j", "RETURN"); err != nil { - return err + rules := [][]string{ + {"-A", "INPUT", "-j", "RETURN"}, + {"-A", "INPUT", "-j", "DROP"}, + {"-P", "INPUT", "ACCEPT"}, } - if err := filterTable("-A", "INPUT", "-j", "DROP"); err != nil { - return err - } - if err := filterTable("-P", "INPUT", "ACCEPT"); err != nil { + if err := filterTableRules(rules); err != nil { return err } @@ -415,3 +419,178 @@ func (FilterInputReturnUnderflow) ContainerAction(ip net.IP) error { func (FilterInputReturnUnderflow) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// Verify that we can serialize jumps. 
+type FilterInputSerializeJump struct{} + +// Name implements TestCase.Name. +func (FilterInputSerializeJump) Name() string { + return "FilterInputSerializeJump" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputSerializeJump) ContainerAction(ip net.IP) error { + // Write a JUMP rule, the serialize it with `-L`. + rules := [][]string{ + {"-N", chainName}, + {"-A", "INPUT", "-j", chainName}, + {"-L"}, + } + return filterTableRules(rules) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputSerializeJump) LocalAction(ip net.IP) error { + // No-op. + return nil +} + +// Jump to a chain and execute a rule there. +type FilterInputJumpBasic struct{} + +// Name implements TestCase.Name. +func (FilterInputJumpBasic) Name() string { + return "FilterInputJumpBasic" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputJumpBasic) ContainerAction(ip net.IP) error { + rules := [][]string{ + {"-P", "INPUT", "DROP"}, + {"-N", chainName}, + {"-A", "INPUT", "-j", chainName}, + {"-A", chainName, "-j", "ACCEPT"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + // Listen for UDP packets on acceptPort. + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputJumpBasic) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// Jump, return, and execute a rule. +type FilterInputJumpReturn struct{} + +// Name implements TestCase.Name. +func (FilterInputJumpReturn) Name() string { + return "FilterInputJumpReturn" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputJumpReturn) ContainerAction(ip net.IP) error { + rules := [][]string{ + {"-N", chainName}, + {"-P", "INPUT", "ACCEPT"}, + {"-A", "INPUT", "-j", chainName}, + {"-A", chainName, "-j", "RETURN"}, + {"-A", chainName, "-j", "DROP"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + // Listen for UDP packets on acceptPort. + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputJumpReturn) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +type FilterInputJumpReturnDrop struct{} + +// Name implements TestCase.Name. +func (FilterInputJumpReturnDrop) Name() string { + return "FilterInputJumpReturnDrop" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP) error { + rules := [][]string{ + {"-N", chainName}, + {"-A", "INPUT", "-j", chainName}, + {"-A", "INPUT", "-j", "DROP"}, + {"-A", chainName, "-j", "RETURN"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + // Listen for UDP packets on dropPort. + if err := listenUDP(dropPort, sendloopDuration); err == nil { + return fmt.Errorf("packets on port %d should have been dropped, but got a packet", dropPort) + } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + return fmt.Errorf("error reading: %v", err) + } + + // At this point we know that reading timed out and never received a + // packet. + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputJumpReturnDrop) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, dropPort, sendloopDuration) +} + +// Jumping to a top-levl chain is illegal. +type FilterInputJumpBuiltin struct{} + +// Name implements TestCase.Name. 
+func (FilterInputJumpBuiltin) Name() string { + return "FilterInputJumpBuiltin" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputJumpBuiltin) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "INPUT", "-j", "OUTPUT"); err == nil { + return fmt.Errorf("iptables should be unable to jump to a built-in chain") + } + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputJumpBuiltin) LocalAction(ip net.IP) error { + // No-op. + return nil +} + +// Jump twice, then return twice and execute a rule. +type FilterInputJumpTwice struct{} + +// Name implements TestCase.Name. +func (FilterInputJumpTwice) Name() string { + return "FilterInputJumpTwice" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputJumpTwice) ContainerAction(ip net.IP) error { + const chainName2 = chainName + "2" + rules := [][]string{ + {"-P", "INPUT", "DROP"}, + {"-N", chainName}, + {"-N", chainName2}, + {"-A", "INPUT", "-j", chainName}, + {"-A", chainName, "-j", chainName2}, + {"-A", "INPUT", "-j", "ACCEPT"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + // UDP packets should jump and return twice, eventually hitting the + // ACCEPT rule. + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputJumpTwice) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 46a7c99b0..0621861eb 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -249,3 +249,39 @@ func TestFilterOutputDropTCPSrcPort(t *testing.T) { t.Fatal(err) } } + +func TestJumpSerialize(t *testing.T) { + if err := singleTest(FilterInputSerializeJump{}); err != nil { + t.Fatal(err) + } +} + +func TestJumpBasic(t *testing.T) { + if err := singleTest(FilterInputJumpBasic{}); err != nil { + t.Fatal(err) + } +} + +func TestJumpReturn(t *testing.T) { + if err := singleTest(FilterInputJumpReturn{}); err != nil { + t.Fatal(err) + } +} + +func TestJumpReturnDrop(t *testing.T) { + if err := singleTest(FilterInputJumpReturnDrop{}); err != nil { + t.Fatal(err) + } +} + +func TestJumpBuiltin(t *testing.T) { + if err := singleTest(FilterInputJumpBuiltin{}); err != nil { + t.Fatal(err) + } +} + +func TestJumpTwice(t *testing.T) { + if err := singleTest(FilterInputJumpTwice{}); err != nil { + t.Fatal(err) + } +} diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 043114c78..293c4e6ed 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -35,6 +35,16 @@ func filterTable(args ...string) error { return nil } +// filterTableRules is like filterTable, but runs multiple iptables commands. +func filterTableRules(argsList [][]string) error { + for _, args := range argsList { + if err := filterTable(args...); err != nil { + return err + } + } + return nil +} + // listenUDP listens on a UDP port and returns the value of net.Conn.Read() for // the first read on that port. func listenUDP(port int, timeout time.Duration) error { -- cgit v1.2.3 From b30b7f3422202232ad1c385a7ac0d775151fee2f Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Tue, 18 Feb 2020 11:30:42 -0800 Subject: Add nat table support for iptables. Add nat table support for Prerouting hook with Redirect option. Add tests to check redirect of ports. 
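
For example, with this change a rule of the following shape (the port number here is illustrative) can be written to the nat table and applied to incoming UDP traffic:

$ iptables -t nat -A PREROUTING -p udp -j REDIRECT --to-ports 8080

This is the rule form the new NAT tests install through the natTable helper added below.
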
--- pkg/abi/linux/netfilter.go | 27 ++++++++++++++ pkg/sentry/socket/netfilter/netfilter.go | 58 +++++++++++++++++++++++++++-- pkg/tcpip/iptables/iptables.go | 21 +++++++++++ pkg/tcpip/iptables/targets.go | 24 ++++++++++++ pkg/tcpip/stack/nic.go | 23 ++++++++++++ test/iptables/iptables_test.go | 12 ++++++ test/iptables/iptables_util.go | 10 +++++ test/iptables/nat.go | 64 +++++++++++++++++++++++++++++++- 8 files changed, 234 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index bbc4df74c..ba4d84962 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -250,6 +250,33 @@ type XTErrorTarget struct { // SizeOfXTErrorTarget is the size of an XTErrorTarget. const SizeOfXTErrorTarget = 64 +// NfNATIPV4Range. It corresponds to struct nf_nat_ipv4_range +// in include/uapi/linux/netfilter/nf_nat.h. +type NfNATIPV4Range struct { + Flags uint32 + MinIP [4]byte + MaxIP [4]byte + MinPort uint16 + MaxPort uint16 +} + +// NfNATIPV4MultiRangeCompat. It corresponds to struct +// nf_nat_ipv4_multi_range_compat in include/uapi/linux/netfilter/nf_nat.h. +type NfNATIPV4MultiRangeCompat struct { + Rangesize uint32 + RangeIPV4 [1]NfNATIPV4Range +} + +// XTRedirectTarget triggers a redirect when reached. +type XTRedirectTarget struct { + Target XTEntryTarget + NfRange NfNATIPV4MultiRangeCompat + _ [4]byte +} + +// SizeOfXTRedirectTarget is the size of an XTRedirectTarget. +const SizeOfXTRedirectTarget = 56 + // IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds // to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h. type IPTGetinfo struct { diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 3fc80e0de..512ad624a 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -35,6 +35,11 @@ import ( // shouldn't be reached - an error has occurred if we fall through to one. const errorTargetName = "ERROR" +// redirectTargetName is used to mark targets as redirect targets. Redirect +// targets should be reached for only NAT and Mangle tables. These targets will +// change the destination port/destination IP for packets. +const redirectTargetName = "REDIRECT" + // Metadata is used to verify that we are correctly serializing and // deserializing iptables into structs consumable by the iptables tool. We save // a metadata struct when the tables are written, and when they are read out we @@ -240,6 +245,8 @@ func marshalTarget(target iptables.Target) []byte { return marshalErrorTarget(tg.Name) case iptables.ReturnTarget: return marshalStandardTarget(iptables.RuleReturn) + case iptables.RedirectTarget: + return marshalRedirectTarget() default: panic(fmt.Errorf("unknown target of type %T", target)) } @@ -274,6 +281,18 @@ func marshalErrorTarget(errorName string) []byte { return binary.Marshal(ret, usermem.ByteOrder, target) } +func marshalRedirectTarget() []byte { + // This is a redirect target named redirect + target := linux.XTRedirectTarget{ + Target: linux.XTEntryTarget{ + TargetSize: linux.SizeOfXTRedirectTarget, + }, + } + + ret := make([]byte, 0, linux.SizeOfXTRedirectTarget) + return binary.Marshal(ret, usermem.ByteOrder, target) +} + // translateFromStandardVerdict translates verdicts the same way as the iptables // tool. 
func translateFromStandardVerdict(verdict iptables.RuleVerdict) int32 { @@ -326,6 +345,8 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { switch replace.Name.String() { case iptables.TablenameFilter: table = iptables.EmptyFilterTable() + case iptables.TablenameNat: + table = iptables.EmptyNatTable() default: nflog("we don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String()) return syserr.ErrInvalidArgument @@ -455,10 +476,11 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { } // TODO(gvisor.dev/issue/170): Support other chains. - // Since we only support modifying the INPUT chain right now, make sure - // all other chains point to ACCEPT rules. + // Since we only support modifying the INPUT chain and redirect for + // PREROUTING chain right now, make sure all other chains point to + // ACCEPT rules. for hook, ruleIdx := range table.BuiltinChains { - if hook != iptables.Input { + if hook != iptables.Input && hook != iptables.Prerouting { if _, ok := table.Rules[ruleIdx].Target.(iptables.AcceptTarget); !ok { nflog("hook %d is unsupported.", hook) return syserr.ErrInvalidArgument @@ -575,6 +597,36 @@ func parseTarget(optVal []byte) (iptables.Target, error) { nflog("set entries: user-defined target %q", name) return iptables.UserChainTarget{Name: name}, nil } + + case redirectTargetName: + // Redirect target. + if len(optVal) < linux.SizeOfXTRedirectTarget { + return nil, fmt.Errorf("netfilter.SetEntries: optVal has insufficient size for redirect target %d", len(optVal)) + } + + var redirectTarget linux.XTRedirectTarget + buf = optVal[:linux.SizeOfXTRedirectTarget] + binary.Unmarshal(buf, usermem.ByteOrder, &redirectTarget) + + // Copy linux.XTRedirectTarget to iptables.RedirectTarget. + var target iptables.RedirectTarget + nfRange := redirectTarget.NfRange + + target.RangeSize = nfRange.Rangesize + target.Flags = nfRange.RangeIPV4[0].Flags + + target.MinIP = tcpip.Address(nfRange.RangeIPV4[0].MinIP[:]) + target.MaxIP = tcpip.Address(nfRange.RangeIPV4[0].MaxIP[:]) + + // Convert port from big endian to little endian. + port := make([]byte, 2) + binary.BigEndian.PutUint16(port, nfRange.RangeIPV4[0].MinPort) + target.MinPort = binary.LittleEndian.Uint16(port) + + binary.BigEndian.PutUint16(port, nfRange.RangeIPV4[0].MaxPort) + target.MaxPort = binary.LittleEndian.Uint16(port) + return target, nil + } // Unknown target. diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index 75a433a3b..c00d012c0 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -135,6 +135,27 @@ func EmptyFilterTable() Table { } } +// EmptyNatTable returns a Table with no rules and the filter table chains +// mapped to HookUnset. +func EmptyNatTable() Table { + return Table{ + Rules: []Rule{}, + BuiltinChains: map[Hook]int{ + Prerouting: HookUnset, + Input: HookUnset, + Output: HookUnset, + Postrouting: HookUnset, + }, + Underflows: map[Hook]int{ + Prerouting: HookUnset, + Input: HookUnset, + Output: HookUnset, + Postrouting: HookUnset, + }, + UserChains: map[string]int{}, + } +} + // Check runs pkt through the rules for hook. It returns true when the packet // should continue traversing the network stack and false when it should be // dropped. 
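EmptyNatTable only wires the four built-in chains to HookUnset; SetEntries fills in the rules when the nat table is written from userspace. As a rough sketch of how the pieces fit together (not code from this change; the helper name, RangeSize value, and port are illustrative), a nat table carrying a single Prerouting REDIRECT rule plus its ACCEPT underflow could be assembled like this:

package example

import "gvisor.dev/gvisor/pkg/tcpip/iptables"

// buildRedirectNatTable returns a nat table whose Prerouting chain holds one
// REDIRECT rule followed by an unconditional ACCEPT underflow. Illustrative
// only; in gVisor the table is populated by netfilter.SetEntries from the
// serialized iptables payload.
func buildRedirectNatTable(port uint16) iptables.Table {
	table := iptables.EmptyNatTable()
	table.Rules = []iptables.Rule{
		// Rule 0: rewrite the destination port of packets hitting Prerouting.
		{Target: iptables.RedirectTarget{RangeSize: 1, MinPort: port, MaxPort: port}},
		// Rule 1: the underflow, which must be an unconditional ACCEPT or DROP.
		{Target: iptables.AcceptTarget{}},
	}
	table.BuiltinChains[iptables.Prerouting] = 0
	table.Underflows[iptables.Prerouting] = 1
	return table
}
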
diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go index 9fc60cfad..06e65bece 100644 --- a/pkg/tcpip/iptables/targets.go +++ b/pkg/tcpip/iptables/targets.go @@ -19,6 +19,7 @@ package iptables import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" ) // AcceptTarget accepts packets. @@ -65,3 +66,26 @@ type ReturnTarget struct{} func (ReturnTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { return RuleReturn, "" } + +// RedirectTarget redirects the packet by modifying the destination port/IP. +type RedirectTarget struct { + RangeSize uint32 + Flags uint32 + MinIP tcpip.Address + MaxIP tcpip.Address + MinPort uint16 + MaxPort uint16 +} + +// Action implements Target.Action. +func (rt RedirectTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { + log.Infof("RedirectTarget triggered.") + + // TODO(gvisor.dev/issue/170): Checking only for UDP protocol. + // We're yet to support for TCP protocol. + headerView := packet.Data.First() + h := header.UDP(headerView) + h.SetDestinationPort(rt.MinPort) + + return RuleAccept, "" +} diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index ca3a7a07e..2028f5201 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/iptables" ) // NIC represents a "network interface card" to which the networking stack is @@ -1012,6 +1013,7 @@ func (n *NIC) leaveGroupLocked(addr tcpip.Address) *tcpip.Error { func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt tcpip.PacketBuffer) { r := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */) r.RemoteLinkAddress = remotelinkAddr + ref.ep.HandlePacket(&r, pkt) ref.decRef() } @@ -1082,6 +1084,27 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link n.stack.stats.IP.InvalidSourceAddressesReceived.Increment() return } + + // TODO(gvisor.dev/issue/170): Not supporting iptables for IPv6 yet. + if protocol == header.IPv4ProtocolNumber { + newPkt := pkt.Clone() + + headerView := newPkt.Data.First() + h := header.IPv4(headerView) + newPkt.NetworkHeader = headerView[:h.HeaderLength()] + + hlen := int(h.HeaderLength()) + tlen := int(h.TotalLength()) + newPkt.Data.TrimFront(hlen) + newPkt.Data.CapLength(tlen - hlen) + + ipt := n.stack.IPTables() + if ok := ipt.Check(iptables.Prerouting, newPkt); !ok { + // iptables is telling us to drop the packet. 
+ return + } + } + if ref := n.getRef(protocol, dst); ref != nil { handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, pkt) return diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 46a7c99b0..7d061acba 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -196,12 +196,24 @@ func TestNATRedirectUDPPort(t *testing.T) { } } +func TestNATRedirectTCPPort(t *testing.T) { + if err := singleTest(NATRedirectTCPPort{}); err != nil { + t.Fatal(err) + } +} + func TestNATDropUDP(t *testing.T) { if err := singleTest(NATDropUDP{}); err != nil { t.Fatal(err) } } +func TestNATAcceptAll(t *testing.T) { + if err := singleTest(NATAcceptAll{}); err != nil { + t.Fatal(err) + } +} + func TestFilterInputDropTCPDestPort(t *testing.T) { if err := singleTest(FilterInputDropTCPDestPort{}); err != nil { t.Fatal(err) diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 043114c78..5c9199abf 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -35,6 +35,16 @@ func filterTable(args ...string) error { return nil } +// natTable calls `iptables -t nat` with the given args. +func natTable(args ...string) error { + args = append([]string{"-t", "nat"}, args...) + cmd := exec.Command(iptablesBinary, args...) + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("error running iptables with args %v\nerror: %v\noutput: %s", args, err, string(out)) + } + return nil +} + // listenUDP listens on a UDP port and returns the value of net.Conn.Read() for // the first read on that port. func listenUDP(port int, timeout time.Duration) error { diff --git a/test/iptables/nat.go b/test/iptables/nat.go index b5c6f927e..306cbd1b3 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -25,7 +25,9 @@ const ( func init() { RegisterTestCase(NATRedirectUDPPort{}) + RegisterTestCase(NATRedirectTCPPort{}) RegisterTestCase(NATDropUDP{}) + RegisterTestCase(NATAcceptAll{}) } // NATRedirectUDPPort tests that packets are redirected to different port. @@ -38,13 +40,14 @@ func (NATRedirectUDPPort) Name() string { // ContainerAction implements TestCase.ContainerAction. func (NATRedirectUDPPort) ContainerAction(ip net.IP) error { - if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { + if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } if err := listenUDP(redirectPort, sendloopDuration); err != nil { return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", redirectPort, err) } + return nil } @@ -53,6 +56,37 @@ func (NATRedirectUDPPort) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } +// NATRedirectTCPPort tests that connections are redirected on specified ports. +type NATRedirectTCPPort struct{} + +// Name implements TestCase.Name. +func (NATRedirectTCPPort) Name() string { + return "NATRedirectTCPPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATRedirectTCPPort) ContainerAction(ip net.IP) error { + if err := natTable("-A", "PREROUTING", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { + return err + } + + // Listen for TCP packets on redirect port. 
+ if err := listenTCP(redirectPort, sendloopDuration); err != nil { + return fmt.Errorf("connection on port %d should be accepted, but got error %v", redirectPort, err) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (NATRedirectTCPPort) LocalAction(ip net.IP) error { + if err := connectTCP(ip, dropPort, acceptPort, sendloopDuration); err != nil { + return fmt.Errorf("connection destined to port %d should be accepted, but got error %v", dropPort, err) + } + + return nil +} + // NATDropUDP tests that packets are not received in ports other than redirect port. type NATDropUDP struct{} @@ -63,7 +97,7 @@ func (NATDropUDP) Name() string { // ContainerAction implements TestCase.ContainerAction. func (NATDropUDP) ContainerAction(ip net.IP) error { - if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { + if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } @@ -78,3 +112,29 @@ func (NATDropUDP) ContainerAction(ip net.IP) error { func (NATDropUDP) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// NATAcceptAll tests that all UDP packets are accepted. +type NATAcceptAll struct{} + +// Name implements TestCase.Name. +func (NATAcceptAll) Name() string { + return "NATAcceptAll" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATAcceptAll) ContainerAction(ip net.IP) error { + if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "ACCEPT"); err != nil { + return err + } + + if err := listenUDP(acceptPort, sendloopDuration); err != nil { + return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", acceptPort, err) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (NATAcceptAll) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} -- cgit v1.2.3 From c841373013ec8659b2954563796479f275b00bfa Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 18 Feb 2020 12:00:58 -0800 Subject: Deflake fallocate syscall test. - Retry if fallocate returns EINTR. - If fallocate fails, don't try to fstat and confirm the result. PiperOrigin-RevId: 295789790 --- test/syscalls/linux/fallocate.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc index 1c3d00287..7819f4ac3 100644 --- a/test/syscalls/linux/fallocate.cc +++ b/test/syscalls/linux/fallocate.cc @@ -33,7 +33,7 @@ namespace testing { namespace { int fallocate(int fd, int mode, off_t offset, off_t len) { - return syscall(__NR_fallocate, fd, mode, offset, len); + return RetryEINTR(syscall)(__NR_fallocate, fd, mode, offset, len); } class AllocateTest : public FileTest { @@ -47,27 +47,27 @@ TEST_F(AllocateTest, Fallocate) { EXPECT_EQ(buf.st_size, 0); // Grow to ten bytes. - EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 10), SyscallSucceeds()); + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 10), SyscallSucceeds()); ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); EXPECT_EQ(buf.st_size, 10); // Allocate to a smaller size should be noop. 
- EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 5), SyscallSucceeds()); + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 5), SyscallSucceeds()); ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); EXPECT_EQ(buf.st_size, 10); // Grow again. - EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 0, 20), SyscallSucceeds()); + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 0, 20), SyscallSucceeds()); ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); EXPECT_EQ(buf.st_size, 20); // Grow with offset. - EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 10, 20), SyscallSucceeds()); + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 10, 20), SyscallSucceeds()); ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); EXPECT_EQ(buf.st_size, 30); // Grow with offset beyond EOF. - EXPECT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds()); + ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds()); ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); EXPECT_EQ(buf.st_size, 40); } -- cgit v1.2.3 From 247843bbc51d459b279db24a262f68b4dac1cc01 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 18 Feb 2020 15:24:59 -0800 Subject: iptables: use "-t nat" for NAT tests PiperOrigin-RevId: 295835807 --- test/iptables/iptables_util.go | 11 ++++++++++- test/iptables/nat.go | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 293c4e6ed..32cf5a417 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -27,7 +27,16 @@ const iptablesBinary = "iptables" // filterTable calls `iptables -t filter` with the given args. func filterTable(args ...string) error { - args = append([]string{"-t", "filter"}, args...) + return tableCmd("filter", args) +} + +// natTable calls `iptables -t nat` with the given args. +func natTable(args ...string) error { + return tableCmd("nat", args) +} + +func tableCmd(table string, args []string) error { + args = append([]string{"-t", table}, args...) cmd := exec.Command(iptablesBinary, args...) if out, err := cmd.CombinedOutput(); err != nil { return fmt.Errorf("error running iptables with args %v\nerror: %v\noutput: %s", args, err, string(out)) diff --git a/test/iptables/nat.go b/test/iptables/nat.go index b5c6f927e..a01117ec8 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -38,7 +38,7 @@ func (NATRedirectUDPPort) Name() string { // ContainerAction implements TestCase.ContainerAction. func (NATRedirectUDPPort) ContainerAction(ip net.IP) error { - if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { + if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } @@ -63,7 +63,7 @@ func (NATDropUDP) Name() string { // ContainerAction implements TestCase.ContainerAction. 
func (NATDropUDP) ContainerAction(ip net.IP) error { - if err := filterTable("-t", "nat", "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { + if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } -- cgit v1.2.3 From 56fd9504aab44a738d3df164cbee8e572b309f28 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 18 Feb 2020 15:44:22 -0800 Subject: Enable IPV6_RECVTCLASS socket option for datagram sockets Added the ability to get/set the IP_RECVTCLASS socket option on UDP endpoints. If enabled, traffic class from the incoming Network Header passed as ancillary data in the ControlMessages. Adding Get/SetSockOptBool to decrease the overhead of getting/setting simple options. (This was absorbed in a CL that will be landing before this one). Test: * Added unit test to udp_test.go that tests getting/setting as well as verifying that we receive expected TOS from incoming packet. * Added a syscall test for verifying getting/setting * Removed test skip for existing syscall test to enable end to end test. PiperOrigin-RevId: 295840218 --- pkg/sentry/socket/control/control.go | 2 +- pkg/sentry/socket/netstack/netstack.go | 27 +++++- pkg/tcpip/checker/checker.go | 14 +++ pkg/tcpip/tcpip.go | 15 ++- pkg/tcpip/transport/udp/endpoint.go | 38 +++++++- pkg/tcpip/transport/udp/udp_test.go | 120 ++++++++++++++---------- test/syscalls/linux/ip_socket_test_util.h | 16 ++-- test/syscalls/linux/socket_ip_udp_generic.cc | 133 +++++++++++++++++++-------- test/syscalls/linux/udp_socket_test_cases.cc | 4 - 9 files changed, 260 insertions(+), 109 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 4667373d2..8834a1e1a 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -329,7 +329,7 @@ func PackTOS(t *kernel.Task, tos uint8, buf []byte) []byte { } // PackTClass packs an IPV6_TCLASS socket control message. 
-func PackTClass(t *kernel.Task, tClass int32, buf []byte) []byte { +func PackTClass(t *kernel.Task, tClass uint32, buf []byte) []byte { return putCmsgStruct( buf, linux.SOL_IPV6, diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 9757fbfba..e187276c5 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1318,6 +1318,22 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf } return ib, nil + case linux.IPV6_RECVTCLASS: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptBool(tcpip.ReceiveTClassOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + var o int32 + if v { + o = 1 + } + return o, nil + default: emitUnimplementedEventIPv6(t, name) } @@ -1803,6 +1819,14 @@ func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.IPv6TrafficClassOption(v))) + case linux.IPV6_RECVTCLASS: + v, err := parseIntOrChar(optVal) + if err != nil { + return err + } + + return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTClassOption, v != 0)) + default: emitUnimplementedEventIPv6(t, name) } @@ -2086,7 +2110,6 @@ func emitUnimplementedEventIPv6(t *kernel.Task, name int) { linux.IPV6_RECVPATHMTU, linux.IPV6_RECVPKTINFO, linux.IPV6_RECVRTHDR, - linux.IPV6_RECVTCLASS, linux.IPV6_RTHDR, linux.IPV6_RTHDRDSTOPTS, linux.IPV6_TCLASS, @@ -2424,6 +2447,8 @@ func (s *SocketOperations) controlMessages() socket.ControlMessages { Timestamp: s.readCM.Timestamp, HasTOS: s.readCM.HasTOS, TOS: s.readCM.TOS, + HasTClass: s.readCM.HasTClass, + TClass: s.readCM.TClass, HasIPPacketInfo: s.readCM.HasIPPacketInfo, PacketInfo: s.readCM.PacketInfo, }, diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index 4d6ae0871..c6c160dfc 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -161,6 +161,20 @@ func FragmentFlags(flags uint8) NetworkChecker { } } +// ReceiveTClass creates a checker that checks the TCLASS field in +// ControlMessages. +func ReceiveTClass(want uint32) ControlMessagesChecker { + return func(t *testing.T, cm tcpip.ControlMessages) { + t.Helper() + if !cm.HasTClass { + t.Fatalf("got cm.HasTClass = %t, want cm.TClass = %d", cm.HasTClass, want) + } + if got := cm.TClass; got != want { + t.Fatalf("got cm.TClass = %d, want %d", got, want) + } + } +} + // ReceiveTOS creates a checker that checks the TOS field in ControlMessages. func ReceiveTOS(want uint8) ControlMessagesChecker { return func(t *testing.T, cm tcpip.ControlMessages) { diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 9ca39ce40..ce5527391 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -323,11 +323,11 @@ type ControlMessages struct { // TOS is the IPv4 type of service of the associated packet. TOS uint8 - // HasTClass indicates whether Tclass is valid/set. + // HasTClass indicates whether TClass is valid/set. HasTClass bool - // Tclass is the IPv6 traffic class of the associated packet. - TClass int32 + // TClass is the IPv6 traffic class of the associated packet. + TClass uint32 // HasIPPacketInfo indicates whether PacketInfo is set. HasIPPacketInfo bool @@ -502,9 +502,13 @@ type WriteOptions struct { type SockOptBool int const ( + // ReceiveTClassOption is used by SetSockOpt/GetSockOpt to specify if the + // IPV6_TCLASS ancillary message is passed with incoming packets. 
+ ReceiveTClassOption SockOptBool = iota + // ReceiveTOSOption is used by SetSockOpt/GetSockOpt to specify if the TOS // ancillary message is passed with incoming packets. - ReceiveTOSOption SockOptBool = iota + ReceiveTOSOption // V6OnlyOption is used by {G,S}etSockOptBool to specify whether an IPv6 // socket is to be restricted to sending and receiving IPv6 packets only. @@ -514,6 +518,9 @@ const ( // if more inforamtion is provided with incoming packets such // as interface index and address. ReceiveIPPacketInfoOption + + // TODO(b/146901447): convert existing bool socket options to be handled via + // Get/SetSockOptBool ) // SockOptInt represents socket options which values have the int type. diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 3fe91cac2..eff7f3600 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -32,7 +32,8 @@ type udpPacket struct { packetInfo tcpip.IPPacketInfo data buffer.VectorisedView `state:".(buffer.VectorisedView)"` timestamp int64 - tos uint8 + // tos stores either the receiveTOS or receiveTClass value. + tos uint8 } // EndpointState represents the state of a UDP endpoint. @@ -119,6 +120,10 @@ type endpoint struct { // as ancillary data to ControlMessages on Read. receiveTOS bool + // receiveTClass determines if the incoming IPv6 TClass header field is + // passed as ancillary data to ControlMessages on Read. + receiveTClass bool + // receiveIPPacketInfo determines if the packet info is returned by Read. receiveIPPacketInfo bool @@ -258,13 +263,18 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess } e.mu.RLock() receiveTOS := e.receiveTOS + receiveTClass := e.receiveTClass receiveIPPacketInfo := e.receiveIPPacketInfo e.mu.RUnlock() if receiveTOS { cm.HasTOS = true cm.TOS = p.tos } - + if receiveTClass { + cm.HasTClass = true + // Although TClass is an 8-bit value it's read in the CMsg as a uint32. + cm.TClass = uint32(p.tos) + } if receiveIPPacketInfo { cm.HasIPPacketInfo = true cm.PacketInfo = p.packetInfo @@ -490,6 +500,17 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { e.mu.Unlock() return nil + case tcpip.ReceiveTClassOption: + // We only support this option on v6 endpoints. + if e.NetProto != header.IPv6ProtocolNumber { + return tcpip.ErrNotSupported + } + + e.mu.Lock() + e.receiveTClass = v + e.mu.Unlock() + return nil + case tcpip.V6OnlyOption: // We only recognize this option on v6 endpoints. if e.NetProto != header.IPv6ProtocolNumber { @@ -709,6 +730,17 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { e.mu.RUnlock() return v, nil + case tcpip.ReceiveTClassOption: + // We only support this option on v6 endpoints. + if e.NetProto != header.IPv6ProtocolNumber { + return false, tcpip.ErrNotSupported + } + + e.mu.RLock() + v := e.receiveTClass + e.mu.RUnlock() + return v, nil + case tcpip.V6OnlyOption: // We only recognize this option on v6 endpoints. 
if e.NetProto != header.IPv6ProtocolNumber { @@ -1273,6 +1305,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk packet.packetInfo.LocalAddr = r.LocalAddress packet.packetInfo.DestinationAddr = r.RemoteAddress packet.packetInfo.NIC = r.NICID() + case header.IPv6ProtocolNumber: + packet.tos, _ = header.IPv6(pkt.NetworkHeader).TOS() } packet.timestamp = e.stack.NowNanoseconds() diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index f0ff3fe71..34b7c2360 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -409,6 +409,7 @@ func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple, valid bool // Initialize the IP header. ip := header.IPv6(buf) ip.Encode(&header.IPv6Fields{ + TrafficClass: testTOS, PayloadLength: uint16(header.UDPMinimumSize + len(payload)), NextHeader: uint8(udp.ProtocolNumber), HopLimit: 65, @@ -1336,7 +1337,7 @@ func TestSetTTL(t *testing.T) { } } -func TestTOSV4(t *testing.T) { +func TestSetTOS(t *testing.T) { for _, flow := range []testFlow{unicastV4, multicastV4, broadcast} { t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { c := newDualTestContext(t, defaultMTU) @@ -1347,23 +1348,23 @@ func TestTOSV4(t *testing.T) { const tos = testTOS var v tcpip.IPv4TOSOption if err := c.ep.GetSockOpt(&v); err != nil { - c.t.Errorf("GetSockopt failed: %s", err) + c.t.Errorf("GetSockopt(%T) failed: %s", v, err) } // Test for expected default value. if v != 0 { - c.t.Errorf("got GetSockOpt(...) = %#v, want = %#v", v, 0) + c.t.Errorf("got GetSockOpt(%T) = 0x%x, want = 0x%x", v, v, 0) } if err := c.ep.SetSockOpt(tcpip.IPv4TOSOption(tos)); err != nil { - c.t.Errorf("SetSockOpt(%#v) failed: %s", tcpip.IPv4TOSOption(tos), err) + c.t.Errorf("SetSockOpt(%T, 0x%x) failed: %s", v, tcpip.IPv4TOSOption(tos), err) } if err := c.ep.GetSockOpt(&v); err != nil { - c.t.Errorf("GetSockopt failed: %s", err) + c.t.Errorf("GetSockopt(%T) failed: %s", v, err) } if want := tcpip.IPv4TOSOption(tos); v != want { - c.t.Errorf("got GetSockOpt(...) = %#v, want = %#v", v, want) + c.t.Errorf("got GetSockOpt(%T) = 0x%x, want = 0x%x", v, v, want) } testWrite(c, flow, checker.TOS(tos, 0)) @@ -1371,7 +1372,7 @@ func TestTOSV4(t *testing.T) { } } -func TestTOSV6(t *testing.T) { +func TestSetTClass(t *testing.T) { for _, flow := range []testFlow{unicastV4in6, unicastV6, unicastV6Only, multicastV4in6, multicastV6, broadcastIn6} { t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { c := newDualTestContext(t, defaultMTU) @@ -1379,71 +1380,92 @@ func TestTOSV6(t *testing.T) { c.createEndpointForFlow(flow) - const tos = testTOS + const tClass = testTOS var v tcpip.IPv6TrafficClassOption if err := c.ep.GetSockOpt(&v); err != nil { - c.t.Errorf("GetSockopt failed: %s", err) + c.t.Errorf("GetSockopt(%T) failed: %s", v, err) } // Test for expected default value. if v != 0 { - c.t.Errorf("got GetSockOpt(...) 
= %#v, want = %#v", v, 0) + c.t.Errorf("got GetSockOpt(%T) = 0x%x, want = 0x%x", v, v, 0) } - if err := c.ep.SetSockOpt(tcpip.IPv6TrafficClassOption(tos)); err != nil { - c.t.Errorf("SetSockOpt failed: %s", err) + if err := c.ep.SetSockOpt(tcpip.IPv6TrafficClassOption(tClass)); err != nil { + c.t.Errorf("SetSockOpt(%T, 0x%x) failed: %s", v, tcpip.IPv6TrafficClassOption(tClass), err) } if err := c.ep.GetSockOpt(&v); err != nil { - c.t.Errorf("GetSockopt failed: %s", err) + c.t.Errorf("GetSockopt(%T) failed: %s", v, err) } - if want := tcpip.IPv6TrafficClassOption(tos); v != want { - c.t.Errorf("got GetSockOpt(...) = %#v, want = %#v", v, want) + if want := tcpip.IPv6TrafficClassOption(tClass); v != want { + c.t.Errorf("got GetSockOpt(%T) = 0x%x, want = 0x%x", v, v, want) } - testWrite(c, flow, checker.TOS(tos, 0)) + // The header getter for TClass is called TOS, so use that checker. + testWrite(c, flow, checker.TOS(tClass, 0)) }) } } -func TestReceiveTOSV4(t *testing.T) { - for _, flow := range []testFlow{unicastV4, broadcast} { - t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { - c := newDualTestContext(t, defaultMTU) - defer c.cleanup() +func TestReceiveTosTClass(t *testing.T) { + testCases := []struct { + name string + getReceiveOption tcpip.SockOptBool + tests []testFlow + }{ + {"ReceiveTosOption", tcpip.ReceiveTOSOption, []testFlow{unicastV4, broadcast}}, + {"ReceiveTClassOption", tcpip.ReceiveTClassOption, []testFlow{unicastV4in6, unicastV6, unicastV6Only, broadcastIn6}}, + } + for _, testCase := range testCases { + for _, flow := range testCase.tests { + t.Run(fmt.Sprintf("%s:flow:%s", testCase.name, flow), func(t *testing.T) { + c := newDualTestContext(t, defaultMTU) + defer c.cleanup() - c.createEndpointForFlow(flow) + c.createEndpointForFlow(flow) + option := testCase.getReceiveOption + name := testCase.name - // Verify that setting and reading the option works. - v, err := c.ep.GetSockOptBool(tcpip.ReceiveTOSOption) - if err != nil { - c.t.Fatal("GetSockOptBool(tcpip.ReceiveTOSOption) failed:", err) - } - // Test for expected default value. - if v != false { - c.t.Errorf("got GetSockOptBool(tcpip.ReceiveTOSOption) = %t, want = %t", v, false) - } + // Verify that setting and reading the option works. + v, err := c.ep.GetSockOptBool(option) + if err != nil { + c.t.Errorf("GetSockoptBool(%s) failed: %s", name, err) + } + // Test for expected default value. + if v != false { + c.t.Errorf("got GetSockOptBool(%s) = %t, want = %t", name, v, false) + } - want := true - if err := c.ep.SetSockOptBool(tcpip.ReceiveTOSOption, want); err != nil { - c.t.Fatalf("SetSockOptBool(tcpip.ReceiveTOSOption, %t) failed: %s", want, err) - } + want := true + if err := c.ep.SetSockOptBool(option, want); err != nil { + c.t.Fatalf("SetSockOptBool(%s, %t) failed: %s", name, want, err) + } - got, err := c.ep.GetSockOptBool(tcpip.ReceiveTOSOption) - if err != nil { - c.t.Fatal("GetSockOptBool(tcpip.ReceiveTOSOption) failed:", err) - } - if got != want { - c.t.Fatalf("got GetSockOptBool(tcpip.ReceiveTOSOption) = %t, want = %t", got, want) - } + got, err := c.ep.GetSockOptBool(option) + if err != nil { + c.t.Errorf("GetSockoptBool(%s) failed: %s", name, err) + } - // Verify that the correct received TOS is handed through as - // ancillary data to the ControlMessages struct. 
- if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { - c.t.Fatal("Bind failed:", err) - } - testRead(c, flow, checker.ReceiveTOS(testTOS)) - }) + if got != want { + c.t.Errorf("got GetSockOptBool(%s) = %t, want = %t", name, got, want) + } + + // Verify that the correct received TOS or TClass is handed through as + // ancillary data to the ControlMessages struct. + if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil { + c.t.Fatalf("Bind failed: %s", err) + } + switch option { + case tcpip.ReceiveTClassOption: + testRead(c, flow, checker.ReceiveTClass(testTOS)) + case tcpip.ReceiveTOSOption: + testRead(c, flow, checker.ReceiveTOS(testTOS)) + default: + t.Fatalf("unknown test variant: %s", name) + } + }) + } } } diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h index 083ebbcf0..39fd6709d 100644 --- a/test/syscalls/linux/ip_socket_test_util.h +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -84,20 +84,20 @@ SocketPairKind DualStackUDPBidirectionalBindSocketPair(int type); // SocketPairs created with AF_INET and the given type. SocketPairKind IPv4UDPUnboundSocketPair(int type); -// IPv4UDPUnboundSocketPair returns a SocketKind that represents -// a SimpleSocket created with AF_INET, SOCK_DGRAM, and the given type. +// IPv4UDPUnboundSocket returns a SocketKind that represents a SimpleSocket +// created with AF_INET, SOCK_DGRAM, and the given type. SocketKind IPv4UDPUnboundSocket(int type); -// IPv6UDPUnboundSocketPair returns a SocketKind that represents -// a SimpleSocket created with AF_INET6, SOCK_DGRAM, and the given type. +// IPv6UDPUnboundSocket returns a SocketKind that represents a SimpleSocket +// created with AF_INET6, SOCK_DGRAM, and the given type. SocketKind IPv6UDPUnboundSocket(int type); -// IPv4TCPUnboundSocketPair returns a SocketKind that represents -// a SimpleSocket created with AF_INET, SOCK_STREAM and the given type. +// IPv4TCPUnboundSocket returns a SocketKind that represents a SimpleSocket +// created with AF_INET, SOCK_STREAM and the given type. SocketKind IPv4TCPUnboundSocket(int type); -// IPv6TCPUnboundSocketPair returns a SocketKind that represents -// a SimpleSocket created with AF_INET6, SOCK_STREAM and the given type. +// IPv6TCPUnboundSocket returns a SocketKind that represents a SimpleSocket +// created with AF_INET6, SOCK_STREAM and the given type. SocketKind IPv6TCPUnboundSocket(int type); // IfAddrHelper is a helper class that determines the local interfaces present diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index db5663ecd..1c533fdf2 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -14,6 +14,7 @@ #include "test/syscalls/linux/socket_ip_udp_generic.h" +#include #include #include #include @@ -209,46 +210,6 @@ TEST_P(UDPSocketPairTest, SetMulticastLoopChar) { EXPECT_EQ(get, kSockOptOn); } -// Ensure that Receiving TOS is off by default. -TEST_P(UDPSocketPairTest, RecvTosDefault) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - - int get = -1; - socklen_t get_len = sizeof(get); - ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), - SyscallSucceedsWithValue(0)); - EXPECT_EQ(get_len, sizeof(get)); - EXPECT_EQ(get, kSockOptOff); -} - -// Test that setting and getting IP_RECVTOS works as expected. 
-TEST_P(UDPSocketPairTest, SetRecvTos) { - auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, - &kSockOptOff, sizeof(kSockOptOff)), - SyscallSucceeds()); - - int get = -1; - socklen_t get_len = sizeof(get); - ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), - SyscallSucceedsWithValue(0)); - EXPECT_EQ(get_len, sizeof(get)); - EXPECT_EQ(get, kSockOptOff); - - ASSERT_THAT(setsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, - &kSockOptOn, sizeof(kSockOptOn)), - SyscallSucceeds()); - - ASSERT_THAT( - getsockopt(sockets->first_fd(), IPPROTO_IP, IP_RECVTOS, &get, &get_len), - SyscallSucceedsWithValue(0)); - EXPECT_EQ(get_len, sizeof(get)); - EXPECT_EQ(get, kSockOptOn); -} - TEST_P(UDPSocketPairTest, ReuseAddrDefault) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -401,5 +362,97 @@ TEST_P(UDPSocketPairTest, SetAndGetIPPKTINFO) { EXPECT_EQ(get_len, sizeof(get)); } +// Holds TOS or TClass information for IPv4 or IPv6 respectively. +struct RecvTosOption { + int level; + int option; +}; + +RecvTosOption GetRecvTosOption(int domain) { + TEST_CHECK(domain == AF_INET || domain == AF_INET6); + RecvTosOption opt; + switch (domain) { + case AF_INET: + opt.level = IPPROTO_IP; + opt.option = IP_RECVTOS; + break; + case AF_INET6: + opt.level = IPPROTO_IPV6; + opt.option = IPV6_RECVTCLASS; + break; + } + return opt; +} + +// Ensure that Receiving TOS or TCLASS is off by default. +TEST_P(UDPSocketPairTest, RecvTosDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + RecvTosOption t = GetRecvTosOption(GetParam().domain); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); +} + +// Test that setting and getting IP_RECVTOS or IPV6_RECVTCLASS works as +// expected. +TEST_P(UDPSocketPairTest, SetRecvTos) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + RecvTosOption t = GetRecvTosOption(GetParam().domain); + + ASSERT_THAT(setsockopt(sockets->first_fd(), t.level, t.option, &kSockOptOff, + sizeof(kSockOptOff)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOff); + + ASSERT_THAT(setsockopt(sockets->first_fd(), t.level, t.option, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kSockOptOn); +} + +// Test that any socket (including IPv6 only) accepts the IPv4 TOS option: this +// mirrors behavior in linux. +TEST_P(UDPSocketPairTest, TOSRecvMismatch) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + RecvTosOption t = GetRecvTosOption(AF_INET); + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT( + getsockopt(sockets->first_fd(), t.level, t.option, &get, &get_len), + SyscallSucceedsWithValue(0)); +} + +// Test that an IPv4 socket does not support the IPv6 TClass option. +TEST_P(UDPSocketPairTest, TClassRecvMismatch) { + // This should only test AF_INET sockets for the mismatch behavior. 
+ SKIP_IF(GetParam().domain != AF_INET); + + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IPV6, IPV6_RECVTCLASS, + &get, &get_len), + SyscallFailsWithErrno(EOPNOTSUPP)); +} + } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 9f8de6b48..57b1a357c 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1349,9 +1349,6 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { // outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SetAndReceiveTOS) { - // TODO(b/144868438): IPV6_RECVTCLASS not supported for netstack. - SKIP_IF((GetParam() != AddressFamily::kIpv4) && IsRunningOnGvisor() && - !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1422,7 +1419,6 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { // TOS byte on outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SendAndReceiveTOS) { - // TODO(b/144868438): IPV6_RECVTCLASS not supported for netstack. // TODO(b/146661005): Setting TOS via cmsg not supported for netstack. SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); -- cgit v1.2.3 From 55c99ce106e03c419729318947e0be477ed181d0 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Wed, 19 Feb 2020 12:31:43 -0800 Subject: Include more test files in exports_files So that they can be included by Fuchsia's syscall tests PiperOrigin-RevId: 296030383 --- test/syscalls/linux/BUILD | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index e7c82adfc..05a818795 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -12,8 +12,12 @@ exports_files( "socket_ip_loopback_blocking.cc", "socket_ip_tcp_generic_loopback.cc", "socket_ip_tcp_loopback.cc", + "socket_ip_tcp_loopback_blocking.cc", + "socket_ip_tcp_loopback_nonblock.cc", "socket_ip_tcp_udp_generic.cc", "socket_ip_udp_loopback.cc", + "socket_ip_udp_loopback_blocking.cc", + "socket_ip_udp_loopback_nonblock.cc", "socket_ip_unbound.cc", "socket_ipv4_tcp_unbound_external_networking_test.cc", "socket_ipv4_udp_unbound_external_networking_test.cc", -- cgit v1.2.3 From 30794512d3977ebb2b185e5e9cfb969d558a07a4 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Wed, 19 Feb 2020 18:20:52 -0800 Subject: Add basic microbenchmarks. 
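The benchmark binaries added below are built on the google/benchmark library pulled into WORKSPACE in this change. As a rough, illustrative sketch only (BM_Example and its body are not part of this patch; the real benchmarks such as BM_Getpid and BM_FutexWakeNop follow the same State-driven pattern and link //test/util:test_main instead of defining their own main), a benchmark in this style looks like:

#include "benchmark/benchmark.h"

namespace {

// Illustrative only: the body loops over `state`; the framework picks the
// iteration count and reports time per iteration.
void BM_Example(benchmark::State& state) {
  int x = 0;
  for (auto _ : state) {
    // DoNotOptimize keeps the loop body from being optimized away.
    benchmark::DoNotOptimize(x += 1);
  }
}

// Registration macro; argument ranges or thread counts can be attached,
// e.g. BENCHMARK(BM_Example)->Range(1, 1 << 16).
BENCHMARK(BM_Example);

}  // namespace

The BUILD rules below then wire each benchmark binary into syscall_test so it runs under the same test runner as the functional syscall tests.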
PiperOrigin-RevId: 296104390 --- WORKSPACE | 10 + test/perf/BUILD | 114 +++++++ test/perf/linux/BUILD | 356 +++++++++++++++++++++ test/perf/linux/clock_getres_benchmark.cc | 39 +++ test/perf/linux/clock_gettime_benchmark.cc | 60 ++++ test/perf/linux/death_benchmark.cc | 36 +++ test/perf/linux/epoll_benchmark.cc | 99 ++++++ test/perf/linux/fork_benchmark.cc | 350 +++++++++++++++++++++ test/perf/linux/futex_benchmark.cc | 248 +++++++++++++++ test/perf/linux/getdents_benchmark.cc | 149 +++++++++ test/perf/linux/getpid_benchmark.cc | 37 +++ test/perf/linux/gettid_benchmark.cc | 38 +++ test/perf/linux/mapping_benchmark.cc | 163 ++++++++++ test/perf/linux/open_benchmark.cc | 56 ++++ test/perf/linux/pipe_benchmark.cc | 66 ++++ test/perf/linux/randread_benchmark.cc | 100 ++++++ test/perf/linux/read_benchmark.cc | 53 ++++ test/perf/linux/sched_yield_benchmark.cc | 37 +++ test/perf/linux/send_recv_benchmark.cc | 372 ++++++++++++++++++++++ test/perf/linux/seqwrite_benchmark.cc | 66 ++++ test/perf/linux/signal_benchmark.cc | 59 ++++ test/perf/linux/sleep_benchmark.cc | 60 ++++ test/perf/linux/stat_benchmark.cc | 62 ++++ test/perf/linux/unlink_benchmark.cc | 66 ++++ test/perf/linux/write_benchmark.cc | 52 ++++ test/runner/BUILD | 22 ++ test/runner/defs.bzl | 218 +++++++++++++ test/runner/gtest/BUILD | 9 + test/runner/gtest/gtest.go | 154 +++++++++ test/runner/runner.go | 477 ++++++++++++++++++++++++++++ test/syscalls/BUILD | 21 +- test/syscalls/build_defs.bzl | 180 ----------- test/syscalls/gtest/BUILD | 9 - test/syscalls/gtest/gtest.go | 93 ------ test/syscalls/linux/alarm.cc | 3 +- test/syscalls/linux/exec.cc | 3 +- test/syscalls/linux/fcntl.cc | 2 +- test/syscalls/linux/itimer.cc | 3 +- test/syscalls/linux/prctl.cc | 2 +- test/syscalls/linux/prctl_setuid.cc | 2 +- test/syscalls/linux/proc.cc | 2 +- test/syscalls/linux/ptrace.cc | 2 +- test/syscalls/linux/rtsignal.cc | 3 +- test/syscalls/linux/seccomp.cc | 2 +- test/syscalls/linux/sigiret.cc | 3 +- test/syscalls/linux/signalfd.cc | 2 +- test/syscalls/linux/sigstop.cc | 2 +- test/syscalls/linux/sigtimedwait.cc | 3 +- test/syscalls/linux/timers.cc | 2 +- test/syscalls/linux/vfork.cc | 2 +- test/syscalls/syscall_test_runner.go | 482 ----------------------------- test/syscalls/syscall_test_runner.sh | 34 -- test/util/BUILD | 3 +- test/util/test_main.cc | 2 +- test/util/test_util.h | 1 + test/util/test_util_impl.cc | 14 + tools/bazeldefs/defs.bzl | 1 + tools/defs.bzl | 3 +- 58 files changed, 3666 insertions(+), 843 deletions(-) create mode 100644 test/perf/BUILD create mode 100644 test/perf/linux/BUILD create mode 100644 test/perf/linux/clock_getres_benchmark.cc create mode 100644 test/perf/linux/clock_gettime_benchmark.cc create mode 100644 test/perf/linux/death_benchmark.cc create mode 100644 test/perf/linux/epoll_benchmark.cc create mode 100644 test/perf/linux/fork_benchmark.cc create mode 100644 test/perf/linux/futex_benchmark.cc create mode 100644 test/perf/linux/getdents_benchmark.cc create mode 100644 test/perf/linux/getpid_benchmark.cc create mode 100644 test/perf/linux/gettid_benchmark.cc create mode 100644 test/perf/linux/mapping_benchmark.cc create mode 100644 test/perf/linux/open_benchmark.cc create mode 100644 test/perf/linux/pipe_benchmark.cc create mode 100644 test/perf/linux/randread_benchmark.cc create mode 100644 test/perf/linux/read_benchmark.cc create mode 100644 test/perf/linux/sched_yield_benchmark.cc create mode 100644 test/perf/linux/send_recv_benchmark.cc create mode 100644 test/perf/linux/seqwrite_benchmark.cc create mode 100644 
test/perf/linux/signal_benchmark.cc create mode 100644 test/perf/linux/sleep_benchmark.cc create mode 100644 test/perf/linux/stat_benchmark.cc create mode 100644 test/perf/linux/unlink_benchmark.cc create mode 100644 test/perf/linux/write_benchmark.cc create mode 100644 test/runner/BUILD create mode 100644 test/runner/defs.bzl create mode 100644 test/runner/gtest/BUILD create mode 100644 test/runner/gtest/gtest.go create mode 100644 test/runner/runner.go delete mode 100644 test/syscalls/build_defs.bzl delete mode 100644 test/syscalls/gtest/BUILD delete mode 100644 test/syscalls/gtest/gtest.go delete mode 100644 test/syscalls/syscall_test_runner.go delete mode 100755 test/syscalls/syscall_test_runner.sh (limited to 'test') diff --git a/WORKSPACE b/WORKSPACE index 2827c3a26..ff0196dc6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -330,3 +330,13 @@ http_archive( "https://github.com/google/googletest/archive/565f1b848215b77c3732bca345fe76a0431d8b34.tar.gz", ], ) + +http_archive( + name = "com_google_benchmark", + sha256 = "3c6a165b6ecc948967a1ead710d4a181d7b0fbcaa183ef7ea84604994966221a", + strip_prefix = "benchmark-1.5.0", + urls = [ + "https://mirror.bazel.build/github.com/google/benchmark/archive/v1.5.0.tar.gz", + "https://github.com/google/benchmark/archive/v1.5.0.tar.gz", + ], +) diff --git a/test/perf/BUILD b/test/perf/BUILD new file mode 100644 index 000000000..7a2bf10ed --- /dev/null +++ b/test/perf/BUILD @@ -0,0 +1,114 @@ +load("//test/runner:defs.bzl", "syscall_test") + +package(licenses = ["notice"]) + +syscall_test( + test = "//test/perf/linux:clock_getres_benchmark", +) + +syscall_test( + test = "//test/perf/linux:clock_gettime_benchmark", +) + +syscall_test( + test = "//test/perf/linux:death_benchmark", +) + +syscall_test( + test = "//test/perf/linux:epoll_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:fork_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:futex_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:getdents_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:getpid_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:gettid_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:mapping_benchmark", +) + +syscall_test( + size = "large", + add_overlay = True, + test = "//test/perf/linux:open_benchmark", +) + +syscall_test( + test = "//test/perf/linux:pipe_benchmark", +) + +syscall_test( + size = "large", + add_overlay = True, + test = "//test/perf/linux:randread_benchmark", +) + +syscall_test( + size = "large", + add_overlay = True, + test = "//test/perf/linux:read_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:sched_yield_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:send_recv_benchmark", +) + +syscall_test( + size = "large", + add_overlay = True, + test = "//test/perf/linux:seqwrite_benchmark", +) + +syscall_test( + size = "large", + test = "//test/perf/linux:signal_benchmark", +) + +syscall_test( + test = "//test/perf/linux:sleep_benchmark", +) + +syscall_test( + size = "large", + add_overlay = True, + test = "//test/perf/linux:stat_benchmark", +) + +syscall_test( + size = "large", + add_overlay = True, + test = "//test/perf/linux:unlink_benchmark", +) + +syscall_test( + size = "large", + add_overlay = True, + test = "//test/perf/linux:write_benchmark", +) diff --git a/test/perf/linux/BUILD b/test/perf/linux/BUILD new file mode 100644 index 
000000000..b4e907826 --- /dev/null +++ b/test/perf/linux/BUILD @@ -0,0 +1,356 @@ +load("//tools:defs.bzl", "cc_binary", "gbenchmark", "gtest") + +package( + default_visibility = ["//:sandbox"], + licenses = ["notice"], +) + +cc_binary( + name = "getpid_benchmark", + testonly = 1, + srcs = [ + "getpid_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:test_main", + ], +) + +cc_binary( + name = "send_recv_benchmark", + testonly = 1, + srcs = [ + "send_recv_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/syscalls/linux:socket_test_util", + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + ], +) + +cc_binary( + name = "gettid_benchmark", + testonly = 1, + srcs = [ + "gettid_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:test_main", + ], +) + +cc_binary( + name = "sched_yield_benchmark", + testonly = 1, + srcs = [ + "sched_yield_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "clock_getres_benchmark", + testonly = 1, + srcs = [ + "clock_getres_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:test_main", + ], +) + +cc_binary( + name = "clock_gettime_benchmark", + testonly = 1, + srcs = [ + "clock_gettime_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:test_main", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "open_benchmark", + testonly = 1, + srcs = [ + "open_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:fs_util", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + ], +) + +cc_binary( + name = "read_benchmark", + testonly = 1, + srcs = [ + "read_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:fs_util", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "randread_benchmark", + testonly = 1, + srcs = [ + "randread_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/random", + ], +) + +cc_binary( + name = "write_benchmark", + testonly = 1, + srcs = [ + "write_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "seqwrite_benchmark", + testonly = 1, + srcs = [ + "seqwrite_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:logging", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/random", + ], +) + +cc_binary( + name = "pipe_benchmark", + testonly = 1, + srcs = [ + "pipe_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + +cc_binary( + name = "fork_benchmark", + testonly = 1, + srcs = [ + "fork_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:cleanup", + "//test/util:file_descriptor", + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + ], +) + +cc_binary( + name = 
"futex_benchmark", + testonly = 1, + srcs = [ + "futex_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:logging", + "//test/util:test_main", + "//test/util:thread_util", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "epoll_benchmark", + testonly = 1, + srcs = [ + "epoll_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:epoll_util", + "//test/util:file_descriptor", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/time", + ], +) + +cc_binary( + name = "death_benchmark", + testonly = 1, + srcs = [ + "death_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:logging", + "//test/util:test_main", + ], +) + +cc_binary( + name = "mapping_benchmark", + testonly = 1, + srcs = [ + "mapping_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:logging", + "//test/util:memory_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "signal_benchmark", + testonly = 1, + srcs = [ + "signal_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:logging", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "getdents_benchmark", + testonly = 1, + srcs = [ + "getdents_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) + +cc_binary( + name = "sleep_benchmark", + testonly = 1, + srcs = [ + "sleep_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:logging", + "//test/util:test_main", + ], +) + +cc_binary( + name = "stat_benchmark", + testonly = 1, + srcs = [ + "stat_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + ], +) + +cc_binary( + name = "unlink_benchmark", + testonly = 1, + srcs = [ + "unlink_benchmark.cc", + ], + deps = [ + gbenchmark, + gtest, + "//test/util:fs_util", + "//test/util:temp_path", + "//test/util:test_main", + "//test/util:test_util", + ], +) diff --git a/test/perf/linux/clock_getres_benchmark.cc b/test/perf/linux/clock_getres_benchmark.cc new file mode 100644 index 000000000..b051293ad --- /dev/null +++ b/test/perf/linux/clock_getres_benchmark.cc @@ -0,0 +1,39 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" + +namespace gvisor { +namespace testing { + +namespace { + +// clock_getres(1) is very nearly a no-op syscall, but it does require copying +// out to a userspace struct. It thus provides a nice small copy-out benchmark. 
+void BM_ClockGetRes(benchmark::State& state) { + struct timespec ts; + for (auto _ : state) { + clock_getres(CLOCK_MONOTONIC, &ts); + } +} + +BENCHMARK(BM_ClockGetRes); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/clock_gettime_benchmark.cc b/test/perf/linux/clock_gettime_benchmark.cc new file mode 100644 index 000000000..6691bebd9 --- /dev/null +++ b/test/perf/linux/clock_gettime_benchmark.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "benchmark/benchmark.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_ClockGettimeThreadCPUTime(benchmark::State& state) { + clockid_t clockid; + ASSERT_EQ(0, pthread_getcpuclockid(pthread_self(), &clockid)); + struct timespec tp; + + for (auto _ : state) { + clock_gettime(clockid, &tp); + } +} + +BENCHMARK(BM_ClockGettimeThreadCPUTime); + +void BM_VDSOClockGettime(benchmark::State& state) { + const clockid_t clock = state.range(0); + struct timespec tp; + absl::Time start = absl::Now(); + + // Don't benchmark the calibration phase. + while (absl::Now() < start + absl::Milliseconds(2100)) { + clock_gettime(clock, &tp); + } + + for (auto _ : state) { + clock_gettime(clock, &tp); + } +} + +BENCHMARK(BM_VDSOClockGettime)->Arg(CLOCK_MONOTONIC)->Arg(CLOCK_REALTIME); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/death_benchmark.cc b/test/perf/linux/death_benchmark.cc new file mode 100644 index 000000000..cb2b6fd07 --- /dev/null +++ b/test/perf/linux/death_benchmark.cc @@ -0,0 +1,36 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" + +namespace gvisor { +namespace testing { + +namespace { + +// DeathTest is not so much a microbenchmark as a macrobenchmark. It is testing +// the ability of gVisor (on whatever platform) to execute all the related +// stack-dumping routines associated with EXPECT_EXIT / EXPECT_DEATH. 
+TEST(DeathTest, ZeroEqualsOne) { + EXPECT_EXIT({ TEST_CHECK(0 == 1); }, ::testing::KilledBySignal(SIGABRT), ""); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/epoll_benchmark.cc b/test/perf/linux/epoll_benchmark.cc new file mode 100644 index 000000000..0b121338a --- /dev/null +++ b/test/perf/linux/epoll_benchmark.cc @@ -0,0 +1,99 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "benchmark/benchmark.h" +#include "test/util/epoll_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Returns a new eventfd. +PosixErrorOr NewEventFD() { + int fd = eventfd(0, /* flags = */ 0); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, "eventfd"); + } + return FileDescriptor(fd); +} + +// Also stolen from epoll.cc unit tests. +void BM_EpollTimeout(benchmark::State& state) { + constexpr int kFDsPerEpoll = 3; + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + + std::vector eventfds; + for (int i = 0; i < kFDsPerEpoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, 0)); + } + + struct epoll_event result[kFDsPerEpoll]; + int timeout_ms = state.range(0); + + for (auto _ : state) { + EXPECT_EQ(0, epoll_wait(epollfd.get(), result, kFDsPerEpoll, timeout_ms)); + } +} + +BENCHMARK(BM_EpollTimeout)->Range(0, 8); + +// Also stolen from epoll.cc unit tests. +void BM_EpollAllEvents(benchmark::State& state) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + const int fds_per_epoll = state.range(0); + constexpr uint64_t kEventVal = 5; + + std::vector eventfds; + for (int i = 0; i < fds_per_epoll; i++) { + eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD())); + ASSERT_NO_ERRNO( + RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN, 0)); + + ASSERT_THAT(WriteFd(eventfds[i].get(), &kEventVal, sizeof(kEventVal)), + SyscallSucceedsWithValue(sizeof(kEventVal))); + } + + std::vector result(fds_per_epoll); + + for (auto _ : state) { + EXPECT_EQ(fds_per_epoll, + epoll_wait(epollfd.get(), result.data(), fds_per_epoll, 0)); + } +} + +BENCHMARK(BM_EpollAllEvents)->Range(2, 1024); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/fork_benchmark.cc b/test/perf/linux/fork_benchmark.cc new file mode 100644 index 000000000..84fdbc8a0 --- /dev/null +++ b/test/perf/linux/fork_benchmark.cc @@ -0,0 +1,350 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "gtest/gtest.h" +#include "absl/synchronization/barrier.h" +#include "benchmark/benchmark.h" +#include "test/util/cleanup.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int kBusyMax = 250; + +// Do some CPU-bound busy-work. +int busy(int max) { + // Prevent the compiler from optimizing this work away, + volatile int count = 0; + + for (int i = 1; i < max; i++) { + for (int j = 2; j < i / 2; j++) { + if (i % j == 0) { + count++; + } + } + } + + return count; +} + +void BM_CPUBoundUniprocess(benchmark::State& state) { + for (auto _ : state) { + busy(kBusyMax); + } +} + +BENCHMARK(BM_CPUBoundUniprocess); + +void BM_CPUBoundAsymmetric(benchmark::State& state) { + const size_t max = state.max_iterations; + pid_t child = fork(); + if (child == 0) { + for (int i = 0; i < max; i++) { + busy(kBusyMax); + } + _exit(0); + } + ASSERT_THAT(child, SyscallSucceeds()); + ASSERT_TRUE(state.KeepRunningBatch(max)); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + ASSERT_FALSE(state.KeepRunning()); +} + +BENCHMARK(BM_CPUBoundAsymmetric)->UseRealTime(); + +void BM_CPUBoundSymmetric(benchmark::State& state) { + std::vector children; + auto child_cleanup = Cleanup([&] { + for (const pid_t child : children) { + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + } + ASSERT_FALSE(state.KeepRunning()); + }); + + const int processes = state.range(0); + for (int i = 0; i < processes; i++) { + size_t cur = (state.max_iterations + (processes - 1)) / processes; + if ((state.iterations() + cur) >= state.max_iterations) { + cur = state.max_iterations - state.iterations(); + } + pid_t child = fork(); + if (child == 0) { + for (int i = 0; i < cur; i++) { + busy(kBusyMax); + } + _exit(0); + } + ASSERT_THAT(child, SyscallSucceeds()); + if (cur > 0) { + // We can have a zero cur here, depending. + ASSERT_TRUE(state.KeepRunningBatch(cur)); + } + children.push_back(child); + } +} + +BENCHMARK(BM_CPUBoundSymmetric)->Range(2, 16)->UseRealTime(); + +// Child routine for ProcessSwitch/ThreadSwitch. +// Reads from readfd and writes the result to writefd. +void SwitchChild(int readfd, int writefd) { + while (1) { + char buf; + int ret = ReadFd(readfd, &buf, 1); + if (ret == 0) { + break; + } + TEST_CHECK_MSG(ret == 1, "read failed"); + + ret = WriteFd(writefd, &buf, 1); + if (ret == -1) { + TEST_CHECK_MSG(errno == EPIPE, "unexpected write failure"); + break; + } + TEST_CHECK_MSG(ret == 1, "write failed"); + } +} + +// Send bytes in a loop through a series of pipes, each passing through a +// different process. 
+// +// Proc 0 Proc 1 +// * ----------> * +// ^ Pipe 1 | +// | | +// | Pipe 0 | Pipe 2 +// | | +// | | +// | Pipe 3 v +// * <---------- * +// Proc 3 Proc 2 +// +// This exercises context switching through multiple processes. +void BM_ProcessSwitch(benchmark::State& state) { + // Code below assumes there are at least two processes. + const int num_processes = state.range(0); + ASSERT_GE(num_processes, 2); + + std::vector children; + auto child_cleanup = Cleanup([&] { + for (const pid_t child : children) { + int status; + EXPECT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + } + }); + + // Must come after children, as the FDs must be closed before the children + // will exit. + std::vector read_fds; + std::vector write_fds; + + for (int i = 0; i < num_processes; i++) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + read_fds.emplace_back(fds[0]); + write_fds.emplace_back(fds[1]); + } + + // This process is one of the processes in the loop. It will be considered + // index 0. + for (int i = 1; i < num_processes; i++) { + // Read from current pipe index, write to next. + const int read_index = i; + const int read_fd = read_fds[read_index].get(); + + const int write_index = (i + 1) % num_processes; + const int write_fd = write_fds[write_index].get(); + + // std::vector isn't safe to use from the fork child. + FileDescriptor* read_array = read_fds.data(); + FileDescriptor* write_array = write_fds.data(); + + pid_t child = fork(); + if (!child) { + // Close all other FDs. + for (int j = 0; j < num_processes; j++) { + if (j != read_index) { + read_array[j].reset(); + } + if (j != write_index) { + write_array[j].reset(); + } + } + + SwitchChild(read_fd, write_fd); + _exit(0); + } + ASSERT_THAT(child, SyscallSucceeds()); + children.push_back(child); + } + + // Read from current pipe index (0), write to next (1). + const int read_index = 0; + const int read_fd = read_fds[read_index].get(); + + const int write_index = 1; + const int write_fd = write_fds[write_index].get(); + + // Kick start the loop. + char buf = 'a'; + ASSERT_THAT(WriteFd(write_fd, &buf, 1), SyscallSucceedsWithValue(1)); + + for (auto _ : state) { + ASSERT_THAT(ReadFd(read_fd, &buf, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(write_fd, &buf, 1), SyscallSucceedsWithValue(1)); + } +} + +BENCHMARK(BM_ProcessSwitch)->Range(2, 16)->UseRealTime(); + +// Equivalent to BM_ThreadSwitch using threads instead of processes. +void BM_ThreadSwitch(benchmark::State& state) { + // Code below assumes there are at least two threads. + const int num_threads = state.range(0); + ASSERT_GE(num_threads, 2); + + // Must come after threads, as the FDs must be closed before the children + // will exit. + std::vector> threads; + std::vector read_fds; + std::vector write_fds; + + for (int i = 0; i < num_threads; i++) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + read_fds.emplace_back(fds[0]); + write_fds.emplace_back(fds[1]); + } + + // This thread is one of the threads in the loop. It will be considered + // index 0. + for (int i = 1; i < num_threads; i++) { + // Read from current pipe index, write to next. + // + // Transfer ownership of the FDs to the thread. 
+ const int read_index = i; + const int read_fd = read_fds[read_index].release(); + + const int write_index = (i + 1) % num_threads; + const int write_fd = write_fds[write_index].release(); + + threads.emplace_back(std::make_unique([read_fd, write_fd] { + FileDescriptor read(read_fd); + FileDescriptor write(write_fd); + SwitchChild(read.get(), write.get()); + })); + } + + // Read from current pipe index (0), write to next (1). + const int read_index = 0; + const int read_fd = read_fds[read_index].get(); + + const int write_index = 1; + const int write_fd = write_fds[write_index].get(); + + // Kick start the loop. + char buf = 'a'; + ASSERT_THAT(WriteFd(write_fd, &buf, 1), SyscallSucceedsWithValue(1)); + + for (auto _ : state) { + ASSERT_THAT(ReadFd(read_fd, &buf, 1), SyscallSucceedsWithValue(1)); + ASSERT_THAT(WriteFd(write_fd, &buf, 1), SyscallSucceedsWithValue(1)); + } + + // The two FDs still owned by this thread are closed, causing the next thread + // to exit its loop and close its FDs, and so on until all threads exit. +} + +BENCHMARK(BM_ThreadSwitch)->Range(2, 16)->UseRealTime(); + +void BM_ThreadStart(benchmark::State& state) { + const int num_threads = state.range(0); + + for (auto _ : state) { + state.PauseTiming(); + + auto barrier = new absl::Barrier(num_threads + 1); + std::vector> threads; + + state.ResumeTiming(); + + for (size_t i = 0; i < num_threads; ++i) { + threads.emplace_back(std::make_unique([barrier] { + if (barrier->Block()) { + delete barrier; + } + })); + } + + if (barrier->Block()) { + delete barrier; + } + + state.PauseTiming(); + + for (const auto& thread : threads) { + thread->Join(); + } + + state.ResumeTiming(); + } +} + +BENCHMARK(BM_ThreadStart)->Range(1, 2048)->UseRealTime(); + +// Benchmark the complete fork + exit + wait. +void BM_ProcessLifecycle(benchmark::State& state) { + const int num_procs = state.range(0); + + std::vector pids(num_procs); + for (auto _ : state) { + for (size_t i = 0; i < num_procs; ++i) { + int pid = fork(); + if (pid == 0) { + _exit(0); + } + ASSERT_THAT(pid, SyscallSucceeds()); + pids[i] = pid; + } + + for (const int pid : pids) { + ASSERT_THAT(RetryEINTR(waitpid)(pid, nullptr, 0), + SyscallSucceedsWithValue(pid)); + } + } +} + +BENCHMARK(BM_ProcessLifecycle)->Range(1, 512)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/futex_benchmark.cc b/test/perf/linux/futex_benchmark.cc new file mode 100644 index 000000000..b349d50bf --- /dev/null +++ b/test/perf/linux/futex_benchmark.cc @@ -0,0 +1,248 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +inline int FutexWait(std::atomic* v, int32_t val) { + return syscall(SYS_futex, v, FUTEX_BITSET_MATCH_ANY, nullptr); +} + +inline int FutexWaitRelativeTimeout(std::atomic* v, int32_t val, + const struct timespec* reltime) { + return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, reltime); +} + +inline int FutexWaitAbsoluteTimeout(std::atomic* v, int32_t val, + const struct timespec* abstime) { + return syscall(SYS_futex, v, FUTEX_BITSET_MATCH_ANY, abstime); +} + +inline int FutexWaitBitsetAbsoluteTimeout(std::atomic* v, int32_t val, + int32_t bits, + const struct timespec* abstime) { + return syscall(SYS_futex, v, FUTEX_WAIT_BITSET_PRIVATE | FUTEX_CLOCK_REALTIME, + val, abstime, nullptr, bits); +} + +inline int FutexWake(std::atomic* v, int32_t count) { + return syscall(SYS_futex, v, FUTEX_WAKE_PRIVATE, count); +} + +// This just uses FUTEX_WAKE on an address with nothing waiting, very simple. +void BM_FutexWakeNop(benchmark::State& state) { + std::atomic v(0); + + for (auto _ : state) { + EXPECT_EQ(0, FutexWake(&v, 1)); + } +} + +BENCHMARK(BM_FutexWakeNop); + +// This just uses FUTEX_WAIT on an address whose value has changed, i.e., the +// syscall won't wait. +void BM_FutexWaitNop(benchmark::State& state) { + std::atomic v(0); + + for (auto _ : state) { + EXPECT_EQ(-EAGAIN, FutexWait(&v, 1)); + } +} + +BENCHMARK(BM_FutexWaitNop); + +// This uses FUTEX_WAIT with a timeout on an address whose value never +// changes, such that it always times out. Timeout overhead can be estimated by +// timer overruns for short timeouts. +void BM_FutexWaitTimeout(benchmark::State& state) { + const int timeout_ns = state.range(0); + std::atomic v(0); + auto ts = absl::ToTimespec(absl::Nanoseconds(timeout_ns)); + + for (auto _ : state) { + EXPECT_EQ(-ETIMEDOUT, FutexWaitRelativeTimeout(&v, 0, &ts)); + } +} + +BENCHMARK(BM_FutexWaitTimeout) + ->Arg(1) + ->Arg(10) + ->Arg(100) + ->Arg(1000) + ->Arg(10000); + +// This calls FUTEX_WAIT_BITSET with CLOCK_REALTIME. +void BM_FutexWaitBitset(benchmark::State& state) { + std::atomic v(0); + int timeout_ns = state.range(0); + auto ts = absl::ToTimespec(absl::Nanoseconds(timeout_ns)); + for (auto _ : state) { + EXPECT_EQ(-ETIMEDOUT, FutexWaitBitsetAbsoluteTimeout(&v, 0, 1, &ts)); + } +} + +BENCHMARK(BM_FutexWaitBitset)->Range(0, 100000); + +int64_t GetCurrentMonotonicTimeNanos() { + struct timespec ts; + TEST_CHECK(clock_gettime(CLOCK_MONOTONIC, &ts) != -1); + return ts.tv_sec * 1000000000ULL + ts.tv_nsec; +} + +void SpinNanos(int64_t delay_ns) { + if (delay_ns <= 0) { + return; + } + const int64_t end = GetCurrentMonotonicTimeNanos() + delay_ns; + while (GetCurrentMonotonicTimeNanos() < end) { + // spin + } +} + +// Each iteration of FutexRoundtripDelayed involves a thread sending a futex +// wakeup to another thread, which spins for delay_us and then sends a futex +// wakeup back. The time per iteration is 2* (delay_us + kBeforeWakeDelayNs + +// futex/scheduling overhead). 
+void BM_FutexRoundtripDelayed(benchmark::State& state) { + const int delay_us = state.range(0); + + const int64_t delay_ns = delay_us * 1000; + // Spin for an extra kBeforeWakeDelayNs before invoking FUTEX_WAKE to reduce + // the probability that the wakeup comes before the wait, preventing the wait + // from ever taking effect and causing the benchmark to underestimate the + // actual wakeup time. + constexpr int64_t kBeforeWakeDelayNs = 500; + std::atomic v(0); + ScopedThread t([&] { + for (int i = 0; i < state.max_iterations; i++) { + SpinNanos(delay_ns); + while (v.load(std::memory_order_acquire) == 0) { + FutexWait(&v, 0); + } + SpinNanos(kBeforeWakeDelayNs + delay_ns); + v.store(0, std::memory_order_release); + FutexWake(&v, 1); + } + }); + for (auto _ : state) { + SpinNanos(kBeforeWakeDelayNs + delay_ns); + v.store(1, std::memory_order_release); + FutexWake(&v, 1); + SpinNanos(delay_ns); + while (v.load(std::memory_order_acquire) == 1) { + FutexWait(&v, 1); + } + } +} + +BENCHMARK(BM_FutexRoundtripDelayed) + ->Arg(0) + ->Arg(10) + ->Arg(20) + ->Arg(50) + ->Arg(100); + +// FutexLock is a simple, dumb futex based lock implementation. +// It will try to acquire the lock by atomically incrementing the +// lock word. If it did not increment the lock from 0 to 1, someone +// else has the lock, so it will FUTEX_WAIT until it is woken in +// the unlock path. +class FutexLock { + public: + FutexLock() : lock_word_(0) {} + + void lock(struct timespec* deadline) { + int32_t val; + while ((val = lock_word_.fetch_add(1, std::memory_order_acquire) + 1) != + 1) { + // If we didn't get the lock by incrementing from 0 to 1, + // do a FUTEX_WAIT with the desired current value set to + // val. If val is no longer what the atomic increment returned, + // someone might have set it to 0 so we can try to acquire + // again. + int ret = FutexWaitAbsoluteTimeout(&lock_word_, val, deadline); + if (ret == 0 || ret == -EWOULDBLOCK || ret == -EINTR) { + continue; + } else { + FAIL() << "unexpected FUTEX_WAIT return: " << ret; + } + } + } + + void unlock() { + // Store 0 into the lock word and wake one waiter. We intentionally + // ignore the return value of the FUTEX_WAKE here, since there may be + // no waiters to wake anyway. + lock_word_.store(0, std::memory_order_release); + (void)FutexWake(&lock_word_, 1); + } + + private: + std::atomic lock_word_; +}; + +FutexLock* test_lock; // Used below. + +void FutexContend(benchmark::State& state, int thread_index, + struct timespec* deadline) { + int counter = 0; + if (thread_index == 0) { + test_lock = new FutexLock(); + } + for (auto _ : state) { + test_lock->lock(deadline); + counter++; + test_lock->unlock(); + } + if (thread_index == 0) { + delete test_lock; + } + state.SetItemsProcessed(state.iterations()); +} + +void BM_FutexContend(benchmark::State& state) { + FutexContend(state, state.thread_index, nullptr); +} + +BENCHMARK(BM_FutexContend)->ThreadRange(1, 1024)->UseRealTime(); + +void BM_FutexDeadlineContend(benchmark::State& state) { + auto deadline = absl::ToTimespec(absl::Now() + absl::Minutes(10)); + FutexContend(state, state.thread_index, &deadline); +} + +BENCHMARK(BM_FutexDeadlineContend)->ThreadRange(1, 1024)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/getdents_benchmark.cc b/test/perf/linux/getdents_benchmark.cc new file mode 100644 index 000000000..0e03975b4 --- /dev/null +++ b/test/perf/linux/getdents_benchmark.cc @@ -0,0 +1,149 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +#ifndef SYS_getdents64 +#if defined(__x86_64__) +#define SYS_getdents64 217 +#elif defined(__aarch64__) +#define SYS_getdents64 217 +#else +#error "Unknown architecture" +#endif +#endif // SYS_getdents64 + +namespace gvisor { +namespace testing { + +namespace { + +constexpr int kBufferSize = 16384; + +PosixErrorOr CreateDirectory(int count, + std::vector* files) { + ASSIGN_OR_RETURN_ERRNO(TempPath dir, TempPath::CreateDir()); + + ASSIGN_OR_RETURN_ERRNO(FileDescriptor dfd, + Open(dir.path(), O_RDONLY | O_DIRECTORY)); + + for (int i = 0; i < count; i++) { + auto file = NewTempRelPath(); + auto res = MknodAt(dfd, file, S_IFREG | 0644, 0); + RETURN_IF_ERRNO(res); + files->push_back(file); + } + + return std::move(dir); +} + +PosixError CleanupDirectory(const TempPath& dir, + std::vector* files) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor dfd, + Open(dir.path(), O_RDONLY | O_DIRECTORY)); + + for (auto it = files->begin(); it != files->end(); ++it) { + auto res = UnlinkAt(dfd, *it, 0); + RETURN_IF_ERRNO(res); + } + return NoError(); +} + +// Creates a directory containing `files` files, and reads all the directory +// entries from the directory using a single FD. +void BM_GetdentsSameFD(benchmark::State& state) { + // Create directory with given files. + const int count = state.range(0); + + // Keep a vector of all of the file TempPaths that is destroyed before dir. + // + // Normally, we'd simply allow dir to recursively clean up the contained + // files, but that recursive cleanup uses getdents, which may be very slow in + // extreme benchmarks. + TempPath dir; + std::vector files; + dir = ASSERT_NO_ERRNO_AND_VALUE(CreateDirectory(count, &files)); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY)); + char buffer[kBufferSize]; + + // We read all directory entries on each iteration, but report this as a + // "batch" iteration so that reported times are per file. + while (state.KeepRunningBatch(count)) { + ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds()); + + int ret; + do { + ASSERT_THAT(ret = syscall(SYS_getdents64, fd.get(), buffer, kBufferSize), + SyscallSucceeds()); + } while (ret > 0); + } + + ASSERT_NO_ERRNO(CleanupDirectory(dir, &files)); + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(BM_GetdentsSameFD)->Range(1, 1 << 16)->UseRealTime(); + +// Creates a directory containing `files` files, and reads all the directory +// entries from the directory using a new FD each time. +void BM_GetdentsNewFD(benchmark::State& state) { + // Create directory with given files. + const int count = state.range(0); + + // Keep a vector of all of the file TempPaths that is destroyed before dir. 
+ // + // Normally, we'd simply allow dir to recursively clean up the contained + // files, but that recursive cleanup uses getdents, which may be very slow in + // extreme benchmarks. + TempPath dir; + std::vector files; + dir = ASSERT_NO_ERRNO_AND_VALUE(CreateDirectory(count, &files)); + char buffer[kBufferSize]; + + // We read all directory entries on each iteration, but report this as a + // "batch" iteration so that reported times are per file. + while (state.KeepRunningBatch(count)) { + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY | O_DIRECTORY)); + + int ret; + do { + ASSERT_THAT(ret = syscall(SYS_getdents64, fd.get(), buffer, kBufferSize), + SyscallSucceeds()); + } while (ret > 0); + } + + ASSERT_NO_ERRNO(CleanupDirectory(dir, &files)); + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(BM_GetdentsNewFD)->Range(1, 1 << 16)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/getpid_benchmark.cc b/test/perf/linux/getpid_benchmark.cc new file mode 100644 index 000000000..db74cb264 --- /dev/null +++ b/test/perf/linux/getpid_benchmark.cc @@ -0,0 +1,37 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Getpid(benchmark::State& state) { + for (auto _ : state) { + syscall(SYS_getpid); + } +} + +BENCHMARK(BM_Getpid); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/gettid_benchmark.cc b/test/perf/linux/gettid_benchmark.cc new file mode 100644 index 000000000..8f4961f5e --- /dev/null +++ b/test/perf/linux/gettid_benchmark.cc @@ -0,0 +1,38 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
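// Illustrative aside (not part of this patch): the getdents benchmarks above
// simply drain a directory by re-issuing getdents64(2) until it returns 0.
// For reference, the kernel fills the supplied buffer with variable-length
// records in the standard linux_dirent64 layout, walked via d_reclen, as
// sketched below with illustrative names.

#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <cstdint>
#include <cstdio>

struct ExampleDirent64 {      // Mirrors struct linux_dirent64.
  uint64_t d_ino;             // Inode number.
  int64_t d_off;              // Offset to the next record.
  unsigned short d_reclen;    // Length of this record.
  unsigned char d_type;       // File type (DT_REG, DT_DIR, ...).
  char d_name[];              // Null-terminated file name.
};

void ExampleListDirectory(const char* path) {
  int fd = open(path, O_RDONLY | O_DIRECTORY);
  if (fd < 0) return;
  char buf[16384];
  long n;
  while ((n = syscall(SYS_getdents64, fd, buf, sizeof(buf))) > 0) {
    for (long off = 0; off < n;) {
      auto* d = reinterpret_cast<ExampleDirent64*>(buf + off);
      printf("%s\n", d->d_name);
      off += d->d_reclen;
    }
  }
  close(fd);
}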
+ +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Gettid(benchmark::State& state) { + for (auto _ : state) { + syscall(SYS_gettid); + } +} + +BENCHMARK(BM_Gettid)->ThreadRange(1, 4000)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/mapping_benchmark.cc b/test/perf/linux/mapping_benchmark.cc new file mode 100644 index 000000000..39c30fe69 --- /dev/null +++ b/test/perf/linux/mapping_benchmark.cc @@ -0,0 +1,163 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" +#include "test/util/memory_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Conservative value for /proc/sys/vm/max_map_count, which limits the number of +// VMAs, minus a safety margin for VMAs that already exist for the test binary. +// The default value for max_map_count is +// include/linux/mm.h:DEFAULT_MAX_MAP_COUNT = 65530. +constexpr size_t kMaxVMAs = 64001; + +// Map then unmap pages without touching them. +void BM_MapUnmap(benchmark::State& state) { + // Number of pages to map. + const int pages = state.range(0); + + while (state.KeepRunning()) { + void* addr = mmap(0, pages * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + TEST_CHECK_MSG(addr != MAP_FAILED, "mmap failed"); + + int ret = munmap(addr, pages * kPageSize); + TEST_CHECK_MSG(ret == 0, "munmap failed"); + } +} + +BENCHMARK(BM_MapUnmap)->Range(1, 1 << 17)->UseRealTime(); + +// Map, touch, then unmap pages. +void BM_MapTouchUnmap(benchmark::State& state) { + // Number of pages to map. + const int pages = state.range(0); + + while (state.KeepRunning()) { + void* addr = mmap(0, pages * kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + TEST_CHECK_MSG(addr != MAP_FAILED, "mmap failed"); + + char* c = reinterpret_cast(addr); + char* end = c + pages * kPageSize; + while (c < end) { + *c = 42; + c += kPageSize; + } + + int ret = munmap(addr, pages * kPageSize); + TEST_CHECK_MSG(ret == 0, "munmap failed"); + } +} + +BENCHMARK(BM_MapTouchUnmap)->Range(1, 1 << 17)->UseRealTime(); + +// Map and touch many pages, unmapping all at once. +// +// NOTE(b/111429208): This is a regression test to ensure performant mapping and +// allocation even with tons of mappings. +void BM_MapTouchMany(benchmark::State& state) { + // Number of pages to map. 
+ const int page_count = state.range(0); + + while (state.KeepRunning()) { + std::vector pages; + + for (int i = 0; i < page_count; i++) { + void* addr = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + TEST_CHECK_MSG(addr != MAP_FAILED, "mmap failed"); + + char* c = reinterpret_cast(addr); + *c = 42; + + pages.push_back(addr); + } + + for (void* addr : pages) { + int ret = munmap(addr, kPageSize); + TEST_CHECK_MSG(ret == 0, "munmap failed"); + } + } + + state.SetBytesProcessed(kPageSize * page_count * state.iterations()); +} + +BENCHMARK(BM_MapTouchMany)->Range(1, 1 << 12)->UseRealTime(); + +void BM_PageFault(benchmark::State& state) { + // Map the region in which we will take page faults. To ensure that each page + // fault maps only a single page, each page we touch must correspond to a + // distinct VMA. Thus we need a 1-page gap between each 1-page VMA. However, + // each gap consists of a PROT_NONE VMA, instead of an unmapped hole, so that + // if there are background threads running, they can't inadvertently creating + // mappings in our gaps that are unmapped when the test ends. + size_t test_pages = kMaxVMAs; + // Ensure that test_pages is odd, since we want the test region to both + // begin and end with a mapped page. + if (test_pages % 2 == 0) { + test_pages--; + } + const size_t test_region_bytes = test_pages * kPageSize; + // Use MAP_SHARED here because madvise(MADV_DONTNEED) on private mappings on + // gVisor won't force future sentry page faults (by design). Use MAP_POPULATE + // so that Linux pre-allocates the shmem file used to back the mapping. + Mapping m = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(test_region_bytes, PROT_READ, MAP_SHARED | MAP_POPULATE)); + for (size_t i = 0; i < test_pages / 2; i++) { + ASSERT_THAT( + mprotect(reinterpret_cast(m.addr() + ((2 * i + 1) * kPageSize)), + kPageSize, PROT_NONE), + SyscallSucceeds()); + } + + const size_t mapped_pages = test_pages / 2 + 1; + // "Start" at the end of the mapped region to force the mapped region to be + // reset, since we mapped it with MAP_POPULATE. + size_t cur_page = mapped_pages; + for (auto _ : state) { + if (cur_page >= mapped_pages) { + // We've reached the end of our mapped region and have to reset it to + // incur page faults again. + state.PauseTiming(); + ASSERT_THAT(madvise(m.ptr(), test_region_bytes, MADV_DONTNEED), + SyscallSucceeds()); + cur_page = 0; + state.ResumeTiming(); + } + const uintptr_t addr = m.addr() + (2 * cur_page * kPageSize); + const char c = *reinterpret_cast(addr); + benchmark::DoNotOptimize(c); + cur_page++; + } +} + +BENCHMARK(BM_PageFault)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/open_benchmark.cc b/test/perf/linux/open_benchmark.cc new file mode 100644 index 000000000..68008f6d5 --- /dev/null +++ b/test/perf/linux/open_benchmark.cc @@ -0,0 +1,56 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
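// Illustrative aside (not part of this patch): BM_PageFault above resets its
// region with madvise(MADV_DONTNEED) so every timed access takes a fresh
// fault (and it uses a MAP_SHARED region because, as its comments note,
// MADV_DONTNEED on private mappings does not force refaults under gVisor).
// The underlying Linux behavior for a private anonymous mapping is sketched
// below: after MADV_DONTNEED the page is dropped, and the next touch faults
// in a fresh zero-filled page.

#include <sys/mman.h>

#include <cassert>
#include <cstddef>

void ExampleMadviseRefault() {
  const size_t kLen = 4096;
  void* addr = mmap(nullptr, kLen, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  assert(addr != MAP_FAILED);
  char* p = static_cast<char*>(addr);

  p[0] = 42;                                        // Fault in and dirty the page.
  assert(madvise(addr, kLen, MADV_DONTNEED) == 0);  // Drop it again.
  assert(p[0] == 0);                                // Refaults as a zero page.

  munmap(addr, kLen);
}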
+ +#include +#include +#include + +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/fs_util.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Open(benchmark::State& state) { + const int size = state.range(0); + std::vector cache; + for (int i = 0; i < size; i++) { + auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + cache.emplace_back(std::move(path)); + } + + unsigned int seed = 1; + for (auto _ : state) { + const int chosen = rand_r(&seed) % size; + int fd = open(cache[chosen].path().c_str(), O_RDONLY); + TEST_CHECK(fd != -1); + close(fd); + } +} + +BENCHMARK(BM_Open)->Range(1, 128)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/pipe_benchmark.cc b/test/perf/linux/pipe_benchmark.cc new file mode 100644 index 000000000..8f5f6a2a3 --- /dev/null +++ b/test/perf/linux/pipe_benchmark.cc @@ -0,0 +1,66 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Pipe(benchmark::State& state) { + int fds[2]; + TEST_CHECK(pipe(fds) == 0); + + const int size = state.range(0); + std::vector wbuf(size); + std::vector rbuf(size); + RandomizeBuffer(wbuf.data(), size); + + ScopedThread t([&] { + auto const fd = fds[1]; + for (int i = 0; i < state.max_iterations; i++) { + TEST_CHECK(WriteFd(fd, wbuf.data(), wbuf.size()) == size); + } + }); + + for (auto _ : state) { + TEST_CHECK(ReadFd(fds[0], rbuf.data(), rbuf.size()) == size); + } + + t.Join(); + + close(fds[0]); + close(fds[1]); + + state.SetBytesProcessed(static_cast(size) * + static_cast(state.iterations())); +} + +BENCHMARK(BM_Pipe)->Range(1, 1 << 20)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/randread_benchmark.cc b/test/perf/linux/randread_benchmark.cc new file mode 100644 index 000000000..b0eb8c24e --- /dev/null +++ b/test/perf/linux/randread_benchmark.cc @@ -0,0 +1,100 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Create a 1GB file that will be read from at random positions. This should +// invalid any performance gains from caching. +const uint64_t kFileSize = 1ULL << 30; + +// How many bytes to write at once to initialize the file used to read from. +const uint32_t kWriteSize = 65536; + +// Largest benchmarked read unit. +const uint32_t kMaxRead = 1UL << 26; + +TempPath CreateFile(uint64_t file_size) { + auto path = TempPath::CreateFile().ValueOrDie(); + FileDescriptor fd = Open(path.path(), O_WRONLY).ValueOrDie(); + + // Try to minimize syscalls by using maximum size writev() requests. + std::vector buffer(kWriteSize); + RandomizeBuffer(buffer.data(), buffer.size()); + const std::vector> iovecs_list = + GenerateIovecs(file_size, buffer.data(), buffer.size()); + for (const auto& iovecs : iovecs_list) { + TEST_CHECK(writev(fd.get(), iovecs.data(), iovecs.size()) >= 0); + } + + return path; +} + +// Global test state, initialized once per process lifetime. +struct GlobalState { + const TempPath tmpfile; + explicit GlobalState(TempPath tfile) : tmpfile(std::move(tfile)) {} +}; + +GlobalState& GetGlobalState() { + // This gets created only once throughout the lifetime of the process. + // Use a dynamically allocated object (that is never deleted) to avoid order + // of destruction of static storage variables issues. + static GlobalState* const state = + // The actual file size is the maximum random seek range (kFileSize) + the + // maximum read size so we can read that number of bytes at the end of the + // file. + new GlobalState(CreateFile(kFileSize + kMaxRead)); + return *state; +} + +void BM_RandRead(benchmark::State& state) { + const int size = state.range(0); + + GlobalState& global_state = GetGlobalState(); + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(global_state.tmpfile.path(), O_RDONLY)); + std::vector buf(size); + + unsigned int seed = 1; + for (auto _ : state) { + TEST_CHECK(PreadFd(fd.get(), buf.data(), buf.size(), + rand_r(&seed) % kFileSize) == size); + } + + state.SetBytesProcessed(static_cast(size) * + static_cast(state.iterations())); +} + +BENCHMARK(BM_RandRead)->Range(1, kMaxRead)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/read_benchmark.cc b/test/perf/linux/read_benchmark.cc new file mode 100644 index 000000000..62445867d --- /dev/null +++ b/test/perf/linux/read_benchmark.cc @@ -0,0 +1,53 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
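// Illustrative aside (not part of this patch): CreateFile() above relies on a
// GenerateIovecs() test helper that is not shown in this diff; presumably it
// splits the requested file size into writev()-sized batches of iovecs that
// all point at the same pre-randomized buffer. A minimal sketch of that idea,
// using a hypothetical helper name and the IOV_MAX batch limit:

#include <limits.h>
#include <sys/uio.h>

#include <cstddef>
#include <vector>

std::vector<std::vector<struct iovec>> ExampleIovecBatches(size_t total,
                                                           char* buf,
                                                           size_t buflen) {
  std::vector<std::vector<struct iovec>> batches;
  std::vector<struct iovec> current;
  while (total > 0) {
    const size_t n = total < buflen ? total : buflen;
    current.push_back({buf, n});
    total -= n;
    if (current.size() == IOV_MAX || total == 0) {
      batches.push_back(current);
      current.clear();
    }
  }
  return batches;
}

// Each batch can then be handed to writev(fd, batch.data(), batch.size()).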
+ +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/fs_util.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Read(benchmark::State& state) { + const int size = state.range(0); + const std::string contents(size, 0); + auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), contents, TempPath::kDefaultFileMode)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path.path(), O_RDONLY)); + + std::vector buf(size); + for (auto _ : state) { + TEST_CHECK(PreadFd(fd.get(), buf.data(), buf.size(), 0) == size); + } + + state.SetBytesProcessed(static_cast(size) * + static_cast(state.iterations())); +} + +BENCHMARK(BM_Read)->Range(1, 1 << 26)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/sched_yield_benchmark.cc b/test/perf/linux/sched_yield_benchmark.cc new file mode 100644 index 000000000..6756b5575 --- /dev/null +++ b/test/perf/linux/sched_yield_benchmark.cc @@ -0,0 +1,37 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Sched_yield(benchmark::State& state) { + for (auto ignored : state) { + TEST_CHECK(sched_yield() == 0); + } +} + +BENCHMARK(BM_Sched_yield)->ThreadRange(1, 2000)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/send_recv_benchmark.cc b/test/perf/linux/send_recv_benchmark.cc new file mode 100644 index 000000000..d73e49523 --- /dev/null +++ b/test/perf/linux/send_recv_benchmark.cc @@ -0,0 +1,372 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
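// Illustrative aside (not part of this patch): the send/recv benchmarks that
// follow build struct msghdr values by hand, optionally with a buffer for
// control messages (see BM_RecvmsgWithControlBuf). For reference, control
// buffers are sized with CMSG_SPACE() and walked with the standard CMSG_*
// macros, as sketched below with illustrative names.

#include <sys/socket.h>

#include <cstdio>

void ExamplePrintControlMessages(struct msghdr* hdr) {
  for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(hdr); cmsg != nullptr;
       cmsg = CMSG_NXTHDR(hdr, cmsg)) {
    // For example, SCM_RIGHTS messages carry file descriptors in CMSG_DATA().
    printf("control message: level=%d type=%d len=%zu\n", cmsg->cmsg_level,
           cmsg->cmsg_type, static_cast<size_t>(cmsg->cmsg_len));
  }
}

// A control buffer intended to carry a single file descriptor would typically
// be CMSG_SPACE(sizeof(int)) bytes.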
+ +#include +#include +#include +#include +#include + +#include + +#include "gtest/gtest.h" +#include "absl/synchronization/notification.h" +#include "benchmark/benchmark.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/logging.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +constexpr ssize_t kMessageSize = 1024; + +class Message { + public: + explicit Message(int byte = 0) : Message(byte, kMessageSize, 0) {} + + explicit Message(int byte, int sz) : Message(byte, sz, 0) {} + + explicit Message(int byte, int sz, int cmsg_sz) + : buffer_(sz, byte), cmsg_buffer_(cmsg_sz, 0) { + iov_.iov_base = buffer_.data(); + iov_.iov_len = sz; + hdr_.msg_iov = &iov_; + hdr_.msg_iovlen = 1; + hdr_.msg_control = cmsg_buffer_.data(); + hdr_.msg_controllen = cmsg_sz; + } + + struct msghdr* header() { + return &hdr_; + } + + private: + std::vector buffer_; + std::vector cmsg_buffer_; + struct iovec iov_ = {}; + struct msghdr hdr_ = {}; +}; + +void BM_Recvmsg(benchmark::State& state) { + int sockets[2]; + TEST_CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == 0); + FileDescriptor send_socket(sockets[0]), recv_socket(sockets[1]); + absl::Notification notification; + Message send_msg('a'), recv_msg; + + ScopedThread t([&send_msg, &send_socket, ¬ification] { + while (!notification.HasBeenNotified()) { + sendmsg(send_socket.get(), send_msg.header(), 0); + } + }); + + int64_t bytes_received = 0; + for (auto ignored : state) { + int n = recvmsg(recv_socket.get(), recv_msg.header(), 0); + TEST_CHECK(n > 0); + bytes_received += n; + } + + notification.Notify(); + recv_socket.reset(); + + state.SetBytesProcessed(bytes_received); +} + +BENCHMARK(BM_Recvmsg)->UseRealTime(); + +void BM_Sendmsg(benchmark::State& state) { + int sockets[2]; + TEST_CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == 0); + FileDescriptor send_socket(sockets[0]), recv_socket(sockets[1]); + absl::Notification notification; + Message send_msg('a'), recv_msg; + + ScopedThread t([&recv_msg, &recv_socket, ¬ification] { + while (!notification.HasBeenNotified()) { + recvmsg(recv_socket.get(), recv_msg.header(), 0); + } + }); + + int64_t bytes_sent = 0; + for (auto ignored : state) { + int n = sendmsg(send_socket.get(), send_msg.header(), 0); + TEST_CHECK(n > 0); + bytes_sent += n; + } + + notification.Notify(); + send_socket.reset(); + + state.SetBytesProcessed(bytes_sent); +} + +BENCHMARK(BM_Sendmsg)->UseRealTime(); + +void BM_Recvfrom(benchmark::State& state) { + int sockets[2]; + TEST_CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == 0); + FileDescriptor send_socket(sockets[0]), recv_socket(sockets[1]); + absl::Notification notification; + char send_buffer[kMessageSize], recv_buffer[kMessageSize]; + + ScopedThread t([&send_socket, &send_buffer, ¬ification] { + while (!notification.HasBeenNotified()) { + sendto(send_socket.get(), send_buffer, kMessageSize, 0, nullptr, 0); + } + }); + + int bytes_received = 0; + for (auto ignored : state) { + int n = recvfrom(recv_socket.get(), recv_buffer, kMessageSize, 0, nullptr, + nullptr); + TEST_CHECK(n > 0); + bytes_received += n; + } + + notification.Notify(); + recv_socket.reset(); + + state.SetBytesProcessed(bytes_received); +} + +BENCHMARK(BM_Recvfrom)->UseRealTime(); + +void BM_Sendto(benchmark::State& state) { + int sockets[2]; + TEST_CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == 0); + FileDescriptor 
send_socket(sockets[0]), recv_socket(sockets[1]); + absl::Notification notification; + char send_buffer[kMessageSize], recv_buffer[kMessageSize]; + + ScopedThread t([&recv_socket, &recv_buffer, ¬ification] { + while (!notification.HasBeenNotified()) { + recvfrom(recv_socket.get(), recv_buffer, kMessageSize, 0, nullptr, + nullptr); + } + }); + + int64_t bytes_sent = 0; + for (auto ignored : state) { + int n = sendto(send_socket.get(), send_buffer, kMessageSize, 0, nullptr, 0); + TEST_CHECK(n > 0); + bytes_sent += n; + } + + notification.Notify(); + send_socket.reset(); + + state.SetBytesProcessed(bytes_sent); +} + +BENCHMARK(BM_Sendto)->UseRealTime(); + +PosixErrorOr InetLoopbackAddr(int family) { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + addr.ss_family = family; + switch (family) { + case AF_INET: + reinterpret_cast(&addr)->sin_addr.s_addr = + htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + reinterpret_cast(&addr)->sin6_addr = + in6addr_loopback; + break; + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } + return addr; +} + +// BM_RecvmsgWithControlBuf measures the performance of recvmsg when we allocate +// space for control messages. Note that we do not expect to receive any. +void BM_RecvmsgWithControlBuf(benchmark::State& state) { + auto listen_socket = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP)); + + // Initialize address to the loopback one. + sockaddr_storage addr = ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(AF_INET6)); + socklen_t addrlen = sizeof(addr); + + // Bind to some port then start listening. + ASSERT_THAT(bind(listen_socket.get(), + reinterpret_cast(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(listen(listen_socket.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the address we're listening on, then connect to it. We need to do this + // because we're allowing the stack to pick a port for us. + ASSERT_THAT(getsockname(listen_socket.get(), + reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + + auto send_socket = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP)); + + ASSERT_THAT( + RetryEINTR(connect)(send_socket.get(), + reinterpret_cast(&addr), addrlen), + SyscallSucceeds()); + + // Accept the connection. + auto recv_socket = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_socket.get(), nullptr, nullptr)); + + absl::Notification notification; + Message send_msg('a'); + // Create a msghdr with a buffer allocated for control messages. + Message recv_msg(0, kMessageSize, /*cmsg_sz=*/24); + + ScopedThread t([&send_msg, &send_socket, ¬ification] { + while (!notification.HasBeenNotified()) { + sendmsg(send_socket.get(), send_msg.header(), 0); + } + }); + + int64_t bytes_received = 0; + for (auto ignored : state) { + int n = recvmsg(recv_socket.get(), recv_msg.header(), 0); + TEST_CHECK(n > 0); + bytes_received += n; + } + + notification.Notify(); + recv_socket.reset(); + + state.SetBytesProcessed(bytes_received); +} + +BENCHMARK(BM_RecvmsgWithControlBuf)->UseRealTime(); + +// BM_SendmsgTCP measures the sendmsg throughput with varying payload sizes. +// +// state.Args[0] indicates whether the underlying socket should be blocking or +// non-blocking w/ 0 indicating non-blocking and 1 to indicate blocking. +// state.Args[1] is the size of the payload to be used per sendmsg call. 
+void BM_SendmsgTCP(benchmark::State& state) { + auto listen_socket = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); + + // Initialize address to the loopback one. + sockaddr_storage addr = ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(AF_INET)); + socklen_t addrlen = sizeof(addr); + + // Bind to some port then start listening. + ASSERT_THAT(bind(listen_socket.get(), + reinterpret_cast(&addr), addrlen), + SyscallSucceeds()); + + ASSERT_THAT(listen(listen_socket.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the address we're listening on, then connect to it. We need to do this + // because we're allowing the stack to pick a port for us. + ASSERT_THAT(getsockname(listen_socket.get(), + reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + + auto send_socket = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)); + + ASSERT_THAT( + RetryEINTR(connect)(send_socket.get(), + reinterpret_cast(&addr), addrlen), + SyscallSucceeds()); + + // Accept the connection. + auto recv_socket = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_socket.get(), nullptr, nullptr)); + + // Check if we want to run the test w/ a blocking send socket + // or non-blocking. + const int blocking = state.range(0); + if (!blocking) { + // Set the send FD to O_NONBLOCK. + int opts; + ASSERT_THAT(opts = fcntl(send_socket.get(), F_GETFL), SyscallSucceeds()); + opts |= O_NONBLOCK; + ASSERT_THAT(fcntl(send_socket.get(), F_SETFL, opts), SyscallSucceeds()); + } + + absl::Notification notification; + + // Get the buffer size we should use for this iteration of the test. + const int buf_size = state.range(1); + Message send_msg('a', buf_size), recv_msg(0, buf_size); + + ScopedThread t([&recv_msg, &recv_socket, ¬ification] { + while (!notification.HasBeenNotified()) { + TEST_CHECK(recvmsg(recv_socket.get(), recv_msg.header(), 0) >= 0); + } + }); + + int64_t bytes_sent = 0; + int ncalls = 0; + for (auto ignored : state) { + int sent = 0; + while (true) { + struct msghdr hdr = {}; + struct iovec iov = {}; + struct msghdr* snd_header = send_msg.header(); + iov.iov_base = static_cast(snd_header->msg_iov->iov_base) + sent; + iov.iov_len = snd_header->msg_iov->iov_len - sent; + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + int n = RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0); + ncalls++; + if (n > 0) { + sent += n; + if (sent == buf_size) { + break; + } + // n can be > 0 but less than requested size. In which case we don't + // poll. + continue; + } + // Poll the fd for it to become writable. + struct pollfd poll_fd = {send_socket.get(), POLL_OUT, 0}; + EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10), + SyscallSucceedsWithValue(0)); + } + bytes_sent += static_cast(sent); + } + + notification.Notify(); + send_socket.reset(); + state.SetBytesProcessed(bytes_sent); +} + +void Args(benchmark::internal::Benchmark* benchmark) { + for (int blocking = 0; blocking < 2; blocking++) { + for (int buf_size = 1024; buf_size <= 256 << 20; buf_size *= 2) { + benchmark->Args({blocking, buf_size}); + } + } +} + +BENCHMARK(BM_SendmsgTCP)->Apply(&Args)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/seqwrite_benchmark.cc b/test/perf/linux/seqwrite_benchmark.cc new file mode 100644 index 000000000..af49e4477 --- /dev/null +++ b/test/perf/linux/seqwrite_benchmark.cc @@ -0,0 +1,66 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// The maximum file size of the test file, when writes get beyond this point +// they wrap around. This should be large enough to blow away caches. +const uint64_t kMaxFile = 1 << 30; + +// Perform writes of various sizes sequentially to one file. Wraps around if it +// goes above a certain maximum file size. +void BM_SeqWrite(benchmark::State& state) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_WRONLY)); + + const int size = state.range(0); + std::vector buf(size); + RandomizeBuffer(buf.data(), buf.size()); + + // Start writes at offset 0. + uint64_t offset = 0; + for (auto _ : state) { + TEST_CHECK(PwriteFd(fd.get(), buf.data(), buf.size(), offset) == + buf.size()); + offset += buf.size(); + // Wrap around if going above the maximum file size. + if (offset >= kMaxFile) { + offset = 0; + } + } + + state.SetBytesProcessed(static_cast(size) * + static_cast(state.iterations())); +} + +BENCHMARK(BM_SeqWrite)->Range(1, 1 << 26)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/signal_benchmark.cc b/test/perf/linux/signal_benchmark.cc new file mode 100644 index 000000000..a6928df58 --- /dev/null +++ b/test/perf/linux/signal_benchmark.cc @@ -0,0 +1,59 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void FixupHandler(int sig, siginfo_t* si, void* void_ctx) { + static unsigned int dataval = 0; + + // Skip the offending instruction. + ucontext_t* ctx = reinterpret_cast(void_ctx); + ctx->uc_mcontext.gregs[REG_RAX] = reinterpret_cast(&dataval); +} + +void BM_FaultSignalFixup(benchmark::State& state) { + // Set up the signal handler. + struct sigaction sa = {}; + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = FixupHandler; + sa.sa_flags = SA_SIGINFO; + TEST_CHECK(sigaction(SIGSEGV, &sa, nullptr) == 0); + + // Fault, fault, fault. 
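+ // Each iteration below stores through a pointer that is pinned to %rax and
+ // set to nullptr, so the store always raises SIGSEGV. FixupHandler (above)
+ // repoints %rax at a valid static variable before the faulting store is
+ // retried, so the loop makes progress; the measured cost is the full
+ // fault -> signal delivery -> sigreturn -> restart round trip. (x86-64
+ // specific, given the REG_RAX fixup.)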
+ for (auto _ : state) { + register volatile unsigned int* ptr asm("rax"); + + // Trigger the segfault. + ptr = nullptr; + *ptr = 0; + } +} + +BENCHMARK(BM_FaultSignalFixup)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/sleep_benchmark.cc b/test/perf/linux/sleep_benchmark.cc new file mode 100644 index 000000000..99ef05117 --- /dev/null +++ b/test/perf/linux/sleep_benchmark.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Sleep for 'param' nanoseconds. +void BM_Sleep(benchmark::State& state) { + const int nanoseconds = state.range(0); + + for (auto _ : state) { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = nanoseconds; + + int ret; + do { + ret = syscall(SYS_nanosleep, &ts, &ts); + if (ret < 0) { + TEST_CHECK(errno == EINTR); + } + } while (ret < 0); + } +} + +BENCHMARK(BM_Sleep) + ->Arg(0) + ->Arg(1) + ->Arg(1000) // 1us + ->Arg(1000 * 1000) // 1ms + ->Arg(10 * 1000 * 1000) // 10ms + ->Arg(50 * 1000 * 1000) // 50ms + ->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/stat_benchmark.cc b/test/perf/linux/stat_benchmark.cc new file mode 100644 index 000000000..f15424482 --- /dev/null +++ b/test/perf/linux/stat_benchmark.cc @@ -0,0 +1,62 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "benchmark/benchmark.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Creates a file in a nested directory hierarchy at least `depth` directories +// deep, and stats that file multiple times. +void BM_Stat(benchmark::State& state) { + // Create nested directories with given depth. + int depth = state.range(0); + const TempPath top_dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::string dir_path = top_dir.path(); + + while (depth-- > 0) { + // Don't use TempPath because it will make paths too long to use. + // + // The top_dir destructor will clean up this whole tree. 
+ dir_path = JoinPath(dir_path, absl::StrCat(depth)); + ASSERT_NO_ERRNO(Mkdir(dir_path, 0755)); + } + + // Create the file that will be stat'd. + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir_path)); + + struct stat st; + for (auto _ : state) { + ASSERT_THAT(stat(file.path().c_str(), &st), SyscallSucceeds()); + } +} + +BENCHMARK(BM_Stat)->Range(1, 100)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/unlink_benchmark.cc b/test/perf/linux/unlink_benchmark.cc new file mode 100644 index 000000000..92243a042 --- /dev/null +++ b/test/perf/linux/unlink_benchmark.cc @@ -0,0 +1,66 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/fs_util.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +// Creates a directory containing `files` files, and unlinks all the files. +void BM_Unlink(benchmark::State& state) { + // Create directory with given files. + const int file_count = state.range(0); + + // We unlink all files on each iteration, but report this as a "batch" + // iteration so that reported times are per file. + TempPath dir; + while (state.KeepRunningBatch(file_count)) { + state.PauseTiming(); + // N.B. dir is declared outside the loop so that destruction of the previous + // iteration's directory occurs here, inside of PauseTiming. + dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + std::vector files; + for (int i = 0; i < file_count; i++) { + TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + files.push_back(std::move(file)); + } + state.ResumeTiming(); + + while (!files.empty()) { + // Destructor unlinks. + files.pop_back(); + } + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(BM_Unlink)->Range(1, 100 * 1000)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/perf/linux/write_benchmark.cc b/test/perf/linux/write_benchmark.cc new file mode 100644 index 000000000..7b060c70e --- /dev/null +++ b/test/perf/linux/write_benchmark.cc @@ -0,0 +1,52 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
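// Illustrative aside (not part of this patch): several benchmarks in this
// directory (getdents, unlink) report per-item costs by timing a whole batch
// per KeepRunningBatch() step and keeping setup out of the timed region with
// PauseTiming()/ResumeTiming(). A minimal self-contained sketch of that
// google-benchmark pattern, with an illustrative benchmark name:

#include <vector>

#include "benchmark/benchmark.h"

void BM_ExampleBatchedPerItem(benchmark::State& state) {
  const int count = state.range(0);
  while (state.KeepRunningBatch(count)) {
    state.PauseTiming();
    std::vector<int> items(count, 1);  // Untimed per-batch setup.
    state.ResumeTiming();

    int sum = 0;
    for (int v : items) sum += v;  // Timed work: one unit per item.
    benchmark::DoNotOptimize(sum);
  }
  // iterations() counts `count` per batch, so this reports items, not batches.
  state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_ExampleBatchedPerItem)->Range(1, 1 << 10);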
+ +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "test/util/logging.h" +#include "test/util/temp_path.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +void BM_Write(benchmark::State& state) { + auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_WRONLY)); + + const int size = state.range(0); + std::vector buf(size); + RandomizeBuffer(buf.data(), size); + + for (auto _ : state) { + TEST_CHECK(PwriteFd(fd.get(), buf.data(), size, 0) == size); + } + + state.SetBytesProcessed(static_cast(size) * + static_cast(state.iterations())); +} + +BENCHMARK(BM_Write)->Range(1, 1 << 26)->UseRealTime(); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/runner/BUILD b/test/runner/BUILD new file mode 100644 index 000000000..9959ef9b0 --- /dev/null +++ b/test/runner/BUILD @@ -0,0 +1,22 @@ +load("//tools:defs.bzl", "go_binary") + +package(licenses = ["notice"]) + +go_binary( + name = "runner", + testonly = 1, + srcs = ["runner.go"], + data = [ + "//runsc", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/log", + "//runsc/specutils", + "//runsc/testutil", + "//test/runner/gtest", + "//test/uds", + "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl new file mode 100644 index 000000000..5e97c1867 --- /dev/null +++ b/test/runner/defs.bzl @@ -0,0 +1,218 @@ +"""Defines a rule for syscall test targets.""" + +load("//tools:defs.bzl", "loopback") + +def _runner_test_impl(ctx): + # Generate a runner binary. + runner = ctx.actions.declare_file("%s-runner" % ctx.label.name) + runner_content = "\n".join([ + "#!/bin/bash", + "set -euf -x -o pipefail", + "if [[ -n \"${TEST_UNDECLARED_OUTPUTS_DIR}\" ]]; then", + " mkdir -p \"${TEST_UNDECLARED_OUTPUTS_DIR}\"", + " chmod a+rwx \"${TEST_UNDECLARED_OUTPUTS_DIR}\"", + "fi", + "exec %s %s %s\n" % ( + ctx.files.runner[0].short_path, + " ".join(ctx.attr.runner_args), + ctx.files.test[0].short_path, + ), + ]) + ctx.actions.write(runner, runner_content, is_executable = True) + + # Return with all transitive files. + runfiles = ctx.runfiles( + transitive_files = depset(transitive = [ + depset(target.data_runfiles.files) + for target in (ctx.attr.runner, ctx.attr.test) + if hasattr(target, "data_runfiles") + ]), + files = ctx.files.runner + ctx.files.test, + collect_default = True, + collect_data = True, + ) + return [DefaultInfo(executable = runner, runfiles = runfiles)] + +_runner_test = rule( + attrs = { + "runner": attr.label( + default = "//test/runner:runner", + ), + "test": attr.label( + mandatory = True, + ), + "runner_args": attr.string_list(), + "data": attr.label_list( + allow_files = True, + ), + }, + test = True, + implementation = _runner_test_impl, +) + +def _syscall_test( + test, + shard_count, + size, + platform, + use_tmpfs, + tags, + network = "none", + file_access = "exclusive", + overlay = False, + add_uds_tree = False): + # Prepend "runsc" to non-native platform names. + full_platform = platform if platform == "native" else "runsc_" + platform + + # Name the test appropriately. + name = test.split(":")[1] + "_" + full_platform + if file_access == "shared": + name += "_shared" + if overlay: + name += "_overlay" + if network != "none": + name += "_" + network + "net" + + # Apply all tags. 
+ if tags == None: + tags = [] + + # Add the full_platform and file access in a tag to make it easier to run + # all the tests on a specific flavor. Use --test_tag_filters=ptrace,file_shared. + tags += [full_platform, "file_" + file_access] + + # Hash this target into one of 15 buckets. This can be used to + # randomly split targets between different workflows. + hash15 = hash(native.package_name() + name) % 15 + tags.append("hash15:" + str(hash15)) + + # TODO(b/139838000): Tests using hostinet must be disabled on Guitar until + # we figure out how to request ipv4 sockets on Guitar machines. + if network == "host": + tags.append("noguitar") + + # Disable off-host networking. + tags.append("requires-net:loopback") + + # Add tag to prevent the tests from running in a Bazel sandbox. + # TODO(b/120560048): Make the tests run without this tag. + tags.append("no-sandbox") + + # TODO(b/112165693): KVM tests are tagged "manual" to until the platform is + # more stable. + if platform == "kvm": + tags.append("manual") + tags.append("requires-kvm") + + # TODO(b/112165693): Remove when tests pass reliably. + tags.append("notap") + + runner_args = [ + # Arguments are passed directly to runner binary. + "--platform=" + platform, + "--network=" + network, + "--use-tmpfs=" + str(use_tmpfs), + "--file-access=" + file_access, + "--overlay=" + str(overlay), + "--add-uds-tree=" + str(add_uds_tree), + ] + + # Call the rule above. + _runner_test( + name = name, + test = test, + runner_args = runner_args, + data = [loopback], + size = size, + tags = tags, + shard_count = shard_count, + ) + +def syscall_test( + test, + shard_count = 5, + size = "small", + use_tmpfs = False, + add_overlay = False, + add_uds_tree = False, + add_hostinet = False, + tags = None): + """syscall_test is a macro that will create targets for all platforms. + + Args: + test: the test target. + shard_count: shards for defined tests. + size: the defined test size. + use_tmpfs: use tmpfs in the defined tests. + add_overlay: add an overlay test. + add_uds_tree: add a UDS test. + add_hostinet: add a hostinet test. + tags: starting test tags. + """ + + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = "native", + use_tmpfs = False, + add_uds_tree = add_uds_tree, + tags = tags, + ) + + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = "kvm", + use_tmpfs = use_tmpfs, + add_uds_tree = add_uds_tree, + tags = tags, + ) + + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = "ptrace", + use_tmpfs = use_tmpfs, + add_uds_tree = add_uds_tree, + tags = tags, + ) + + if add_overlay: + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = "ptrace", + use_tmpfs = False, # overlay is adding a writable tmpfs on top of root. + add_uds_tree = add_uds_tree, + tags = tags, + overlay = True, + ) + + if not use_tmpfs: + # Also test shared gofer access. 
+ _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = "ptrace", + use_tmpfs = use_tmpfs, + add_uds_tree = add_uds_tree, + tags = tags, + file_access = "shared", + ) + + if add_hostinet: + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = "ptrace", + use_tmpfs = use_tmpfs, + network = "host", + add_uds_tree = add_uds_tree, + tags = tags, + ) diff --git a/test/runner/gtest/BUILD b/test/runner/gtest/BUILD new file mode 100644 index 000000000..de4b2727c --- /dev/null +++ b/test/runner/gtest/BUILD @@ -0,0 +1,9 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "gtest", + srcs = ["gtest.go"], + visibility = ["//:sandbox"], +) diff --git a/test/runner/gtest/gtest.go b/test/runner/gtest/gtest.go new file mode 100644 index 000000000..23bf7b5f6 --- /dev/null +++ b/test/runner/gtest/gtest.go @@ -0,0 +1,154 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package gtest contains helpers for running google-test tests from Go. +package gtest + +import ( + "fmt" + "os/exec" + "strings" +) + +var ( + // listTestFlag is the flag that will list tests in gtest binaries. + listTestFlag = "--gtest_list_tests" + + // filterTestFlag is the flag that will filter tests in gtest binaries. + filterTestFlag = "--gtest_filter" + + // listBechmarkFlag is the flag that will list benchmarks in gtest binaries. + listBenchmarkFlag = "--benchmark_list_tests" + + // filterBenchmarkFlag is the flag that will run specified benchmarks. + filterBenchmarkFlag = "--benchmark_filter" +) + +// TestCase is a single gtest test case. +type TestCase struct { + // Suite is the suite for this test. + Suite string + + // Name is the name of this individual test. + Name string + + // benchmark indicates that this is a benchmark. In this case, the + // suite will be empty, and we will use the appropriate test and + // benchmark flags. + benchmark bool +} + +// FullName returns the name of the test including the suite. It is suitable to +// pass to "-gtest_filter". +func (tc TestCase) FullName() string { + return fmt.Sprintf("%s.%s", tc.Suite, tc.Name) +} + +// Args returns arguments to be passed when invoking the test. +func (tc TestCase) Args() []string { + if tc.benchmark { + return []string{ + fmt.Sprintf("%s=^$", filterTestFlag), + fmt.Sprintf("%s=^%s$", filterBenchmarkFlag, tc.Name), + } + } + return []string{ + fmt.Sprintf("%s=^%s$", filterTestFlag, tc.FullName()), + fmt.Sprintf("%s=^$", filterBenchmarkFlag), + } +} + +// ParseTestCases calls a gtest test binary to list its test and returns a +// slice with the name and suite of each test. +// +// If benchmarks is true, then benchmarks will be included in the list of test +// cases provided. Note that this requires the binary to support the +// benchmarks_list_tests flag. +func ParseTestCases(testBin string, benchmarks bool, extraArgs ...string) ([]TestCase, error) { + // Run to extract test cases. 
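+ // --gtest_list_tests output has the shape:
+ //
+ //   SuiteName.
+ //     TestOne
+ //     TestTwo  # GetParam() = ...
+ //
+ // Suite lines are unindented and end with ".", individual tests are
+ // indented, and trailing "#" comments may appear; the parsing below relies
+ // on exactly that shape.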
+ args := append([]string{listTestFlag}, extraArgs...) + cmd := exec.Command(testBin, args...) + out, err := cmd.Output() + if err != nil { + exitErr, ok := err.(*exec.ExitError) + if !ok { + return nil, fmt.Errorf("could not enumerate gtest tests: %v", err) + } + return nil, fmt.Errorf("could not enumerate gtest tests: %v\nstderr:\n%s", err, exitErr.Stderr) + } + + // Parse test output. + var t []TestCase + var suite string + for _, line := range strings.Split(string(out), "\n") { + // Strip comments. + line = strings.Split(line, "#")[0] + + // New suite? + if !strings.HasPrefix(line, " ") { + suite = strings.TrimSuffix(strings.TrimSpace(line), ".") + continue + } + + // Individual test. + name := strings.TrimSpace(line) + + // Do we have a suite yet? + if suite == "" { + return nil, fmt.Errorf("test without a suite: %v", name) + } + + // Add this individual test. + t = append(t, TestCase{ + Suite: suite, + Name: name, + }) + + } + + // Finished? + if !benchmarks { + return t, nil + } + + // Run again to extract benchmarks. + args = append([]string{listBenchmarkFlag}, extraArgs...) + cmd = exec.Command(testBin, args...) + out, err = cmd.Output() + if err != nil { + exitErr, ok := err.(*exec.ExitError) + if !ok { + return nil, fmt.Errorf("could not enumerate gtest benchmarks: %v", err) + } + return nil, fmt.Errorf("could not enumerate gtest benchmarks: %v\nstderr\n%s", err, exitErr.Stderr) + } + + // Parse benchmark output. + for _, line := range strings.Split(string(out), "\n") { + // Strip comments. + line = strings.Split(line, "#")[0] + + // Single benchmark. + name := strings.TrimSpace(line) + + // Add the single benchmark. + t = append(t, TestCase{ + Suite: "Benchmarks", + Name: name, + benchmark: true, + }) + } + + return t, nil +} diff --git a/test/runner/runner.go b/test/runner/runner.go new file mode 100644 index 000000000..a78ef38e0 --- /dev/null +++ b/test/runner/runner.go @@ -0,0 +1,477 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary syscall_test_runner runs the syscall test suites in gVisor +// containers and on the host platform. 
+package main + +import ( + "flag" + "fmt" + "io/ioutil" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "syscall" + "testing" + "time" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/testutil" + "gvisor.dev/gvisor/test/runner/gtest" + "gvisor.dev/gvisor/test/uds" +) + +var ( + debug = flag.Bool("debug", false, "enable debug logs") + strace = flag.Bool("strace", false, "enable strace logs") + platform = flag.String("platform", "ptrace", "platform to run on") + network = flag.String("network", "none", "network stack to run on (sandbox, host, none)") + useTmpfs = flag.Bool("use-tmpfs", false, "mounts tmpfs for /tmp") + fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode") + overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay") + parallel = flag.Bool("parallel", false, "run tests in parallel") + runscPath = flag.String("runsc", "", "path to runsc binary") + + addUDSTree = flag.Bool("add-uds-tree", false, "expose a tree of UDS utilities for use in tests") +) + +// runTestCaseNative runs the test case directly on the host machine. +func runTestCaseNative(testBin string, tc gtest.TestCase, t *testing.T) { + // These tests might be running in parallel, so make sure they have a + // unique test temp dir. + tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "") + if err != nil { + t.Fatalf("could not create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Replace TEST_TMPDIR in the current environment with something + // unique. + env := os.Environ() + newEnvVar := "TEST_TMPDIR=" + tmpDir + var found bool + for i, kv := range env { + if strings.HasPrefix(kv, "TEST_TMPDIR=") { + env[i] = newEnvVar + found = true + break + } + } + if !found { + env = append(env, newEnvVar) + } + // Remove env variables that cause the gunit binary to write output + // files, since they will stomp on eachother, and on the output files + // from this go test. + env = filterEnv(env, []string{"GUNIT_OUTPUT", "TEST_PREMATURE_EXIT_FILE", "XML_OUTPUT_FILE"}) + + // Remove shard env variables so that the gunit binary does not try to + // intepret them. + env = filterEnv(env, []string{"TEST_SHARD_INDEX", "TEST_TOTAL_SHARDS", "GTEST_SHARD_INDEX", "GTEST_TOTAL_SHARDS"}) + + if *addUDSTree { + socketDir, cleanup, err := uds.CreateSocketTree("/tmp") + if err != nil { + t.Fatalf("failed to create socket tree: %v", err) + } + defer cleanup() + + env = append(env, "TEST_UDS_TREE="+socketDir) + // On Linux, the concept of "attach" location doesn't exist. + // Just pass the same path to make these test identical. + env = append(env, "TEST_UDS_ATTACH_TREE="+socketDir) + } + + cmd := exec.Command(testBin, tc.Args()...) + cmd.Env = env + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + ws := err.(*exec.ExitError).Sys().(syscall.WaitStatus) + t.Errorf("test %q exited with status %d, want 0", tc.FullName(), ws.ExitStatus()) + } +} + +// runRunsc runs spec in runsc in a standard test configuration. +// +// runsc logs will be saved to a path in TEST_UNDECLARED_OUTPUTS_DIR. +// +// Returns an error if the sandboxed application exits non-zero. 
+func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { + bundleDir, err := testutil.SetupBundleDir(spec) + if err != nil { + return fmt.Errorf("SetupBundleDir failed: %v", err) + } + defer os.RemoveAll(bundleDir) + + rootDir, err := testutil.SetupRootDir() + if err != nil { + return fmt.Errorf("SetupRootDir failed: %v", err) + } + defer os.RemoveAll(rootDir) + + name := tc.FullName() + id := testutil.UniqueContainerID() + log.Infof("Running test %q in container %q", name, id) + specutils.LogSpec(spec) + + args := []string{ + "-root", rootDir, + "-network", *network, + "-log-format=text", + "-TESTONLY-unsafe-nonroot=true", + "-net-raw=true", + fmt.Sprintf("-panic-signal=%d", syscall.SIGTERM), + "-watchdog-action=panic", + "-platform", *platform, + "-file-access", *fileAccess, + } + if *overlay { + args = append(args, "-overlay") + } + if *debug { + args = append(args, "-debug", "-log-packets=true") + } + if *strace { + args = append(args, "-strace") + } + if *addUDSTree { + args = append(args, "-fsgofer-host-uds") + } + + if outDir, ok := syscall.Getenv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { + tdir := filepath.Join(outDir, strings.Replace(name, "/", "_", -1)) + if err := os.MkdirAll(tdir, 0755); err != nil { + return fmt.Errorf("could not create test dir: %v", err) + } + debugLogDir, err := ioutil.TempDir(tdir, "runsc") + if err != nil { + return fmt.Errorf("could not create temp dir: %v", err) + } + debugLogDir += "/" + log.Infof("runsc logs: %s", debugLogDir) + args = append(args, "-debug-log", debugLogDir) + + // Default -log sends messages to stderr which makes reading the test log + // difficult. Instead, drop them when debug log is enabled given it's a + // better place for these messages. + args = append(args, "-log=/dev/null") + } + + // Current process doesn't have CAP_SYS_ADMIN, create user namespace and run + // as root inside that namespace to get it. + rArgs := append(args, "run", "--bundle", bundleDir, id) + cmd := exec.Command(*runscPath, rArgs...) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, + // Set current user/group as root inside the namespace. + UidMappings: []syscall.SysProcIDMap{ + {ContainerID: 0, HostID: os.Getuid(), Size: 1}, + }, + GidMappings: []syscall.SysProcIDMap{ + {ContainerID: 0, HostID: os.Getgid(), Size: 1}, + }, + GidMappingsEnableSetgroups: false, + Credential: &syscall.Credential{ + Uid: 0, + Gid: 0, + }, + } + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + sig := make(chan os.Signal, 1) + signal.Notify(sig, syscall.SIGTERM) + go func() { + s, ok := <-sig + if !ok { + return + } + log.Warningf("%s: Got signal: %v", name, s) + done := make(chan bool) + dArgs := append([]string{}, args...) + dArgs = append(dArgs, "-alsologtostderr=true", "debug", "--stacks", id) + go func(dArgs []string) { + cmd := exec.Command(*runscPath, dArgs...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Run() + done <- true + }(dArgs) + + timeout := time.After(3 * time.Second) + select { + case <-timeout: + log.Infof("runsc debug --stacks is timeouted") + case <-done: + } + + log.Warningf("Send SIGTERM to the sandbox process") + dArgs = append(args, "debug", + fmt.Sprintf("--signal=%d", syscall.SIGTERM), + id) + cmd := exec.Command(*runscPath, dArgs...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Run() + }() + + err = cmd.Run() + + signal.Stop(sig) + close(sig) + + return err +} + +// setupUDSTree updates the spec to expose a UDS tree for gofer socket testing. 
+func setupUDSTree(spec *specs.Spec) (cleanup func(), err error) { + socketDir, cleanup, err := uds.CreateSocketTree("/tmp") + if err != nil { + return nil, fmt.Errorf("failed to create socket tree: %v", err) + } + + // Standard access to entire tree. + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp/sockets", + Source: socketDir, + Type: "bind", + }) + + // Individial attach points for each socket to test mounts that attach + // directly to the sockets. + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp/sockets-attach/stream/echo", + Source: filepath.Join(socketDir, "stream/echo"), + Type: "bind", + }) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp/sockets-attach/stream/nonlistening", + Source: filepath.Join(socketDir, "stream/nonlistening"), + Type: "bind", + }) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp/sockets-attach/seqpacket/echo", + Source: filepath.Join(socketDir, "seqpacket/echo"), + Type: "bind", + }) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp/sockets-attach/seqpacket/nonlistening", + Source: filepath.Join(socketDir, "seqpacket/nonlistening"), + Type: "bind", + }) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp/sockets-attach/dgram/null", + Source: filepath.Join(socketDir, "dgram/null"), + Type: "bind", + }) + + spec.Process.Env = append(spec.Process.Env, "TEST_UDS_TREE=/tmp/sockets") + spec.Process.Env = append(spec.Process.Env, "TEST_UDS_ATTACH_TREE=/tmp/sockets-attach") + + return cleanup, nil +} + +// runsTestCaseRunsc runs the test case in runsc. +func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { + // Run a new container with the test executable and filter for the + // given test suite and name. + spec := testutil.NewSpecWithArgs(append([]string{testBin}, tc.Args()...)...) + + // Mark the root as writeable, as some tests attempt to + // write to the rootfs, and expect EACCES, not EROFS. + spec.Root.Readonly = false + + // Test spec comes with pre-defined mounts that we don't want. Reset it. + spec.Mounts = nil + if *useTmpfs { + // Forces '/tmp' to be mounted as tmpfs, otherwise test that rely on + // features only available in gVisor's internal tmpfs may fail. + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp", + Type: "tmpfs", + }) + } else { + // Use a gofer-backed directory as '/tmp'. + // + // Tests might be running in parallel, so make sure each has a + // unique test temp dir. + // + // Some tests (e.g., sticky) access this mount from other + // users, so make sure it is world-accessible. + tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "") + if err != nil { + t.Fatalf("could not create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + if err := os.Chmod(tmpDir, 0777); err != nil { + t.Fatalf("could not chmod temp dir: %v", err) + } + + spec.Mounts = append(spec.Mounts, specs.Mount{ + Type: "bind", + Destination: "/tmp", + Source: tmpDir, + }) + } + + // Set environment variables that indicate we are + // running in gVisor with the given platform and network. + platformVar := "TEST_ON_GVISOR" + networkVar := "GVISOR_NETWORK" + env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network) + + // Remove env variables that cause the gunit binary to write output + // files, since they will stomp on eachother, and on the output files + // from this go test. 
+ env = filterEnv(env, []string{"GUNIT_OUTPUT", "TEST_PREMATURE_EXIT_FILE", "XML_OUTPUT_FILE"}) + + // Remove shard env variables so that the gunit binary does not try to + // intepret them. + env = filterEnv(env, []string{"TEST_SHARD_INDEX", "TEST_TOTAL_SHARDS", "GTEST_SHARD_INDEX", "GTEST_TOTAL_SHARDS"}) + + // Set TEST_TMPDIR to /tmp, as some of the syscall tests require it to + // be backed by tmpfs. + for i, kv := range env { + if strings.HasPrefix(kv, "TEST_TMPDIR=") { + env[i] = "TEST_TMPDIR=/tmp" + break + } + } + + spec.Process.Env = env + + if *addUDSTree { + cleanup, err := setupUDSTree(spec) + if err != nil { + t.Fatalf("error creating UDS tree: %v", err) + } + defer cleanup() + } + + if err := runRunsc(tc, spec); err != nil { + t.Errorf("test %q failed with error %v, want nil", tc.FullName(), err) + } +} + +// filterEnv returns an environment with the blacklisted variables removed. +func filterEnv(env, blacklist []string) []string { + var out []string + for _, kv := range env { + ok := true + for _, k := range blacklist { + if strings.HasPrefix(kv, k+"=") { + ok = false + break + } + } + if ok { + out = append(out, kv) + } + } + return out +} + +func fatalf(s string, args ...interface{}) { + fmt.Fprintf(os.Stderr, s+"\n", args...) + os.Exit(1) +} + +func matchString(a, b string) (bool, error) { + return a == b, nil +} + +func main() { + flag.Parse() + if flag.NArg() != 1 { + fatalf("test must be provided") + } + testBin := flag.Args()[0] // Only argument. + + log.SetLevel(log.Info) + if *debug { + log.SetLevel(log.Debug) + } + + if *platform != "native" && *runscPath == "" { + if err := testutil.ConfigureExePath(); err != nil { + panic(err.Error()) + } + *runscPath = specutils.ExePath + } + + // Make sure stdout and stderr are opened with O_APPEND, otherwise logs + // from outside the sandbox can (and will) stomp on logs from inside + // the sandbox. + for _, f := range []*os.File{os.Stdout, os.Stderr} { + flags, err := unix.FcntlInt(f.Fd(), unix.F_GETFL, 0) + if err != nil { + fatalf("error getting file flags for %v: %v", f, err) + } + if flags&unix.O_APPEND == 0 { + flags |= unix.O_APPEND + if _, err := unix.FcntlInt(f.Fd(), unix.F_SETFL, flags); err != nil { + fatalf("error setting file flags for %v: %v", f, err) + } + } + } + + // Get all test cases in each binary. + testCases, err := gtest.ParseTestCases(testBin, true) + if err != nil { + fatalf("ParseTestCases(%q) failed: %v", testBin, err) + } + + // Get subset of tests corresponding to shard. + indices, err := testutil.TestIndicesForShard(len(testCases)) + if err != nil { + fatalf("TestsForShard() failed: %v", err) + } + + // Resolve the absolute path for the binary. + testBin, err = filepath.Abs(testBin) + if err != nil { + fatalf("Abs() failed: %v", err) + } + + // Run the tests. + var tests []testing.InternalTest + for _, tci := range indices { + // Capture tc. + tc := testCases[tci] + tests = append(tests, testing.InternalTest{ + Name: fmt.Sprintf("%s_%s", tc.Suite, tc.Name), + F: func(t *testing.T) { + if *parallel { + t.Parallel() + } + if *platform == "native" { + // Run the test case on host. + runTestCaseNative(testBin, tc, t) + } else { + // Run the test case in runsc. 
+ runTestCaseRunsc(testBin, tc, t) + } + }, + }) + } + + testing.Main(matchString, tests, nil, nil) +} diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 31d239c0e..d69ac8356 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -1,5 +1,4 @@ -load("//tools:defs.bzl", "go_binary") -load("//test/syscalls:build_defs.bzl", "syscall_test") +load("//test/runner:defs.bzl", "syscall_test") package(licenses = ["notice"]) @@ -726,21 +725,3 @@ syscall_test(test = "//test/syscalls/linux:proc_net_unix_test") syscall_test(test = "//test/syscalls/linux:proc_net_tcp_test") syscall_test(test = "//test/syscalls/linux:proc_net_udp_test") - -go_binary( - name = "syscall_test_runner", - testonly = 1, - srcs = ["syscall_test_runner.go"], - data = [ - "//runsc", - ], - deps = [ - "//pkg/log", - "//runsc/specutils", - "//runsc/testutil", - "//test/syscalls/gtest", - "//test/uds", - "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/test/syscalls/build_defs.bzl b/test/syscalls/build_defs.bzl deleted file mode 100644 index cbab85ef7..000000000 --- a/test/syscalls/build_defs.bzl +++ /dev/null @@ -1,180 +0,0 @@ -"""Defines a rule for syscall test targets.""" - -load("//tools:defs.bzl", "loopback") - -def syscall_test( - test, - shard_count = 5, - size = "small", - use_tmpfs = False, - add_overlay = False, - add_uds_tree = False, - add_hostinet = False, - tags = None): - """syscall_test is a macro that will create targets for all platforms. - - Args: - test: the test target. - shard_count: shards for defined tests. - size: the defined test size. - use_tmpfs: use tmpfs in the defined tests. - add_overlay: add an overlay test. - add_uds_tree: add a UDS test. - add_hostinet: add a hostinet test. - tags: starting test tags. - """ - - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "native", - use_tmpfs = False, - add_uds_tree = add_uds_tree, - tags = tags, - ) - - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "kvm", - use_tmpfs = use_tmpfs, - add_uds_tree = add_uds_tree, - tags = tags, - ) - - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "ptrace", - use_tmpfs = use_tmpfs, - add_uds_tree = add_uds_tree, - tags = tags, - ) - - if add_overlay: - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "ptrace", - use_tmpfs = False, # overlay is adding a writable tmpfs on top of root. - add_uds_tree = add_uds_tree, - tags = tags, - overlay = True, - ) - - if not use_tmpfs: - # Also test shared gofer access. - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "ptrace", - use_tmpfs = use_tmpfs, - add_uds_tree = add_uds_tree, - tags = tags, - file_access = "shared", - ) - - if add_hostinet: - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "ptrace", - use_tmpfs = use_tmpfs, - network = "host", - add_uds_tree = add_uds_tree, - tags = tags, - ) - -def _syscall_test( - test, - shard_count, - size, - platform, - use_tmpfs, - tags, - network = "none", - file_access = "exclusive", - overlay = False, - add_uds_tree = False): - test_name = test.split(":")[1] - - # Prepend "runsc" to non-native platform names. 
- full_platform = platform if platform == "native" else "runsc_" + platform - - name = test_name + "_" + full_platform - if file_access == "shared": - name += "_shared" - if overlay: - name += "_overlay" - if network != "none": - name += "_" + network + "net" - - if tags == None: - tags = [] - - # Add the full_platform and file access in a tag to make it easier to run - # all the tests on a specific flavor. Use --test_tag_filters=ptrace,file_shared. - tags += [full_platform, "file_" + file_access] - - # Hash this target into one of 15 buckets. This can be used to - # randomly split targets between different workflows. - hash15 = hash(native.package_name() + name) % 15 - tags.append("hash15:" + str(hash15)) - - # TODO(b/139838000): Tests using hostinet must be disabled on Guitar until - # we figure out how to request ipv4 sockets on Guitar machines. - if network == "host": - tags.append("noguitar") - - # Disable off-host networking. - tags.append("requires-net:loopback") - - # Add tag to prevent the tests from running in a Bazel sandbox. - # TODO(b/120560048): Make the tests run without this tag. - tags.append("no-sandbox") - - # TODO(b/112165693): KVM tests are tagged "manual" to until the platform is - # more stable. - if platform == "kvm": - tags.append("manual") - tags.append("requires-kvm") - - # TODO(b/112165693): Remove when tests pass reliably. - tags.append("notap") - - args = [ - # Arguments are passed directly to syscall_test_runner binary. - "--test-name=" + test_name, - "--platform=" + platform, - "--network=" + network, - "--use-tmpfs=" + str(use_tmpfs), - "--file-access=" + file_access, - "--overlay=" + str(overlay), - "--add-uds-tree=" + str(add_uds_tree), - ] - - sh_test( - srcs = ["syscall_test_runner.sh"], - name = name, - data = [ - ":syscall_test_runner", - loopback, - test, - ], - args = args, - size = size, - tags = tags, - shard_count = shard_count, - ) - -def sh_test(**kwargs): - """Wraps the standard sh_test.""" - native.sh_test( - **kwargs - ) diff --git a/test/syscalls/gtest/BUILD b/test/syscalls/gtest/BUILD deleted file mode 100644 index de4b2727c..000000000 --- a/test/syscalls/gtest/BUILD +++ /dev/null @@ -1,9 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "gtest", - srcs = ["gtest.go"], - visibility = ["//:sandbox"], -) diff --git a/test/syscalls/gtest/gtest.go b/test/syscalls/gtest/gtest.go deleted file mode 100644 index bdec8eb07..000000000 --- a/test/syscalls/gtest/gtest.go +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package gtest contains helpers for running google-test tests from Go. -package gtest - -import ( - "fmt" - "os/exec" - "strings" -) - -var ( - // ListTestFlag is the flag that will list tests in gtest binaries. - ListTestFlag = "--gtest_list_tests" - - // FilterTestFlag is the flag that will filter tests in gtest binaries. 
- FilterTestFlag = "--gtest_filter" -) - -// TestCase is a single gtest test case. -type TestCase struct { - // Suite is the suite for this test. - Suite string - - // Name is the name of this individual test. - Name string -} - -// FullName returns the name of the test including the suite. It is suitable to -// pass to "-gtest_filter". -func (tc TestCase) FullName() string { - return fmt.Sprintf("%s.%s", tc.Suite, tc.Name) -} - -// ParseTestCases calls a gtest test binary to list its test and returns a -// slice with the name and suite of each test. -func ParseTestCases(testBin string, extraArgs ...string) ([]TestCase, error) { - args := append([]string{ListTestFlag}, extraArgs...) - cmd := exec.Command(testBin, args...) - out, err := cmd.Output() - if err != nil { - exitErr, ok := err.(*exec.ExitError) - if !ok { - return nil, fmt.Errorf("could not enumerate gtest tests: %v", err) - } - return nil, fmt.Errorf("could not enumerate gtest tests: %v\nstderr:\n%s", err, exitErr.Stderr) - } - - var t []TestCase - var suite string - for _, line := range strings.Split(string(out), "\n") { - // Strip comments. - line = strings.Split(line, "#")[0] - - // New suite? - if !strings.HasPrefix(line, " ") { - suite = strings.TrimSuffix(strings.TrimSpace(line), ".") - continue - } - - // Individual test. - name := strings.TrimSpace(line) - - // Do we have a suite yet? - if suite == "" { - return nil, fmt.Errorf("test without a suite: %v", name) - } - - // Add this individual test. - t = append(t, TestCase{ - Suite: suite, - Name: name, - }) - - } - - if len(t) == 0 { - return nil, fmt.Errorf("no tests parsed from %v", testBin) - } - return t, nil -} diff --git a/test/syscalls/linux/alarm.cc b/test/syscalls/linux/alarm.cc index d89269985..940c97285 100644 --- a/test/syscalls/linux/alarm.cc +++ b/test/syscalls/linux/alarm.cc @@ -188,6 +188,5 @@ int main(int argc, char** argv) { TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); gvisor::testing::TestInit(&argc, &argv); - - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index b5e0a512b..07bd527e6 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -868,6 +868,5 @@ int main(int argc, char** argv) { } gvisor::testing::TestInit(&argc, &argv); - - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 421c15b87..c7cc5816e 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -1128,5 +1128,5 @@ int main(int argc, char** argv) { exit(err); } - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc index b77e4cbd1..8b48f0804 100644 --- a/test/syscalls/linux/itimer.cc +++ b/test/syscalls/linux/itimer.cc @@ -349,6 +349,5 @@ int main(int argc, char** argv) { } gvisor::testing::TestInit(&argc, &argv); - - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/prctl.cc b/test/syscalls/linux/prctl.cc index d07571a5f..04c5161f5 100644 --- a/test/syscalls/linux/prctl.cc +++ b/test/syscalls/linux/prctl.cc @@ -226,5 +226,5 @@ int main(int argc, char** argv) { prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0)); } - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/prctl_setuid.cc b/test/syscalls/linux/prctl_setuid.cc index 30f0d75b3..c4e9cf528 100644 --- 
a/test/syscalls/linux/prctl_setuid.cc +++ b/test/syscalls/linux/prctl_setuid.cc @@ -264,5 +264,5 @@ int main(int argc, char** argv) { prctl(PR_GET_KEEPCAPS, 0, 0, 0, 0); } - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index a23fdb58d..f91187e75 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -2076,5 +2076,5 @@ int main(int argc, char** argv) { } gvisor::testing::TestInit(&argc, &argv); - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc index 4dd5cf27b..bfe3e2603 100644 --- a/test/syscalls/linux/ptrace.cc +++ b/test/syscalls/linux/ptrace.cc @@ -1208,5 +1208,5 @@ int main(int argc, char** argv) { gvisor::testing::RunExecveChild(); } - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/rtsignal.cc b/test/syscalls/linux/rtsignal.cc index 81d193ffd..ed27e2566 100644 --- a/test/syscalls/linux/rtsignal.cc +++ b/test/syscalls/linux/rtsignal.cc @@ -167,6 +167,5 @@ int main(int argc, char** argv) { TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); gvisor::testing::TestInit(&argc, &argv); - - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index 2c947feb7..cf6499f8b 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -411,5 +411,5 @@ int main(int argc, char** argv) { } gvisor::testing::TestInit(&argc, &argv); - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/sigiret.cc b/test/syscalls/linux/sigiret.cc index 4deb1ae95..6227774a4 100644 --- a/test/syscalls/linux/sigiret.cc +++ b/test/syscalls/linux/sigiret.cc @@ -132,6 +132,5 @@ int main(int argc, char** argv) { TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); gvisor::testing::TestInit(&argc, &argv); - - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/signalfd.cc b/test/syscalls/linux/signalfd.cc index 95be4b66c..389e5fca2 100644 --- a/test/syscalls/linux/signalfd.cc +++ b/test/syscalls/linux/signalfd.cc @@ -369,5 +369,5 @@ int main(int argc, char** argv) { gvisor::testing::TestInit(&argc, &argv); - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/sigstop.cc b/test/syscalls/linux/sigstop.cc index 7db57d968..b2fcedd62 100644 --- a/test/syscalls/linux/sigstop.cc +++ b/test/syscalls/linux/sigstop.cc @@ -147,5 +147,5 @@ int main(int argc, char** argv) { return 1; } - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/sigtimedwait.cc b/test/syscalls/linux/sigtimedwait.cc index 1e5bf5942..4f8afff15 100644 --- a/test/syscalls/linux/sigtimedwait.cc +++ b/test/syscalls/linux/sigtimedwait.cc @@ -319,6 +319,5 @@ int main(int argc, char** argv) { TEST_PCHECK(sigprocmask(SIG_BLOCK, &set, nullptr) == 0); gvisor::testing::TestInit(&argc, &argv); - - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc index 2f92c27da..4b3c44527 100644 --- a/test/syscalls/linux/timers.cc +++ b/test/syscalls/linux/timers.cc @@ -658,5 +658,5 @@ int main(int argc, char** argv) { } } - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/linux/vfork.cc 
b/test/syscalls/linux/vfork.cc index 0aaba482d..19d05998e 100644 --- a/test/syscalls/linux/vfork.cc +++ b/test/syscalls/linux/vfork.cc @@ -191,5 +191,5 @@ int main(int argc, char** argv) { return gvisor::testing::RunChild(); } - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go deleted file mode 100644 index ae342b68c..000000000 --- a/test/syscalls/syscall_test_runner.go +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary syscall_test_runner runs the syscall test suites in gVisor -// containers and on the host platform. -package main - -import ( - "flag" - "fmt" - "io/ioutil" - "os" - "os/exec" - "os/signal" - "path/filepath" - "strings" - "syscall" - "testing" - "time" - - specs "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" - "gvisor.dev/gvisor/test/syscalls/gtest" - "gvisor.dev/gvisor/test/uds" -) - -// Location of syscall tests, relative to the repo root. -const testDir = "test/syscalls/linux" - -var ( - testName = flag.String("test-name", "", "name of test binary to run") - debug = flag.Bool("debug", false, "enable debug logs") - strace = flag.Bool("strace", false, "enable strace logs") - platform = flag.String("platform", "ptrace", "platform to run on") - network = flag.String("network", "none", "network stack to run on (sandbox, host, none)") - useTmpfs = flag.Bool("use-tmpfs", false, "mounts tmpfs for /tmp") - fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode") - overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay") - parallel = flag.Bool("parallel", false, "run tests in parallel") - runscPath = flag.String("runsc", "", "path to runsc binary") - - addUDSTree = flag.Bool("add-uds-tree", false, "expose a tree of UDS utilities for use in tests") -) - -// runTestCaseNative runs the test case directly on the host machine. -func runTestCaseNative(testBin string, tc gtest.TestCase, t *testing.T) { - // These tests might be running in parallel, so make sure they have a - // unique test temp dir. - tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "") - if err != nil { - t.Fatalf("could not create temp dir: %v", err) - } - defer os.RemoveAll(tmpDir) - - // Replace TEST_TMPDIR in the current environment with something - // unique. - env := os.Environ() - newEnvVar := "TEST_TMPDIR=" + tmpDir - var found bool - for i, kv := range env { - if strings.HasPrefix(kv, "TEST_TMPDIR=") { - env[i] = newEnvVar - found = true - break - } - } - if !found { - env = append(env, newEnvVar) - } - // Remove env variables that cause the gunit binary to write output - // files, since they will stomp on eachother, and on the output files - // from this go test. 
- env = filterEnv(env, []string{"GUNIT_OUTPUT", "TEST_PREMATURE_EXIT_FILE", "XML_OUTPUT_FILE"}) - - // Remove shard env variables so that the gunit binary does not try to - // intepret them. - env = filterEnv(env, []string{"TEST_SHARD_INDEX", "TEST_TOTAL_SHARDS", "GTEST_SHARD_INDEX", "GTEST_TOTAL_SHARDS"}) - - if *addUDSTree { - socketDir, cleanup, err := uds.CreateSocketTree("/tmp") - if err != nil { - t.Fatalf("failed to create socket tree: %v", err) - } - defer cleanup() - - env = append(env, "TEST_UDS_TREE="+socketDir) - // On Linux, the concept of "attach" location doesn't exist. - // Just pass the same path to make these test identical. - env = append(env, "TEST_UDS_ATTACH_TREE="+socketDir) - } - - cmd := exec.Command(testBin, gtest.FilterTestFlag+"="+tc.FullName()) - cmd.Env = env - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - ws := err.(*exec.ExitError).Sys().(syscall.WaitStatus) - t.Errorf("test %q exited with status %d, want 0", tc.FullName(), ws.ExitStatus()) - } -} - -// runRunsc runs spec in runsc in a standard test configuration. -// -// runsc logs will be saved to a path in TEST_UNDECLARED_OUTPUTS_DIR. -// -// Returns an error if the sandboxed application exits non-zero. -func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { - bundleDir, err := testutil.SetupBundleDir(spec) - if err != nil { - return fmt.Errorf("SetupBundleDir failed: %v", err) - } - defer os.RemoveAll(bundleDir) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - return fmt.Errorf("SetupRootDir failed: %v", err) - } - defer os.RemoveAll(rootDir) - - name := tc.FullName() - id := testutil.UniqueContainerID() - log.Infof("Running test %q in container %q", name, id) - specutils.LogSpec(spec) - - args := []string{ - "-root", rootDir, - "-network", *network, - "-log-format=text", - "-TESTONLY-unsafe-nonroot=true", - "-net-raw=true", - fmt.Sprintf("-panic-signal=%d", syscall.SIGTERM), - "-watchdog-action=panic", - "-platform", *platform, - "-file-access", *fileAccess, - } - if *overlay { - args = append(args, "-overlay") - } - if *debug { - args = append(args, "-debug", "-log-packets=true") - } - if *strace { - args = append(args, "-strace") - } - if *addUDSTree { - args = append(args, "-fsgofer-host-uds") - } - - if outDir, ok := syscall.Getenv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { - tdir := filepath.Join(outDir, strings.Replace(name, "/", "_", -1)) - if err := os.MkdirAll(tdir, 0755); err != nil { - return fmt.Errorf("could not create test dir: %v", err) - } - debugLogDir, err := ioutil.TempDir(tdir, "runsc") - if err != nil { - return fmt.Errorf("could not create temp dir: %v", err) - } - debugLogDir += "/" - log.Infof("runsc logs: %s", debugLogDir) - args = append(args, "-debug-log", debugLogDir) - - // Default -log sends messages to stderr which makes reading the test log - // difficult. Instead, drop them when debug log is enabled given it's a - // better place for these messages. - args = append(args, "-log=/dev/null") - } - - // Current process doesn't have CAP_SYS_ADMIN, create user namespace and run - // as root inside that namespace to get it. - rArgs := append(args, "run", "--bundle", bundleDir, id) - cmd := exec.Command(*runscPath, rArgs...) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS, - // Set current user/group as root inside the namespace. 
- UidMappings: []syscall.SysProcIDMap{ - {ContainerID: 0, HostID: os.Getuid(), Size: 1}, - }, - GidMappings: []syscall.SysProcIDMap{ - {ContainerID: 0, HostID: os.Getgid(), Size: 1}, - }, - GidMappingsEnableSetgroups: false, - Credential: &syscall.Credential{ - Uid: 0, - Gid: 0, - }, - } - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - sig := make(chan os.Signal, 1) - signal.Notify(sig, syscall.SIGTERM) - go func() { - s, ok := <-sig - if !ok { - return - } - log.Warningf("%s: Got signal: %v", name, s) - done := make(chan bool) - dArgs := append([]string{}, args...) - dArgs = append(dArgs, "-alsologtostderr=true", "debug", "--stacks", id) - go func(dArgs []string) { - cmd := exec.Command(*runscPath, dArgs...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.Run() - done <- true - }(dArgs) - - timeout := time.After(3 * time.Second) - select { - case <-timeout: - log.Infof("runsc debug --stacks is timeouted") - case <-done: - } - - log.Warningf("Send SIGTERM to the sandbox process") - dArgs = append(args, "debug", - fmt.Sprintf("--signal=%d", syscall.SIGTERM), - id) - cmd := exec.Command(*runscPath, dArgs...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.Run() - }() - - err = cmd.Run() - - signal.Stop(sig) - close(sig) - - return err -} - -// setupUDSTree updates the spec to expose a UDS tree for gofer socket testing. -func setupUDSTree(spec *specs.Spec) (cleanup func(), err error) { - socketDir, cleanup, err := uds.CreateSocketTree("/tmp") - if err != nil { - return nil, fmt.Errorf("failed to create socket tree: %v", err) - } - - // Standard access to entire tree. - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp/sockets", - Source: socketDir, - Type: "bind", - }) - - // Individial attach points for each socket to test mounts that attach - // directly to the sockets. - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp/sockets-attach/stream/echo", - Source: filepath.Join(socketDir, "stream/echo"), - Type: "bind", - }) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp/sockets-attach/stream/nonlistening", - Source: filepath.Join(socketDir, "stream/nonlistening"), - Type: "bind", - }) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp/sockets-attach/seqpacket/echo", - Source: filepath.Join(socketDir, "seqpacket/echo"), - Type: "bind", - }) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp/sockets-attach/seqpacket/nonlistening", - Source: filepath.Join(socketDir, "seqpacket/nonlistening"), - Type: "bind", - }) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp/sockets-attach/dgram/null", - Source: filepath.Join(socketDir, "dgram/null"), - Type: "bind", - }) - - spec.Process.Env = append(spec.Process.Env, "TEST_UDS_TREE=/tmp/sockets") - spec.Process.Env = append(spec.Process.Env, "TEST_UDS_ATTACH_TREE=/tmp/sockets-attach") - - return cleanup, nil -} - -// runsTestCaseRunsc runs the test case in runsc. -func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { - // Run a new container with the test executable and filter for the - // given test suite and name. - spec := testutil.NewSpecWithArgs(testBin, gtest.FilterTestFlag+"="+tc.FullName()) - - // Mark the root as writeable, as some tests attempt to - // write to the rootfs, and expect EACCES, not EROFS. - spec.Root.Readonly = false - - // Test spec comes with pre-defined mounts that we don't want. Reset it. 
- spec.Mounts = nil - if *useTmpfs { - // Forces '/tmp' to be mounted as tmpfs, otherwise test that rely on - // features only available in gVisor's internal tmpfs may fail. - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp", - Type: "tmpfs", - }) - } else { - // Use a gofer-backed directory as '/tmp'. - // - // Tests might be running in parallel, so make sure each has a - // unique test temp dir. - // - // Some tests (e.g., sticky) access this mount from other - // users, so make sure it is world-accessible. - tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "") - if err != nil { - t.Fatalf("could not create temp dir: %v", err) - } - defer os.RemoveAll(tmpDir) - - if err := os.Chmod(tmpDir, 0777); err != nil { - t.Fatalf("could not chmod temp dir: %v", err) - } - - spec.Mounts = append(spec.Mounts, specs.Mount{ - Type: "bind", - Destination: "/tmp", - Source: tmpDir, - }) - } - - // Set environment variables that indicate we are - // running in gVisor with the given platform and network. - platformVar := "TEST_ON_GVISOR" - networkVar := "GVISOR_NETWORK" - env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network) - - // Remove env variables that cause the gunit binary to write output - // files, since they will stomp on eachother, and on the output files - // from this go test. - env = filterEnv(env, []string{"GUNIT_OUTPUT", "TEST_PREMATURE_EXIT_FILE", "XML_OUTPUT_FILE"}) - - // Remove shard env variables so that the gunit binary does not try to - // intepret them. - env = filterEnv(env, []string{"TEST_SHARD_INDEX", "TEST_TOTAL_SHARDS", "GTEST_SHARD_INDEX", "GTEST_TOTAL_SHARDS"}) - - // Set TEST_TMPDIR to /tmp, as some of the syscall tests require it to - // be backed by tmpfs. - for i, kv := range env { - if strings.HasPrefix(kv, "TEST_TMPDIR=") { - env[i] = "TEST_TMPDIR=/tmp" - break - } - } - - spec.Process.Env = env - - if *addUDSTree { - cleanup, err := setupUDSTree(spec) - if err != nil { - t.Fatalf("error creating UDS tree: %v", err) - } - defer cleanup() - } - - if err := runRunsc(tc, spec); err != nil { - t.Errorf("test %q failed with error %v, want nil", tc.FullName(), err) - } -} - -// filterEnv returns an environment with the blacklisted variables removed. -func filterEnv(env, blacklist []string) []string { - var out []string - for _, kv := range env { - ok := true - for _, k := range blacklist { - if strings.HasPrefix(kv, k+"=") { - ok = false - break - } - } - if ok { - out = append(out, kv) - } - } - return out -} - -func fatalf(s string, args ...interface{}) { - fmt.Fprintf(os.Stderr, s+"\n", args...) - os.Exit(1) -} - -func matchString(a, b string) (bool, error) { - return a == b, nil -} - -func main() { - flag.Parse() - if *testName == "" { - fatalf("test-name flag must be provided") - } - - log.SetLevel(log.Info) - if *debug { - log.SetLevel(log.Debug) - } - - if *platform != "native" && *runscPath == "" { - if err := testutil.ConfigureExePath(); err != nil { - panic(err.Error()) - } - *runscPath = specutils.ExePath - } - - // Make sure stdout and stderr are opened with O_APPEND, otherwise logs - // from outside the sandbox can (and will) stomp on logs from inside - // the sandbox. 
- for _, f := range []*os.File{os.Stdout, os.Stderr} { - flags, err := unix.FcntlInt(f.Fd(), unix.F_GETFL, 0) - if err != nil { - fatalf("error getting file flags for %v: %v", f, err) - } - if flags&unix.O_APPEND == 0 { - flags |= unix.O_APPEND - if _, err := unix.FcntlInt(f.Fd(), unix.F_SETFL, flags); err != nil { - fatalf("error setting file flags for %v: %v", f, err) - } - } - } - - // Get path to test binary. - fullTestName := filepath.Join(testDir, *testName) - testBin, err := testutil.FindFile(fullTestName) - if err != nil { - fatalf("FindFile(%q) failed: %v", fullTestName, err) - } - - // Get all test cases in each binary. - testCases, err := gtest.ParseTestCases(testBin) - if err != nil { - fatalf("ParseTestCases(%q) failed: %v", testBin, err) - } - - // Get subset of tests corresponding to shard. - indices, err := testutil.TestIndicesForShard(len(testCases)) - if err != nil { - fatalf("TestsForShard() failed: %v", err) - } - - // Run the tests. - var tests []testing.InternalTest - for _, tci := range indices { - // Capture tc. - tc := testCases[tci] - testName := fmt.Sprintf("%s_%s", tc.Suite, tc.Name) - tests = append(tests, testing.InternalTest{ - Name: testName, - F: func(t *testing.T) { - if *parallel { - t.Parallel() - } - if *platform == "native" { - // Run the test case on host. - runTestCaseNative(testBin, tc, t) - } else { - // Run the test case in runsc. - runTestCaseRunsc(testBin, tc, t) - } - }, - }) - } - - testing.Main(matchString, tests, nil, nil) -} diff --git a/test/syscalls/syscall_test_runner.sh b/test/syscalls/syscall_test_runner.sh deleted file mode 100755 index 864bb2de4..000000000 --- a/test/syscalls/syscall_test_runner.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Copyright 2018 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# syscall_test_runner.sh is a simple wrapper around the go syscall test runner. -# It exists so that we can build the syscall test runner once, and use it for -# all syscall tests, rather than build it for each test run. - -set -euf -x -o pipefail - -echo -- "$@" - -if [[ -n "${TEST_UNDECLARED_OUTPUTS_DIR}" ]]; then - mkdir -p "${TEST_UNDECLARED_OUTPUTS_DIR}" - chmod a+rwx "${TEST_UNDECLARED_OUTPUTS_DIR}" -fi - -# Get location of syscall_test_runner binary. -readonly runner=$(find "${TEST_SRCDIR}" -name syscall_test_runner) - -# Pass the arguments of this script directly to the runner. 
-exec "${runner}" "$@" diff --git a/test/util/BUILD b/test/util/BUILD index 1f22ebe29..8b5a0f25c 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_library", "cc_test", "gtest", "select_system") +load("//tools:defs.bzl", "cc_library", "cc_test", "gbenchmark", "gtest", "select_system") package( default_visibility = ["//:sandbox"], @@ -260,6 +260,7 @@ cc_library( "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", gtest, + gbenchmark, ], ) diff --git a/test/util/test_main.cc b/test/util/test_main.cc index 5c7ee0064..1f389e58f 100644 --- a/test/util/test_main.cc +++ b/test/util/test_main.cc @@ -16,5 +16,5 @@ int main(int argc, char** argv) { gvisor::testing::TestInit(&argc, &argv); - return RUN_ALL_TESTS(); + return gvisor::testing::RunAllTests(); } diff --git a/test/util/test_util.h b/test/util/test_util.h index 2d22b0eb8..c5cb9d6d6 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -771,6 +771,7 @@ std::string RunfilePath(std::string path); #endif void TestInit(int* argc, char*** argv); +int RunAllTests(void); } // namespace testing } // namespace gvisor diff --git a/test/util/test_util_impl.cc b/test/util/test_util_impl.cc index ba7c0a85b..7e1ad9e66 100644 --- a/test/util/test_util_impl.cc +++ b/test/util/test_util_impl.cc @@ -17,8 +17,12 @@ #include "gtest/gtest.h" #include "absl/flags/flag.h" #include "absl/flags/parse.h" +#include "benchmark/benchmark.h" #include "test/util/logging.h" +extern bool FLAGS_benchmark_list_tests; +extern std::string FLAGS_benchmark_filter; + namespace gvisor { namespace testing { @@ -26,6 +30,7 @@ void SetupGvisorDeathTest() {} void TestInit(int* argc, char*** argv) { ::testing::InitGoogleTest(argc, *argv); + benchmark::Initialize(argc, *argv); ::absl::ParseCommandLine(*argc, *argv); // Always mask SIGPIPE as it's common and tests aren't expected to handle it. @@ -34,5 +39,14 @@ void TestInit(int* argc, char*** argv) { TEST_CHECK(sigaction(SIGPIPE, &sa, nullptr) == 0); } +int RunAllTests() { + if (FLAGS_benchmark_list_tests || FLAGS_benchmark_filter != ".") { + benchmark::RunSpecifiedBenchmarks(); + return 0; + } else { + return RUN_ALL_TESTS(); + } +} + } // namespace testing } // namespace gvisor diff --git a/tools/bazeldefs/defs.bzl b/tools/bazeldefs/defs.bzl index 6798362dc..6f091d759 100644 --- a/tools/bazeldefs/defs.bzl +++ b/tools/bazeldefs/defs.bzl @@ -21,6 +21,7 @@ go_image = _go_image go_embed_data = _go_embed_data go_suffixes = _go_suffixes gtest = "@com_google_googletest//:gtest" +gbenchmark = "@com_google_benchmark//:benchmark" loopback = "//tools/bazeldefs:loopback" proto_library = native.proto_library pkg_deb = _pkg_deb diff --git a/tools/defs.bzl b/tools/defs.bzl index 39f035f12..4eece2d83 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -7,7 +7,7 @@ change for Google-internal and bazel-compatible rules. 
load("//tools/go_stateify:defs.bzl", "go_stateify") load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") -load("//tools/bazeldefs:defs.bzl", "go_suffixes", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") +load("//tools/bazeldefs:defs.bzl", "go_suffixes", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") # Delegate directly. cc_binary = _cc_binary @@ -21,6 +21,7 @@ go_image = _go_image go_test = _go_test go_tool_library = _go_tool_library gtest = _gtest +gbenchmark = _gbenchmark pkg_deb = _pkg_deb pkg_tar = _pkg_tar py_library = _py_library -- cgit v1.2.3 From 4a73bae269ae9f52a962ae3b08a17ccaacf7ba80 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 20 Feb 2020 15:19:40 -0800 Subject: Initial network namespace support. TCP/IP will work with netstack networking. hostinet doesn't work, and sockets will have the same behavior as it is now. Before the userspace is able to create device, the default loopback device can be used to test. /proc/net and /sys/net will still be connected to the root network stack; this is the same behavior now. 
Issue #1833 PiperOrigin-RevId: 296309389 --- pkg/sentry/fs/proc/net.go | 5 +- pkg/sentry/fs/proc/sys_net.go | 4 +- pkg/sentry/fsimpl/proc/tasks_net.go | 5 +- pkg/sentry/fsimpl/proc/tasks_sys.go | 4 +- pkg/sentry/fsimpl/testutil/kernel.go | 1 + pkg/sentry/inet/BUILD | 1 + pkg/sentry/inet/namespace.go | 99 +++++++++++++++++++++++++ pkg/sentry/kernel/kernel.go | 26 ++++--- pkg/sentry/kernel/task.go | 9 +-- pkg/sentry/kernel/task_clone.go | 16 ++-- pkg/sentry/kernel/task_net.go | 19 +++-- pkg/sentry/kernel/task_start.go | 8 +- pkg/tcpip/time_unsafe.go | 2 + runsc/boot/BUILD | 2 +- runsc/boot/controller.go | 11 +-- runsc/boot/loader.go | 121 +++++++++++++++++++++---------- runsc/boot/network.go | 27 +++++++ runsc/boot/pprof.go | 18 ----- runsc/boot/pprof/BUILD | 11 +++ runsc/boot/pprof/pprof.go | 20 +++++ runsc/sandbox/network.go | 25 +------ test/syscalls/BUILD | 2 + test/syscalls/linux/BUILD | 17 +++++ test/syscalls/linux/network_namespace.cc | 121 +++++++++++++++++++++++++++++++ 24 files changed, 451 insertions(+), 123 deletions(-) create mode 100644 pkg/sentry/inet/namespace.go delete mode 100644 runsc/boot/pprof.go create mode 100644 runsc/boot/pprof/BUILD create mode 100644 runsc/boot/pprof/pprof.go create mode 100644 test/syscalls/linux/network_namespace.cc (limited to 'test') diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 6f2775344..95d5817ff 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -43,7 +43,10 @@ import ( // newNet creates a new proc net entry. func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSource) *fs.Inode { var contents map[string]*fs.Inode - if s := p.k.NetworkStack(); s != nil { + // TODO(gvisor.dev/issue/1833): Support for using the network stack in the + // network namespace of the calling process. We should make this per-process, + // a.k.a. /proc/PID/net, and make /proc/net a symlink to /proc/self/net. + if s := p.k.RootNetworkNamespace().Stack(); s != nil { contents = map[string]*fs.Inode{ "dev": seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc), "snmp": seqfile.NewSeqFileInode(ctx, &netSnmp{s: s}, msrc), diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index 0772d4ae4..d4c4b533d 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -357,7 +357,9 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { var contents map[string]*fs.Inode - if s := p.k.NetworkStack(); s != nil { + // TODO(gvisor.dev/issue/1833): Support for using the network stack in the + // network namespace of the calling process. + if s := p.k.RootNetworkNamespace().Stack(); s != nil { contents = map[string]*fs.Inode{ "ipv4": p.newSysNetIPv4Dir(ctx, msrc, s), "core": p.newSysNetCore(ctx, msrc, s), diff --git a/pkg/sentry/fsimpl/proc/tasks_net.go b/pkg/sentry/fsimpl/proc/tasks_net.go index 608fec017..d4e1812d8 100644 --- a/pkg/sentry/fsimpl/proc/tasks_net.go +++ b/pkg/sentry/fsimpl/proc/tasks_net.go @@ -39,7 +39,10 @@ import ( func newNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry { var contents map[string]*kernfs.Dentry - if stack := k.NetworkStack(); stack != nil { + // TODO(gvisor.dev/issue/1833): Support for using the network stack in the + // network namespace of the calling process. We should make this per-process, + // a.k.a. /proc/PID/net, and make /proc/net a symlink to /proc/self/net. 
+ if stack := k.RootNetworkNamespace().Stack(); stack != nil { const ( arp = "IP address HW type Flags HW address Mask Device\n" netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n" diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index c7ce74883..3d5dc463c 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -50,7 +50,9 @@ func newSysDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *k func newSysNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry { var contents map[string]*kernfs.Dentry - if stack := k.NetworkStack(); stack != nil { + // TODO(gvisor.dev/issue/1833): Support for using the network stack in the + // network namespace of the calling process. + if stack := k.RootNetworkNamespace().Stack(); stack != nil { contents = map[string]*kernfs.Dentry{ "ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ "tcp_sack": newDentry(root, inoGen.NextIno(), 0644, &tcpSackData{stack: stack}), diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go index d0be32e72..488478e29 100644 --- a/pkg/sentry/fsimpl/testutil/kernel.go +++ b/pkg/sentry/fsimpl/testutil/kernel.go @@ -128,6 +128,7 @@ func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup, mntns ThreadGroup: tc, TaskContext: &kernel.TaskContext{Name: name}, Credentials: auth.CredentialsFromContext(ctx), + NetworkNamespace: k.RootNetworkNamespace(), AllowedCPUMask: sched.NewFullCPUSet(k.ApplicationCores()), UTSNamespace: kernel.UTSNamespaceFromContext(ctx), IPCNamespace: kernel.IPCNamespaceFromContext(ctx), diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD index 334432abf..07bf39fed 100644 --- a/pkg/sentry/inet/BUILD +++ b/pkg/sentry/inet/BUILD @@ -10,6 +10,7 @@ go_library( srcs = [ "context.go", "inet.go", + "namespace.go", "test_stack.go", ], deps = [ diff --git a/pkg/sentry/inet/namespace.go b/pkg/sentry/inet/namespace.go new file mode 100644 index 000000000..c16667e7f --- /dev/null +++ b/pkg/sentry/inet/namespace.go @@ -0,0 +1,99 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package inet + +// Namespace represents a network namespace. See network_namespaces(7). +// +// +stateify savable +type Namespace struct { + // stack is the network stack implementation of this network namespace. + stack Stack `state:"nosave"` + + // creator allows kernel to create new network stack for network namespaces. + // If nil, no networking will function if network is namespaced. + creator NetworkStackCreator + + // isRoot indicates whether this is the root network namespace. + isRoot bool +} + +// NewRootNamespace creates the root network namespace, with creator +// allowing new network namespaces to be created. If creator is nil, no +// networking will function if the network is namespaced. 
+func NewRootNamespace(stack Stack, creator NetworkStackCreator) *Namespace { + return &Namespace{ + stack: stack, + creator: creator, + isRoot: true, + } +} + +// NewNamespace creates a new network namespace from the root. +func NewNamespace(root *Namespace) *Namespace { + n := &Namespace{ + creator: root.creator, + } + n.init() + return n +} + +// Stack returns the network stack of n. Stack may return nil if no network +// stack is configured. +func (n *Namespace) Stack() Stack { + return n.stack +} + +// IsRoot returns whether n is the root network namespace. +func (n *Namespace) IsRoot() bool { + return n.isRoot +} + +// RestoreRootStack restores the root network namespace with stack. This should +// only be called when restoring kernel. +func (n *Namespace) RestoreRootStack(stack Stack) { + if !n.isRoot { + panic("RestoreRootStack can only be called on root network namespace") + } + if n.stack != nil { + panic("RestoreRootStack called after a stack has already been set") + } + n.stack = stack +} + +func (n *Namespace) init() { + // Root network namespace will have stack assigned later. + if n.isRoot { + return + } + if n.creator != nil { + var err error + n.stack, err = n.creator.CreateStack() + if err != nil { + panic(err) + } + } +} + +// afterLoad is invoked by stateify. +func (n *Namespace) afterLoad() { + n.init() +} + +// NetworkStackCreator allows new instances of a network stack to be created. It +// is used by the kernel to create new network namespaces when requested. +type NetworkStackCreator interface { + // CreateStack creates a new network stack for a network namespace. + CreateStack() (Stack, error) +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 7da0368f1..c62fd6eb1 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -111,7 +111,7 @@ type Kernel struct { timekeeper *Timekeeper tasks *TaskSet rootUserNamespace *auth.UserNamespace - networkStack inet.Stack `state:"nosave"` + rootNetworkNamespace *inet.Namespace applicationCores uint useHostCores bool extraAuxv []arch.AuxEntry @@ -260,8 +260,9 @@ type InitKernelArgs struct { // RootUserNamespace is the root user namespace. RootUserNamespace *auth.UserNamespace - // NetworkStack is the TCP/IP network stack. NetworkStack may be nil. - NetworkStack inet.Stack + // RootNetworkNamespace is the root network namespace. If nil, no networking + // will be available. + RootNetworkNamespace *inet.Namespace // ApplicationCores is the number of logical CPUs visible to sandboxed // applications. The set of logical CPU IDs is [0, ApplicationCores); thus @@ -320,7 +321,10 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.rootUTSNamespace = args.RootUTSNamespace k.rootIPCNamespace = args.RootIPCNamespace k.rootAbstractSocketNamespace = args.RootAbstractSocketNamespace - k.networkStack = args.NetworkStack + k.rootNetworkNamespace = args.RootNetworkNamespace + if k.rootNetworkNamespace == nil { + k.rootNetworkNamespace = inet.NewRootNamespace(nil, nil) + } k.applicationCores = args.ApplicationCores if args.UseHostCores { k.useHostCores = true @@ -543,8 +547,6 @@ func (ts *TaskSet) unregisterEpollWaiters() { func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack, clocks sentrytime.Clocks) error { loadStart := time.Now() - k.networkStack = net - initAppCores := k.applicationCores // Load the pre-saved CPUID FeatureSet. 
@@ -575,6 +577,10 @@ func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack, clocks sentrytime.Clocks) log.Infof("Kernel load stats: %s", &stats) log.Infof("Kernel load took [%s].", time.Since(kernelStart)) + // rootNetworkNamespace should be populated after loading the state file. + // Restore the root network stack. + k.rootNetworkNamespace.RestoreRootStack(net) + // Load the memory file's state. memoryStart := time.Now() if err := k.mf.LoadFrom(k.SupervisorContext(), r); err != nil { @@ -905,6 +911,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, FSContext: fsContext, FDTable: args.FDTable, Credentials: args.Credentials, + NetworkNamespace: k.RootNetworkNamespace(), AllowedCPUMask: sched.NewFullCPUSet(k.applicationCores), UTSNamespace: args.UTSNamespace, IPCNamespace: args.IPCNamespace, @@ -1255,10 +1262,9 @@ func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace { return k.rootAbstractSocketNamespace } -// NetworkStack returns the network stack. NetworkStack may return nil if no -// network stack is available. -func (k *Kernel) NetworkStack() inet.Stack { - return k.networkStack +// RootNetworkNamespace returns the root network namespace, always non-nil. +func (k *Kernel) RootNetworkNamespace() *inet.Namespace { + return k.rootNetworkNamespace } // GlobalInit returns the thread group with ID 1 in the root PID namespace, or diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index a3443ff21..e37e23231 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -486,13 +486,10 @@ type Task struct { numaPolicy int32 numaNodeMask uint64 - // If netns is true, the task is in a non-root network namespace. Network - // namespaces aren't currently implemented in full; being in a network - // namespace simply prevents the task from observing any network devices - // (including loopback) or using abstract socket addresses (see unix(7)). + // netns is the task's network namespace. netns is never nil. // - // netns is protected by mu. netns is owned by the task goroutine. - netns bool + // netns is protected by mu. + netns *inet.Namespace // If rseqPreempted is true, before the next call to p.Switch(), // interrupt rseq critical regions as defined by rseqAddr and diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index ba74b4c1c..78866f280 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -54,8 +55,7 @@ type SharingOptions struct { NewUserNamespace bool // If NewNetworkNamespace is true, the task should have an independent - // network namespace. (Note that network namespaces are not really - // implemented; see comment on Task.netns for details.) + // network namespace. NewNetworkNamespace bool // If NewFiles is true, the task should use an independent file descriptor @@ -199,6 +199,11 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { ipcns = NewIPCNamespace(userns) } + netns := t.NetworkNamespace() + if opts.NewNetworkNamespace { + netns = inet.NewNamespace(netns) + } + // TODO(b/63601033): Implement CLONE_NEWNS. 
mntnsVFS2 := t.mountNamespaceVFS2 if mntnsVFS2 != nil { @@ -268,7 +273,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { FDTable: fdTable, Credentials: creds, Niceness: t.Niceness(), - NetworkNamespaced: t.netns, + NetworkNamespace: netns, AllowedCPUMask: t.CPUMask(), UTSNamespace: utsns, IPCNamespace: ipcns, @@ -283,9 +288,6 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } else { cfg.InheritParent = t } - if opts.NewNetworkNamespace { - cfg.NetworkNamespaced = true - } nt, err := t.tg.pidns.owner.NewTask(cfg) if err != nil { if opts.NewThreadGroup { @@ -482,7 +484,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { t.mu.Unlock() return syserror.EPERM } - t.netns = true + t.netns = inet.NewNamespace(t.netns) } if opts.NewUTSNamespace { if !haveCapSysAdmin { diff --git a/pkg/sentry/kernel/task_net.go b/pkg/sentry/kernel/task_net.go index 172a31e1d..f7711232c 100644 --- a/pkg/sentry/kernel/task_net.go +++ b/pkg/sentry/kernel/task_net.go @@ -22,14 +22,23 @@ import ( func (t *Task) IsNetworkNamespaced() bool { t.mu.Lock() defer t.mu.Unlock() - return t.netns + return !t.netns.IsRoot() } // NetworkContext returns the network stack used by the task. NetworkContext // may return nil if no network stack is available. +// +// TODO(gvisor.dev/issue/1833): Migrate callers of this method to +// NetworkNamespace(). func (t *Task) NetworkContext() inet.Stack { - if t.IsNetworkNamespaced() { - return nil - } - return t.k.networkStack + t.mu.Lock() + defer t.mu.Unlock() + return t.netns.Stack() +} + +// NetworkNamespace returns the network namespace observed by the task. +func (t *Task) NetworkNamespace() *inet.Namespace { + t.mu.Lock() + defer t.mu.Unlock() + return t.netns } diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index f9236a842..a5035bb7f 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" @@ -65,9 +66,8 @@ type TaskConfig struct { // Niceness is the niceness of the new task. Niceness int - // If NetworkNamespaced is true, the new task should observe a non-root - // network namespace. - NetworkNamespaced bool + // NetworkNamespace is the network namespace to be used for the new task. + NetworkNamespace *inet.Namespace // AllowedCPUMask contains the cpus that this task can run on. AllowedCPUMask sched.CPUSet @@ -133,7 +133,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { allowedCPUMask: cfg.AllowedCPUMask.Copy(), ioUsage: &usage.IO{}, niceness: cfg.Niceness, - netns: cfg.NetworkNamespaced, + netns: cfg.NetworkNamespace, utsns: cfg.UTSNamespace, ipcns: cfg.IPCNamespace, abstractSockets: cfg.AbstractSocketNamespace, diff --git a/pkg/tcpip/time_unsafe.go b/pkg/tcpip/time_unsafe.go index 48764b978..2f98a996f 100644 --- a/pkg/tcpip/time_unsafe.go +++ b/pkg/tcpip/time_unsafe.go @@ -25,6 +25,8 @@ import ( ) // StdClock implements Clock with the time package. 
+// +// +stateify savable type StdClock struct{} var _ Clock = (*StdClock)(nil) diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index ae4dd102a..26f68fe3d 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -19,7 +19,6 @@ go_library( "loader_amd64.go", "loader_arm64.go", "network.go", - "pprof.go", "strace.go", "user.go", ], @@ -91,6 +90,7 @@ go_library( "//pkg/usermem", "//runsc/boot/filter", "//runsc/boot/platforms", + "//runsc/boot/pprof", "//runsc/specutils", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 9c9e94864..17e774e0c 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -32,6 +32,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot/pprof" "gvisor.dev/gvisor/runsc/specutils" ) @@ -142,7 +143,7 @@ func newController(fd int, l *Loader) (*controller, error) { } srv.Register(manager) - if eps, ok := l.k.NetworkStack().(*netstack.Stack); ok { + if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok { net := &Network{ Stack: eps.Stack, } @@ -341,7 +342,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { return fmt.Errorf("creating memory file: %v", err) } k.SetMemoryFile(mf) - networkStack := cm.l.k.NetworkStack() + networkStack := cm.l.k.RootNetworkNamespace().Stack() cm.l.k = k // Set up the restore environment. @@ -365,9 +366,9 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { } if cm.l.conf.ProfileEnable { - // initializePProf opens /proc/self/maps, so has to be - // called before installing seccomp filters. - initializePProf() + // pprof.Initialize opens /proc/self/maps, so has to be called before + // installing seccomp filters. + pprof.Initialize() } // Seccomp filters have to be applied before parsing the state file. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index eef43b9df..e7ca98134 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -49,6 +49,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" @@ -60,6 +61,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/runsc/boot/filter" _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms. + "gvisor.dev/gvisor/runsc/boot/pprof" "gvisor.dev/gvisor/runsc/specutils" // Include supported socket providers. @@ -230,11 +232,8 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("enabling strace: %v", err) } - // Create an empty network stack because the network namespace may be empty at - // this point. Netns is configured before Run() is called. Netstack is - // configured using a control uRPC message. Host network is configured inside - // Run(). - networkStack, err := newEmptyNetworkStack(args.Conf, k, k) + // Create root network namespace/stack. 
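+	// The namespace wraps the stack; namespaces created later via CLONE_NEWNET
+	// build their own stacks through the namespace's creator (netstack only).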
+ netns, err := newRootNetworkNamespace(args.Conf, k, k) if err != nil { return nil, fmt.Errorf("creating network: %v", err) } @@ -277,7 +276,7 @@ func New(args Args) (*Loader, error) { FeatureSet: cpuid.HostFeatureSet(), Timekeeper: tk, RootUserNamespace: creds.UserNamespace, - NetworkStack: networkStack, + RootNetworkNamespace: netns, ApplicationCores: uint(args.NumCPU), Vdso: vdso, RootUTSNamespace: kernel.NewUTSNamespace(args.Spec.Hostname, args.Spec.Hostname, creds.UserNamespace), @@ -466,7 +465,7 @@ func (l *Loader) run() error { // Delay host network configuration to this point because network namespace // is configured after the loader is created and before Run() is called. log.Debugf("Configuring host network") - stack := l.k.NetworkStack().(*hostinet.Stack) + stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack) if err := stack.Configure(); err != nil { return err } @@ -485,7 +484,7 @@ func (l *Loader) run() error { // l.restore is set by the container manager when a restore call is made. if !l.restore { if l.conf.ProfileEnable { - initializePProf() + pprof.Initialize() } // Finally done with all configuration. Setup filters before user code @@ -908,48 +907,92 @@ func (l *Loader) WaitExit() kernel.ExitStatus { return l.k.GlobalInit().ExitStatus() } -func newEmptyNetworkStack(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { +func newRootNetworkNamespace(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) { + // Create an empty network stack because the network namespace may be empty at + // this point. Netns is configured before Run() is called. Netstack is + // configured using a control uRPC message. Host network is configured inside + // Run(). switch conf.Network { case NetworkHost: - return hostinet.NewStack(), nil + // No network namespacing support for hostinet yet, hence creator is nil. + return inet.NewRootNamespace(hostinet.NewStack(), nil), nil case NetworkNone, NetworkSandbox: - // NetworkNone sets up loopback using netstack. - netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} - transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} - s := netstack.Stack{stack.New(stack.Options{ - NetworkProtocols: netProtos, - TransportProtocols: transProtos, - Clock: clock, - Stats: netstack.Metrics, - HandleLocal: true, - // Enable raw sockets for users with sufficient - // privileges. - RawFactory: raw.EndpointFactory{}, - UniqueID: uniqueID, - })} - - // Enable SACK Recovery. - if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { - return nil, fmt.Errorf("failed to enable SACK: %v", err) + s, err := newEmptySandboxNetworkStack(clock, uniqueID) + if err != nil { + return nil, err } + creator := &sandboxNetstackCreator{ + clock: clock, + uniqueID: uniqueID, + } + return inet.NewRootNamespace(s, creator), nil - // Set default TTLs as required by socket/netstack. - s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) - s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + default: + panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + } - // Enable Receive Buffer Auto-Tuning. 
- if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) - } +} - s.FillDefaultIPTables() +func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { + netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} + transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} + s := netstack.Stack{stack.New(stack.Options{ + NetworkProtocols: netProtos, + TransportProtocols: transProtos, + Clock: clock, + Stats: netstack.Metrics, + HandleLocal: true, + // Enable raw sockets for users with sufficient + // privileges. + RawFactory: raw.EndpointFactory{}, + UniqueID: uniqueID, + })} - return &s, nil + // Enable SACK Recovery. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { + return nil, fmt.Errorf("failed to enable SACK: %v", err) + } - default: - panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + // Set default TTLs as required by socket/netstack. + s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + + // Enable Receive Buffer Auto-Tuning. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) + } + + s.FillDefaultIPTables() + + return &s, nil +} + +// sandboxNetstackCreator implements kernel.NetworkStackCreator. +// +// +stateify savable +type sandboxNetstackCreator struct { + clock tcpip.Clock + uniqueID stack.UniqueID +} + +// CreateStack implements kernel.NetworkStackCreator.CreateStack. +func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) { + s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID) + if err != nil { + return nil, err } + + // Setup loopback. + n := &Network{Stack: s.(*netstack.Stack).Stack} + nicID := tcpip.NICID(f.uniqueID.UniqueID()) + link := DefaultLoopbackLink + linkEP := loopback.New() + if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { + return nil, err + } + + return s, nil } // signal sends a signal to one or more processes in a container. If PID is 0, diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 6a8765ec8..bee6ee336 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -17,6 +17,7 @@ package boot import ( "fmt" "net" + "strings" "syscall" "gvisor.dev/gvisor/pkg/log" @@ -31,6 +32,32 @@ import ( "gvisor.dev/gvisor/pkg/urpc" ) +var ( + // DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and + // "::1/8" on "lo" interface. + DefaultLoopbackLink = LoopbackLink{ + Name: "lo", + Addresses: []net.IP{ + net.IP("\x7f\x00\x00\x01"), + net.IPv6loopback, + }, + Routes: []Route{ + { + Destination: net.IPNet{ + IP: net.IPv4(0x7f, 0, 0, 0), + Mask: net.IPv4Mask(0xff, 0, 0, 0), + }, + }, + { + Destination: net.IPNet{ + IP: net.IPv6loopback, + Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), + }, + }, + }, + } +) + // Network exposes methods that can be used to configure a network stack. 
type Network struct { Stack *stack.Stack diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof.go deleted file mode 100644 index 463362f02..000000000 --- a/runsc/boot/pprof.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -func initializePProf() { -} diff --git a/runsc/boot/pprof/BUILD b/runsc/boot/pprof/BUILD new file mode 100644 index 000000000..29cb42b2f --- /dev/null +++ b/runsc/boot/pprof/BUILD @@ -0,0 +1,11 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "pprof", + srcs = ["pprof.go"], + visibility = [ + "//runsc:__subpackages__", + ], +) diff --git a/runsc/boot/pprof/pprof.go b/runsc/boot/pprof/pprof.go new file mode 100644 index 000000000..1ded20dee --- /dev/null +++ b/runsc/boot/pprof/pprof.go @@ -0,0 +1,20 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package pprof provides a stub to initialize custom profilers. +package pprof + +// Initialize will be called at boot for initializing custom profilers. 
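+// The open-source implementation is a no-op.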
+func Initialize() { +} diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index 99e143696..bc093fba5 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -21,7 +21,6 @@ import ( "path/filepath" "runtime" "strconv" - "strings" "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -75,30 +74,8 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi } func createDefaultLoopbackInterface(conn *urpc.Client) error { - link := boot.LoopbackLink{ - Name: "lo", - Addresses: []net.IP{ - net.IP("\x7f\x00\x00\x01"), - net.IPv6loopback, - }, - Routes: []boot.Route{ - { - Destination: net.IPNet{ - - IP: net.IPv4(0x7f, 0, 0, 0), - Mask: net.IPv4Mask(0xff, 0, 0, 0), - }, - }, - { - Destination: net.IPNet{ - IP: net.IPv6loopback, - Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), - }, - }, - }, - } if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{ - LoopbackLinks: []boot.LoopbackLink{link}, + LoopbackLinks: []boot.LoopbackLink{boot.DefaultLoopbackLink}, }, nil); err != nil { return fmt.Errorf("creating loopback link and routes: %v", err) } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index d69ac8356..d1977d4de 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -258,6 +258,8 @@ syscall_test( syscall_test(test = "//test/syscalls/linux:munmap_test") +syscall_test(test = "//test/syscalls/linux:network_namespace_test") + syscall_test( add_overlay = True, test = "//test/syscalls/linux:open_create_test", diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 05a818795..aa303af84 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3639,6 +3639,23 @@ cc_binary( ], ) +cc_binary( + name = "network_namespace_test", + testonly = 1, + srcs = ["network_namespace.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + gtest, + "//test/util:capability_util", + "//test/util:memory_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + ], +) + cc_binary( name = "semaphore_test", testonly = 1, diff --git a/test/syscalls/linux/network_namespace.cc b/test/syscalls/linux/network_namespace.cc new file mode 100644 index 000000000..6ea48c263 --- /dev/null +++ b/test/syscalls/linux/network_namespace.cc @@ -0,0 +1,121 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
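+//
+// Tests that a network namespace created via unshare(CLONE_NEWNET) or
+// clone(CLONE_NEWNET) still exposes a loopback interface.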
+ +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/synchronization/notification.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/memory_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using TestFunc = std::function; +using RunFunc = std::function; + +struct NamespaceStrategy { + RunFunc run; + + static NamespaceStrategy Of(RunFunc run) { + NamespaceStrategy s; + s.run = run; + return s; + } +}; + +PosixError RunWithUnshare(TestFunc fn) { + PosixError err = PosixError(-1, "function did not return a value"); + ScopedThread t([&] { + if (unshare(CLONE_NEWNET) != 0) { + err = PosixError(errno); + return; + } + err = fn(); + }); + t.Join(); + return err; +} + +PosixError RunWithClone(TestFunc fn) { + struct Args { + absl::Notification n; + TestFunc fn; + PosixError err; + }; + Args args; + args.fn = fn; + args.err = PosixError(-1, "function did not return a value"); + + ASSIGN_OR_RETURN_ERRNO( + Mapping child_stack, + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + pid_t child = clone( + +[](void *arg) { + Args *args = reinterpret_cast(arg); + args->err = args->fn(); + args->n.Notify(); + syscall(SYS_exit, 0); // Exit manually. No return address on stack. + return 0; + }, + reinterpret_cast(child_stack.addr() + kPageSize), + CLONE_NEWNET | CLONE_THREAD | CLONE_SIGHAND | CLONE_VM, &args); + if (child < 0) { + return PosixError(errno, "clone() failed"); + } + args.n.WaitForNotification(); + return args.err; +} + +class NetworkNamespaceTest + : public ::testing::TestWithParam {}; + +TEST_P(NetworkNamespaceTest, LoopbackExists) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + EXPECT_NO_ERRNO(GetParam().run([]() { + // TODO(gvisor.dev/issue/1833): Update this to test that only "lo" exists. + // Check loopback device exists. + int sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + return PosixError(errno, "socket() failed"); + } + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + if (ioctl(sock, SIOCGIFINDEX, &ifr) < 0) { + return PosixError(errno, "ioctl() failed, lo cannot be found"); + } + return NoError(); + })); +} + +INSTANTIATE_TEST_SUITE_P( + AllNetworkNamespaceTest, NetworkNamespaceTest, + ::testing::Values(NamespaceStrategy::Of(RunWithUnshare), + NamespaceStrategy::Of(RunWithClone))); + +} // namespace + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From 10aa4d3b343255db45f5ca4ff7b51f21a309e10b Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Fri, 21 Feb 2020 15:05:20 -0800 Subject: Factor platform tags. PiperOrigin-RevId: 296519566 --- test/runner/defs.bzl | 58 ++++++++++++++----------------------------- tools/bazeldefs/platforms.bzl | 17 +++++++++++++ tools/defs.bzl | 3 +++ 3 files changed, 39 insertions(+), 39 deletions(-) create mode 100644 tools/bazeldefs/platforms.bzl (limited to 'test') diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 5e97c1867..56743a526 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -1,6 +1,6 @@ """Defines a rule for syscall test targets.""" -load("//tools:defs.bzl", "loopback") +load("//tools:defs.bzl", "default_platform", "loopback", "platforms") def _runner_test_impl(ctx): # Generate a runner binary. @@ -94,19 +94,6 @@ def _syscall_test( # Disable off-host networking. 
tags.append("requires-net:loopback") - # Add tag to prevent the tests from running in a Bazel sandbox. - # TODO(b/120560048): Make the tests run without this tag. - tags.append("no-sandbox") - - # TODO(b/112165693): KVM tests are tagged "manual" to until the platform is - # more stable. - if platform == "kvm": - tags.append("manual") - tags.append("requires-kvm") - - # TODO(b/112165693): Remove when tests pass reliably. - tags.append("notap") - runner_args = [ # Arguments are passed directly to runner binary. "--platform=" + platform, @@ -149,6 +136,8 @@ def syscall_test( add_hostinet: add a hostinet test. tags: starting test tags. """ + if not tags: + tags = [] _syscall_test( test = test, @@ -160,35 +149,26 @@ def syscall_test( tags = tags, ) - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "kvm", - use_tmpfs = use_tmpfs, - add_uds_tree = add_uds_tree, - tags = tags, - ) - - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "ptrace", - use_tmpfs = use_tmpfs, - add_uds_tree = add_uds_tree, - tags = tags, - ) + for (platform, platform_tags) in platforms.items(): + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = platform, + use_tmpfs = use_tmpfs, + add_uds_tree = add_uds_tree, + tags = platform_tags + tags, + ) if add_overlay: _syscall_test( test = test, shard_count = shard_count, size = size, - platform = "ptrace", + platform = default_platform, use_tmpfs = False, # overlay is adding a writable tmpfs on top of root. add_uds_tree = add_uds_tree, - tags = tags, + tags = platforms[default_platform] + tags, overlay = True, ) @@ -198,10 +178,10 @@ def syscall_test( test = test, shard_count = shard_count, size = size, - platform = "ptrace", + platform = default_platform, use_tmpfs = use_tmpfs, add_uds_tree = add_uds_tree, - tags = tags, + tags = platforms[default_platform] + tags, file_access = "shared", ) @@ -210,9 +190,9 @@ def syscall_test( test = test, shard_count = shard_count, size = size, - platform = "ptrace", + platform = default_platform, use_tmpfs = use_tmpfs, network = "host", add_uds_tree = add_uds_tree, - tags = tags, + tags = platforms[default_platform] + tags, ) diff --git a/tools/bazeldefs/platforms.bzl b/tools/bazeldefs/platforms.bzl new file mode 100644 index 000000000..92b0b5fc0 --- /dev/null +++ b/tools/bazeldefs/platforms.bzl @@ -0,0 +1,17 @@ +"""List of platforms.""" + +# Platform to associated tags. +platforms = { + "ptrace": [ + # TODO(b/120560048): Make the tests run without this tag. + "no-sandbox", + ], + "kvm": [ + "manual", + "local", + # TODO(b/120560048): Make the tests run without this tag. + "no-sandbox", + ], +} + +default_platform = "ptrace" diff --git a/tools/defs.bzl b/tools/defs.bzl index 45c065459..15a310403 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -8,6 +8,7 @@ change for Google-internal and bazel-compatible rules. 
load("//tools/go_stateify:defs.bzl", "go_stateify") load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") load("//tools/bazeldefs:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") +load("//tools/bazeldefs:platforms.bzl", _default_platform = "default_platform", _platforms = "platforms") load("//tools/bazeldefs:tags.bzl", "go_suffixes") # Delegate directly. @@ -34,6 +35,8 @@ select_system = _select_system loopback = _loopback default_installer = _default_installer default_net_util = _default_net_util +platforms = _platforms +default_platform = _default_platform def go_binary(name, **kwargs): """Wraps the standard go_binary. -- cgit v1.2.3 From b8f56c79be40d9c75f4e2f279c9d821d1c1c3569 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Fri, 21 Feb 2020 15:41:56 -0800 Subject: Implement tap/tun device in vfs. PiperOrigin-RevId: 296526279 --- pkg/abi/linux/BUILD | 1 + pkg/abi/linux/ioctl.go | 26 ++ pkg/abi/linux/ioctl_tun.go | 29 ++ pkg/sentry/fs/dev/BUILD | 5 + pkg/sentry/fs/dev/dev.go | 10 +- pkg/sentry/fs/dev/net_tun.go | 170 +++++++++++ pkg/syserror/syserror.go | 1 + pkg/tcpip/buffer/view.go | 6 + pkg/tcpip/link/channel/BUILD | 1 + pkg/tcpip/link/channel/channel.go | 180 +++++++++--- pkg/tcpip/link/tun/BUILD | 18 +- pkg/tcpip/link/tun/device.go | 352 +++++++++++++++++++++++ pkg/tcpip/link/tun/protocol.go | 56 ++++ pkg/tcpip/stack/nic.go | 32 +++ pkg/tcpip/stack/stack.go | 39 +++ test/syscalls/BUILD | 2 + test/syscalls/linux/BUILD | 30 ++ test/syscalls/linux/dev.cc | 7 + test/syscalls/linux/socket_netlink_route_util.cc | 163 +++++++++++ test/syscalls/linux/socket_netlink_route_util.h | 55 ++++ test/syscalls/linux/tuntap.cc | 346 ++++++++++++++++++++++ 21 files changed, 1490 insertions(+), 39 deletions(-) create mode 100644 pkg/abi/linux/ioctl_tun.go create mode 100644 pkg/sentry/fs/dev/net_tun.go create mode 100644 pkg/tcpip/link/tun/device.go create mode 100644 pkg/tcpip/link/tun/protocol.go create mode 100644 test/syscalls/linux/socket_netlink_route_util.cc create mode 100644 test/syscalls/linux/socket_netlink_route_util.h create mode 100644 test/syscalls/linux/tuntap.cc (limited to 'test') diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index a89f34d4b..322d1ccc4 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -30,6 +30,7 @@ go_library( "futex.go", "inotify.go", "ioctl.go", + "ioctl_tun.go", "ip.go", "ipc.go", "limits.go", diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go index 0e18db9ef..2062e6a4b 100644 --- a/pkg/abi/linux/ioctl.go +++ b/pkg/abi/linux/ioctl.go @@ -72,3 +72,29 @@ const ( SIOCGMIIPHY = 0x8947 SIOCGMIIREG = 0x8948 ) + +// ioctl(2) directions. Used to calculate requests number. +// Constants from asm-generic/ioctl.h. 
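+// The direction is encoded in the two highest bits of the 32-bit request number.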
+const ( + _IOC_NONE = 0 + _IOC_WRITE = 1 + _IOC_READ = 2 +) + +// Constants from asm-generic/ioctl.h. +const ( + _IOC_NRBITS = 8 + _IOC_TYPEBITS = 8 + _IOC_SIZEBITS = 14 + _IOC_DIRBITS = 2 + + _IOC_NRSHIFT = 0 + _IOC_TYPESHIFT = _IOC_NRSHIFT + _IOC_NRBITS + _IOC_SIZESHIFT = _IOC_TYPESHIFT + _IOC_TYPEBITS + _IOC_DIRSHIFT = _IOC_SIZESHIFT + _IOC_SIZEBITS +) + +// IOC outputs the result of _IOC macro in asm-generic/ioctl.h. +func IOC(dir, typ, nr, size uint32) uint32 { + return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT +} diff --git a/pkg/abi/linux/ioctl_tun.go b/pkg/abi/linux/ioctl_tun.go new file mode 100644 index 000000000..c59c9c136 --- /dev/null +++ b/pkg/abi/linux/ioctl_tun.go @@ -0,0 +1,29 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// ioctl(2) request numbers from linux/if_tun.h +var ( + TUNSETIFF = IOC(_IOC_WRITE, 'T', 202, 4) + TUNGETIFF = IOC(_IOC_READ, 'T', 210, 4) +) + +// Flags from net/if_tun.h +const ( + IFF_TUN = 0x0001 + IFF_TAP = 0x0002 + IFF_NO_PI = 0x1000 + IFF_NOFILTER = 0x1000 +) diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 4c4b7d5cc..9b6bb26d0 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -9,6 +9,7 @@ go_library( "device.go", "fs.go", "full.go", + "net_tun.go", "null.go", "random.go", "tty.go", @@ -19,15 +20,19 @@ go_library( "//pkg/context", "//pkg/rand", "//pkg/safemem", + "//pkg/sentry/arch", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/fs/tmpfs", + "//pkg/sentry/kernel", "//pkg/sentry/memmap", "//pkg/sentry/mm", "//pkg/sentry/pgalloc", + "//pkg/sentry/socket/netstack", "//pkg/syserror", + "//pkg/tcpip/link/tun", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go index 35bd23991..7e66c29b0 100644 --- a/pkg/sentry/fs/dev/dev.go +++ b/pkg/sentry/fs/dev/dev.go @@ -66,8 +66,8 @@ func newMemDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSo }) } -func newDirectory(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - iops := ramfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) +func newDirectory(ctx context.Context, contents map[string]*fs.Inode, msrc *fs.MountSource) *fs.Inode { + iops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), @@ -111,7 +111,7 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // A devpts is typically mounted at /dev/pts to provide // pseudoterminal support. Place an empty directory there for // the devpts to be mounted over. - "pts": newDirectory(ctx, msrc), + "pts": newDirectory(ctx, nil, msrc), // Similarly, applications expect a ptmx device at /dev/ptmx // connected to the terminals provided by /dev/pts/. 
Rather // than creating a device directly (which requires a hairy @@ -124,6 +124,10 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode { "ptmx": newSymlink(ctx, "pts/ptmx", msrc), "tty": newCharacterDevice(ctx, newTTYDevice(ctx, fs.RootOwner, 0666), msrc, ttyDevMajor, ttyDevMinor), + + "net": newDirectory(ctx, map[string]*fs.Inode{ + "tun": newCharacterDevice(ctx, newNetTunDevice(ctx, fs.RootOwner, 0666), msrc, netTunDevMajor, netTunDevMinor), + }, msrc), } iops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go new file mode 100644 index 000000000..755644488 --- /dev/null +++ b/pkg/sentry/fs/dev/net_tun.go @@ -0,0 +1,170 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dev + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/netstack" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip/link/tun" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +const ( + netTunDevMajor = 10 + netTunDevMinor = 200 +) + +// +stateify savable +type netTunInodeOperations struct { + fsutil.InodeGenericChecker `state:"nosave"` + fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` + fsutil.InodeNoopRelease `state:"nosave"` + fsutil.InodeNoopTruncate `state:"nosave"` + fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotDirectory `state:"nosave"` + fsutil.InodeNotMappable `state:"nosave"` + fsutil.InodeNotSocket `state:"nosave"` + fsutil.InodeNotSymlink `state:"nosave"` + fsutil.InodeVirtual `state:"nosave"` + + fsutil.InodeSimpleAttributes +} + +var _ fs.InodeOperations = (*netTunInodeOperations)(nil) + +func newNetTunDevice(ctx context.Context, owner fs.FileOwner, mode linux.FileMode) *netTunInodeOperations { + return &netTunInodeOperations{ + InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, fs.FilePermsFromMode(mode), linux.TMPFS_MAGIC), + } +} + +// GetFile implements fs.InodeOperations.GetFile. +func (iops *netTunInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, d, flags, &netTunFileOperations{}), nil +} + +// +stateify savable +type netTunFileOperations struct { + fsutil.FileNoSeek `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + + device tun.Device +} + +var _ fs.FileOperations = (*netTunFileOperations)(nil) + +// Release implements fs.FileOperations.Release. 
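+// It releases the underlying tun.Device, dropping its reference on any
+// attached endpoint.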
+func (fops *netTunFileOperations) Release() { + fops.device.Release() +} + +// Ioctl implements fs.FileOperations.Ioctl. +func (fops *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + request := args[1].Uint() + data := args[2].Pointer() + + switch request { + case linux.TUNSETIFF: + t := kernel.TaskFromContext(ctx) + if t == nil { + panic("Ioctl should be called from a task context") + } + if !t.HasCapability(linux.CAP_NET_ADMIN) { + return 0, syserror.EPERM + } + stack, ok := t.NetworkContext().(*netstack.Stack) + if !ok { + return 0, syserror.EINVAL + } + + var req linux.IFReq + if _, err := usermem.CopyObjectIn(ctx, io, data, &req, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + flags := usermem.ByteOrder.Uint16(req.Data[:]) + return 0, fops.device.SetIff(stack.Stack, req.Name(), flags) + + case linux.TUNGETIFF: + var req linux.IFReq + + copy(req.IFName[:], fops.device.Name()) + + // Linux adds IFF_NOFILTER (the same value as IFF_NO_PI unfortunately) when + // there is no sk_filter. See __tun_chr_ioctl() in net/drivers/tun.c. + flags := fops.device.Flags() | linux.IFF_NOFILTER + usermem.ByteOrder.PutUint16(req.Data[:], flags) + + _, err := usermem.CopyObjectOut(ctx, io, data, &req, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + default: + return 0, syserror.ENOTTY + } +} + +// Write implements fs.FileOperations.Write. +func (fops *netTunFileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + data := make([]byte, src.NumBytes()) + if _, err := src.CopyIn(ctx, data); err != nil { + return 0, err + } + return fops.device.Write(data) +} + +// Read implements fs.FileOperations.Read. +func (fops *netTunFileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + data, err := fops.device.Read() + if err != nil { + return 0, err + } + n, err := dst.CopyOut(ctx, data) + if n > 0 && n < len(data) { + // Not an error for partial copying. Packet truncated. + err = nil + } + return int64(n), err +} + +// Readiness implements watier.Waitable.Readiness. +func (fops *netTunFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { + return fops.device.Readiness(mask) +} + +// EventRegister implements watier.Waitable.EventRegister. +func (fops *netTunFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + fops.device.EventRegister(e, mask) +} + +// EventUnregister implements watier.Waitable.EventUnregister. +func (fops *netTunFileOperations) EventUnregister(e *waiter.Entry) { + fops.device.EventUnregister(e) +} diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go index 2269f6237..4b5a0fca6 100644 --- a/pkg/syserror/syserror.go +++ b/pkg/syserror/syserror.go @@ -29,6 +29,7 @@ var ( EACCES = error(syscall.EACCES) EAGAIN = error(syscall.EAGAIN) EBADF = error(syscall.EBADF) + EBADFD = error(syscall.EBADFD) EBUSY = error(syscall.EBUSY) ECHILD = error(syscall.ECHILD) ECONNREFUSED = error(syscall.ECONNREFUSED) diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go index 150310c11..17e94c562 100644 --- a/pkg/tcpip/buffer/view.go +++ b/pkg/tcpip/buffer/view.go @@ -156,3 +156,9 @@ func (vv *VectorisedView) Append(vv2 VectorisedView) { vv.views = append(vv.views, vv2.views...) vv.size += vv2.size } + +// AppendView appends the given view into this vectorised view. 
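+// It differs from Append in taking a single View rather than a VectorisedView.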
+func (vv *VectorisedView) AppendView(v View) { + vv.views = append(vv.views, v) + vv.size += len(v) +} diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD index 3974c464e..b8b93e78e 100644 --- a/pkg/tcpip/link/channel/BUILD +++ b/pkg/tcpip/link/channel/BUILD @@ -7,6 +7,7 @@ go_library( srcs = ["channel.go"], visibility = ["//visibility:public"], deps = [ + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/stack", diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index 78d447acd..5944ba190 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -20,6 +20,7 @@ package channel import ( "context" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -33,6 +34,118 @@ type PacketInfo struct { Route stack.Route } +// Notification is the interface for receiving notification from the packet +// queue. +type Notification interface { + // WriteNotify will be called when a write happens to the queue. + WriteNotify() +} + +// NotificationHandle is an opaque handle to the registered notification target. +// It can be used to unregister the notification when no longer interested. +// +// +stateify savable +type NotificationHandle struct { + n Notification +} + +type queue struct { + // mu protects fields below. + mu sync.RWMutex + // c is the outbound packet channel. Sending to c should hold mu. + c chan PacketInfo + numWrite int + numRead int + notify []*NotificationHandle +} + +func (q *queue) Close() { + close(q.c) +} + +func (q *queue) Read() (PacketInfo, bool) { + q.mu.Lock() + defer q.mu.Unlock() + select { + case p := <-q.c: + q.numRead++ + return p, true + default: + return PacketInfo{}, false + } +} + +func (q *queue) ReadContext(ctx context.Context) (PacketInfo, bool) { + // We have to receive from channel without holding the lock, since it can + // block indefinitely. This will cause a window that numWrite - numRead + // produces a larger number, but won't go to negative. numWrite >= numRead + // still holds. + select { + case pkt := <-q.c: + q.mu.Lock() + defer q.mu.Unlock() + q.numRead++ + return pkt, true + case <-ctx.Done(): + return PacketInfo{}, false + } +} + +func (q *queue) Write(p PacketInfo) bool { + wrote := false + + // It's important to make sure nobody can see numWrite until we increment it, + // so numWrite >= numRead holds. + q.mu.Lock() + select { + case q.c <- p: + wrote = true + q.numWrite++ + default: + } + notify := q.notify + q.mu.Unlock() + + if wrote { + // Send notification outside of lock. + for _, h := range notify { + h.n.WriteNotify() + } + } + return wrote +} + +func (q *queue) Num() int { + q.mu.RLock() + defer q.mu.RUnlock() + n := q.numWrite - q.numRead + if n < 0 { + panic("numWrite < numRead") + } + return n +} + +func (q *queue) AddNotify(notify Notification) *NotificationHandle { + q.mu.Lock() + defer q.mu.Unlock() + h := &NotificationHandle{n: notify} + q.notify = append(q.notify, h) + return h +} + +func (q *queue) RemoveNotify(handle *NotificationHandle) { + q.mu.Lock() + defer q.mu.Unlock() + // Make a copy, since we reads the array outside of lock when notifying. + notify := make([]*NotificationHandle, 0, len(q.notify)) + for _, h := range q.notify { + if h != handle { + notify = append(notify, h) + } + } + q.notify = notify +} + // Endpoint is link layer endpoint that stores outbound packets in a channel // and allows injection of inbound packets. 
type Endpoint struct { @@ -41,14 +154,16 @@ type Endpoint struct { linkAddr tcpip.LinkAddress LinkEPCapabilities stack.LinkEndpointCapabilities - // c is where outbound packets are queued. - c chan PacketInfo + // Outbound packet queue. + q *queue } // New creates a new channel endpoint. func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) *Endpoint { return &Endpoint{ - c: make(chan PacketInfo, size), + q: &queue{ + c: make(chan PacketInfo, size), + }, mtu: mtu, linkAddr: linkAddr, } @@ -57,43 +172,36 @@ func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) *Endpoint { // Close closes e. Further packet injections will panic. Reads continue to // succeed until all packets are read. func (e *Endpoint) Close() { - close(e.c) + e.q.Close() } -// Read does non-blocking read for one packet from the outbound packet queue. +// Read does non-blocking read one packet from the outbound packet queue. func (e *Endpoint) Read() (PacketInfo, bool) { - select { - case pkt := <-e.c: - return pkt, true - default: - return PacketInfo{}, false - } + return e.q.Read() } // ReadContext does blocking read for one packet from the outbound packet queue. // It can be cancelled by ctx, and in this case, it returns false. func (e *Endpoint) ReadContext(ctx context.Context) (PacketInfo, bool) { - select { - case pkt := <-e.c: - return pkt, true - case <-ctx.Done(): - return PacketInfo{}, false - } + return e.q.ReadContext(ctx) } // Drain removes all outbound packets from the channel and counts them. func (e *Endpoint) Drain() int { c := 0 for { - select { - case <-e.c: - c++ - default: + if _, ok := e.Read(); !ok { return c } + c++ } } +// NumQueued returns the number of packet queued for outbound. +func (e *Endpoint) NumQueued() int { + return e.q.Num() +} + // InjectInbound injects an inbound packet. func (e *Endpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { e.InjectLinkAddr(protocol, "", pkt) @@ -155,10 +263,7 @@ func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne Route: route, } - select { - case e.c <- p: - default: - } + e.q.Write(p) return nil } @@ -171,7 +276,6 @@ func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts []tcpip.Pac route.Release() payloadView := pkts[0].Data.ToView() n := 0 -packetLoop: for _, pkt := range pkts { off := pkt.DataOffset size := pkt.DataSize @@ -185,12 +289,10 @@ packetLoop: Route: route, } - select { - case e.c <- p: - n++ - default: - break packetLoop + if !e.q.Write(p) { + break } + n++ } return n, nil @@ -204,13 +306,21 @@ func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error { GSO: nil, } - select { - case e.c <- p: - default: - } + e.q.Write(p) return nil } // Wait implements stack.LinkEndpoint.Wait. func (*Endpoint) Wait() {} + +// AddNotify adds a notification target for receiving event about outgoing +// packets. +func (e *Endpoint) AddNotify(notify Notification) *NotificationHandle { + return e.q.AddNotify(notify) +} + +// RemoveNotify removes handle from the list of notification targets. 
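+// Passing a handle that was not returned by AddNotify is a no-op.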
+func (e *Endpoint) RemoveNotify(handle *NotificationHandle) { + e.q.RemoveNotify(handle) +} diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index e5096ea38..e0db6cf54 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -4,6 +4,22 @@ package(licenses = ["notice"]) go_library( name = "tun", - srcs = ["tun_unsafe.go"], + srcs = [ + "device.go", + "protocol.go", + "tun_unsafe.go", + ], visibility = ["//visibility:public"], + deps = [ + "//pkg/abi/linux", + "//pkg/refs", + "//pkg/sync", + "//pkg/syserror", + "//pkg/tcpip", + "//pkg/tcpip/buffer", + "//pkg/tcpip/header", + "//pkg/tcpip/link/channel", + "//pkg/tcpip/stack", + "//pkg/waiter", + ], ) diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go new file mode 100644 index 000000000..6ff47a742 --- /dev/null +++ b/pkg/tcpip/link/tun/device.go @@ -0,0 +1,352 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tun + +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" +) + +const ( + // drivers/net/tun.c:tun_net_init() + defaultDevMtu = 1500 + + // Queue length for outbound packet, arriving at fd side for read. Overflow + // causes packet drops. gVisor implementation-specific. + defaultDevOutQueueLen = 1024 +) + +var zeroMAC [6]byte + +// Device is an opened /dev/net/tun device. +// +// +stateify savable +type Device struct { + waiter.Queue + + mu sync.RWMutex `state:"nosave"` + endpoint *tunEndpoint + notifyHandle *channel.NotificationHandle + flags uint16 +} + +// beforeSave is invoked by stateify. +func (d *Device) beforeSave() { + d.mu.Lock() + defer d.mu.Unlock() + // TODO(b/110961832): Restore the device to stack. At this moment, the stack + // is not savable. + if d.endpoint != nil { + panic("/dev/net/tun does not support save/restore when a device is associated with it.") + } +} + +// Release implements fs.FileOperations.Release. +func (d *Device) Release() { + d.mu.Lock() + defer d.mu.Unlock() + + // Decrease refcount if there is an endpoint associated with this file. + if d.endpoint != nil { + d.endpoint.RemoveNotify(d.notifyHandle) + d.endpoint.DecRef() + d.endpoint = nil + } +} + +// SetIff services TUNSETIFF ioctl(2) request. +func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error { + d.mu.Lock() + defer d.mu.Unlock() + + if d.endpoint != nil { + return syserror.EINVAL + } + + // Input validations. 
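+	// Exactly one of IFF_TUN or IFF_TAP must be set, and IFF_NO_PI is the
+	// only other flag accepted.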
+ isTun := flags&linux.IFF_TUN != 0 + isTap := flags&linux.IFF_TAP != 0 + supportedFlags := uint16(linux.IFF_TUN | linux.IFF_TAP | linux.IFF_NO_PI) + if isTap && isTun || !isTap && !isTun || flags&^supportedFlags != 0 { + return syserror.EINVAL + } + + prefix := "tun" + if isTap { + prefix = "tap" + } + + endpoint, err := attachOrCreateNIC(s, name, prefix) + if err != nil { + return syserror.EINVAL + } + + d.endpoint = endpoint + d.notifyHandle = d.endpoint.AddNotify(d) + d.flags = flags + return nil +} + +func attachOrCreateNIC(s *stack.Stack, name, prefix string) (*tunEndpoint, error) { + for { + // 1. Try to attach to an existing NIC. + if name != "" { + if nic, found := s.GetNICByName(name); found { + endpoint, ok := nic.LinkEndpoint().(*tunEndpoint) + if !ok { + // Not a NIC created by tun device. + return nil, syserror.EOPNOTSUPP + } + if !endpoint.TryIncRef() { + // Race detected: NIC got deleted in between. + continue + } + return endpoint, nil + } + } + + // 2. Creating a new NIC. + id := tcpip.NICID(s.UniqueID()) + endpoint := &tunEndpoint{ + Endpoint: channel.New(defaultDevOutQueueLen, defaultDevMtu, ""), + stack: s, + nicID: id, + name: name, + } + if endpoint.name == "" { + endpoint.name = fmt.Sprintf("%s%d", prefix, id) + } + err := s.CreateNICWithOptions(endpoint.nicID, endpoint, stack.NICOptions{ + Name: endpoint.name, + }) + switch err { + case nil: + return endpoint, nil + case tcpip.ErrDuplicateNICID: + // Race detected: A NIC has been created in between. + continue + default: + return nil, syserror.EINVAL + } + } +} + +// Write inject one inbound packet to the network interface. +func (d *Device) Write(data []byte) (int64, error) { + d.mu.RLock() + endpoint := d.endpoint + d.mu.RUnlock() + if endpoint == nil { + return 0, syserror.EBADFD + } + if !endpoint.IsAttached() { + return 0, syserror.EIO + } + + dataLen := int64(len(data)) + + // Packet information. + var pktInfoHdr PacketInfoHeader + if !d.hasFlags(linux.IFF_NO_PI) { + if len(data) < PacketInfoHeaderSize { + // Ignore bad packet. + return dataLen, nil + } + pktInfoHdr = PacketInfoHeader(data[:PacketInfoHeaderSize]) + data = data[PacketInfoHeaderSize:] + } + + // Ethernet header (TAP only). + var ethHdr header.Ethernet + if d.hasFlags(linux.IFF_TAP) { + if len(data) < header.EthernetMinimumSize { + // Ignore bad packet. + return dataLen, nil + } + ethHdr = header.Ethernet(data[:header.EthernetMinimumSize]) + data = data[header.EthernetMinimumSize:] + } + + // Try to determine network protocol number, default zero. + var protocol tcpip.NetworkProtocolNumber + switch { + case pktInfoHdr != nil: + protocol = pktInfoHdr.Protocol() + case ethHdr != nil: + protocol = ethHdr.Type() + } + + // Try to determine remote link address, default zero. + var remote tcpip.LinkAddress + switch { + case ethHdr != nil: + remote = ethHdr.SourceAddress() + default: + remote = tcpip.LinkAddress(zeroMAC[:]) + } + + pkt := tcpip.PacketBuffer{ + Data: buffer.View(data).ToVectorisedView(), + } + if ethHdr != nil { + pkt.LinkHeader = buffer.View(ethHdr) + } + endpoint.InjectLinkAddr(protocol, remote, pkt) + return dataLen, nil +} + +// Read reads one outgoing packet from the network interface. +func (d *Device) Read() ([]byte, error) { + d.mu.RLock() + endpoint := d.endpoint + d.mu.RUnlock() + if endpoint == nil { + return nil, syserror.EBADFD + } + + for { + info, ok := endpoint.Read() + if !ok { + return nil, syserror.ErrWouldBlock + } + + v, ok := d.encodePkt(&info) + if !ok { + // Ignore unsupported packet. 
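+			// encodePkt fails only when a packet has neither a link header nor a
+			// remote link address (likely a raw packet), which is not supported yet.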
+ continue + } + return v, nil + } +} + +// encodePkt encodes packet for fd side. +func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) { + var vv buffer.VectorisedView + + // Packet information. + if !d.hasFlags(linux.IFF_NO_PI) { + hdr := make(PacketInfoHeader, PacketInfoHeaderSize) + hdr.Encode(&PacketInfoFields{ + Protocol: info.Proto, + }) + vv.AppendView(buffer.View(hdr)) + } + + // If the packet does not already have link layer header, and the route + // does not exist, we can't compute it. This is possibly a raw packet, tun + // device doesn't support this at the moment. + if info.Pkt.LinkHeader == nil && info.Route.RemoteLinkAddress == "" { + return nil, false + } + + // Ethernet header (TAP only). + if d.hasFlags(linux.IFF_TAP) { + // Add ethernet header if not provided. + if info.Pkt.LinkHeader == nil { + hdr := &header.EthernetFields{ + SrcAddr: info.Route.LocalLinkAddress, + DstAddr: info.Route.RemoteLinkAddress, + Type: info.Proto, + } + if hdr.SrcAddr == "" { + hdr.SrcAddr = d.endpoint.LinkAddress() + } + + eth := make(header.Ethernet, header.EthernetMinimumSize) + eth.Encode(hdr) + vv.AppendView(buffer.View(eth)) + } else { + vv.AppendView(info.Pkt.LinkHeader) + } + } + + // Append upper headers. + vv.AppendView(buffer.View(info.Pkt.Header.View()[len(info.Pkt.LinkHeader):])) + // Append data payload. + vv.Append(info.Pkt.Data) + + return vv.ToView(), true +} + +// Name returns the name of the attached network interface. Empty string if +// unattached. +func (d *Device) Name() string { + d.mu.RLock() + defer d.mu.RUnlock() + if d.endpoint != nil { + return d.endpoint.name + } + return "" +} + +// Flags returns the flags set for d. Zero value if unset. +func (d *Device) Flags() uint16 { + d.mu.RLock() + defer d.mu.RUnlock() + return d.flags +} + +func (d *Device) hasFlags(flags uint16) bool { + return d.flags&flags == flags +} + +// Readiness implements watier.Waitable.Readiness. +func (d *Device) Readiness(mask waiter.EventMask) waiter.EventMask { + if mask&waiter.EventIn != 0 { + d.mu.RLock() + endpoint := d.endpoint + d.mu.RUnlock() + if endpoint != nil && endpoint.NumQueued() == 0 { + mask &= ^waiter.EventIn + } + } + return mask & (waiter.EventIn | waiter.EventOut) +} + +// WriteNotify implements channel.Notification.WriteNotify. +func (d *Device) WriteNotify() { + d.Notify(waiter.EventIn) +} + +// tunEndpoint is the link endpoint for the NIC created by the tun device. +// +// It is ref-counted as multiple opening files can attach to the same NIC. +// The last owner is responsible for deleting the NIC. +type tunEndpoint struct { + *channel.Endpoint + + refs.AtomicRefCount + + stack *stack.Stack + nicID tcpip.NICID + name string +} + +// DecRef decrements refcount of e, removes NIC if refcount goes to 0. +func (e *tunEndpoint) DecRef() { + e.DecRefWithDestructor(func() { + e.stack.RemoveNIC(e.nicID) + }) +} diff --git a/pkg/tcpip/link/tun/protocol.go b/pkg/tcpip/link/tun/protocol.go new file mode 100644 index 000000000..89d9d91a9 --- /dev/null +++ b/pkg/tcpip/link/tun/protocol.go @@ -0,0 +1,56 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tun + +import ( + "encoding/binary" + + "gvisor.dev/gvisor/pkg/tcpip" +) + +const ( + // PacketInfoHeaderSize is the size of the packet information header. + PacketInfoHeaderSize = 4 + + offsetFlags = 0 + offsetProtocol = 2 +) + +// PacketInfoFields contains fields sent through the wire if IFF_NO_PI flag is +// not set. +type PacketInfoFields struct { + Flags uint16 + Protocol tcpip.NetworkProtocolNumber +} + +// PacketInfoHeader is the wire representation of the packet information sent if +// IFF_NO_PI flag is not set. +type PacketInfoHeader []byte + +// Encode encodes f into h. +func (h PacketInfoHeader) Encode(f *PacketInfoFields) { + binary.BigEndian.PutUint16(h[offsetFlags:][:2], f.Flags) + binary.BigEndian.PutUint16(h[offsetProtocol:][:2], uint16(f.Protocol)) +} + +// Flags returns the flag field in h. +func (h PacketInfoHeader) Flags() uint16 { + return binary.BigEndian.Uint16(h[offsetFlags:]) +} + +// Protocol returns the protocol field in h. +func (h PacketInfoHeader) Protocol() tcpip.NetworkProtocolNumber { + return tcpip.NetworkProtocolNumber(binary.BigEndian.Uint16(h[offsetProtocol:])) +} diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 862954ab2..46d3a6646 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -298,6 +298,33 @@ func (n *NIC) enable() *tcpip.Error { return nil } +// remove detaches NIC from the link endpoint, and marks existing referenced +// network endpoints expired. This guarantees no packets between this NIC and +// the network stack. +func (n *NIC) remove() *tcpip.Error { + n.mu.Lock() + defer n.mu.Unlock() + + // Detach from link endpoint, so no packet comes in. + n.linkEP.Attach(nil) + + // Remove permanent and permanentTentative addresses, so no packet goes out. + var errs []*tcpip.Error + for nid, ref := range n.mu.endpoints { + switch ref.getKind() { + case permanentTentative, permanent: + if err := n.removePermanentAddressLocked(nid.LocalAddress); err != nil { + errs = append(errs, err) + } + } + } + if len(errs) > 0 { + return errs[0] + } + + return nil +} + // becomeIPv6Router transitions n into an IPv6 router. // // When transitioning into an IPv6 router, host-only state (NDP discovered @@ -1302,6 +1329,11 @@ func (n *NIC) Stack() *Stack { return n.stack } +// LinkEndpoint returns the link endpoint of n. +func (n *NIC) LinkEndpoint() LinkEndpoint { + return n.linkEP +} + // isAddrTentative returns true if addr is tentative on n. // // Note that if addr is not associated with n, then this function will return diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index f0ed76fbe..900dd46c5 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -916,6 +916,18 @@ func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) *tcpip.Error { return s.CreateNICWithOptions(id, ep, NICOptions{}) } +// GetNICByName gets the NIC specified by name. 
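+// The lookup is a linear scan over all NICs; the boolean reports whether a
+// matching NIC was found.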
+func (s *Stack) GetNICByName(name string) (*NIC, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + for _, nic := range s.nics { + if nic.Name() == name { + return nic, true + } + } + return nil, false +} + // EnableNIC enables the given NIC so that the link-layer endpoint can start // delivering packets to it. func (s *Stack) EnableNIC(id tcpip.NICID) *tcpip.Error { @@ -956,6 +968,33 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool { return nic.enabled() } +// RemoveNIC removes NIC and all related routes from the network stack. +func (s *Stack) RemoveNIC(id tcpip.NICID) *tcpip.Error { + s.mu.Lock() + defer s.mu.Unlock() + + nic, ok := s.nics[id] + if !ok { + return tcpip.ErrUnknownNICID + } + delete(s.nics, id) + + // Remove routes in-place. n tracks the number of routes written. + n := 0 + for i, r := range s.routeTable { + if r.NIC != id { + // Keep this route. + if i > n { + s.routeTable[n] = r + } + n++ + } + } + s.routeTable = s.routeTable[:n] + + return nic.remove() +} + // NICAddressRanges returns a map of NICIDs to their associated subnets. func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet { s.mu.RLock() diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index d1977d4de..3518e862d 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -678,6 +678,8 @@ syscall_test( test = "//test/syscalls/linux:truncate_test", ) +syscall_test(test = "//test/syscalls/linux:tuntap_test") + syscall_test(test = "//test/syscalls/linux:udp_bind_test") syscall_test( diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index aa303af84..704bae17b 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -131,6 +131,17 @@ cc_library( ], ) +cc_library( + name = "socket_netlink_route_util", + testonly = 1, + srcs = ["socket_netlink_route_util.cc"], + hdrs = ["socket_netlink_route_util.h"], + deps = [ + ":socket_netlink_util", + "@com_google_absl//absl/types:optional", + ], +) + cc_library( name = "socket_test_util", testonly = 1, @@ -3430,6 +3441,25 @@ cc_binary( ], ) +cc_binary( + name = "tuntap_test", + testonly = 1, + srcs = ["tuntap.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + gtest, + "//test/syscalls/linux:socket_netlink_route_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "udp_socket_test_cases", testonly = 1, diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 4dd302eed..4e473268c 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -153,6 +153,13 @@ TEST(DevTest, TTYExists) { EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); } +TEST(DevTest, NetTunExists) { + struct stat statbuf = {}; + ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallSucceeds()); + // Check that it's a character device with rw-rw-rw- permissions. + EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/socket_netlink_route_util.cc b/test/syscalls/linux/socket_netlink_route_util.cc new file mode 100644 index 000000000..53eb3b6b2 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_route_util.cc @@ -0,0 +1,163 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_netlink_route_util.h" + +#include +#include +#include + +#include "absl/types/optional.h" +#include "test/syscalls/linux/socket_netlink_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr uint32_t kSeq = 12345; + +} // namespace + +PosixError DumpLinks( + const FileDescriptor& fd, uint32_t seq, + const std::function& fn) { + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = seq; + req.ifm.ifi_family = AF_UNSPEC; + + return NetlinkRequestResponse(fd, &req, sizeof(req), fn, false); +} + +PosixErrorOr> DumpLinks() { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + std::vector links; + RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { + if (hdr->nlmsg_type != RTM_NEWLINK || + hdr->nlmsg_len < NLMSG_SPACE(sizeof(struct ifinfomsg))) { + return; + } + const struct ifinfomsg* msg = + reinterpret_cast(NLMSG_DATA(hdr)); + const auto* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); + if (rta == nullptr) { + // Ignore links that do not have a name. + return; + } + + links.emplace_back(); + links.back().index = msg->ifi_index; + links.back().type = msg->ifi_type; + links.back().name = + std::string(reinterpret_cast(RTA_DATA(rta))); + })); + return links; +} + +PosixErrorOr> FindLoopbackLink() { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + for (const auto& link : links) { + if (link.type == ARPHRD_LOOPBACK) { + return absl::optional(link); + } + } + return absl::optional(); +} + +PosixError LinkAddLocalAddr(int index, int family, int prefixlen, + const void* addr, int addrlen) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifaddrmsg ifaddr; + char attrbuf[512]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifaddr)); + req.hdr.nlmsg_type = RTM_NEWADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + req.ifaddr.ifa_index = index; + req.ifaddr.ifa_family = family; + req.ifaddr.ifa_prefixlen = prefixlen; + + struct rtattr* rta = reinterpret_cast( + reinterpret_cast(&req) + NLMSG_ALIGN(req.hdr.nlmsg_len)); + rta->rta_type = IFA_LOCAL; + rta->rta_len = RTA_LENGTH(addrlen); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_LENGTH(addrlen); + memcpy(RTA_DATA(rta), addr, addrlen); + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +PosixError LinkChangeFlags(int index, unsigned int flags, unsigned int change) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifinfo; + char pad[NLMSG_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo)); + req.hdr.nlmsg_type = RTM_NEWLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + 
req.ifinfo.ifi_index = index; + req.ifinfo.ifi_flags = flags; + req.ifinfo.ifi_change = change; + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +PosixError LinkSetMacAddr(int index, const void* addr, int addrlen) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifinfo; + char attrbuf[512]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo)); + req.hdr.nlmsg_type = RTM_NEWLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + req.ifinfo.ifi_index = index; + + struct rtattr* rta = reinterpret_cast( + reinterpret_cast(&req) + NLMSG_ALIGN(req.hdr.nlmsg_len)); + rta->rta_type = IFLA_ADDRESS; + rta->rta_len = RTA_LENGTH(addrlen); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_LENGTH(addrlen); + memcpy(RTA_DATA(rta), addr, addrlen); + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_route_util.h b/test/syscalls/linux/socket_netlink_route_util.h new file mode 100644 index 000000000..2c018e487 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_route_util.h @@ -0,0 +1,55 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ + +#include +#include + +#include + +#include "absl/types/optional.h" +#include "test/syscalls/linux/socket_netlink_util.h" + +namespace gvisor { +namespace testing { + +struct Link { + int index; + int16_t type; + std::string name; +}; + +PosixError DumpLinks(const FileDescriptor& fd, uint32_t seq, + const std::function& fn); + +PosixErrorOr> DumpLinks(); + +PosixErrorOr> FindLoopbackLink(); + +// LinkAddLocalAddr sets IFA_LOCAL attribute on the interface. +PosixError LinkAddLocalAddr(int index, int family, int prefixlen, + const void* addr, int addrlen); + +// LinkChangeFlags changes interface flags. E.g. IFF_UP. +PosixError LinkChangeFlags(int index, unsigned int flags, unsigned int change); + +// LinkSetMacAddr sets IFLA_ADDRESS attribute of the interface. +PosixError LinkSetMacAddr(int index, const void* addr, int addrlen); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc new file mode 100644 index 000000000..f6ac9d7b8 --- /dev/null +++ b/test/syscalls/linux/tuntap.cc @@ -0,0 +1,346 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_split.h" +#include "test/syscalls/linux/socket_netlink_route_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr int kIPLen = 4; + +constexpr const char kDevNetTun[] = "/dev/net/tun"; +constexpr const char kTapName[] = "tap0"; + +constexpr const uint8_t kMacA[ETH_ALEN] = {0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA}; +constexpr const uint8_t kMacB[ETH_ALEN] = {0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB}; + +PosixErrorOr> DumpLinkNames() { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + std::set names; + for (const auto& link : links) { + names.emplace(link.name); + } + return names; +} + +PosixErrorOr> GetLinkByName(const std::string& name) { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + for (const auto& link : links) { + if (link.name == name) { + return absl::optional(link); + } + } + return absl::optional(); +} + +struct pihdr { + uint16_t pi_flags; + uint16_t pi_protocol; +} __attribute__((packed)); + +struct ping_pkt { + pihdr pi; + struct ethhdr eth; + struct iphdr ip; + struct icmphdr icmp; + char payload[64]; +} __attribute__((packed)); + +ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip, + const uint8_t dstmac[ETH_ALEN], const char* dstip) { + ping_pkt pkt = {}; + + pkt.pi.pi_protocol = htons(ETH_P_IP); + + memcpy(pkt.eth.h_dest, dstmac, sizeof(pkt.eth.h_dest)); + memcpy(pkt.eth.h_source, srcmac, sizeof(pkt.eth.h_source)); + pkt.eth.h_proto = htons(ETH_P_IP); + + pkt.ip.ihl = 5; + pkt.ip.version = 4; + pkt.ip.tos = 0; + pkt.ip.tot_len = htons(sizeof(struct iphdr) + sizeof(struct icmphdr) + + sizeof(pkt.payload)); + pkt.ip.id = 1; + pkt.ip.frag_off = 1 << 6; // Do not fragment + pkt.ip.ttl = 64; + pkt.ip.protocol = IPPROTO_ICMP; + inet_pton(AF_INET, dstip, &pkt.ip.daddr); + inet_pton(AF_INET, srcip, &pkt.ip.saddr); + pkt.ip.check = IPChecksum(pkt.ip); + + pkt.icmp.type = ICMP_ECHO; + pkt.icmp.code = 0; + pkt.icmp.checksum = 0; + pkt.icmp.un.echo.sequence = 1; + pkt.icmp.un.echo.id = 1; + + strncpy(pkt.payload, "abcd", sizeof(pkt.payload)); + pkt.icmp.checksum = ICMPChecksum(pkt.icmp, pkt.payload, sizeof(pkt.payload)); + + return pkt; +} + +struct arp_pkt { + pihdr pi; + struct ethhdr eth; + struct arphdr arp; + uint8_t arp_sha[ETH_ALEN]; + uint8_t arp_spa[kIPLen]; + uint8_t arp_tha[ETH_ALEN]; + uint8_t arp_tpa[kIPLen]; +} __attribute__((packed)); + +std::string CreateArpPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip, + const uint8_t dstmac[ETH_ALEN], const char* dstip) { + std::string buffer; + buffer.resize(sizeof(arp_pkt)); + + arp_pkt* pkt = reinterpret_cast(&buffer[0]); + { + pkt->pi.pi_protocol = htons(ETH_P_ARP); + + memcpy(pkt->eth.h_dest, kMacA, 
sizeof(pkt->eth.h_dest)); + memcpy(pkt->eth.h_source, kMacB, sizeof(pkt->eth.h_source)); + pkt->eth.h_proto = htons(ETH_P_ARP); + + pkt->arp.ar_hrd = htons(ARPHRD_ETHER); + pkt->arp.ar_pro = htons(ETH_P_IP); + pkt->arp.ar_hln = ETH_ALEN; + pkt->arp.ar_pln = kIPLen; + pkt->arp.ar_op = htons(ARPOP_REPLY); + + memcpy(pkt->arp_sha, srcmac, sizeof(pkt->arp_sha)); + inet_pton(AF_INET, srcip, pkt->arp_spa); + memcpy(pkt->arp_tha, dstmac, sizeof(pkt->arp_tha)); + inet_pton(AF_INET, dstip, pkt->arp_tpa); + } + return buffer; +} + +} // namespace + +class TuntapTest : public ::testing::Test { + protected: + void TearDown() override { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))) { + // Bring back capability if we had dropped it in test case. + ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, true)); + } + } +}; + +TEST_F(TuntapTest, CreateInterfaceNoCap) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, false)); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr = {}; + ifr.ifr_flags = IFF_TAP; + strncpy(ifr.ifr_name, kTapName, IFNAMSIZ); + + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallFailsWithErrno(EPERM)); +} + +TEST_F(TuntapTest, CreateFixedNameInterface) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr_set = {}; + ifr_set.ifr_flags = IFF_TAP; + strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ); + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set), + SyscallSucceedsWithValue(0)); + + struct ifreq ifr_get = {}; + EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get), + SyscallSucceedsWithValue(0)); + + struct ifreq ifr_expect = ifr_set; + // See __tun_chr_ioctl() in net/drivers/tun.c. + ifr_expect.ifr_flags |= IFF_NOFILTER; + + EXPECT_THAT(DumpLinkNames(), + IsPosixErrorOkAndHolds(::testing::Contains(kTapName))); + EXPECT_THAT(memcmp(&ifr_expect, &ifr_get, sizeof(ifr_get)), ::testing::Eq(0)); +} + +TEST_F(TuntapTest, CreateInterface) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr = {}; + ifr.ifr_flags = IFF_TAP; + // Empty ifr.ifr_name. Let kernel assign. + + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0)); + + struct ifreq ifr_get = {}; + EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get), + SyscallSucceedsWithValue(0)); + + std::string ifname = ifr_get.ifr_name; + EXPECT_THAT(ifname, ::testing::StartsWith("tap")); + EXPECT_THAT(DumpLinkNames(), + IsPosixErrorOkAndHolds(::testing::Contains(ifname))); +} + +TEST_F(TuntapTest, InvalidReadWrite) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + char buf[128] = {}; + EXPECT_THAT(read(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD)); + EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD)); +} + +TEST_F(TuntapTest, WriteToDownDevice) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + // FIXME: gVisor always creates enabled/up'd interfaces. + SKIP_IF(IsRunningOnGvisor()); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + // Device created should be down by default. 
+ struct ifreq ifr = {}; + ifr.ifr_flags = IFF_TAP; + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0)); + + char buf[128] = {}; + EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EIO)); +} + +// This test sets up a TAP device and pings kernel by sending ICMP echo request. +// +// It works as the following: +// * Open /dev/net/tun, and create kTapName interface. +// * Use rtnetlink to do initial setup of the interface: +// * Assign IP address 10.0.0.1/24 to kernel. +// * MAC address: kMacA +// * Bring up the interface. +// * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel. +// * Loop to receive packets from TAP device/fd: +// * If packet is an ICMP echo reply, it stops and passes the test. +// * If packet is an ARP request, it responds with canned reply and resends +// the +// ICMP request packet. +TEST_F(TuntapTest, PingKernel) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + // Interface creation. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr_set = {}; + ifr_set.ifr_flags = IFF_TAP; + strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ); + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set), + SyscallSucceedsWithValue(0)); + + absl::optional link = + ASSERT_NO_ERRNO_AND_VALUE(GetLinkByName(kTapName)); + ASSERT_TRUE(link.has_value()); + + // Interface setup. + struct in_addr addr; + inet_pton(AF_INET, "10.0.0.1", &addr); + EXPECT_NO_ERRNO(LinkAddLocalAddr(link->index, AF_INET, /*prefixlen=*/24, + &addr, sizeof(addr))); + + if (!IsRunningOnGvisor()) { + // FIXME: gVisor doesn't support setting MAC address on interfaces yet. + EXPECT_NO_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA))); + + // FIXME: gVisor always creates enabled/up'd interfaces. + EXPECT_NO_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP)); + } + + ping_pkt ping_req = CreatePingPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1"); + std::string arp_rep = CreateArpPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1"); + + // Send ping, this would trigger an ARP request on Linux. + EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); + + // Receive loop to process inbound packets. + struct inpkt { + union { + pihdr pi; + ping_pkt ping; + arp_pkt arp; + }; + }; + while (1) { + inpkt r = {}; + int n = read(fd.get(), &r, sizeof(r)); + EXPECT_THAT(n, SyscallSucceeds()); + + if (n < sizeof(pihdr)) { + std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol + << " len: " << n << std::endl; + continue; + } + + // Process ARP packet. + if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) { + // Respond with canned ARP reply. + EXPECT_THAT(write(fd.get(), arp_rep.data(), arp_rep.size()), + SyscallSucceedsWithValue(arp_rep.size())); + // First ping request might have been dropped due to mac address not in + // ARP cache. Send it again. + EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); + } + + // Process ping response packet. + if (n >= sizeof(ping_pkt) && r.pi.pi_protocol == ping_req.pi.pi_protocol && + r.ping.ip.protocol == ping_req.ip.protocol && + !memcmp(&r.ping.ip.saddr, &ping_req.ip.daddr, kIPLen) && + !memcmp(&r.ping.ip.daddr, &ping_req.ip.saddr, kIPLen) && + r.ping.icmp.type == 0 && r.ping.icmp.code == 0) { + // Ends and passes the test. 
+ break; + } + } +} + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From 75d7f76a6cd81d77f5ce70440c1d95c0296b15ba Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 11 Nov 2019 20:26:38 -0800 Subject: arm64: add a travis build ci Build runsc and run "runsc do ls". Signed-off-by: Andrei Vagin --- .travis.yml | 19 ++++++++++++++++++ Dockerfile | 11 ++++++----- Makefile | 5 ++++- test/syscalls/linux/32bit.cc | 2 +- test/syscalls/linux/rseq/uapi.h | 29 ++++++++++++---------------- test/syscalls/linux/udp_socket_test_cases.cc | 4 ++++ 6 files changed, 46 insertions(+), 24 deletions(-) (limited to 'test') diff --git a/.travis.yml b/.travis.yml index e69de29bb..a2a260538 100644 --- a/.travis.yml +++ b/.travis.yml @@ -0,0 +1,19 @@ +language: minimal +sudo: required +dist: xenial +cache: + directories: + - /home/travis/.cache/bazel/ +services: + - docker +matrix: + include: + - os: linux + arch: amd64 + env: RUNSC_PATH=./bazel-bin/runsc/linux_amd64_pure_stripped/runsc + - os: linux + arch: arm64 + env: RUNSC_PATH=./bazel-bin/runsc/linux_arm64_pure_stripped/runsc +script: + - uname -a + - make DOCKER_RUN_OPTIONS="" BAZEL_OPTIONS="build runsc:runsc" bazel && $RUNSC_PATH --alsologtostderr --network none --debug --TESTONLY-unsafe-nonroot=true --rootless do ls diff --git a/Dockerfile b/Dockerfile index 738623023..2bfdfec6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,9 @@ -FROM ubuntu:bionic +FROM fedora:31 -RUN apt-get update && apt-get install -y curl gnupg2 git python python3 python3-distutils python3-pip -RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \ - curl https://bazel.build/bazel-release.pub.gpg | apt-key add - -RUN apt-get update && apt-get install -y bazel && apt-get clean +RUN dnf install -y dnf-plugins-core && dnf copr enable -y vbatts/bazel + +RUN dnf install -y bazel2 git gcc make golang gcc-c++ glibc-devel python3 which python3-pip python3-devel libffi-devel openssl-devel pkg-config glibc-static + +RUN pip install pycparser WORKDIR /gvisor diff --git a/Makefile b/Makefile index a73bc0c36..d9531fbd5 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,9 @@ UID := $(shell id -u ${USER}) GID := $(shell id -g ${USER}) GVISOR_BAZEL_CACHE := $(shell readlink -f ~/.cache/bazel/) +# The --privileged is required to run tests. +DOCKER_RUN_OPTIONS ?= --privileged + all: runsc docker-build: @@ -19,7 +22,7 @@ bazel-server-start: docker-build -v "$(CURDIR):$(CURDIR)" \ --workdir "$(CURDIR)" \ --tmpfs /tmp:rw,exec \ - --privileged \ + $(DOCKER_RUN_OPTIONS) \ gvisor-bazel \ sh -c "while :; do sleep 100; done" && \ docker exec --user 0:0 -i gvisor-bazel sh -c "groupadd --gid $(GID) --non-unique gvisor && useradd --uid $(UID) --non-unique --gid $(GID) -d $(HOME) gvisor" diff --git a/test/syscalls/linux/32bit.cc b/test/syscalls/linux/32bit.cc index c47a05181..3c825477c 100644 --- a/test/syscalls/linux/32bit.cc +++ b/test/syscalls/linux/32bit.cc @@ -74,7 +74,7 @@ void ExitGroup32(const char instruction[2], int code) { "int $3\n" : : [ code ] "m"(code), [ ip ] "d"(m.ptr()) - : "rax", "rbx", "rsp"); + : "rax", "rbx"); } constexpr int kExitCode = 42; diff --git a/test/syscalls/linux/rseq/uapi.h b/test/syscalls/linux/rseq/uapi.h index e3ff0579a..ca1d67691 100644 --- a/test/syscalls/linux/rseq/uapi.h +++ b/test/syscalls/linux/rseq/uapi.h @@ -15,14 +15,9 @@ #ifndef GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_ #define GVISOR_TEST_SYSCALLS_LINUX_RSEQ_UAPI_H_ -// User-kernel ABI for restartable sequences. 
+#include -// Standard types. -// -// N.B. This header will be included in targets that do have the standard -// library, so we can't shadow the standard type names. -using __u32 = __UINT32_TYPE__; -using __u64 = __UINT64_TYPE__; +// User-kernel ABI for restartable sequences. #ifdef __x86_64__ // Syscall numbers. @@ -32,20 +27,20 @@ constexpr int kRseqSyscall = 334; #endif // __x86_64__ struct rseq_cs { - __u32 version; - __u32 flags; - __u64 start_ip; - __u64 post_commit_offset; - __u64 abort_ip; -} __attribute__((aligned(4 * sizeof(__u64)))); + uint32_t version; + uint32_t flags; + uint64_t start_ip; + uint64_t post_commit_offset; + uint64_t abort_ip; +} __attribute__((aligned(4 * sizeof(uint64_t)))); // N.B. alignment is enforced by the kernel. struct rseq { - __u32 cpu_id_start; - __u32 cpu_id; + uint32_t cpu_id_start; + uint32_t cpu_id; struct rseq_cs* rseq_cs; - __u32 flags; -} __attribute__((aligned(4 * sizeof(__u64)))); + uint32_t flags; +} __attribute__((aligned(4 * sizeof(uint64_t)))); constexpr int kRseqFlagUnregister = 1 << 0; diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 57b1a357c..740c7986d 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -21,6 +21,10 @@ #include #include +#ifndef SIOCGSTAMP +#include +#endif + #include "gtest/gtest.h" #include "absl/base/macros.h" #include "absl/time/clock.h" -- cgit v1.2.3 From 160d5751ab6a06c22aed7d829a17c88344cc7cf2 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 24 Feb 2020 17:28:27 -0800 Subject: Add default behavior for gtest runner. PiperOrigin-RevId: 297009116 --- test/perf/BUILD | 8 ++++---- test/perf/linux/getdents_benchmark.cc | 2 +- test/runner/gtest/gtest.go | 25 +++++++++++++++++++------ 3 files changed, 24 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/perf/BUILD b/test/perf/BUILD index 7a2bf10ed..346a28e16 100644 --- a/test/perf/BUILD +++ b/test/perf/BUILD @@ -29,7 +29,7 @@ syscall_test( ) syscall_test( - size = "large", + size = "enormous", test = "//test/perf/linux:getdents_benchmark", ) @@ -39,7 +39,7 @@ syscall_test( ) syscall_test( - size = "large", + size = "enormous", test = "//test/perf/linux:gettid_benchmark", ) @@ -87,7 +87,7 @@ syscall_test( ) syscall_test( - size = "large", + size = "enormous", test = "//test/perf/linux:signal_benchmark", ) @@ -102,7 +102,7 @@ syscall_test( ) syscall_test( - size = "large", + size = "enormous", add_overlay = True, test = "//test/perf/linux:unlink_benchmark", ) diff --git a/test/perf/linux/getdents_benchmark.cc b/test/perf/linux/getdents_benchmark.cc index 0e03975b4..afc599ad2 100644 --- a/test/perf/linux/getdents_benchmark.cc +++ b/test/perf/linux/getdents_benchmark.cc @@ -141,7 +141,7 @@ void BM_GetdentsNewFD(benchmark::State& state) { state.SetItemsProcessed(state.iterations()); } -BENCHMARK(BM_GetdentsNewFD)->Range(1, 1 << 16)->UseRealTime(); +BENCHMARK(BM_GetdentsNewFD)->Range(1, 1 << 12)->UseRealTime(); } // namespace diff --git a/test/runner/gtest/gtest.go b/test/runner/gtest/gtest.go index 23bf7b5f6..f96e2415e 100644 --- a/test/runner/gtest/gtest.go +++ b/test/runner/gtest/gtest.go @@ -43,6 +43,10 @@ type TestCase struct { // Name is the name of this individual test. Name string + // all indicates that this will run without flags. This takes + // precendence over benchmark below. + all bool + // benchmark indicates that this is a benchmark. 
In this case, the // suite will be empty, and we will use the appropriate test and // benchmark flags. @@ -57,6 +61,9 @@ func (tc TestCase) FullName() string { // Args returns arguments to be passed when invoking the test. func (tc TestCase) Args() []string { + if tc.all { + return []string{} // No arguments. + } if tc.benchmark { return []string{ fmt.Sprintf("%s=^$", filterTestFlag), @@ -81,11 +88,16 @@ func ParseTestCases(testBin string, benchmarks bool, extraArgs ...string) ([]Tes cmd := exec.Command(testBin, args...) out, err := cmd.Output() if err != nil { - exitErr, ok := err.(*exec.ExitError) - if !ok { - return nil, fmt.Errorf("could not enumerate gtest tests: %v", err) - } - return nil, fmt.Errorf("could not enumerate gtest tests: %v\nstderr:\n%s", err, exitErr.Stderr) + // We failed to list tests with the given flags. Just + // return something that will run the binary with no + // flags, which should execute all tests. + return []TestCase{ + TestCase{ + Suite: "Default", + Name: "All", + all: true, + }, + }, nil } // Parse test output. @@ -114,7 +126,6 @@ func ParseTestCases(testBin string, benchmarks bool, extraArgs ...string) ([]Tes Suite: suite, Name: name, }) - } // Finished? @@ -127,6 +138,8 @@ func ParseTestCases(testBin string, benchmarks bool, extraArgs ...string) ([]Tes cmd = exec.Command(testBin, args...) out, err = cmd.Output() if err != nil { + // We were able to enumerate tests above, but not benchmarks? + // We requested them, so we return an error in this case. exitErr, ok := err.(*exec.ExitError) if !ok { return nil, fmt.Errorf("could not enumerate gtest benchmarks: %v", err) -- cgit v1.2.3 From acc405ba60834f5dce9ce04cd762d5cda02224cb Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Tue, 25 Feb 2020 15:03:51 -0800 Subject: Add nat table support for iptables. - commit the changes for the comments. --- pkg/abi/linux/netfilter.go | 23 +++++++++-- pkg/sentry/socket/netfilter/netfilter.go | 33 +++++++++++----- pkg/tcpip/iptables/iptables.go | 10 ++++- pkg/tcpip/iptables/targets.go | 66 ++++++++++++++++++++++---------- pkg/tcpip/iptables/types.go | 4 +- pkg/tcpip/stack/nic.go | 13 +------ test/iptables/nat.go | 12 +----- 7 files changed, 103 insertions(+), 58 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index ba4d84962..2179ac995 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -250,8 +250,24 @@ type XTErrorTarget struct { // SizeOfXTErrorTarget is the size of an XTErrorTarget. const SizeOfXTErrorTarget = 64 +// Flag values for NfNATIPV4Range. The values indicate whether to map +// protocol specific part(ports) or IPs. It corresponds to values in +// include/uapi/linux/netfilter/nf_nat.h. +const ( + NF_NAT_RANGE_MAP_IPS = 1 << 0 + NF_NAT_RANGE_PROTO_SPECIFIED = 1 << 1 + NF_NAT_RANGE_PROTO_RANDOM = 1 << 2 + NF_NAT_RANGE_PERSISTENT = 1 << 3 + NF_NAT_RANGE_PROTO_RANDOM_FULLY = 1 << 4 + NF_NAT_RANGE_PROTO_RANDOM_ALL = (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) + NF_NAT_RANGE_MASK = (NF_NAT_RANGE_MAP_IPS | + NF_NAT_RANGE_PROTO_SPECIFIED | NF_NAT_RANGE_PROTO_RANDOM | + NF_NAT_RANGE_PERSISTENT | NF_NAT_RANGE_PROTO_RANDOM_FULLY) +) + // NfNATIPV4Range. It corresponds to struct nf_nat_ipv4_range -// in include/uapi/linux/netfilter/nf_nat.h. +// in include/uapi/linux/netfilter/nf_nat.h. The fields are in +// network byte order. type NfNATIPV4Range struct { Flags uint32 MinIP [4]byte @@ -263,11 +279,12 @@ type NfNATIPV4Range struct { // NfNATIPV4MultiRangeCompat. 
It corresponds to struct // nf_nat_ipv4_multi_range_compat in include/uapi/linux/netfilter/nf_nat.h. type NfNATIPV4MultiRangeCompat struct { - Rangesize uint32 - RangeIPV4 [1]NfNATIPV4Range + RangeSize uint32 + RangeIPV4 NfNATIPV4Range } // XTRedirectTarget triggers a redirect when reached. +// Adding 4 bytes of padding to make the struct 8 byte aligned. type XTRedirectTarget struct { Target XTEntryTarget NfRange NfNATIPV4MultiRangeCompat diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 512ad624a..257cb485b 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/iptables" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/usermem" @@ -288,6 +289,7 @@ func marshalRedirectTarget() []byte { TargetSize: linux.SizeOfXTRedirectTarget, }, } + copy(target.Target.Name[:], redirectTargetName) ret := make([]byte, 0, linux.SizeOfXTRedirectTarget) return binary.Marshal(ret, usermem.ByteOrder, target) @@ -405,7 +407,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal)) return syserr.ErrInvalidArgument } - target, err := parseTarget(optVal[:targetSize]) + target, err := parseTarget(filter, optVal[:targetSize]) if err != nil { nflog("failed to parse target: %v", err) return syserr.ErrInvalidArgument @@ -552,7 +554,7 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma // parseTarget parses a target from optVal. optVal should contain only the // target. -func parseTarget(optVal []byte) (iptables.Target, error) { +func parseTarget(filter iptables.IPHeaderFilter, optVal []byte) (iptables.Target, error) { nflog("set entries: parsing target of size %d", len(optVal)) if len(optVal) < linux.SizeOfXTEntryTarget { return nil, fmt.Errorf("optVal has insufficient size for entry target %d", len(optVal)) @@ -604,6 +606,10 @@ func parseTarget(optVal []byte) (iptables.Target, error) { return nil, fmt.Errorf("netfilter.SetEntries: optVal has insufficient size for redirect target %d", len(optVal)) } + if filter.Protocol != header.TCPProtocolNumber && filter.Protocol != header.UDPProtocolNumber { + return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + } + var redirectTarget linux.XTRedirectTarget buf = optVal[:linux.SizeOfXTRedirectTarget] binary.Unmarshal(buf, usermem.ByteOrder, &redirectTarget) @@ -612,21 +618,30 @@ func parseTarget(optVal []byte) (iptables.Target, error) { var target iptables.RedirectTarget nfRange := redirectTarget.NfRange - target.RangeSize = nfRange.Rangesize - target.Flags = nfRange.RangeIPV4[0].Flags + // RangeSize should be 1. + if nfRange.RangeSize != 1 { + return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + } + + // TODO(gvisor.dev/issue/170): Check if the flags are valid. + // Also check if we need to map ports or IP. + // For now, redirect target only supports dest port change. 
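For reference, a rule like "iptables -t nat -A PREROUTING -p tcp -j REDIRECT --to-ports 8080" arrives here as roughly the literal below. It is hand-built for illustration (htons is a local helper, not part of the change); the ports are in network byte order as the ABI comment above notes, and only the proto-specified flag is set:

    // htons converts a port to network byte order for this sketch.
    func htons(v uint16) uint16 { return v<<8 | v>>8 }

    // Roughly what "--to-ports 8080" looks like in the nf_nat range format.
    var redirect8080 = linux.NfNATIPV4MultiRangeCompat{
        RangeSize: 1,
        RangeIPV4: linux.NfNATIPV4Range{
            Flags:   linux.NF_NAT_RANGE_PROTO_SPECIFIED,
            MinPort: htons(8080),
            MaxPort: htons(8080),
        },
    }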
+ if nfRange.RangeIPV4.Flags&linux.NF_NAT_RANGE_PROTO_SPECIFIED == 0 { + return nil, fmt.Errorf("netfilter.SetEntries: invalid argument.") + } + target.Flags = nfRange.RangeIPV4.Flags - target.MinIP = tcpip.Address(nfRange.RangeIPV4[0].MinIP[:]) - target.MaxIP = tcpip.Address(nfRange.RangeIPV4[0].MaxIP[:]) + target.MinIP = tcpip.Address(nfRange.RangeIPV4.MinIP[:]) + target.MaxIP = tcpip.Address(nfRange.RangeIPV4.MaxIP[:]) // Convert port from big endian to little endian. port := make([]byte, 2) - binary.BigEndian.PutUint16(port, nfRange.RangeIPV4[0].MinPort) + binary.BigEndian.PutUint16(port, nfRange.RangeIPV4.MinPort) target.MinPort = binary.LittleEndian.Uint16(port) - binary.BigEndian.PutUint16(port, nfRange.RangeIPV4[0].MaxPort) + binary.BigEndian.PutUint16(port, nfRange.RangeIPV4.MaxPort) target.MaxPort = binary.LittleEndian.Uint16(port) return target, nil - } // Unknown target. diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index c00d012c0..f7dc4f720 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -207,7 +207,7 @@ func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename stri underflow := table.Rules[table.Underflows[hook]] // Underflow is guaranteed to be an unconditional // ACCEPT or DROP. - switch v, _ := underflow.Target.Action(pkt); v { + switch v, _ := underflow.Target.Action(pkt, underflow.Filter); v { case RuleAccept: return TableAccept case RuleDrop: @@ -233,6 +233,12 @@ func (it *IPTables) checkTable(hook Hook, pkt tcpip.PacketBuffer, tablename stri func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) RuleVerdict { rule := table.Rules[ruleIdx] + // If pkt.NetworkHeader hasn't been set yet, it will be contained in + // pkt.Data.First(). + if pkt.NetworkHeader == nil { + pkt.NetworkHeader = pkt.Data.First() + } + // First check whether the packet matches the IP header filter. // TODO(gvisor.dev/issue/170): Support other fields of the filter. if rule.Filter.Protocol != 0 && rule.Filter.Protocol != header.IPv4(pkt.NetworkHeader).TransportProtocol() { @@ -252,6 +258,6 @@ func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ru } // All the matchers matched, so run the target. - verdict, _ := rule.Target.Action(pkt) + verdict, _ := rule.Target.Action(pkt, rule.Filter) return verdict } diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go index 06e65bece..a75938da3 100644 --- a/pkg/tcpip/iptables/targets.go +++ b/pkg/tcpip/iptables/targets.go @@ -26,7 +26,7 @@ import ( type AcceptTarget struct{} // Action implements Target.Action. -func (AcceptTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { +func (AcceptTarget) Action(packet tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, string) { return RuleAccept, "" } @@ -34,7 +34,7 @@ func (AcceptTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { type DropTarget struct{} // Action implements Target.Action. -func (DropTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { +func (DropTarget) Action(packet tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, string) { return RuleDrop, "" } @@ -43,7 +43,7 @@ func (DropTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { type ErrorTarget struct{} // Action implements Target.Action. 
-func (ErrorTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { +func (ErrorTarget) Action(packet tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, string) { log.Debugf("ErrorTarget triggered.") return RuleDrop, "" } @@ -54,7 +54,7 @@ type UserChainTarget struct { } // Action implements Target.Action. -func (UserChainTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { +func (UserChainTarget) Action(tcpip.PacketBuffer, IPHeaderFilter) (RuleVerdict, string) { panic("UserChainTarget should never be called.") } @@ -63,29 +63,55 @@ func (UserChainTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { type ReturnTarget struct{} // Action implements Target.Action. -func (ReturnTarget) Action(tcpip.PacketBuffer) (RuleVerdict, string) { +func (ReturnTarget) Action(tcpip.PacketBuffer, IPHeaderFilter) (RuleVerdict, string) { return RuleReturn, "" } // RedirectTarget redirects the packet by modifying the destination port/IP. +// Min and Max values for IP and Ports in the struct indicate the range of +// values which can be used to redirect. type RedirectTarget struct { - RangeSize uint32 - Flags uint32 - MinIP tcpip.Address - MaxIP tcpip.Address - MinPort uint16 - MaxPort uint16 -} + // Flags to check if the redirect is for address or ports. + Flags uint32 -// Action implements Target.Action. -func (rt RedirectTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, string) { - log.Infof("RedirectTarget triggered.") + // Min address used to redirect. + MinIP tcpip.Address + + // Max address used to redirect. + MaxIP tcpip.Address - // TODO(gvisor.dev/issue/170): Checking only for UDP protocol. - // We're yet to support for TCP protocol. - headerView := packet.Data.First() - h := header.UDP(headerView) - h.SetDestinationPort(rt.MinPort) + // Min port used to redirect. + MinPort uint16 + // Max port used to redirect. + MaxPort uint16 +} + +// Action implements Target.Action. +func (rt RedirectTarget) Action(pkt tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, string) { + headerView := pkt.Data.First() + + // Network header should be set. + netHeader := header.IPv4(headerView) + if netHeader == nil { + return RuleDrop, "" + } + + // TODO(gvisor.dev/issue/170): Check Flags in RedirectTarget if + // we need to change dest address (for OUTPUT chain) or ports. + hlen := int(netHeader.HeaderLength()) + + switch protocol := filter.Protocol; protocol { + case header.UDPProtocolNumber: + udp := header.UDP(headerView[hlen:]) + udp.SetDestinationPort(rt.MinPort) + case header.TCPProtocolNumber: + // TODO(gvisor.dev/issue/170): Need to recompute checksum + // and implement nat connection tracking to support TCP. + tcp := header.TCP(headerView[hlen:]) + tcp.SetDestinationPort(rt.MinPort) + default: + return RuleDrop, "" + } return RuleAccept, "" } diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 5735d001b..0102831d0 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -63,7 +63,7 @@ const ( // TableAccept indicates the packet should continue through netstack. TableAccept TableVerdict = iota - // TableAccept indicates the packet should be dropped. + // TableDrop indicates the packet should be dropped. TableDrop ) @@ -175,5 +175,5 @@ type Target interface { // Action takes an action on the packet and returns a verdict on how // traversal should (or should not) continue. If the return value is // Jump, it also returns the name of the chain to jump to. 
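With the extra IPHeaderFilter argument added just below, a minimal out-of-tree target would now be shaped like the following sketch (illustrative only; noteTarget is not part of the change):

    // noteTarget accepts every packet and logs the protocol the rule
    // filtered on; it exists only to show the updated Action signature.
    type noteTarget struct{}

    // Action implements Target.Action.
    func (noteTarget) Action(pkt tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, string) {
        log.Debugf("noteTarget matched a packet, filter protocol: %d", filter.Protocol)
        return RuleAccept, ""
    }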
- Action(packet tcpip.PacketBuffer) (RuleVerdict, string) + Action(packet tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, string) } diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 2028f5201..a75dc0322 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -1087,19 +1087,8 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link // TODO(gvisor.dev/issue/170): Not supporting iptables for IPv6 yet. if protocol == header.IPv4ProtocolNumber { - newPkt := pkt.Clone() - - headerView := newPkt.Data.First() - h := header.IPv4(headerView) - newPkt.NetworkHeader = headerView[:h.HeaderLength()] - - hlen := int(h.HeaderLength()) - tlen := int(h.TotalLength()) - newPkt.Data.TrimFront(hlen) - newPkt.Data.CapLength(tlen - hlen) - ipt := n.stack.IPTables() - if ok := ipt.Check(iptables.Prerouting, newPkt); !ok { + if ok := ipt.Check(iptables.Prerouting, pkt); !ok { // iptables is telling us to drop the packet. return } diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 306cbd1b3..899d1c9d3 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -71,20 +71,12 @@ func (NATRedirectTCPPort) ContainerAction(ip net.IP) error { } // Listen for TCP packets on redirect port. - if err := listenTCP(redirectPort, sendloopDuration); err != nil { - return fmt.Errorf("connection on port %d should be accepted, but got error %v", redirectPort, err) - } - - return nil + return listenTCP(redirectPort, sendloopDuration) } // LocalAction implements TestCase.LocalAction. func (NATRedirectTCPPort) LocalAction(ip net.IP) error { - if err := connectTCP(ip, dropPort, acceptPort, sendloopDuration); err != nil { - return fmt.Errorf("connection destined to port %d should be accepted, but got error %v", dropPort, err) - } - - return nil + return connectTCP(ip, dropPort, acceptPort, sendloopDuration) } // NATDropUDP tests that packets are not received in ports other than redirect port. -- cgit v1.2.3 From 408979e619c4b5df74503c7a887aaaa06fd0d730 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 14 Feb 2020 17:19:32 -0800 Subject: iptables: filter by IP address (and range) Enables commands such as: $ iptables -A INPUT -d 127.0.0.1 -j ACCEPT $ iptables -t nat -A PREROUTING ! -d 127.0.0.1 -j REDIRECT Also adds a bunch of REDIRECT+destination tests. --- pkg/abi/linux/netfilter.go | 28 ++++- pkg/sentry/socket/netfilter/netfilter.go | 17 ++- pkg/tcpip/iptables/iptables.go | 28 ++++- pkg/tcpip/iptables/types.go | 12 ++ test/iptables/filter_input.go | 65 ++++++++++ test/iptables/filter_output.go | 56 +++++++++ test/iptables/iptables_test.go | 66 ++++++++++ test/iptables/iptables_util.go | 25 +++- test/iptables/nat.go | 201 ++++++++++++++++++++++++++++++- 9 files changed, 487 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index bd2e13ba1..aa149afb5 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -158,10 +158,36 @@ type IPTIP struct { // Flags define matching behavior for the IP header. Flags uint8 - // InverseFlags invert the meaning of fields in struct IPTIP. + // InverseFlags invert the meaning of fields in struct IPTIP. See the + // IPT_INV_* flags. InverseFlags uint8 } +// Inverts the meaning of the Protocol field. Corresponds to a constant in +// include/uapi/linux/netfilter/x_tables.h. +const XT_INV_PROTO = 0x40 + +// Flags in IPTIP.InverseFlags. Corresponding constants are in +// include/uapi/linux/netfilter_ipv4/ip_tables.h. 
+const ( + // Invert the meaning of InputInterface. + IPT_INV_VIA_IN = 0x01 + // Invert the meaning of OutputInterface. + IPT_INV_VIA_OUT = 0x02 + // Unclear what this is, as no references to it exist in the kernel. + IPT_INV_TOS = 0x04 + // Invert the meaning of Src. + IPT_INV_SRCIP = 0x08 + // Invert the meaning of Dst. + IPT_INV_DSTIP = 0x10 + // Invert the meaning of the IPT_F_FRAG flag. + IPT_INV_FRAG = 0x20 + // Invert the meaning of the Protocol field. + IPT_INV_PROTO = XT_INV_PROTO + // Enable all flags. + IPT_INV_MASK = 0x7F +) + // SizeOfIPTIP is the size of an IPTIP. const SizeOfIPTIP = 84 diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 2ec11f6ac..faa3e892a 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/iptables" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/usermem" @@ -630,8 +631,14 @@ func filterFromIPTIP(iptip linux.IPTIP) (iptables.IPHeaderFilter, error) { if containsUnsupportedFields(iptip) { return iptables.IPHeaderFilter{}, fmt.Errorf("unsupported fields in struct iptip: %+v", iptip) } + if len(iptip.Dst) != header.IPv4AddressSize || len(iptip.DstMask) != header.IPv4AddressSize { + return iptables.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask)) + } return iptables.IPHeaderFilter{ - Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + Dst: tcpip.Address(iptip.Dst[:]), + DstMask: tcpip.Address(iptip.DstMask[:]), + DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0, }, nil } @@ -639,16 +646,16 @@ func containsUnsupportedFields(iptip linux.IPTIP) bool { // Currently we check that everything except protocol is zeroed. var emptyInetAddr = linux.InetAddr{} var emptyInterface = [linux.IFNAMSIZ]byte{} - return iptip.Dst != emptyInetAddr || - iptip.Src != emptyInetAddr || + // Disable any supported inverse flags. + inverseMask := uint8(linux.IPT_INV_DSTIP) + return iptip.Src != emptyInetAddr || iptip.SrcMask != emptyInetAddr || - iptip.DstMask != emptyInetAddr || iptip.InputInterface != emptyInterface || iptip.OutputInterface != emptyInterface || iptip.InputInterfaceMask != emptyInterface || iptip.OutputInterfaceMask != emptyInterface || iptip.Flags != 0 || - iptip.InverseFlags != 0 + iptip.InverseFlags&^inverseMask != 0 } func validUnderflow(rule iptables.Rule) bool { diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index dbaccbb36..262b6448d 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -240,9 +240,8 @@ func (it *IPTables) checkChain(hook Hook, pkt tcpip.PacketBuffer, table Table, r func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) (RuleVerdict, int) { rule := table.Rules[ruleIdx] - // First check whether the packet matches the IP header filter. - // TODO(gvisor.dev/issue/170): Support other fields of the filter. - if rule.Filter.Protocol != 0 && rule.Filter.Protocol != header.IPv4(pkt.NetworkHeader).TransportProtocol() { + // Check whether the packet matches the IP header filter. + if !filterMatch(rule.Filter, header.IPv4(pkt.NetworkHeader)) { // Continue on to the next rule. 
return RuleJump, ruleIdx + 1 } @@ -263,3 +262,26 @@ func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ru // All the matchers matched, so run the target. return rule.Target.Action(pkt) } + +func filterMatch(filter IPHeaderFilter, hdr header.IPv4) bool { + // TODO(gvisor.dev/issue/170): Support other fields of the filter. + // Check the transport protocol. + if filter.Protocol != 0 && filter.Protocol != hdr.TransportProtocol() { + return false + } + + // Check the destination IP. + dest := hdr.DestinationAddress() + matches := true + for i := range filter.Dst { + if dest[i]&filter.DstMask[i] != filter.Dst[i] { + matches = false + break + } + } + if matches == filter.DstInvert { + return false + } + + return true +} diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 7d032fd23..e7fcf6bff 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -144,6 +144,18 @@ type Rule struct { type IPHeaderFilter struct { // Protocol matches the transport protocol. Protocol tcpip.TransportProtocolNumber + + // Dst matches the destination IP address. + Dst tcpip.Address + + // DstMask masks bits of the destination IP address when comparing with + // Dst. + DstMask tcpip.Address + + // DstInvert inverts the meaning of the destination IP check, i.e. when + // true the filter will match packets that fail the destination + // comparison. + DstInvert bool } // A Matcher is the interface for matching packets. diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index b2fb6401a..0d3350d8a 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -47,6 +47,8 @@ func init() { RegisterTestCase(FilterInputJumpReturnDrop{}) RegisterTestCase(FilterInputJumpBuiltin{}) RegisterTestCase(FilterInputJumpTwice{}) + RegisterTestCase(FilterInputDestination{}) + RegisterTestCase(FilterInputInvertDestination{}) } // FilterInputDropUDP tests that we can drop UDP traffic. @@ -595,3 +597,66 @@ func (FilterInputJumpTwice) ContainerAction(ip net.IP) error { func (FilterInputJumpTwice) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// FilterInputDestination verifies that we can filter packets via `-d +// `. +type FilterInputDestination struct{} + +// Name implements TestCase.Name. +func (FilterInputDestination) Name() string { + return "FilterInputDestination" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputDestination) ContainerAction(ip net.IP) error { + addrs, err := localAddrs() + if err != nil { + return err + } + + // Make INPUT's default action DROP, then ACCEPT all packets bound for + // this machine. + rules := [][]string{{"-P", "INPUT", "DROP"}} + for _, addr := range addrs { + rules = append(rules, []string{"-A", "INPUT", "-d", addr, "-j", "ACCEPT"}) + } + if err := filterTableRules(rules); err != nil { + return err + } + + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputDestination) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// FilterInputDestination verifies that we can filter packets via `! -d +// `. +type FilterInputInvertDestination struct{} + +// Name implements TestCase.Name. +func (FilterInputInvertDestination) Name() string { + return "FilterInputInvertDestination" +} + +// ContainerAction implements TestCase.ContainerAction. 
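The "! -d 127.0.0.1" rule installed below reaches netstack as a destination filter with the invert bit set. A hand-written sketch of that filter (the literal values are for illustration; the real conversion happens in filterFromIPTIP above):

    // Illustrative only: "! -d 127.0.0.1" expressed as an IPHeaderFilter.
    var invertLoopback = iptables.IPHeaderFilter{
        Dst:       tcpip.Address("\x7f\x00\x00\x01"), // 127.0.0.1
        DstMask:   tcpip.Address("\xff\xff\xff\xff"), // single-host /32 mask
        DstInvert: true,
    }

filterMatch masks the packet's destination with DstMask, compares the result to Dst, and then inverts the outcome because DstInvert is set, so only traffic not bound for 127.0.0.1 matches the ACCEPT rule.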
+func (FilterInputInvertDestination) ContainerAction(ip net.IP) error { + // Make INPUT's default action DROP, then ACCEPT all packets not bound + // for 127.0.0.1. + rules := [][]string{ + {"-P", "INPUT", "DROP"}, + {"-A", "INPUT", "!", "-d", localIP, "-j", "ACCEPT"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputInvertDestination) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go index ee2c49f9a..39e648e32 100644 --- a/test/iptables/filter_output.go +++ b/test/iptables/filter_output.go @@ -22,6 +22,8 @@ import ( func init() { RegisterTestCase(FilterOutputDropTCPDestPort{}) RegisterTestCase(FilterOutputDropTCPSrcPort{}) + RegisterTestCase(FilterOutputDestination{}) + RegisterTestCase(FilterOutputInvertDestination{}) } // FilterOutputDropTCPDestPort tests that connections are not accepted on specified source ports. @@ -85,3 +87,57 @@ func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP) error { return nil } + +// FilterOutputDestination tests that we can selectively allow packets to +// certain destinations. +type FilterOutputDestination struct{} + +// Name implements TestCase.Name. +func (FilterOutputDestination) Name() string { + return "FilterOutputDestination" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputDestination) ContainerAction(ip net.IP) error { + rules := [][]string{ + {"-A", "OUTPUT", "-d", ip.String(), "-j", "ACCEPT"}, + {"-P", "OUTPUT", "DROP"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputDestination) LocalAction(ip net.IP) error { + return listenUDP(acceptPort, sendloopDuration) +} + +// FilterOutputInvertDestination tests that we can selectively allow packets +// not headed for a particular destination. +type FilterOutputInvertDestination struct{} + +// Name implements TestCase.Name. +func (FilterOutputInvertDestination) Name() string { + return "FilterOutputInvertDestination" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInvertDestination) ContainerAction(ip net.IP) error { + rules := [][]string{ + {"-A", "OUTPUT", "!", "-d", localIP, "-j", "ACCEPT"}, + {"-P", "OUTPUT", "DROP"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterOutputInvertDestination) LocalAction(ip net.IP) error { + return listenUDP(acceptPort, sendloopDuration) +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 0621861eb..5eabd2461 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -285,3 +285,69 @@ func TestJumpTwice(t *testing.T) { t.Fatal(err) } } + +func TestInputDestination(t *testing.T) { + if err := singleTest(FilterInputDestination{}); err != nil { + t.Fatal(err) + } +} + +func TestInputInvertDestination(t *testing.T) { + if err := singleTest(FilterInputInvertDestination{}); err != nil { + t.Fatal(err) + } +} + +func TestOutputDestination(t *testing.T) { + if err := singleTest(FilterOutputDestination{}); err != nil { + t.Fatal(err) + } +} + +func TestOutputInvertDestination(t *testing.T) { + if err := singleTest(FilterOutputInvertDestination{}); err != nil { + t.Fatal(err) + } +} + +func TestNATOutRedirectIP(t *testing.T) { + if err := singleTest(NATOutRedirectIP{}); err != nil { + t.Fatal(err) + } +} + +func TestNATOutDontRedirectIP(t *testing.T) { + if err := singleTest(NATOutDontRedirectIP{}); err != nil { + t.Fatal(err) + } +} + +func TestNATOutRedirectInvert(t *testing.T) { + if err := singleTest(NATOutRedirectInvert{}); err != nil { + t.Fatal(err) + } +} + +func TestNATPreRedirectIP(t *testing.T) { + if err := singleTest(NATPreRedirectIP{}); err != nil { + t.Fatal(err) + } +} + +func TestNATPreDontRedirectIP(t *testing.T) { + if err := singleTest(NATPreDontRedirectIP{}); err != nil { + t.Fatal(err) + } +} + +func TestNATPreRedirectInvert(t *testing.T) { + if err := singleTest(NATPreRedirectInvert{}); err != nil { + t.Fatal(err) + } +} + +func TestNATRedirectRequiresProtocol(t *testing.T) { + if err := singleTest(NATRedirectRequiresProtocol{}); err != nil { + t.Fatal(err) + } +} diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 32cf5a417..178a662e1 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -24,6 +24,7 @@ import ( ) const iptablesBinary = "iptables" +const localIP = "127.0.0.1" // filterTable calls `iptables -t filter` with the given args. func filterTable(args ...string) error { @@ -46,8 +47,17 @@ func tableCmd(table string, args []string) error { // filterTableRules is like filterTable, but runs multiple iptables commands. func filterTableRules(argsList [][]string) error { + return tableRules("filter", argsList) +} + +// natTableRules is like natTable, but runs multiple iptables commands. +func natTableRules(argsList [][]string) error { + return tableRules("nat", argsList) +} + +func tableRules(table string, argsList [][]string) error { for _, args := range argsList { - if err := filterTable(args...); err != nil { + if err := tableCmd(table, args); err != nil { return err } } @@ -149,3 +159,16 @@ func connectTCP(ip net.IP, remotePort, localPort int, timeout time.Duration) err return nil } + +// localAddrs returns a list of local network interface addresses. 
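One detail the callers of this helper rely on: net.InterfaceAddrs reports each address in CIDR form, and iptables accepts an address/mask argument to -d, so the returned strings can be passed through unchanged. A standalone example, with output values that are examples only:

    package main

    import (
        "fmt"
        "net"
    )

    func main() {
        // Each entry prints in CIDR form, e.g. "127.0.0.1/8" or
        // "10.0.0.2/24", which iptables accepts directly after -d.
        addrs, err := net.InterfaceAddrs()
        if err != nil {
            panic(err)
        }
        for _, addr := range addrs {
            fmt.Println(addr.String())
        }
    }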
+func localAddrs() ([]string, error) { + addrs, err := net.InterfaceAddrs() + if err != nil { + return nil, err + } + addrStrs := make([]string, 0, len(addrs)) + for _, addr := range addrs { + addrStrs = append(addrStrs, addr.String()) + } + return addrStrs, nil +} diff --git a/test/iptables/nat.go b/test/iptables/nat.go index a01117ec8..020c862ad 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -15,8 +15,10 @@ package iptables import ( + "errors" "fmt" "net" + "time" ) const ( @@ -26,6 +28,13 @@ const ( func init() { RegisterTestCase(NATRedirectUDPPort{}) RegisterTestCase(NATDropUDP{}) + RegisterTestCase(NATPreRedirectIP{}) + RegisterTestCase(NATPreDontRedirectIP{}) + RegisterTestCase(NATPreRedirectInvert{}) + RegisterTestCase(NATOutRedirectIP{}) + RegisterTestCase(NATOutDontRedirectIP{}) + RegisterTestCase(NATOutRedirectInvert{}) + RegisterTestCase(NATRedirectRequiresProtocol{}) } // NATRedirectUDPPort tests that packets are redirected to different port. @@ -53,7 +62,8 @@ func (NATRedirectUDPPort) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } -// NATDropUDP tests that packets are not received in ports other than redirect port. +// NATDropUDP tests that packets are not received in ports other than redirect +// port. type NATDropUDP struct{} // Name implements TestCase.Name. @@ -78,3 +88,192 @@ func (NATDropUDP) ContainerAction(ip net.IP) error { func (NATDropUDP) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// NATOutRedirectIP uses iptables to select packets based on destination IP and +// redirects them. +type NATOutRedirectIP struct{} + +// Name implements TestCase.Name. +func (NATOutRedirectIP) Name() string { + return "NATOutRedirectIP" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATOutRedirectIP) ContainerAction(ip net.IP) error { + // Redirect OUTPUT packets to a listening localhost port. + dest := net.IP([]byte{200, 0, 0, 2}) + return loopbackTest(dest, "-A", "OUTPUT", "-d", dest.String(), "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)) +} + +// LocalAction implements TestCase.LocalAction. +func (NATOutRedirectIP) LocalAction(ip net.IP) error { + // No-op. + return nil +} + +// NATOutDontRedirectIP tests that iptables matching with "-d" does not match +// packets it shouldn't. +type NATOutDontRedirectIP struct{} + +// Name implements TestCase.Name. +func (NATOutDontRedirectIP) Name() string { + return "NATOutDontRedirectIP" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATOutDontRedirectIP) ContainerAction(ip net.IP) error { + if err := natTable("-A", "OUTPUT", "-d", localIP, "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil { + return err + } + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (NATOutDontRedirectIP) LocalAction(ip net.IP) error { + return listenUDP(acceptPort, sendloopDuration) +} + +// NATOutRedirectInvert tests that iptables can match with "! -d". +type NATOutRedirectInvert struct{} + +// Name implements TestCase.Name. +func (NATOutRedirectInvert) Name() string { + return "NATOutRedirectInvert" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATOutRedirectInvert) ContainerAction(ip net.IP) error { + // Redirect OUTPUT packets to a listening localhost port. 
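+ // The destination 200.0.0.3 deliberately differs from the matched
+ // address 200.0.0.2: packets to it fail the "-d 200.0.0.2" check, so
+ // the inverted rule redirects them to the local listener.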
+ dest := []byte{200, 0, 0, 3} + destStr := "200.0.0.2" + return loopbackTest(dest, "-A", "OUTPUT", "!", "-d", destStr, "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)) +} + +// LocalAction implements TestCase.LocalAction. +func (NATOutRedirectInvert) LocalAction(ip net.IP) error { + // No-op. + return nil +} + +// NATPreRedirectIP tests that we can use iptables to select packets based on +// destination IP and redirect them. +type NATPreRedirectIP struct{} + +// Name implements TestCase.Name. +func (NATPreRedirectIP) Name() string { + return "NATPreRedirectIP" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATPreRedirectIP) ContainerAction(ip net.IP) error { + addrs, err := localAddrs() + if err != nil { + return err + } + + var rules [][]string + for _, addr := range addrs { + rules = append(rules, []string{"-A", "PREROUTING", "-p", "udp", "-d", addr, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)}) + } + if err := natTableRules(rules); err != nil { + return err + } + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (NATPreRedirectIP) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, dropPort, sendloopDuration) +} + +// NATPreDontRedirectIP tests that iptables matching with "-d" does not match +// packets it shouldn't. +type NATPreDontRedirectIP struct{} + +// Name implements TestCase.Name. +func (NATPreDontRedirectIP) Name() string { + return "NATPreDontRedirectIP" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATPreDontRedirectIP) ContainerAction(ip net.IP) error { + if err := natTable("-A", "PREROUTING", "-p", "udp", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { + return err + } + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (NATPreDontRedirectIP) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// NATPreRedirectInvert tests that iptables can match with "! -d". +type NATPreRedirectInvert struct{} + +// Name implements TestCase.Name. +func (NATPreRedirectInvert) Name() string { + return "NATPreRedirectInvert" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATPreRedirectInvert) ContainerAction(ip net.IP) error { + if err := natTable("-A", "PREROUTING", "-p", "udp", "!", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { + return err + } + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (NATPreRedirectInvert) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, dropPort, sendloopDuration) +} + +// NATRedirectRequiresProtocol tests that use of the --to-ports flag requires a +// protocol to be specified with -p. +type NATRedirectRequiresProtocol struct{} + +// Name implements TestCase.Name. +func (NATRedirectRequiresProtocol) Name() string { + return "NATRedirectRequiresProtocol" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATRedirectRequiresProtocol) ContainerAction(ip net.IP) error { + if err := natTable("-A", "PREROUTING", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err == nil { + return errors.New("expected an error using REDIRECT --to-ports without a protocol") + } + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (NATRedirectRequiresProtocol) LocalAction(ip net.IP) error { + // No-op. 
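+ // The malformed REDIRECT rule is rejected on the container side, so
+ // there is nothing to send or receive here.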
+ return nil +} + +// loopbackTests runs an iptables rule and ensures that packets sent to +// dest:dropPort are received by localhost:acceptPort. +func loopbackTest(dest net.IP, args ...string) error { + if err := natTable(args...); err != nil { + return err + } + sendCh := make(chan error) + listenCh := make(chan error) + go func() { + sendCh <- sendUDPLoop(dest, dropPort, sendloopDuration) + }() + go func() { + listenCh <- listenUDP(acceptPort, sendloopDuration) + }() + select { + case err := <-listenCh: + if err != nil { + return err + } + case <-time.After(sendloopDuration): + return errors.New("timed out") + } + // sendCh will always take the full sendloop time. + return <-sendCh +} -- cgit v1.2.3 From de0b2ebf8635a75bfabfd0a8b48de7923017574e Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Wed, 26 Feb 2020 18:16:19 -0800 Subject: Add getsockopt tests for SO_SNDTIMEO and SO_RCVTIMEO PiperOrigin-RevId: 297485310 --- test/syscalls/linux/socket_generic.cc | 96 ++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc index e8f24a59e..f7d6139f1 100644 --- a/test/syscalls/linux/socket_generic.cc +++ b/test/syscalls/linux/socket_generic.cc @@ -447,6 +447,60 @@ TEST_P(AllSocketPairTest, RecvTimeoutRecvmsgSucceeds) { SyscallFailsWithErrno(EAGAIN)); } +TEST_P(AllSocketPairTest, SendTimeoutDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + timeval actual_tv = {.tv_sec = -1, .tv_usec = -1}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv_sec, 0); + EXPECT_EQ(actual_tv.tv_usec, 0); +} + +TEST_P(AllSocketPairTest, SetGetSendTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + timeval tv = {.tv_sec = 89, .tv_usec = 42000}; + EXPECT_THAT( + setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + timeval actual_tv = {}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv_sec, 89); + EXPECT_EQ(actual_tv.tv_usec, 42000); +} + +TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct timeval_with_extra { + struct timeval tv; + int64_t extra_data; + } ABSL_ATTRIBUTE_PACKED; + + timeval_with_extra tv_extra = { + .tv = {.tv_sec = 0, .tv_usec = 123000}, + }; + + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, + &tv_extra, sizeof(tv_extra)), + SyscallSucceeds()); + + timeval_with_extra actual_tv = {}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv.tv_sec, 0); + EXPECT_EQ(actual_tv.tv.tv_usec, 123000); +} + TEST_P(AllSocketPairTest, SendTimeoutAllowsWrite) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -491,18 +545,36 @@ TEST_P(AllSocketPairTest, SendTimeoutAllowsSendmsg) { ASSERT_NO_FATAL_FAILURE(SendNullCmsg(sockets->first_fd(), buf, sizeof(buf))); } -TEST_P(AllSocketPairTest, SoRcvTimeoIsSet) { +TEST_P(AllSocketPairTest, RecvTimeoutDefault) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - struct timeval tv { - .tv_sec = 0, .tv_usec = 35 - }; + timeval actual_tv = {.tv_sec = -1, 
.tv_usec = -1}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv_sec, 0); + EXPECT_EQ(actual_tv.tv_usec, 0); +} + +TEST_P(AllSocketPairTest, SetGetRecvTimeout) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + timeval tv = {.tv_sec = 123, .tv_usec = 456000}; EXPECT_THAT( setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), SyscallSucceeds()); + + timeval actual_tv = {}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv_sec, 123); + EXPECT_EQ(actual_tv.tv_usec, 456000); } -TEST_P(AllSocketPairTest, SoRcvTimeoIsSetLargerArg) { +TEST_P(AllSocketPairTest, SetGetRecvTimeoutLargerArg) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); struct timeval_with_extra { @@ -510,13 +582,21 @@ TEST_P(AllSocketPairTest, SoRcvTimeoIsSetLargerArg) { int64_t extra_data; } ABSL_ATTRIBUTE_PACKED; - timeval_with_extra tv_extra; - tv_extra.tv.tv_sec = 0; - tv_extra.tv.tv_usec = 25; + timeval_with_extra tv_extra = { + .tv = {.tv_sec = 0, .tv_usec = 432000}, + }; EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, &tv_extra, sizeof(tv_extra)), SyscallSucceeds()); + + timeval_with_extra actual_tv = {}; + socklen_t len = sizeof(actual_tv); + EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVTIMEO, + &actual_tv, &len), + SyscallSucceeds()); + EXPECT_EQ(actual_tv.tv.tv_sec, 0); + EXPECT_EQ(actual_tv.tv.tv_usec, 432000); } TEST_P(AllSocketPairTest, RecvTimeoutRecvmsgOneSecondSucceeds) { -- cgit v1.2.3 From abf7ebcd38e8c2750f4542f29115140bb2b44a9b Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Thu, 27 Feb 2020 10:59:32 -0800 Subject: Internal change. PiperOrigin-RevId: 297638665 --- pkg/sentry/socket/netstack/netstack.go | 40 +++++++++-- pkg/tcpip/transport/packet/endpoint.go | 21 +++++- test/syscalls/linux/packet_socket.cc | 124 ++++++++++++++++++++++++++++++--- 3 files changed, 167 insertions(+), 18 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index e187276c5..48c268bfa 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -712,14 +712,40 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo // Bind implements the linux syscall bind(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { - addr, family, err := AddressAndFamily(sockaddr) - if err != nil { - return err - } - if err := s.checkFamily(family, true /* exact */); err != nil { - return err + family := usermem.ByteOrder.Uint16(sockaddr) + var addr tcpip.FullAddress + + // Bind for AF_PACKET requires only family, protocol and ifindex. + // In function AddressAndFamily, we check the address length which is + // not needed for AF_PACKET bind. 
+ if family == linux.AF_PACKET { + var a linux.SockAddrLink + if len(sockaddr) < sockAddrLinkSize { + return syserr.ErrInvalidArgument + } + binary.Unmarshal(sockaddr[:sockAddrLinkSize], usermem.ByteOrder, &a) + + if a.Protocol != uint16(s.protocol) { + return syserr.ErrInvalidArgument + } + + addr = tcpip.FullAddress{ + NIC: tcpip.NICID(a.InterfaceIndex), + Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]), + } + } else { + var err *syserr.Error + addr, family, err = AddressAndFamily(sockaddr) + if err != nil { + return err + } + + if err = s.checkFamily(family, true /* exact */); err != nil { + return err + } + + addr = s.mapFamily(addr, family) } - addr = s.mapFamily(addr, family) // Issue the bind request to the endpoint. return syserr.TranslateNetstackError(s.Endpoint.Bind(addr)) diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index 5722815e9..09a1cd436 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -76,6 +76,7 @@ type endpoint struct { sndBufSize int closed bool stats tcpip.TransportEndpointStats `state:"nosave"` + bound bool } // NewEndpoint returns a new packet endpoint. @@ -125,6 +126,7 @@ func (ep *endpoint) Close() { } ep.closed = true + ep.bound = false ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } @@ -216,7 +218,24 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { // sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex." // - packet(7). - return tcpip.ErrNotSupported + ep.mu.Lock() + defer ep.mu.Unlock() + + if ep.bound { + return tcpip.ErrAlreadyBound + } + + // Unregister endpoint with all the nics. + ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep) + + // Bind endpoint to receive packets from specific interface. + if err := ep.stack.RegisterPacketEndpoint(addr.NIC, ep.netProto, ep); err != nil { + return err + } + + ep.bound = true + + return nil } // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc index 92ae55eec..bc22de788 100644 --- a/test/syscalls/linux/packet_socket.cc +++ b/test/syscalls/linux/packet_socket.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -163,16 +164,11 @@ int CookedPacketTest::GetLoopbackIndex() { return ifr.ifr_ifindex; } -// Receive via a packet socket. -TEST_P(CookedPacketTest, Receive) { - // Let's use a simple IP payload: a UDP datagram. - FileDescriptor udp_sock = - ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); - SendUDPMessage(udp_sock.get()); - +// Receive and verify the message via packet socket on interface. +void ReceiveMessage(int sock, int ifindex) { // Wait for the socket to become readable. struct pollfd pfd = {}; - pfd.fd = socket_; + pfd.fd = sock; pfd.events = POLLIN; EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 2000), SyscallSucceedsWithValue(1)); @@ -182,9 +178,10 @@ TEST_P(CookedPacketTest, Receive) { char buf[64]; struct sockaddr_ll src = {}; socklen_t src_len = sizeof(src); - ASSERT_THAT(recvfrom(socket_, buf, sizeof(buf), 0, + ASSERT_THAT(recvfrom(sock, buf, sizeof(buf), 0, reinterpret_cast(&src), &src_len), SyscallSucceedsWithValue(packet_size)); + // sockaddr_ll ends with an 8 byte physical address field, but ethernet // addresses only use 6 bytes. 
Linux used to return sizeof(sockaddr_ll)-2 // here, but since commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c returns @@ -194,7 +191,7 @@ TEST_P(CookedPacketTest, Receive) { // TODO(b/129292371): Verify protocol once we return it. // Verify the source address. EXPECT_EQ(src.sll_family, AF_PACKET); - EXPECT_EQ(src.sll_ifindex, GetLoopbackIndex()); + EXPECT_EQ(src.sll_ifindex, ifindex); EXPECT_EQ(src.sll_halen, ETH_ALEN); // This came from the loopback device, so the address is all 0s. for (int i = 0; i < src.sll_halen; i++) { @@ -222,6 +219,18 @@ TEST_P(CookedPacketTest, Receive) { EXPECT_EQ(strncmp(payload, kMessage, sizeof(kMessage)), 0); } +// Receive via a packet socket. +TEST_P(CookedPacketTest, Receive) { + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + SendUDPMessage(udp_sock.get()); + + // Receive and verify the data. + int loopback_index = GetLoopbackIndex(); + ReceiveMessage(socket_, loopback_index); +} + // Send via a packet socket. TEST_P(CookedPacketTest, Send) { // TODO(b/129292371): Remove once we support packet socket writing. @@ -313,6 +322,101 @@ TEST_P(CookedPacketTest, Send) { EXPECT_EQ(src.sin_addr.s_addr, htonl(INADDR_LOOPBACK)); } +// Bind and receive via packet socket. +TEST_P(CookedPacketTest, BindReceive) { + struct sockaddr_ll bind_addr = {}; + bind_addr.sll_family = AF_PACKET; + bind_addr.sll_protocol = htons(GetParam()); + bind_addr.sll_ifindex = GetLoopbackIndex(); + + ASSERT_THAT(bind(socket_, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + SendUDPMessage(udp_sock.get()); + + // Receive and verify the data. + ReceiveMessage(socket_, bind_addr.sll_ifindex); +} + +// Double Bind socket. +TEST_P(CookedPacketTest, DoubleBind) { + struct sockaddr_ll bind_addr = {}; + bind_addr.sll_family = AF_PACKET; + bind_addr.sll_protocol = htons(GetParam()); + bind_addr.sll_ifindex = GetLoopbackIndex(); + + ASSERT_THAT(bind(socket_, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // Binding socket again should fail. + ASSERT_THAT( + bind(socket_, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched + // to EADDRINUSE. + AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL))); +} + +// Bind and verify we do not receive data on interface which is not bound +TEST_P(CookedPacketTest, BindDrop) { + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + struct ifaddrs* if_addr_list = nullptr; + auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); }); + + ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds()); + + // Get interface other than loopback. + struct ifreq ifr = {}; + for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) { + if (strcmp(i->ifa_name, "lo") != 0) { + strncpy(ifr.ifr_name, i->ifa_name, sizeof(ifr.ifr_name)); + break; + } + } + + // Skip if no interface is available other than loopback. + if (strlen(ifr.ifr_name) == 0) { + GTEST_SKIP(); + } + + // Get interface index. + EXPECT_THAT(ioctl(socket_, SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_NE(ifr.ifr_ifindex, 0); + + // Bind to packet socket requires only family, protocol and ifindex. 
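+ // Binding to the non-loopback interface means the datagram sent over
+ // loopback below must never be delivered to this socket.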
+ struct sockaddr_ll bind_addr = {}; + bind_addr.sll_family = AF_PACKET; + bind_addr.sll_protocol = htons(GetParam()); + bind_addr.sll_ifindex = ifr.ifr_ifindex; + + ASSERT_THAT(bind(socket_, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // Send to loopback interface. + struct sockaddr_in dest = {}; + dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + dest.sin_family = AF_INET; + dest.sin_port = kPort; + EXPECT_THAT(sendto(udp_sock.get(), kMessage, sizeof(kMessage), 0, + reinterpret_cast(&dest), sizeof(dest)), + SyscallSucceedsWithValue(sizeof(kMessage))); + + // Wait and make sure the socket never receives any data. + struct pollfd pfd = {}; + pfd.fd = socket_; + pfd.events = POLLIN; + EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0)); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, CookedPacketTest, ::testing::Values(ETH_P_IP, ETH_P_ALL)); -- cgit v1.2.3 From dd1ed5c789ff72fd6bbacda0ff7c7acf9672d25a Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Fri, 28 Feb 2020 14:47:34 +0800 Subject: skip vsyscall test cases on Arm64 Signed-off-by: Bin Lu --- test/syscalls/linux/time.cc | 2 ++ test/syscalls/linux/vsyscall.cc | 2 ++ 2 files changed, 4 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/time.cc b/test/syscalls/linux/time.cc index 1ccb95733..e75bba669 100644 --- a/test/syscalls/linux/time.cc +++ b/test/syscalls/linux/time.cc @@ -26,6 +26,7 @@ namespace { constexpr long kFudgeSeconds = 5; +#if defined(__x86_64__) || defined(__i386__) // Mimics the time(2) wrapper from glibc prior to 2.15. time_t vsyscall_time(time_t* t) { constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; @@ -98,6 +99,7 @@ TEST(TimeTest, VsyscallGettimeofday_InvalidAddressSIGSEGV) { reinterpret_cast(0x1)), ::testing::KilledBySignal(SIGSEGV), ""); } +#endif } // namespace diff --git a/test/syscalls/linux/vsyscall.cc b/test/syscalls/linux/vsyscall.cc index 2c2303358..ae4377108 100644 --- a/test/syscalls/linux/vsyscall.cc +++ b/test/syscalls/linux/vsyscall.cc @@ -24,6 +24,7 @@ namespace testing { namespace { +#if defined(__x86_64__) || defined(__i386__) time_t vsyscall_time(time_t* t) { constexpr uint64_t kVsyscallTimeEntry = 0xffffffffff600400; return reinterpret_cast(kVsyscallTimeEntry)(t); @@ -37,6 +38,7 @@ TEST(VsyscallTest, VsyscallAlwaysAvailableOnGvisor) { time_t t; EXPECT_THAT(vsyscall_time(&t), SyscallSucceeds()); } +#endif } // namespace -- cgit v1.2.3 From 6b4d36e3253238dd72d0861ac1220d147e1de8dd Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Fri, 28 Feb 2020 10:37:52 -0800 Subject: Hide /dev/net/tun when using hostinet. /dev/net/tun does not currently work with hostinet. This has caused some program starts failing because it thinks the feature exists. 
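With this change /dev/net (and the tun device under it) is created only when
the sentry network stack is netstack, so under hostinet a stat(2) of
/dev/net/tun now fails with ENOENT, which the new tuntap_hostinet test
verifies.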
PiperOrigin-RevId: 297876196 --- pkg/sentry/fs/dev/BUILD | 1 + pkg/sentry/fs/dev/dev.go | 7 +++++-- pkg/sentry/fs/dev/net_tun.go | 7 +++++++ pkg/sentry/kernel/kernel.go | 4 ++++ test/syscalls/BUILD | 5 +++++ test/syscalls/linux/BUILD | 12 +++++++++++ test/syscalls/linux/dev.cc | 7 ------- test/syscalls/linux/tuntap.cc | 7 +++++++ test/syscalls/linux/tuntap_hostinet.cc | 37 ++++++++++++++++++++++++++++++++++ 9 files changed, 78 insertions(+), 9 deletions(-) create mode 100644 test/syscalls/linux/tuntap_hostinet.cc (limited to 'test') diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 9b6bb26d0..9379a4d7b 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -26,6 +26,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/fs/tmpfs", + "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/memmap", "//pkg/sentry/mm", diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go index 7e66c29b0..acbd401a0 100644 --- a/pkg/sentry/fs/dev/dev.go +++ b/pkg/sentry/fs/dev/dev.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/usermem" ) @@ -124,10 +125,12 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode { "ptmx": newSymlink(ctx, "pts/ptmx", msrc), "tty": newCharacterDevice(ctx, newTTYDevice(ctx, fs.RootOwner, 0666), msrc, ttyDevMajor, ttyDevMinor), + } - "net": newDirectory(ctx, map[string]*fs.Inode{ + if isNetTunSupported(inet.StackFromContext(ctx)) { + contents["net"] = newDirectory(ctx, map[string]*fs.Inode{ "tun": newCharacterDevice(ctx, newNetTunDevice(ctx, fs.RootOwner, 0666), msrc, netTunDevMajor, netTunDevMinor), - }, msrc), + }, msrc) } iops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go index 755644488..dc7ad075a 100644 --- a/pkg/sentry/fs/dev/net_tun.go +++ b/pkg/sentry/fs/dev/net_tun.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/syserror" @@ -168,3 +169,9 @@ func (fops *netTunFileOperations) EventRegister(e *waiter.Entry, mask waiter.Eve func (fops *netTunFileOperations) EventUnregister(e *waiter.Entry) { fops.device.EventUnregister(e) } + +// isNetTunSupported returns whether /dev/net/tun device is supported for s. 
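+// Only a netstack-backed stack implements the TUN device; a hostinet stack
+// (or a nil stack) reports false.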
+func isNetTunSupported(s inet.Stack) bool { + _, ok := s.(*netstack.Stack) + return ok +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 8b76750e9..1d627564f 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -755,6 +755,8 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { return ctx.k.GlobalInit().Leader().MountNamespaceVFS2() case fs.CtxDirentCacheLimiter: return ctx.k.DirentCacheLimiter + case inet.CtxStack: + return ctx.k.RootNetworkNamespace().Stack() case ktime.CtxRealtimeClock: return ctx.k.RealtimeClock() case limits.CtxLimits: @@ -1481,6 +1483,8 @@ func (ctx supervisorContext) Value(key interface{}) interface{} { return ctx.k.GlobalInit().Leader().MountNamespaceVFS2() case fs.CtxDirentCacheLimiter: return ctx.k.DirentCacheLimiter + case inet.CtxStack: + return ctx.k.RootNetworkNamespace().Stack() case ktime.CtxRealtimeClock: return ctx.k.RealtimeClock() case limits.CtxLimits: diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 3518e862d..a69b0ce13 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -680,6 +680,11 @@ syscall_test( syscall_test(test = "//test/syscalls/linux:tuntap_test") +syscall_test( + add_hostinet = True, + test = "//test/syscalls/linux:tuntap_hostinet_test", +) + syscall_test(test = "//test/syscalls/linux:udp_bind_test") syscall_test( diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 704bae17b..70c120e42 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3460,6 +3460,18 @@ cc_binary( ], ) +cc_binary( + name = "tuntap_hostinet_test", + testonly = 1, + srcs = ["tuntap_hostinet.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_library( name = "udp_socket_test_cases", testonly = 1, diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 4e473268c..4dd302eed 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -153,13 +153,6 @@ TEST(DevTest, TTYExists) { EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); } -TEST(DevTest, NetTunExists) { - struct stat statbuf = {}; - ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallSucceeds()); - // Check that it's a character device with rw-rw-rw- permissions. - EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); -} - } // namespace } // namespace testing diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index f6ac9d7b8..f734511d6 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -153,6 +153,13 @@ std::string CreateArpPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip, } // namespace +TEST(TuntapStaticTest, NetTunExists) { + struct stat statbuf; + ASSERT_THAT(stat(kDevNetTun, &statbuf), SyscallSucceeds()); + // Check that it's a character device with rw-rw-rw- permissions. + EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); +} + class TuntapTest : public ::testing::Test { protected: void TearDown() override { diff --git a/test/syscalls/linux/tuntap_hostinet.cc b/test/syscalls/linux/tuntap_hostinet.cc new file mode 100644 index 000000000..0c527419e --- /dev/null +++ b/test/syscalls/linux/tuntap_hostinet.cc @@ -0,0 +1,37 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(TuntapHostInetTest, NoNetTun) { + SKIP_IF(!IsRunningOnGvisor()); + + struct stat statbuf; + ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallFailsWithErrno(ENOENT)); +} + +} // namespace +} // namespace testing + +} // namespace gvisor -- cgit v1.2.3 From df8740b8a7fb8fa05d7a0387749b61d57a74c06c Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Fri, 28 Feb 2020 13:21:33 -0800 Subject: Mark gettid and getdents as nogotsan PiperOrigin-RevId: 297915815 --- test/perf/BUILD | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test') diff --git a/test/perf/BUILD b/test/perf/BUILD index 346a28e16..0a0def6a3 100644 --- a/test/perf/BUILD +++ b/test/perf/BUILD @@ -30,6 +30,7 @@ syscall_test( syscall_test( size = "enormous", + tags = ["nogotsan"], test = "//test/perf/linux:getdents_benchmark", ) @@ -40,6 +41,7 @@ syscall_test( syscall_test( size = "enormous", + tags = ["nogotsan"], test = "//test/perf/linux:gettid_benchmark", ) -- cgit v1.2.3 From 36b193b1db60cad3c1c65ce3abef03a6a0594e3e Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Mon, 2 Mar 2020 07:13:47 +0000 Subject: Fix syscall test build error on arm64. The error was introduced in the merge of PR #1471. Some codes are missing when adding bazel select_arch command to the test/syscall/linux/BUILD file. Signed-off-by: Haibo Xu Change-Id: I8cae3f4ae78c2e14671f3ac6e7361dc2806d9305 --- test/syscalls/linux/BUILD | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 70c120e42..9ab13ba07 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -608,7 +608,10 @@ cc_binary( cc_binary( name = "exceptions_test", testonly = 1, - srcs = ["exceptions.cc"], + srcs = select_arch( + amd64 = ["exceptions.cc"], + arm64 = [], + ), linkstatic = 1, deps = [ gtest, @@ -1475,7 +1478,10 @@ cc_binary( cc_binary( name = "arch_prctl_test", testonly = 1, - srcs = ["arch_prctl.cc"], + srcs = select_arch( + amd64 = ["arch_prctl.cc"], + arm64 = [], + ), linkstatic = 1, deps = [ "//test/util:file_descriptor", @@ -3322,7 +3328,10 @@ cc_binary( cc_binary( name = "sysret_test", testonly = 1, - srcs = ["sysret.cc"], + srcs = select_arch( + amd64 = ["sysret.cc"], + arm64 = [], + ), linkstatic = 1, deps = [ gtest, -- cgit v1.2.3 From 33101752501fafea99d77f34bbd65f3e0083d22e Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Mon, 2 Mar 2020 14:43:52 -0800 Subject: Fix data-race when reading/writing e.amss. 
PiperOrigin-RevId: 298451319 --- pkg/tcpip/transport/tcp/connect.go | 11 +++++++++-- pkg/tcpip/transport/tcp/endpoint.go | 29 ++++++++++++++++++----------- test/syscalls/linux/tcp_socket.cc | 15 +++++++++++++++ 3 files changed, 42 insertions(+), 13 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index cd247f3e1..ae4f3f3a9 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -295,6 +295,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { h.state = handshakeSynRcvd h.ep.mu.Lock() ttl := h.ep.ttl + amss := h.ep.amss h.ep.setEndpointState(StateSynRecv) h.ep.mu.Unlock() synOpts := header.TCPSynOptions{ @@ -307,7 +308,7 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error { // permits SACK. This is not explicitly defined in the RFC but // this is the behaviour implemented by Linux. SACKPermitted: rcvSynOpts.SACKPermitted, - MSS: h.ep.amss, + MSS: amss, } if ttl == 0 { ttl = s.route.DefaultTTL() @@ -356,6 +357,10 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { return tcpip.ErrInvalidEndpointState } + h.ep.mu.RLock() + amss := h.ep.amss + h.ep.mu.RUnlock() + h.resetState() synOpts := header.TCPSynOptions{ WS: h.rcvWndScale, @@ -363,7 +368,7 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error { TSVal: h.ep.timestamp(), TSEcr: h.ep.recentTimestamp(), SACKPermitted: h.ep.sackPermitted, - MSS: h.ep.amss, + MSS: amss, } h.ep.sendSynTCP(&s.route, h.ep.ID, h.ep.ttl, h.ep.sendTOS, h.flags, h.iss, h.ackNum, h.rcvWnd, synOpts) return nil @@ -530,6 +535,7 @@ func (h *handshake) execute() *tcpip.Error { // Send the initial SYN segment and loop until the handshake is // completed. + h.ep.mu.Lock() h.ep.amss = calculateAdvertisedMSS(h.ep.userMSS, h.ep.route) synOpts := header.TCPSynOptions{ @@ -540,6 +546,7 @@ func (h *handshake) execute() *tcpip.Error { SACKPermitted: bool(sackEnabled), MSS: h.ep.amss, } + h.ep.mu.Unlock() // Execute is also called in a listen context so we want to make sure we // only send the TS/SACK option when we received the TS/SACK in the diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 9e72730bd..8b9154e69 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -959,15 +959,18 @@ func (e *endpoint) initialReceiveWindow() int { // ModerateRecvBuf adjusts the receive buffer and the advertised window // based on the number of bytes copied to user space. 
func (e *endpoint) ModerateRecvBuf(copied int) { + e.mu.RLock() e.rcvListMu.Lock() if e.rcvAutoParams.disabled { e.rcvListMu.Unlock() + e.mu.RUnlock() return } now := time.Now() if rtt := e.rcvAutoParams.rtt; rtt == 0 || now.Sub(e.rcvAutoParams.measureTime) < rtt { e.rcvAutoParams.copied += copied e.rcvListMu.Unlock() + e.mu.RUnlock() return } prevRTTCopied := e.rcvAutoParams.copied + copied @@ -1008,7 +1011,7 @@ func (e *endpoint) ModerateRecvBuf(copied int) { e.rcvBufSize = rcvWnd availAfter := e.receiveBufferAvailableLocked() mask := uint32(notifyReceiveWindowChanged) - if crossed, above := e.windowCrossedACKThreshold(availAfter - availBefore); crossed && above { + if crossed, above := e.windowCrossedACKThresholdLocked(availAfter - availBefore); crossed && above { mask |= notifyNonZeroReceiveWindow } e.notifyProtocolGoroutine(mask) @@ -1023,6 +1026,7 @@ func (e *endpoint) ModerateRecvBuf(copied int) { e.rcvAutoParams.measureTime = now e.rcvAutoParams.copied = 0 e.rcvListMu.Unlock() + e.mu.RUnlock() } // IPTables implements tcpip.Endpoint.IPTables. @@ -1052,7 +1056,6 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, v, err := e.readLocked() e.rcvListMu.Unlock() - e.mu.RUnlock() if err == tcpip.ErrClosedForReceive { @@ -1085,7 +1088,7 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) { // enough buffer space, to either fit an aMSS or half a receive buffer // (whichever smaller), then notify the protocol goroutine to send a // window update. - if crossed, above := e.windowCrossedACKThreshold(len(v)); crossed && above { + if crossed, above := e.windowCrossedACKThresholdLocked(len(v)); crossed && above { e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow) } @@ -1303,9 +1306,9 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro return num, tcpip.ControlMessages{}, nil } -// windowCrossedACKThreshold checks if the receive window to be announced now -// would be under aMSS or under half receive buffer, whichever smaller. This is -// useful as a receive side silly window syndrome prevention mechanism. If +// windowCrossedACKThresholdLocked checks if the receive window to be announced +// now would be under aMSS or under half receive buffer, whichever smaller. This +// is useful as a receive side silly window syndrome prevention mechanism. If // window grows to reasonable value, we should send ACK to the sender to inform // the rx space is now large. We also want ensure a series of small read()'s // won't trigger a flood of spurious tiny ACK's. @@ -1316,7 +1319,9 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro // crossed will be true if the window size crossed the ACK threshold. // above will be true if the new window is >= ACK threshold and false // otherwise. -func (e *endpoint) windowCrossedACKThreshold(deltaBefore int) (crossed bool, above bool) { +// +// Precondition: e.mu and e.rcvListMu must be held. 
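+//
+// Holding e.mu makes the aMSS read race-free (the data race addressed by
+// this change); e.rcvListMu covers the receive buffer accounting.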
+func (e *endpoint) windowCrossedACKThresholdLocked(deltaBefore int) (crossed bool, above bool) { newAvail := e.receiveBufferAvailableLocked() oldAvail := newAvail - deltaBefore if oldAvail < 0 { @@ -1379,6 +1384,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { mask := uint32(notifyReceiveWindowChanged) + e.mu.RLock() e.rcvListMu.Lock() // Make sure the receive buffer size allows us to send a @@ -1405,11 +1411,11 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { // Immediately send an ACK to uncork the sender silly window // syndrome prevetion, when our available space grows above aMSS // or half receive buffer, whichever smaller. - if crossed, above := e.windowCrossedACKThreshold(availAfter - availBefore); crossed && above { + if crossed, above := e.windowCrossedACKThresholdLocked(availAfter - availBefore); crossed && above { mask |= notifyNonZeroReceiveWindow } e.rcvListMu.Unlock() - + e.mu.RUnlock() e.notifyProtocolGoroutine(mask) return nil @@ -2414,13 +2420,14 @@ func (e *endpoint) updateSndBufferUsage(v int) { // to be read, or when the connection is closed for receiving (in which case // s will be nil). func (e *endpoint) readyToRead(s *segment) { + e.mu.RLock() e.rcvListMu.Lock() if s != nil { s.incRef() e.rcvBufUsed += s.data.Size() // Increase counter if the receive window falls down below MSS // or half receive buffer size, whichever smaller. - if crossed, above := e.windowCrossedACKThreshold(-s.data.Size()); crossed && !above { + if crossed, above := e.windowCrossedACKThresholdLocked(-s.data.Size()); crossed && !above { e.stats.ReceiveErrors.ZeroRcvWindowState.Increment() } e.rcvList.PushBack(s) @@ -2428,7 +2435,7 @@ func (e *endpoint) readyToRead(s *segment) { e.rcvClosed = true } e.rcvListMu.Unlock() - + e.mu.RUnlock() e.waiterQueue.Notify(waiter.EventIn) } diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index c4591a3b9..579463384 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -1349,6 +1349,21 @@ TEST_P(SimpleTcpSocketTest, RecvOnClosedSocket) { SyscallFailsWithErrno(ENOTCONN)); } +TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) { + auto s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + sockaddr_storage addr = + ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam())); + socklen_t addrlen = sizeof(addr); + + RetryEINTR(connect)(s.get(), reinterpret_cast(&addr), + addrlen); + int buf_sz = 1 << 18; + EXPECT_THAT( + setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)), + SyscallSucceedsWithValue(0)); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); -- cgit v1.2.3 From 43abb24657e737dee1108ff0d512b2e1b6d8a3f6 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Mon, 2 Mar 2020 16:30:51 -0800 Subject: Fix panic caused by invalid address for Bind in packet sockets. 
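Previously Bind read the address family with usermem.ByteOrder.Uint16 before
checking the length, so a sockaddr shorter than two bytes caused an
out-of-range panic in the sentry. Bind now returns EINVAL for such addresses;
a null address is presumably already rejected with EFAULT when the sockaddr
is copied in from user memory, as the new BindFail test expects.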
PiperOrigin-RevId: 298476533 --- pkg/sentry/socket/netstack/netstack.go | 4 ++++ test/syscalls/linux/packet_socket.cc | 13 +++++++++++++ 2 files changed, 17 insertions(+) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 1eeb37446..13a9a60b4 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -712,6 +712,10 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo // Bind implements the linux syscall bind(2) for sockets backed by // tcpip.Endpoint. func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { + if len(sockaddr) < 2 { + return syserr.ErrInvalidArgument + } + family := usermem.ByteOrder.Uint16(sockaddr) var addr tcpip.FullAddress diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc index bc22de788..248762ca9 100644 --- a/test/syscalls/linux/packet_socket.cc +++ b/test/syscalls/linux/packet_socket.cc @@ -417,6 +417,19 @@ TEST_P(CookedPacketTest, BindDrop) { EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0)); } +// Bind with invalid address. +TEST_P(CookedPacketTest, BindFail) { + // Null address. + ASSERT_THAT(bind(socket_, nullptr, sizeof(struct sockaddr)), + SyscallFailsWithErrno(EFAULT)); + + // Address of size 1. + uint8_t addr = 0; + ASSERT_THAT( + bind(socket_, reinterpret_cast(&addr), sizeof(addr)), + SyscallFailsWithErrno(EINVAL)); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, CookedPacketTest, ::testing::Values(ETH_P_IP, ETH_P_ALL)); -- cgit v1.2.3 From fc3a09cd3c56ef20fd398a5f61a5e59111ed55b3 Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Tue, 3 Mar 2020 17:45:10 +0800 Subject: code clean: minor changes to compatible with ubuntu18.04 Signed-off-by: Bin Lu --- test/syscalls/linux/bad.cc | 2 +- test/syscalls/linux/seccomp.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/bad.cc b/test/syscalls/linux/bad.cc index adfb149df..a26fc6af3 100644 --- a/test/syscalls/linux/bad.cc +++ b/test/syscalls/linux/bad.cc @@ -28,7 +28,7 @@ namespace { constexpr uint32_t kNotImplementedSyscall = SYS_get_kernel_syms; #elif __aarch64__ // Use the last of arch_specific_syscalls which are not implemented on arm64. -constexpr uint32_t kNotImplementedSyscall = SYS_arch_specific_syscall + 15; +constexpr uint32_t kNotImplementedSyscall = __NR_arch_specific_syscall + 15; #endif TEST(BadSyscallTest, NotImplemented) { diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index cf6499f8b..8e0fc9acc 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -53,7 +53,7 @@ namespace { constexpr uint32_t kFilteredSyscall = SYS_vserver; #elif __aarch64__ // Use the last of arch_specific_syscalls which are not implemented on arm64. -constexpr uint32_t kFilteredSyscall = SYS_arch_specific_syscall + 15; +constexpr uint32_t kFilteredSyscall = __NR_arch_specific_syscall + 15; #endif // Applies a seccomp-bpf filter that returns `filtered_result` for -- cgit v1.2.3 From b3c549d8391e7cadd82a5ab9280bc63bb372aa97 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 3 Mar 2020 12:36:37 -0800 Subject: Move temp_umask to test/util. 
PiperOrigin-RevId: 298667595 --- test/syscalls/linux/BUILD | 9 ++------- test/syscalls/linux/mkdir.cc | 2 +- test/syscalls/linux/open_create.cc | 2 +- test/syscalls/linux/temp_umask.h | 39 -------------------------------------- test/util/BUILD | 6 ++++++ test/util/temp_umask.h | 39 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 49 insertions(+), 48 deletions(-) delete mode 100644 test/syscalls/linux/temp_umask.h create mode 100644 test/util/temp_umask.h (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 70c120e42..dae2b1077 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -166,11 +166,6 @@ cc_library( ], ) -cc_library( - name = "temp_umask", - hdrs = ["temp_umask.h"], -) - cc_library( name = "unix_domain_socket_test_util", testonly = 1, @@ -1140,11 +1135,11 @@ cc_binary( srcs = ["mkdir.cc"], linkstatic = 1, deps = [ - ":temp_umask", "//test/util:capability_util", "//test/util:fs_util", gtest, "//test/util:temp_path", + "//test/util:temp_umask", "//test/util:test_main", "//test/util:test_util", ], @@ -1299,12 +1294,12 @@ cc_binary( srcs = ["open_create.cc"], linkstatic = 1, deps = [ - ":temp_umask", "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", gtest, "//test/util:temp_path", + "//test/util:temp_umask", "//test/util:test_main", "//test/util:test_util", ], diff --git a/test/syscalls/linux/mkdir.cc b/test/syscalls/linux/mkdir.cc index cf138d328..def4c50a4 100644 --- a/test/syscalls/linux/mkdir.cc +++ b/test/syscalls/linux/mkdir.cc @@ -18,10 +18,10 @@ #include #include "gtest/gtest.h" -#include "test/syscalls/linux/temp_umask.h" #include "test/util/capability_util.h" #include "test/util/fs_util.h" #include "test/util/temp_path.h" +#include "test/util/temp_umask.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc index 902d0a0dc..51eacf3f2 100644 --- a/test/syscalls/linux/open_create.cc +++ b/test/syscalls/linux/open_create.cc @@ -19,11 +19,11 @@ #include #include "gtest/gtest.h" -#include "test/syscalls/linux/temp_umask.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" #include "test/util/temp_path.h" +#include "test/util/temp_umask.h" #include "test/util/test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/temp_umask.h b/test/syscalls/linux/temp_umask.h deleted file mode 100644 index 81a25440c..000000000 --- a/test/syscalls/linux/temp_umask.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_ -#define GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_ - -#include -#include - -namespace gvisor { -namespace testing { - -class TempUmask { - public: - // Sets the process umask to `mask`. - explicit TempUmask(mode_t mask) : old_mask_(umask(mask)) {} - - // Sets the process umask to its previous value. 
- ~TempUmask() { umask(old_mask_); } - - private: - mode_t old_mask_; -}; - -} // namespace testing -} // namespace gvisor - -#endif // GVISOR_TEST_SYSCALLS_TEMP_UMASK_H_ diff --git a/test/util/BUILD b/test/util/BUILD index 8b5a0f25c..2a17c33ee 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -350,3 +350,9 @@ cc_library( ":save_util", ], ) + +cc_library( + name = "temp_umask", + testonly = 1, + hdrs = ["temp_umask.h"], +) diff --git a/test/util/temp_umask.h b/test/util/temp_umask.h new file mode 100644 index 000000000..e7de84a54 --- /dev/null +++ b/test/util/temp_umask.h @@ -0,0 +1,39 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_UTIL_TEMP_UMASK_H_ +#define GVISOR_TEST_UTIL_TEMP_UMASK_H_ + +#include +#include + +namespace gvisor { +namespace testing { + +class TempUmask { + public: + // Sets the process umask to `mask`. + explicit TempUmask(mode_t mask) : old_mask_(umask(mask)) {} + + // Sets the process umask to its previous value. + ~TempUmask() { umask(old_mask_); } + + private: + mode_t old_mask_; +}; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_UTIL_TEMP_UMASK_H_ -- cgit v1.2.3 From ef1219c1451a75916693a54ddad39d04cf763d90 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 4 Mar 2020 00:14:41 -0800 Subject: Use shuf instead of $RANDOM everywhere. $RANDOM can cause collisions but shuf uses /dev/urandom so it ought to cause fewer. PiperOrigin-RevId: 298786344 --- test/packetdrill/packetdrill_test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/packetdrill/packetdrill_test.sh b/test/packetdrill/packetdrill_test.sh index 0b22dfd5c..c8268170f 100755 --- a/test/packetdrill/packetdrill_test.sh +++ b/test/packetdrill/packetdrill_test.sh @@ -91,8 +91,8 @@ fi # Variables specific to the test runner start with TEST_RUNNER_. declare -r PACKETDRILL="/packetdrill/gtests/net/packetdrill/packetdrill" # Use random numbers so that test networks don't collide. -declare -r CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" -declare -r TEST_NET="test_net-${RANDOM}${RANDOM}" +declare -r CTRL_NET="ctrl_net-$(shuf -i 0-99999999 -n 1)" +declare -r TEST_NET="test_net-$(shuf -i 0-99999999 -n 1)" declare -r tolerance_usecs=100000 # On both DUT and test runner, testing packets are on the eth2 interface. declare -r TEST_DEVICE="eth2" -- cgit v1.2.3 From 504c9e14d61a9ca9fa3615290a05471684019ecc Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 3 Mar 2020 15:53:48 -0800 Subject: test/runner: use proper filters for test cases The benchmark_filter options accepts regex-s, but the gtest-filter option accepts shell-like wildcards. 
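For example (the test name here is illustrative), a test case is now selected
with

  --gtest_filter=SocketTest.Bind

instead of

  --gtest_filter=^SocketTest.Bind$

where gtest treats "^" and "$" as ordinary characters and the anchored
pattern matches no test at all.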
Fixes #2034 Signed-off-by: Andrei Vagin --- test/runner/gtest/gtest.go | 7 ++++--- test/syscalls/linux/tuntap_hostinet.cc | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/runner/gtest/gtest.go b/test/runner/gtest/gtest.go index f96e2415e..869169ad5 100644 --- a/test/runner/gtest/gtest.go +++ b/test/runner/gtest/gtest.go @@ -66,13 +66,12 @@ func (tc TestCase) Args() []string { } if tc.benchmark { return []string{ - fmt.Sprintf("%s=^$", filterTestFlag), fmt.Sprintf("%s=^%s$", filterBenchmarkFlag, tc.Name), + fmt.Sprintf("%s=", filterTestFlag), } } return []string{ - fmt.Sprintf("%s=^%s$", filterTestFlag, tc.FullName()), - fmt.Sprintf("%s=^$", filterBenchmarkFlag), + fmt.Sprintf("%s=%s", filterTestFlag, tc.FullName()), } } @@ -147,6 +146,8 @@ func ParseTestCases(testBin string, benchmarks bool, extraArgs ...string) ([]Tes return nil, fmt.Errorf("could not enumerate gtest benchmarks: %v\nstderr\n%s", err, exitErr.Stderr) } + out = []byte(strings.Trim(string(out), "\n")) + // Parse benchmark output. for _, line := range strings.Split(string(out), "\n") { // Strip comments. diff --git a/test/syscalls/linux/tuntap_hostinet.cc b/test/syscalls/linux/tuntap_hostinet.cc index 0c527419e..1513fb9d5 100644 --- a/test/syscalls/linux/tuntap_hostinet.cc +++ b/test/syscalls/linux/tuntap_hostinet.cc @@ -26,6 +26,7 @@ namespace { TEST(TuntapHostInetTest, NoNetTun) { SKIP_IF(!IsRunningOnGvisor()); + SKIP_IF(!IsRunningWithHostinet()); struct stat statbuf; ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallFailsWithErrno(ENOENT)); -- cgit v1.2.3 From da48fc6cca23a38faef51c5b5f8ae609940773a0 Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Thu, 5 Mar 2020 18:21:39 -0800 Subject: Stub oom_score_adj and oom_score. Adds an oom_score_adj and oom_score proc file stub. oom_score_adj accepts writes of values -1000 to 1000 and persists the value with the task. New tasks inherit the parent's oom_score_adj. oom_score is a read-only stub that always returns the value '0'. Issue #202 PiperOrigin-RevId: 299245355 --- pkg/sentry/fs/proc/task.go | 126 ++++++++++++++++++++++++++----- pkg/sentry/fsimpl/proc/task.go | 12 +-- pkg/sentry/fsimpl/proc/task_files.go | 43 +++++++++++ pkg/sentry/fsimpl/proc/tasks_test.go | 32 ++++---- pkg/sentry/kernel/task.go | 33 ++++++++ pkg/sentry/kernel/task_clone.go | 6 ++ pkg/sentry/kernel/task_start.go | 4 + test/syscalls/BUILD | 8 +- test/syscalls/linux/BUILD | 13 ++++ test/syscalls/linux/proc.cc | 21 ++++++ test/syscalls/linux/proc_pid_oomscore.cc | 72 ++++++++++++++++++ 11 files changed, 330 insertions(+), 40 deletions(-) create mode 100644 test/syscalls/linux/proc_pid_oomscore.cc (limited to 'test') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 8ab8d8a02..4e9b0fc00 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -72,24 +72,26 @@ var _ fs.InodeOperations = (*taskDir)(nil) // newTaskDir creates a new proc task entry. 
func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode { contents := map[string]*fs.Inode{ - "auxv": newAuxvec(t, msrc), - "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), - "comm": newComm(t, msrc), - "environ": newExecArgInode(t, msrc, environExecArg), - "exe": newExe(t, msrc), - "fd": newFdDir(t, msrc), - "fdinfo": newFdInfoDir(t, msrc), - "gid_map": newGIDMap(t, msrc), - "io": newIO(t, msrc, isThreadGroup), - "maps": newMaps(t, msrc), - "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), - "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), - "ns": newNamespaceDir(t, msrc), - "smaps": newSmaps(t, msrc), - "stat": newTaskStat(t, msrc, isThreadGroup, p.pidns), - "statm": newStatm(t, msrc), - "status": newStatus(t, msrc, p.pidns), - "uid_map": newUIDMap(t, msrc), + "auxv": newAuxvec(t, msrc), + "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), + "comm": newComm(t, msrc), + "environ": newExecArgInode(t, msrc, environExecArg), + "exe": newExe(t, msrc), + "fd": newFdDir(t, msrc), + "fdinfo": newFdInfoDir(t, msrc), + "gid_map": newGIDMap(t, msrc), + "io": newIO(t, msrc, isThreadGroup), + "maps": newMaps(t, msrc), + "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), + "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), + "ns": newNamespaceDir(t, msrc), + "oom_score": newOOMScore(t, msrc), + "oom_score_adj": newOOMScoreAdj(t, msrc), + "smaps": newSmaps(t, msrc), + "stat": newTaskStat(t, msrc, isThreadGroup, p.pidns), + "statm": newStatm(t, msrc), + "status": newStatus(t, msrc, p.pidns), + "uid_map": newUIDMap(t, msrc), } if isThreadGroup { contents["task"] = p.newSubtasks(t, msrc) @@ -796,4 +798,92 @@ func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequenc return int64(n), err } +// newOOMScore returns a oom_score file. It is a stub that always returns 0. +// TODO(gvisor.dev/issue/1967) +func newOOMScore(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + return newStaticProcInode(t, msrc, []byte("0\n")) +} + +// oomScoreAdj is a file containing the oom_score adjustment for a task. +// +// +stateify savable +type oomScoreAdj struct { + fsutil.SimpleFileInode + + t *kernel.Task +} + +// +stateify savable +type oomScoreAdjFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + t *kernel.Task +} + +// newOOMScoreAdj returns a oom_score_adj file. +func newOOMScoreAdj(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + i := &oomScoreAdj{ + SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), + t: t, + } + return newProcInode(t, i, msrc, fs.SpecialFile, t) +} + +// Truncate implements fs.InodeOperations.Truncate. Truncate is called when +// O_TRUNC is specified for any kind of existing Dirent but is not called via +// (f)truncate for proc files. +func (*oomScoreAdj) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +// GetFile implements fs.InodeOperations.GetFile. 
+func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &oomScoreAdjFile{t: o.t}), nil +} + +// Read implements fs.FileOperations.Read. +func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + adj, err := f.t.OOMScoreAdj() + if err != nil { + return 0, err + } + adjBytes := []byte(strconv.FormatInt(int64(adj), 10) + "\n") + n, err := dst.CopyOut(ctx, adjBytes) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit input size so as not to impact performance if input size is large. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + + if err := f.t.SetOOMScoreAdj(v); err != nil { + return 0, err + } + + return n, nil +} + // LINT.ThenChange(../../fsimpl/proc/task.go|../../fsimpl/proc/task_files.go) diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 2d814668a..18e5cd6f6 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -62,11 +62,13 @@ func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNames "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"), "user": newNamespaceSymlink(task, inoGen.NextIno(), "user"), }), - "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}), - "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), - "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}), - "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}), - "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}), + "oom_score": newTaskOwnedFile(task, inoGen.NextIno(), 0444, newStaticFile("0\n")), + "oom_score_adj": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &oomScoreAdj{task: task}), + "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}), + "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), + "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}), + "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}), + "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}), } if isThreadGroup { contents["task"] = newSubtasks(task, pidns, inoGen, cgroupControllers) diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index efd3b3453..5a231ac86 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -525,3 +525,46 @@ func (i *ioData) Generate(ctx context.Context, buf *bytes.Buffer) error { fmt.Fprintf(buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled) return nil } + +// oomScoreAdj is a stub of the /proc//oom_score_adj file. 
+// +// +stateify savable +type oomScoreAdj struct { + kernfs.DynamicBytesFile + + task *kernel.Task +} + +var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error { + adj, err := o.task.OOMScoreAdj() + if err != nil { + return err + } + fmt.Fprintf(buf, "%d\n", adj) + return nil +} + +// Write implements vfs.WritableDynamicBytesSource.Write. +func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit input size so as not to impact performance if input size is large. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + + if err := o.task.SetOOMScoreAdj(v); err != nil { + return 0, err + } + + return n, nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index c5d531fe0..0eb401619 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -63,21 +63,23 @@ var ( "thread-self": threadSelfLink.NextOff, } taskStaticFiles = map[string]testutil.DirentType{ - "auxv": linux.DT_REG, - "cgroup": linux.DT_REG, - "cmdline": linux.DT_REG, - "comm": linux.DT_REG, - "environ": linux.DT_REG, - "gid_map": linux.DT_REG, - "io": linux.DT_REG, - "maps": linux.DT_REG, - "ns": linux.DT_DIR, - "smaps": linux.DT_REG, - "stat": linux.DT_REG, - "statm": linux.DT_REG, - "status": linux.DT_REG, - "task": linux.DT_DIR, - "uid_map": linux.DT_REG, + "auxv": linux.DT_REG, + "cgroup": linux.DT_REG, + "cmdline": linux.DT_REG, + "comm": linux.DT_REG, + "environ": linux.DT_REG, + "gid_map": linux.DT_REG, + "io": linux.DT_REG, + "maps": linux.DT_REG, + "ns": linux.DT_DIR, + "oom_score": linux.DT_REG, + "oom_score_adj": linux.DT_REG, + "smaps": linux.DT_REG, + "stat": linux.DT_REG, + "statm": linux.DT_REG, + "status": linux.DT_REG, + "task": linux.DT_DIR, + "uid_map": linux.DT_REG, } ) diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 2cee2e6ed..c0dbbe890 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -37,6 +37,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -554,6 +555,13 @@ type Task struct { // // startTime is protected by mu. startTime ktime.Time + + // oomScoreAdj is the task's OOM score adjustment. This is currently not + // used but is maintained for consistency. + // TODO(gvisor.dev/issue/1967) + // + // oomScoreAdj is protected by mu, and is owned by the task goroutine. + oomScoreAdj int32 } func (t *Task) savePtraceTracer() *Task { @@ -847,3 +855,28 @@ func (t *Task) AbstractSockets() *AbstractSocketNamespace { func (t *Task) ContainerID() string { return t.containerID } + +// OOMScoreAdj gets the task's OOM score adjustment. +func (t *Task) OOMScoreAdj() (int32, error) { + t.mu.Lock() + defer t.mu.Unlock() + if t.ExitState() == TaskExitDead { + return 0, syserror.ESRCH + } + return t.oomScoreAdj, nil +} + +// SetOOMScoreAdj sets the task's OOM score adjustment. The value should be +// between -1000 and 1000 inclusive. 
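// As a rough sketch of the behavior implemented below (the bounds and error
// values are taken directly from the checks in the function body):
//
//	t.SetOOMScoreAdj(-1000) // accepted, lower bound
//	t.SetOOMScoreAdj(1000)  // accepted, upper bound
//	t.SetOOMScoreAdj(1001)  // rejected with EINVAL
//
// Calling either accessor on a task whose ExitState is TaskExitDead returns
// ESRCH instead.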
+func (t *Task) SetOOMScoreAdj(adj int32) error { + t.mu.Lock() + defer t.mu.Unlock() + if t.ExitState() == TaskExitDead { + return syserror.ESRCH + } + if adj > 1000 || adj < -1000 { + return syserror.EINVAL + } + t.oomScoreAdj = adj + return nil +} diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index 78866f280..dda502bb8 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -264,6 +264,11 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { rseqSignature = t.rseqSignature } + adj, err := t.OOMScoreAdj() + if err != nil { + return 0, nil, err + } + cfg := &TaskConfig{ Kernel: t.k, ThreadGroup: tg, @@ -282,6 +287,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { RSeqAddr: rseqAddr, RSeqSignature: rseqSignature, ContainerID: t.ContainerID(), + OOMScoreAdj: adj, } if opts.NewThreadGroup { cfg.Parent = t diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index a5035bb7f..2bbf48bb8 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -93,6 +93,9 @@ type TaskConfig struct { // ContainerID is the container the new task belongs to. ContainerID string + + // oomScoreAdj is the task's OOM score adjustment. + OOMScoreAdj int32 } // NewTask creates a new task defined by cfg. @@ -143,6 +146,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { rseqSignature: cfg.RSeqSignature, futexWaiter: futex.NewWaiter(), containerID: cfg.ContainerID, + oomScoreAdj: cfg.OOMScoreAdj, } t.creds.Store(cfg.Credentials) t.endStopCond.L = &t.tg.signalHandlers.mu diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index a69b0ce13..9800a0cdf 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -318,10 +318,14 @@ syscall_test( test = "//test/syscalls/linux:proc_test", ) -syscall_test(test = "//test/syscalls/linux:proc_pid_uid_gid_map_test") - syscall_test(test = "//test/syscalls/linux:proc_net_test") +syscall_test(test = "//test/syscalls/linux:proc_pid_oomscore_test") + +syscall_test(test = "//test/syscalls/linux:proc_pid_smaps_test") + +syscall_test(test = "//test/syscalls/linux:proc_pid_uid_gid_map_test") + syscall_test( size = "medium", test = "//test/syscalls/linux:pselect_test", diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 0fbd556de..43455f1a3 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1631,6 +1631,19 @@ cc_binary( ], ) +cc_binary( + name = "proc_pid_oomscore_test", + testonly = 1, + srcs = ["proc_pid_oomscore.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + ], +) + cc_binary( name = "proc_pid_smaps_test", testonly = 1, diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index f91187e75..5a70f6c3b 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1431,6 +1431,12 @@ TEST(ProcPidFile, SubprocessRunning) { EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("oom_score", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("oom_score_adj", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); } // Test whether /proc/PID/ files can be read for a zombie process. 
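// A rough summary of the behavior exercised by the surrounding hunks (the
// gVisor-specific caveats are spelled out in the FIXMEs): reads of oom_score
// and oom_score_adj succeed while the process is running or zombied, and once
// the process has fully exited, oom_score_adj reads fail with ESRCH, as does
// oom_score on Linux.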
@@ -1466,6 +1472,12 @@ TEST(ProcPidFile, SubprocessZombie) { EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); + EXPECT_THAT(ReadWhileZombied("oom_score", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("oom_score_adj", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. // @@ -1527,6 +1539,15 @@ TEST(ProcPidFile, SubprocessExited) { EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("oom_score", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("oom_score_adj", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); } PosixError DirContainsImpl(absl::string_view path, diff --git a/test/syscalls/linux/proc_pid_oomscore.cc b/test/syscalls/linux/proc_pid_oomscore.cc new file mode 100644 index 000000000..707821a3f --- /dev/null +++ b/test/syscalls/linux/proc_pid_oomscore.cc @@ -0,0 +1,72 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include + +#include "test/util/fs_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr ReadProcNumber(std::string path) { + ASSIGN_OR_RETURN_ERRNO(std::string contents, GetContents(path)); + EXPECT_EQ(contents[contents.length() - 1], '\n'); + + int num; + if (!absl::SimpleAtoi(contents, &num)) { + return PosixError(EINVAL, "invalid value: " + contents); + } + + return num; +} + +TEST(ProcPidOomscoreTest, BasicRead) { + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score")); + EXPECT_LE(oom_score, 1000); + EXPECT_GE(oom_score, -1000); +} + +TEST(ProcPidOomscoreAdjTest, BasicRead) { + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score_adj")); + + // oom_score_adj defaults to 0. 
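// More precisely, a new task inherits its parent's adjustment (see the
// task_clone.go change above), so the check below also assumes the test
// runner itself is running with an oom_score_adj of 0.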
+ EXPECT_EQ(oom_score, 0); +} + +TEST(ProcPidOomscoreAdjTest, BasicWrite) { + constexpr int test_value = 7; + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/oom_score_adj", O_WRONLY)); + ASSERT_THAT( + RetryEINTR(write)(fd.get(), std::to_string(test_value).c_str(), 1), + SyscallSucceeds()); + + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score_adj")); + EXPECT_EQ(oom_score, test_value); +} + +} // namespace + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From 20170d4fd5c26def584664762f4e639f0b43ff6e Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Fri, 6 Mar 2020 11:22:55 -0800 Subject: Move packetdrill tests to open-source PiperOrigin-RevId: 299396286 --- test/packetdrill/BUILD | 44 +++++++++++++++++++- test/packetdrill/accept_ack_drop.pkt | 27 ++++++++++++ test/packetdrill/defs.bzl | 10 ++--- test/packetdrill/linux/tcp_user_timeout.pkt | 39 ++++++++++++++++++ .../listen_close_before_handshake_complete.pkt | 31 ++++++++++++++ test/packetdrill/netstack/tcp_user_timeout.pkt | 38 +++++++++++++++++ test/packetdrill/no_rst_to_rst.pkt | 36 ++++++++++++++++ .../reset_for_ack_when_no_syn_cookies_in_use.pkt | 9 ++++ test/packetdrill/sanity_test.pkt | 7 ++++ test/packetdrill/tcp_defer_accept.pkt | 48 ++++++++++++++++++++++ test/packetdrill/tcp_defer_accept_timeout.pkt | 48 ++++++++++++++++++++++ 11 files changed, 330 insertions(+), 7 deletions(-) create mode 100644 test/packetdrill/accept_ack_drop.pkt create mode 100644 test/packetdrill/linux/tcp_user_timeout.pkt create mode 100644 test/packetdrill/listen_close_before_handshake_complete.pkt create mode 100644 test/packetdrill/netstack/tcp_user_timeout.pkt create mode 100644 test/packetdrill/no_rst_to_rst.pkt create mode 100644 test/packetdrill/reset_for_ack_when_no_syn_cookies_in_use.pkt create mode 100644 test/packetdrill/sanity_test.pkt create mode 100644 test/packetdrill/tcp_defer_accept.pkt create mode 100644 test/packetdrill/tcp_defer_accept_timeout.pkt (limited to 'test') diff --git a/test/packetdrill/BUILD b/test/packetdrill/BUILD index d113555b1..fb0b2db41 100644 --- a/test/packetdrill/BUILD +++ b/test/packetdrill/BUILD @@ -1,8 +1,48 @@ -load("defs.bzl", "packetdrill_test") +load("defs.bzl", "packetdrill_linux_test", "packetdrill_netstack_test", "packetdrill_test") package(licenses = ["notice"]) packetdrill_test( - name = "fin_wait2_timeout", + name = "packetdrill_sanity_test", + scripts = ["sanity_test.pkt"], +) + +packetdrill_test( + name = "accept_ack_drop_test", + scripts = ["accept_ack_drop.pkt"], +) + +packetdrill_test( + name = "fin_wait2_timeout_test", scripts = ["fin_wait2_timeout.pkt"], ) + +packetdrill_linux_test( + name = "tcp_user_timeout_test_linux_test", + scripts = ["linux/tcp_user_timeout.pkt"], +) + +packetdrill_netstack_test( + name = "tcp_user_timeout_test_netstack_test", + scripts = ["netstack/tcp_user_timeout.pkt"], +) + +packetdrill_test( + name = "listen_close_before_handshake_complete_test", + scripts = ["listen_close_before_handshake_complete.pkt"], +) + +packetdrill_test( + name = "no_rst_to_rst_test", + scripts = ["no_rst_to_rst.pkt"], +) + +packetdrill_test( + name = "tcp_defer_accept_test", + scripts = ["tcp_defer_accept.pkt"], +) + +packetdrill_test( + name = "tcp_defer_accept_timeout_test", + scripts = ["tcp_defer_accept_timeout.pkt"], +) diff --git a/test/packetdrill/accept_ack_drop.pkt b/test/packetdrill/accept_ack_drop.pkt new file mode 100644 index 000000000..76e638fd4 --- /dev/null +++ b/test/packetdrill/accept_ack_drop.pkt @@ -0,0 
+1,27 @@ +// Test that the accept works if the final ACK is dropped and an ack with data +// follows the dropped ack. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 bind(3, ..., ...) = 0 + +// Set backlog to 1 so that we can easily test. ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0.0 < S 0:0(0) win 32792 ++0.0 > S. 0:0(0) ack 1 <...> + ++0.0 < . 1:5(4) ack 1 win 257 ++0.0 > . 1:1(0) ack 5 <...> + +// This should cause connection to transition to connected state. ++0.000 accept(3, ..., ...) = 4 ++0.000 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Now read the data and we should get 4 bytes. ++0.000 read(4,..., 4) = 4 ++0.000 close(4) = 0 + ++0.0 > F. 1:1(0) ack 5 <...> ++0.0 < F. 5:5(0) ack 2 win 257 ++0.01 > . 2:2(0) ack 6 <...> \ No newline at end of file diff --git a/test/packetdrill/defs.bzl b/test/packetdrill/defs.bzl index 8623ce7b1..f499c177b 100644 --- a/test/packetdrill/defs.bzl +++ b/test/packetdrill/defs.bzl @@ -66,7 +66,7 @@ def packetdrill_linux_test(name, **kwargs): if "tags" not in kwargs: kwargs["tags"] = _PACKETDRILL_TAGS _packetdrill_test( - name = name + "_linux_test", + name = name, flags = ["--dut_platform", "linux"], **kwargs ) @@ -75,13 +75,13 @@ def packetdrill_netstack_test(name, **kwargs): if "tags" not in kwargs: kwargs["tags"] = _PACKETDRILL_TAGS _packetdrill_test( - name = name + "_netstack_test", + name = name, # This is the default runtime unless # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. flags = ["--dut_platform", "netstack", "--runtime", "runsc-d"], **kwargs ) -def packetdrill_test(**kwargs): - packetdrill_linux_test(**kwargs) - packetdrill_netstack_test(**kwargs) +def packetdrill_test(name, **kwargs): + packetdrill_linux_test(name + "_linux_test", **kwargs) + packetdrill_netstack_test(name + "_netstack_test", **kwargs) diff --git a/test/packetdrill/linux/tcp_user_timeout.pkt b/test/packetdrill/linux/tcp_user_timeout.pkt new file mode 100644 index 000000000..38018cb42 --- /dev/null +++ b/test/packetdrill/linux/tcp_user_timeout.pkt @@ -0,0 +1,39 @@ +// Test that a socket w/ TCP_USER_TIMEOUT set aborts the connection +// if there is pending unacked data after the user specified timeout. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 bind(3, ..., ...) = 0 + ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0 < S 0:0(0) win 32792 ++0 > S. 0:0(0) ack 1 <...> ++0.1 < . 1:1(0) ack 1 win 32792 + ++0.100 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute! ++2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +// The write/ack is required mainly for netstack as netstack does +// not update its RTO during the handshake. ++0 write(4, ..., 100) = 100 ++0 > P. 1:101(100) ack 1 <...> ++0 < . 1:1(0) ack 101 win 32792 + ++0 close(4) = 0 + ++0 > F. 101:101(0) ack 1 <...> ++.3~+.400 > F. 101:101(0) ack 1 <...> ++.3~+.400 > F. 101:101(0) ack 1 <...> ++.6~+.800 > F. 101:101(0) ack 1 <...> ++1.2~+1.300 > F. 101:101(0) ack 1 <...> + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small. ++.1 < . 
1:2(1) ack 102 win 32792 ++0 > R 102:102(0) win 0 diff --git a/test/packetdrill/listen_close_before_handshake_complete.pkt b/test/packetdrill/listen_close_before_handshake_complete.pkt new file mode 100644 index 000000000..51c3f1a32 --- /dev/null +++ b/test/packetdrill/listen_close_before_handshake_complete.pkt @@ -0,0 +1,31 @@ +// Test that closing a listening socket closes any connections in SYN-RCVD +// state and any packets bound for these connections generate a RESET. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 bind(3, ..., ...) = 0 + +// Set backlog to 1 so that we can easily test. ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0 < S 0:0(0) win 32792 ++0 > S. 0:0(0) ack 1 <...> + ++0.100 close(3) = 0 ++0.1 < P. 1:1(0) ack 1 win 257 + +// Linux generates a reset with no ack number/bit set. This is contradictory to +// what is specified in Rule 1 under Reset Generation in +// https://tools.ietf.org/html/rfc793#section-3.4. +// "1. If the connection does not exist (CLOSED) then a reset is sent +// in response to any incoming segment except another reset. In +// particular, SYNs addressed to a non-existent connection are rejected +// by this means. +// +// If the incoming segment has an ACK field, the reset takes its +// sequence number from the ACK field of the segment, otherwise the +// reset has sequence number zero and the ACK field is set to the sum +// of the sequence number and segment length of the incoming segment. +// The connection remains in the CLOSED state." + ++0.0 > R 1:1(0) win 0 \ No newline at end of file diff --git a/test/packetdrill/netstack/tcp_user_timeout.pkt b/test/packetdrill/netstack/tcp_user_timeout.pkt new file mode 100644 index 000000000..60103adba --- /dev/null +++ b/test/packetdrill/netstack/tcp_user_timeout.pkt @@ -0,0 +1,38 @@ +// Test that a socket w/ TCP_USER_TIMEOUT set aborts the connection +// if there is pending unacked data after the user specified timeout. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 bind(3, ..., ...) = 0 + ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0 < S 0:0(0) win 32792 ++0 > S. 0:0(0) ack 1 <...> ++0.1 < . 1:1(0) ack 1 win 32792 + ++0.100 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute! ++2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +// The write/ack is required mainly for netstack as netstack does +// not update its RTO during the handshake. ++0 write(4, ..., 100) = 100 ++0 > P. 1:101(100) ack 1 <...> ++0 < . 1:1(0) ack 101 win 32792 + ++0 close(4) = 0 + ++0 > F. 101:101(0) ack 1 <...> ++.2~+.300 > F. 101:101(0) ack 1 <...> ++.4~+.500 > F. 101:101(0) ack 1 <...> ++.8~+.900 > F. 101:101(0) ack 1 <...> + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small. ++1.61 < . 1:2(1) ack 102 win 32792 ++0 > R 102:102(0) win 0 diff --git a/test/packetdrill/no_rst_to_rst.pkt b/test/packetdrill/no_rst_to_rst.pkt new file mode 100644 index 000000000..612747827 --- /dev/null +++ b/test/packetdrill/no_rst_to_rst.pkt @@ -0,0 +1,36 @@ +// Test a RST is not generated in response to a RST and a RST is correctly +// generated when an accepted endpoint is RST due to an incoming RST. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 bind(3, ..., ...) 
= 0 + ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0 < S 0:0(0) win 32792 ++0 > S. 0:0(0) ack 1 <...> ++0 < P. 1:1(0) ack 1 win 257 + ++0.100 accept(3, ..., ...) = 4 + ++0.200 < R 1:1(0) win 0 + ++0.300 read(4,..., 4) = -1 ECONNRESET (Connection Reset by Peer) + ++0.00 < . 1:1(0) ack 1 win 257 + +// Linux generates a reset with no ack number/bit set. This is contradictory to +// what is specified in Rule 1 under Reset Generation in +// https://tools.ietf.org/html/rfc793#section-3.4. +// "1. If the connection does not exist (CLOSED) then a reset is sent +// in response to any incoming segment except another reset. In +// particular, SYNs addressed to a non-existent connection are rejected +// by this means. +// +// If the incoming segment has an ACK field, the reset takes its +// sequence number from the ACK field of the segment, otherwise the +// reset has sequence number zero and the ACK field is set to the sum +// of the sequence number and segment length of the incoming segment. +// The connection remains in the CLOSED state." + ++0.00 > R 1:1(0) win 0 \ No newline at end of file diff --git a/test/packetdrill/reset_for_ack_when_no_syn_cookies_in_use.pkt b/test/packetdrill/reset_for_ack_when_no_syn_cookies_in_use.pkt new file mode 100644 index 000000000..a86b90ce6 --- /dev/null +++ b/test/packetdrill/reset_for_ack_when_no_syn_cookies_in_use.pkt @@ -0,0 +1,9 @@ +// Test that a listening socket generates a RST when it receives an +// ACK and syn cookies are not in use. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 bind(3, ..., ...) = 0 + ++0 listen(3, 1) = 0 ++0.1 < . 1:1(0) ack 1 win 32792 ++0 > R 1:1(0) ack 0 win 0 \ No newline at end of file diff --git a/test/packetdrill/sanity_test.pkt b/test/packetdrill/sanity_test.pkt new file mode 100644 index 000000000..b3b58c366 --- /dev/null +++ b/test/packetdrill/sanity_test.pkt @@ -0,0 +1,7 @@ +// Basic sanity test. One system call. +// +// All of the plumbing has to be working however, and the packetdrill wire +// client needs to be able to connect to the wire server and send the script, +// probe local interfaces, run through the test w/ timings, etc. + +0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/test/packetdrill/tcp_defer_accept.pkt b/test/packetdrill/tcp_defer_accept.pkt new file mode 100644 index 000000000..a17f946db --- /dev/null +++ b/test/packetdrill/tcp_defer_accept.pkt @@ -0,0 +1,48 @@ +// Test that a bare ACK does not complete a connection when TCP_DEFER_ACCEPT +// timeout is not hit but an ACK w/ data does complete and deliver the +// connection to the accept queue. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_TCP, TCP_DEFER_ACCEPT, [5], 4) = 0 ++0.000 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 bind(3, ..., ...) = 0 + +// Set backlog to 1 so that we can easily test. ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0.0 < S 0:0(0) win 32792 ++0.0 > S. 0:0(0) ack 1 <...> + +// Send a bare ACK this should not complete the connection as we +// set the TCP_DEFER_ACCEPT above. ++0.0 < . 1:1(0) ack 1 win 257 + +// The bare ACK should be dropped and no connection should be delivered +// to the accept queue. ++0.100 accept(3, ..., ...) = -1 EWOULDBLOCK (operation would block) + +// Send another bare ACK and it should still fail we set TCP_DEFER_ACCEPT +// to 5 seconds above. ++2.5 < . 1:1(0) ack 1 win 257 ++0.100 accept(3, ..., ...) = -1 EWOULDBLOCK (operation would block) + +// set accept socket back to blocking. 
++0.000 fcntl(3, F_SETFL, O_RDWR) = 0 + +// Now send an ACK w/ data. This should complete the connection +// and deliver the socket to the accept queue. ++0.1 < . 1:5(4) ack 1 win 257 ++0.0 > . 1:1(0) ack 5 <...> + +// This should cause connection to transition to connected state. ++0.000 accept(3, ..., ...) = 4 ++0.000 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Now read the data and we should get 4 bytes. ++0.000 read(4,..., 4) = 4 ++0.000 close(4) = 0 + ++0.0 > F. 1:1(0) ack 5 <...> ++0.0 < F. 5:5(0) ack 2 win 257 ++0.01 > . 2:2(0) ack 6 <...> \ No newline at end of file diff --git a/test/packetdrill/tcp_defer_accept_timeout.pkt b/test/packetdrill/tcp_defer_accept_timeout.pkt new file mode 100644 index 000000000..201fdeb14 --- /dev/null +++ b/test/packetdrill/tcp_defer_accept_timeout.pkt @@ -0,0 +1,48 @@ +// Test that a bare ACK is accepted after TCP_DEFER_ACCEPT timeout +// is hit and a connection is delivered. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_TCP, TCP_DEFER_ACCEPT, [3], 4) = 0 ++0.000 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 bind(3, ..., ...) = 0 + +// Set backlog to 1 so that we can easily test. ++0 listen(3, 1) = 0 + +// Establish a connection without timestamps. ++0.0 < S 0:0(0) win 32792 ++0.0 > S. 0:0(0) ack 1 <...> + +// Send a bare ACK this should not complete the connection as we +// set the TCP_DEFER_ACCEPT above. ++0.0 < . 1:1(0) ack 1 win 257 + +// The bare ACK should be dropped and no connection should be delivered +// to the accept queue. ++0.100 accept(3, ..., ...) = -1 EWOULDBLOCK (operation would block) + +// Send another bare ACK and it should still fail we set TCP_DEFER_ACCEPT +// to 5 seconds above. ++2.5 < . 1:1(0) ack 1 win 257 ++0.100 accept(3, ..., ...) = -1 EWOULDBLOCK (operation would block) + +// set accept socket back to blocking. ++0.000 fcntl(3, F_SETFL, O_RDWR) = 0 + +// We should see one more retransmit of the SYN-ACK as a last ditch +// attempt when TCP_DEFER_ACCEPT timeout is hit to trigger another +// ACK or a packet with data. ++.35~+2.35 > S. 0:0(0) ack 1 <...> + +// Now send another bare ACK after TCP_DEFER_ACCEPT time has been passed. ++0.0 < . 1:1(0) ack 1 win 257 + +// The ACK above should cause connection to transition to connected state. ++0.000 accept(3, ..., ...) = 4 ++0.000 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + ++0.000 close(4) = 0 + ++0.0 > F. 1:1(0) ack 1 <...> ++0.0 < F. 1:1(0) ack 2 win 257 ++0.01 > . 2:2(0) ack 2 <...> -- cgit v1.2.3 From d5dbe366bf7c9f5b648b8114a9dc7f45589899b1 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Fri, 6 Mar 2020 11:41:10 -0800 Subject: shutdown(s, SHUT_WR) in TIME-WAIT returns ENOTCONN From RFC 793 s3.9 p61 Event Processing: CLOSE Call during TIME-WAIT: return with "error: connection closing" Fixes #1603 PiperOrigin-RevId: 299401353 --- pkg/tcpip/transport/tcp/endpoint.go | 5 ++++- test/syscalls/linux/tcp_socket.cc | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 40cc664c0..dc9c18b6f 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2117,10 +2117,13 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { // Close for write. if (e.shutdownFlags & tcpip.ShutdownWrite) != 0 { e.sndBufMu.Lock() - if e.sndClosed { // Already closed. 
e.sndBufMu.Unlock() + if e.EndpointState() == StateTimeWait { + e.mu.Unlock() + return tcpip.ErrNotConnected + } break } diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 579463384..d9c1ac0e1 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -143,6 +143,20 @@ TEST_P(TcpSocketTest, ConnectOnEstablishedConnection) { SyscallFailsWithErrno(EISCONN)); } +TEST_P(TcpSocketTest, ShutdownWriteInTimeWait) { + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds()); + EXPECT_THAT(shutdown(s_, SHUT_RDWR), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); // Wait to enter TIME_WAIT. + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); +} + +TEST_P(TcpSocketTest, ShutdownWriteInFinWait1) { + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds()); + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds()); + absl::SleepFor(absl::Seconds(1)); // Wait to enter FIN-WAIT2. + EXPECT_THAT(shutdown(t_, SHUT_WR), SyscallSucceeds()); +} + TEST_P(TcpSocketTest, DataCoalesced) { char buf[10]; -- cgit v1.2.3 From 2446161b3faa352bf28dc83e338f10967f0224c2 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 9 Mar 2020 11:52:32 -0700 Subject: perf/signal: rewrite code in assembly to avoid compiler optimizations Without this change, the assembly code of this test compiled without optimizations: mov -0x150(%rbp),%rax movl $0x77777777,(%rax) lea -0x128(%rbp),%rax with optimizations: movl $0x77777777,0x0 This code doesn't work properly, because the test changes rax in the segv handler. PiperOrigin-RevId: 299896117 --- test/perf/linux/signal_benchmark.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/perf/linux/signal_benchmark.cc b/test/perf/linux/signal_benchmark.cc index a6928df58..cec679191 100644 --- a/test/perf/linux/signal_benchmark.cc +++ b/test/perf/linux/signal_benchmark.cc @@ -43,11 +43,13 @@ void BM_FaultSignalFixup(benchmark::State& state) { // Fault, fault, fault. for (auto _ : state) { - register volatile unsigned int* ptr asm("rax"); - // Trigger the segfault. - ptr = nullptr; - *ptr = 0; + asm volatile( + "movq $0, %%rax\n" + "movq $0x77777777, (%%rax)\n" + : + : + : "rax"); } } -- cgit v1.2.3 From b36de6e7be0542b410901d3cbcd1b3c0fc493cf5 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Mon, 9 Mar 2020 19:57:35 -0700 Subject: Move /proc/net to /proc/PID/net, and make /proc/net -> /proc/self/net. Issue #1833 PiperOrigin-RevId: 299998105 --- pkg/sentry/fs/proc/net.go | 53 +-- pkg/sentry/fs/proc/proc.go | 2 +- pkg/sentry/fs/proc/task.go | 1 + pkg/sentry/fsimpl/proc/BUILD | 2 +- pkg/sentry/fsimpl/proc/task.go | 1 + pkg/sentry/fsimpl/proc/task_net.go | 790 +++++++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/proc/tasks.go | 2 +- pkg/sentry/fsimpl/proc/tasks_net.go | 787 ---------------------------------- pkg/sentry/fsimpl/proc/tasks_test.go | 3 +- test/syscalls/linux/proc_net.cc | 25 ++ 10 files changed, 849 insertions(+), 817 deletions(-) create mode 100644 pkg/sentry/fsimpl/proc/task_net.go delete mode 100644 pkg/sentry/fsimpl/proc/tasks_net.go (limited to 'test') diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 95d5817ff..bd18177d4 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -40,47 +40,48 @@ import ( // LINT.IfChange -// newNet creates a new proc net entry. -func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSource) *fs.Inode { +// newNetDir creates a new proc net entry. 
+func newNetDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + k := t.Kernel() + var contents map[string]*fs.Inode - // TODO(gvisor.dev/issue/1833): Support for using the network stack in the - // network namespace of the calling process. We should make this per-process, - // a.k.a. /proc/PID/net, and make /proc/net a symlink to /proc/self/net. - if s := p.k.RootNetworkNamespace().Stack(); s != nil { + if s := t.NetworkNamespace().Stack(); s != nil { + // TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task + // network namespace. contents = map[string]*fs.Inode{ - "dev": seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc), - "snmp": seqfile.NewSeqFileInode(ctx, &netSnmp{s: s}, msrc), + "dev": seqfile.NewSeqFileInode(t, &netDev{s: s}, msrc), + "snmp": seqfile.NewSeqFileInode(t, &netSnmp{s: s}, msrc), // The following files are simple stubs until they are // implemented in netstack, if the file contains a // header the stub is just the header otherwise it is // an empty file. - "arp": newStaticProcInode(ctx, msrc, []byte("IP address HW type Flags HW address Mask Device\n")), + "arp": newStaticProcInode(t, msrc, []byte("IP address HW type Flags HW address Mask Device\n")), - "netlink": newStaticProcInode(ctx, msrc, []byte("sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n")), - "netstat": newStaticProcInode(ctx, msrc, []byte("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")), - "packet": newStaticProcInode(ctx, msrc, []byte("sk RefCnt Type Proto Iface R Rmem User Inode\n")), - "protocols": newStaticProcInode(ctx, msrc, []byte("protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n")), 
+ "netlink": newStaticProcInode(t, msrc, []byte("sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n")), + "netstat": newStaticProcInode(t, msrc, []byte("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")), + "packet": newStaticProcInode(t, msrc, []byte("sk RefCnt Type Proto Iface R Rmem User Inode\n")), + "protocols": newStaticProcInode(t, msrc, []byte("protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n")), // Linux sets psched values to: nsec per usec, psched // tick in ns, 1000000, high res timer ticks per sec // (ClockGetres returns 1ns resolution). 
- "psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))), - "ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function\n")), - "route": seqfile.NewSeqFileInode(ctx, &netRoute{s: s}, msrc), - "tcp": seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc), - "udp": seqfile.NewSeqFileInode(ctx, &netUDP{k: k}, msrc), - "unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc), + "psched": newStaticProcInode(t, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))), + "ptype": newStaticProcInode(t, msrc, []byte("Type Device Function\n")), + "route": seqfile.NewSeqFileInode(t, &netRoute{s: s}, msrc), + "tcp": seqfile.NewSeqFileInode(t, &netTCP{k: k}, msrc), + "udp": seqfile.NewSeqFileInode(t, &netUDP{k: k}, msrc), + "unix": seqfile.NewSeqFileInode(t, &netUnix{k: k}, msrc), } if s.SupportsIPv6() { - contents["if_inet6"] = seqfile.NewSeqFileInode(ctx, &ifinet6{s: s}, msrc) - contents["ipv6_route"] = newStaticProcInode(ctx, msrc, []byte("")) - contents["tcp6"] = seqfile.NewSeqFileInode(ctx, &netTCP6{k: k}, msrc) - contents["udp6"] = newStaticProcInode(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")) + contents["if_inet6"] = seqfile.NewSeqFileInode(t, &ifinet6{s: s}, msrc) + contents["ipv6_route"] = newStaticProcInode(t, msrc, []byte("")) + contents["tcp6"] = seqfile.NewSeqFileInode(t, &netTCP6{k: k}, msrc) + contents["udp6"] = newStaticProcInode(t, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")) } } - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) + d := ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(t, d, msrc, fs.SpecialDirectory, t) } // ifinet6 implements seqfile.SeqSource for /proc/net/if_inet6. @@ -837,4 +838,4 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se return data, 0 } -// LINT.ThenChange(../../fsimpl/proc/tasks_net.go) +// LINT.ThenChange(../../fsimpl/proc/task_net.go) diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index c8abb5052..c659224a7 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -70,6 +70,7 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string "loadavg": seqfile.NewSeqFileInode(ctx, &loadavgData{}, msrc), "meminfo": seqfile.NewSeqFileInode(ctx, &meminfoData{k}, msrc), "mounts": newProcInode(ctx, ramfs.NewSymlink(ctx, fs.RootOwner, "self/mounts"), msrc, fs.Symlink, nil), + "net": newProcInode(ctx, ramfs.NewSymlink(ctx, fs.RootOwner, "self/net"), msrc, fs.Symlink, nil), "self": newSelf(ctx, pidns, msrc), "stat": seqfile.NewSeqFileInode(ctx, &statData{k}, msrc), "thread-self": newThreadSelf(ctx, pidns, msrc), @@ -86,7 +87,6 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string } // Add more contents that need proc to be initialized. 
- p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) p.AddChild(ctx, "sys", p.newSysDir(ctx, msrc)) return newProcInode(ctx, p, msrc, fs.SpecialDirectory, nil), nil diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 4e9b0fc00..03cc788c8 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -84,6 +84,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bo "maps": newMaps(t, msrc), "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), + "net": newNetDir(t, msrc), "ns": newNamespaceDir(t, msrc), "oom_score": newOOMScore(t, msrc), "oom_score_adj": newOOMScoreAdj(t, msrc), diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index a83245866..bb609a305 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -9,9 +9,9 @@ go_library( "subtasks.go", "task.go", "task_files.go", + "task_net.go", "tasks.go", "tasks_files.go", - "tasks_net.go", "tasks_sys.go", ], visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index c0d643f51..493acbd1b 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -57,6 +57,7 @@ func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNames "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}), //"mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), //"mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), + "net": newTaskNetDir(task, inoGen), "ns": newTaskOwnedDir(task, inoGen.NextIno(), 0511, map[string]*kernfs.Dentry{ "net": newNamespaceSymlink(task, inoGen.NextIno(), "net"), "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"), diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go new file mode 100644 index 000000000..373a7b17d --- /dev/null +++ b/pkg/sentry/fsimpl/proc/task_net.go @@ -0,0 +1,790 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
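// A rough sketch of the resulting layout, based on the commit message and the
// "self/net" symlink added to proc.go above (output illustrative):
//
//	$ readlink /proc/net
//	self/net
//	$ ls /proc/self/net
//	arp  dev  netlink  netstat  packet  protocols  psched  ptype  route
//	snmp  tcp  udp  unix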
+ +package proc + +import ( + "bytes" + "fmt" + "io" + "reflect" + "time" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/usermem" +) + +func newTaskNetDir(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry { + k := task.Kernel() + pidns := task.PIDNamespace() + root := auth.NewRootCredentials(pidns.UserNamespace()) + + var contents map[string]*kernfs.Dentry + if stack := task.NetworkNamespace().Stack(); stack != nil { + const ( + arp = "IP address HW type Flags HW address Mask Device\n" + netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n" + packet = "sk RefCnt Type Proto Iface R Rmem User Inode\n" + protocols = "protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n" + ptype = "Type Device Function\n" + upd6 = " sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n" + ) + psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)) + + // TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task + // network namespace. + contents = map[string]*kernfs.Dentry{ + "dev": newDentry(root, inoGen.NextIno(), 0444, &netDevData{stack: stack}), + "snmp": newDentry(root, inoGen.NextIno(), 0444, &netSnmpData{stack: stack}), + + // The following files are simple stubs until they are implemented in + // netstack, if the file contains a header the stub is just the header + // otherwise it is an empty file. + "arp": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(arp)), + "netlink": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(netlink)), + "netstat": newDentry(root, inoGen.NextIno(), 0444, &netStatData{}), + "packet": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(packet)), + "protocols": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(protocols)), + + // Linux sets psched values to: nsec per usec, psched tick in ns, 1000000, + // high res timer ticks per sec (ClockGetres returns 1ns resolution). + "psched": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(psched)), + "ptype": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(ptype)), + "route": newDentry(root, inoGen.NextIno(), 0444, &netRouteData{stack: stack}), + "tcp": newDentry(root, inoGen.NextIno(), 0444, &netTCPData{kernel: k}), + "udp": newDentry(root, inoGen.NextIno(), 0444, &netUDPData{kernel: k}), + "unix": newDentry(root, inoGen.NextIno(), 0444, &netUnixData{kernel: k}), + } + + if stack.SupportsIPv6() { + contents["if_inet6"] = newDentry(root, inoGen.NextIno(), 0444, &ifinet6{stack: stack}) + contents["ipv6_route"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")) + contents["tcp6"] = newDentry(root, inoGen.NextIno(), 0444, &netTCP6Data{kernel: k}) + contents["udp6"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(upd6)) + } + } + + return newTaskOwnedDir(task, inoGen.NextIno(), 0555, contents) +} + +// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6. 
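// Each interface address contributes one line in the format produced below
// (values illustrative):
//
//	00000000000000000000000000000001 01 80 00 80       lo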
+// +// +stateify savable +type ifinet6 struct { + kernfs.DynamicBytesFile + + stack inet.Stack +} + +var _ dynamicInode = (*ifinet6)(nil) + +func (n *ifinet6) contents() []string { + var lines []string + nics := n.stack.Interfaces() + for id, naddrs := range n.stack.InterfaceAddrs() { + nic, ok := nics[id] + if !ok { + // NIC was added after NICNames was called. We'll just ignore it. + continue + } + + for _, a := range naddrs { + // IPv6 only. + if a.Family != linux.AF_INET6 { + continue + } + + // Fields: + // IPv6 address displayed in 32 hexadecimal chars without colons + // Netlink device number (interface index) in hexadecimal (use nic id) + // Prefix length in hexadecimal + // Scope value (use 0) + // Interface flags + // Device name + lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name)) + } + } + return lines +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error { + for _, l := range n.contents() { + buf.WriteString(l) + } + return nil +} + +// netDevData implements vfs.DynamicBytesSource for /proc/net/dev. +// +// +stateify savable +type netDevData struct { + kernfs.DynamicBytesFile + + stack inet.Stack +} + +var _ dynamicInode = (*netDevData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *netDevData) Generate(ctx context.Context, buf *bytes.Buffer) error { + interfaces := n.stack.Interfaces() + buf.WriteString("Inter-| Receive | Transmit\n") + buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n") + + for _, i := range interfaces { + // Implements the same format as + // net/core/net-procfs.c:dev_seq_printf_stats. + var stats inet.StatDev + if err := n.stack.Statistics(&stats, i.Name); err != nil { + log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err) + continue + } + fmt.Fprintf( + buf, + "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n", + i.Name, + // Received + stats[0], // bytes + stats[1], // packets + stats[2], // errors + stats[3], // dropped + stats[4], // fifo + stats[5], // frame + stats[6], // compressed + stats[7], // multicast + // Transmitted + stats[8], // bytes + stats[9], // packets + stats[10], // errors + stats[11], // dropped + stats[12], // fifo + stats[13], // frame + stats[14], // compressed + stats[15], // multicast + ) + } + + return nil +} + +// netUnixData implements vfs.DynamicBytesSource for /proc/net/unix. +// +// +stateify savable +type netUnixData struct { + kernfs.DynamicBytesFile + + kernel *kernel.Kernel +} + +var _ dynamicInode = (*netUnixData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n") + for _, se := range n.kernel.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock) + continue + } + sfile := s.(*fs.File) + if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { + s.DecRef() + // Not a unix socket. 
+ continue + } + sops := sfile.FileOperations.(*unix.SocketOperations) + + addr, err := sops.Endpoint().GetLocalAddress() + if err != nil { + log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err) + addr.Addr = "" + } + + sockFlags := 0 + if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { + if ce.Listening() { + // For unix domain sockets, linux reports a single flag + // value if the socket is listening, of __SO_ACCEPTCON. + sockFlags = linux.SO_ACCEPTCON + } + } + + // In the socket entry below, the value for the 'Num' field requires + // some consideration. Linux prints the address to the struct + // unix_sock representing a socket in the kernel, but may redact the + // value for unprivileged users depending on the kptr_restrict + // sysctl. + // + // One use for this field is to allow a privileged user to + // introspect into the kernel memory to determine information about + // a socket not available through procfs, such as the socket's peer. + // + // In gvisor, returning a pointer to our internal structures would + // be pointless, as it wouldn't match the memory layout for struct + // unix_sock, making introspection difficult. We could populate a + // struct unix_sock with the appropriate data, but even that + // requires consideration for which kernel version to emulate, as + // the definition of this struct changes over time. + // + // For now, we always redact this pointer. + fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d", + (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. + sfile.ReadRefs()-1, // RefCount, don't count our own ref. + 0, // Protocol, always 0 for UDS. + sockFlags, // Flags. + sops.Endpoint().Type(), // Type. + sops.State(), // State. + sfile.InodeID(), // Inode. + ) + + // Path + if len(addr.Addr) != 0 { + if addr.Addr[0] == 0 { + // Abstract path. + fmt.Fprintf(buf, " @%s", string(addr.Addr[1:])) + } else { + fmt.Fprintf(buf, " %s", string(addr.Addr)) + } + } + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + return nil +} + +func networkToHost16(n uint16) uint16 { + // n is in network byte order, so is big-endian. The most-significant byte + // should be stored in the lower address. + // + // We manually inline binary.BigEndian.Uint16() because Go does not support + // non-primitive consts, so binary.BigEndian is a (mutable) var, so calls to + // binary.BigEndian.Uint16() require a read of binary.BigEndian and an + // interface method call, defeating inlining. + buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)} + return usermem.ByteOrder.Uint16(buf[:]) +} + +func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { + switch family { + case linux.AF_INET: + var a linux.SockAddrInet + if i != nil { + a = *i.(*linux.SockAddrInet) + } + + // linux.SockAddrInet.Port is stored in the network byte order and is + // printed like a number in host byte order. Note that all numbers in host + // byte order are printed with the most-significant byte first when + // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux. + port := networkToHost16(a.Port) + + // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order + // (i.e. most-significant byte in index 0). Linux represents this as a + // __be32 which is a typedef for an unsigned int, and is printed with + // %X. This means that for a little-endian machine, Linux prints the + // least-significant byte of the address first. 
To emulate this, we first + // invert the byte order for the address using usermem.ByteOrder.Uint32, + // which makes it have the equivalent encoding to a __be32 on a little + // endian machine. Note that this operation is a no-op on a big endian + // machine. Then similar to Linux, we format it with %X, which will print + // the most-significant byte of the __be32 address first, which is now + // actually the least-significant byte of the original address in + // linux.SockAddrInet.Addr on little endian machines, due to the conversion. + addr := usermem.ByteOrder.Uint32(a.Addr[:]) + + fmt.Fprintf(w, "%08X:%04X ", addr, port) + case linux.AF_INET6: + var a linux.SockAddrInet6 + if i != nil { + a = *i.(*linux.SockAddrInet6) + } + + port := networkToHost16(a.Port) + addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4]) + addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8]) + addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12]) + addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16]) + fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port) + } +} + +func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error { + // t may be nil here if our caller is not part of a task goroutine. This can + // happen for example if we're here for "sentryctl cat". When t is nil, + // degrade gracefully and retrieve what we can. + t := kernel.TaskFromContext(ctx) + + for _, se := range k.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(socket.Socket) + if !ok { + panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) + } + if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) { + s.DecRef() + // Not tcp4 sockets. + continue + } + + // Linux's documentation for the fields below can be found at + // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. + // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). + // Note that the header doesn't contain labels for all the fields. + + // Field: sl; entry number. + fmt.Fprintf(buf, "%4d: ", se.ID) + + // Field: local_adddress. + var localAddr linux.SockAddr + if t != nil { + if local, _, err := sops.GetSockName(t); err == nil { + localAddr = local + } + } + writeInetAddr(buf, family, localAddr) + + // Field: rem_address. + var remoteAddr linux.SockAddr + if t != nil { + if remote, _, err := sops.GetPeerName(t); err == nil { + remoteAddr = remote + } + } + writeInetAddr(buf, family, remoteAddr) + + // Field: state; socket state. + fmt.Fprintf(buf, "%02X ", sops.State()) + + // Field: tx_queue, rx_queue; number of packets in the transmit and + // receive queue. Unimplemented. + fmt.Fprintf(buf, "%08X:%08X ", 0, 0) + + // Field: tr, tm->when; timer active state and number of jiffies + // until timer expires. Unimplemented. + fmt.Fprintf(buf, "%02X:%08X ", 0, 0) + + // Field: retrnsmt; number of unrecovered RTO timeouts. + // Unimplemented. + fmt.Fprintf(buf, "%08X ", 0) + + // Field: uid. + uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) + if err != nil { + log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) + fmt.Fprintf(buf, "%5d ", 0) + } else { + creds := auth.CredentialsFromContext(ctx) + fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow())) + } + + // Field: timeout; number of unanswered 0-window probes. + // Unimplemented. 
+ fmt.Fprintf(buf, "%8d ", 0) + + // Field: inode. + fmt.Fprintf(buf, "%8d ", sfile.InodeID()) + + // Field: refcount. Don't count the ref we obtain while deferencing + // the weakref to this socket. + fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) + + // Field: Socket struct address. Redacted due to the same reason as + // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. + fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) + + // Field: retransmit timeout. Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: predicted tick of soft clock (delayed ACK control data). + // Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: sending congestion window, Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. + // Unimplemented, report as large threshold. + fmt.Fprintf(buf, "%d", -1) + + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + + return nil +} + +// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp. +// +// +stateify savable +type netTCPData struct { + kernfs.DynamicBytesFile + + kernel *kernel.Kernel +} + +var _ dynamicInode = (*netTCPData)(nil) + +func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") + return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET) +} + +// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6. +// +// +stateify savable +type netTCP6Data struct { + kernfs.DynamicBytesFile + + kernel *kernel.Kernel +} + +var _ dynamicInode = (*netTCP6Data)(nil) + +func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n") + return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6) +} + +// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp. +// +// +stateify savable +type netUDPData struct { + kernfs.DynamicBytesFile + + kernel *kernel.Kernel +} + +var _ dynamicInode = (*netUDPData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // t may be nil here if our caller is not part of a task goroutine. This can + // happen for example if we're here for "sentryctl cat". When t is nil, + // degrade gracefully and retrieve what we can. + t := kernel.TaskFromContext(ctx) + + for _, se := range d.kernel.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(socket.Socket) + if !ok { + panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) + } + if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM { + s.DecRef() + // Not udp4 socket. + continue + } + + // For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock(). + + // Field: sl; entry number. + fmt.Fprintf(buf, "%5d: ", se.ID) + + // Field: local_adddress. + var localAddr linux.SockAddrInet + if t != nil { + if local, _, err := sops.GetSockName(t); err == nil { + localAddr = *local.(*linux.SockAddrInet) + } + } + writeInetAddr(buf, linux.AF_INET, &localAddr) + + // Field: rem_address. 
+ var remoteAddr linux.SockAddrInet + if t != nil { + if remote, _, err := sops.GetPeerName(t); err == nil { + remoteAddr = *remote.(*linux.SockAddrInet) + } + } + writeInetAddr(buf, linux.AF_INET, &remoteAddr) + + // Field: state; socket state. + fmt.Fprintf(buf, "%02X ", sops.State()) + + // Field: tx_queue, rx_queue; number of packets in the transmit and + // receive queue. Unimplemented. + fmt.Fprintf(buf, "%08X:%08X ", 0, 0) + + // Field: tr, tm->when. Always 0 for UDP. + fmt.Fprintf(buf, "%02X:%08X ", 0, 0) + + // Field: retrnsmt. Always 0 for UDP. + fmt.Fprintf(buf, "%08X ", 0) + + // Field: uid. + uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) + if err != nil { + log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) + fmt.Fprintf(buf, "%5d ", 0) + } else { + creds := auth.CredentialsFromContext(ctx) + fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow())) + } + + // Field: timeout. Always 0 for UDP. + fmt.Fprintf(buf, "%8d ", 0) + + // Field: inode. + fmt.Fprintf(buf, "%8d ", sfile.InodeID()) + + // Field: ref; reference count on the socket inode. Don't count the ref + // we obtain while deferencing the weakref to this socket. + fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) + + // Field: Socket struct address. Redacted due to the same reason as + // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. + fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) + + // Field: drops; number of dropped packets. Unimplemented. + fmt.Fprintf(buf, "%d", 0) + + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + return nil +} + +// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp. +// +// +stateify savable +type netSnmpData struct { + kernfs.DynamicBytesFile + + stack inet.Stack +} + +var _ dynamicInode = (*netSnmpData)(nil) + +type snmpLine struct { + prefix string + header string +} + +var snmp = []snmpLine{ + { + prefix: "Ip", + header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates", + }, + { + prefix: "Icmp", + header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps", + }, + { + prefix: "IcmpMsg", + }, + { + prefix: "Tcp", + header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors", + }, + { + prefix: "Udp", + header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", + }, + { + prefix: "UdpLite", + header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", + }, +} + +func toSlice(a interface{}) []uint64 { + v := reflect.Indirect(reflect.ValueOf(a)) + return v.Slice(0, v.Len()).Interface().([]uint64) +} + +func sprintSlice(s []uint64) string { + if len(s) == 0 { + return "" + } + r := fmt.Sprint(s) + return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice. +} + +// Generate implements vfs.DynamicBytesSource. 
+func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error { + types := []interface{}{ + &inet.StatSNMPIP{}, + &inet.StatSNMPICMP{}, + nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats. + &inet.StatSNMPTCP{}, + &inet.StatSNMPUDP{}, + &inet.StatSNMPUDPLite{}, + } + for i, stat := range types { + line := snmp[i] + if stat == nil { + fmt.Fprintf(buf, "%s:\n", line.prefix) + fmt.Fprintf(buf, "%s:\n", line.prefix) + continue + } + if err := d.stack.Statistics(stat, line.prefix); err != nil { + if err == syserror.EOPNOTSUPP { + log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) + } else { + log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) + } + } + + fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header) + + if line.prefix == "Tcp" { + tcp := stat.(*inet.StatSNMPTCP) + // "Tcp" needs special processing because MaxConn is signed. RFC 2012. + fmt.Sprintf("%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:])) + } else { + fmt.Sprintf("%s: %s\n", line.prefix, sprintSlice(toSlice(stat))) + } + } + return nil +} + +// netRouteData implements vfs.DynamicBytesSource for /proc/net/route. +// +// +stateify savable +type netRouteData struct { + kernfs.DynamicBytesFile + + stack inet.Stack +} + +var _ dynamicInode = (*netRouteData)(nil) + +// Generate implements vfs.DynamicBytesSource. +// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show. +func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error { + fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT") + + interfaces := d.stack.Interfaces() + for _, rt := range d.stack.RouteTable() { + // /proc/net/route only includes ipv4 routes. + if rt.Family != linux.AF_INET { + continue + } + + // /proc/net/route does not include broadcast or multicast routes. + if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST { + continue + } + + iface, ok := interfaces[rt.OutputInterface] + if !ok || iface.Name == "lo" { + continue + } + + var ( + gw uint32 + prefix uint32 + flags = linux.RTF_UP + ) + if len(rt.GatewayAddr) == header.IPv4AddressSize { + flags |= linux.RTF_GATEWAY + gw = usermem.ByteOrder.Uint32(rt.GatewayAddr) + } + if len(rt.DstAddr) == header.IPv4AddressSize { + prefix = usermem.ByteOrder.Uint32(rt.DstAddr) + } + l := fmt.Sprintf( + "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d", + iface.Name, + prefix, + gw, + flags, + 0, // RefCnt. + 0, // Use. + 0, // Metric. + (uint32(1)<> 8 & 0xff), byte(n & 0xff)} - return usermem.ByteOrder.Uint16(buf[:]) -} - -func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { - switch family { - case linux.AF_INET: - var a linux.SockAddrInet - if i != nil { - a = *i.(*linux.SockAddrInet) - } - - // linux.SockAddrInet.Port is stored in the network byte order and is - // printed like a number in host byte order. Note that all numbers in host - // byte order are printed with the most-significant byte first when - // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux. - port := networkToHost16(a.Port) - - // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order - // (i.e. most-significant byte in index 0). Linux represents this as a - // __be32 which is a typedef for an unsigned int, and is printed with - // %X. This means that for a little-endian machine, Linux prints the - // least-significant byte of the address first. 
To emulate this, we first - // invert the byte order for the address using usermem.ByteOrder.Uint32, - // which makes it have the equivalent encoding to a __be32 on a little - // endian machine. Note that this operation is a no-op on a big endian - // machine. Then similar to Linux, we format it with %X, which will print - // the most-significant byte of the __be32 address first, which is now - // actually the least-significant byte of the original address in - // linux.SockAddrInet.Addr on little endian machines, due to the conversion. - addr := usermem.ByteOrder.Uint32(a.Addr[:]) - - fmt.Fprintf(w, "%08X:%04X ", addr, port) - case linux.AF_INET6: - var a linux.SockAddrInet6 - if i != nil { - a = *i.(*linux.SockAddrInet6) - } - - port := networkToHost16(a.Port) - addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4]) - addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8]) - addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12]) - addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16]) - fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port) - } -} - -func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error { - // t may be nil here if our caller is not part of a task goroutine. This can - // happen for example if we're here for "sentryctl cat". When t is nil, - // degrade gracefully and retrieve what we can. - t := kernel.TaskFromContext(ctx) - - for _, se := range k.ListSockets() { - s := se.Sock.Get() - if s == nil { - log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID) - continue - } - sfile := s.(*fs.File) - sops, ok := sfile.FileOperations.(socket.Socket) - if !ok { - panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) - } - if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) { - s.DecRef() - // Not tcp4 sockets. - continue - } - - // Linux's documentation for the fields below can be found at - // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. - // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). - // Note that the header doesn't contain labels for all the fields. - - // Field: sl; entry number. - fmt.Fprintf(buf, "%4d: ", se.ID) - - // Field: local_adddress. - var localAddr linux.SockAddr - if t != nil { - if local, _, err := sops.GetSockName(t); err == nil { - localAddr = local - } - } - writeInetAddr(buf, family, localAddr) - - // Field: rem_address. - var remoteAddr linux.SockAddr - if t != nil { - if remote, _, err := sops.GetPeerName(t); err == nil { - remoteAddr = remote - } - } - writeInetAddr(buf, family, remoteAddr) - - // Field: state; socket state. - fmt.Fprintf(buf, "%02X ", sops.State()) - - // Field: tx_queue, rx_queue; number of packets in the transmit and - // receive queue. Unimplemented. - fmt.Fprintf(buf, "%08X:%08X ", 0, 0) - - // Field: tr, tm->when; timer active state and number of jiffies - // until timer expires. Unimplemented. - fmt.Fprintf(buf, "%02X:%08X ", 0, 0) - - // Field: retrnsmt; number of unrecovered RTO timeouts. - // Unimplemented. - fmt.Fprintf(buf, "%08X ", 0) - - // Field: uid. - uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) - fmt.Fprintf(buf, "%5d ", 0) - } else { - creds := auth.CredentialsFromContext(ctx) - fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow())) - } - - // Field: timeout; number of unanswered 0-window probes. - // Unimplemented. 
- fmt.Fprintf(buf, "%8d ", 0) - - // Field: inode. - fmt.Fprintf(buf, "%8d ", sfile.InodeID()) - - // Field: refcount. Don't count the ref we obtain while deferencing - // the weakref to this socket. - fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) - - // Field: Socket struct address. Redacted due to the same reason as - // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. - fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) - - // Field: retransmit timeout. Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: predicted tick of soft clock (delayed ACK control data). - // Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: sending congestion window, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. - // Unimplemented, report as large threshold. - fmt.Fprintf(buf, "%d", -1) - - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - - return nil -} - -// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp. -// -// +stateify savable -type netTCPData struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netTCPData)(nil) - -func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") - return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET) -} - -// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6. -// -// +stateify savable -type netTCP6Data struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netTCP6Data)(nil) - -func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n") - return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6) -} - -// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp. -// -// +stateify savable -type netUDPData struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netUDPData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // t may be nil here if our caller is not part of a task goroutine. This can - // happen for example if we're here for "sentryctl cat". When t is nil, - // degrade gracefully and retrieve what we can. - t := kernel.TaskFromContext(ctx) - - for _, se := range d.kernel.ListSockets() { - s := se.Sock.Get() - if s == nil { - log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID) - continue - } - sfile := s.(*fs.File) - sops, ok := sfile.FileOperations.(socket.Socket) - if !ok { - panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) - } - if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM { - s.DecRef() - // Not udp4 socket. - continue - } - - // For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock(). - - // Field: sl; entry number. - fmt.Fprintf(buf, "%5d: ", se.ID) - - // Field: local_adddress. - var localAddr linux.SockAddrInet - if t != nil { - if local, _, err := sops.GetSockName(t); err == nil { - localAddr = *local.(*linux.SockAddrInet) - } - } - writeInetAddr(buf, linux.AF_INET, &localAddr) - - // Field: rem_address. 
- var remoteAddr linux.SockAddrInet - if t != nil { - if remote, _, err := sops.GetPeerName(t); err == nil { - remoteAddr = *remote.(*linux.SockAddrInet) - } - } - writeInetAddr(buf, linux.AF_INET, &remoteAddr) - - // Field: state; socket state. - fmt.Fprintf(buf, "%02X ", sops.State()) - - // Field: tx_queue, rx_queue; number of packets in the transmit and - // receive queue. Unimplemented. - fmt.Fprintf(buf, "%08X:%08X ", 0, 0) - - // Field: tr, tm->when. Always 0 for UDP. - fmt.Fprintf(buf, "%02X:%08X ", 0, 0) - - // Field: retrnsmt. Always 0 for UDP. - fmt.Fprintf(buf, "%08X ", 0) - - // Field: uid. - uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) - fmt.Fprintf(buf, "%5d ", 0) - } else { - creds := auth.CredentialsFromContext(ctx) - fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow())) - } - - // Field: timeout. Always 0 for UDP. - fmt.Fprintf(buf, "%8d ", 0) - - // Field: inode. - fmt.Fprintf(buf, "%8d ", sfile.InodeID()) - - // Field: ref; reference count on the socket inode. Don't count the ref - // we obtain while deferencing the weakref to this socket. - fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) - - // Field: Socket struct address. Redacted due to the same reason as - // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. - fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) - - // Field: drops; number of dropped packets. Unimplemented. - fmt.Fprintf(buf, "%d", 0) - - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - return nil -} - -// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp. -// -// +stateify savable -type netSnmpData struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*netSnmpData)(nil) - -type snmpLine struct { - prefix string - header string -} - -var snmp = []snmpLine{ - { - prefix: "Ip", - header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates", - }, - { - prefix: "Icmp", - header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps", - }, - { - prefix: "IcmpMsg", - }, - { - prefix: "Tcp", - header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors", - }, - { - prefix: "Udp", - header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", - }, - { - prefix: "UdpLite", - header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", - }, -} - -func toSlice(a interface{}) []uint64 { - v := reflect.Indirect(reflect.ValueOf(a)) - return v.Slice(0, v.Len()).Interface().([]uint64) -} - -func sprintSlice(s []uint64) string { - if len(s) == 0 { - return "" - } - r := fmt.Sprint(s) - return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice. -} - -// Generate implements vfs.DynamicBytesSource. 
-func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error { - types := []interface{}{ - &inet.StatSNMPIP{}, - &inet.StatSNMPICMP{}, - nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats. - &inet.StatSNMPTCP{}, - &inet.StatSNMPUDP{}, - &inet.StatSNMPUDPLite{}, - } - for i, stat := range types { - line := snmp[i] - if stat == nil { - fmt.Fprintf(buf, "%s:\n", line.prefix) - fmt.Fprintf(buf, "%s:\n", line.prefix) - continue - } - if err := d.stack.Statistics(stat, line.prefix); err != nil { - if err == syserror.EOPNOTSUPP { - log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) - } else { - log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) - } - } - - fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header) - - if line.prefix == "Tcp" { - tcp := stat.(*inet.StatSNMPTCP) - // "Tcp" needs special processing because MaxConn is signed. RFC 2012. - fmt.Sprintf("%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:])) - } else { - fmt.Sprintf("%s: %s\n", line.prefix, sprintSlice(toSlice(stat))) - } - } - return nil -} - -// netRouteData implements vfs.DynamicBytesSource for /proc/net/route. -// -// +stateify savable -type netRouteData struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*netRouteData)(nil) - -// Generate implements vfs.DynamicBytesSource. -// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show. -func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT") - - interfaces := d.stack.Interfaces() - for _, rt := range d.stack.RouteTable() { - // /proc/net/route only includes ipv4 routes. - if rt.Family != linux.AF_INET { - continue - } - - // /proc/net/route does not include broadcast or multicast routes. - if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST { - continue - } - - iface, ok := interfaces[rt.OutputInterface] - if !ok || iface.Name == "lo" { - continue - } - - var ( - gw uint32 - prefix uint32 - flags = linux.RTF_UP - ) - if len(rt.GatewayAddr) == header.IPv4AddressSize { - flags |= linux.RTF_GATEWAY - gw = usermem.ByteOrder.Uint32(rt.GatewayAddr) - } - if len(rt.DstAddr) == header.IPv4AddressSize { - prefix = usermem.ByteOrder.Uint32(rt.DstAddr) - } - l := fmt.Sprintf( - "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d", - iface.Name, - prefix, - gw, - flags, - 0, // RefCnt. - 0, // Use. - 0, // Metric. - (uint32(1)< Date: Tue, 10 Mar 2020 13:58:27 -0700 Subject: Make checkpoint/restore e2e test less flaky PiperOrigin-RevId: 300171916 --- test/e2e/integration_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 28064e557..cc4fbbaed 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -175,10 +175,8 @@ func TestCheckpointRestore(t *testing.T) { t.Fatal(err) } - // TODO(b/143498576): Remove after github.com/moby/moby/issues/38963 is fixed. - time.Sleep(1 * time.Second) - - if err := d.Restore("test"); err != nil { + // TODO(b/143498576): Remove Poll after github.com/moby/moby/issues/38963 is fixed. 
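+ // Poll retries the restore until it succeeds or the 15-second timeout
+ // expires, rather than relying on the fixed sleep removed above.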
+ if err := testutil.Poll(func() error { return d.Restore("test") }, 15*time.Second); err != nil { t.Fatal("docker restore failed:", err) } -- cgit v1.2.3 From 4054b021f05cb0902e9877ba82403978fd8d6405 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Mon, 9 Mar 2020 17:40:13 -0700 Subject: iptables: ready tests to be enabled in kokoro Fixed flakes (tested via --runs_per_test=100) and added skips for not-yet-implemented features. Once submitted, the iptables tests will be ready to enable in kokoro. --- scripts/iptables_tests.sh | 11 +++++++---- test/iptables/filter_input.go | 13 +++++++------ test/iptables/filter_output.go | 10 ++++++---- test/iptables/iptables_test.go | 6 ++++++ test/iptables/iptables_util.go | 13 +++++-------- test/iptables/nat.go | 2 +- 6 files changed, 32 insertions(+), 23 deletions(-) (limited to 'test') diff --git a/scripts/iptables_tests.sh b/scripts/iptables_tests.sh index 3069d8628..b4a5211a5 100755 --- a/scripts/iptables_tests.sh +++ b/scripts/iptables_tests.sh @@ -19,9 +19,12 @@ source $(dirname $0)/common.sh install_runsc_for_test iptables # Build the docker image for the test. -run //test/iptables/runner-image --norun +run //test/iptables/runner:runner-image --norun -# TODO(gvisor.dev/issue/170): Also test this on runsc once iptables are better -# supported -test //test/iptables:iptables_test "--test_arg=--runtime=runc" \ +test //test/iptables:iptables_test \ + "--test_arg=--runtime=runc" \ + "--test_arg=--image=bazel/test/iptables/runner:runner-image" + +test //test/iptables:iptables_test \ + "--test_arg=--runtime=runsc" \ "--test_arg=--image=bazel/test/iptables/runner:runner-image" diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index b2fb6401a..141d20fbb 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -106,7 +106,7 @@ func (FilterInputDropOnlyUDP) ContainerAction(ip net.IP) error { func (FilterInputDropOnlyUDP) LocalAction(ip net.IP) error { // Try to establish a TCP connection with the container, which should // succeed. - return connectTCP(ip, acceptPort, dropPort, sendloopDuration) + return connectTCP(ip, acceptPort, sendloopDuration) } // FilterInputDropUDPPort tests that we can drop UDP traffic by port. @@ -192,7 +192,7 @@ func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropTCPDestPort) LocalAction(ip net.IP) error { - if err := connectTCP(ip, dropPort, acceptPort, sendloopDuration); err == nil { + if err := connectTCP(ip, dropPort, sendloopDuration); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort) } @@ -209,13 +209,14 @@ func (FilterInputDropTCPSrcPort) Name() string { // ContainerAction implements TestCase.ContainerAction. func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-p", "tcp", "-m", "tcp", "--sport", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + // Drop anything from an ephemeral port. + if err := filterTable("-A", "INPUT", "-p", "tcp", "-m", "tcp", "--sport", "1024:65535", "-j", "DROP"); err != nil { return err } // Listen for TCP packets on accept port. 
if err := listenTCP(acceptPort, sendloopDuration); err == nil { - return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort) + return fmt.Errorf("connection destined to port %d should not be accepted, but was", dropPort) } return nil @@ -223,8 +224,8 @@ func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropTCPSrcPort) LocalAction(ip net.IP) error { - if err := connectTCP(ip, acceptPort, dropPort, sendloopDuration); err == nil { - return fmt.Errorf("connection on port %d should not be acceptedi, but got accepted", dropPort) + if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection should not be accepted, but was") } return nil diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go index ee2c49f9a..1314a5a92 100644 --- a/test/iptables/filter_output.go +++ b/test/iptables/filter_output.go @@ -24,7 +24,8 @@ func init() { RegisterTestCase(FilterOutputDropTCPSrcPort{}) } -// FilterOutputDropTCPDestPort tests that connections are not accepted on specified source ports. +// FilterOutputDropTCPDestPort tests that connections are not accepted on +// specified source ports. type FilterOutputDropTCPDestPort struct{} // Name implements TestCase.Name. @@ -48,14 +49,15 @@ func (FilterOutputDropTCPDestPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterOutputDropTCPDestPort) LocalAction(ip net.IP) error { - if err := connectTCP(ip, acceptPort, dropPort, sendloopDuration); err == nil { + if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", dropPort) } return nil } -// FilterOutputDropTCPSrcPort tests that connections are not accepted on specified source ports. +// FilterOutputDropTCPSrcPort tests that connections are not accepted on +// specified source ports. type FilterOutputDropTCPSrcPort struct{} // Name implements TestCase.Name. @@ -79,7 +81,7 @@ func (FilterOutputDropTCPSrcPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. 
func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP) error { - if err := connectTCP(ip, dropPort, acceptPort, sendloopDuration); err == nil { + if err := connectTCP(ip, dropPort, sendloopDuration); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort) } diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 29ad5932d..56ba78107 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -191,24 +191,28 @@ func TestFilterInputDropOnlyUDP(t *testing.T) { } func TestNATRedirectUDPPort(t *testing.T) { + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectUDPPort{}); err != nil { t.Fatal(err) } } func TestNATRedirectTCPPort(t *testing.T) { + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectTCPPort{}); err != nil { t.Fatal(err) } } func TestNATDropUDP(t *testing.T) { + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATDropUDP{}); err != nil { t.Fatal(err) } } func TestNATAcceptAll(t *testing.T) { + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATAcceptAll{}); err != nil { t.Fatal(err) } @@ -251,12 +255,14 @@ func TestFilterInputReturnUnderflow(t *testing.T) { } func TestFilterOutputDropTCPDestPort(t *testing.T) { + t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(FilterOutputDropTCPDestPort{}); err != nil { t.Fatal(err) } } func TestFilterOutputDropTCPSrcPort(t *testing.T) { + t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(FilterOutputDropTCPSrcPort{}); err != nil { t.Fatal(err) } diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 32cf5a417..1f8dac4f1 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -125,26 +125,23 @@ func listenTCP(port int, timeout time.Duration) error { return nil } -// connectTCP connects the TCP server over specified local port, server IP and remote/server port. -func connectTCP(ip net.IP, remotePort, localPort int, timeout time.Duration) error { +// connectTCP connects to the given IP and port from an ephemeral local address. +func connectTCP(ip net.IP, port int, timeout time.Duration) error { contAddr := net.TCPAddr{ IP: ip, - Port: remotePort, + Port: port, } // The container may not be listening when we first connect, so retry // upon error. callback := func() error { - localAddr := net.TCPAddr{ - Port: localPort, - } - conn, err := net.DialTCP("tcp4", &localAddr, &contAddr) + conn, err := net.DialTCP("tcp4", nil, &contAddr) if conn != nil { conn.Close() } return err } if err := testutil.Poll(callback, timeout); err != nil { - return fmt.Errorf("timed out waiting to send IP, most recent error: %v", err) + return fmt.Errorf("timed out waiting to connect IP, most recent error: %v", err) } return nil diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 899d1c9d3..6ca6b46ca 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -76,7 +76,7 @@ func (NATRedirectTCPPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (NATRedirectTCPPort) LocalAction(ip net.IP) error { - return connectTCP(ip, dropPort, acceptPort, sendloopDuration) + return connectTCP(ip, dropPort, sendloopDuration) } // NATDropUDP tests that packets are not received in ports other than redirect port. 
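The connectTCP helper above now dials from a kernel-chosen ephemeral local
port and relies on testutil.Poll to retry until the container side is
listening, which avoids collisions on a hard-coded source port. A minimal
standalone sketch of the same connect-and-retry idea, using only the standard
library (ConnectWithRetry, the package name, and the fixed retry interval are
illustrative choices; the tests themselves use connectTCP with testutil.Poll):

package connretry

import (
	"fmt"
	"net"
	"time"
)

// ConnectWithRetry dials ip:port over IPv4 from an ephemeral local port,
// retrying until the dial succeeds or the timeout expires.
func ConnectWithRetry(ip net.IP, port int, timeout time.Duration) error {
	addr := net.TCPAddr{IP: ip, Port: port}
	deadline := time.Now().Add(timeout)
	for {
		// A nil local address lets the kernel pick the source port, so tests
		// cannot collide on a hard-coded port.
		conn, err := net.DialTCP("tcp4", nil, &addr)
		if err == nil {
			conn.Close()
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting to connect: %v", err)
		}
		time.Sleep(100 * time.Millisecond)
	}
}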
-- cgit v1.2.3 From f2e4b5ab932a3816e4957171b303db645fd04a94 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 12 Mar 2020 12:31:16 -0700 Subject: Kill sandbox process when parent process terminates When the sandbox runs in attached more, e.g. runsc do, runsc run, the sandbox lifetime is controlled by the parent process. This wasn't working in all cases because PR_GET_PDEATHSIG doesn't propagate through execve when the process changes uid/gid. So it was getting dropped when the sandbox execve's to change to user nobody. PiperOrigin-RevId: 300601247 --- runsc/cmd/boot.go | 75 +++++++++++++------- runsc/container/container_test.go | 17 ----- runsc/sandbox/sandbox.go | 12 ++-- runsc/specutils/namespace.go | 3 + runsc/testutil/testutil.go | 23 +++--- test/root/BUILD | 3 + test/root/runsc_test.go | 146 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 221 insertions(+), 58 deletions(-) create mode 100644 test/root/runsc_test.go (limited to 'test') diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 0f3da69a0..0938944a6 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -23,6 +23,7 @@ import ( "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" @@ -82,8 +83,13 @@ type Boot struct { // sandbox (e.g. gofer) and sent through this FD. mountsFD int - // pidns is set if the sanadbox is in its own pid namespace. + // pidns is set if the sandbox is in its own pid namespace. pidns bool + + // attached is set to true to kill the sandbox process when the parent process + // terminates. This flag is set when the command execve's itself because + // parent death signal doesn't propagate through execve when uid/gid changes. + attached bool } // Name implements subcommands.Command.Name. @@ -118,6 +124,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") + f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates") } // Execute implements subcommands.Command.Execute. It starts a sandbox in a @@ -133,29 +140,32 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) conf := args[0].(*boot.Config) + if b.attached { + // Ensure this process is killed after parent process terminates when + // attached mode is enabled. In the unfortunate event that the parent + // terminates before this point, this process leaks. + if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil { + Fatalf("error setting parent death signal: %v", err) + } + } + if b.setUpRoot { if err := setUpChroot(b.pidns); err != nil { Fatalf("error setting up chroot: %v", err) } - if !b.applyCaps { - // Remove --setup-root arg to call myself. - var args []string - for _, arg := range os.Args { - if !strings.Contains(arg, "setup-root") { - args = append(args, arg) - } - } - if !conf.Rootless { - // Note that we've already read the spec from the spec FD, and - // we will read it again after the exec call. 
This works - // because the ReadSpecFromFile function seeks to the beginning - // of the file before reading. - if err := callSelfAsNobody(args); err != nil { - Fatalf("%v", err) - } - panic("callSelfAsNobody must never return success") + if !b.applyCaps && !conf.Rootless { + // Remove --apply-caps arg to call myself. It has already been done. + args := prepareArgs(b.attached, "setup-root") + + // Note that we've already read the spec from the spec FD, and + // we will read it again after the exec call. This works + // because the ReadSpecFromFile function seeks to the beginning + // of the file before reading. + if err := callSelfAsNobody(args); err != nil { + Fatalf("%v", err) } + panic("callSelfAsNobody must never return success") } } @@ -181,13 +191,9 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) caps.Permitted = append(caps.Permitted, c) } - // Remove --apply-caps arg to call myself. - var args []string - for _, arg := range os.Args { - if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") { - args = append(args, arg) - } - } + // Remove --apply-caps and --setup-root arg to call myself. Both have + // already been done. + args := prepareArgs(b.attached, "setup-root", "apply-caps") // Note that we've already read the spec from the spec FD, and // we will read it again after the exec call. This works @@ -258,3 +264,22 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) l.Destroy() return subcommands.ExitSuccess } + +func prepareArgs(attached bool, exclude ...string) []string { + var args []string + for _, arg := range os.Args { + for _, excl := range exclude { + if strings.Contains(arg, excl) { + goto skip + } + } + args = append(args, arg) + if attached && arg == "boot" { + // Strategicaly place "--attached" after the command. This is needed + // to ensure the new process is killed when the parent process terminates. + args = append(args, "--attached") + } + skip: + } + return args +} diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index c7eea85b3..442e80ac0 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -124,23 +124,6 @@ func procListsEqual(got, want []*control.Process) (bool, error) { return true, nil } -// getAndCheckProcLists is similar to waitForProcessList, but does not wait and retry the -// test for equality. This is because we already confirmed that exec occurred. -func getAndCheckProcLists(cont *Container, want []*control.Process) error { - got, err := cont.Processes() - if err != nil { - return fmt.Errorf("error getting process data from container: %v", err) - } - equal, err := procListsEqual(got, want) - if err != nil { - return err - } - if equal { - return nil - } - return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want)) -} - func procListToString(pl []*control.Process) string { strs := make([]string, 0, len(pl)) for _, p := range pl { diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 6177d6aa7..8de75ae57 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -701,6 +701,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } + if args.Attached { + // Kill sandbox if parent process exits in attached mode. + cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL + // Tells boot that any process it creates must have pdeathsig set. 
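+ // The death signal set just above is cleared when the boot process
+ // execve's itself and changes uid/gid, so --attached tells boot to
+ // re-apply it after the execve.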
+ cmd.Args = append(cmd.Args, "--attached") + } + // Add container as the last argument. cmd.Args = append(cmd.Args, s.ID) @@ -709,11 +716,6 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF log.Debugf("Donating FD %d: %q", i+3, f.Name()) } - if args.Attached { - // Kill sandbox if parent process exits in attached mode. - cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - } - log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr) if err := specutils.StartInNS(cmd, nss); err != nil { diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index c7dd3051c..60bb7b7ee 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -252,6 +252,9 @@ func MaybeRunAsRoot() error { }, Credential: &syscall.Credential{Uid: 0, Gid: 0}, GidMappingsEnableSetgroups: false, + + // Make sure child is killed when the parent terminates. + Pdeathsig: syscall.SIGKILL, } cmd.Env = os.Environ() diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go index 92d677e71..51e487715 100644 --- a/runsc/testutil/testutil.go +++ b/runsc/testutil/testutil.go @@ -87,18 +87,19 @@ func TestConfig() *boot.Config { logDir = dir + "/" } return &boot.Config{ - Debug: true, - DebugLog: logDir, - LogFormat: "text", - DebugLogFormat: "text", - AlsoLogToStderr: true, - LogPackets: true, - Network: boot.NetworkNone, - Strace: true, - Platform: "ptrace", - FileAccess: boot.FileAccessExclusive, + Debug: true, + DebugLog: logDir, + LogFormat: "text", + DebugLogFormat: "text", + AlsoLogToStderr: true, + LogPackets: true, + Network: boot.NetworkNone, + Strace: true, + Platform: "ptrace", + FileAccess: boot.FileAccessExclusive, + NumNetworkChannels: 1, + TestOnlyAllowRunAsCurrentUserWithoutChroot: true, - NumNetworkChannels: 1, } } diff --git a/test/root/BUILD b/test/root/BUILD index 23ce2a70f..ddc9b4955 100644 --- a/test/root/BUILD +++ b/test/root/BUILD @@ -16,6 +16,7 @@ go_test( "crictl_test.go", "main_test.go", "oom_score_adj_test.go", + "runsc_test.go", ], data = [ "//runsc", @@ -37,7 +38,9 @@ go_test( "//runsc/specutils", "//runsc/testutil", "//test/root/testdata", + "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", "@com_github_syndtr_gocapability//capability:go_default_library", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/test/root/runsc_test.go b/test/root/runsc_test.go new file mode 100644 index 000000000..28bb60a12 --- /dev/null +++ b/test/root/runsc_test.go @@ -0,0 +1,146 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package root + +import ( + "bytes" + "fmt" + "io/ioutil" + "os/exec" + "path/filepath" + "strconv" + "strings" + "testing" + "time" + + "github.com/cenkalti/backoff" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/runsc/specutils" + "gvisor.dev/gvisor/runsc/testutil" +) + +// TestDoKill checks that when "runsc do..." 
is killed, the sandbox process is +// also terminated. This ensures that parent death signal is propagate to the +// sandbox process correctly. +func TestDoKill(t *testing.T) { + // Make the sandbox process be reparented here when it's killed, so we can + // wait for it. + if err := unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0); err != nil { + t.Fatalf("prctl(PR_SET_CHILD_SUBREAPER): %v", err) + } + + cmd := exec.Command(specutils.ExePath, "do", "sleep", "10000") + buf := &bytes.Buffer{} + cmd.Stdout = buf + cmd.Stderr = buf + cmd.Start() + + var pid int + findSandbox := func() error { + var err error + pid, err = sandboxPid(cmd.Process.Pid) + if err != nil { + return &backoff.PermanentError{Err: err} + } + if pid == 0 { + return fmt.Errorf("sandbox process not found") + } + return nil + } + if err := testutil.Poll(findSandbox, 10*time.Second); err != nil { + t.Fatalf("failed to find sandbox: %v", err) + } + t.Logf("Found sandbox, pid: %d", pid) + + if err := cmd.Process.Kill(); err != nil { + t.Fatalf("failed to kill run process: %v", err) + } + cmd.Wait() + t.Logf("Parent process killed (%d). Output: %s", cmd.Process.Pid, buf.String()) + + ch := make(chan struct{}) + go func() { + defer func() { ch <- struct{}{} }() + t.Logf("Waiting for sandbox process (%d) termination", pid) + if _, err := unix.Wait4(pid, nil, 0, nil); err != nil { + t.Errorf("error waiting for sandbox process (%d): %v", pid, err) + } + }() + select { + case <-ch: + // Done + case <-time.After(5 * time.Second): + t.Fatalf("timeout waiting for sandbox process (%d) to exit", pid) + } +} + +// sandboxPid looks for the sandbox process inside the process tree starting +// from "pid". It returns 0 and no error if no sandbox process is found. It +// returns error if anything failed. +func sandboxPid(pid int) (int, error) { + cmd := exec.Command("pgrep", "-P", strconv.Itoa(pid)) + buf := &bytes.Buffer{} + cmd.Stdout = buf + if err := cmd.Start(); err != nil { + return 0, err + } + ps, err := cmd.Process.Wait() + if err != nil { + return 0, err + } + if ps.ExitCode() == 1 { + // pgrep returns 1 when no process is found. + return 0, nil + } + + var children []int + for _, line := range strings.Split(buf.String(), "\n") { + if len(line) == 0 { + continue + } + child, err := strconv.Atoi(line) + if err != nil { + return 0, err + } + + cmdline, err := ioutil.ReadFile(filepath.Join("/proc", line, "cmdline")) + if err != nil { + return 0, err + } + args := strings.SplitN(string(cmdline), "\x00", 2) + if len(args) == 0 { + return 0, fmt.Errorf("malformed cmdline file: %q", cmdline) + } + // The sandbox process has the first argument set to "runsc-sandbox". + if args[0] == "runsc-sandbox" { + return child, nil + } + + children = append(children, child) + } + + // Sandbox process wasn't found, try another level down. + for _, pid := range children { + sand, err := sandboxPid(pid) + if err != nil { + return 0, err + } + if sand != 0 { + return sand, nil + } + // Not found, continue the search. + } + return 0, nil +} -- cgit v1.2.3 From bbf86003bfd2a7547744b89c72e1cd06e9385e66 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Thu, 12 Mar 2020 14:34:16 -0700 Subject: Remove flaky network namespace test that uses clone(). 
PiperOrigin-RevId: 300626011 --- test/syscalls/linux/BUILD | 3 +- test/syscalls/linux/network_namespace.cc | 87 ++++---------------------------- 2 files changed, 10 insertions(+), 80 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 43455f1a3..636e5db12 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3707,11 +3707,10 @@ cc_binary( ":socket_test_util", gtest, "//test/util:capability_util", - "//test/util:memory_util", + "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", - "@com_google_absl//absl/synchronization", ], ) diff --git a/test/syscalls/linux/network_namespace.cc b/test/syscalls/linux/network_namespace.cc index 6ea48c263..133fdecf0 100644 --- a/test/syscalls/linux/network_namespace.cc +++ b/test/syscalls/linux/network_namespace.cc @@ -20,102 +20,33 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/synchronization/notification.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" -#include "test/util/memory_util.h" +#include "test/util/posix_error.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" namespace gvisor { namespace testing { - namespace { -using TestFunc = std::function; -using RunFunc = std::function; - -struct NamespaceStrategy { - RunFunc run; - - static NamespaceStrategy Of(RunFunc run) { - NamespaceStrategy s; - s.run = run; - return s; - } -}; +TEST(NetworkNamespaceTest, LoopbackExists) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); -PosixError RunWithUnshare(TestFunc fn) { - PosixError err = PosixError(-1, "function did not return a value"); ScopedThread t([&] { - if (unshare(CLONE_NEWNET) != 0) { - err = PosixError(errno); - return; - } - err = fn(); - }); - t.Join(); - return err; -} + ASSERT_THAT(unshare(CLONE_NEWNET), SyscallSucceedsWithValue(0)); -PosixError RunWithClone(TestFunc fn) { - struct Args { - absl::Notification n; - TestFunc fn; - PosixError err; - }; - Args args; - args.fn = fn; - args.err = PosixError(-1, "function did not return a value"); - - ASSIGN_OR_RETURN_ERRNO( - Mapping child_stack, - MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); - pid_t child = clone( - +[](void *arg) { - Args *args = reinterpret_cast(arg); - args->err = args->fn(); - args->n.Notify(); - syscall(SYS_exit, 0); // Exit manually. No return address on stack. - return 0; - }, - reinterpret_cast(child_stack.addr() + kPageSize), - CLONE_NEWNET | CLONE_THREAD | CLONE_SIGHAND | CLONE_VM, &args); - if (child < 0) { - return PosixError(errno, "clone() failed"); - } - args.n.WaitForNotification(); - return args.err; -} - -class NetworkNamespaceTest - : public ::testing::TestWithParam {}; - -TEST_P(NetworkNamespaceTest, LoopbackExists) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); - - EXPECT_NO_ERRNO(GetParam().run([]() { // TODO(gvisor.dev/issue/1833): Update this to test that only "lo" exists. // Check loopback device exists. 
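    // SIOCGIFINDEX looks up an interface index by name, so the ioctl below
    // succeeds only if the loopback device is present in the new namespace.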
int sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - return PosixError(errno, "socket() failed"); - } + ASSERT_THAT(sock, SyscallSucceeds()); struct ifreq ifr; - snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); - if (ioctl(sock, SIOCGIFINDEX, &ifr) < 0) { - return PosixError(errno, "ioctl() failed, lo cannot be found"); - } - return NoError(); - })); + strncpy(ifr.ifr_name, "lo", IFNAMSIZ); + EXPECT_THAT(ioctl(sock, SIOCGIFINDEX, &ifr), SyscallSucceeds()) + << "lo cannot be found"; + }); } -INSTANTIATE_TEST_SUITE_P( - AllNetworkNamespaceTest, NetworkNamespaceTest, - ::testing::Values(NamespaceStrategy::Of(RunWithUnshare), - NamespaceStrategy::Of(RunWithClone))); - } // namespace - } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 333b74dc288357e192dbd86f6d0732be5ea7df64 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Fri, 13 Mar 2020 03:02:26 +0000 Subject: Enable syscall seccomp test on arm64. Signed-off-by: Haibo Xu Change-Id: Ibc926c917d98b31fc92bbf8d82d6818c39b0f93c --- test/syscalls/linux/seccomp.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index 8e0fc9acc..06cc6a64e 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -72,8 +72,15 @@ void ApplySeccompFilter(uint32_t sysno, uint32_t filtered_result, struct sock_filter filter[] = { // A = seccomp_data.arch BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 4), +#if defined(__x86_64__) // if (A != AUDIT_ARCH_X86_64) goto kill BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 4), +#elif defined(__aarch64__) + // if (A != AUDIT_ARCH_AARCH64) goto kill + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_AARCH64, 0, 4), +#else +#error "Unknown architecture" +#endif // A = seccomp_data.nr BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0), // if (A != sysno) goto allow @@ -179,9 +186,12 @@ TEST(SeccompTest, RetTrapCausesSIGSYS) { TEST_CHECK(info->si_errno == kTrapValue); TEST_CHECK(info->si_call_addr != nullptr); TEST_CHECK(info->si_syscall == kFilteredSyscall); -#ifdef __x86_64__ +#if defined(__x86_64__) TEST_CHECK(info->si_arch == AUDIT_ARCH_X86_64); TEST_CHECK(uc->uc_mcontext.gregs[REG_RAX] == kFilteredSyscall); +#elif defined(__aarch64__) + TEST_CHECK(info->si_arch == AUDIT_ARCH_AARCH64); + TEST_CHECK(uc->uc_mcontext.regs[8] == kFilteredSyscall); #endif // defined(__x86_64__) _exit(0); }); -- cgit v1.2.3 From 722abdd8339f1df515beae0ad5272c8c2b2cfed0 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Fri, 13 Mar 2020 12:09:58 -0700 Subject: Skip process if it has exited PiperOrigin-RevId: 300802159 --- test/root/runsc_test.go | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'test') diff --git a/test/root/runsc_test.go b/test/root/runsc_test.go index 28bb60a12..90373e2db 100644 --- a/test/root/runsc_test.go +++ b/test/root/runsc_test.go @@ -18,6 +18,7 @@ import ( "bytes" "fmt" "io/ioutil" + "os" "os/exec" "path/filepath" "strconv" @@ -117,6 +118,10 @@ func sandboxPid(pid int) (int, error) { cmdline, err := ioutil.ReadFile(filepath.Join("/proc", line, "cmdline")) if err != nil { + if os.IsNotExist(err) { + // Raced with process exit. + continue + } return 0, err } args := strings.SplitN(string(cmdline), "\x00", 2) -- cgit v1.2.3 From 5e413cad10d2358a21dd08216953faee70e62a0b Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Sat, 14 Mar 2020 07:13:15 -0700 Subject: Plumb VFS2 imported fds into virtual filesystem. 
- When setting up the virtual filesystem, mount a host.filesystem to contain all files that need to be imported. - Make read/preadv syscalls to the host in cases where preadv2 may not be supported yet (likewise for writing). - Make save/restore functions in kernel/kernel.go return early if vfs2 is enabled. PiperOrigin-RevId: 300922353 --- pkg/abi/linux/file.go | 3 + pkg/sentry/fs/host/control.go | 2 + pkg/sentry/fsimpl/host/BUILD | 2 + pkg/sentry/fsimpl/host/default_file.go | 45 +++++++----- pkg/sentry/fsimpl/host/host.go | 124 ++++++++++++++++++++++++++++++--- pkg/sentry/fsimpl/host/util.go | 28 ++------ pkg/sentry/kernel/kernel.go | 40 +++++++---- pkg/sentry/syscalls/linux/sys_stat.go | 5 +- pkg/sentry/syscalls/linux/vfs2/stat.go | 6 +- runsc/boot/filter/config.go | 1 + test/syscalls/linux/stat.cc | 60 ++++++++++++++-- 11 files changed, 246 insertions(+), 70 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index e229ac21c..dbe58acbe 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -266,6 +266,9 @@ type Statx struct { DevMinor uint32 } +// SizeOfStatx is the size of a Statx struct. +var SizeOfStatx = binary.Size(Statx{}) + // FileMode represents a mode_t. type FileMode uint16 diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go index 1658979fc..cd84e1337 100644 --- a/pkg/sentry/fs/host/control.go +++ b/pkg/sentry/fs/host/control.go @@ -32,6 +32,8 @@ func newSCMRights(fds []int) control.SCMRights { } // Files implements control.SCMRights.Files. +// +// TODO(gvisor.dev/issue/2017): Port to VFS2. func (c *scmRights) Files(ctx context.Context, max int) (control.RightsFiles, bool) { n := max var trunc bool diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 731f192b3..5d67f88e3 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -9,9 +9,11 @@ go_library( "host.go", "util.go", ], + visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/fd", "//pkg/log", "//pkg/refs", "//pkg/safemem", diff --git a/pkg/sentry/fsimpl/host/default_file.go b/pkg/sentry/fsimpl/host/default_file.go index 172cdb161..98682ba5e 100644 --- a/pkg/sentry/fsimpl/host/default_file.go +++ b/pkg/sentry/fsimpl/host/default_file.go @@ -21,6 +21,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -64,9 +65,7 @@ func (f *defaultFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts v panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") } - f.mu.Lock() n, err := readFromHostFD(ctx, f.inode.hostFD, dst, -1, int(opts.Flags)) - f.mu.Unlock() if isBlockError(err) { // If we got any data at all, return it as a "completed" partial read // rather than retrying until complete. @@ -86,16 +85,22 @@ func (f *defaultFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts v return n, err } -func readFromHostFD(ctx context.Context, fd int, dst usermem.IOSequence, offset int64, flags int) (int64, error) { - if flags&^(linux.RWF_VALID) != 0 { +func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags int) (int64, error) { + // TODO(gvisor.dev/issue/1672): Support select preadv2 flags. 
+ if flags != 0 { return 0, syserror.EOPNOTSUPP } - reader := safemem.FromVecReaderFunc{ - func(srcs [][]byte) (int64, error) { - n, err := unix.Preadv2(fd, srcs, offset, flags) - return int64(n), err - }, + var reader safemem.Reader + if offset == -1 { + reader = safemem.FromIOReader{fd.NewReadWriter(hostFD)} + } else { + reader = safemem.FromVecReaderFunc{ + func(srcs [][]byte) (int64, error) { + n, err := unix.Preadv(hostFD, srcs, offset) + return int64(n), err + }, + } } n, err := dst.CopyOutFrom(ctx, reader) return int64(n), err @@ -120,9 +125,7 @@ func (f *defaultFileFD) Write(ctx context.Context, src usermem.IOSequence, opts panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") } - f.mu.Lock() n, err := writeToHostFD(ctx, f.inode.hostFD, src, -1, int(opts.Flags)) - f.mu.Unlock() if isBlockError(err) { err = syserror.ErrWouldBlock } @@ -137,16 +140,22 @@ func (f *defaultFileFD) Write(ctx context.Context, src usermem.IOSequence, opts return n, err } -func writeToHostFD(ctx context.Context, fd int, src usermem.IOSequence, offset int64, flags int) (int64, error) { - if flags&^(linux.RWF_VALID) != 0 { +func writeToHostFD(ctx context.Context, hostFD int, src usermem.IOSequence, offset int64, flags int) (int64, error) { + // TODO(gvisor.dev/issue/1672): Support select pwritev2 flags. + if flags != 0 { return 0, syserror.EOPNOTSUPP } - writer := safemem.FromVecWriterFunc{ - func(srcs [][]byte) (int64, error) { - n, err := unix.Pwritev2(fd, srcs, offset, flags) - return int64(n), err - }, + var writer safemem.Writer + if offset == -1 { + writer = safemem.FromIOWriter{fd.NewReadWriter(hostFD)} + } else { + writer = safemem.FromVecWriterFunc{ + func(srcs [][]byte) (int64, error) { + n, err := unix.Pwritev(hostFD, srcs, offset) + return int64(n), err + }, + } } n, err := src.CopyInTo(ctx, writer) return int64(n), err diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index c205e6a0b..0be812d13 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -38,10 +38,19 @@ type filesystem struct { kernfs.Filesystem } +// NewMount returns a new disconnected mount in vfsObj that may be passed to ImportFD. +func NewMount(vfsObj *vfs.VirtualFilesystem) (*vfs.Mount, error) { + fs := &filesystem{} + fs.Init(vfsObj) + vfsfs := fs.VFSFilesystem() + // NewDisconnectedMount will take an additional reference on vfsfs. + defer vfsfs.DecRef() + return vfsObj.NewDisconnectedMount(vfsfs, nil, &vfs.MountOptions{}) +} + // ImportFD sets up and returns a vfs.FileDescription from a donated fd. func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID, isTTY bool) (*vfs.FileDescription, error) { - // Must be importing to a mount of host.filesystem. - fs, ok := mnt.Filesystem().Impl().(*filesystem) + fs, ok := mnt.Filesystem().Impl().(*kernfs.Filesystem) if !ok { return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl()) } @@ -54,8 +63,7 @@ func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID fileMode := linux.FileMode(s.Mode) fileType := fileMode.FileType() - // Pipes, character devices, and sockets can return EWOULDBLOCK for - // operations that would block. + // Pipes, character devices, and sockets. isStream := fileType == syscall.S_IFIFO || fileType == syscall.S_IFCHR || fileType == syscall.S_IFSOCK i := &inode{ @@ -143,11 +151,109 @@ func (i *inode) Mode() linux.FileMode { // Stat implements kernfs.Inode. 
func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + if opts.Mask&linux.STATX__RESERVED != 0 { + return linux.Statx{}, syserror.EINVAL + } + if opts.Sync&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { + return linux.Statx{}, syserror.EINVAL + } + + // Limit our host call only to known flags. + mask := opts.Mask & linux.STATX_ALL var s unix.Statx_t - if err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(opts.Mask), &s); err != nil { + err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(mask), &s) + // Fallback to fstat(2), if statx(2) is not supported on the host. + // + // TODO(b/151263641): Remove fallback. + if err == syserror.ENOSYS { + return i.fstat(opts) + } else if err != nil { + return linux.Statx{}, err + } + + ls := linux.Statx{Mask: mask} + // Unconditionally fill blksize, attributes, and device numbers, as indicated + // by /include/uapi/linux/stat.h. + // + // RdevMajor/RdevMinor are left as zero, so as not to expose host device + // numbers. + // + // TODO(gvisor.dev/issue/1672): Use kernfs-specific, internally defined + // device numbers. If we use the device number from the host, it may collide + // with another sentry-internal device number. We handle device/inode + // numbers without relying on the host to prevent collisions. + ls.Blksize = s.Blksize + ls.Attributes = s.Attributes + ls.AttributesMask = s.Attributes_mask + + if mask|linux.STATX_TYPE != 0 { + ls.Mode |= s.Mode & linux.S_IFMT + } + if mask|linux.STATX_MODE != 0 { + ls.Mode |= s.Mode &^ linux.S_IFMT + } + if mask|linux.STATX_NLINK != 0 { + ls.Nlink = s.Nlink + } + if mask|linux.STATX_ATIME != 0 { + ls.Atime = unixToLinuxStatxTimestamp(s.Atime) + } + if mask|linux.STATX_BTIME != 0 { + ls.Btime = unixToLinuxStatxTimestamp(s.Btime) + } + if mask|linux.STATX_CTIME != 0 { + ls.Ctime = unixToLinuxStatxTimestamp(s.Ctime) + } + if mask|linux.STATX_MTIME != 0 { + ls.Mtime = unixToLinuxStatxTimestamp(s.Mtime) + } + if mask|linux.STATX_SIZE != 0 { + ls.Size = s.Size + } + if mask|linux.STATX_BLOCKS != 0 { + ls.Blocks = s.Blocks + } + + // Use our own internal inode number and file owner. + if mask|linux.STATX_INO != 0 { + ls.Ino = i.ino + } + if mask|linux.STATX_UID != 0 { + ls.UID = uint32(i.uid) + } + if mask|linux.STATX_GID != 0 { + ls.GID = uint32(i.gid) + } + + return ls, nil +} + +// fstat is a best-effort fallback for inode.Stat() if the host does not +// support statx(2). +// +// We ignore the mask and sync flags in opts and simply supply +// STATX_BASIC_STATS, as fstat(2) itself does not allow the specification +// of a mask or sync flags. fstat(2) does not provide any metadata +// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so +// those fields remain empty. +func (i *inode) fstat(opts vfs.StatOptions) (linux.Statx, error) { + var s unix.Stat_t + if err := unix.Fstat(i.hostFD, &s); err != nil { return linux.Statx{}, err } - ls := unixToLinuxStatx(s) + + // Note that rdev numbers are left as 0; do not expose host device numbers. + ls := linux.Statx{ + Mask: linux.STATX_BASIC_STATS, + Blksize: uint32(s.Blksize), + Nlink: uint32(s.Nlink), + Mode: uint16(s.Mode), + Size: uint64(s.Size), + Blocks: uint64(s.Blocks), + Atime: timespecToStatxTimestamp(s.Atim), + Ctime: timespecToStatxTimestamp(s.Ctim), + Mtime: timespecToStatxTimestamp(s.Mtim), + } // Use our own internal inode number and file owner. 
// @@ -159,9 +265,6 @@ func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, erro ls.UID = uint32(i.uid) ls.GID = uint32(i.gid) - // Update file mode from the host. - i.mode = linux.FileMode(ls.Mode) - return ls, nil } @@ -217,7 +320,6 @@ func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptio } func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) { - fileType := i.mode.FileType() if fileType == syscall.S_IFSOCK { if i.isTTY { @@ -227,6 +329,8 @@ func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error return nil, errors.New("importing host sockets not supported") } + // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that + // we don't allow importing arbitrary file types without proper support. if i.isTTY { // TODO(gvisor.dev/issue/1672): support importing host fd as TTY. return nil, errors.New("importing host fd as TTY not supported") diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go index e1ccacb4d..d519feef5 100644 --- a/pkg/sentry/fsimpl/host/util.go +++ b/pkg/sentry/fsimpl/host/util.go @@ -35,34 +35,14 @@ func toTimespec(ts linux.StatxTimestamp, omit bool) unix.Timespec { } } -func unixToLinuxStatx(s unix.Statx_t) linux.Statx { - return linux.Statx{ - Mask: s.Mask, - Blksize: s.Blksize, - Attributes: s.Attributes, - Nlink: s.Nlink, - UID: s.Uid, - GID: s.Gid, - Mode: s.Mode, - Ino: s.Ino, - Size: s.Size, - Blocks: s.Blocks, - AttributesMask: s.Attributes_mask, - Atime: unixToLinuxStatxTimestamp(s.Atime), - Btime: unixToLinuxStatxTimestamp(s.Btime), - Ctime: unixToLinuxStatxTimestamp(s.Ctime), - Mtime: unixToLinuxStatxTimestamp(s.Mtime), - RdevMajor: s.Rdev_major, - RdevMinor: s.Rdev_minor, - DevMajor: s.Dev_major, - DevMinor: s.Dev_minor, - } -} - func unixToLinuxStatxTimestamp(ts unix.StatxTimestamp) linux.StatxTimestamp { return linux.StatxTimestamp{Sec: ts.Sec, Nsec: ts.Nsec} } +func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp { + return linux.StatxTimestamp{Sec: int64(ts.Sec), Nsec: uint32(ts.Nsec)} +} + // wouldBlock returns true for file types that can return EWOULDBLOCK // for blocking operations, e.g. pipes, character devices, and sockets. func wouldBlock(fileType uint32) bool { diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 1d627564f..6feda8fa1 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -467,6 +467,11 @@ func (k *Kernel) flushMountSourceRefs() error { // // Precondition: Must be called with the kernel paused. func (ts *TaskSet) forEachFDPaused(f func(*fs.File, *vfs.FileDescription) error) (err error) { + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. + if VFS2Enabled { + return nil + } + ts.mu.RLock() defer ts.mu.RUnlock() for t := range ts.Root.tids { @@ -484,7 +489,7 @@ func (ts *TaskSet) forEachFDPaused(f func(*fs.File, *vfs.FileDescription) error) } func (ts *TaskSet) flushWritesToFiles(ctx context.Context) error { - // TODO(gvisor.dev/issues/1663): Add save support for VFS2. + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. return ts.forEachFDPaused(func(file *fs.File, _ *vfs.FileDescription) error { if flags := file.Flags(); !flags.Write { return nil @@ -533,6 +538,11 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error { } func (ts *TaskSet) unregisterEpollWaiters() { + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. 
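+ // VFS2 file descriptions do not yet participate in save/restore, so when
+ // VFS2 is enabled this function (like forEachFDPaused above) returns early
+ // instead of walking the epoll waiters.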
+ if VFS2Enabled { + return + } + ts.mu.RLock() defer ts.mu.RUnlock() for t := range ts.Root.tids { @@ -1005,11 +1015,14 @@ func (k *Kernel) pauseTimeLocked() { // This means we'll iterate FDTables shared by multiple tasks repeatedly, // but ktime.Timer.Pause is idempotent so this is harmless. if t.fdTable != nil { - t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { - if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { - tfd.PauseTimer() - } - }) + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. + if !VFS2Enabled { + t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { + if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { + tfd.PauseTimer() + } + }) + } } } k.timekeeper.PauseUpdates() @@ -1034,12 +1047,15 @@ func (k *Kernel) resumeTimeLocked() { it.ResumeTimer() } } - if t.fdTable != nil { - t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { - if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { - tfd.ResumeTimer() - } - }) + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. + if !VFS2Enabled { + if t.fdTable != nil { + t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { + if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { + tfd.ResumeTimer() + } + }) + } } } } diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index 9bd2df104..a11a87cd1 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -136,7 +136,10 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall mask := args[3].Uint() statxAddr := args[4].Pointer() - if mask&linux.STATX__RESERVED > 0 { + if mask&linux.STATX__RESERVED != 0 { + return 0, nil, syserror.EINVAL + } + if flags&^(linux.AT_SYMLINK_NOFOLLOW|linux.AT_EMPTY_PATH|linux.AT_STATX_SYNC_TYPE) != 0 { return 0, nil, syserror.EINVAL } if flags&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go index a74ea6fd5..97eaedd66 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat.go @@ -150,7 +150,11 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall mask := args[3].Uint() statxAddr := args[4].Pointer() - if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { + if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW|linux.AT_STATX_SYNC_TYPE) != 0 { + return 0, nil, syserror.EINVAL + } + + if mask&linux.STATX__RESERVED != 0 { return 0, nil, syserror.EINVAL } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index a4627905e..f459d1973 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -284,6 +284,7 @@ var allowedSyscalls = seccomp.SyscallRules{ {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)}, }, syscall.SYS_SIGALTSTACK: {}, + unix.SYS_STATX: {}, syscall.SYS_SYNC_FILE_RANGE: {}, syscall.SYS_TGKILL: []seccomp.Rule{ { diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index c951ac3b3..513b9cd1c 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -607,7 +607,7 @@ int statx(int dirfd, const char* pathname, int flags, unsigned int mask, } TEST_F(StatTest, StatxAbsPath) { - SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 
0, nullptr) < 0 && errno == ENOSYS); struct kernel_statx stx; @@ -617,7 +617,7 @@ TEST_F(StatTest, StatxAbsPath) { } TEST_F(StatTest, StatxRelPathDirFD) { - SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && errno == ENOSYS); struct kernel_statx stx; @@ -631,7 +631,7 @@ TEST_F(StatTest, StatxRelPathDirFD) { } TEST_F(StatTest, StatxRelPathCwd) { - SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && errno == ENOSYS); ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); @@ -643,7 +643,7 @@ TEST_F(StatTest, StatxRelPathCwd) { } TEST_F(StatTest, StatxEmptyPath) { - SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && errno == ENOSYS); const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); @@ -653,6 +653,58 @@ TEST_F(StatTest, StatxEmptyPath) { EXPECT_TRUE(S_ISREG(stx.stx_mode)); } +TEST_F(StatTest, StatxDoesNotRejectExtraneousMaskBits) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + // Set all mask bits except for STATX__RESERVED. + uint mask = 0xffffffff & ~0x80000000; + EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, mask, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRejectsReservedMaskBit) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + // Set STATX__RESERVED in the mask. + EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, 0x80000000, &stx), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(StatTest, StatxSymlink) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + std::string parent_dir = "/tmp"; + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(parent_dir, test_file_name_)); + std::string p = link.path(); + + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, p.c_str(), AT_SYMLINK_NOFOLLOW, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISLNK(stx.stx_mode)); + EXPECT_THAT(statx(AT_FDCWD, p.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxInvalidFlags) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), 12345, 0, &stx), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), + 0x6000 /* AT_STATX_SYNC_TYPE */, 0, &stx), + SyscallFailsWithErrno(EINVAL)); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 69da42885aff9371fd53227583a546df914de02b Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Mon, 16 Mar 2020 12:02:33 -0700 Subject: Enable ARP resolution in TAP devices. 
PiperOrigin-RevId: 301208471 --- pkg/tcpip/link/tun/device.go | 10 +++- test/syscalls/linux/tuntap.cc | 105 +++++++++++++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 29 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index 6ff47a742..f6e301304 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -98,7 +98,12 @@ func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error { prefix = "tap" } - endpoint, err := attachOrCreateNIC(s, name, prefix) + linkCaps := stack.CapabilityNone + if isTap { + linkCaps |= stack.CapabilityResolutionRequired + } + + endpoint, err := attachOrCreateNIC(s, name, prefix, linkCaps) if err != nil { return syserror.EINVAL } @@ -109,7 +114,7 @@ func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error { return nil } -func attachOrCreateNIC(s *stack.Stack, name, prefix string) (*tunEndpoint, error) { +func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkEndpointCapabilities) (*tunEndpoint, error) { for { // 1. Try to attach to an existing NIC. if name != "" { @@ -135,6 +140,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string) (*tunEndpoint, error nicID: id, name: name, } + endpoint.Endpoint.LinkEPCapabilities = linkCaps if endpoint.name == "" { endpoint.name = fmt.Sprintf("%s%d", prefix, id) } diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index f734511d6..53ad2dda3 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -256,50 +256,59 @@ TEST_F(TuntapTest, WriteToDownDevice) { EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EIO)); } -// This test sets up a TAP device and pings kernel by sending ICMP echo request. -// -// It works as the following: -// * Open /dev/net/tun, and create kTapName interface. -// * Use rtnetlink to do initial setup of the interface: -// * Assign IP address 10.0.0.1/24 to kernel. -// * MAC address: kMacA -// * Bring up the interface. -// * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel. -// * Loop to receive packets from TAP device/fd: -// * If packet is an ICMP echo reply, it stops and passes the test. -// * If packet is an ARP request, it responds with canned reply and resends -// the -// ICMP request packet. -TEST_F(TuntapTest, PingKernel) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); - +PosixErrorOr OpenAndAttachTap( + const std::string& dev_name, const std::string& dev_ipv4_addr) { // Interface creation. - FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, Open(kDevNetTun, O_RDWR)); struct ifreq ifr_set = {}; ifr_set.ifr_flags = IFF_TAP; - strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ); - EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set), - SyscallSucceedsWithValue(0)); + strncpy(ifr_set.ifr_name, dev_name.c_str(), IFNAMSIZ); + if (ioctl(fd.get(), TUNSETIFF, &ifr_set) < 0) { + return PosixError(errno); + } - absl::optional link = - ASSERT_NO_ERRNO_AND_VALUE(GetLinkByName(kTapName)); - ASSERT_TRUE(link.has_value()); + ASSIGN_OR_RETURN_ERRNO(absl::optional link, GetLinkByName(dev_name)); + if (!link.has_value()) { + return PosixError(ENOENT, "no link"); + } // Interface setup. 
struct in_addr addr; - inet_pton(AF_INET, "10.0.0.1", &addr); + inet_pton(AF_INET, dev_ipv4_addr.c_str(), &addr); EXPECT_NO_ERRNO(LinkAddLocalAddr(link->index, AF_INET, /*prefixlen=*/24, &addr, sizeof(addr))); if (!IsRunningOnGvisor()) { // FIXME: gVisor doesn't support setting MAC address on interfaces yet. - EXPECT_NO_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA))); + RETURN_IF_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA))); // FIXME: gVisor always creates enabled/up'd interfaces. - EXPECT_NO_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP)); + RETURN_IF_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP)); } + return fd; +} + +// This test sets up a TAP device and pings kernel by sending ICMP echo request. +// +// It works as the following: +// * Open /dev/net/tun, and create kTapName interface. +// * Use rtnetlink to do initial setup of the interface: +// * Assign IP address 10.0.0.1/24 to kernel. +// * MAC address: kMacA +// * Bring up the interface. +// * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel. +// * Loop to receive packets from TAP device/fd: +// * If packet is an ICMP echo reply, it stops and passes the test. +// * If packet is an ARP request, it responds with canned reply and resends +// the +// ICMP request packet. +TEST_F(TuntapTest, PingKernel) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1")); ping_pkt ping_req = CreatePingPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1"); std::string arp_rep = CreateArpPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1"); @@ -349,5 +358,47 @@ TEST_F(TuntapTest, PingKernel) { } } +TEST_F(TuntapTest, SendUdpTriggersArpResolution) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1")); + + // Send a UDP packet to remote. 
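+ // The remote 10.0.0.2 has no neighbor entry yet, so this send should force
+ // the stack to issue an ARP request on the TAP link, which the loop below
+ // waits for on the TAP fd. EHOSTDOWN is tolerated because the send may fail
+ // while address resolution is still pending.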
+ int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_in remote = {}; + remote.sin_family = AF_INET; + remote.sin_port = htons(42); + inet_pton(AF_INET, "10.0.0.2", &remote.sin_addr); + int ret = sendto(sock, "hello", 5, 0, reinterpret_cast(&remote), + sizeof(remote)); + ASSERT_THAT(ret, ::testing::AnyOf(SyscallSucceeds(), + SyscallFailsWithErrno(EHOSTDOWN))); + + struct inpkt { + union { + pihdr pi; + arp_pkt arp; + }; + }; + while (1) { + inpkt r = {}; + int n = read(fd.get(), &r, sizeof(r)); + EXPECT_THAT(n, SyscallSucceeds()); + + if (n < sizeof(pihdr)) { + std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol + << " len: " << n << std::endl; + continue; + } + + if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) { + break; + } + } +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 3192e55ffe04b583ca4261ec0b04a6e566a6038b Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Tue, 17 Mar 2020 08:52:14 -0700 Subject: Packetimpact in Go with c++ stub PiperOrigin-RevId: 301382690 --- Dockerfile | 2 +- WORKSPACE | 35 ++ pkg/tcpip/header/tcp.go | 42 +- test/packetdrill/Dockerfile | 8 +- test/packetimpact/README.md | 531 ++++++++++++++++++++++ test/packetimpact/dut/BUILD | 18 + test/packetimpact/dut/posix_server.cc | 229 ++++++++++ test/packetimpact/proto/BUILD | 12 + test/packetimpact/proto/posix_server.proto | 150 ++++++ test/packetimpact/testbench/BUILD | 31 ++ test/packetimpact/testbench/connections.go | 245 ++++++++++ test/packetimpact/testbench/dut.go | 363 +++++++++++++++ test/packetimpact/testbench/dut_client.go | 28 ++ test/packetimpact/testbench/layers.go | 507 +++++++++++++++++++++ test/packetimpact/testbench/rawsockets.go | 151 ++++++ test/packetimpact/tests/BUILD | 21 + test/packetimpact/tests/Dockerfile | 5 + test/packetimpact/tests/defs.bzl | 106 +++++ test/packetimpact/tests/fin_wait2_timeout_test.go | 68 +++ test/packetimpact/tests/test_runner.sh | 246 ++++++++++ tools/bazeldefs/defs.bzl | 81 +++- tools/defs.bzl | 58 ++- 22 files changed, 2898 insertions(+), 39 deletions(-) create mode 100644 test/packetimpact/README.md create mode 100644 test/packetimpact/dut/BUILD create mode 100644 test/packetimpact/dut/posix_server.cc create mode 100644 test/packetimpact/proto/BUILD create mode 100644 test/packetimpact/proto/posix_server.proto create mode 100644 test/packetimpact/testbench/BUILD create mode 100644 test/packetimpact/testbench/connections.go create mode 100644 test/packetimpact/testbench/dut.go create mode 100644 test/packetimpact/testbench/dut_client.go create mode 100644 test/packetimpact/testbench/layers.go create mode 100644 test/packetimpact/testbench/rawsockets.go create mode 100644 test/packetimpact/tests/BUILD create mode 100644 test/packetimpact/tests/Dockerfile create mode 100644 test/packetimpact/tests/defs.bzl create mode 100644 test/packetimpact/tests/fin_wait2_timeout_test.go create mode 100755 test/packetimpact/tests/test_runner.sh (limited to 'test') diff --git a/Dockerfile b/Dockerfile index 2bfdfec6c..0fac71710 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM fedora:31 RUN dnf install -y dnf-plugins-core && dnf copr enable -y vbatts/bazel -RUN dnf install -y bazel2 git gcc make golang gcc-c++ glibc-devel python3 which python3-pip python3-devel libffi-devel openssl-devel pkg-config glibc-static +RUN dnf install -y bazel2 git gcc make golang gcc-c++ glibc-devel python3 which python3-pip python3-devel libffi-devel openssl-devel pkg-config glibc-static 
libstdc++-static patch RUN pip install pycparser diff --git a/WORKSPACE b/WORKSPACE index d2bbadc63..62dfb9dc6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -161,6 +161,20 @@ load( _go_image_repos() +# Load C++ grpc rules. +http_archive( + name = "com_github_grpc_grpc", + sha256 = "2fcb7f1ab160d6fd3aaade64520be3e5446fc4c6fa7ba6581afdc4e26094bd81", + strip_prefix = "grpc-1.26.0", + urls = [ + "https://github.com/grpc/grpc/archive/v1.26.0.tar.gz", + ], +) +load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps") +grpc_deps() +load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps") +grpc_extra_deps() + # External repositories, in sorted order. go_repository( name = "com_github_cenkalti_backoff", @@ -204,6 +218,13 @@ go_repository( version = "v0.0.0-20171129191014-dec09d789f3d", ) +go_repository( + name = "com_github_imdario_mergo", + importpath = "github.com/imdario/mergo", + version = "v0.3.8", + sum = "h1:CGgOkSJeqMRmt0D9XLWExdT4m4F1vd3FV3VPt+0VxkQ=", +) + go_repository( name = "com_github_kr_pretty", importpath = "github.com/kr/pretty", @@ -225,6 +246,12 @@ go_repository( version = "v0.1.0", ) +go_repository( + name = "com_github_mohae_deepcopy", + importpath = "github.com/mohae/deepcopy", + commit = "c48cc78d482608239f6c4c92a4abd87eb8761c90", +) + go_repository( name = "com_github_opencontainers_runtime-spec", importpath = "github.com/opencontainers/runtime-spec", @@ -253,6 +280,14 @@ go_repository( version = "v0.0.0-20171111001504-be1fbeda1936", ) +go_repository( + name = "org_golang_google_grpc", + build_file_proto_mode = "disable", + importpath = "google.golang.org/grpc", + sum = "h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk=", + version = "v1.27.1", +) + go_repository( name = "in_gopkg_check_v1", importpath = "gopkg.in/check.v1", diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 82cfe785c..13480687d 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -81,7 +81,8 @@ type TCPFields struct { // AckNum is the "acknowledgement number" field of a TCP packet. AckNum uint32 - // DataOffset is the "data offset" field of a TCP packet. + // DataOffset is the "data offset" field of a TCP packet. It is the length of + // the TCP header in bytes. DataOffset uint8 // Flags is the "flags" field of a TCP packet. @@ -213,7 +214,8 @@ func (b TCP) AckNumber() uint32 { return binary.BigEndian.Uint32(b[TCPAckNumOffset:]) } -// DataOffset returns the "data offset" field of the tcp header. +// DataOffset returns the "data offset" field of the tcp header. The return +// value is the length of the TCP header in bytes. func (b TCP) DataOffset() uint8 { return (b[TCPDataOffset] >> 4) * 4 } @@ -238,6 +240,11 @@ func (b TCP) Checksum() uint16 { return binary.BigEndian.Uint16(b[TCPChecksumOffset:]) } +// UrgentPointer returns the "urgent pointer" field of the tcp header. +func (b TCP) UrgentPointer() uint16 { + return binary.BigEndian.Uint16(b[TCPUrgentPtrOffset:]) +} + // SetSourcePort sets the "source port" field of the tcp header. func (b TCP) SetSourcePort(port uint16) { binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], port) @@ -253,6 +260,37 @@ func (b TCP) SetChecksum(checksum uint16) { binary.BigEndian.PutUint16(b[TCPChecksumOffset:], checksum) } +// SetDataOffset sets the data offset field of the tcp header. headerLen should +// be the length of the TCP header in bytes. +func (b TCP) SetDataOffset(headerLen uint8) { + b[TCPDataOffset] = (headerLen / 4) << 4 +} + +// SetSequenceNumber sets the sequence number field of the tcp header. 
+func (b TCP) SetSequenceNumber(seqNum uint32) { + binary.BigEndian.PutUint32(b[TCPSeqNumOffset:], seqNum) +} + +// SetAckNumber sets the ack number field of the tcp header. +func (b TCP) SetAckNumber(ackNum uint32) { + binary.BigEndian.PutUint32(b[TCPAckNumOffset:], ackNum) +} + +// SetFlags sets the flags field of the tcp header. +func (b TCP) SetFlags(flags uint8) { + b[TCPFlagsOffset] = flags +} + +// SetWindowSize sets the window size field of the tcp header. +func (b TCP) SetWindowSize(rcvwnd uint16) { + binary.BigEndian.PutUint16(b[TCPWinSizeOffset:], rcvwnd) +} + +// SetUrgentPoiner sets the window size field of the tcp header. +func (b TCP) SetUrgentPoiner(urgentPointer uint16) { + binary.BigEndian.PutUint16(b[TCPUrgentPtrOffset:], urgentPointer) +} + // CalculateChecksum calculates the checksum of the tcp segment. // partialChecksum is the checksum of the network-layer pseudo-header // and the checksum of the segment data. diff --git a/test/packetdrill/Dockerfile b/test/packetdrill/Dockerfile index bd4451355..4b75e9527 100644 --- a/test/packetdrill/Dockerfile +++ b/test/packetdrill/Dockerfile @@ -1,9 +1,9 @@ FROM ubuntu:bionic -RUN apt-get update -RUN apt-get install -y net-tools git iptables iputils-ping netcat tcpdump jq tar +RUN apt-get update && apt-get install -y net-tools git iptables iputils-ping \ + netcat tcpdump jq tar bison flex make RUN hash -r RUN git clone --branch packetdrill-v2.0 \ https://github.com/google/packetdrill.git -RUN cd packetdrill/gtests/net/packetdrill && ./configure && \ - apt-get install -y bison flex make && make +RUN cd packetdrill/gtests/net/packetdrill && ./configure && make +CMD /bin/bash diff --git a/test/packetimpact/README.md b/test/packetimpact/README.md new file mode 100644 index 000000000..ece4dedc6 --- /dev/null +++ b/test/packetimpact/README.md @@ -0,0 +1,531 @@ +# Packetimpact + +## What is packetimpact? + +Packetimpact is a tool for platform-independent network testing. It is heavily +inspired by [packetdrill](https://github.com/google/packetdrill). It creates two +docker containers connected by a network. One is for the test bench, which +operates the test. The other is for the device-under-test (DUT), which is the +software being tested. The test bench communicates over the network with the DUT +to check correctness of the network. + +### Goals + +Packetimpact aims to provide: + +* A **multi-platform** solution that can test both Linux and gVisor. +* **Conciseness** on par with packetdrill scripts. +* **Control-flow** like for loops, conditionals, and variables. +* **Flexibilty** to specify every byte in a packet or use multiple sockets. + +## When to use packetimpact? + +There are a few ways to write networking tests for gVisor currently: + +* [Go unit tests](https://github.com/google/gvisor/tree/master/pkg/tcpip) +* [syscall tests](https://github.com/google/gvisor/tree/master/test/syscalls/linux) +* [packetdrill tests](https://github.com/google/gvisor/tree/master/test/packetdrill) +* packetimpact tests + +The right choice depends on the needs of the test. 
+ +Feature | Go unit test | syscall test | packetdrill | packetimpact +------------- | ------------ | ------------ | ----------- | ------------ +Multiplatform | no | **YES** | **YES** | **YES** +Concise | no | somewhat | somewhat | **VERY** +Control-flow | **YES** | **YES** | no | **YES** +Flexible | **VERY** | no | somewhat | **VERY** + +### Go unit tests + +If the test depends on the internals of gVisor and doesn't need to run on Linux +or other platforms for comparison purposes, a Go unit test can be appropriate. +They can observe internals of gVisor networking. The downside is that they are +**not concise** and **not multiplatform**. If you require insight on gVisor +internals, this is the right choice. + +### Syscall tests + +Syscall tests are **multiplatform** but cannot examine the internals of gVisor +networking. They are **concise**. They can use **control-flow** structures like +conditionals, for loops, and variables. However, they are limited to only what +the POSIX interface provides so they are **not flexible**. For example, you +would have difficulty writing a syscall test that intentionally sends a bad IP +checksum. Or if you did write that test with raw sockets, it would be very +**verbose** to write a test that intentionally send wrong checksums, wrong +protocols, wrong sequence numbers, etc. + +### Packetdrill tests + +Packetdrill tests are **multiplatform** and can run against both Linux and +gVisor. They are **concise** and use a special packetdrill scripting language. +They are **more flexible** than a syscall test in that they can send packets +that a syscall test would have difficulty sending, like a packet with a +calcuated ACK number. But they are also somewhat limimted in flexibiilty in that +they can't do tests with multiple sockets. They have **no control-flow** ability +like variables or conditionals. For example, it isn't possible to send a packet +that depends on the window size of a previous packet because the packetdrill +language can't express that. Nor could you branch based on whether or not the +other side supports window scaling, for example. + +### Packetimpact tests + +Packetimpact tests are similar to Packetdrill tests except that they are written +in Go instead of the packetdrill scripting language. That gives them all the +**control-flow** abilities of Go (loops, functions, variables, etc). They are +**multiplatform** in the same way as packetdrill tests but even more +**flexible** because Go is more expressive than the scripting language of +packetdrill. However, Go is **not as concise** as the packetdrill language. Many +design decisions below are made to mitigate that. + +## How it works + +``` + +--------------+ +--------------+ + | | TEST NET | | + | | <===========> | Device | + | Test | | Under | + | Bench | | Test | + | | <===========> | (DUT) | + | | CONTROL NET | | + +--------------+ +--------------+ +``` + +Two docker containers are created by a script, one for the test bench and the +other for the device under test (DUT). The script connects the two containers +with a control network and test network. It also does some other tasks like +waiting until the DUT is ready before starting the test and disabling Linux +networking that would interfere with the test bench. + +### DUT + +The DUT container runs a program called the "posix_server". The posix_server is +written in c++ for maximum portability. It is compiled on the host. The script +that starts the containers copies it into the DUT's container and runs it. 
It's +job is to receive directions from the test bench on what actions to take. For +this, the posix_server does three steps in a loop: + +1. Listen for a request from the test bench. +2. Execute a command. +3. Send the response back to the test bench. + +The requests and responses are +[protobufs](https://developers.google.com/protocol-buffers) and the +communication is done with [gRPC](https://grpc.io/). The commands run are +[POSIX socket commands](https://en.wikipedia.org/wiki/Berkeley_sockets#Socket_API_functions), +with the inputs and outputs converted into protobuf requests and responses. All +communication is on the control network, so that the test network is unaffected +by extra packets. + +For example, this is the request and response pair to call +[`socket()`](http://man7.org/linux/man-pages/man2/socket.2.html): + +```protocol-buffer +message SocketRequest { + int32 domain = 1; + int32 type = 2; + int32 protocol = 3; +} + +message SocketResponse { + int32 fd = 1; + int32 errno_ = 2; +} +``` + +##### Alternatives considered + +* We could have use JSON for communication instead. It would have been a + lighter-touch than protobuf but protobuf handles all the data type and has + strict typing to prevent a class of errors. The test bench could be written + in other languages, too. +* Instead of mimicking the POSIX interfaces, arguments could have had a more + natural form, like the `bind()` getting a string IP address instead of bytes + in a `sockaddr_t`. However, conforming to the existing structures keeps more + of the complexity in Go and keeps the posix_server simpler and thus more + likely to compile everywhere. + +### Test Bench + +The test bench does most of the work in a test. It is a Go program that compiles +on the host and is copied by the script into test bench's container. It is a +regular [go unit test](https://golang.org/pkg/testing/) that imports the test +bench framework. The test bench framwork is based on three basic utilities: + +* Commanding the DUT to run POSIX commands and return responses. +* Sending raw packets to the DUT on the test network. +* Listening for raw packets from the DUT on the test network. + +#### DUT commands + +To keep the interface to the DUT consistent and easy-to-use, each POSIX command +supported by the posix_server is wrapped in functions with signatures similar to +the ones in the [Go unix package](https://godoc.org/golang.org/x/sys/unix). This +way all the details of endianess and (un)marshalling of go structs such as +[unix.Timeval](https://godoc.org/golang.org/x/sys/unix#Timeval) is handled in +one place. This also makes it straight-forward to convert tests that use `unix.` +or `syscall.` calls to `dut.` calls. + +For example, creating a connection to the DUT and commanding it to make a socket +looks like this: + +```go +dut := testbench.NewDut(t) +fd, err := dut.SocketWithErrno(unix.AF_INET, unix.SOCK_STREAM, unix.IPPROTO_IP) +if fd < 0 { + t.Fatalf(...) +} +``` + +Because the usual case is to fail the test when the DUT fails to create a +socket, there is a concise version of each of the `...WithErrno` functions that +does that: + +```go +dut := testbench.NewDut(t) +fd := dut.Socket(unix.AF_INET, unix.SOCK_STREAM, unix.IPPROTO_IP) +``` + +The DUT and other structs in the code store a `*testing.T` so that they can +provide versions of functions that call `t.Fatalf(...)`. This helps keep tests +concise. 
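
To make the shape of these wrappers concrete, here is a minimal sketch of how a
`...WithErrno` function and its test-failing counterpart could be paired. The
`posixClient` interface and the `DUT` fields below are illustrative stand-ins
rather than the actual generated gRPC stubs, and the real testbench may divide
these responsibilities differently.

```go
package testbench

import (
	"context"
	"testing"

	"golang.org/x/sys/unix"
)

// posixClient stands in for the gRPC stub generated from posix_server.proto.
type posixClient interface {
	Socket(ctx context.Context, domain, typ, protocol int32) (fd int32, errno int32, err error)
}

// DUT pairs a *testing.T with the RPC client so helpers can fail the test
// directly instead of returning errors everywhere.
type DUT struct {
	t     *testing.T
	posix posixClient
}

// SocketWithErrno mirrors unix.Socket: it returns the fd and, on failure, the
// errno reported by the DUT.
func (d *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) {
	fd, errno, err := d.posix.Socket(context.Background(), domain, typ, proto)
	if err != nil {
		d.t.Fatalf("RPC to DUT failed: %v", err)
	}
	if fd < 0 {
		return fd, unix.Errno(errno)
	}
	return fd, nil
}

// Socket is the concise variant: any failure fails the test.
func (d *DUT) Socket(domain, typ, proto int32) int32 {
	fd, err := d.SocketWithErrno(domain, typ, proto)
	if err != nil {
		d.t.Fatalf("socket() on DUT failed: %v", err)
	}
	return fd
}
```

A test then calls `dut.Socket(...)` for the common case and drops down to
`SocketWithErrno` only when it wants to assert on a specific errno.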
+ +##### Alternatives considered + +* Instead of mimicking the `unix.` go interface, we could have invented a more + natural one, like using `float64` instead of `Timeval`. However, using the + same function signatures that `unix.` has makes it easier to convert code to + `dut.`. Also, using an existing interface ensures that we don't invent an + interface that isn't extensible. For example, if we invented a function for + `bind()` that didn't support IPv6 and later we had to add a second `bind6()` + function. + +#### Sending/Receiving Raw Packets + +The framework wraps POSIX sockets for sending and receiving raw frames. Both +send and receive are synchronous commands. +[SO_RCVTIMEO](http://man7.org/linux/man-pages/man7/socket.7.html) is used to set +a timeout on the receive commands. For ease of use, these are wrapped in an +`Injector` and a `Sniffer`. They have functions: + +```go +func (s *Sniffer) Recv(timeout time.Duration) []byte {...} +func (i *Injector) Send(b []byte) {...} +``` + +##### Alternatives considered + +* [gopacket](https://github.com/google/gopacket) pcap has raw socket support + but requires cgo. cgo is not guaranteed to be portable from the host to the + container and in practice, the container doesn't recognize binaries built on + the host if they use cgo. +* Both gVisor and gopacket have the ability to read and write pcap files + without cgo but that is insufficient here. +* The sniffer and injector can't share a socket because they need to be bound + differently. +* Sniffing could have been done asynchronously with channels, obviating the + need for `SO_RCVTIMEO`. But that would introduce asynchronous complication. + `SO_RCVTIMEO` is well supported on the test bench. + +#### `Layer` struct + +A large part of packetimpact tests is creating packets to send and comparing +received packets against expectations. To keep tests concise, it is useful to be +able to specify just the important parts of packets that need to be set. For +example, sending a packet with default values except for TCP Flags. And for +packets received, it's useful to be able to compare just the necessary parts of +received packets and ignore the rest. + +To aid in both of those, Go structs with optional fields are created for each +encapsulation type, such as IPv4, TCP, and Ethernet. This is inspired by +[scapy](https://scapy.readthedocs.io/en/latest/). For example, here is the +struct for Ethernet: + +```go +type Ether struct { + LayerBase + SrcAddr *tcpip.LinkAddress + DstAddr *tcpip.LinkAddress + Type *tcpip.NetworkProtocolNumber +} +``` + +Each struct has the same fields as those in the +[gVisor headers](https://github.com/google/gvisor/tree/master/pkg/tcpip/header) +but with a pointer for each field that may be `nil`. + +##### Alternatives considered + +* Just use []byte like gVisor headers do. The drawback is that it makes the + tests more verbose. + * For example, there would be no way to call `Send(myBytes)` concisely and + indicate if the checksum should be calculated automatically versus + overridden. The only way would be to add lines to the test to calculate + it before each Send, which is wordy. Or make multiple versions of Send: + one that checksums IP, one that doesn't, one that checksums TCP, one + that does both, etc. That would be many combinations. + * Filtering inputs would become verbose. 
Either: + * large conditionals that need to be repeated many places: + `h[FlagOffset] == SYN && h[LengthOffset:LengthOffset+2] == ...` or + * Many functions, one per field, like: `filterByFlag(myBytes, SYN)`, + `filterByLength(myBytes, 20)`, `filterByNextProto(myBytes, 0x8000)`, + etc. + * Using pointers allows us to combine `Layer`s with a one-line call to + `mergo.Merge(...)`. So the default `Layers` can be overridden by a + `Layers` with just the TCP conection's src/dst which can be overridden + by one with just a test specific TCP window size. Each override is + specified as just one call to `mergo.Merge`. + * It's a proven way to separate the details of a packet from the byte + format as shown by scapy's success. +* Use packetgo. It's more general than parsing packets with gVisor. However: + * packetgo doesn't have optional fields so many of the above problems + still apply. + * It would be yet another dependency. + * It's not as well known to engineers that are already writing gVisor + code. + * It might be a good candidate for replacing the parsing of packets into + `Layer`s if all that parsing turns out to be more work than parsing by + packetgo and converting *that* to `Layer`. packetgo has easier to use + getters for the layers. This could be done later in a way that doesn't + break tests. + +#### `Layer` methods + +The `Layer` structs provide a way to partially specify an encapsulation. They +also need methods for using those partially specified encapsulation, for example +to marshal them to bytes or compare them. For those, each encapsulation +implements the `Layer` interface: + +```go +// Layer is the interface that all encapsulations must implement. +// +// A Layer is an encapsulation in a packet, such as TCP, IPv4, IPv6, etc. A +// Layer contains all the fields of the encapsulation. Each field is a pointer +// and may be nil. +type Layer interface { + // toBytes converts the Layer into bytes. In places where the Layer's field + // isn't nil, the value that is pointed to is used. When the field is nil, a + // reasonable default for the Layer is used. For example, "64" for IPv4 TTL + // and a calculated checksum for TCP or IP. Some layers require information + // from the previous or next layers in order to compute a default, such as + // TCP's checksum or Ethernet's type, so each Layer has a doubly-linked list + // to the layer's neighbors. + toBytes() ([]byte, error) + + // match checks if the current Layer matches the provided Layer. If either + // Layer has a nil in a given field, that field is considered matching. + // Otherwise, the values pointed to by the fields must match. + match(Layer) bool + + // length in bytes of the current encapsulation + length() int + + // next gets a pointer to the encapsulated Layer. + next() Layer + + // prev gets a pointer to the Layer encapsulating this one. + prev() Layer + + // setNext sets the pointer to the encapsulated Layer. + setNext(Layer) + + // setPrev sets the pointer to the Layer encapsulating this one. + setPrev(Layer) +} +``` + +For each `Layer` there is also a parsing function. For example, this one is for +Ethernet: + +``` +func ParseEther(b []byte) (Layers, error) +``` + +The parsing function converts bytes received on the wire into a `Layer` +(actually `Layers`, see below) which has no `nil`s in it. By using +`match(Layer)` to compare against another `Layer` that *does* have `nil`s in it, +the received bytes can be partially compared. The `nil`s behave as +"don't-cares". 
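
As a concrete illustration of the nil-as-wildcard rule, here is a cut-down
`Ether`-like struct with a hand-written `match`. This is only a sketch: the
real testbench can implement matching generically across all `Layer` types
(for example with reflection or `cmp.Equal`), but the comparison rule is the
same.

```go
package main

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/tcpip"
)

// ether is a trimmed-down layer used only to demonstrate matching; the real
// Ether layer also embeds LayerBase and participates in a Layers list.
type ether struct {
	SrcAddr *tcpip.LinkAddress
	DstAddr *tcpip.LinkAddress
	Type    *tcpip.NetworkProtocolNumber
}

// match reports whether every non-nil field of want equals the corresponding
// field of got. nil fields in want are "don't-cares".
func (want ether) match(got ether) bool {
	if want.SrcAddr != nil && (got.SrcAddr == nil || *want.SrcAddr != *got.SrcAddr) {
		return false
	}
	if want.DstAddr != nil && (got.DstAddr == nil || *want.DstAddr != *got.DstAddr) {
		return false
	}
	if want.Type != nil && (got.Type == nil || *want.Type != *got.Type) {
		return false
	}
	return true
}

func main() {
	ipv4 := tcpip.NetworkProtocolNumber(0x0800)
	src := tcpip.LinkAddress("\x02\x42\xac\x11\x00\x02")

	got := ether{SrcAddr: &src, Type: &ipv4} // as parsed from the wire
	want := ether{Type: &ipv4}               // only the EtherType matters to the test

	fmt.Println(want.match(got)) // true: SrcAddr and DstAddr are wildcards
}
```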
+ +##### Alternatives considered + +* Matching against `[]byte` instead of converting to `Layer` first. + * The downside is that it precludes the use of a `cmp.Equal` one-liner to + do comparisons. + * It creates confusion in the code to deal with both representations at + different times. For example, is the checksum calculated on `[]byte` or + `Layer` when sending? What about when checking received packets? + +#### `Layers` + +``` +type Layers []Layer + +func (ls *Layers) match(other Layers) bool {...} +func (ls *Layers) toBytes() ([]byte, error) {...} +``` + +`Layers` is an array of `Layer`. It represents a stack of encapsulations, such +as `Layers{Ether{},IPv4{},TCP{},Payload{}}`. It also has `toBytes()` and +`match(Layers)`, like `Layer`. The parse functions above actually return +`Layers` and not `Layer` because they know about the headers below and +sequentially call each parser on the remaining, encapsulated bytes. + +All this leads to the ability to write concise packet processing. For example: + +```go +etherType := 0x8000 +flags = uint8(header.TCPFlagSyn|header.TCPFlagAck) +toMatch := Layers{Ether{Type: ðerType}, IPv4{}, TCP{Flags: &flags}} +for { + recvBytes := sniffer.Recv(time.Second) + if recvBytes == nil { + println("Got no packet for 1 second") + } + gotPacket, err := ParseEther(recvBytes) + if err == nil && toMatch.match(gotPacket) { + println("Got a TCP/IPv4/Eth packet with SYNACK") + } +} +``` + +##### Alternatives considered + +* Don't use previous and next pointers. + * Each layer may need to be able to interrogate the layers aroung it, like + for computing the next protocol number or total length. So *some* + mechanism is needed for a `Layer` to see neighboring layers. + * We could pass the entire array `Layers` to the `toBytes()` function. + Passing an array to a method that includes in the array the function + receiver itself seems wrong. + +#### Connections + +Using `Layers` above, we can create connection structures to maintain state +about connections. For example, here is the `TCPIPv4` struct: + +``` +type TCPIPv4 struct { + outgoing Layers + incoming Layers + localSeqNum uint32 + remoteSeqNum uint32 + sniffer Sniffer + injector Injector + t *testing.T +} +``` + +`TCPIPv4` contains an `outgoing Layers` which holds the defaults for the +connection, such as the source and destination MACs, IPs, and ports. When +`outgoing.toBytes()` is called a valid packet for this TCPIPv4 flow is built. + +It also contains `incoming Layers` which holds filter for incoming packets that +belong to this flow. `incoming.match(Layers)` is used on received bytes to check +if they are part of the flow. + +The `sniffer` and `injector` are for receiving and sending raw packet bytes. The +`localSeqNum` and `remoteSeqNum` are updated by `Send` and `Recv` so that +outgoing packets will have, by default, the correct sequence number and ack +number. + +TCPIPv4 provides some functions: + +``` +func (conn *TCPIPv4) Send(tcp TCP) {...} +func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP {...} +``` + +`Send(tcp TCP)` uses [mergo](https://github.com/imdario/mergo) to merge the +provided `TCP` (a `Layer`) into `outgoing`. This way the user can specify +concisely just which fields of `outgoing` to modify. The packet is sent using +the `injector`. + +`Recv(timeout time.Duration)` reads packets from the sniffer until either the +timeout has elapsed or a packet that matches `incoming` arrives. 
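
The merge step in `Send` can be pictured in isolation. The sketch below uses
trimmed-down field names and plain `mergo`; the real `Send` additionally keeps
the sequence and ack numbers up to date, and defaults such as checksums are
filled in later when the layers are serialized with `toBytes()`.

```go
package main

import (
	"fmt"

	"github.com/imdario/mergo"
)

// tcp uses pointer fields so that "unset" is representable, mirroring the
// Layer structs described above (field names trimmed for the example).
type tcp struct {
	SrcPort    *uint16
	DstPort    *uint16
	Flags      *uint8
	WindowSize *uint16
}

func u16(v uint16) *uint16 { return &v }
func u8(v uint8) *uint8    { return &v }

func main() {
	// Connection-wide defaults, like the ones a TCPIPv4 keeps in `outgoing`.
	defaults := tcp{SrcPort: u16(12345), DstPort: u16(80), WindowSize: u16(32768)}

	// A test states only what it cares about for this packet: set SYN.
	packet := tcp{Flags: u8(0x02)}

	// Fill every nil field of `packet` from `defaults`; fields the test set
	// explicitly are left alone.
	if err := mergo.Merge(&packet, defaults); err != nil {
		panic(err)
	}

	fmt.Printf("src=%d dst=%d flags=%#x win=%d\n",
		*packet.SrcPort, *packet.DstPort, *packet.Flags, *packet.WindowSize)
	// Prints: src=12345 dst=80 flags=0x2 win=32768
}
```

Because every field is a pointer, a one-line override in a test composes with
connection defaults without restating them.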
+ +Using those, we can perform a TCP 3-way handshake without too much code: + +```go +func (conn *TCPIPv4) Handshake() { + syn := uint8(header.TCPFlagSyn) + synack := uint8(header.TCPFlagSyn) + ack := uint8(header.TCPFlagAck) + conn.Send(TCP{Flags: &syn}) // Send a packet with all defaults but set TCP-SYN. + + // Wait for the SYN-ACK response. + for { + newTCP := conn.Recv(time.Second) // This already filters by MAC, IP, and ports. + if TCP{Flags: &synack}.match(newTCP) { + break // Only if it's a SYN-ACK proceed. + } + } + + conn.Send(TCP{Flags: &ack}) // Send an ACK. The seq and ack numbers are set correctly. +} +``` + +The handshake code is part of the testbench utilities so tests can share this +common sequence, making tests even more concise. + +##### Alternatives considered + +* Instead of storing `outgoing` and `incoming`, store values. + * There would be many more things to store instead, like `localMac`, + `remoteMac`, `localIP`, `remoteIP`, `localPort`, and `remotePort`. + * Construction of a packet would be many lines to copy each of these + values into a `[]byte`. And there would be slight variations needed for + each encapsulation stack, like TCPIPv6 and ARP. + * Filtering incoming packets would be a long sequence: + * Compare the MACs, then + * Parse the next header, then + * Compare the IPs, then + * Parse the next header, then + * Compare the TCP ports. Instead it's all just one call to + `cmp.Equal(...)`, for all sequences. + * A TCPIPv6 connection could share most of the code. Only the type of the + IP addresses are different. The types of `outgoing` and `incoming` would + be remain `Layers`. + * An ARP connection could share all the Ethernet parts. The IP `Layer` + could be factored out of `outgoing`. After that, the IPv4 and IPv6 + connections could implement one interface and a single TCP struct could + have either network protocol through composition. + +## Putting it all together + +Here's what te start of a packetimpact unit test looks like. This test creates a +TCP connection with the DUT. There are added comments for explanation in this +document but a real test might not include them in order to stay even more +concise. + +```go +func TestMyTcpTest(t *testing.T) { + // Prepare a DUT for communication. + dut := testbench.NewDUT(t) + + // This does: + // dut.Socket() + // dut.Bind() + // dut.Getsockname() to learn the new port number + // dut.Listen() + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) // Tell the DUT to close the socket at the end of the test. + + // Monitor a new TCP connection with sniffer, injector, sequence number tracking, + // and reasonable outgoing and incoming packet field default IPs, MACs, and port numbers. + conn := testbench.NewTCPIPv4(t, dut, remotePort) + + // Perform a 3-way handshake: send SYN, expect SYNACK, send ACK. + conn.Handshake() + + // Tell the DUT to accept the new connection. + acceptFD := dut.Accept(acceptFd) +} +``` + +## Other notes + +* The time between receiving a SYN-ACK and replying with an ACK in `Handshake` + is about 3ms. This is much slower than the native unix response, which is + about 0.3ms. Packetdrill gets closer to 0.3ms. For tests where timing is + crucial, packetdrill is faster and more precise. 
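
For reference, the receive primitive the sniffer builds on (see "Sending/
Receiving Raw Packets" above) can be sketched with nothing but `x/sys/unix`.
This is not the testbench's code, just the `SO_RCVTIMEO` pattern it describes;
creating the AF_PACKET socket requires CAP_NET_RAW.

```go
package main

import (
	"fmt"
	"time"

	"golang.org/x/sys/unix"
)

// htons converts a uint16 to network byte order for the AF_PACKET protocol.
func htons(v uint16) uint16 { return v<<8 | v>>8 }

// recvWithTimeout arms SO_RCVTIMEO on the socket and then issues a plain
// blocking recvfrom. If nothing arrives in time, the kernel returns
// EAGAIN/EWOULDBLOCK instead of blocking forever.
func recvWithTimeout(fd int, timeout time.Duration) ([]byte, error) {
	tv := unix.NsecToTimeval(timeout.Nanoseconds())
	if err := unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil {
		return nil, err
	}
	buf := make([]byte, 65536)
	n, _, err := unix.Recvfrom(fd, buf, 0)
	if err != nil {
		return nil, err // unix.EAGAIN here means the timeout expired.
	}
	return buf[:n], nil
}

func main() {
	// A raw link-layer socket like the sniffer's; needs CAP_NET_RAW.
	fd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL)))
	if err != nil {
		fmt.Println("socket:", err)
		return
	}
	defer unix.Close(fd)

	if frame, err := recvWithTimeout(fd, time.Second); err != nil {
		fmt.Println("recv:", err)
	} else {
		fmt.Printf("got a %d-byte frame\n", len(frame))
	}
}
```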
diff --git a/test/packetimpact/dut/BUILD b/test/packetimpact/dut/BUILD new file mode 100644 index 000000000..3ce63c2c6 --- /dev/null +++ b/test/packetimpact/dut/BUILD @@ -0,0 +1,18 @@ +load("//tools:defs.bzl", "cc_binary", "grpcpp") + +package( + default_visibility = ["//test/packetimpact:__subpackages__"], + licenses = ["notice"], +) + +cc_binary( + name = "posix_server", + srcs = ["posix_server.cc"], + linkstatic = 1, + static = True, # This is needed for running in a docker container. + deps = [ + grpcpp, + "//test/packetimpact/proto:posix_server_cc_grpc_proto", + "//test/packetimpact/proto:posix_server_cc_proto", + ], +) diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc new file mode 100644 index 000000000..2f10dda40 --- /dev/null +++ b/test/packetimpact/dut/posix_server.cc @@ -0,0 +1,229 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "arpa/inet.h" +#include "include/grpcpp/security/server_credentials.h" +#include "include/grpcpp/server_builder.h" +#include "test/packetimpact/proto/posix_server.grpc.pb.h" +#include "test/packetimpact/proto/posix_server.pb.h" + +// Converts a sockaddr_storage to a Sockaddr message. 
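+// Ports, flowinfo, and scope IDs are converted from network to host byte
+// order on the way out. Address families other than AF_INET and AF_INET6
+// yield an INVALID_ARGUMENT status.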
+::grpc::Status sockaddr_to_proto(const sockaddr_storage &addr, + socklen_t addrlen, + posix_server::Sockaddr *sockaddr_proto) { + switch (addr.ss_family) { + case AF_INET: { + auto addr_in = reinterpret_cast(&addr); + auto response_in = sockaddr_proto->mutable_in(); + response_in->set_family(addr_in->sin_family); + response_in->set_port(ntohs(addr_in->sin_port)); + response_in->mutable_addr()->assign( + reinterpret_cast(&addr_in->sin_addr.s_addr), 4); + return ::grpc::Status::OK; + } + case AF_INET6: { + auto addr_in6 = reinterpret_cast(&addr); + auto response_in6 = sockaddr_proto->mutable_in6(); + response_in6->set_family(addr_in6->sin6_family); + response_in6->set_port(ntohs(addr_in6->sin6_port)); + response_in6->set_flowinfo(ntohl(addr_in6->sin6_flowinfo)); + response_in6->mutable_addr()->assign( + reinterpret_cast(&addr_in6->sin6_addr.s6_addr), 16); + response_in6->set_scope_id(ntohl(addr_in6->sin6_scope_id)); + return ::grpc::Status::OK; + } + } + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "Unknown Sockaddr"); +} + +class PosixImpl final : public posix_server::Posix::Service { + ::grpc::Status Socket(grpc_impl::ServerContext *context, + const ::posix_server::SocketRequest *request, + ::posix_server::SocketResponse *response) override { + response->set_fd( + socket(request->domain(), request->type(), request->protocol())); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + + ::grpc::Status Bind(grpc_impl::ServerContext *context, + const ::posix_server::BindRequest *request, + ::posix_server::BindResponse *response) override { + if (!request->has_addr()) { + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "Missing address"); + } + sockaddr_storage addr; + + switch (request->addr().sockaddr_case()) { + case posix_server::Sockaddr::SockaddrCase::kIn: { + auto request_in = request->addr().in(); + if (request_in.addr().size() != 4) { + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "IPv4 address must be 4 bytes"); + } + auto addr_in = reinterpret_cast(&addr); + addr_in->sin_family = request_in.family(); + addr_in->sin_port = htons(request_in.port()); + request_in.addr().copy( + reinterpret_cast(&addr_in->sin_addr.s_addr), 4); + break; + } + case posix_server::Sockaddr::SockaddrCase::kIn6: { + auto request_in6 = request->addr().in6(); + if (request_in6.addr().size() != 16) { + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "IPv6 address must be 16 bytes"); + } + auto addr_in6 = reinterpret_cast(&addr); + addr_in6->sin6_family = request_in6.family(); + addr_in6->sin6_port = htons(request_in6.port()); + addr_in6->sin6_flowinfo = htonl(request_in6.flowinfo()); + request_in6.addr().copy( + reinterpret_cast(&addr_in6->sin6_addr.s6_addr), 16); + addr_in6->sin6_scope_id = htonl(request_in6.scope_id()); + break; + } + case posix_server::Sockaddr::SockaddrCase::SOCKADDR_NOT_SET: + default: + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "Unknown Sockaddr"); + } + response->set_ret(bind(request->sockfd(), + reinterpret_cast(&addr), sizeof(addr))); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + + ::grpc::Status GetSockName( + grpc_impl::ServerContext *context, + const ::posix_server::GetSockNameRequest *request, + ::posix_server::GetSockNameResponse *response) override { + sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + response->set_ret(getsockname( + request->sockfd(), reinterpret_cast(&addr), &addrlen)); + response->set_errno_(errno); + return sockaddr_to_proto(addr, addrlen, 
response->mutable_addr()); + } + + ::grpc::Status Listen(grpc_impl::ServerContext *context, + const ::posix_server::ListenRequest *request, + ::posix_server::ListenResponse *response) override { + response->set_ret(listen(request->sockfd(), request->backlog())); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + + ::grpc::Status Accept(grpc_impl::ServerContext *context, + const ::posix_server::AcceptRequest *request, + ::posix_server::AcceptResponse *response) override { + sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + response->set_fd(accept(request->sockfd(), + reinterpret_cast(&addr), &addrlen)); + response->set_errno_(errno); + return sockaddr_to_proto(addr, addrlen, response->mutable_addr()); + } + + ::grpc::Status SetSockOpt( + grpc_impl::ServerContext *context, + const ::posix_server::SetSockOptRequest *request, + ::posix_server::SetSockOptResponse *response) override { + response->set_ret(setsockopt(request->sockfd(), request->level(), + request->optname(), request->optval().c_str(), + request->optval().size())); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + + ::grpc::Status SetSockOptTimeval( + ::grpc::ServerContext *context, + const ::posix_server::SetSockOptTimevalRequest *request, + ::posix_server::SetSockOptTimevalResponse *response) override { + timeval tv = {.tv_sec = static_cast<__time_t>(request->timeval().seconds()), + .tv_usec = static_cast<__suseconds_t>( + request->timeval().microseconds())}; + response->set_ret(setsockopt(request->sockfd(), request->level(), + request->optname(), &tv, sizeof(tv))); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + + ::grpc::Status Close(grpc_impl::ServerContext *context, + const ::posix_server::CloseRequest *request, + ::posix_server::CloseResponse *response) override { + response->set_ret(close(request->fd())); + response->set_errno_(errno); + return ::grpc::Status::OK; + } +}; + +// Parse command line options. Returns a pointer to the first argument beyond +// the options. +void parse_command_line_options(int argc, char *argv[], std::string *ip, + int *port) { + static struct option options[] = {{"ip", required_argument, NULL, 1}, + {"port", required_argument, NULL, 2}, + {0, 0, 0, 0}}; + + // Parse the arguments. + int c; + while ((c = getopt_long(argc, argv, "", options, NULL)) > 0) { + if (c == 1) { + *ip = optarg; + } else if (c == 2) { + *port = std::stoi(std::string(optarg)); + } + } +} + +void run_server(const std::string &ip, int port) { + PosixImpl posix_service; + grpc::ServerBuilder builder; + std::string server_address = ip + ":" + std::to_string(port); + // Set the authentication mechanism. + std::shared_ptr creds = + grpc::InsecureServerCredentials(); + builder.AddListeningPort(server_address, creds); + builder.RegisterService(&posix_service); + + std::unique_ptr server(builder.BuildAndStart()); + std::cerr << "Server listening on " << server_address << std::endl; + server->Wait(); + std::cerr << "posix_server is finished." << std::endl; +} + +int main(int argc, char *argv[]) { + std::cerr << "posix_server is starting." << std::endl; + std::string ip; + int port; + parse_command_line_options(argc, argv, &ip, &port); + + std::cerr << "Got IP " << ip << " and port " << port << "." 
<< std::endl; + run_server(ip, port); +} diff --git a/test/packetimpact/proto/BUILD b/test/packetimpact/proto/BUILD new file mode 100644 index 000000000..4a4370f42 --- /dev/null +++ b/test/packetimpact/proto/BUILD @@ -0,0 +1,12 @@ +load("//tools:defs.bzl", "proto_library") + +package( + default_visibility = ["//test/packetimpact:__subpackages__"], + licenses = ["notice"], +) + +proto_library( + name = "posix_server", + srcs = ["posix_server.proto"], + has_services = 1, +) diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto new file mode 100644 index 000000000..026876fc2 --- /dev/null +++ b/test/packetimpact/proto/posix_server.proto @@ -0,0 +1,150 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package posix_server; + +message SocketRequest { + int32 domain = 1; + int32 type = 2; + int32 protocol = 3; +} + +message SocketResponse { + int32 fd = 1; + int32 errno_ = 2; +} + +message SockaddrIn { + int32 family = 1; + uint32 port = 2; + bytes addr = 3; +} + +message SockaddrIn6 { + uint32 family = 1; + uint32 port = 2; + uint32 flowinfo = 3; + bytes addr = 4; + uint32 scope_id = 5; +} + +message Sockaddr { + oneof sockaddr { + SockaddrIn in = 1; + SockaddrIn6 in6 = 2; + } +} + +message BindRequest { + int32 sockfd = 1; + Sockaddr addr = 2; +} + +message BindResponse { + int32 ret = 1; + int32 errno_ = 2; +} + +message GetSockNameRequest { + int32 sockfd = 1; +} + +message GetSockNameResponse { + int32 ret = 1; + int32 errno_ = 2; + Sockaddr addr = 3; +} + +message ListenRequest { + int32 sockfd = 1; + int32 backlog = 2; +} + +message ListenResponse { + int32 ret = 1; + int32 errno_ = 2; +} + +message AcceptRequest { + int32 sockfd = 1; +} + +message AcceptResponse { + int32 fd = 1; + int32 errno_ = 2; + Sockaddr addr = 3; +} + +message SetSockOptRequest { + int32 sockfd = 1; + int32 level = 2; + int32 optname = 3; + bytes optval = 4; +} + +message SetSockOptResponse { + int32 ret = 1; + int32 errno_ = 2; +} + +message Timeval { + int64 seconds = 1; + int64 microseconds = 2; +} + +message SetSockOptTimevalRequest { + int32 sockfd = 1; + int32 level = 2; + int32 optname = 3; + Timeval timeval = 4; +} + +message SetSockOptTimevalResponse { + int32 ret = 1; + int32 errno_ = 2; +} + +message CloseRequest { + int32 fd = 1; +} + +message CloseResponse { + int32 ret = 1; + int32 errno_ = 2; +} + +service Posix { + // Call socket() on the DUT. + rpc Socket(SocketRequest) returns (SocketResponse); + // Call bind() on the DUT. + rpc Bind(BindRequest) returns (BindResponse); + // Call getsockname() on the DUT. + rpc GetSockName(GetSockNameRequest) returns (GetSockNameResponse); + // Call listen() on the DUT. + rpc Listen(ListenRequest) returns (ListenResponse); + // Call accept() on the DUT. + rpc Accept(AcceptRequest) returns (AcceptResponse); + // Call setsockopt() on the DUT. 
You should prefer one of the other + // SetSockOpt* functions with a more structured optval or else you may get the + // encoding wrong, such as making a bad assumption about the server's word + // sizes or endianness. + rpc SetSockOpt(SetSockOptRequest) returns (SetSockOptResponse); + // Call setsockopt() on the DUT with a Timeval optval. + rpc SetSockOptTimeval(SetSockOptTimevalRequest) + returns (SetSockOptTimevalResponse); + // Call close() on the DUT. + rpc Close(CloseRequest) returns (CloseResponse); +} diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD new file mode 100644 index 000000000..a34c81fcc --- /dev/null +++ b/test/packetimpact/testbench/BUILD @@ -0,0 +1,31 @@ +load("//tools:defs.bzl", "go_library") + +package( + default_visibility = ["//test/packetimpact:__subpackages__"], + licenses = ["notice"], +) + +go_library( + name = "testbench", + srcs = [ + "connections.go", + "dut.go", + "dut_client.go", + "layers.go", + "rawsockets.go", + ], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/header", + "//pkg/tcpip/seqnum", + "//pkg/usermem", + "//test/packetimpact/proto:posix_server_go_proto", + "@com_github_google_go-cmp//cmp:go_default_library", + "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", + "@com_github_imdario_mergo//:go_default_library", + "@com_github_mohae_deepcopy//:go_default_library", + "@org_golang_google_grpc//:go_default_library", + "@org_golang_google_grpc//keepalive:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go new file mode 100644 index 000000000..b7aa63934 --- /dev/null +++ b/test/packetimpact/testbench/connections.go @@ -0,0 +1,245 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package testbench has utilities to send and receive packets and also command +// the DUT to run POSIX functions. +package testbench + +import ( + "flag" + "fmt" + "math/rand" + "net" + "testing" + "time" + + "github.com/mohae/deepcopy" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" +) + +var localIPv4 = flag.String("local_ipv4", "", "local IPv4 address for test packets") +var remoteIPv4 = flag.String("remote_ipv4", "", "remote IPv4 address for test packets") +var localMAC = flag.String("local_mac", "", "local mac address for test packets") +var remoteMAC = flag.String("remote_mac", "", "remote mac address for test packets") + +// TCPIPv4 maintains state about a TCP/IPv4 connection. +type TCPIPv4 struct { + outgoing Layers + incoming Layers + LocalSeqNum seqnum.Value + RemoteSeqNum seqnum.Value + SynAck *TCP + sniffer Sniffer + injector Injector + portPickerFD int + t *testing.T +} + +// pickPort makes a new socket and returns the socket FD and port. The caller +// must close the FD when done with the port if there is no error. 
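// Editorial sketch (not part of this change): pickPort reserves a local TCP
// port for the test and keeps it reserved for the connection's lifetime, which
// is exactly how NewTCPIPv4 below uses it. A minimal usage sketch, assuming a
// *testing.T named t:
//
//	fd, port, err := pickPort()
//	if err != nil {
//		t.Fatalf("can't pick a port: %s", err)
//	}
//	defer unix.Close(fd) // keeping fd open reserves the port; closing it releases the port
//	// port is then used as the local TCP source port for outgoing test packets.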
+func pickPort() (int, uint16, error) { + fd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0) + if err != nil { + return -1, 0, err + } + var sa unix.SockaddrInet4 + copy(sa.Addr[0:4], net.ParseIP(*localIPv4).To4()) + if err := unix.Bind(fd, &sa); err != nil { + unix.Close(fd) + return -1, 0, err + } + newSockAddr, err := unix.Getsockname(fd) + if err != nil { + unix.Close(fd) + return -1, 0, err + } + newSockAddrInet4, ok := newSockAddr.(*unix.SockaddrInet4) + if !ok { + unix.Close(fd) + return -1, 0, fmt.Errorf("can't cast Getsockname result to SockaddrInet4") + } + return fd, uint16(newSockAddrInet4.Port), nil +} + +// tcpLayerIndex is the position of the TCP layer in the TCPIPv4 connection. It +// is the third, after Ethernet and IPv4. +const tcpLayerIndex int = 2 + +// NewTCPIPv4 creates a new TCPIPv4 connection with reasonable defaults. +func NewTCPIPv4(t *testing.T, dut DUT, outgoingTCP, incomingTCP TCP) TCPIPv4 { + lMAC, err := tcpip.ParseMACAddress(*localMAC) + if err != nil { + t.Fatalf("can't parse localMAC %q: %s", *localMAC, err) + } + + rMAC, err := tcpip.ParseMACAddress(*remoteMAC) + if err != nil { + t.Fatalf("can't parse remoteMAC %q: %s", *remoteMAC, err) + } + + portPickerFD, localPort, err := pickPort() + if err != nil { + t.Fatalf("can't pick a port: %s", err) + } + lIP := tcpip.Address(net.ParseIP(*localIPv4).To4()) + rIP := tcpip.Address(net.ParseIP(*remoteIPv4).To4()) + + sniffer, err := NewSniffer(t) + if err != nil { + t.Fatalf("can't make new sniffer: %s", err) + } + + injector, err := NewInjector(t) + if err != nil { + t.Fatalf("can't make new injector: %s", err) + } + + newOutgoingTCP := &TCP{ + DataOffset: Uint8(header.TCPMinimumSize), + WindowSize: Uint16(32768), + SrcPort: &localPort, + } + if err := newOutgoingTCP.merge(outgoingTCP); err != nil { + t.Fatalf("can't merge %v into %v: %s", outgoingTCP, newOutgoingTCP, err) + } + newIncomingTCP := &TCP{ + DstPort: &localPort, + } + if err := newIncomingTCP.merge(incomingTCP); err != nil { + t.Fatalf("can't merge %v into %v: %s", incomingTCP, newIncomingTCP, err) + } + return TCPIPv4{ + outgoing: Layers{ + &Ether{SrcAddr: &lMAC, DstAddr: &rMAC}, + &IPv4{SrcAddr: &lIP, DstAddr: &rIP}, + newOutgoingTCP}, + incoming: Layers{ + &Ether{SrcAddr: &rMAC, DstAddr: &lMAC}, + &IPv4{SrcAddr: &rIP, DstAddr: &lIP}, + newIncomingTCP}, + sniffer: sniffer, + injector: injector, + portPickerFD: portPickerFD, + t: t, + LocalSeqNum: seqnum.Value(rand.Uint32()), + } +} + +// Close the injector and sniffer associated with this connection. +func (conn *TCPIPv4) Close() { + conn.sniffer.Close() + conn.injector.Close() + if err := unix.Close(conn.portPickerFD); err != nil { + conn.t.Fatalf("can't close portPickerFD: %s", err) + } + conn.portPickerFD = -1 +} + +// Send a packet with reasonable defaults and override some fields by tcp. +func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { + if tcp.SeqNum == nil { + tcp.SeqNum = Uint32(uint32(conn.LocalSeqNum)) + } + if tcp.AckNum == nil { + tcp.AckNum = Uint32(uint32(conn.RemoteSeqNum)) + } + layersToSend := deepcopy.Copy(conn.outgoing).(Layers) + if err := layersToSend[tcpLayerIndex].(*TCP).merge(tcp); err != nil { + conn.t.Fatalf("can't merge %v into %v: %s", tcp, layersToSend[tcpLayerIndex], err) + } + layersToSend = append(layersToSend, additionalLayers...) + outBytes, err := layersToSend.toBytes() + if err != nil { + conn.t.Fatalf("can't build outgoing TCP packet: %s", err) + } + conn.injector.Send(outBytes) + + // Compute the next TCP sequence number. 
+ for i := tcpLayerIndex + 1; i < len(layersToSend); i++ { + conn.LocalSeqNum.UpdateForward(seqnum.Size(layersToSend[i].length())) + } + if tcp.Flags != nil && *tcp.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 { + conn.LocalSeqNum.UpdateForward(1) + } +} + +// Recv gets a packet from the sniffer within the timeout provided. If no packet +// arrives before the timeout, it returns nil. +func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP { + deadline := time.Now().Add(timeout) + for { + timeout = deadline.Sub(time.Now()) + if timeout <= 0 { + break + } + b := conn.sniffer.Recv(timeout) + if b == nil { + break + } + layers, err := ParseEther(b) + if err != nil { + continue // Ignore packets that can't be parsed. + } + if !conn.incoming.match(layers) { + continue // Ignore packets that don't match the expected incoming. + } + tcpHeader := (layers[tcpLayerIndex]).(*TCP) + conn.RemoteSeqNum = seqnum.Value(*tcpHeader.SeqNum) + if *tcpHeader.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 { + conn.RemoteSeqNum.UpdateForward(1) + } + for i := tcpLayerIndex + 1; i < len(layers); i++ { + conn.RemoteSeqNum.UpdateForward(seqnum.Size(layers[i].length())) + } + return tcpHeader + } + return nil +} + +// Expect a packet that matches the provided tcp within the timeout specified. +// If it doesn't arrive in time, the test fails. +func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) *TCP { + deadline := time.Now().Add(timeout) + for { + timeout = deadline.Sub(time.Now()) + if timeout <= 0 { + return nil + } + gotTCP := conn.Recv(timeout) + if gotTCP == nil { + return nil + } + if tcp.match(gotTCP) { + return gotTCP + } + } +} + +// Handshake performs a TCP 3-way handshake. +func (conn *TCPIPv4) Handshake() { + // Send the SYN. + conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn)}) + + // Wait for the SYN-ACK. + conn.SynAck = conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) + if conn.SynAck == nil { + conn.t.Fatalf("didn't get synack during handshake") + } + + // Send an ACK. + conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)}) +} diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go new file mode 100644 index 000000000..8ea1706d3 --- /dev/null +++ b/test/packetimpact/testbench/dut.go @@ -0,0 +1,363 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package testbench + +import ( + "context" + "flag" + "net" + "strconv" + "syscall" + "testing" + "time" + + pb "gvisor.dev/gvisor/test/packetimpact/proto/posix_server_go_proto" + + "golang.org/x/sys/unix" + "google.golang.org/grpc" + "google.golang.org/grpc/keepalive" +) + +var ( + posixServerIP = flag.String("posix_server_ip", "", "ip address to listen to for UDP commands") + posixServerPort = flag.Int("posix_server_port", 40000, "port to listen to for UDP commands") + rpcTimeout = flag.Duration("rpc_timeout", 100*time.Millisecond, "gRPC timeout") + rpcKeepalive = flag.Duration("rpc_keepalive", 10*time.Second, "gRPC keepalive") +) + +// DUT communicates with the DUT to force it to make POSIX calls. +type DUT struct { + t *testing.T + conn *grpc.ClientConn + posixServer PosixClient +} + +// NewDUT creates a new connection with the DUT over gRPC. +func NewDUT(t *testing.T) DUT { + flag.Parse() + posixServerAddress := *posixServerIP + ":" + strconv.Itoa(*posixServerPort) + conn, err := grpc.Dial(posixServerAddress, grpc.WithInsecure(), grpc.WithKeepaliveParams(keepalive.ClientParameters{Timeout: *rpcKeepalive})) + if err != nil { + t.Fatalf("failed to grpc.Dial(%s): %s", posixServerAddress, err) + } + posixServer := NewPosixClient(conn) + return DUT{ + t: t, + conn: conn, + posixServer: posixServer, + } +} + +// TearDown closes the underlying connection. +func (dut *DUT) TearDown() { + dut.conn.Close() +} + +// SocketWithErrno calls socket on the DUT and returns the fd and errno. +func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) { + dut.t.Helper() + req := pb.SocketRequest{ + Domain: domain, + Type: typ, + Protocol: proto, + } + ctx := context.Background() + resp, err := dut.posixServer.Socket(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Socket: %s", err) + } + return resp.GetFd(), syscall.Errno(resp.GetErrno_()) +} + +// Socket calls socket on the DUT and returns the file descriptor. If socket +// fails on the DUT, the test ends. +func (dut *DUT) Socket(domain, typ, proto int32) int32 { + dut.t.Helper() + fd, err := dut.SocketWithErrno(domain, typ, proto) + if fd < 0 { + dut.t.Fatalf("failed to create socket: %s", err) + } + return fd +} + +func (dut *DUT) sockaddrToProto(sa unix.Sockaddr) *pb.Sockaddr { + dut.t.Helper() + switch s := sa.(type) { + case *unix.SockaddrInet4: + return &pb.Sockaddr{ + Sockaddr: &pb.Sockaddr_In{ + In: &pb.SockaddrIn{ + Family: unix.AF_INET, + Port: uint32(s.Port), + Addr: s.Addr[:], + }, + }, + } + case *unix.SockaddrInet6: + return &pb.Sockaddr{ + Sockaddr: &pb.Sockaddr_In6{ + In6: &pb.SockaddrIn6{ + Family: unix.AF_INET6, + Port: uint32(s.Port), + Flowinfo: 0, + ScopeId: s.ZoneId, + Addr: s.Addr[:], + }, + }, + } + } + dut.t.Fatalf("can't parse Sockaddr: %+v", sa) + return nil +} + +func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr { + dut.t.Helper() + switch s := sa.Sockaddr.(type) { + case *pb.Sockaddr_In: + ret := unix.SockaddrInet4{ + Port: int(s.In.GetPort()), + } + copy(ret.Addr[:], s.In.GetAddr()) + return &ret + case *pb.Sockaddr_In6: + ret := unix.SockaddrInet6{ + Port: int(s.In6.GetPort()), + ZoneId: s.In6.GetScopeId(), + } + copy(ret.Addr[:], s.In6.GetAddr()) + } + dut.t.Fatalf("can't parse Sockaddr: %+v", sa) + return nil +} + +// BindWithErrno calls bind on the DUT. 
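// Editorial note (not part of this change): the two conversion helpers above
// are a straight field-for-field mapping. As a hypothetical example, the Go
// value
//
//	&unix.SockaddrInet4{Port: 8080, Addr: [4]byte{192, 168, 1, 10}}
//
// is sent as a Sockaddr proto with the "in" oneof case set (Family = AF_INET,
// Port = 8080, Addr = the raw 4 bytes), and protoToSockaddr rebuilds the
// unix.SockaddrInet4 on the way back; the address family is implied by which
// oneof case is populated.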
+func (dut *DUT) BindWithErrno(fd int32, sa unix.Sockaddr) (int32, error) { + dut.t.Helper() + req := pb.BindRequest{ + Sockfd: fd, + Addr: dut.sockaddrToProto(sa), + } + ctx := context.Background() + resp, err := dut.posixServer.Bind(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Bind: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + +// Bind calls bind on the DUT and causes a fatal test failure if it doesn't +// succeed. +func (dut *DUT) Bind(fd int32, sa unix.Sockaddr) { + dut.t.Helper() + ret, err := dut.BindWithErrno(fd, sa) + if ret != 0 { + dut.t.Fatalf("failed to bind socket: %s", err) + } +} + +// GetSockNameWithErrno calls getsockname on the DUT. +func (dut *DUT) GetSockNameWithErrno(sockfd int32) (int32, unix.Sockaddr, error) { + dut.t.Helper() + req := pb.GetSockNameRequest{ + Sockfd: sockfd, + } + ctx := context.Background() + resp, err := dut.posixServer.GetSockName(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Bind: %s", err) + } + return resp.GetRet(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) +} + +// GetSockName calls getsockname on the DUT and causes a fatal test failure if +// it doens't succeed. +func (dut *DUT) GetSockName(sockfd int32) unix.Sockaddr { + dut.t.Helper() + ret, sa, err := dut.GetSockNameWithErrno(sockfd) + if ret != 0 { + dut.t.Fatalf("failed to getsockname: %s", err) + } + return sa +} + +// ListenWithErrno calls listen on the DUT. +func (dut *DUT) ListenWithErrno(sockfd, backlog int32) (int32, error) { + dut.t.Helper() + req := pb.ListenRequest{ + Sockfd: sockfd, + Backlog: backlog, + } + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + resp, err := dut.posixServer.Listen(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Listen: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + +// Listen calls listen on the DUT and causes a fatal test failure if it doesn't +// succeed. +func (dut *DUT) Listen(sockfd, backlog int32) { + dut.t.Helper() + ret, err := dut.ListenWithErrno(sockfd, backlog) + if ret != 0 { + dut.t.Fatalf("failed to listen: %s", err) + } +} + +// AcceptWithErrno calls accept on the DUT. +func (dut *DUT) AcceptWithErrno(sockfd int32) (int32, unix.Sockaddr, error) { + dut.t.Helper() + req := pb.AcceptRequest{ + Sockfd: sockfd, + } + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + resp, err := dut.posixServer.Accept(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Accept: %s", err) + } + return resp.GetFd(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) +} + +// Accept calls accept on the DUT and causes a fatal test failure if it doesn't +// succeed. +func (dut *DUT) Accept(sockfd int32) (int32, unix.Sockaddr) { + dut.t.Helper() + fd, sa, err := dut.AcceptWithErrno(sockfd) + if fd < 0 { + dut.t.Fatalf("failed to accept: %s", err) + } + return fd, sa +} + +// SetSockOptWithErrno calls setsockopt on the DUT. 
+func (dut *DUT) SetSockOptWithErrno(sockfd, level, optname int32, optval []byte) (int32, error) { + dut.t.Helper() + req := pb.SetSockOptRequest{ + Sockfd: sockfd, + Level: level, + Optname: optname, + Optval: optval, + } + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + resp, err := dut.posixServer.SetSockOpt(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call SetSockOpt: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + +// SetSockOpt calls setsockopt on the DUT and causes a fatal test failure if it +// doesn't succeed. +func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) { + dut.t.Helper() + ret, err := dut.SetSockOptWithErrno(sockfd, level, optname, optval) + if ret != 0 { + dut.t.Fatalf("failed to SetSockOpt: %s", err) + } +} + +// SetSockOptTimevalWithErrno calls setsockopt with the timeval converted to +// bytes. +func (dut *DUT) SetSockOptTimevalWithErrno(sockfd, level, optname int32, tv *unix.Timeval) (int32, error) { + dut.t.Helper() + timeval := pb.Timeval{ + Seconds: int64(tv.Sec), + Microseconds: int64(tv.Usec), + } + req := pb.SetSockOptTimevalRequest{ + Sockfd: sockfd, + Level: level, + Optname: optname, + Timeval: &timeval, + } + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + resp, err := dut.posixServer.SetSockOptTimeval(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call SetSockOptTimeval: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + +// SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure +// if it doesn't succeed. +func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval) { + dut.t.Helper() + ret, err := dut.SetSockOptTimevalWithErrno(sockfd, level, optname, tv) + if ret != 0 { + dut.t.Fatalf("failed to SetSockOptTimeval: %s", err) + } +} + +// CloseWithErrno calls close on the DUT. +func (dut *DUT) CloseWithErrno(fd int32) (int32, error) { + dut.t.Helper() + req := pb.CloseRequest{ + Fd: fd, + } + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + resp, err := dut.posixServer.Close(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Close: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + +// Close calls close on the DUT and causes a fatal test failure if it doesn't +// succeed. +func (dut *DUT) Close(fd int32) { + dut.t.Helper() + ret, err := dut.CloseWithErrno(fd) + if ret != 0 { + dut.t.Fatalf("failed to close: %s", err) + } +} + +// CreateListener makes a new TCP connection. If it fails, the test ends. 
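// Editorial sketch (not part of this change): each syscall is exposed as a
// pair, so tests that require success use the short wrapper and tests that
// probe failure use the *WithErrno form and inspect the result themselves.
// A minimal sketch, assuming a DUT named dut and a *testing.T named t:
//
//	fd := dut.Socket(unix.AF_INET, unix.SOCK_STREAM, unix.IPPROTO_TCP) // fatal if it fails
//	defer dut.Close(fd)
//
//	badFd, errno := dut.SocketWithErrno(-1, unix.SOCK_STREAM, 0) // test keeps running
//	if badFd >= 0 {
//		t.Errorf("socket(-1, ...) unexpectedly succeeded")
//	}
//	t.Logf("socket(-1, ...) failed with errno %v", errno)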
+func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { + dut.t.Helper() + addr := net.ParseIP(*remoteIPv4) + var fd int32 + if addr.To4() != nil { + fd = dut.Socket(unix.AF_INET, typ, proto) + sa := unix.SockaddrInet4{} + copy(sa.Addr[:], addr.To4()) + dut.Bind(fd, &sa) + } else if addr.To16() != nil { + fd = dut.Socket(unix.AF_INET6, typ, proto) + sa := unix.SockaddrInet6{} + copy(sa.Addr[:], addr.To16()) + dut.Bind(fd, &sa) + } else { + dut.t.Fatal("unknown ip addr type for remoteIP") + } + sa := dut.GetSockName(fd) + var port int + switch s := sa.(type) { + case *unix.SockaddrInet4: + port = s.Port + case *unix.SockaddrInet6: + port = s.Port + default: + dut.t.Fatalf("unknown sockaddr type from getsockname: %t", sa) + } + dut.Listen(fd, backlog) + return fd, uint16(port) +} diff --git a/test/packetimpact/testbench/dut_client.go b/test/packetimpact/testbench/dut_client.go new file mode 100644 index 000000000..b130a33a2 --- /dev/null +++ b/test/packetimpact/testbench/dut_client.go @@ -0,0 +1,28 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testbench + +import ( + "google.golang.org/grpc" + pb "gvisor.dev/gvisor/test/packetimpact/proto/posix_server_go_proto" +) + +// PosixClient is a gRPC client for the Posix service. +type PosixClient pb.PosixClient + +// NewPosixClient makes a new gRPC client for the Posix service. +func NewPosixClient(c grpc.ClientConnInterface) PosixClient { + return pb.NewPosixClient(c) +} diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go new file mode 100644 index 000000000..35fa4dcb6 --- /dev/null +++ b/test/packetimpact/testbench/layers.go @@ -0,0 +1,507 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testbench + +import ( + "fmt" + "reflect" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/imdario/mergo" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" +) + +// Layer is the interface that all encapsulations must implement. +// +// A Layer is an encapsulation in a packet, such as TCP, IPv4, IPv6, etc. A +// Layer contains all the fields of the encapsulation. Each field is a pointer +// and may be nil. +type Layer interface { + // toBytes converts the Layer into bytes. In places where the Layer's field + // isn't nil, the value that is pointed to is used. 
When the field is nil, a + // reasonable default for the Layer is used. For example, "64" for IPv4 TTL + // and a calculated checksum for TCP or IP. Some layers require information + // from the previous or next layers in order to compute a default, such as + // TCP's checksum or Ethernet's type, so each Layer has a doubly-linked list + // to the layer's neighbors. + toBytes() ([]byte, error) + + // match checks if the current Layer matches the provided Layer. If either + // Layer has a nil in a given field, that field is considered matching. + // Otherwise, the values pointed to by the fields must match. + match(Layer) bool + + // length in bytes of the current encapsulation + length() int + + // next gets a pointer to the encapsulated Layer. + next() Layer + + // prev gets a pointer to the Layer encapsulating this one. + prev() Layer + + // setNext sets the pointer to the encapsulated Layer. + setNext(Layer) + + // setPrev sets the pointer to the Layer encapsulating this one. + setPrev(Layer) +} + +// LayerBase is the common elements of all layers. +type LayerBase struct { + nextLayer Layer + prevLayer Layer +} + +func (lb *LayerBase) next() Layer { + return lb.nextLayer +} + +func (lb *LayerBase) prev() Layer { + return lb.prevLayer +} + +func (lb *LayerBase) setNext(l Layer) { + lb.nextLayer = l +} + +func (lb *LayerBase) setPrev(l Layer) { + lb.prevLayer = l +} + +func equalLayer(x, y Layer) bool { + opt := cmp.FilterValues(func(x, y interface{}) bool { + if reflect.ValueOf(x).Kind() == reflect.Ptr && reflect.ValueOf(x).IsNil() { + return true + } + if reflect.ValueOf(y).Kind() == reflect.Ptr && reflect.ValueOf(y).IsNil() { + return true + } + return false + + }, cmp.Ignore()) + return cmp.Equal(x, y, opt, cmpopts.IgnoreUnexported(LayerBase{})) +} + +// Ether can construct and match the ethernet encapsulation. +type Ether struct { + LayerBase + SrcAddr *tcpip.LinkAddress + DstAddr *tcpip.LinkAddress + Type *tcpip.NetworkProtocolNumber +} + +func (l *Ether) toBytes() ([]byte, error) { + b := make([]byte, header.EthernetMinimumSize) + h := header.Ethernet(b) + fields := &header.EthernetFields{} + if l.SrcAddr != nil { + fields.SrcAddr = *l.SrcAddr + } + if l.DstAddr != nil { + fields.DstAddr = *l.DstAddr + } + if l.Type != nil { + fields.Type = *l.Type + } else { + switch n := l.next().(type) { + case *IPv4: + fields.Type = header.IPv4ProtocolNumber + default: + // TODO(b/150301488): Support more protocols, like IPv6. + return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", n) + } + } + h.Encode(fields) + return h, nil +} + +// LinkAddress is a helper routine that allocates a new tcpip.LinkAddress value +// to store v and returns a pointer to it. +func LinkAddress(v tcpip.LinkAddress) *tcpip.LinkAddress { + return &v +} + +// NetworkProtocolNumber is a helper routine that allocates a new +// tcpip.NetworkProtocolNumber value to store v and returns a pointer to it. +func NetworkProtocolNumber(v tcpip.NetworkProtocolNumber) *tcpip.NetworkProtocolNumber { + return &v +} + +// ParseEther parses the bytes assuming that they start with an ethernet header +// and continues parsing further encapsulations. 
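// Editorial sketch (not part of this change): because equalLayer ignores nil
// fields, an expected layer only needs to set the fields the test cares
// about. Assuming a *TCP named gotTCP parsed from a received frame:
//
//	want := TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}
//	if want.match(gotTCP) {
//		// Matches any segment whose flags are exactly SYN|ACK; ports, sequence
//		// numbers and checksum are not compared because they are nil in want.
//	}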
+func ParseEther(b []byte) (Layers, error) { + h := header.Ethernet(b) + ether := Ether{ + SrcAddr: LinkAddress(h.SourceAddress()), + DstAddr: LinkAddress(h.DestinationAddress()), + Type: NetworkProtocolNumber(h.Type()), + } + layers := Layers{ðer} + switch h.Type() { + case header.IPv4ProtocolNumber: + moreLayers, err := ParseIPv4(b[ether.length():]) + if err != nil { + return nil, err + } + return append(layers, moreLayers...), nil + default: + // TODO(b/150301488): Support more protocols, like IPv6. + return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %v", b) + } +} + +func (l *Ether) match(other Layer) bool { + return equalLayer(l, other) +} + +func (l *Ether) length() int { + return header.EthernetMinimumSize +} + +// IPv4 can construct and match the ethernet excapulation. +type IPv4 struct { + LayerBase + IHL *uint8 + TOS *uint8 + TotalLength *uint16 + ID *uint16 + Flags *uint8 + FragmentOffset *uint16 + TTL *uint8 + Protocol *uint8 + Checksum *uint16 + SrcAddr *tcpip.Address + DstAddr *tcpip.Address +} + +func (l *IPv4) toBytes() ([]byte, error) { + b := make([]byte, header.IPv4MinimumSize) + h := header.IPv4(b) + fields := &header.IPv4Fields{ + IHL: 20, + TOS: 0, + TotalLength: 0, + ID: 0, + Flags: 0, + FragmentOffset: 0, + TTL: 64, + Protocol: 0, + Checksum: 0, + SrcAddr: tcpip.Address(""), + DstAddr: tcpip.Address(""), + } + if l.TOS != nil { + fields.TOS = *l.TOS + } + if l.TotalLength != nil { + fields.TotalLength = *l.TotalLength + } else { + fields.TotalLength = uint16(l.length()) + current := l.next() + for current != nil { + fields.TotalLength += uint16(current.length()) + current = current.next() + } + } + if l.ID != nil { + fields.ID = *l.ID + } + if l.Flags != nil { + fields.Flags = *l.Flags + } + if l.FragmentOffset != nil { + fields.FragmentOffset = *l.FragmentOffset + } + if l.TTL != nil { + fields.TTL = *l.TTL + } + if l.Protocol != nil { + fields.Protocol = *l.Protocol + } else { + switch n := l.next().(type) { + case *TCP: + fields.Protocol = uint8(header.TCPProtocolNumber) + default: + // TODO(b/150301488): Support more protocols, like UDP. + return nil, fmt.Errorf("can't deduce the ip header's next protocol: %+v", n) + } + } + if l.SrcAddr != nil { + fields.SrcAddr = *l.SrcAddr + } + if l.DstAddr != nil { + fields.DstAddr = *l.DstAddr + } + if l.Checksum != nil { + fields.Checksum = *l.Checksum + } + h.Encode(fields) + if l.Checksum == nil { + h.SetChecksum(^h.CalculateChecksum()) + } + return h, nil +} + +// Uint16 is a helper routine that allocates a new +// uint16 value to store v and returns a pointer to it. +func Uint16(v uint16) *uint16 { + return &v +} + +// Uint8 is a helper routine that allocates a new +// uint8 value to store v and returns a pointer to it. +func Uint8(v uint8) *uint8 { + return &v +} + +// Address is a helper routine that allocates a new tcpip.Address value to store +// v and returns a pointer to it. +func Address(v tcpip.Address) *tcpip.Address { + return &v +} + +// ParseIPv4 parses the bytes assuming that they start with an ipv4 header and +// continues parsing further encapsulations. 
+func ParseIPv4(b []byte) (Layers, error) { + h := header.IPv4(b) + tos, _ := h.TOS() + ipv4 := IPv4{ + IHL: Uint8(h.HeaderLength()), + TOS: &tos, + TotalLength: Uint16(h.TotalLength()), + ID: Uint16(h.ID()), + Flags: Uint8(h.Flags()), + FragmentOffset: Uint16(h.FragmentOffset()), + TTL: Uint8(h.TTL()), + Protocol: Uint8(h.Protocol()), + Checksum: Uint16(h.Checksum()), + SrcAddr: Address(h.SourceAddress()), + DstAddr: Address(h.DestinationAddress()), + } + layers := Layers{&ipv4} + switch h.Protocol() { + case uint8(header.TCPProtocolNumber): + moreLayers, err := ParseTCP(b[ipv4.length():]) + if err != nil { + return nil, err + } + return append(layers, moreLayers...), nil + } + return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", h.Protocol()) +} + +func (l *IPv4) match(other Layer) bool { + return equalLayer(l, other) +} + +func (l *IPv4) length() int { + if l.IHL == nil { + return header.IPv4MinimumSize + } + return int(*l.IHL) +} + +// TCP can construct and match the TCP excapulation. +type TCP struct { + LayerBase + SrcPort *uint16 + DstPort *uint16 + SeqNum *uint32 + AckNum *uint32 + DataOffset *uint8 + Flags *uint8 + WindowSize *uint16 + Checksum *uint16 + UrgentPointer *uint16 +} + +func (l *TCP) toBytes() ([]byte, error) { + b := make([]byte, header.TCPMinimumSize) + h := header.TCP(b) + if l.SrcPort != nil { + h.SetSourcePort(*l.SrcPort) + } + if l.DstPort != nil { + h.SetDestinationPort(*l.DstPort) + } + if l.SeqNum != nil { + h.SetSequenceNumber(*l.SeqNum) + } + if l.AckNum != nil { + h.SetAckNumber(*l.AckNum) + } + if l.DataOffset != nil { + h.SetDataOffset(*l.DataOffset) + } + if l.Flags != nil { + h.SetFlags(*l.Flags) + } + if l.WindowSize != nil { + h.SetWindowSize(*l.WindowSize) + } + if l.UrgentPointer != nil { + h.SetUrgentPoiner(*l.UrgentPointer) + } + if l.Checksum != nil { + h.SetChecksum(*l.Checksum) + return h, nil + } + if err := setChecksum(&h, l); err != nil { + return nil, err + } + return h, nil +} + +// setChecksum calculates the checksum of the TCP header and sets it in h. +func setChecksum(h *header.TCP, tcp *TCP) error { + h.SetChecksum(0) + tcpLength := uint16(tcp.length()) + current := tcp.next() + for current != nil { + tcpLength += uint16(current.length()) + current = current.next() + } + + var xsum uint16 + switch s := tcp.prev().(type) { + case *IPv4: + xsum = header.PseudoHeaderChecksum(header.TCPProtocolNumber, *s.SrcAddr, *s.DstAddr, tcpLength) + default: + // TODO(b/150301488): Support more protocols, like IPv6. + return fmt.Errorf("can't get src and dst addr from previous layer") + } + current = tcp.next() + for current != nil { + payload, err := current.toBytes() + if err != nil { + return fmt.Errorf("can't get bytes for next header: %s", payload) + } + xsum = header.Checksum(payload, xsum) + current = current.next() + } + h.SetChecksum(^h.CalculateChecksum(xsum)) + return nil +} + +// Uint32 is a helper routine that allocates a new +// uint32 value to store v and returns a pointer to it. +func Uint32(v uint32) *uint32 { + return &v +} + +// ParseTCP parses the bytes assuming that they start with a tcp header and +// continues parsing further encapsulations. 
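// Editorial sketch (not part of this change): a frame is described by listing
// layers outermost first; toBytes links each layer to its neighbors and fills
// unset fields with defaults (Ethernet type, IPv4 TTL/length/checksum, TCP
// checksum). The names lMAC, rMAC, lIP, rIP, localPort and remotePort are
// assumed to be in scope:
//
//	frame := Layers{
//		&Ether{SrcAddr: &lMAC, DstAddr: &rMAC},
//		&IPv4{SrcAddr: &lIP, DstAddr: &rIP},
//		&TCP{SrcPort: &localPort, DstPort: &remotePort, Flags: Uint8(header.TCPFlagSyn)},
//	}
//	b, err := frame.toBytes() // on success, b can be handed to an Injector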
+func ParseTCP(b []byte) (Layers, error) { + h := header.TCP(b) + tcp := TCP{ + SrcPort: Uint16(h.SourcePort()), + DstPort: Uint16(h.DestinationPort()), + SeqNum: Uint32(h.SequenceNumber()), + AckNum: Uint32(h.AckNumber()), + DataOffset: Uint8(h.DataOffset()), + Flags: Uint8(h.Flags()), + WindowSize: Uint16(h.WindowSize()), + Checksum: Uint16(h.Checksum()), + UrgentPointer: Uint16(h.UrgentPointer()), + } + layers := Layers{&tcp} + moreLayers, err := ParsePayload(b[tcp.length():]) + if err != nil { + return nil, err + } + return append(layers, moreLayers...), nil +} + +func (l *TCP) match(other Layer) bool { + return equalLayer(l, other) +} + +func (l *TCP) length() int { + if l.DataOffset == nil { + return header.TCPMinimumSize + } + return int(*l.DataOffset) +} + +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *TCP) merge(other TCP) error { + return mergo.Merge(l, other, mergo.WithOverride) +} + +// Payload has bytes beyond OSI layer 4. +type Payload struct { + LayerBase + Bytes []byte +} + +// ParsePayload parses the bytes assuming that they start with a payload and +// continue to the end. There can be no further encapsulations. +func ParsePayload(b []byte) (Layers, error) { + payload := Payload{ + Bytes: b, + } + return Layers{&payload}, nil +} + +func (l *Payload) toBytes() ([]byte, error) { + return l.Bytes, nil +} + +func (l *Payload) match(other Layer) bool { + return equalLayer(l, other) +} + +func (l *Payload) length() int { + return len(l.Bytes) +} + +// Layers is an array of Layer and supports similar functions to Layer. +type Layers []Layer + +func (ls *Layers) toBytes() ([]byte, error) { + for i, l := range *ls { + if i > 0 { + l.setPrev((*ls)[i-1]) + } + if i+1 < len(*ls) { + l.setNext((*ls)[i+1]) + } + } + outBytes := []byte{} + for _, l := range *ls { + layerBytes, err := l.toBytes() + if err != nil { + return nil, err + } + outBytes = append(outBytes, layerBytes...) + } + return outBytes, nil +} + +func (ls *Layers) match(other Layers) bool { + if len(*ls) > len(other) { + return false + } + for i := 0; i < len(*ls); i++ { + if !equalLayer((*ls)[i], other[i]) { + return false + } + } + return true +} diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go new file mode 100644 index 000000000..0c7d0f979 --- /dev/null +++ b/test/packetimpact/testbench/rawsockets.go @@ -0,0 +1,151 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testbench + +import ( + "encoding/binary" + "flag" + "math" + "net" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/usermem" +) + +var device = flag.String("device", "", "local device for test packets") + +// Sniffer can sniff raw packets on the wire. 
+type Sniffer struct { + t *testing.T + fd int +} + +func htons(x uint16) uint16 { + buf := [2]byte{} + binary.BigEndian.PutUint16(buf[:], x) + return usermem.ByteOrder.Uint16(buf[:]) +} + +// NewSniffer creates a Sniffer connected to *device. +func NewSniffer(t *testing.T) (Sniffer, error) { + flag.Parse() + snifferFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL))) + if err != nil { + return Sniffer{}, err + } + return Sniffer{ + t: t, + fd: snifferFd, + }, nil +} + +// maxReadSize should be large enough for the maximum frame size in bytes. If a +// packet too large for the buffer arrives, the test will get a fatal error. +const maxReadSize int = 65536 + +// Recv tries to read one frame until the timeout is up. +func (s *Sniffer) Recv(timeout time.Duration) []byte { + deadline := time.Now().Add(timeout) + for { + timeout = deadline.Sub(time.Now()) + if timeout <= 0 { + return nil + } + whole, frac := math.Modf(timeout.Seconds()) + tv := unix.Timeval{ + Sec: int64(whole), + Usec: int64(frac * float64(time.Microsecond/time.Second)), + } + + if err := unix.SetsockoptTimeval(s.fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil { + s.t.Fatalf("can't setsockopt SO_RCVTIMEO: %s", err) + } + + buf := make([]byte, maxReadSize) + nread, _, err := unix.Recvfrom(s.fd, buf, unix.MSG_TRUNC) + if err == unix.EINTR || err == unix.EAGAIN { + // There was a timeout. + continue + } + if err != nil { + s.t.Fatalf("can't read: %s", err) + } + if nread > maxReadSize { + s.t.Fatalf("received a truncated frame of %d bytes", nread) + } + return buf[:nread] + } +} + +// Close the socket that Sniffer is using. +func (s *Sniffer) Close() { + if err := unix.Close(s.fd); err != nil { + s.t.Fatalf("can't close sniffer socket: %s", err) + } + s.fd = -1 +} + +// Injector can inject raw frames. +type Injector struct { + t *testing.T + fd int +} + +// NewInjector creates a new injector on *device. +func NewInjector(t *testing.T) (Injector, error) { + flag.Parse() + ifInfo, err := net.InterfaceByName(*device) + if err != nil { + return Injector{}, err + } + + var haddr [8]byte + copy(haddr[:], ifInfo.HardwareAddr) + sa := unix.SockaddrLinklayer{ + Protocol: unix.ETH_P_IP, + Ifindex: ifInfo.Index, + Halen: uint8(len(ifInfo.HardwareAddr)), + Addr: haddr, + } + + injectFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL))) + if err != nil { + return Injector{}, err + } + if err := unix.Bind(injectFd, &sa); err != nil { + return Injector{}, err + } + return Injector{ + t: t, + fd: injectFd, + }, nil +} + +// Send a raw frame. +func (i *Injector) Send(b []byte) { + if _, err := unix.Write(i.fd, b); err != nil { + i.t.Fatalf("can't write: %s", err) + } +} + +// Close the underlying socket. 
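// Editorial sketch (not part of this change): the sniffer and injector are
// normally owned by a TCPIPv4 connection, but they can also be used directly.
// Error handling is elided and frameBytes is assumed to hold a serialized
// frame (for example from Layers.toBytes):
//
//	sniffer, _ := NewSniffer(t)
//	defer sniffer.Close()
//	injector, _ := NewInjector(t)
//	defer injector.Close()
//	injector.Send(frameBytes)
//	if raw := sniffer.Recv(time.Second); raw != nil {
//		// raw holds one captured frame; nil means nothing arrived in time.
//	}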
+func (i *Injector) Close() { + if err := unix.Close(i.fd); err != nil { + i.t.Fatalf("can't close sniffer socket: %s", err) + } + i.fd = -1 +} diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD new file mode 100644 index 000000000..1dff2a4d5 --- /dev/null +++ b/test/packetimpact/tests/BUILD @@ -0,0 +1,21 @@ +load("defs.bzl", "packetimpact_go_test") + +package( + default_visibility = ["//test/packetimpact:__subpackages__"], + licenses = ["notice"], +) + +packetimpact_go_test( + name = "fin_wait2_timeout", + srcs = ["fin_wait2_timeout_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +sh_binary( + name = "test_runner", + srcs = ["test_runner.sh"], +) diff --git a/test/packetimpact/tests/Dockerfile b/test/packetimpact/tests/Dockerfile new file mode 100644 index 000000000..507030cc7 --- /dev/null +++ b/test/packetimpact/tests/Dockerfile @@ -0,0 +1,5 @@ +FROM ubuntu:bionic + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y iptables netcat tcpdump iproute2 tshark +RUN hash -r +CMD /bin/bash diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl new file mode 100644 index 000000000..3baac567a --- /dev/null +++ b/test/packetimpact/tests/defs.bzl @@ -0,0 +1,106 @@ +"""Defines rules for packetimpact test targets.""" + +load("//tools:defs.bzl", "go_test") + +def _packetimpact_test_impl(ctx): + test_runner = ctx.executable._test_runner + bench = ctx.actions.declare_file("%s-bench" % ctx.label.name) + bench_content = "\n".join([ + "#!/bin/bash", + # This test will run part in a distinct user namespace. This can cause + # permission problems, because all runfiles may not be owned by the + # current user, and no other users will be mapped in that namespace. + # Make sure that everything is readable here. + "find . -type f -exec chmod a+rx {} \\;", + "find . -type d -exec chmod a+rx {} \\;", + "%s %s --posix_server_binary %s --testbench_binary %s $@\n" % ( + test_runner.short_path, + " ".join(ctx.attr.flags), + ctx.files._posix_server_binary[0].short_path, + ctx.files.testbench_binary[0].short_path, + ), + ]) + ctx.actions.write(bench, bench_content, is_executable = True) + + transitive_files = depset() + if hasattr(ctx.attr._test_runner, "data_runfiles"): + transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + runfiles = ctx.runfiles( + files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, + transitive_files = transitive_files, + collect_default = True, + collect_data = True, + ) + return [DefaultInfo(executable = bench, runfiles = runfiles)] + +_packetimpact_test = rule( + attrs = { + "_test_runner": attr.label( + executable = True, + cfg = "target", + default = ":test_runner", + ), + "_posix_server_binary": attr.label( + cfg = "target", + default = "//test/packetimpact/dut:posix_server", + ), + "testbench_binary": attr.label( + cfg = "target", + mandatory = True, + ), + "flags": attr.string_list( + mandatory = False, + default = [], + ), + }, + test = True, + implementation = _packetimpact_test_impl, +) + +PACKETIMPACT_TAGS = ["local", "manual"] + +def packetimpact_linux_test(name, testbench_binary, **kwargs): + """Add a packetimpact test on linux. 
+ + Args: + name: name of the test + testbench_binary: the testbench binary + **kwargs: all the other args, forwarded to _packetimpact_test + """ + _packetimpact_test( + name = name + "_linux_test", + testbench_binary = testbench_binary, + flags = ["--dut_platform", "linux"], + tags = PACKETIMPACT_TAGS, + **kwargs + ) + +def packetimpact_netstack_test(name, testbench_binary, **kwargs): + """Add a packetimpact test on netstack. + + Args: + name: name of the test + testbench_binary: the testbench binary + **kwargs: all the other args, forwarded to _packetimpact_test + """ + _packetimpact_test( + name = name + "_netstack_test", + testbench_binary = testbench_binary, + # This is the default runtime unless + # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. + flags = ["--dut_platform", "netstack", "--runtime=runsc-d"], + tags = PACKETIMPACT_TAGS, + **kwargs + ) + +def packetimpact_go_test(name, size = "small", pure = True, **kwargs): + testbench_binary = name + "_test" + go_test( + name = testbench_binary, + size = size, + pure = pure, + tags = PACKETIMPACT_TAGS, + **kwargs + ) + packetimpact_linux_test(name = name, testbench_binary = testbench_binary) + packetimpact_netstack_test(name = name, testbench_binary = testbench_binary) diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go new file mode 100644 index 000000000..5f54e67ed --- /dev/null +++ b/test/packetimpact/tests/fin_wait2_timeout_test.go @@ -0,0 +1,68 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fin_wait2_timeout_test + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func TestFinWait2Timeout(t *testing.T) { + for _, tt := range []struct { + description string + linger2 bool + }{ + {"WithLinger2", true}, + {"WithoutLinger2", false}, + } { + t.Run(tt.description, func(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, dut, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + conn.Handshake() + + acceptFd, _ := dut.Accept(listenFd) + if tt.linger2 { + tv := unix.Timeval{Sec: 1, Usec: 0} + dut.SetSockOptTimeval(acceptFd, unix.SOL_TCP, unix.TCP_LINGER2, &tv) + } + dut.Close(acceptFd) + + if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); gotOne == nil { + t.Fatal("expected a FIN-ACK within 1 second but got none") + } + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + + time.Sleep(5 * time.Second) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + if tt.linger2 { + if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); gotOne == nil { + t.Fatal("expected a RST packet within a second but got none") + } + } else { + if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, 10*time.Second); gotOne != nil { + t.Fatal("expected no RST packets within ten seconds but got one") + } + } + }) + } +} diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh new file mode 100755 index 000000000..5281cb53d --- /dev/null +++ b/test/packetimpact/tests/test_runner.sh @@ -0,0 +1,246 @@ +#!/bin/bash + +# Copyright 2020 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run a packetimpact test. Two docker containers are made, one for the +# Device-Under-Test (DUT) and one for the test bench. Each is attached with +# two networks, one for control packets that aid the test and one for test +# packets which are sent as part of the test and observed for correctness. + +set -euxo pipefail + +function failure() { + local lineno=$1 + local msg=$2 + local filename="$0" + echo "FAIL: $filename:$lineno: $msg" +} +trap 'failure ${LINENO} "$BASH_COMMAND"' ERR + +declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark" + +# Don't use declare below so that the error from getopt will end the script. +PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") + +eval set -- "$PARSED" + +while true; do + case "$1" in + --dut_platform) + # Either "linux" or "netstack". 
+ declare -r DUT_PLATFORM="$2" + shift 2 + ;; + --posix_server_binary) + declare -r POSIX_SERVER_BINARY="$2" + shift 2 + ;; + --testbench_binary) + declare -r TESTBENCH_BINARY="$2" + shift 2 + ;; + --runtime) + # Not readonly because there might be multiple --runtime arguments and we + # want to use just the last one. Only used if --dut_platform is + # "netstack". + declare RUNTIME="$2" + shift 2 + ;; + --tshark) + declare -r TSHARK="1" + shift 1 + ;; + --) + shift + break + ;; + *) + echo "Programming error" + exit 3 + esac +done + +# All the other arguments are scripts. +declare -r scripts="$@" + +# Check that the required flags are defined in a way that is safe for "set -u". +if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then + if [[ -z "${RUNTIME-}" ]]; then + echo "FAIL: Missing --runtime argument: ${RUNTIME-}" + exit 2 + fi + declare -r RUNTIME_ARG="--runtime ${RUNTIME}" +elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then + declare -r RUNTIME_ARG="" +else + echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}" + exit 2 +fi +if [[ ! -f "${POSIX_SERVER_BINARY-}" ]]; then + echo "FAIL: Bad or missing --posix_server_binary: ${POSIX_SERVER-}" + exit 2 +fi +if [[ ! -f "${TESTBENCH_BINARY-}" ]]; then + echo "FAIL: Bad or missing --testbench_binary: ${TESTBENCH_BINARY-}" + exit 2 +fi + +# Variables specific to the control network and interface start with CTRL_. +# Variables specific to the test network and interface start with TEST_. +# Variables specific to the DUT start with DUT_. +# Variables specific to the test bench start with TESTBENCH_. +# Use random numbers so that test networks don't collide. +declare -r CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" +declare -r TEST_NET="test_net-${RANDOM}${RANDOM}" +# On both DUT and test bench, testing packets are on the eth2 interface. +declare -r TEST_DEVICE="eth2" +# Number of bits in the *_NET_PREFIX variables. +declare -r NET_MASK="24" +function new_net_prefix() { + # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. + echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" +} +# Last bits of the DUT's IP address. +declare -r DUT_NET_SUFFIX=".10" +# Control port. +declare -r CTRL_PORT="40000" +# Last bits of the test bench's IP address. +declare -r TESTBENCH_NET_SUFFIX=".20" +declare -r TIMEOUT="60" +declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetimpact" +# Make sure that docker is installed. +docker --version + +function finish { + local cleanup_success=1 + for net in "${CTRL_NET}" "${TEST_NET}"; do + # Kill all processes attached to ${net}. + for docker_command in "kill" "rm"; do + (docker network inspect "${net}" \ + --format '{{range $key, $value := .Containers}}{{$key}} {{end}}' \ + | xargs -r docker "${docker_command}") || \ + cleanup_success=0 + done + # Remove the network. + docker network rm "${net}" || \ + cleanup_success=0 + done + + if ((!$cleanup_success)); then + echo "FAIL: Cleanup command failed" + exit 4 + fi +} +trap finish EXIT + +# Subnet for control packets between test bench and DUT. +declare CTRL_NET_PREFIX=$(new_net_prefix) +while ! docker network create \ + "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do + sleep 0.1 + declare CTRL_NET_PREFIX=$(new_net_prefix) +done + +# Subnet for the packets that are part of the test. +declare TEST_NET_PREFIX=$(new_net_prefix) +while ! 
docker network create \ + "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do + sleep 0.1 + declare TEST_NET_PREFIX=$(new_net_prefix) +done + +docker pull "${IMAGE_TAG}" + +# Create the DUT container and connect to network. +DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \ + --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) +docker network connect "${CTRL_NET}" \ + --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ + || (docker kill ${DUT}; docker rm ${DUT}; false) +docker network connect "${TEST_NET}" \ + --ip "${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ + || (docker kill ${DUT}; docker rm ${DUT}; false) +docker start "${DUT}" + +# Create the test bench container and connect to network. +TESTBENCH=$(docker create --privileged --rm \ + --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) +docker network connect "${CTRL_NET}" \ + --ip "${CTRL_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ + || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false) +docker network connect "${TEST_NET}" \ + --ip "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ + || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false) +docker start "${TESTBENCH}" + +# Start the posix_server in the DUT. +declare -r DOCKER_POSIX_SERVER_BINARY="/$(basename ${POSIX_SERVER_BINARY})" +docker cp -L ${POSIX_SERVER_BINARY} "${DUT}:${DOCKER_POSIX_SERVER_BINARY}" + +docker exec -t "${DUT}" \ + /bin/bash -c "${DOCKER_POSIX_SERVER_BINARY} \ + --ip ${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ + --port ${CTRL_PORT}" & + +# Because the Linux kernel receives the SYN-ACK but didn't send the SYN it will +# issue a RST. To prevent this IPtables can be used to filter those out. +docker exec "${TESTBENCH}" \ + iptables -A INPUT -i ${TEST_DEVICE} -j DROP + +# Wait for the DUT server to come up. Attempt to connect to it from the test +# bench every 100 milliseconds until success. +while ! docker exec "${TESTBENCH}" \ + nc -zv "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${CTRL_PORT}"; do + sleep 0.1 +done + +declare -r REMOTE_MAC=$(docker exec -t "${DUT}" ip link show \ + "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) +declare -r LOCAL_MAC=$(docker exec -t "${TESTBENCH}" ip link show \ + "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) + +declare -r DOCKER_TESTBENCH_BINARY="/$(basename ${TESTBENCH_BINARY})" +docker cp -L "${TESTBENCH_BINARY}" "${TESTBENCH}:${DOCKER_TESTBENCH_BINARY}" + +if [[ -z "${TSHARK-}" ]]; then + # Run tcpdump in the test bench unbuffered, without dns resolution, just on + # the interface with the test packets. + docker exec -t "${TESTBENCH}" \ + tcpdump -S -vvv -U -n -i "${TEST_DEVICE}" net "${TEST_NET_PREFIX}/24" & +else + # Run tshark in the test bench unbuffered, without dns resolution, just on the + # interface with the test packets. + docker exec -t "${TESTBENCH}" \ + tshark -V -l -n -i "${TEST_DEVICE}" \ + host "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" & +fi + +# tcpdump and tshark take time to startup +sleep 3 + +# Start a packetimpact test on the test bench. The packetimpact test sends and +# receives packets and also sends POSIX socket commands to the posix_server to +# be executed on the DUT. +docker exec -t "${TESTBENCH}" \ + /bin/bash -c "${DOCKER_TESTBENCH_BINARY} \ + --posix_server_ip=${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ + --posix_server_port=${CTRL_PORT} \ + --remote_ipv4=${TEST_NET_PREFIX}${DUT_NET_SUFFIX} \ + --local_ipv4=${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX} \ + --remote_mac=${REMOTE_MAC} \ + --local_mac=${LOCAL_MAC} \ + --device=${TEST_DEVICE}" + +echo PASS: No errors. 
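Editorial aside (not part of this change): the runner above ties the pieces together. It starts posix_server on the DUT's control address and hands the test network's addresses, MACs and device name to the testbench binary as flags (--posix_server_ip/--posix_server_port, --local_ipv4/--remote_ipv4, --local_mac/--remote_mac, --device), so a test built on the harness only touches the high-level API. The following sketch is illustrative only, modeled directly on fin_wait2_timeout_test.go above.

package example_test

import (
	"testing"
	"time"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/tcpip/header"
	tb "gvisor.dev/gvisor/test/packetimpact/testbench"
)

func TestHandshakeAndFin(t *testing.T) {
	dut := tb.NewDUT(t)
	defer dut.TearDown()

	// Ask the DUT to create a listening TCP socket and report its port.
	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
	defer dut.Close(listenFd)

	// Drive that listener from the test bench over the test network.
	conn := tb.NewTCPIPv4(t, dut, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
	defer conn.Close()
	conn.Handshake()

	// The DUT should now have a connection to accept; closing it should
	// make the DUT send a FIN.
	acceptFd, _ := dut.Accept(listenFd)
	dut.Close(acceptFd)
	if conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second) == nil {
		t.Fatal("expected a FIN-ACK within 1 second but got none")
	}
}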
diff --git a/tools/bazeldefs/defs.bzl b/tools/bazeldefs/defs.bzl index 905b16d41..64171ad8d 100644 --- a/tools/bazeldefs/defs.bzl +++ b/tools/bazeldefs/defs.bzl @@ -2,15 +2,15 @@ load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", _cc_flags_supplier = "cc_flags_supplier") load("@io_bazel_rules_go//go:def.bzl", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_library = "go_library", _go_test = "go_test", _go_tool_library = "go_tool_library") -load("@io_bazel_rules_go//proto:def.bzl", _go_proto_library = "go_proto_library") +load("@io_bazel_rules_go//proto:def.bzl", _go_grpc_library = "go_grpc_library", _go_proto_library = "go_proto_library") load("@rules_cc//cc:defs.bzl", _cc_binary = "cc_binary", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test") load("@rules_pkg//:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar") load("@io_bazel_rules_docker//go:image.bzl", _go_image = "go_image") load("@io_bazel_rules_docker//container:container.bzl", _container_image = "container_image") load("@pydeps//:requirements.bzl", _py_requirement = "requirement") +load("@com_github_grpc_grpc//bazel:cc_grpc_library.bzl", _cc_grpc_library = "cc_grpc_library") container_image = _container_image -cc_binary = _cc_binary cc_library = _cc_library cc_flags_supplier = _cc_flags_supplier cc_proto_library = _cc_proto_library @@ -19,16 +19,70 @@ cc_toolchain = "@bazel_tools//tools/cpp:current_cc_toolchain" go_image = _go_image go_embed_data = _go_embed_data gtest = "@com_google_googletest//:gtest" +grpcpp = "@com_github_grpc_grpc//:grpc++" gbenchmark = "@com_google_benchmark//:benchmark" loopback = "//tools/bazeldefs:loopback" -proto_library = native.proto_library pkg_deb = _pkg_deb pkg_tar = _pkg_tar py_library = native.py_library py_binary = native.py_binary py_test = native.py_test +def proto_library(name, has_services = None, **kwargs): + native.proto_library( + name = name, + **kwargs + ) + +def cc_grpc_library(name, **kwargs): + _cc_grpc_library(name = name, grpc_only = True, **kwargs) + +def _go_proto_or_grpc_library(go_library_func, name, **kwargs): + deps = [ + dep.replace("_proto", "_go_proto") + for dep in (kwargs.pop("deps", []) or []) + ] + go_library_func( + name = name + "_go_proto", + importpath = "gvisor.dev/gvisor/" + native.package_name() + "/" + name + "_go_proto", + proto = ":" + name + "_proto", + deps = deps, + **kwargs + ) + +def go_proto_library(name, **kwargs): + _go_proto_or_grpc_library(_go_proto_library, name, **kwargs) + +def go_grpc_and_proto_libraries(name, **kwargs): + _go_proto_or_grpc_library(_go_grpc_library, name, **kwargs) + +def cc_binary(name, static = False, **kwargs): + """Run cc_binary. + + Args: + name: name of the target. + static: make a static binary if True + **kwargs: the rest of the args. + """ + if static: + if "linkopts" in kwargs: + kwargs["linkopts"] += ["-static", "-lstdc++"] + else: + kwargs["linkopts"] = ["-static", "-lstdc++"] + _cc_binary( + name = name, + **kwargs + ) + def go_binary(name, static = False, pure = False, **kwargs): + """Build a go binary. + + Args: + name: name of the target. + static: build a static binary. + pure: build without cgo. + **kwargs: rest of the arguments are passed to _go_binary. 
+ """ if static: kwargs["static"] = "on" if pure: @@ -52,18 +106,17 @@ def go_tool_library(name, **kwargs): **kwargs ) -def go_proto_library(name, proto, **kwargs): - deps = kwargs.pop("deps", []) - _go_proto_library( - name = name, - importpath = "gvisor.dev/gvisor/" + native.package_name() + "/" + name, - proto = proto, - deps = [dep.replace("_proto", "_go_proto") for dep in deps], - **kwargs - ) +def go_test(name, pure = False, library = None, **kwargs): + """Build a go test. -def go_test(name, **kwargs): - library = kwargs.pop("library", None) + Args: + name: name of the output binary. + pure: should it be built without cgo. + library: the library to embed. + **kwargs: rest of the arguments to pass to _go_test. + """ + if pure: + kwargs["pure"] = "on" if library: kwargs["embed"] = [library] _go_test( diff --git a/tools/defs.bzl b/tools/defs.bzl index 15a310403..91d689a82 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -7,36 +7,39 @@ change for Google-internal and bazel-compatible rules. load("//tools/go_stateify:defs.bzl", "go_stateify") load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") -load("//tools/bazeldefs:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") +load("//tools/bazeldefs:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _grpcpp = "grpcpp", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") load("//tools/bazeldefs:platforms.bzl", _default_platform = "default_platform", _platforms = "platforms") load("//tools/bazeldefs:tags.bzl", "go_suffixes") # Delegate directly. 
cc_binary = _cc_binary +cc_flags_supplier = _cc_flags_supplier +cc_grpc_library = _cc_grpc_library cc_library = _cc_library cc_test = _cc_test cc_toolchain = _cc_toolchain -cc_flags_supplier = _cc_flags_supplier container_image = _container_image +default_installer = _default_installer +default_net_util = _default_net_util +gbenchmark = _gbenchmark go_embed_data = _go_embed_data go_image = _go_image go_test = _go_test go_tool_library = _go_tool_library gtest = _gtest -gbenchmark = _gbenchmark +grpcpp = _grpcpp +loopback = _loopback pkg_deb = _pkg_deb pkg_tar = _pkg_tar -py_library = _py_library py_binary = _py_binary -py_test = _py_test +py_library = _py_library py_requirement = _py_requirement +py_test = _py_test select_arch = _select_arch select_system = _select_system -loopback = _loopback -default_installer = _default_installer -default_net_util = _default_net_util -platforms = _platforms + default_platform = _default_platform +platforms = _platforms def go_binary(name, **kwargs): """Wraps the standard go_binary. @@ -190,33 +193,52 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F **kwargs ) -def proto_library(name, srcs, **kwargs): +def proto_library(name, srcs, deps = None, has_services = 0, **kwargs): """Wraps the standard proto_library. - Given a proto_library named "foo", this produces three different targets: + Given a proto_library named "foo", this produces up to five different + targets: - foo_proto: proto_library rule. - foo_go_proto: go_proto_library rule. - foo_cc_proto: cc_proto_library rule. + - foo_go_grpc_proto: go_grpc_library rule. + - foo_cc_grpc_proto: cc_grpc_library rule. Args: + name: the name to which _proto, _go_proto, etc, will be appended. srcs: the proto sources. + deps: for the proto library and the go_proto_library. + has_services: 1 to build gRPC code, otherwise 0. **kwargs: standard proto_library arguments. 
""" - deps = kwargs.pop("deps", []) _proto_library( name = name + "_proto", srcs = srcs, deps = deps, + has_services = has_services, **kwargs ) - _go_proto_library( - name = name + "_go_proto", - proto = ":" + name + "_proto", - deps = deps, - **kwargs - ) + if has_services: + _go_grpc_and_proto_libraries( + name = name, + deps = deps, + **kwargs + ) + else: + _go_proto_library( + name = name, + deps = deps, + **kwargs + ) _cc_proto_library( name = name + "_cc_proto", deps = [":" + name + "_proto"], **kwargs ) + if has_services: + _cc_grpc_library( + name = name + "_cc_grpc_proto", + srcs = [":" + name + "_proto"], + deps = [":" + name + "_cc_proto"], + **kwargs + ) -- cgit v1.2.3 From 1cc5a71a0e2100c89c97ffd12a38143907b33630 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Tue, 17 Mar 2020 16:21:36 -0700 Subject: iptables: fix type in script name, mark some new tests as skipped PiperOrigin-RevId: 301476456 --- kokoro/iptables_tests.cfg | 2 +- test/iptables/iptables_test.go | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/kokoro/iptables_tests.cfg b/kokoro/iptables_tests.cfg index 7af20629a..a30d82591 100644 --- a/kokoro/iptables_tests.cfg +++ b/kokoro/iptables_tests.cfg @@ -1,4 +1,4 @@ -build_file: "repo/scripts/iptables_test.sh" +build_file: "repo/scripts/iptables_tests.sh" action { define_artifacts { diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 73ba8b447..7f1f70606 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -191,6 +191,7 @@ func TestFilterInputDropOnlyUDP(t *testing.T) { } func TestNATRedirectUDPPort(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectUDPPort{}); err != nil { t.Fatal(err) @@ -198,6 +199,7 @@ func TestNATRedirectUDPPort(t *testing.T) { } func TestNATRedirectTCPPort(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectTCPPort{}); err != nil { t.Fatal(err) @@ -205,6 +207,7 @@ func TestNATRedirectTCPPort(t *testing.T) { } func TestNATDropUDP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATDropUDP{}); err != nil { t.Fatal(err) @@ -212,6 +215,7 @@ func TestNATDropUDP(t *testing.T) { } func TestNATAcceptAll(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATAcceptAll{}); err != nil { t.Fatal(err) @@ -255,6 +259,7 @@ func TestFilterInputReturnUnderflow(t *testing.T) { } func TestFilterOutputDropTCPDestPort(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(FilterOutputDropTCPDestPort{}); err != nil { t.Fatal(err) @@ -262,6 +267,7 @@ func TestFilterOutputDropTCPDestPort(t *testing.T) { } func TestFilterOutputDropTCPSrcPort(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(FilterOutputDropTCPSrcPort{}); err != nil { t.Fatal(err) @@ -329,42 +335,56 @@ func TestOutputInvertDestination(t *testing.T) { } func TestNATOutRedirectIP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. 
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATOutDontRedirectIP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutDontRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATOutRedirectInvert(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutRedirectInvert{}); err != nil { t.Fatal(err) } } func TestNATPreRedirectIP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATPreDontRedirectIP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreDontRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATPreRedirectInvert(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreRedirectInvert{}); err != nil { t.Fatal(err) } } func TestNATRedirectRequiresProtocol(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectRequiresProtocol{}); err != nil { t.Fatal(err) } -- cgit v1.2.3 From 9c35d7eb1f96f12207f78b94722f0e8b778b5af3 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Wed, 11 Mar 2020 09:55:07 +0000 Subject: Enable syscall sysret_test on arm64. Fixes #2058 Signed-off-by: Haibo Xu Change-Id: I05750d238a6425d3a47fae15720901f4dd924a32 --- test/syscalls/linux/BUILD | 5 +---- test/syscalls/linux/sysret.cc | 32 +++++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 636e5db12..d0c431234 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3336,10 +3336,7 @@ cc_binary( cc_binary( name = "sysret_test", testonly = 1, - srcs = select_arch( - amd64 = ["sysret.cc"], - arm64 = [], - ), + srcs = ["sysret.cc"], linkstatic = 1, deps = [ gtest, diff --git a/test/syscalls/linux/sysret.cc b/test/syscalls/linux/sysret.cc index 819fa655a..569190a59 100644 --- a/test/syscalls/linux/sysret.cc +++ b/test/syscalls/linux/sysret.cc @@ -14,6 +14,8 @@ // Tests to verify that the behavior of linux and gvisor matches when // 'sysret' returns to bad (aka non-canonical) %rip or %rsp. + +#include #include #include @@ -32,6 +34,7 @@ constexpr uint64_t kNonCanonicalRsp = 0xFFFF000000000000; class SysretTest : public ::testing::Test { protected: struct user_regs_struct regs_; + struct iovec iov; pid_t child_; void SetUp() override { @@ -48,10 +51,14 @@ class SysretTest : public ::testing::Test { // Parent. int status; + memset(&iov, 0, sizeof(iov)); ASSERT_THAT(pid, SyscallSucceeds()); // Might still be < 0. 
ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); - ASSERT_THAT(ptrace(PTRACE_GETREGS, pid, 0, ®s_), SyscallSucceeds()); + + iov.iov_base = ®s_; + iov.iov_len = sizeof(regs_); + ASSERT_THAT(ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov), SyscallSucceeds()); child_ = pid; } @@ -61,13 +68,25 @@ class SysretTest : public ::testing::Test { } void SetRip(uint64_t newrip) { +#if defined(__x86_64__) regs_.rip = newrip; - ASSERT_THAT(ptrace(PTRACE_SETREGS, child_, 0, ®s_), SyscallSucceeds()); +#elif defined(__aarch64__) + regs_.pc = newrip; +#else +#error "Unknown architecture" +#endif + ASSERT_THAT(ptrace(PTRACE_SETREGSET, child_, NT_PRSTATUS, &iov), SyscallSucceeds()); } void SetRsp(uint64_t newrsp) { +#if defined(__x86_64__) regs_.rsp = newrsp; - ASSERT_THAT(ptrace(PTRACE_SETREGS, child_, 0, ®s_), SyscallSucceeds()); +#elif defined(__aarch64__) + regs_.sp = newrsp; +#else +#error "Unknown architecture" +#endif + ASSERT_THAT(ptrace(PTRACE_SETREGSET, child_, NT_PRSTATUS, &iov), SyscallSucceeds()); } // Wait waits for the child pid and returns the exit status. @@ -104,8 +123,15 @@ TEST_F(SysretTest, BadRsp) { SetRsp(kNonCanonicalRsp); Detach(); int status = Wait(); +#if defined(__x86_64__) EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGBUS) << "status = " << status; +#elif defined(__aarch64__) + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) + << "status = " << status; +#else +#error "Unknown architecture" +#endif } } // namespace -- cgit v1.2.3 From 707664e0c498921860b209a19974977bb20c5746 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 18 Mar 2020 10:16:37 -0700 Subject: Send the ACK later to stabilize the test. PiperOrigin-RevId: 301614096 --- test/packetdrill/fin_wait2_timeout.pkt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/packetdrill/fin_wait2_timeout.pkt b/test/packetdrill/fin_wait2_timeout.pkt index 613f0bec9..93ab08575 100644 --- a/test/packetdrill/fin_wait2_timeout.pkt +++ b/test/packetdrill/fin_wait2_timeout.pkt @@ -19,5 +19,5 @@ +0 > F. 1:1(0) ack 1 <...> +0 < . 1:1(0) ack 2 win 257 -+1.1 < . 1:1(0) ack 2 win 257 ++2 < . 1:1(0) ack 2 win 257 +0 > R 2:2(0) win 0 -- cgit v1.2.3 From fc16e64396cf534bc4336e6bc0396a2f0f621e70 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 18 Mar 2020 13:06:55 -0700 Subject: Automated rollback of changelist 301476456 PiperOrigin-RevId: 301650898 --- kokoro/iptables_tests.cfg | 2 +- test/iptables/iptables_test.go | 20 -------------------- 2 files changed, 1 insertion(+), 21 deletions(-) (limited to 'test') diff --git a/kokoro/iptables_tests.cfg b/kokoro/iptables_tests.cfg index a30d82591..7af20629a 100644 --- a/kokoro/iptables_tests.cfg +++ b/kokoro/iptables_tests.cfg @@ -1,4 +1,4 @@ -build_file: "repo/scripts/iptables_tests.sh" +build_file: "repo/scripts/iptables_test.sh" action { define_artifacts { diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 7f1f70606..73ba8b447 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -191,7 +191,6 @@ func TestFilterInputDropOnlyUDP(t *testing.T) { } func TestNATRedirectUDPPort(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. 
t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectUDPPort{}); err != nil { t.Fatal(err) @@ -199,7 +198,6 @@ func TestNATRedirectUDPPort(t *testing.T) { } func TestNATRedirectTCPPort(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectTCPPort{}); err != nil { t.Fatal(err) @@ -207,7 +205,6 @@ func TestNATRedirectTCPPort(t *testing.T) { } func TestNATDropUDP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATDropUDP{}); err != nil { t.Fatal(err) @@ -215,7 +212,6 @@ func TestNATDropUDP(t *testing.T) { } func TestNATAcceptAll(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATAcceptAll{}); err != nil { t.Fatal(err) @@ -259,7 +255,6 @@ func TestFilterInputReturnUnderflow(t *testing.T) { } func TestFilterOutputDropTCPDestPort(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(FilterOutputDropTCPDestPort{}); err != nil { t.Fatal(err) @@ -267,7 +262,6 @@ func TestFilterOutputDropTCPDestPort(t *testing.T) { } func TestFilterOutputDropTCPSrcPort(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(FilterOutputDropTCPSrcPort{}); err != nil { t.Fatal(err) @@ -335,56 +329,42 @@ func TestOutputInvertDestination(t *testing.T) { } func TestNATOutRedirectIP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATOutDontRedirectIP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutDontRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATOutRedirectInvert(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutRedirectInvert{}); err != nil { t.Fatal(err) } } func TestNATPreRedirectIP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATPreDontRedirectIP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreDontRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATPreRedirectInvert(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreRedirectInvert{}); err != nil { t.Fatal(err) } } func TestNATRedirectRequiresProtocol(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. 
- t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectRequiresProtocol{}); err != nil { t.Fatal(err) } -- cgit v1.2.3 From b5ea65c07c29cbc894e9f879796eed816696d042 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 18 Mar 2020 15:51:36 -0700 Subject: iptables: skip tests for not-yet-supported features PiperOrigin-RevId: 301686266 --- test/iptables/iptables_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'test') diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 73ba8b447..7f1f70606 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -191,6 +191,7 @@ func TestFilterInputDropOnlyUDP(t *testing.T) { } func TestNATRedirectUDPPort(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectUDPPort{}); err != nil { t.Fatal(err) @@ -198,6 +199,7 @@ func TestNATRedirectUDPPort(t *testing.T) { } func TestNATRedirectTCPPort(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectTCPPort{}); err != nil { t.Fatal(err) @@ -205,6 +207,7 @@ func TestNATRedirectTCPPort(t *testing.T) { } func TestNATDropUDP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATDropUDP{}); err != nil { t.Fatal(err) @@ -212,6 +215,7 @@ func TestNATDropUDP(t *testing.T) { } func TestNATAcceptAll(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATAcceptAll{}); err != nil { t.Fatal(err) @@ -255,6 +259,7 @@ func TestFilterInputReturnUnderflow(t *testing.T) { } func TestFilterOutputDropTCPDestPort(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(FilterOutputDropTCPDestPort{}); err != nil { t.Fatal(err) @@ -262,6 +267,7 @@ func TestFilterOutputDropTCPDestPort(t *testing.T) { } func TestFilterOutputDropTCPSrcPort(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(FilterOutputDropTCPSrcPort{}); err != nil { t.Fatal(err) @@ -329,42 +335,56 @@ func TestOutputInvertDestination(t *testing.T) { } func TestNATOutRedirectIP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATOutDontRedirectIP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutDontRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATOutRedirectInvert(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATOutRedirectInvert{}); err != nil { t.Fatal(err) } } func TestNATPreRedirectIP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. 
+ t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATPreDontRedirectIP(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreDontRedirectIP{}); err != nil { t.Fatal(err) } } func TestNATPreRedirectInvert(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATPreRedirectInvert{}); err != nil { t.Fatal(err) } } func TestNATRedirectRequiresProtocol(t *testing.T) { + // TODO(gvisor.dev/issue/170): Enable when supported. + t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") if err := singleTest(NATRedirectRequiresProtocol{}); err != nil { t.Fatal(err) } -- cgit v1.2.3 From a8f9cc87989979b6d8bc3759e64bdd1b76329b64 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 18 Mar 2020 14:44:56 -0700 Subject: iptables: deflake DropTCP*Port tests These tests could timeout because net.DialTCP didn't respect the timeout. --- test/iptables/filter_input.go | 20 ++++++++++++++++---- test/iptables/iptables_util.go | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 05647de33..4ccd4cce7 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -194,8 +194,14 @@ func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropTCPDestPort) LocalAction(ip net.IP) error { - if err := connectTCP(ip, dropPort, sendloopDuration); err == nil { - return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort) + // After the container sets its DROP rule, we shouldn't be able to connect. + // However, we may succeed in connecting if this runs before the container + // sets the rule. To avoid this race, we retry connecting until + // sendloopDuration has elapsed, ignoring whether the connect succeeds. The + // test works becuase the container will error if a connection is + // established after the rule is set. + for start := time.Now(); time.Since(start) < sendloopDuration; { + connectTCP(ip, dropPort, sendloopDuration-time.Since(start)) } return nil @@ -226,8 +232,14 @@ func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropTCPSrcPort) LocalAction(ip net.IP) error { - if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { - return fmt.Errorf("connection should not be accepted, but was") + // After the container sets its DROP rule, we shouldn't be able to connect. + // However, we may succeed in connecting if this runs before the container + // sets the rule. To avoid this race, we retry connecting until + // sendloopDuration has elapsed, ignoring whether the connect succeeds. The + // test works becuase the container will error if a connection is + // established after the rule is set. 
+ for start := time.Now(); time.Since(start) < sendloopDuration; { + connectTCP(ip, acceptPort, sendloopDuration-time.Since(start)) } return nil diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index e8ae65c5a..134391e8d 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -144,7 +144,7 @@ func connectTCP(ip net.IP, port int, timeout time.Duration) error { // The container may not be listening when we first connect, so retry // upon error. callback := func() error { - conn, err := net.DialTCP("tcp4", nil, &contAddr) + conn, err := net.DialTimeout("tcp", contAddr.String(), timeout) if conn != nil { conn.Close() } -- cgit v1.2.3 From e0fbcdcb7f8a3e18946ccd5d5b98ea3adadee5ba Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Thu, 19 Mar 2020 12:32:15 -0700 Subject: Add packetimpact tests to presubmit and CI testing PiperOrigin-RevId: 301872161 --- kokoro/packetimpact_tests.cfg | 9 +++++++++ scripts/packetimpact_tests.sh | 20 ++++++++++++++++++++ test/packetimpact/tests/defs.bzl | 4 ++-- 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 kokoro/packetimpact_tests.cfg create mode 100755 scripts/packetimpact_tests.sh (limited to 'test') diff --git a/kokoro/packetimpact_tests.cfg b/kokoro/packetimpact_tests.cfg new file mode 100644 index 000000000..db86b52d5 --- /dev/null +++ b/kokoro/packetimpact_tests.cfg @@ -0,0 +1,9 @@ +build_file: "repo/scripts/packetimpact_tests.sh" + +action { + define_artifacts { + regex: "**/sponge_log.xml" + regex: "**/sponge_log.log" + regex: "**/outputs.zip" + } +} diff --git a/scripts/packetimpact_tests.sh b/scripts/packetimpact_tests.sh new file mode 100755 index 000000000..027d11e64 --- /dev/null +++ b/scripts/packetimpact_tests.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +source $(dirname $0)/common.sh + +install_runsc_for_test runsc-d +test_runsc $(bazel query "attr(tags, packetimpact, tests(//test/packetimpact/...))") diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl index 3baac567a..1b4213d9b 100644 --- a/test/packetimpact/tests/defs.bzl +++ b/test/packetimpact/tests/defs.bzl @@ -71,7 +71,7 @@ def packetimpact_linux_test(name, testbench_binary, **kwargs): name = name + "_linux_test", testbench_binary = testbench_binary, flags = ["--dut_platform", "linux"], - tags = PACKETIMPACT_TAGS, + tags = PACKETIMPACT_TAGS + ["packetimpact"], **kwargs ) @@ -89,7 +89,7 @@ def packetimpact_netstack_test(name, testbench_binary, **kwargs): # This is the default runtime unless # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. 
flags = ["--dut_platform", "netstack", "--runtime=runsc-d"], - tags = PACKETIMPACT_TAGS, + tags = PACKETIMPACT_TAGS + ["packetimpact"], **kwargs ) -- cgit v1.2.3 From d5fe1ce0c1c551c3165632eecc0ea5589c049bd5 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 20 Mar 2020 17:19:06 -0700 Subject: test: Create a separate /tmp mount only for tests with the shared tag The root mount is not shared by default, but all other mounts are shared. So if we create the /tmp mount, this means that we run tests on a shared mount even if tests run without the --shared option. PiperOrigin-RevId: 302130790 --- test/runner/runner.go | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/runner/runner.go b/test/runner/runner.go index a78ef38e0..0d3742f71 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -300,6 +300,7 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { // Test spec comes with pre-defined mounts that we don't want. Reset it. spec.Mounts = nil + testTmpDir := "/tmp" if *useTmpfs { // Forces '/tmp' to be mounted as tmpfs, otherwise test that rely on // features only available in gVisor's internal tmpfs may fail. @@ -325,11 +326,19 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { t.Fatalf("could not chmod temp dir: %v", err) } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Type: "bind", - Destination: "/tmp", - Source: tmpDir, - }) + // "/tmp" is not replaced with a tmpfs mount inside the sandbox + // when it's not empty. This ensures that testTmpDir uses gofer + // in exclusive mode. + testTmpDir = tmpDir + if *fileAccess == "shared" { + // All external mounts except the root mount are shared. + spec.Mounts = append(spec.Mounts, specs.Mount{ + Type: "bind", + Destination: "/tmp", + Source: tmpDir, + }) + testTmpDir = "/tmp" + } } // Set environment variables that indicate we are @@ -349,12 +358,8 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { // Set TEST_TMPDIR to /tmp, as some of the syscall tests require it to // be backed by tmpfs. - for i, kv := range env { - if strings.HasPrefix(kv, "TEST_TMPDIR=") { - env[i] = "TEST_TMPDIR=/tmp" - break - } - } + env = filterEnv(env, []string{"TEST_TMPDIR"}) + env = append(env, fmt.Sprintf("TEST_TMPDIR=%s", testTmpDir)) spec.Process.Env = env -- cgit v1.2.3 From a730d74b3230fb32181b9a940c07b61338222874 Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Mon, 23 Mar 2020 16:11:37 -0700 Subject: Support basic /proc/net/dev metrics for netstack Fixes #506 PiperOrigin-RevId: 302540404 --- pkg/sentry/socket/netstack/stack.go | 73 ++++++++++++++++++++++++++----------- test/syscalls/linux/proc_net.cc | 53 +++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 21 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 0692482e9..a8e2e8c24 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -200,36 +200,66 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error { // Statistics implements inet.Stack.Statistics. func (s *Stack) Statistics(stat interface{}, arg string) error { switch stats := stat.(type) { + case *inet.StatDev: + for _, ni := range s.Stack.NICInfo() { + if ni.Name != arg { + continue + } + // TODO(gvisor.dev/issue/2103) Support stubbed stats. + *stats = inet.StatDev{ + // Receive section. + ni.Stats.Rx.Bytes.Value(), // bytes. + ni.Stats.Rx.Packets.Value(), // packets. 
+ 0, // errs. + 0, // drop. + 0, // fifo. + 0, // frame. + 0, // compressed. + 0, // multicast. + // Transmit section. + ni.Stats.Tx.Bytes.Value(), // bytes. + ni.Stats.Tx.Packets.Value(), // packets. + 0, // errs. + 0, // drop. + 0, // fifo. + 0, // colls. + 0, // carrier. + 0, // compressed. + } + break + } case *inet.StatSNMPIP: ip := Metrics.IP + // TODO(gvisor.dev/issue/969) Support stubbed stats. *stats = inet.StatSNMPIP{ - 0, // TODO(gvisor.dev/issue/969): Support Ip/Forwarding. - 0, // TODO(gvisor.dev/issue/969): Support Ip/DefaultTTL. + 0, // Ip/Forwarding. + 0, // Ip/DefaultTTL. ip.PacketsReceived.Value(), // InReceives. - 0, // TODO(gvisor.dev/issue/969): Support Ip/InHdrErrors. + 0, // Ip/InHdrErrors. ip.InvalidDestinationAddressesReceived.Value(), // InAddrErrors. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ForwDatagrams. - 0, // TODO(gvisor.dev/issue/969): Support Ip/InUnknownProtos. - 0, // TODO(gvisor.dev/issue/969): Support Ip/InDiscards. + 0, // Ip/ForwDatagrams. + 0, // Ip/InUnknownProtos. + 0, // Ip/InDiscards. ip.PacketsDelivered.Value(), // InDelivers. ip.PacketsSent.Value(), // OutRequests. ip.OutgoingPacketErrors.Value(), // OutDiscards. - 0, // TODO(gvisor.dev/issue/969): Support Ip/OutNoRoutes. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmTimeout. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmReqds. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmOKs. - 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmFails. - 0, // TODO(gvisor.dev/issue/969): Support Ip/FragOKs. - 0, // TODO(gvisor.dev/issue/969): Support Ip/FragFails. - 0, // TODO(gvisor.dev/issue/969): Support Ip/FragCreates. + 0, // Ip/OutNoRoutes. + 0, // Support Ip/ReasmTimeout. + 0, // Support Ip/ReasmReqds. + 0, // Support Ip/ReasmOKs. + 0, // Support Ip/ReasmFails. + 0, // Support Ip/FragOKs. + 0, // Support Ip/FragFails. + 0, // Support Ip/FragCreates. } case *inet.StatSNMPICMP: in := Metrics.ICMP.V4PacketsReceived.ICMPv4PacketStats out := Metrics.ICMP.V4PacketsSent.ICMPv4PacketStats + // TODO(gvisor.dev/issue/969) Support stubbed stats. *stats = inet.StatSNMPICMP{ - 0, // TODO(gvisor.dev/issue/969): Support Icmp/InMsgs. + 0, // Icmp/InMsgs. Metrics.ICMP.V4PacketsSent.Dropped.Value(), // InErrors. - 0, // TODO(gvisor.dev/issue/969): Support Icmp/InCsumErrors. + 0, // Icmp/InCsumErrors. in.DstUnreachable.Value(), // InDestUnreachs. in.TimeExceeded.Value(), // InTimeExcds. in.ParamProblem.Value(), // InParmProbs. @@ -241,7 +271,7 @@ func (s *Stack) Statistics(stat interface{}, arg string) error { in.TimestampReply.Value(), // InTimestampReps. in.InfoRequest.Value(), // InAddrMasks. in.InfoReply.Value(), // InAddrMaskReps. - 0, // TODO(gvisor.dev/issue/969): Support Icmp/OutMsgs. + 0, // Icmp/OutMsgs. Metrics.ICMP.V4PacketsReceived.Invalid.Value(), // OutErrors. out.DstUnreachable.Value(), // OutDestUnreachs. out.TimeExceeded.Value(), // OutTimeExcds. @@ -277,15 +307,16 @@ func (s *Stack) Statistics(stat interface{}, arg string) error { } case *inet.StatSNMPUDP: udp := Metrics.UDP + // TODO(gvisor.dev/issue/969) Support stubbed stats. *stats = inet.StatSNMPUDP{ udp.PacketsReceived.Value(), // InDatagrams. udp.UnknownPortErrors.Value(), // NoPorts. - 0, // TODO(gvisor.dev/issue/969): Support Udp/InErrors. + 0, // Udp/InErrors. udp.PacketsSent.Value(), // OutDatagrams. udp.ReceiveBufferErrors.Value(), // RcvbufErrors. - 0, // TODO(gvisor.dev/issue/969): Support Udp/SndbufErrors. - 0, // TODO(gvisor.dev/issue/969): Support Udp/InCsumErrors. 
- 0, // TODO(gvisor.dev/issue/969): Support Udp/IgnoredMulti. + 0, // Udp/SndbufErrors. + 0, // Udp/InCsumErrors. + 0, // Udp/IgnoredMulti. } default: return syserr.ErrEndpointOperation.ToError() diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index 05c952b99..4e23d1e78 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -92,6 +92,59 @@ TEST(ProcSysNetIpv4Sack, CanReadAndWrite) { EXPECT_EQ(buf, to_write); } +// DeviceEntry is an entry in /proc/net/dev +struct DeviceEntry { + std::string name; + uint64_t stats[16]; +}; + +PosixErrorOr> GetDeviceMetricsFromProc( + const std::string dev) { + std::vector lines = absl::StrSplit(dev, '\n'); + std::vector entries; + + // /proc/net/dev prints 2 lines of headers followed by a line of metrics for + // each network interface. + for (unsigned i = 2; i < lines.size(); i++) { + // Ignore empty lines. + if (lines[i].empty()) { + continue; + } + + std::vector values = + absl::StrSplit(lines[i], ' ', absl::SkipWhitespace()); + + // Interface name + 16 values. + if (values.size() != 17) { + return PosixError(EINVAL, "invalid line: " + lines[i]); + } + + DeviceEntry entry; + entry.name = values[0]; + // Skip the interface name and read only the values. + for (unsigned j = 1; j < 17; j++) { + uint64_t num; + if (!absl::SimpleAtoi(values[j], &num)) { + return PosixError(EINVAL, "invalid value: " + values[j]); + } + entry.stats[j - 1] = num; + } + + entries.push_back(entry); + } + + return entries; +} + +// TEST(ProcNetDev, Format) tests that /proc/net/dev is parsable and +// contains at least one entry. +TEST(ProcNetDev, Format) { + auto dev = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/dev")); + auto entries = ASSERT_NO_ERRNO_AND_VALUE(GetDeviceMetricsFromProc(dev)); + + EXPECT_GT(entries.size(), 0); +} + PosixErrorOr GetSNMPMetricFromProc(const std::string snmp, const std::string& type, const std::string& item) { -- cgit v1.2.3 From f97858011fa88b539585ca456943922204d92840 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Mar 2020 10:57:24 -0700 Subject: Open a temp directory before changing capabilities and user ID-s In cl/302130790, we started using a temp directory which is provided by bazel. By default, a test process has enough permissions to open it, but there is not any guarantee that it still will be able to do this after changing credentials. PiperOrigin-RevId: 302702337 --- test/syscalls/linux/sticky.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc index 7e73325bf..92eec0449 100644 --- a/test/syscalls/linux/sticky.cc +++ b/test/syscalls/linux/sticky.cc @@ -42,8 +42,9 @@ TEST(StickyTest, StickyBitPermDenied) { auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - std::string path = JoinPath(dir.path(), "NewDir"); - ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY)); + ASSERT_THAT(mkdirat(dirfd.get(), "NewDir", 0755), SyscallSucceeds()); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. 
@@ -61,7 +62,8 @@ TEST(StickyTest, StickyBitPermDenied) { syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1), SyscallSucceeds()); - EXPECT_THAT(rmdir(path.c_str()), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(dirfd.get(), "NewDir", AT_REMOVEDIR), + SyscallFailsWithErrno(EPERM)); }); } @@ -96,8 +98,9 @@ TEST(StickyTest, StickyBitCapFOWNER) { auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - std::string path = JoinPath(dir.path(), "NewDir"); - ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + const FileDescriptor dirfd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY)); + ASSERT_THAT(mkdirat(dirfd.get(), "NewDir", 0755), SyscallSucceeds()); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. @@ -114,7 +117,8 @@ TEST(StickyTest, StickyBitCapFOWNER) { SyscallSucceeds()); EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true)); - EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlinkat(dirfd.get(), "NewDir", AT_REMOVEDIR), + SyscallSucceeds()); }); } } // namespace -- cgit v1.2.3 From 8fbf811f79d611dd55355734736265dfec20b73f Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Wed, 25 Mar 2020 08:28:24 +0900 Subject: Updated e2e tests on travis (#53) * Updated e2e tests on travis - Updated tests to run using Go 1.14 - Added test for containerd 1.3 - Updated release of runsc to test * Fix release downloading in e2e tests * Fix test targets * Update to using Ubuntu bionic in travis tests --- .travis.yml | 18 +++++++++--------- test/e2e/containerd-install.sh | 10 ++++++++-- test/e2e/runsc-install.sh | 2 +- test/e2e/untrusted-workload/install.sh | 5 ++++- 4 files changed, 22 insertions(+), 13 deletions(-) (limited to 'test') diff --git a/.travis.yml b/.travis.yml index e9be6c7aa..d9ef5e33f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,15 @@ -dist: trusty +dist: bionic sudo: required language: go -go: "1.12.x" +go: "1.14.x" env: - - GO111MODULE=on CONTAINERD_VERSION=1.1.5 RUNSC_VERSION=2019-08-06 TEST=untrusted-workload - - GO111MODULE=on CONTAINERD_VERSION=1.2.1 RUNSC_VERSION=2019-08-06 TEST=untrusted-workload - - GO111MODULE=on CONTAINERD_VERSION=1.2.1 RUNSC_VERSION=2019-08-06 TEST=runtime-handler - - GO111MODULE=on CONTAINERD_VERSION=1.2.1 RUNSC_VERSION=2019-08-06 TEST=runtime-handler-shim-v2 - + - CONTAINERD_VERSION=1.1.8 RUNSC_VERSION=release/20200219.0 TEST=untrusted-workload + - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=untrusted-workload + - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler + - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler-shim-v2 + - CONTAINERD_VERSION=1.3.3 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler + - CONTAINERD_VERSION=1.3.3 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler-shim-v2 go_import_path: github.com/google/gvisor-containerd-shim @@ -18,11 +19,10 @@ addons: - socat - conntrack - ipset + - libseccomp-dev before_install: - uname -r - - sudo apt-get -q update - - sudo apt-get install -y libseccomp-dev/trusty-backports script: - make test diff --git a/test/e2e/containerd-install.sh b/test/e2e/containerd-install.sh index 538b0ca2b..400819245 100755 --- a/test/e2e/containerd-install.sh +++ b/test/e2e/containerd-install.sh @@ -10,6 +10,13 @@ sudo mkdir -p /etc/containerd /etc/cni/net.d /opt/cni/bin sudo tar -xvf 
cni-plugins-amd64-v0.7.0.tgz -C /opt/cni/bin/ sudo tar -xvf containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz -C / +cat < /tmp/containerd-cri.log & - +sudo PATH=$PATH containerd -log-level debug &>/tmp/containerd-cri.log & diff --git a/test/e2e/runsc-install.sh b/test/e2e/runsc-install.sh index 64823bd3b..420fe01e9 100755 --- a/test/e2e/runsc-install.sh +++ b/test/e2e/runsc-install.sh @@ -3,6 +3,6 @@ # Sample script to install runsc wget -q --https-only \ - https://storage.googleapis.com/gvisor/releases/nightly/${RUNSC_VERSION}/runsc + https://storage.googleapis.com/gvisor/releases/${RUNSC_VERSION}/runsc chmod +x runsc sudo mv runsc /usr/local/bin/ diff --git a/test/e2e/untrusted-workload/install.sh b/test/e2e/untrusted-workload/install.sh index cb11ab8d3..c4538aed1 100755 --- a/test/e2e/untrusted-workload/install.sh +++ b/test/e2e/untrusted-workload/install.sh @@ -11,6 +11,9 @@ disabled_plugins = ["restart"] [plugins.linux] shim = "/usr/local/bin/gvisor-containerd-shim" shim_debug = true +# Set to avoid port overlap on older versions of containerd where default is 10010. +[plugins.cri] + stream_server_port = "10011" [plugins.cri.containerd.untrusted_workload_runtime] runtime_type = "io.containerd.runtime.v1.linux" runtime_engine = "/usr/local/bin/runsc" @@ -20,5 +23,5 @@ EOF { # Step 2: Restart containerd sudo pkill containerd -sudo containerd -log-level debug &> /tmp/containerd-cri.log & +sudo containerd -log-level debug &>/tmp/containerd-cri.log & } -- cgit v1.2.3 From e7fbf6949514f1cf239437dcead4ceed5aac029e Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Wed, 25 Mar 2020 10:56:36 -0700 Subject: Fix futex_benchmark. - Fix definitions of Futex* wrappers. - Correctly handle glibc syscall() (which returns -1 and sets errno instead of returning the raw syscall return value). - De-parameterize FutexWaitBitset, which was apparently intended to test with deadlines of between 0 and 100000 nanoseconds after the Unix epoch, but was broken due to the preceding two issues. - Use wall time to measure the durations of tests that are expected to block (and thus stop accumulating CPU time). - Require 5s for all tests to improve robustness in the presence of sentry GC. - Remove FutexContend and FutexContendDeadline; it's unclear what these are supposed to measure, given that (1) FutexLock is unrealistically inefficient and (2) the benchmark rewards slow scheduling (since this reduces contention). 
PiperOrigin-RevId: 302925246 --- test/perf/linux/futex_benchmark.cc | 144 ++++++++++++------------------------- 1 file changed, 47 insertions(+), 97 deletions(-) (limited to 'test') diff --git a/test/perf/linux/futex_benchmark.cc b/test/perf/linux/futex_benchmark.cc index b349d50bf..241f39896 100644 --- a/test/perf/linux/futex_benchmark.cc +++ b/test/perf/linux/futex_benchmark.cc @@ -33,24 +33,24 @@ namespace testing { namespace { inline int FutexWait(std::atomic* v, int32_t val) { - return syscall(SYS_futex, v, FUTEX_BITSET_MATCH_ANY, nullptr); + return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, val, nullptr); } -inline int FutexWaitRelativeTimeout(std::atomic* v, int32_t val, - const struct timespec* reltime) { - return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, reltime); +inline int FutexWaitMonotonicTimeout(std::atomic* v, int32_t val, + const struct timespec* timeout) { + return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, val, timeout); } -inline int FutexWaitAbsoluteTimeout(std::atomic* v, int32_t val, - const struct timespec* abstime) { - return syscall(SYS_futex, v, FUTEX_BITSET_MATCH_ANY, abstime); +inline int FutexWaitMonotonicDeadline(std::atomic* v, int32_t val, + const struct timespec* deadline) { + return syscall(SYS_futex, v, FUTEX_WAIT_BITSET_PRIVATE, val, deadline, + nullptr, FUTEX_BITSET_MATCH_ANY); } -inline int FutexWaitBitsetAbsoluteTimeout(std::atomic* v, int32_t val, - int32_t bits, - const struct timespec* abstime) { +inline int FutexWaitRealtimeDeadline(std::atomic* v, int32_t val, + const struct timespec* deadline) { return syscall(SYS_futex, v, FUTEX_WAIT_BITSET_PRIVATE | FUTEX_CLOCK_REALTIME, - val, abstime, nullptr, bits); + val, deadline, nullptr, FUTEX_BITSET_MATCH_ANY); } inline int FutexWake(std::atomic* v, int32_t count) { @@ -62,11 +62,11 @@ void BM_FutexWakeNop(benchmark::State& state) { std::atomic v(0); for (auto _ : state) { - EXPECT_EQ(0, FutexWake(&v, 1)); + TEST_PCHECK(FutexWake(&v, 1) == 0); } } -BENCHMARK(BM_FutexWakeNop); +BENCHMARK(BM_FutexWakeNop)->MinTime(5); // This just uses FUTEX_WAIT on an address whose value has changed, i.e., the // syscall won't wait. @@ -74,43 +74,63 @@ void BM_FutexWaitNop(benchmark::State& state) { std::atomic v(0); for (auto _ : state) { - EXPECT_EQ(-EAGAIN, FutexWait(&v, 1)); + TEST_PCHECK(FutexWait(&v, 1) == -1 && errno == EAGAIN); } } -BENCHMARK(BM_FutexWaitNop); +BENCHMARK(BM_FutexWaitNop)->MinTime(5); // This uses FUTEX_WAIT with a timeout on an address whose value never // changes, such that it always times out. Timeout overhead can be estimated by // timer overruns for short timeouts. -void BM_FutexWaitTimeout(benchmark::State& state) { +void BM_FutexWaitMonotonicTimeout(benchmark::State& state) { const int timeout_ns = state.range(0); std::atomic v(0); auto ts = absl::ToTimespec(absl::Nanoseconds(timeout_ns)); for (auto _ : state) { - EXPECT_EQ(-ETIMEDOUT, FutexWaitRelativeTimeout(&v, 0, &ts)); + TEST_PCHECK(FutexWaitMonotonicTimeout(&v, 0, &ts) == -1 && + errno == ETIMEDOUT); } } -BENCHMARK(BM_FutexWaitTimeout) +BENCHMARK(BM_FutexWaitMonotonicTimeout) + ->MinTime(5) + ->UseRealTime() ->Arg(1) ->Arg(10) ->Arg(100) ->Arg(1000) ->Arg(10000); -// This calls FUTEX_WAIT_BITSET with CLOCK_REALTIME. -void BM_FutexWaitBitset(benchmark::State& state) { +// This uses FUTEX_WAIT_BITSET with a deadline that is in the past. This allows +// estimation of the overhead of setting up a timer for a deadline (as opposed +// to a timeout as specified for FUTEX_WAIT). 
+void BM_FutexWaitMonotonicDeadline(benchmark::State& state) { std::atomic v(0); - int timeout_ns = state.range(0); - auto ts = absl::ToTimespec(absl::Nanoseconds(timeout_ns)); + struct timespec ts = {}; + for (auto _ : state) { - EXPECT_EQ(-ETIMEDOUT, FutexWaitBitsetAbsoluteTimeout(&v, 0, 1, &ts)); + TEST_PCHECK(FutexWaitMonotonicDeadline(&v, 0, &ts) == -1 && + errno == ETIMEDOUT); } } -BENCHMARK(BM_FutexWaitBitset)->Range(0, 100000); +BENCHMARK(BM_FutexWaitMonotonicDeadline)->MinTime(5); + +// This is equivalent to BM_FutexWaitMonotonicDeadline, but uses CLOCK_REALTIME +// instead of CLOCK_MONOTONIC for the deadline. +void BM_FutexWaitRealtimeDeadline(benchmark::State& state) { + std::atomic v(0); + struct timespec ts = {}; + + for (auto _ : state) { + TEST_PCHECK(FutexWaitRealtimeDeadline(&v, 0, &ts) == -1 && + errno == ETIMEDOUT); + } +} + +BENCHMARK(BM_FutexWaitRealtimeDeadline)->MinTime(5); int64_t GetCurrentMonotonicTimeNanos() { struct timespec ts; @@ -130,11 +150,10 @@ void SpinNanos(int64_t delay_ns) { // Each iteration of FutexRoundtripDelayed involves a thread sending a futex // wakeup to another thread, which spins for delay_us and then sends a futex -// wakeup back. The time per iteration is 2* (delay_us + kBeforeWakeDelayNs + +// wakeup back. The time per iteration is 2 * (delay_us + kBeforeWakeDelayNs + // futex/scheduling overhead). void BM_FutexRoundtripDelayed(benchmark::State& state) { const int delay_us = state.range(0); - const int64_t delay_ns = delay_us * 1000; // Spin for an extra kBeforeWakeDelayNs before invoking FUTEX_WAKE to reduce // the probability that the wakeup comes before the wait, preventing the wait @@ -165,83 +184,14 @@ void BM_FutexRoundtripDelayed(benchmark::State& state) { } BENCHMARK(BM_FutexRoundtripDelayed) + ->MinTime(5) + ->UseRealTime() ->Arg(0) ->Arg(10) ->Arg(20) ->Arg(50) ->Arg(100); -// FutexLock is a simple, dumb futex based lock implementation. -// It will try to acquire the lock by atomically incrementing the -// lock word. If it did not increment the lock from 0 to 1, someone -// else has the lock, so it will FUTEX_WAIT until it is woken in -// the unlock path. -class FutexLock { - public: - FutexLock() : lock_word_(0) {} - - void lock(struct timespec* deadline) { - int32_t val; - while ((val = lock_word_.fetch_add(1, std::memory_order_acquire) + 1) != - 1) { - // If we didn't get the lock by incrementing from 0 to 1, - // do a FUTEX_WAIT with the desired current value set to - // val. If val is no longer what the atomic increment returned, - // someone might have set it to 0 so we can try to acquire - // again. - int ret = FutexWaitAbsoluteTimeout(&lock_word_, val, deadline); - if (ret == 0 || ret == -EWOULDBLOCK || ret == -EINTR) { - continue; - } else { - FAIL() << "unexpected FUTEX_WAIT return: " << ret; - } - } - } - - void unlock() { - // Store 0 into the lock word and wake one waiter. We intentionally - // ignore the return value of the FUTEX_WAKE here, since there may be - // no waiters to wake anyway. - lock_word_.store(0, std::memory_order_release); - (void)FutexWake(&lock_word_, 1); - } - - private: - std::atomic lock_word_; -}; - -FutexLock* test_lock; // Used below. 
- -void FutexContend(benchmark::State& state, int thread_index, - struct timespec* deadline) { - int counter = 0; - if (thread_index == 0) { - test_lock = new FutexLock(); - } - for (auto _ : state) { - test_lock->lock(deadline); - counter++; - test_lock->unlock(); - } - if (thread_index == 0) { - delete test_lock; - } - state.SetItemsProcessed(state.iterations()); -} - -void BM_FutexContend(benchmark::State& state) { - FutexContend(state, state.thread_index, nullptr); -} - -BENCHMARK(BM_FutexContend)->ThreadRange(1, 1024)->UseRealTime(); - -void BM_FutexDeadlineContend(benchmark::State& state) { - auto deadline = absl::ToTimespec(absl::Now() + absl::Minutes(10)); - FutexContend(state, state.thread_index, &deadline); -} - -BENCHMARK(BM_FutexDeadlineContend)->ThreadRange(1, 1024)->UseRealTime(); - } // namespace } // namespace testing -- cgit v1.2.3 From e541ebec2fdb5b29209cb3fc8235b77edcaebb6a Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 25 Mar 2020 14:54:10 -0700 Subject: Misc fixes to make stat_test pass (almost) The only test failing now requires socket which is not available in VFS2 yet. Updates #1198 PiperOrigin-RevId: 302976572 --- pkg/bits/bits_template.go | 8 ++++++ pkg/bits/uint64_test.go | 18 ++++++++++++ pkg/sentry/fsimpl/gofer/filesystem.go | 16 +++++++++-- pkg/sentry/fsimpl/gofer/gofer.go | 41 ++++++++++++++++++++++++---- pkg/sentry/syscalls/linux/vfs2/BUILD | 1 + pkg/sentry/syscalls/linux/vfs2/filesystem.go | 2 +- pkg/sentry/syscalls/linux/vfs2/getdents.go | 4 +-- pkg/sentry/syscalls/linux/vfs2/stat.go | 7 ++++- pkg/sentry/vfs/resolving_path.go | 16 +++++++++-- test/syscalls/linux/stat.cc | 11 +++++++- 10 files changed, 109 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/pkg/bits/bits_template.go b/pkg/bits/bits_template.go index 93a435b80..998645388 100644 --- a/pkg/bits/bits_template.go +++ b/pkg/bits/bits_template.go @@ -42,3 +42,11 @@ func Mask(is ...int) T { func MaskOf(i int) T { return T(1) << T(i) } + +// IsPowerOfTwo returns true if v is power of 2. +func IsPowerOfTwo(v T) bool { + if v == 0 { + return false + } + return v&(v-1) == 0 +} diff --git a/pkg/bits/uint64_test.go b/pkg/bits/uint64_test.go index 1b018d808..193d1ebcd 100644 --- a/pkg/bits/uint64_test.go +++ b/pkg/bits/uint64_test.go @@ -114,3 +114,21 @@ func TestIsOn(t *testing.T) { } } } + +func TestIsPowerOfTwo(t *testing.T) { + for _, tc := range []struct { + v uint64 + want bool + }{ + {v: 0, want: false}, + {v: 1, want: true}, + {v: 2, want: true}, + {v: 3, want: false}, + {v: 4, want: true}, + {v: 5, want: false}, + } { + if got := IsPowerOfTwo64(tc.v); got != tc.want { + t.Errorf("IsPowerOfTwo(%d) = %t, want: %t", tc.v, got, tc.want) + } + } +} diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 38e4cdbc5..26b492185 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -454,6 +454,9 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } if fs.opts.interop != InteropModeShared { parent.touchCMtime(ctx) + if dir { + parent.decLinks() + } parent.cacheNegativeChildLocked(name) parent.dirents = nil } @@ -569,8 +572,13 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. 
func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string) error { creds := rp.Credentials() - _, err := parent.file.mkdir(ctx, name, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) - return err + if _, err := parent.file.mkdir(ctx, name, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)); err != nil { + return err + } + if fs.opts.interop != InteropModeShared { + parent.incLinks() + } + return nil }) } @@ -962,6 +970,10 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa oldParent.dirents = nil delete(newParent.negativeChildren, newName) newParent.dirents = nil + if renamed.isDir() { + oldParent.decLinks() + newParent.incLinks() + } } vfsObj.CommitRenameReplaceDentry(&renamed.vfsd, &newParent.vfsd, newName, replacedVFSD) return nil diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 999485492..13928ce36 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -485,6 +485,11 @@ type dentry struct { // locked to mutate it). size uint64 + // nlink counts the number of hard links to this dentry. It's updated and + // accessed using atomic operations. It's not protected by metadataMu like the + // other metadata fields. + nlink uint32 + mapsMu sync.Mutex // If this dentry represents a regular file, mappings tracks mappings of @@ -604,6 +609,9 @@ func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, ma if mask.BTime { d.btime = dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds) } + if mask.NLink { + d.nlink = uint32(attr.NLink) + } d.vfsd.Init(d) fs.syncMu.Lock() @@ -645,6 +653,9 @@ func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) { if mask.BTime { atomic.StoreInt64(&d.btime, dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds)) } + if mask.NLink { + atomic.StoreUint32(&d.nlink, uint32(attr.NLink)) + } if mask.Size { d.dataMu.Lock() atomic.StoreUint64(&d.size, attr.Size) @@ -687,10 +698,7 @@ func (d *dentry) fileType() uint32 { func (d *dentry) statTo(stat *linux.Statx) { stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS | linux.STATX_BTIME stat.Blksize = atomic.LoadUint32(&d.blockSize) - stat.Nlink = 1 - if d.isDir() { - stat.Nlink = 2 - } + stat.Nlink = atomic.LoadUint32(&d.nlink) stat.UID = atomic.LoadUint32(&d.uid) stat.GID = atomic.LoadUint32(&d.gid) stat.Mode = uint16(atomic.LoadUint32(&d.mode)) @@ -703,7 +711,7 @@ func (d *dentry) statTo(stat *linux.Statx) { stat.Btime = statxTimestampFromDentry(atomic.LoadInt64(&d.btime)) stat.Ctime = statxTimestampFromDentry(atomic.LoadInt64(&d.ctime)) stat.Mtime = statxTimestampFromDentry(atomic.LoadInt64(&d.mtime)) - // TODO(jamieliu): device number + // TODO(gvisor.dev/issue/1198): device number } func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mnt *vfs.Mount) error { @@ -1094,6 +1102,26 @@ func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool return nil } +// incLinks increments link count. +// +// Preconditions: d.nlink != 0 && d.nlink < math.MaxUint32. 
+func (d *dentry) incLinks() { + v := atomic.AddUint32(&d.nlink, 1) + if v < 2 { + panic(fmt.Sprintf("dentry.nlink is invalid (was 0 or overflowed): %d", v)) + } +} + +// decLinks decrements link count. +// +// Preconditions: d.nlink > 1. +func (d *dentry) decLinks() { + v := atomic.AddUint32(&d.nlink, ^uint32(0)) + if v == 0 { + panic(fmt.Sprintf("dentry.nlink must be greater than 0: %d", v)) + } +} + // fileDescription is embedded by gofer implementations of // vfs.FileDescriptionImpl. type fileDescription struct { @@ -1112,7 +1140,8 @@ func (fd *fileDescription) dentry() *dentry { // Stat implements vfs.FileDescriptionImpl.Stat. func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { d := fd.dentry() - if d.fs.opts.interop == InteropModeShared && opts.Mask&(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME|linux.STATX_SIZE|linux.STATX_BLOCKS|linux.STATX_BTIME) != 0 && opts.Sync != linux.AT_STATX_DONT_SYNC { + const validMask = uint32(linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_SIZE | linux.STATX_BLOCKS | linux.STATX_BTIME) + if d.fs.opts.interop == InteropModeShared && opts.Mask&(validMask) != 0 && opts.Sync != linux.AT_STATX_DONT_SYNC { // TODO(jamieliu): Use specialFileFD.handle.file for the getattr if // available? if err := d.updateFromGetattr(ctx); err != nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index e7695e995..2eb210014 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -31,6 +31,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", + "//pkg/bits", "//pkg/fspath", "//pkg/gohacks", "//pkg/sentry/arch", diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go index fc5ceea4c..a859095e2 100644 --- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go +++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go @@ -250,7 +250,7 @@ func rmdirat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr) error { if err != nil { return err } - tpop, err := getTaskPathOperation(t, dirfd, path, disallowEmptyPath, followFinalSymlink) + tpop, err := getTaskPathOperation(t, dirfd, path, disallowEmptyPath, nofollowFinalSymlink) if err != nil { return err } diff --git a/pkg/sentry/syscalls/linux/vfs2/getdents.go b/pkg/sentry/syscalls/linux/vfs2/getdents.go index ddc140b65..a61cc5059 100644 --- a/pkg/sentry/syscalls/linux/vfs2/getdents.go +++ b/pkg/sentry/syscalls/linux/vfs2/getdents.go @@ -97,7 +97,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { // char d_name[]; /* Filename (null-terminated) */ // }; size := 8 + 8 + 2 + 1 + 1 + len(dirent.Name) - if size < cb.remaining { + if size > cb.remaining { return syserror.EINVAL } buf = cb.t.CopyScratchBuffer(size) @@ -125,7 +125,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { panic(fmt.Sprintf("unsupported sizeof(unsigned long): %d", cb.t.Arch().Width())) } size := 8 + 8 + 2 + 1 + 1 + 1 + len(dirent.Name) - if size < cb.remaining { + if size > cb.remaining { return syserror.EINVAL } buf = cb.t.CopyScratchBuffer(size) diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go index 068243132..fdfe49243 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat.go @@ -16,6 +16,7 @@ package vfs2 import ( 
"gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bits" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/gohacks" "gvisor.dev/gvisor/pkg/sentry/arch" @@ -153,7 +154,11 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW|linux.AT_STATX_SYNC_TYPE) != 0 { return 0, nil, syserror.EINVAL } - + // Make sure that only one sync type option is set. + syncType := uint32(flags & linux.AT_STATX_SYNC_TYPE) + if syncType != 0 && !bits.IsPowerOfTwo32(syncType) { + return 0, nil, syserror.EINVAL + } if mask&linux.STATX__RESERVED != 0 { return 0, nil, syserror.EINVAL } diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go index eb4ebb511..8f31495da 100644 --- a/pkg/sentry/vfs/resolving_path.go +++ b/pkg/sentry/vfs/resolving_path.go @@ -329,10 +329,22 @@ func (rp *ResolvingPath) ResolveComponent(d *Dentry) (*Dentry, error) { // component in pcs represents a symbolic link, the symbolic link should be // followed. // +// If path is terminated with '/', the '/' is considered the last element and +// any symlink before that is followed: +// - For most non-creating walks, the last path component is handled by +// fs/namei.c:lookup_last(), which sets LOOKUP_FOLLOW if the first byte +// after the path component is non-NULL (which is only possible if it's '/') +// and the path component is of type LAST_NORM. +// +// - For open/openat/openat2 without O_CREAT, the last path component is +// handled by fs/namei.c:do_last(), which does the same, though without the +// LAST_NORM check. +// // Preconditions: !rp.Done(). func (rp *ResolvingPath) ShouldFollowSymlink() bool { - // Non-final symlinks are always followed. - return rp.flags&rpflagsFollowFinalSymlink != 0 || !rp.Final() + // Non-final symlinks are always followed. Paths terminated with '/' are also + // always followed. + return rp.flags&rpflagsFollowFinalSymlink != 0 || !rp.Final() || rp.MustBeDir() } // HandleSymlink is called when the current path component is a symbolic link diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index 513b9cd1c..2503960f3 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -34,6 +34,13 @@ #include "test/util/temp_path.h" #include "test/util/test_util.h" +#ifndef AT_STATX_FORCE_SYNC +#define AT_STATX_FORCE_SYNC 0x2000 +#endif +#ifndef AT_STATX_DONT_SYNC +#define AT_STATX_DONT_SYNC 0x4000 +#endif + namespace gvisor { namespace testing { @@ -700,8 +707,10 @@ TEST_F(StatTest, StatxInvalidFlags) { struct kernel_statx stx; EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), 12345, 0, &stx), SyscallFailsWithErrno(EINVAL)); + + // Sync flags are mutually exclusive. EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), - 0x6000 /* AT_STATX_SYNC_TYPE */, 0, &stx), + AT_STATX_FORCE_SYNC | AT_STATX_DONT_SYNC, 0, &stx), SyscallFailsWithErrno(EINVAL)); } -- cgit v1.2.3 From 5f03dca5227e4f2e7aa472ad40d421d4623c9f72 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 25 Mar 2020 15:38:38 -0700 Subject: Fix race in TestRunEnvHasHome It's possible to execute the command that checks user's $HOME dir before the user is created. Move the code that creates the user inside exec so it can be serialized. 
PiperOrigin-RevId: 302986184 --- test/e2e/exec_test.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'test') diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go index 4074d2285..594c8e752 100644 --- a/test/e2e/exec_test.go +++ b/test/e2e/exec_test.go @@ -240,17 +240,7 @@ func TestExecEnvHasHome(t *testing.T) { } d := dockerutil.MakeDocker("exec-env-home-test") - // We will check that HOME is set for root user, and also for a new - // non-root user we will create. - newUID := 1234 - newHome := "/foo/bar" - - // Create a new user with a home directory, and then sleep. - script := fmt.Sprintf(` - mkdir -p -m 777 %s && \ - adduser foo -D -u %d -h %s && \ - sleep 1000`, newHome, newUID, newHome) - if err := d.Run("alpine", "/bin/sh", "-c", script); err != nil { + if err := d.Run("alpine", "sleep", "1000"); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() @@ -264,7 +254,15 @@ func TestExecEnvHasHome(t *testing.T) { t.Errorf("wanted exec output to contain %q, got %q", want, got) } - // Execute the same as uid 123 and expect newHome. + // Create a new user with a home directory. + newUID := 1234 + newHome := "/foo/bar" + cmd := fmt.Sprintf("mkdir -p -m 777 %q && adduser foo -D -u %d -h %q", newHome, newUID, newHome) + if _, err := d.Exec("/bin/sh", "-c", cmd); err != nil { + t.Fatalf("docker exec failed: %v", err) + } + + // Execute the same as the new user and expect newHome. got, err = d.ExecAsUser(strconv.Itoa(newUID), "/bin/sh", "-c", "echo $HOME") if err != nil { t.Fatalf("docker exec failed: %v", err) -- cgit v1.2.3 From bc3def43c3c30ccde6577a0af213d13e4fd17e1e Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 26 Mar 2020 10:46:47 -0700 Subject: Check error in DropTCP*Port tests and fix comment. PiperOrigin-RevId: 303147253 --- test/iptables/filter_input.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 4ccd4cce7..41e0cfa8d 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -194,14 +194,11 @@ func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropTCPDestPort) LocalAction(ip net.IP) error { - // After the container sets its DROP rule, we shouldn't be able to connect. - // However, we may succeed in connecting if this runs before the container - // sets the rule. To avoid this race, we retry connecting until - // sendloopDuration has elapsed, ignoring whether the connect succeeds. The - // test works becuase the container will error if a connection is - // established after the rule is set. + // Ensure we cannot connect to the container. for start := time.Now(); time.Since(start) < sendloopDuration; { - connectTCP(ip, dropPort, sendloopDuration-time.Since(start)) + if err := connectTCP(ip, dropPort, sendloopDuration-time.Since(start)); err == nil { + return fmt.Errorf("expected not to connect, but was able to connect on port %d", dropPort) + } } return nil @@ -232,14 +229,11 @@ func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropTCPSrcPort) LocalAction(ip net.IP) error { - // After the container sets its DROP rule, we shouldn't be able to connect. - // However, we may succeed in connecting if this runs before the container - // sets the rule. 
To avoid this race, we retry connecting until - // sendloopDuration has elapsed, ignoring whether the connect succeeds. The - // test works becuase the container will error if a connection is - // established after the rule is set. + // Ensure we cannot connect to the container. for start := time.Now(); time.Since(start) < sendloopDuration; { - connectTCP(ip, acceptPort, sendloopDuration-time.Since(start)) + if err := connectTCP(ip, acceptPort, sendloopDuration-time.Since(start)); err == nil { + return fmt.Errorf("expected not to connect, but was able to connect on port %d", acceptPort) + } } return nil -- cgit v1.2.3 From 92b9069b67b927cef25a1490ebd142ad6d65690d Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Fri, 20 Mar 2020 12:00:21 -0700 Subject: Support owner matching for iptables. This feature will match UID and GID of the packet creator, for locally generated packets. This match is only valid in the OUTPUT and POSTROUTING chains. Forwarded packets do not have any socket associated with them. Packets from kernel threads do have a socket, but usually no owner. --- pkg/abi/linux/netfilter.go | 41 ++++++++ pkg/abi/linux/netfilter_test.go | 1 + pkg/sentry/kernel/task.go | 12 +++ pkg/sentry/socket/netfilter/BUILD | 1 + pkg/sentry/socket/netfilter/netfilter.go | 7 +- pkg/sentry/socket/netfilter/owner_matcher.go | 128 ++++++++++++++++++++++++ pkg/sentry/socket/netstack/provider.go | 6 ++ pkg/tcpip/network/ipv4/ipv4.go | 15 +++ pkg/tcpip/stack/packet_buffer.go | 9 +- pkg/tcpip/stack/transport_test.go | 2 + pkg/tcpip/tcpip.go | 12 +++ pkg/tcpip/transport/icmp/endpoint.go | 12 ++- pkg/tcpip/transport/packet/endpoint.go | 2 + pkg/tcpip/transport/raw/endpoint.go | 9 ++ pkg/tcpip/transport/tcp/accept.go | 5 +- pkg/tcpip/transport/tcp/connect.go | 10 +- pkg/tcpip/transport/tcp/endpoint.go | 7 ++ pkg/tcpip/transport/tcp/forwarder.go | 2 +- pkg/tcpip/transport/tcp/protocol.go | 2 +- pkg/tcpip/transport/udp/endpoint.go | 12 ++- test/iptables/filter_output.go | 143 +++++++++++++++++++++++++++ test/iptables/iptables_test.go | 30 ++++++ 22 files changed, 451 insertions(+), 17 deletions(-) create mode 100644 pkg/sentry/socket/netfilter/owner_matcher.go (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index 80dc09aa9..a8d4f9d69 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -509,3 +509,44 @@ const ( // Enable all flags. XT_UDP_INV_MASK = 0x03 ) + +// IPTOwnerInfo holds data for matching packets with owner. It corresponds +// to struct ipt_owner_info in libxt_owner.c of iptables binary. +type IPTOwnerInfo struct { + // UID is user id which created the packet. + UID uint32 + + // GID is group id which created the packet. + GID uint32 + + // PID is process id of the process which created the packet. + PID uint32 + + // SID is session id which created the packet. + SID uint32 + + // Comm is the command name which created the packet. + Comm [16]byte + + // Match is used to match UID/GID of the socket. See the + // XT_OWNER_* flags below. + Match uint8 + + // Invert flips the meaning of Match field. + Invert uint8 +} + +// SizeOfIPTOwnerInfo is the size of an XTOwnerMatchInfo. +const SizeOfIPTOwnerInfo = 34 + +// Flags in IPTOwnerInfo.Match. Corresponding constants are in +// include/uapi/linux/netfilter/xt_owner.h. +const ( + // Match the UID of the packet. + XT_OWNER_UID = 1 << 0 + // Match the GID of the packet. + XT_OWNER_GID = 1 << 1 + // Match if the socket exists for the packet. 
Forwarded + // packets do not have an associated socket. + XT_OWNER_SOCKET = 1 << 2 +) diff --git a/pkg/abi/linux/netfilter_test.go b/pkg/abi/linux/netfilter_test.go index 21e237f92..565dd550e 100644 --- a/pkg/abi/linux/netfilter_test.go +++ b/pkg/abi/linux/netfilter_test.go @@ -29,6 +29,7 @@ func TestSizes(t *testing.T) { {IPTGetEntries{}, SizeOfIPTGetEntries}, {IPTGetinfo{}, SizeOfIPTGetinfo}, {IPTIP{}, SizeOfIPTIP}, + {IPTOwnerInfo{}, SizeOfIPTOwnerInfo}, {IPTReplace{}, SizeOfIPTReplace}, {XTCounters{}, SizeOfXTCounters}, {XTEntryMatch{}, SizeOfXTEntryMatch}, diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 8452ddf5b..d6546735e 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -863,3 +863,15 @@ func (t *Task) SetOOMScoreAdj(adj int32) error { atomic.StoreInt32(&t.tg.oomScoreAdj, adj) return nil } + +// UID returns t's uid. +// TODO(gvisor.dev/issue/170): This method is not namespaced yet. +func (t *Task) UID() uint32 { + return uint32(t.Credentials().EffectiveKUID) +} + +// GID returns t's gid. +// TODO(gvisor.dev/issue/170): This method is not namespaced yet. +func (t *Task) GID() uint32 { + return uint32(t.Credentials().EffectiveKGID) +} diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index e801abeb8..721094bbf 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -7,6 +7,7 @@ go_library( srcs = [ "extensions.go", "netfilter.go", + "owner_matcher.go", "targets.go", "tcp_matcher.go", "udp_matcher.go", diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 55bcc3ace..878f81fd5 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -517,11 +517,10 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { } // TODO(gvisor.dev/issue/170): Support other chains. - // Since we only support modifying the INPUT chain and redirect for - // PREROUTING chain right now, make sure all other chains point to - // ACCEPT rules. + // Since we only support modifying the INPUT, PREROUTING and OUTPUT chain right now, + // make sure all other chains point to ACCEPT rules. for hook, ruleIdx := range table.BuiltinChains { - if hook != stack.Input && hook != stack.Prerouting { + if hook == stack.Forward || hook == stack.Postrouting { if _, ok := table.Rules[ruleIdx].Target.(stack.AcceptTarget); !ok { nflog("hook %d is unsupported.", hook) return syserr.ErrInvalidArgument diff --git a/pkg/sentry/socket/netfilter/owner_matcher.go b/pkg/sentry/socket/netfilter/owner_matcher.go new file mode 100644 index 000000000..5949a7c29 --- /dev/null +++ b/pkg/sentry/socket/netfilter/owner_matcher.go @@ -0,0 +1,128 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package netfilter + +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/usermem" +) + +const matcherNameOwner = "owner" + +func init() { + registerMatchMaker(ownerMarshaler{}) +} + +// ownerMarshaler implements matchMaker for owner matching. +type ownerMarshaler struct{} + +// name implements matchMaker.name. +func (ownerMarshaler) name() string { + return matcherNameOwner +} + +// marshal implements matchMaker.marshal. +func (ownerMarshaler) marshal(mr stack.Matcher) []byte { + matcher := mr.(*OwnerMatcher) + iptOwnerInfo := linux.IPTOwnerInfo{ + UID: matcher.uid, + GID: matcher.gid, + } + + // Support for UID match. + // TODO(gvisor.dev/issue/170): Need to support gid match. + if matcher.matchUID { + iptOwnerInfo.Match = linux.XT_OWNER_UID + } else if matcher.matchGID { + panic("GID match is not supported.") + } else { + panic("UID match is not set.") + } + + buf := make([]byte, 0, linux.SizeOfIPTOwnerInfo) + return marshalEntryMatch(matcherNameOwner, binary.Marshal(buf, usermem.ByteOrder, iptOwnerInfo)) +} + +// unmarshal implements matchMaker.unmarshal. +func (ownerMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.Matcher, error) { + if len(buf) < linux.SizeOfIPTOwnerInfo { + return nil, fmt.Errorf("buf has insufficient size for owner match: %d", len(buf)) + } + + // For alignment reasons, the match's total size may + // exceed what's strictly necessary to hold matchData. + var matchData linux.IPTOwnerInfo + binary.Unmarshal(buf[:linux.SizeOfIPTOwnerInfo], usermem.ByteOrder, &matchData) + nflog("parseMatchers: parsed IPTOwnerInfo: %+v", matchData) + + if matchData.Invert != 0 { + return nil, fmt.Errorf("invert flag is not supported for owner match") + } + + // Support for UID match. + // TODO(gvisor.dev/issue/170): Need to support gid match. + if matchData.Match&linux.XT_OWNER_UID != linux.XT_OWNER_UID { + return nil, fmt.Errorf("owner match is only supported for uid") + } + + // Check Flags. + var owner OwnerMatcher + owner.uid = matchData.UID + owner.gid = matchData.GID + owner.matchUID = true + + return &owner, nil +} + +type OwnerMatcher struct { + uid uint32 + gid uint32 + matchUID bool + matchGID bool + invert uint8 +} + +// Name implements Matcher.Name. +func (*OwnerMatcher) Name() string { + return matcherNameOwner +} + +// Match implements Matcher.Match. +func (om *OwnerMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceName string) (bool, bool) { + // Support only for OUTPUT chain. + // TODO(gvisor.dev/issue/170): Need to support for POSTROUTING chain also. + if hook != stack.Output { + return false, true + } + + // If the packet owner is not set, drop the packet. + // Support for uid match. + // TODO(gvisor.dev/issue/170): Need to support gid match. + if pkt.Owner == nil || !om.matchUID { + return false, true + } + + // TODO(gvisor.dev/issue/170): Need to add tests to verify + // drop rule when packet UID does not match owner matcher UID. 
+ if pkt.Owner.UID() != om.uid { + return false, false + } + + return true, false +} diff --git a/pkg/sentry/socket/netstack/provider.go b/pkg/sentry/socket/netstack/provider.go index 5f181f017..eb090e79b 100644 --- a/pkg/sentry/socket/netstack/provider.go +++ b/pkg/sentry/socket/netstack/provider.go @@ -126,6 +126,12 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (* ep, e = eps.Stack.NewRawEndpoint(transProto, p.netProto, wq, associated) } else { ep, e = eps.Stack.NewEndpoint(transProto, p.netProto, wq) + + // Assign task to PacketOwner interface to get the UID and GID for + // iptables owner matching. + if e == nil { + ep.SetOwner(t) + } } if e != nil { return nil, syserr.TranslateNetstackError(e) diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index b3ee6000e..a7d9a8b25 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -244,6 +244,14 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw ip := e.addIPHeader(r, &pkt.Header, pkt.Data.Size(), params) pkt.NetworkHeader = buffer.View(ip) + // iptables filtering. All packets that reach here are locally + // generated. + ipt := e.stack.IPTables() + if ok := ipt.Check(stack.Output, pkt); !ok { + // iptables is telling us to drop the packet. + return nil + } + if r.Loop&stack.PacketLoop != 0 { // The inbound path expects the network header to still be in // the PacketBuffer's Data field. @@ -280,7 +288,14 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts []stack.Pac return len(pkts), nil } + // iptables filtering. All packets that reach here are locally + // generated. + ipt := e.stack.IPTables() for i := range pkts { + if ok := ipt.Check(stack.Output, pkts[i]); !ok { + // iptables is telling us to drop the packet. + continue + } ip := e.addIPHeader(r, &pkts[i].Header, pkts[i].DataSize, params) pkts[i].NetworkHeader = buffer.View(ip) } diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go index 9505a4e92..9367de180 100644 --- a/pkg/tcpip/stack/packet_buffer.go +++ b/pkg/tcpip/stack/packet_buffer.go @@ -13,7 +13,10 @@ package stack -import "gvisor.dev/gvisor/pkg/tcpip/buffer" +import ( + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" +) // A PacketBuffer contains all the data of a network packet. // @@ -59,6 +62,10 @@ type PacketBuffer struct { // Hash is the transport layer hash of this packet. A value of zero // indicates no valid hash has been set. Hash uint32 + + // Owner is implemented by task to get the uid and gid. + // Only set for locally generated packets. + Owner tcpip.PacketOwner } // Clone makes a copy of pk. 
It clones the Data field, which creates a new diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go index 8ca9ac3cf..3084e6593 100644 --- a/pkg/tcpip/stack/transport_test.go +++ b/pkg/tcpip/stack/transport_test.go @@ -56,6 +56,8 @@ func (f *fakeTransportEndpoint) Stats() tcpip.EndpointStats { return nil } +func (f *fakeTransportEndpoint) SetOwner(owner tcpip.PacketOwner) {} + func newFakeTransportEndpoint(s *stack.Stack, proto *fakeTransportProtocol, netProto tcpip.NetworkProtocolNumber, uniqueID uint64) tcpip.Endpoint { return &fakeTransportEndpoint{stack: s, TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto}, proto: proto, uniqueID: uniqueID} } diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 3dc5d87d6..2ef3271f1 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -336,6 +336,15 @@ type ControlMessages struct { PacketInfo IPPacketInfo } +// PacketOwner is used to get UID and GID of the packet. +type PacketOwner interface { + // UID returns UID of the packet. + UID() uint32 + + // GID returns GID of the packet. + GID() uint32 +} + // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) // that exposes functionality like read, write, connect, etc. to users of the // networking stack. @@ -470,6 +479,9 @@ type Endpoint interface { // Stats returns a reference to the endpoint stats. Stats() EndpointStats + + // SetOwner sets the task owner to the endpoint owner. + SetOwner(owner PacketOwner) } // EndpointInfo is the interface implemented by each endpoint info struct. diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index 613b12ead..b007302fb 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -73,6 +73,9 @@ type endpoint struct { route stack.Route `state:"manual"` ttl uint8 stats tcpip.TransportEndpointStats `state:"nosave"` + + // owner is used to get uid and gid of the packet. + owner tcpip.PacketOwner } func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { @@ -133,6 +136,10 @@ func (e *endpoint) Close() { // ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. func (e *endpoint) ModerateRecvBuf(copied int) {} +func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { + e.owner = owner +} + // IPTables implements tcpip.Endpoint.IPTables. 
func (e *endpoint) IPTables() (stack.IPTables, error) { return e.stack.IPTables(), nil @@ -321,7 +328,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c switch e.NetProto { case header.IPv4ProtocolNumber: - err = send4(route, e.ID.LocalPort, v, e.ttl) + err = send4(route, e.ID.LocalPort, v, e.ttl, e.owner) case header.IPv6ProtocolNumber: err = send6(route, e.ID.LocalPort, v, e.ttl) @@ -415,7 +422,7 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } } -func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Error { +func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpip.PacketOwner) *tcpip.Error { if len(data) < header.ICMPv4MinimumSize { return tcpip.ErrInvalidEndpointState } @@ -444,6 +451,7 @@ func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Err Header: hdr, Data: data.ToVectorisedView(), TransportHeader: buffer.View(icmpv4), + Owner: owner, }) } diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index df49d0995..23158173d 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -392,3 +392,5 @@ func (ep *endpoint) Info() tcpip.EndpointInfo { func (ep *endpoint) Stats() tcpip.EndpointStats { return &ep.stats } + +func (ep *endpoint) SetOwner(owner tcpip.PacketOwner) {} diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index 536dafd1e..337bc1c71 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -80,6 +80,9 @@ type endpoint struct { // Connect(), and is valid only when conneted is true. route stack.Route `state:"manual"` stats tcpip.TransportEndpointStats `state:"nosave"` + + // owner is used to get uid and gid of the packet. + owner tcpip.PacketOwner } // NewEndpoint returns a raw endpoint for the given protocols. @@ -159,6 +162,10 @@ func (e *endpoint) Close() { // ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. func (e *endpoint) ModerateRecvBuf(copied int) {} +func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { + e.owner = owner +} + // IPTables implements tcpip.Endpoint.IPTables. func (e *endpoint) IPTables() (stack.IPTables, error) { return e.stack.IPTables(), nil @@ -348,10 +355,12 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, } break } + hdr := buffer.NewPrependable(len(payloadBytes) + int(route.MaxHeaderLength())) if err := route.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: e.TransProto, TTL: route.DefaultTTL(), TOS: stack.DefaultTOS}, stack.PacketBuffer{ Header: hdr, Data: buffer.View(payloadBytes).ToVectorisedView(), + Owner: e.owner, }); err != nil { return 0, nil, err } diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 375ca21f6..7a9dea4ac 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -276,7 +276,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i // and then performs the TCP 3-way handshake. // // The new endpoint is returned with e.mu held. -func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, *tcpip.Error) { +func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*endpoint, *tcpip.Error) { // Create new endpoint. 
irs := s.sequenceNumber isn := generateSecureISN(s.id, l.stack.Seed()) @@ -284,6 +284,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head if err != nil { return nil, err } + ep.owner = owner // listenEP is nil when listenContext is used by tcp.Forwarder. deferAccept := time.Duration(0) @@ -414,7 +415,7 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header }() defer s.decRef() - n, err := ctx.createEndpointAndPerformHandshake(s, opts, &waiter.Queue{}) + n, err := ctx.createEndpointAndPerformHandshake(s, opts, &waiter.Queue{}, e.owner) if err != nil { e.stack.Stats().TCP.FailedConnectionAttempts.Increment() e.stats.FailedConnectionAttempts.Increment() diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 1d245c2c6..3239a5911 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -745,7 +745,7 @@ func (e *endpoint) sendSynTCP(r *stack.Route, tf tcpFields, opts header.TCPSynOp func (e *endpoint) sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO) *tcpip.Error { tf.txHash = e.txHash - if err := sendTCP(r, tf, data, gso); err != nil { + if err := sendTCP(r, tf, data, gso, e.owner); err != nil { e.stats.SendErrors.SegmentSendToNetworkFailed.Increment() return err } @@ -787,7 +787,7 @@ func buildTCPHdr(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso *sta } } -func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO) *tcpip.Error { +func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO, owner tcpip.PacketOwner) *tcpip.Error { optLen := len(tf.opts) if tf.rcvWnd > 0xffff { tf.rcvWnd = 0xffff @@ -816,6 +816,7 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso pkts[i].DataSize = packetSize pkts[i].Data = data pkts[i].Hash = tf.txHash + pkts[i].Owner = owner buildTCPHdr(r, tf, &pkts[i], gso) off += packetSize tf.seq = tf.seq.Add(seqnum.Size(packetSize)) @@ -833,14 +834,14 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso // sendTCP sends a TCP segment with the provided options via the provided // network endpoint and under the provided identity. -func sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO) *tcpip.Error { +func sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO, owner tcpip.PacketOwner) *tcpip.Error { optLen := len(tf.opts) if tf.rcvWnd > 0xffff { tf.rcvWnd = 0xffff } if r.Loop&stack.PacketLoop == 0 && gso != nil && gso.Type == stack.GSOSW && int(gso.MSS) < data.Size() { - return sendTCPBatch(r, tf, data, gso) + return sendTCPBatch(r, tf, data, gso, owner) } pkt := stack.PacketBuffer{ @@ -849,6 +850,7 @@ func sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stac DataSize: data.Size(), Data: data, Hash: tf.txHash, + Owner: owner, } buildTCPHdr(r, tf, &pkt, gso) diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 1ebee0cfe..9b123e968 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -603,6 +603,9 @@ type endpoint struct { // txHash is the transport layer hash to be set on outbound packets // emitted by this endpoint. txHash uint32 + + // owner is used to get uid and gid of the packet. + owner tcpip.PacketOwner } // UniqueID implements stack.TransportEndpoint.UniqueID. 
@@ -1132,6 +1135,10 @@ func (e *endpoint) ModerateRecvBuf(copied int) { e.rcvListMu.Unlock() } +func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { + e.owner = owner +} + // IPTables implements tcpip.Endpoint.IPTables. func (e *endpoint) IPTables() (stack.IPTables, error) { return e.stack.IPTables(), nil diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index a094471b8..808410c92 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -157,7 +157,7 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, TSVal: r.synOptions.TSVal, TSEcr: r.synOptions.TSEcr, SACKPermitted: r.synOptions.SACKPermitted, - }, queue) + }, queue, nil) if err != nil { return nil, err } diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 1377107ca..dce9a1652 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -199,7 +199,7 @@ func replyWithReset(s *segment) { seq: seq, ack: ack, rcvWnd: 0, - }, buffer.VectorisedView{}, nil /* gso */) + }, buffer.VectorisedView{}, nil /* gso */, nil /* PacketOwner */) } // SetOption implements stack.TransportProtocol.SetOption. diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index a3372ac58..120d3baa3 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -143,6 +143,9 @@ type endpoint struct { // TODO(b/142022063): Add ability to save and restore per endpoint stats. stats tcpip.TransportEndpointStats `state:"nosave"` + + // owner is used to get uid and gid of the packet. + owner tcpip.PacketOwner } // +stateify savable @@ -484,7 +487,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c useDefaultTTL = false } - if err := sendUDP(route, buffer.View(v).ToVectorisedView(), e.ID.LocalPort, dstPort, ttl, useDefaultTTL, e.sendTOS); err != nil { + if err := sendUDP(route, buffer.View(v).ToVectorisedView(), e.ID.LocalPort, dstPort, ttl, useDefaultTTL, e.sendTOS, e.owner); err != nil { return 0, nil, err } return int64(len(v)), nil, nil @@ -886,7 +889,7 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { // sendUDP sends a UDP segment via the provided network endpoint and under the // provided identity. -func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8) *tcpip.Error { +func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner) *tcpip.Error { // Allocate a buffer for the UDP header. 
hdr := buffer.NewPrependable(header.UDPMinimumSize + int(r.MaxHeaderLength())) @@ -916,6 +919,7 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u Header: hdr, Data: data, TransportHeader: buffer.View(udp), + Owner: owner, }); err != nil { r.Stats().UDP.PacketSendErrors.Increment() return err @@ -1356,3 +1360,7 @@ func (*endpoint) Wait() {} func isBroadcastOrMulticast(a tcpip.Address) bool { return a == header.IPv4Broadcast || header.IsV4MulticastAddress(a) || header.IsV6MulticastAddress(a) } + +func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { + e.owner = owner +} diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go index 4582d514c..f6d974b85 100644 --- a/test/iptables/filter_output.go +++ b/test/iptables/filter_output.go @@ -24,6 +24,11 @@ func init() { RegisterTestCase(FilterOutputDropTCPSrcPort{}) RegisterTestCase(FilterOutputDestination{}) RegisterTestCase(FilterOutputInvertDestination{}) + RegisterTestCase(FilterOutputAcceptTCPOwner{}) + RegisterTestCase(FilterOutputDropTCPOwner{}) + RegisterTestCase(FilterOutputAcceptUDPOwner{}) + RegisterTestCase(FilterOutputDropUDPOwner{}) + RegisterTestCase(FilterOutputOwnerFail{}) } // FilterOutputDropTCPDestPort tests that connections are not accepted on @@ -90,6 +95,144 @@ func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP) error { return nil } +// FilterOutputAcceptTCPOwner tests that TCP connections from uid owner are accepted. +type FilterOutputAcceptTCPOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputAcceptTCPOwner) Name() string { + return "FilterOutputAcceptTCPOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputAcceptTCPOwner) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--uid-owner", "root", "-j", "ACCEPT"); err != nil { + return err + } + + // Listen for TCP packets on accept port. + if err := listenTCP(acceptPort, sendloopDuration); err != nil { + return fmt.Errorf("connection on port %d should be accepted, but got dropped", acceptPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputAcceptTCPOwner) LocalAction(ip net.IP) error { + if err := connectTCP(ip, acceptPort, sendloopDuration); err != nil { + return fmt.Errorf("connection destined to port %d should be accepted, but got dropped", acceptPort) + } + + return nil +} + +// FilterOutputDropTCPOwner tests that TCP connections from uid owner are dropped. +type FilterOutputDropTCPOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputDropTCPOwner) Name() string { + return "FilterOutputDropTCPOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputDropTCPOwner) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--uid-owner", "root", "-j", "DROP"); err != nil { + return err + } + + // Listen for TCP packets on accept port. + if err := listenTCP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection on port %d should be dropped, but got accepted", acceptPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterOutputDropTCPOwner) LocalAction(ip net.IP) error { + if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection destined to port %d should be dropped, but got accepted", acceptPort) + } + + return nil +} + +// FilterOutputAcceptUDPOwner tests that UDP packets from uid owner are accepted. +type FilterOutputAcceptUDPOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputAcceptUDPOwner) Name() string { + return "FilterOutputAcceptUDPOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputAcceptUDPOwner) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "udp", "-m", "owner", "--uid-owner", "root", "-j", "ACCEPT"); err != nil { + return err + } + + // Send UDP packets on acceptPort. + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputAcceptUDPOwner) LocalAction(ip net.IP) error { + // Listen for UDP packets on acceptPort. + return listenUDP(acceptPort, sendloopDuration) +} + +// FilterOutputDropUDPOwner tests that UDP packets from uid owner are dropped. +type FilterOutputDropUDPOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputDropUDPOwner) Name() string { + return "FilterOutputDropUDPOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputDropUDPOwner) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "udp", "-m", "owner", "--uid-owner", "root", "-j", "DROP"); err != nil { + return err + } + + // Send UDP packets on dropPort. + return sendUDPLoop(ip, dropPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputDropUDPOwner) LocalAction(ip net.IP) error { + // Listen for UDP packets on dropPort. + if err := listenUDP(dropPort, sendloopDuration); err == nil { + return fmt.Errorf("packets should not be received") + } + + return nil +} + +// FilterOutputOwnerFail tests that without uid/gid option, owner rule +// will fail. +type FilterOutputOwnerFail struct{} + +// Name implements TestCase.Name. +func (FilterOutputOwnerFail) Name() string { + return "FilterOutputOwnerFail" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputOwnerFail) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "udp", "-m", "owner", "-j", "ACCEPT"); err == nil { + return fmt.Errorf("Invalid argument") + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputOwnerFail) LocalAction(ip net.IP) error { + // no-op. + return nil +} + // FilterOutputDestination tests that we can selectively allow packets to // certain destinations. 
type FilterOutputDestination struct{} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 7f1f70606..493d69052 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -274,6 +274,36 @@ func TestFilterOutputDropTCPSrcPort(t *testing.T) { } } +func TestFilterOutputAcceptTCPOwner(t *testing.T) { + if err := singleTest(FilterOutputAcceptTCPOwner{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterOutputDropTCPOwner(t *testing.T) { + if err := singleTest(FilterOutputDropTCPOwner{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterOutputAcceptUDPOwner(t *testing.T) { + if err := singleTest(FilterOutputAcceptUDPOwner{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterOutputDropUDPOwner(t *testing.T) { + if err := singleTest(FilterOutputDropUDPOwner{}); err != nil { + t.Fatal(err) + } +} + +func TestFilterOutputOwnerFail(t *testing.T) { + if err := singleTest(FilterOutputOwnerFail{}); err != nil { + t.Fatal(err) + } +} + func TestJumpSerialize(t *testing.T) { if err := singleTest(FilterInputSerializeJump{}); err != nil { t.Fatal(err) -- cgit v1.2.3 From 8ce5b569714351f9f2f7fc48b0ff0bebbdb018ee Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Fri, 20 Mar 2020 08:45:07 +0000 Subject: Cleanup for syscall tests on arm64. Signed-off-by: Haibo Xu Change-Id: I8008c0375fc7e23225a21026f359e78e691729e5 --- test/syscalls/linux/getrandom.cc | 2 ++ test/syscalls/linux/lseek.cc | 2 +- test/syscalls/linux/mlock.cc | 4 +++- test/syscalls/linux/mmap.cc | 10 ++++++++-- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/getrandom.cc b/test/syscalls/linux/getrandom.cc index f97f60029..f87cdd7a1 100644 --- a/test/syscalls/linux/getrandom.cc +++ b/test/syscalls/linux/getrandom.cc @@ -29,6 +29,8 @@ namespace { #define SYS_getrandom 318 #elif defined(__i386__) #define SYS_getrandom 355 +#elif defined(__aarch64__) +#define SYS_getrandom 278 #else #error "Unknown architecture" #endif diff --git a/test/syscalls/linux/lseek.cc b/test/syscalls/linux/lseek.cc index a8af8e545..6ce1e6cc3 100644 --- a/test/syscalls/linux/lseek.cc +++ b/test/syscalls/linux/lseek.cc @@ -53,7 +53,7 @@ TEST(LseekTest, NegativeOffset) { // A 32-bit off_t is not large enough to represent an offset larger than // maximum file size on standard file systems, so it isn't possible to cause // overflow. -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) TEST(LseekTest, Overflow) { // HA! Classic Linux. 
We really should have an EOVERFLOW // here, since we're seeking to something that cannot be diff --git a/test/syscalls/linux/mlock.cc b/test/syscalls/linux/mlock.cc index 367a90fe1..78ac96bed 100644 --- a/test/syscalls/linux/mlock.cc +++ b/test/syscalls/linux/mlock.cc @@ -199,8 +199,10 @@ TEST(MunlockallTest, Basic) { } #ifndef SYS_mlock2 -#ifdef __x86_64__ +#if defined(__x86_64__) #define SYS_mlock2 325 +#elif defined(__aarch64__) +#define SYS_mlock2 284 #endif #endif diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index 11fb1b457..6d3227ab6 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -361,7 +361,7 @@ TEST_F(MMapTest, MapFixed) { } // 64-bit addresses work too -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) TEST_F(MMapTest, MapFixed64) { EXPECT_THAT(Map(0x300000000000, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0), @@ -571,6 +571,12 @@ const uint8_t machine_code[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, // movl $42, %eax 0xc3, // retq }; +#elif defined(__aarch64__) +const uint8_t machine_code[] = { + 0x40, 0x05, 0x80, 0x52, // mov w0, #42 + 0xc0, 0x03, 0x5f, 0xd6, // ret +}; +#endif // PROT_EXEC allows code execution TEST_F(MMapTest, ProtExec) { @@ -605,7 +611,6 @@ TEST_F(MMapTest, NoProtExecDeath) { EXPECT_EXIT(func(), ::testing::KilledBySignal(SIGSEGV), ""); } -#endif TEST_F(MMapTest, NoExceedLimitData) { void* prevbrk; @@ -1644,6 +1649,7 @@ TEST(MMapNoFixtureTest, MapReadOnlyAfterCreateWriteOnly) { } // Conditional on MAP_32BIT. +// This flag is supported only on x86-64, for 64-bit programs. #ifdef __x86_64__ TEST(MMapNoFixtureTest, Map32Bit) { -- cgit v1.2.3 From c71e97784cfc57a0664a07cb798aca3d39d6bb11 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Fri, 28 Feb 2020 09:14:57 +0000 Subject: Enable rseq syscall test on arm64. Signed-off-by: Haibo Xu Change-Id: If30154a2d73e98f211cfe589853b232019b9e130 --- test/syscalls/linux/rseq/BUILD | 41 ++++++++++--------- test/syscalls/linux/rseq/critical.S | 66 ------------------------------- test/syscalls/linux/rseq/critical_amd64.S | 66 +++++++++++++++++++++++++++++++ test/syscalls/linux/rseq/critical_arm64.S | 66 +++++++++++++++++++++++++++++++ test/syscalls/linux/rseq/start.S | 45 --------------------- test/syscalls/linux/rseq/start_amd64.S | 45 +++++++++++++++++++++ test/syscalls/linux/rseq/start_arm64.S | 45 +++++++++++++++++++++ test/syscalls/linux/rseq/syscalls.h | 5 ++- test/syscalls/linux/rseq/uapi.h | 4 +- 9 files changed, 251 insertions(+), 132 deletions(-) delete mode 100644 test/syscalls/linux/rseq/critical.S create mode 100644 test/syscalls/linux/rseq/critical_amd64.S create mode 100644 test/syscalls/linux/rseq/critical_arm64.S delete mode 100644 test/syscalls/linux/rseq/start.S create mode 100644 test/syscalls/linux/rseq/start_amd64.S create mode 100644 test/syscalls/linux/rseq/start_arm64.S (limited to 'test') diff --git a/test/syscalls/linux/rseq/BUILD b/test/syscalls/linux/rseq/BUILD index ed488dbc2..ee5b0a11b 100644 --- a/test/syscalls/linux/rseq/BUILD +++ b/test/syscalls/linux/rseq/BUILD @@ -1,7 +1,7 @@ # This package contains a standalone rseq test binary. This binary must not # depend on libc, which might use rseq itself. 
-load("//tools:defs.bzl", "cc_flags_supplier", "cc_library", "cc_toolchain") +load("//tools:defs.bzl", "cc_flags_supplier", "cc_library", "cc_toolchain", "select_arch") package(licenses = ["notice"]) @@ -9,32 +9,35 @@ genrule( name = "rseq_binary", srcs = [ "critical.h", - "critical.S", + "critical_amd64.S", + "critical_arm64.S", "rseq.cc", "syscalls.h", - "start.S", + "start_amd64.S", + "start_arm64.S", "test.h", "types.h", "uapi.h", ], outs = ["rseq"], - cmd = " ".join([ - "$(CC)", - "$(CC_FLAGS) ", - "-I.", - "-Wall", - "-Werror", - "-O2", - "-std=c++17", - "-static", - "-nostdlib", - "-ffreestanding", - "-o", - "$(location rseq)", - "$(location critical.S)", + cmd = "$(CC) " + + "$(CC_FLAGS) " + + "-I. " + + "-Wall " + + "-Werror " + + "-O2 " + + "-std=c++17 " + + "-static " + + "-nostdlib " + + "-ffreestanding " + + "-o " + + "$(location rseq) " + + select_arch( + amd64 = "$(location critical_amd64.S) $(location start_amd64.S) ", + arm64 = "$(location critical_arm64.S) $(location start_arm64.S) ", + no_match_error = "unsupported architecture", + ) + "$(location rseq.cc)", - "$(location start.S)", - ]), toolchains = [ cc_toolchain, ":no_pie_cc_flags", diff --git a/test/syscalls/linux/rseq/critical.S b/test/syscalls/linux/rseq/critical.S deleted file mode 100644 index 8c0687e6d..000000000 --- a/test/syscalls/linux/rseq/critical.S +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Restartable sequences critical sections. - -// Loops continuously until aborted. -// -// void rseq_loop(struct rseq* r, struct rseq_cs* cs) - - .text - .globl rseq_loop - .type rseq_loop, @function - -rseq_loop: - jmp begin - - // Abort block before the critical section. - // Abort signature is 4 nops for simplicity. - .byte 0x90, 0x90, 0x90, 0x90 - .globl rseq_loop_early_abort -rseq_loop_early_abort: - ret - -begin: - // r->rseq_cs = cs - movq %rsi, 8(%rdi) - - // N.B. rseq_cs will be cleared by any preempt, even outside the critical - // section. Thus it must be set in or immediately before the critical section - // to ensure it is not cleared before the section begins. - .globl rseq_loop_start -rseq_loop_start: - jmp rseq_loop_start - - // "Pre-commit": extra instructions inside the critical section. These are - // used as the abort point in TestAbortPreCommit, which is not valid. - .globl rseq_loop_pre_commit -rseq_loop_pre_commit: - // Extra abort signature + nop for TestAbortPostCommit. - .byte 0x90, 0x90, 0x90, 0x90 - nop - - // "Post-commit": never reached in this case. - .globl rseq_loop_post_commit -rseq_loop_post_commit: - - // Abort signature is 4 nops for simplicity. 
- .byte 0x90, 0x90, 0x90, 0x90 - - .globl rseq_loop_abort -rseq_loop_abort: - ret - - .size rseq_loop,.-rseq_loop - .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/critical_amd64.S b/test/syscalls/linux/rseq/critical_amd64.S new file mode 100644 index 000000000..8c0687e6d --- /dev/null +++ b/test/syscalls/linux/rseq/critical_amd64.S @@ -0,0 +1,66 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Restartable sequences critical sections. + +// Loops continuously until aborted. +// +// void rseq_loop(struct rseq* r, struct rseq_cs* cs) + + .text + .globl rseq_loop + .type rseq_loop, @function + +rseq_loop: + jmp begin + + // Abort block before the critical section. + // Abort signature is 4 nops for simplicity. + .byte 0x90, 0x90, 0x90, 0x90 + .globl rseq_loop_early_abort +rseq_loop_early_abort: + ret + +begin: + // r->rseq_cs = cs + movq %rsi, 8(%rdi) + + // N.B. rseq_cs will be cleared by any preempt, even outside the critical + // section. Thus it must be set in or immediately before the critical section + // to ensure it is not cleared before the section begins. + .globl rseq_loop_start +rseq_loop_start: + jmp rseq_loop_start + + // "Pre-commit": extra instructions inside the critical section. These are + // used as the abort point in TestAbortPreCommit, which is not valid. + .globl rseq_loop_pre_commit +rseq_loop_pre_commit: + // Extra abort signature + nop for TestAbortPostCommit. + .byte 0x90, 0x90, 0x90, 0x90 + nop + + // "Post-commit": never reached in this case. + .globl rseq_loop_post_commit +rseq_loop_post_commit: + + // Abort signature is 4 nops for simplicity. + .byte 0x90, 0x90, 0x90, 0x90 + + .globl rseq_loop_abort +rseq_loop_abort: + ret + + .size rseq_loop,.-rseq_loop + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/critical_arm64.S b/test/syscalls/linux/rseq/critical_arm64.S new file mode 100644 index 000000000..bfe7e8307 --- /dev/null +++ b/test/syscalls/linux/rseq/critical_arm64.S @@ -0,0 +1,66 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Restartable sequences critical sections. + +// Loops continuously until aborted. +// +// void rseq_loop(struct rseq* r, struct rseq_cs* cs) + + .text + .globl rseq_loop + .type rseq_loop, @function + +rseq_loop: + b begin + + // Abort block before the critical section. + // Abort signature. 
+ .byte 0x90, 0x90, 0x90, 0x90 + .globl rseq_loop_early_abort +rseq_loop_early_abort: + ret + +begin: + // r->rseq_cs = cs + str x1, [x0, #8] + + // N.B. rseq_cs will be cleared by any preempt, even outside the critical + // section. Thus it must be set in or immediately before the critical section + // to ensure it is not cleared before the section begins. + .globl rseq_loop_start +rseq_loop_start: + b rseq_loop_start + + // "Pre-commit": extra instructions inside the critical section. These are + // used as the abort point in TestAbortPreCommit, which is not valid. + .globl rseq_loop_pre_commit +rseq_loop_pre_commit: + // Extra abort signature + nop for TestAbortPostCommit. + .byte 0x90, 0x90, 0x90, 0x90 + nop + + // "Post-commit": never reached in this case. + .globl rseq_loop_post_commit +rseq_loop_post_commit: + + // Abort signature. + .byte 0x90, 0x90, 0x90, 0x90 + + .globl rseq_loop_abort +rseq_loop_abort: + ret + + .size rseq_loop,.-rseq_loop + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/start.S b/test/syscalls/linux/rseq/start.S deleted file mode 100644 index b9611b276..000000000 --- a/test/syscalls/linux/rseq/start.S +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - - .text - .align 4 - .type _start,@function - .globl _start - -_start: - movq %rsp,%rdi - call __init - hlt - - .size _start,.-_start - .section .note.GNU-stack,"",@progbits - - .text - .globl raw_syscall - .type raw_syscall, @function - -raw_syscall: - mov %rdi,%rax // syscall # - mov %rsi,%rdi // arg0 - mov %rdx,%rsi // arg1 - mov %rcx,%rdx // arg2 - mov %r8,%r10 // arg3 (goes in r10 instead of rcx for system calls) - mov %r9,%r8 // arg4 - mov 0x8(%rsp),%r9 // arg5 - syscall - ret - - .size raw_syscall,.-raw_syscall - .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/start_amd64.S b/test/syscalls/linux/rseq/start_amd64.S new file mode 100644 index 000000000..b9611b276 --- /dev/null +++ b/test/syscalls/linux/rseq/start_amd64.S @@ -0,0 +1,45 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + + .text + .align 4 + .type _start,@function + .globl _start + +_start: + movq %rsp,%rdi + call __init + hlt + + .size _start,.-_start + .section .note.GNU-stack,"",@progbits + + .text + .globl raw_syscall + .type raw_syscall, @function + +raw_syscall: + mov %rdi,%rax // syscall # + mov %rsi,%rdi // arg0 + mov %rdx,%rsi // arg1 + mov %rcx,%rdx // arg2 + mov %r8,%r10 // arg3 (goes in r10 instead of rcx for system calls) + mov %r9,%r8 // arg4 + mov 0x8(%rsp),%r9 // arg5 + syscall + ret + + .size raw_syscall,.-raw_syscall + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/start_arm64.S b/test/syscalls/linux/rseq/start_arm64.S new file mode 100644 index 000000000..693c1c6eb --- /dev/null +++ b/test/syscalls/linux/rseq/start_arm64.S @@ -0,0 +1,45 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + + .text + .align 4 + .type _start,@function + .globl _start + +_start: + mov x29, sp + bl __init + wfi + + .size _start,.-_start + .section .note.GNU-stack,"",@progbits + + .text + .globl raw_syscall + .type raw_syscall, @function + +raw_syscall: + mov x8,x0 // syscall # + mov x0,x1 // arg0 + mov x1,x2 // arg1 + mov x2,x3 // arg2 + mov x3,x4 // arg3 + mov x4,x5 // arg4 + mov x5,x6 // arg5 + svc #0 + ret + + .size raw_syscall,.-raw_syscall + .section .note.GNU-stack,"",@progbits diff --git a/test/syscalls/linux/rseq/syscalls.h b/test/syscalls/linux/rseq/syscalls.h index e5299c188..c4118e6c5 100644 --- a/test/syscalls/linux/rseq/syscalls.h +++ b/test/syscalls/linux/rseq/syscalls.h @@ -17,10 +17,13 @@ #include "test/syscalls/linux/rseq/types.h" -#ifdef __x86_64__ // Syscall numbers. +#if defined(__x86_64__) constexpr int kGetpid = 39; constexpr int kExitGroup = 231; +#elif defined(__aarch64__) +constexpr int kGetpid = 172; +constexpr int kExitGroup = 94; #else #error "Unknown architecture" #endif diff --git a/test/syscalls/linux/rseq/uapi.h b/test/syscalls/linux/rseq/uapi.h index ca1d67691..d3e60d0a4 100644 --- a/test/syscalls/linux/rseq/uapi.h +++ b/test/syscalls/linux/rseq/uapi.h @@ -19,9 +19,11 @@ // User-kernel ABI for restartable sequences. -#ifdef __x86_64__ // Syscall numbers. +#if defined(__x86_64__) constexpr int kRseqSyscall = 334; +#elif defined(__aarch64__) +constexpr int kRseqSyscall = 293; #else #error "Unknown architecture" #endif // __x86_64__ -- cgit v1.2.3 From 639d94f9f71b43e86320a6e9157c932f5d7936a7 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 31 Mar 2020 19:15:55 -0700 Subject: Add socket filesystem and global disconnected socket mount for VFS2. A socket mount where anonymous sockets will reside is added to the VirtualFilesystem. Socketfs is built on top of kernfs. Updates #1476, #1478, #1484, #1485. 
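As a rough sketch of the pattern this change describes (illustrative only: the helper name initSocketMount is invented for the example, while sockfs.NewFilesystem, VirtualFilesystem.NewDisconnectedMount and Kernel.SocketMount are the APIs added in the diff below), kernel initialization creates the kernfs-backed sockfs filesystem once and hangs a single disconnected mount off of it:

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
)

// initSocketMount builds the global, disconnected socket mount. It assumes
// vfsObj has already been initialized via vfsObj.Init().
func initSocketMount(vfsObj *vfs.VirtualFilesystem) (*vfs.Mount, error) {
	// NewFilesystem panics on failure; NewDisconnectedMount takes its own
	// reference on fs, so the local reference is dropped on return.
	fs := sockfs.NewFilesystem(vfsObj)
	defer fs.DecRef()
	mnt, err := vfsObj.NewDisconnectedMount(fs, nil, &vfs.MountOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to initialize socket mount: %v", err)
	}
	return mnt, nil
}

Because the mount is disconnected, it is never attached to a user-visible mount tree; it exists only to back the VirtualDentries behind anonymous socket file descriptions, and is exposed through Kernel.SocketMount in the change below.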
PiperOrigin-RevId: 304095251 --- pkg/sentry/fsimpl/sockfs/BUILD | 16 +++++++++ pkg/sentry/fsimpl/sockfs/sockfs.go | 73 ++++++++++++++++++++++++++++++++++++++ pkg/sentry/kernel/BUILD | 1 + pkg/sentry/kernel/kernel.go | 24 +++++++++++++ test/syscalls/linux/socket_unix.cc | 2 ++ 5 files changed, 116 insertions(+) create mode 100644 pkg/sentry/fsimpl/sockfs/BUILD create mode 100644 pkg/sentry/fsimpl/sockfs/sockfs.go (limited to 'test') diff --git a/pkg/sentry/fsimpl/sockfs/BUILD b/pkg/sentry/fsimpl/sockfs/BUILD new file mode 100644 index 000000000..790d50e65 --- /dev/null +++ b/pkg/sentry/fsimpl/sockfs/BUILD @@ -0,0 +1,16 @@ +load("//tools:defs.bzl", "go_library") + +licenses(["notice"]) + +go_library( + name = "sockfs", + srcs = ["sockfs.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/context", + "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/vfs", + "//pkg/syserror", + ], +) diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go new file mode 100644 index 000000000..c13511de2 --- /dev/null +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -0,0 +1,73 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sockfs provides a filesystem implementation for anonymous sockets. +package sockfs + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// NewFilesystem creates a new sockfs filesystem. +// +// Note that there should only ever be one instance of sockfs.Filesystem, +// backing a global socket mount. +func NewFilesystem(vfsObj *vfs.VirtualFilesystem) *vfs.Filesystem { + fs, _, err := filesystemType{}.GetFilesystem(nil, vfsObj, nil, "", vfs.GetFilesystemOptions{}) + if err != nil { + panic("failed to create sockfs filesystem") + } + return fs +} + +// filesystemType implements vfs.FilesystemType. +type filesystemType struct{} + +// GetFilesystem implements FilesystemType.GetFilesystem. +func (fsType filesystemType) GetFilesystem(_ context.Context, vfsObj *vfs.VirtualFilesystem, _ *auth.Credentials, _ string, _ vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + fs := &filesystem{} + fs.Init(vfsObj, fsType) + return fs.VFSFilesystem(), nil, nil +} + +// Name implements FilesystemType.Name. +// +// Note that registering sockfs is unnecessary, except for the fact that it +// will not show up under /proc/filesystems as a result. This is a very minor +// discrepancy from Linux. +func (filesystemType) Name() string { + return "sockfs" +} + +// filesystem implements vfs.FilesystemImpl. +type filesystem struct { + kernfs.Filesystem +} + +// inode implements kernfs.Inode. +type inode struct { + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink + kernfs.InodeAttrs + kernfs.InodeNoopRefCount +} + +// Open implements kernfs.Inode.Open. 
+func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + return nil, syserror.ENXIO +} diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index beba29a09..bb7e3cbc3 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -169,6 +169,7 @@ go_library( "//pkg/sentry/fs/lock", "//pkg/sentry/fs/timerfd", "//pkg/sentry/fsbridge", + "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/hostcpu", "//pkg/sentry/inet", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 6feda8fa1..0a448b57c 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -50,6 +50,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" "gvisor.dev/gvisor/pkg/sentry/fsbridge" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/hostcpu" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -225,6 +226,11 @@ type Kernel struct { // by extMu. nextSocketEntry uint64 + // socketMount is a disconnected vfs.Mount, not included in k.vfs, + // representing a sockfs.filesystem. socketMount is used to back + // VirtualDentries representing anonymous sockets. + socketMount *vfs.Mount + // deviceRegistry is used to save/restore device.SimpleDevices. deviceRegistry struct{} `state:".(*device.Registry)"` @@ -348,6 +354,19 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic} k.futexes = futex.NewManager() k.netlinkPorts = port.New() + if VFS2Enabled { + if err := k.vfs.Init(); err != nil { + return fmt.Errorf("failed to initialize VFS: %v", err) + } + fs := sockfs.NewFilesystem(&k.vfs) + // NewDisconnectedMount will take an additional reference on fs. + defer fs.DecRef() + sm, err := k.vfs.NewDisconnectedMount(fs, nil, &vfs.MountOptions{}) + if err != nil { + return fmt.Errorf("failed to initialize socket mount: %v", err) + } + k.socketMount = sm + } return nil } @@ -1452,6 +1471,11 @@ func (k *Kernel) ListSockets() []*SocketEntry { return socks } +// SocketMount returns the global socket mount. +func (k *Kernel) SocketMount() *vfs.Mount { + return k.socketMount +} + // supervisorContext is a privileged context. type supervisorContext struct { context.NoopSleeper diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc index 4cf1f76f1..8bf663e8b 100644 --- a/test/syscalls/linux/socket_unix.cc +++ b/test/syscalls/linux/socket_unix.cc @@ -257,6 +257,8 @@ TEST_P(UnixSocketPairTest, ShutdownWrite) { TEST_P(UnixSocketPairTest, SocketReopenFromProcfs) { // TODO(b/122310852): We should be returning ENXIO and NOT EIO. + // TODO(github.dev/issue/1624): This should be resolved in VFS2. Verify + // that this is the case and delete the SKIP_IF once we delete VFS1. 
SKIP_IF(IsRunningOnGvisor()); auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); -- cgit v1.2.3 From d25036ad17a3ada7fa6ce9900f20e246e07acd2f Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Tue, 31 Mar 2020 19:51:52 -0700 Subject: Test receiving multicast packets over UDP PiperOrigin-RevId: 304098611 --- pkg/tcpip/header/udp.go | 5 + test/packetimpact/dut/posix_server.cc | 11 ++ test/packetimpact/proto/posix_server.proto | 30 ++- test/packetimpact/testbench/BUILD | 1 + test/packetimpact/testbench/connections.go | 216 ++++++++++++++++++--- test/packetimpact/testbench/dut.go | 44 ++++- test/packetimpact/testbench/layers.go | 156 ++++++++++++--- test/packetimpact/tests/BUILD | 13 ++ test/packetimpact/tests/Dockerfile | 14 +- test/packetimpact/tests/defs.bzl | 18 +- test/packetimpact/tests/fin_wait2_timeout_test.go | 2 +- test/packetimpact/tests/test_runner.sh | 24 ++- test/packetimpact/tests/udp_recv_multicast_test.go | 37 ++++ 13 files changed, 501 insertions(+), 70 deletions(-) create mode 100644 test/packetimpact/tests/udp_recv_multicast_test.go (limited to 'test') diff --git a/pkg/tcpip/header/udp.go b/pkg/tcpip/header/udp.go index 74412c894..9339d637f 100644 --- a/pkg/tcpip/header/udp.go +++ b/pkg/tcpip/header/udp.go @@ -99,6 +99,11 @@ func (b UDP) SetChecksum(checksum uint16) { binary.BigEndian.PutUint16(b[udpChecksum:], checksum) } +// SetLength sets the "length" field of the udp header. +func (b UDP) SetLength(length uint16) { + binary.BigEndian.PutUint16(b[udpLength:], length) +} + // CalculateChecksum calculates the checksum of the udp packet, given the // checksum of the network-layer pseudo-header and the checksum of the payload. func (b UDP) CalculateChecksum(partialChecksum uint16) uint16 { diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index 2f10dda40..4a71c54c6 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -181,6 +181,17 @@ class PosixImpl final : public posix_server::Posix::Service { response->set_errno_(errno); return ::grpc::Status::OK; } + + ::grpc::Status Recv(::grpc::ServerContext *context, + const ::posix_server::RecvRequest *request, + ::posix_server::RecvResponse *response) override { + std::vector buf(request->len()); + response->set_ret( + recv(request->sockfd(), buf.data(), buf.size(), request->flags())); + response->set_errno_(errno); + response->set_buf(buf.data(), response->ret()); + return ::grpc::Status::OK; + } }; // Parse command line options. Returns a pointer to the first argument beyond diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto index 026876fc2..53ec49410 100644 --- a/test/packetimpact/proto/posix_server.proto +++ b/test/packetimpact/proto/posix_server.proto @@ -24,7 +24,7 @@ message SocketRequest { message SocketResponse { int32 fd = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. } message SockaddrIn { @@ -55,7 +55,7 @@ message BindRequest { message BindResponse { int32 ret = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. } message GetSockNameRequest { @@ -64,7 +64,7 @@ message GetSockNameRequest { message GetSockNameResponse { int32 ret = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. Sockaddr addr = 3; } @@ -75,7 +75,7 @@ message ListenRequest { message ListenResponse { int32 ret = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. 
} message AcceptRequest { @@ -84,7 +84,7 @@ message AcceptRequest { message AcceptResponse { int32 fd = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. Sockaddr addr = 3; } @@ -97,7 +97,7 @@ message SetSockOptRequest { message SetSockOptResponse { int32 ret = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. } message Timeval { @@ -114,7 +114,7 @@ message SetSockOptTimevalRequest { message SetSockOptTimevalResponse { int32 ret = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. } message CloseRequest { @@ -123,7 +123,19 @@ message CloseRequest { message CloseResponse { int32 ret = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. +} + +message RecvRequest { + int32 sockfd = 1; + int32 len = 2; + int32 flags = 3; +} + +message RecvResponse { + int32 ret = 1; + int32 errno_ = 2; // "errno" may fail to compile in c++. + bytes buf = 3; } service Posix { @@ -147,4 +159,6 @@ service Posix { returns (SetSockOptTimevalResponse); // Call close() on the DUT. rpc Close(CloseRequest) returns (CloseResponse); + // Call recv() on the DUT. + rpc Recv(RecvRequest) returns (RecvResponse); } diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index a34c81fcc..4a9d8efa6 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -16,6 +16,7 @@ go_library( ], deps = [ "//pkg/tcpip", + "//pkg/tcpip/buffer", "//pkg/tcpip/header", "//pkg/tcpip/seqnum", "//pkg/usermem", diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index b7aa63934..8d1f562ee 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -36,19 +36,6 @@ var remoteIPv4 = flag.String("remote_ipv4", "", "remote IPv4 address for test pa var localMAC = flag.String("local_mac", "", "local mac address for test packets") var remoteMAC = flag.String("remote_mac", "", "remote mac address for test packets") -// TCPIPv4 maintains state about a TCP/IPv4 connection. -type TCPIPv4 struct { - outgoing Layers - incoming Layers - LocalSeqNum seqnum.Value - RemoteSeqNum seqnum.Value - SynAck *TCP - sniffer Sniffer - injector Injector - portPickerFD int - t *testing.T -} - // pickPort makes a new socket and returns the socket FD and port. The caller // must close the FD when done with the port if there is no error. func pickPort() (int, uint16, error) { @@ -75,12 +62,25 @@ func pickPort() (int, uint16, error) { return fd, uint16(newSockAddrInet4.Port), nil } +// TCPIPv4 maintains state about a TCP/IPv4 connection. +type TCPIPv4 struct { + outgoing Layers + incoming Layers + LocalSeqNum seqnum.Value + RemoteSeqNum seqnum.Value + SynAck *TCP + sniffer Sniffer + injector Injector + portPickerFD int + t *testing.T +} + // tcpLayerIndex is the position of the TCP layer in the TCPIPv4 connection. It // is the third, after Ethernet and IPv4. const tcpLayerIndex int = 2 // NewTCPIPv4 creates a new TCPIPv4 connection with reasonable defaults. 
-func NewTCPIPv4(t *testing.T, dut DUT, outgoingTCP, incomingTCP TCP) TCPIPv4 { +func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { lMAC, err := tcpip.ParseMACAddress(*localMAC) if err != nil { t.Fatalf("can't parse localMAC %q: %s", *localMAC, err) @@ -109,18 +109,16 @@ func NewTCPIPv4(t *testing.T, dut DUT, outgoingTCP, incomingTCP TCP) TCPIPv4 { } newOutgoingTCP := &TCP{ - DataOffset: Uint8(header.TCPMinimumSize), - WindowSize: Uint16(32768), - SrcPort: &localPort, + SrcPort: &localPort, } if err := newOutgoingTCP.merge(outgoingTCP); err != nil { - t.Fatalf("can't merge %v into %v: %s", outgoingTCP, newOutgoingTCP, err) + t.Fatalf("can't merge %+v into %+v: %s", outgoingTCP, newOutgoingTCP, err) } newIncomingTCP := &TCP{ DstPort: &localPort, } if err := newIncomingTCP.merge(incomingTCP); err != nil { - t.Fatalf("can't merge %v into %v: %s", incomingTCP, newIncomingTCP, err) + t.Fatalf("can't merge %+v into %+v: %s", incomingTCP, newIncomingTCP, err) } return TCPIPv4{ outgoing: Layers{ @@ -149,8 +147,9 @@ func (conn *TCPIPv4) Close() { conn.portPickerFD = -1 } -// Send a packet with reasonable defaults and override some fields by tcp. -func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { +// CreateFrame builds a frame for the connection with tcp overriding defaults +// and additionalLayers added after the TCP header. +func (conn *TCPIPv4) CreateFrame(tcp TCP, additionalLayers ...Layer) Layers { if tcp.SeqNum == nil { tcp.SeqNum = Uint32(uint32(conn.LocalSeqNum)) } @@ -159,30 +158,41 @@ func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { } layersToSend := deepcopy.Copy(conn.outgoing).(Layers) if err := layersToSend[tcpLayerIndex].(*TCP).merge(tcp); err != nil { - conn.t.Fatalf("can't merge %v into %v: %s", tcp, layersToSend[tcpLayerIndex], err) + conn.t.Fatalf("can't merge %+v into %+v: %s", tcp, layersToSend[tcpLayerIndex], err) } layersToSend = append(layersToSend, additionalLayers...) - outBytes, err := layersToSend.toBytes() + return layersToSend +} + +// SendFrame sends a frame with reasonable defaults. +func (conn *TCPIPv4) SendFrame(frame Layers) { + outBytes, err := frame.toBytes() if err != nil { conn.t.Fatalf("can't build outgoing TCP packet: %s", err) } conn.injector.Send(outBytes) // Compute the next TCP sequence number. - for i := tcpLayerIndex + 1; i < len(layersToSend); i++ { - conn.LocalSeqNum.UpdateForward(seqnum.Size(layersToSend[i].length())) + for i := tcpLayerIndex + 1; i < len(frame); i++ { + conn.LocalSeqNum.UpdateForward(seqnum.Size(frame[i].length())) } + tcp := frame[tcpLayerIndex].(*TCP) if tcp.Flags != nil && *tcp.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 { conn.LocalSeqNum.UpdateForward(1) } } +// Send a packet with reasonable defaults and override some fields by tcp. +func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { + conn.SendFrame(conn.CreateFrame(tcp, additionalLayers...)) +} + // Recv gets a packet from the sniffer within the timeout provided. If no packet // arrives before the timeout, it returns nil. func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP { deadline := time.Now().Add(timeout) for { - timeout = deadline.Sub(time.Now()) + timeout = time.Until(deadline) if timeout <= 0 { break } @@ -192,6 +202,7 @@ func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP { } layers, err := ParseEther(b) if err != nil { + conn.t.Logf("can't parse frame: %s", err) continue // Ignore packets that can't be parsed. 
} if !conn.incoming.match(layers) { @@ -215,7 +226,7 @@ func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP { func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) *TCP { deadline := time.Now().Add(timeout) for { - timeout = deadline.Sub(time.Now()) + timeout = time.Until(deadline) if timeout <= 0 { return nil } @@ -243,3 +254,154 @@ func (conn *TCPIPv4) Handshake() { // Send an ACK. conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)}) } + +// UDPIPv4 maintains state about a UDP/IPv4 connection. +type UDPIPv4 struct { + outgoing Layers + incoming Layers + sniffer Sniffer + injector Injector + portPickerFD int + t *testing.T +} + +// udpLayerIndex is the position of the UDP layer in the UDPIPv4 connection. It +// is the third, after Ethernet and IPv4. +const udpLayerIndex int = 2 + +// NewUDPIPv4 creates a new UDPIPv4 connection with reasonable defaults. +func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { + lMAC, err := tcpip.ParseMACAddress(*localMAC) + if err != nil { + t.Fatalf("can't parse localMAC %q: %s", *localMAC, err) + } + + rMAC, err := tcpip.ParseMACAddress(*remoteMAC) + if err != nil { + t.Fatalf("can't parse remoteMAC %q: %s", *remoteMAC, err) + } + + portPickerFD, localPort, err := pickPort() + if err != nil { + t.Fatalf("can't pick a port: %s", err) + } + lIP := tcpip.Address(net.ParseIP(*localIPv4).To4()) + rIP := tcpip.Address(net.ParseIP(*remoteIPv4).To4()) + + sniffer, err := NewSniffer(t) + if err != nil { + t.Fatalf("can't make new sniffer: %s", err) + } + + injector, err := NewInjector(t) + if err != nil { + t.Fatalf("can't make new injector: %s", err) + } + + newOutgoingUDP := &UDP{ + SrcPort: &localPort, + } + if err := newOutgoingUDP.merge(outgoingUDP); err != nil { + t.Fatalf("can't merge %+v into %+v: %s", outgoingUDP, newOutgoingUDP, err) + } + newIncomingUDP := &UDP{ + DstPort: &localPort, + } + if err := newIncomingUDP.merge(incomingUDP); err != nil { + t.Fatalf("can't merge %+v into %+v: %s", incomingUDP, newIncomingUDP, err) + } + return UDPIPv4{ + outgoing: Layers{ + &Ether{SrcAddr: &lMAC, DstAddr: &rMAC}, + &IPv4{SrcAddr: &lIP, DstAddr: &rIP}, + newOutgoingUDP}, + incoming: Layers{ + &Ether{SrcAddr: &rMAC, DstAddr: &lMAC}, + &IPv4{SrcAddr: &rIP, DstAddr: &lIP}, + newIncomingUDP}, + sniffer: sniffer, + injector: injector, + portPickerFD: portPickerFD, + t: t, + } +} + +// Close the injector and sniffer associated with this connection. +func (conn *UDPIPv4) Close() { + conn.sniffer.Close() + conn.injector.Close() + if err := unix.Close(conn.portPickerFD); err != nil { + conn.t.Fatalf("can't close portPickerFD: %s", err) + } + conn.portPickerFD = -1 +} + +// CreateFrame builds a frame for the connection with the provided udp +// overriding defaults and the additionalLayers added after the UDP header. +func (conn *UDPIPv4) CreateFrame(udp UDP, additionalLayers ...Layer) Layers { + layersToSend := deepcopy.Copy(conn.outgoing).(Layers) + if err := layersToSend[udpLayerIndex].(*UDP).merge(udp); err != nil { + conn.t.Fatalf("can't merge %+v into %+v: %s", udp, layersToSend[udpLayerIndex], err) + } + layersToSend = append(layersToSend, additionalLayers...) + return layersToSend +} + +// SendFrame sends a frame with reasonable defaults. +func (conn *UDPIPv4) SendFrame(frame Layers) { + outBytes, err := frame.toBytes() + if err != nil { + conn.t.Fatalf("can't build outgoing UDP packet: %s", err) + } + conn.injector.Send(outBytes) +} + +// Send a packet with reasonable defaults and override some fields by udp. 
+func (conn *UDPIPv4) Send(udp UDP, additionalLayers ...Layer) { + conn.SendFrame(conn.CreateFrame(udp, additionalLayers...)) +} + +// Recv gets a packet from the sniffer within the timeout provided. If no packet +// arrives before the timeout, it returns nil. +func (conn *UDPIPv4) Recv(timeout time.Duration) *UDP { + deadline := time.Now().Add(timeout) + for { + timeout = time.Until(deadline) + if timeout <= 0 { + break + } + b := conn.sniffer.Recv(timeout) + if b == nil { + break + } + layers, err := ParseEther(b) + if err != nil { + conn.t.Logf("can't parse frame: %s", err) + continue // Ignore packets that can't be parsed. + } + if !conn.incoming.match(layers) { + continue // Ignore packets that don't match the expected incoming. + } + return (layers[udpLayerIndex]).(*UDP) + } + return nil +} + +// Expect a packet that matches the provided udp within the timeout specified. +// If it doesn't arrive in time, the test fails. +func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) *UDP { + deadline := time.Now().Add(timeout) + for { + timeout = time.Until(deadline) + if timeout <= 0 { + return nil + } + gotUDP := conn.Recv(timeout) + if gotUDP == nil { + return nil + } + if udp.match(gotUDP) { + return gotUDP + } + } +} diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index 8ea1706d3..f80dbb35f 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -305,6 +305,35 @@ func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval } } +// RecvWithErrno calls recv on the DUT. +func (dut *DUT) RecvWithErrno(ctx context.Context, sockfd, len, flags int32) (int32, []byte, error) { + dut.t.Helper() + req := pb.RecvRequest{ + Sockfd: sockfd, + Len: len, + Flags: flags, + } + resp, err := dut.posixServer.Recv(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Recv: %s", err) + } + return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_()) +} + +// Recv calls recv on the DUT and causes a fatal test failure if it doesn't +// succeed. If more control over the timeout or error handling is needed, use +// RecvWithErrno. +func (dut *DUT) Recv(sockfd, len, flags int32) []byte { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, buf, err := dut.RecvWithErrno(ctx, sockfd, len, flags) + if ret == -1 { + dut.t.Fatalf("failed to recv: %s", err) + } + return buf +} + // CloseWithErrno calls close on the DUT. func (dut *DUT) CloseWithErrno(fd int32) (int32, error) { dut.t.Helper() @@ -330,10 +359,11 @@ func (dut *DUT) Close(fd int32) { } } -// CreateListener makes a new TCP connection. If it fails, the test ends. -func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { +// CreateBoundSocket makes a new socket on the DUT, with type typ and protocol +// proto, and bound to the IP address addr. Returns the new file descriptor and +// the port that was selected on the DUT. +func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) { dut.t.Helper() - addr := net.ParseIP(*remoteIPv4) var fd int32 if addr.To4() != nil { fd = dut.Socket(unix.AF_INET, typ, proto) @@ -358,6 +388,12 @@ func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { default: dut.t.Fatalf("unknown sockaddr type from getsockname: %t", sa) } - dut.Listen(fd, backlog) return fd, uint16(port) } + +// CreateListener makes a new TCP connection. If it fails, the test ends. 
+func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { + fd, remotePort := dut.CreateBoundSocket(typ, proto, net.ParseIP(*remoteIPv4)) + dut.Listen(fd, backlog) + return fd, remotePort +} diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 35fa4dcb6..d7434c3d2 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -22,6 +22,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/imdario/mergo" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" ) @@ -97,7 +98,7 @@ func equalLayer(x, y Layer) bool { return cmp.Equal(x, y, opt, cmpopts.IgnoreUnexported(LayerBase{})) } -// Ether can construct and match the ethernet encapsulation. +// Ether can construct and match an ethernet encapsulation. type Ether struct { LayerBase SrcAddr *tcpip.LinkAddress @@ -161,7 +162,7 @@ func ParseEther(b []byte) (Layers, error) { return append(layers, moreLayers...), nil default: // TODO(b/150301488): Support more protocols, like IPv6. - return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %v", b) + return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %#v", b) } } @@ -173,7 +174,7 @@ func (l *Ether) length() int { return header.EthernetMinimumSize } -// IPv4 can construct and match the ethernet excapulation. +// IPv4 can construct and match an IPv4 encapsulation. type IPv4 struct { LayerBase IHL *uint8 @@ -236,9 +237,11 @@ func (l *IPv4) toBytes() ([]byte, error) { switch n := l.next().(type) { case *TCP: fields.Protocol = uint8(header.TCPProtocolNumber) + case *UDP: + fields.Protocol = uint8(header.UDPProtocolNumber) default: - // TODO(b/150301488): Support more protocols, like UDP. - return nil, fmt.Errorf("can't deduce the ip header's next protocol: %+v", n) + // TODO(b/150301488): Support more protocols as needed. + return nil, fmt.Errorf("can't deduce the ip header's next protocol: %#v", n) } } if l.SrcAddr != nil { @@ -294,13 +297,19 @@ func ParseIPv4(b []byte) (Layers, error) { DstAddr: Address(h.DestinationAddress()), } layers := Layers{&ipv4} - switch h.Protocol() { - case uint8(header.TCPProtocolNumber): + switch h.TransportProtocol() { + case header.TCPProtocolNumber: moreLayers, err := ParseTCP(b[ipv4.length():]) if err != nil { return nil, err } return append(layers, moreLayers...), nil + case header.UDPProtocolNumber: + moreLayers, err := ParseUDP(b[ipv4.length():]) + if err != nil { + return nil, err + } + return append(layers, moreLayers...), nil } return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", h.Protocol()) } @@ -316,7 +325,7 @@ func (l *IPv4) length() int { return int(*l.IHL) } -// TCP can construct and match the TCP excapulation. +// TCP can construct and match a TCP encapsulation. 
type TCP struct { LayerBase SrcPort *uint16 @@ -347,12 +356,16 @@ func (l *TCP) toBytes() ([]byte, error) { } if l.DataOffset != nil { h.SetDataOffset(*l.DataOffset) + } else { + h.SetDataOffset(uint8(l.length())) } if l.Flags != nil { h.SetFlags(*l.Flags) } if l.WindowSize != nil { h.SetWindowSize(*l.WindowSize) + } else { + h.SetWindowSize(32768) } if l.UrgentPointer != nil { h.SetUrgentPoiner(*l.UrgentPointer) @@ -361,38 +374,52 @@ func (l *TCP) toBytes() ([]byte, error) { h.SetChecksum(*l.Checksum) return h, nil } - if err := setChecksum(&h, l); err != nil { + if err := setTCPChecksum(&h, l); err != nil { return nil, err } return h, nil } -// setChecksum calculates the checksum of the TCP header and sets it in h. -func setChecksum(h *header.TCP, tcp *TCP) error { - h.SetChecksum(0) - tcpLength := uint16(tcp.length()) - current := tcp.next() - for current != nil { - tcpLength += uint16(current.length()) - current = current.next() +// totalLength returns the length of the provided layer and all following +// layers. +func totalLength(l Layer) int { + var totalLength int + for ; l != nil; l = l.next() { + totalLength += l.length() } + return totalLength +} +// layerChecksum calculates the checksum of the Layer header, including the +// peusdeochecksum of the layer before it and all the bytes after it.. +func layerChecksum(l Layer, protoNumber tcpip.TransportProtocolNumber) (uint16, error) { + totalLength := uint16(totalLength(l)) var xsum uint16 - switch s := tcp.prev().(type) { + switch s := l.prev().(type) { case *IPv4: - xsum = header.PseudoHeaderChecksum(header.TCPProtocolNumber, *s.SrcAddr, *s.DstAddr, tcpLength) + xsum = header.PseudoHeaderChecksum(protoNumber, *s.SrcAddr, *s.DstAddr, totalLength) default: // TODO(b/150301488): Support more protocols, like IPv6. - return fmt.Errorf("can't get src and dst addr from previous layer") + return 0, fmt.Errorf("can't get src and dst addr from previous layer: %#v", s) } - current = tcp.next() - for current != nil { + var payloadBytes buffer.VectorisedView + for current := l.next(); current != nil; current = current.next() { payload, err := current.toBytes() if err != nil { - return fmt.Errorf("can't get bytes for next header: %s", payload) + return 0, fmt.Errorf("can't get bytes for next header: %s", payload) } - xsum = header.Checksum(payload, xsum) - current = current.next() + payloadBytes.AppendView(payload) + } + xsum = header.ChecksumVV(payloadBytes, xsum) + return xsum, nil +} + +// setTCPChecksum calculates the checksum of the TCP header and sets it in h. +func setTCPChecksum(h *header.TCP, tcp *TCP) error { + h.SetChecksum(0) + xsum, err := layerChecksum(tcp, header.TCPProtocolNumber) + if err != nil { + return err } h.SetChecksum(^h.CalculateChecksum(xsum)) return nil @@ -444,6 +471,85 @@ func (l *TCP) merge(other TCP) error { return mergo.Merge(l, other, mergo.WithOverride) } +// UDP can construct and match a UDP encapsulation. 
+type UDP struct { + LayerBase + SrcPort *uint16 + DstPort *uint16 + Length *uint16 + Checksum *uint16 +} + +func (l *UDP) toBytes() ([]byte, error) { + b := make([]byte, header.UDPMinimumSize) + h := header.UDP(b) + if l.SrcPort != nil { + h.SetSourcePort(*l.SrcPort) + } + if l.DstPort != nil { + h.SetDestinationPort(*l.DstPort) + } + if l.Length != nil { + h.SetLength(*l.Length) + } else { + h.SetLength(uint16(totalLength(l))) + } + if l.Checksum != nil { + h.SetChecksum(*l.Checksum) + return h, nil + } + if err := setUDPChecksum(&h, l); err != nil { + return nil, err + } + return h, nil +} + +// setUDPChecksum calculates the checksum of the UDP header and sets it in h. +func setUDPChecksum(h *header.UDP, udp *UDP) error { + h.SetChecksum(0) + xsum, err := layerChecksum(udp, header.UDPProtocolNumber) + if err != nil { + return err + } + h.SetChecksum(^h.CalculateChecksum(xsum)) + return nil +} + +// ParseUDP parses the bytes assuming that they start with a udp header and +// continues parsing further encapsulations. +func ParseUDP(b []byte) (Layers, error) { + h := header.UDP(b) + udp := UDP{ + SrcPort: Uint16(h.SourcePort()), + DstPort: Uint16(h.DestinationPort()), + Length: Uint16(h.Length()), + Checksum: Uint16(h.Checksum()), + } + layers := Layers{&udp} + moreLayers, err := ParsePayload(b[udp.length():]) + if err != nil { + return nil, err + } + return append(layers, moreLayers...), nil +} + +func (l *UDP) match(other Layer) bool { + return equalLayer(l, other) +} + +func (l *UDP) length() int { + if l.Length == nil { + return header.UDPMinimumSize + } + return int(*l.Length) +} + +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *UDP) merge(other UDP) error { + return mergo.Merge(l, other, mergo.WithOverride) +} + // Payload has bytes beyond OSI layer 4. type Payload struct { LayerBase diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 1dff2a4d5..9a4d66ea9 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -15,6 +15,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "udp_recv_multicast", + srcs = ["udp_recv_multicast_test.go"], + # TODO(b/152813495): Fix netstack then remove the line below. + netstack = False, + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/Dockerfile b/test/packetimpact/tests/Dockerfile index 507030cc7..9075bc555 100644 --- a/test/packetimpact/tests/Dockerfile +++ b/test/packetimpact/tests/Dockerfile @@ -1,5 +1,17 @@ FROM ubuntu:bionic -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y iptables netcat tcpdump iproute2 tshark +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + # iptables to disable OS native packet processing. + iptables \ + # nc to check that the posix_server is running. + netcat \ + # tcpdump to log brief packet sniffing. + tcpdump \ + # ip link show to display MAC addresses. + iproute2 \ + # tshark to log verbose packet sniffing. + tshark \ + # killall for cleanup. 
+ psmisc RUN hash -r CMD /bin/bash diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl index 1b4213d9b..8c0d058b2 100644 --- a/test/packetimpact/tests/defs.bzl +++ b/test/packetimpact/tests/defs.bzl @@ -93,7 +93,17 @@ def packetimpact_netstack_test(name, testbench_binary, **kwargs): **kwargs ) -def packetimpact_go_test(name, size = "small", pure = True, **kwargs): +def packetimpact_go_test(name, size = "small", pure = True, linux = True, netstack = True, **kwargs): + """Add packetimpact tests written in go. + + Args: + name: name of the test + size: size of the test + pure: make a static go binary + linux: generate a linux test + netstack: generate a netstack test + **kwargs: all the other args, forwarded to go_test + """ testbench_binary = name + "_test" go_test( name = testbench_binary, @@ -102,5 +112,7 @@ def packetimpact_go_test(name, size = "small", pure = True, **kwargs): tags = PACKETIMPACT_TAGS, **kwargs ) - packetimpact_linux_test(name = name, testbench_binary = testbench_binary) - packetimpact_netstack_test(name = name, testbench_binary = testbench_binary) + if linux: + packetimpact_linux_test(name = name, testbench_binary = testbench_binary) + if netstack: + packetimpact_netstack_test(name = name, testbench_binary = testbench_binary) diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go index 5f54e67ed..2b3f39045 100644 --- a/test/packetimpact/tests/fin_wait2_timeout_test.go +++ b/test/packetimpact/tests/fin_wait2_timeout_test.go @@ -36,7 +36,7 @@ func TestFinWait2Timeout(t *testing.T) { defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, dut, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) defer conn.Close() conn.Handshake() diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh index 5281cb53d..e99fc7d09 100755 --- a/test/packetimpact/tests/test_runner.sh +++ b/test/packetimpact/tests/test_runner.sh @@ -29,13 +29,15 @@ function failure() { } trap 'failure ${LINENO} "$BASH_COMMAND"' ERR -declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark" +declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark,extra_test_arg:" # Don't use declare below so that the error from getopt will end the script. PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") eval set -- "$PARSED" +declare -a EXTRA_TEST_ARGS + while true; do case "$1" in --dut_platform) @@ -62,6 +64,10 @@ while true; do declare -r TSHARK="1" shift 1 ;; + --extra_test_arg) + EXTRA_TEST_ARGS+="$2" + shift 2 + ;; --) shift break @@ -125,6 +131,19 @@ docker --version function finish { local cleanup_success=1 + + if [[ -z "${TSHARK-}" ]]; then + # Kill tcpdump so that it will flush output. + docker exec -t "${TESTBENCH}" \ + killall tcpdump || \ + cleanup_success=0 + else + # Kill tshark so that it will flush output. + docker exec -t "${TESTBENCH}" \ + killall tshark || \ + cleanup_success=0 + fi + for net in "${CTRL_NET}" "${TEST_NET}"; do # Kill all processes attached to ${net}. for docker_command in "kill" "rm"; do @@ -224,6 +243,8 @@ else # interface with the test packets. 
docker exec -t "${TESTBENCH}" \ tshark -V -l -n -i "${TEST_DEVICE}" \ + -o tcp.check_checksum:TRUE \ + -o udp.check_checksum:TRUE \ host "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" & fi @@ -235,6 +256,7 @@ sleep 3 # be executed on the DUT. docker exec -t "${TESTBENCH}" \ /bin/bash -c "${DOCKER_TESTBENCH_BINARY} \ + ${EXTRA_TEST_ARGS[@]-} \ --posix_server_ip=${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ --posix_server_port=${CTRL_PORT} \ --remote_ipv4=${TEST_NET_PREFIX}${DUT_NET_SUFFIX} \ diff --git a/test/packetimpact/tests/udp_recv_multicast_test.go b/test/packetimpact/tests/udp_recv_multicast_test.go new file mode 100644 index 000000000..bc1b0be49 --- /dev/null +++ b/test/packetimpact/tests/udp_recv_multicast_test.go @@ -0,0 +1,37 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package udp_recv_multicast_test + +import ( + "net" + "testing" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func TestUDPRecvMulticast(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(boundFD) + conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + defer conn.Close() + frame := conn.CreateFrame(tb.UDP{}, &tb.Payload{Bytes: []byte("hello world")}) + frame[1].(*tb.IPv4).DstAddr = tb.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4())) + conn.SendFrame(frame) + dut.Recv(boundFD, 100, 0) +} -- cgit v1.2.3 From 38f4501c995d7d915bcd168d58655e67e2b34566 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 1 Apr 2020 11:25:43 -0700 Subject: Add context.Context argument to XxxWithErrno functions This allows control over the gRPC timeouts as needed. PiperOrigin-RevId: 304225713 --- test/packetimpact/testbench/dut.go | 81 ++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 34 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index f80dbb35f..d102dc7bb 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -143,13 +143,12 @@ func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr { } // BindWithErrno calls bind on the DUT. -func (dut *DUT) BindWithErrno(fd int32, sa unix.Sockaddr) (int32, error) { +func (dut *DUT) BindWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) (int32, error) { dut.t.Helper() req := pb.BindRequest{ Sockfd: fd, Addr: dut.sockaddrToProto(sa), } - ctx := context.Background() resp, err := dut.posixServer.Bind(ctx, &req) if err != nil { dut.t.Fatalf("failed to call Bind: %s", err) @@ -158,22 +157,24 @@ func (dut *DUT) BindWithErrno(fd int32, sa unix.Sockaddr) (int32, error) { } // Bind calls bind on the DUT and causes a fatal test failure if it doesn't -// succeed. +// succeed. If more control over the timeout or error handling is +// needed, use BindWithErrno. 
func (dut *DUT) Bind(fd int32, sa unix.Sockaddr) { dut.t.Helper() - ret, err := dut.BindWithErrno(fd, sa) + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.BindWithErrno(ctx, fd, sa) if ret != 0 { dut.t.Fatalf("failed to bind socket: %s", err) } } // GetSockNameWithErrno calls getsockname on the DUT. -func (dut *DUT) GetSockNameWithErrno(sockfd int32) (int32, unix.Sockaddr, error) { +func (dut *DUT) GetSockNameWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) { dut.t.Helper() req := pb.GetSockNameRequest{ Sockfd: sockfd, } - ctx := context.Background() resp, err := dut.posixServer.GetSockName(ctx, &req) if err != nil { dut.t.Fatalf("failed to call Bind: %s", err) @@ -182,10 +183,13 @@ func (dut *DUT) GetSockNameWithErrno(sockfd int32) (int32, unix.Sockaddr, error) } // GetSockName calls getsockname on the DUT and causes a fatal test failure if -// it doens't succeed. +// it doesn't succeed. If more control over the timeout or error handling is +// needed, use GetSockNameWithErrno. func (dut *DUT) GetSockName(sockfd int32) unix.Sockaddr { dut.t.Helper() - ret, sa, err := dut.GetSockNameWithErrno(sockfd) + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, sa, err := dut.GetSockNameWithErrno(ctx, sockfd) if ret != 0 { dut.t.Fatalf("failed to getsockname: %s", err) } @@ -193,14 +197,12 @@ func (dut *DUT) GetSockName(sockfd int32) unix.Sockaddr { } // ListenWithErrno calls listen on the DUT. -func (dut *DUT) ListenWithErrno(sockfd, backlog int32) (int32, error) { +func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int32, error) { dut.t.Helper() req := pb.ListenRequest{ Sockfd: sockfd, Backlog: backlog, } - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) - defer cancel() resp, err := dut.posixServer.Listen(ctx, &req) if err != nil { dut.t.Fatalf("failed to call Listen: %s", err) @@ -209,23 +211,24 @@ func (dut *DUT) ListenWithErrno(sockfd, backlog int32) (int32, error) { } // Listen calls listen on the DUT and causes a fatal test failure if it doesn't -// succeed. +// succeed. If more control over the timeout or error handling is needed, use +// ListenWithErrno. func (dut *DUT) Listen(sockfd, backlog int32) { dut.t.Helper() - ret, err := dut.ListenWithErrno(sockfd, backlog) + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.ListenWithErrno(ctx, sockfd, backlog) if ret != 0 { dut.t.Fatalf("failed to listen: %s", err) } } // AcceptWithErrno calls accept on the DUT. -func (dut *DUT) AcceptWithErrno(sockfd int32) (int32, unix.Sockaddr, error) { +func (dut *DUT) AcceptWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) { dut.t.Helper() req := pb.AcceptRequest{ Sockfd: sockfd, } - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) - defer cancel() resp, err := dut.posixServer.Accept(ctx, &req) if err != nil { dut.t.Fatalf("failed to call Accept: %s", err) @@ -234,18 +237,23 @@ func (dut *DUT) AcceptWithErrno(sockfd int32) (int32, unix.Sockaddr, error) { } // Accept calls accept on the DUT and causes a fatal test failure if it doesn't -// succeed. +// succeed. If more control over the timeout or error handling is needed, use +// AcceptWithErrno. 
func (dut *DUT) Accept(sockfd int32) (int32, unix.Sockaddr) { dut.t.Helper() - fd, sa, err := dut.AcceptWithErrno(sockfd) + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + fd, sa, err := dut.AcceptWithErrno(ctx, sockfd) if fd < 0 { dut.t.Fatalf("failed to accept: %s", err) } return fd, sa } -// SetSockOptWithErrno calls setsockopt on the DUT. -func (dut *DUT) SetSockOptWithErrno(sockfd, level, optname int32, optval []byte) (int32, error) { +// SetSockOptWithErrno calls setsockopt on the DUT. Because endianess and the +// width of values might differ between the testbench and DUT architectures, +// prefer to use a more specific SetSockOptXxxWithErrno function. +func (dut *DUT) SetSockOptWithErrno(ctx context.Context, sockfd, level, optname int32, optval []byte) (int32, error) { dut.t.Helper() req := pb.SetSockOptRequest{ Sockfd: sockfd, @@ -253,8 +261,6 @@ func (dut *DUT) SetSockOptWithErrno(sockfd, level, optname int32, optval []byte) Optname: optname, Optval: optval, } - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) - defer cancel() resp, err := dut.posixServer.SetSockOpt(ctx, &req) if err != nil { dut.t.Fatalf("failed to call SetSockOpt: %s", err) @@ -263,10 +269,15 @@ func (dut *DUT) SetSockOptWithErrno(sockfd, level, optname int32, optval []byte) } // SetSockOpt calls setsockopt on the DUT and causes a fatal test failure if it -// doesn't succeed. +// doesn't succeed. If more control over the timeout or error handling is +// needed, use SetSockOptWithErrno. Because endianess and the width of values +// might differ between the testbench and DUT architectures, prefer to use a +// more specific SetSockOptXxx function. func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) { dut.t.Helper() - ret, err := dut.SetSockOptWithErrno(sockfd, level, optname, optval) + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.SetSockOptWithErrno(ctx, sockfd, level, optname, optval) if ret != 0 { dut.t.Fatalf("failed to SetSockOpt: %s", err) } @@ -274,7 +285,7 @@ func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) { // SetSockOptTimevalWithErrno calls setsockopt with the timeval converted to // bytes. -func (dut *DUT) SetSockOptTimevalWithErrno(sockfd, level, optname int32, tv *unix.Timeval) (int32, error) { +func (dut *DUT) SetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, optname int32, tv *unix.Timeval) (int32, error) { dut.t.Helper() timeval := pb.Timeval{ Seconds: int64(tv.Sec), @@ -286,8 +297,6 @@ func (dut *DUT) SetSockOptTimevalWithErrno(sockfd, level, optname int32, tv *uni Optname: optname, Timeval: &timeval, } - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) - defer cancel() resp, err := dut.posixServer.SetSockOptTimeval(ctx, &req) if err != nil { dut.t.Fatalf("failed to call SetSockOptTimeval: %s", err) @@ -296,10 +305,13 @@ func (dut *DUT) SetSockOptTimevalWithErrno(sockfd, level, optname int32, tv *uni } // SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure -// if it doesn't succeed. +// if it doesn't succeed. If more control over the timeout or error handling is +// needed, use SetSockOptTimevalWithErrno. 
func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval) { dut.t.Helper() - ret, err := dut.SetSockOptTimevalWithErrno(sockfd, level, optname, tv) + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.SetSockOptTimevalWithErrno(ctx, sockfd, level, optname, tv) if ret != 0 { dut.t.Fatalf("failed to SetSockOptTimeval: %s", err) } @@ -335,13 +347,11 @@ func (dut *DUT) Recv(sockfd, len, flags int32) []byte { } // CloseWithErrno calls close on the DUT. -func (dut *DUT) CloseWithErrno(fd int32) (int32, error) { +func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) { dut.t.Helper() req := pb.CloseRequest{ Fd: fd, } - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) - defer cancel() resp, err := dut.posixServer.Close(ctx, &req) if err != nil { dut.t.Fatalf("failed to call Close: %s", err) @@ -350,10 +360,13 @@ func (dut *DUT) CloseWithErrno(fd int32) (int32, error) { } // Close calls close on the DUT and causes a fatal test failure if it doesn't -// succeed. +// succeed. If more control over the timeout or error handling is needed, use +// CloseWithErrno. func (dut *DUT) Close(fd int32) { dut.t.Helper() - ret, err := dut.CloseWithErrno(fd) + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.CloseWithErrno(ctx, fd) if ret != 0 { dut.t.Fatalf("failed to close: %s", err) } -- cgit v1.2.3 From c6d5742c21c19f9cf8b964b49b8df935c1303417 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 2 Apr 2020 10:39:56 -0700 Subject: Fix flaky TCPLinger2TimeoutAfterClose test. The test is flaky in cooperative S/R mode because TCP timers are not restored across a S/R. This can cause the TCPLinger2 timer to not fire. This change disables S/R before setting the TCP_LINGER2 timeout. PiperOrigin-RevId: 304430536 --- test/syscalls/linux/socket_inet_loopback.cc | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index b24618a88..16888de2a 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -605,15 +605,23 @@ TEST_P(SocketInetLoopbackTest, TCPLinger2TimeoutAfterClose_NoRandomSave) { &conn_addrlen), SyscallSucceeds()); - constexpr int kTCPLingerTimeout = 5; - EXPECT_THAT(setsockopt(conn_fd.get(), IPPROTO_TCP, TCP_LINGER2, - &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)), - SyscallSucceedsWithValue(0)); - - // close the connecting FD to trigger FIN_WAIT2 on the connected fd. - conn_fd.reset(); - - absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 1)); + // Disable cooperative saves after this point as TCP timers are not restored + // across a S/R. + { + DisableSave ds; + constexpr int kTCPLingerTimeout = 5; + EXPECT_THAT(setsockopt(conn_fd.get(), IPPROTO_TCP, TCP_LINGER2, + &kTCPLingerTimeout, sizeof(kTCPLingerTimeout)), + SyscallSucceedsWithValue(0)); + + // close the connecting FD to trigger FIN_WAIT2 on the connected fd. + conn_fd.reset(); + + absl::SleepFor(absl::Seconds(kTCPLingerTimeout + 1)); + + // ds going out of scope will Re-enable S/R's since at this point the timer + // must have fired and cleaned up the endpoint. + } // Now bind and connect a new socket and verify that we can immediately // rebind the address bound by the conn_fd as it never entered TIME_WAIT. 
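The guard in the hunk above follows a simple scoping idiom: constructing DisableSave suppresses cooperative save/restore cycles, and letting it fall out of scope re-enables them. A minimal sketch of the idiom, where RunTimerSensitiveSteps is a hypothetical placeholder for whatever must not be interrupted by a save:

{
  // No cooperative S/R can occur while `ds` is alive, so timer-driven state
  // such as the TCP_LINGER2 timeout cannot be silently lost mid-test.
  DisableSave ds;
  RunTimerSensitiveSteps();
}  // `ds` is destroyed here and cooperative S/R is re-enabled.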
-- cgit v1.2.3 From 5b2396d244ed6283d928a72bdd4cc58d78ef3175 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 2 Apr 2020 17:06:19 -0700 Subject: Fix typo in TODO comments. PiperOrigin-RevId: 304508083 --- pkg/sentry/fs/proc/mounts.go | 3 ++- pkg/sentry/fsimpl/tmpfs/filesystem.go | 6 +++--- pkg/sentry/fsimpl/tmpfs/stat_test.go | 4 ++-- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 2 +- pkg/sentry/socket/netstack/netstack.go | 2 +- pkg/sentry/vfs/mount.go | 3 ++- test/syscalls/linux/socket_inet_loopback.cc | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go index 94deb553b..1fc9c703c 100644 --- a/pkg/sentry/fs/proc/mounts.go +++ b/pkg/sentry/fs/proc/mounts.go @@ -170,7 +170,8 @@ func superBlockOpts(mountPath string, msrc *fs.MountSource) string { // NOTE(b/147673608): If the mount is a cgroup, we also need to include // the cgroup name in the options. For now we just read that from the // path. - // TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, we + // + // TODO(gvisor.dev/issue/190): Once gVisor has full cgroup support, we // should get this value from the cgroup itself, and not rely on the // path. if msrc.FilesystemType == "cgroup" { diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index e678ecc37..4cf27bf13 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -57,7 +57,7 @@ afterSymlink: } next := nextVFSD.Impl().(*dentry) if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO(gvisor.dev/issues/1197): Symlink traversals updates + // TODO(gvisor.dev/issue/1197): Symlink traversals updates // access time. if err := rp.HandleSymlink(symlink.target); err != nil { return nil, err @@ -515,7 +515,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa oldParent.inode.decLinksLocked() newParent.inode.incLinksLocked() } - // TODO(gvisor.dev/issues/1197): Update timestamps and parent directory + // TODO(gvisor.dev/issue/1197): Update timestamps and parent directory // sizes. vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD) return nil @@ -600,7 +600,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu if err != nil { return linux.Statfs{}, err } - // TODO(gvisor.dev/issues/1197): Actually implement statfs. + // TODO(gvisor.dev/issue/1197): Actually implement statfs. return linux.Statfs{}, syserror.ENOSYS } diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go index ebe035dee..3e02e7190 100644 --- a/pkg/sentry/fsimpl/tmpfs/stat_test.go +++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go @@ -29,7 +29,7 @@ func TestStatAfterCreate(t *testing.T) { mode := linux.FileMode(0644) // Run with different file types. - // TODO(gvisor.dev/issues/1197): Also test symlinks and sockets. + // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets. for _, typ := range []string{"file", "dir", "pipe"} { t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { var ( @@ -169,7 +169,7 @@ func TestSetStat(t *testing.T) { mode := linux.FileMode(0644) // Run with different file types. - // TODO(gvisor.dev/issues/1197): Also test symlinks and sockets. + // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets. 
for _, typ := range []string{"file", "dir", "pipe"} { t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { var ( diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 8bc8818c0..54da15849 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -315,7 +315,7 @@ func (i *inode) statTo(stat *linux.Statx) { stat.Atime = linux.NsecToStatxTimestamp(i.atime) stat.Ctime = linux.NsecToStatxTimestamp(i.ctime) stat.Mtime = linux.NsecToStatxTimestamp(i.mtime) - // TODO(gvisor.dev/issues/1197): Device number. + // TODO(gvisor.dev/issue/1197): Device number. switch impl := i.impl.(type) { case *regularFile: stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index f14c336b9..06a5b53bc 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -663,7 +663,7 @@ func (s *SocketOperations) checkFamily(family uint16, exact bool) *syserr.Error // This is a hack to work around the fact that both IPv4 and IPv6 ANY are // represented by the empty string. // -// TODO(gvisor.dev/issues/1556): remove this function. +// TODO(gvisor.dev/issue/1556): remove this function. func (s *SocketOperations) mapFamily(addr tcpip.FullAddress, family uint16) tcpip.FullAddress { if len(addr.Addr) == 0 && s.family == linux.AF_INET6 && family == linux.AF_INET { addr.Addr = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00" diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 7792eb1a0..1b8ecc415 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -835,7 +835,8 @@ func superBlockOpts(mountPath string, mnt *Mount) string { // NOTE(b/147673608): If the mount is a cgroup, we also need to include // the cgroup name in the options. For now we just read that from the // path. - // TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, we + // + // TODO(gvisor.dev/issue/190): Once gVisor has full cgroup support, we // should get this value from the cgroup itself, and not rely on the // path. if mnt.fs.FilesystemType().Name() == "cgroup" { diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 16888de2a..2ffc86382 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -234,7 +234,7 @@ TEST_P(DualStackSocketTest, AddressOperations) { } } -// TODO(gvisor.dev/issues/1556): uncomment V4MappedAny. +// TODO(gvisor.dev/issue/1556): uncomment V4MappedAny. INSTANTIATE_TEST_SUITE_P( All, DualStackSocketTest, ::testing::Combine( -- cgit v1.2.3 From 1921c246a9907cd1623af4aabde086af9cf172d8 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Fri, 3 Apr 2020 10:19:42 -0700 Subject: Internal change. 
PiperOrigin-RevId: 304641990 --- test/syscalls/linux/proc_net_unix.cc | 6 +++--- test/syscalls/linux/pty.cc | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/proc_net_unix.cc b/test/syscalls/linux/proc_net_unix.cc index 66db0acaa..a63067586 100644 --- a/test/syscalls/linux/proc_net_unix.cc +++ b/test/syscalls/linux/proc_net_unix.cc @@ -106,7 +106,7 @@ PosixErrorOr> ProcNetUnixEntries() { std::vector entries; std::vector lines = absl::StrSplit(content, '\n'); std::cerr << "" << std::endl; - for (std::string line : lines) { + for (const std::string& line : lines) { // Emit the proc entry to the test output to provide context for the test // results. std::cerr << line << std::endl; @@ -374,7 +374,7 @@ TEST(ProcNetUnix, DgramSocketStateDisconnectingOnBind) { // corresponding entries, as they don't have an address yet. if (IsRunningOnGvisor()) { ASSERT_EQ(entries.size(), 2); - for (auto e : entries) { + for (const auto& e : entries) { ASSERT_EQ(e.state, SS_DISCONNECTING); } } @@ -403,7 +403,7 @@ TEST(ProcNetUnix, DgramSocketStateConnectingOnConnect) { // corresponding entries, as they don't have an address yet. if (IsRunningOnGvisor()) { ASSERT_EQ(entries.size(), 2); - for (auto e : entries) { + for (const auto& e : entries) { ASSERT_EQ(e.state, SS_DISCONNECTING); } } diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index dafe64d20..b8a0159ba 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -1126,7 +1126,7 @@ TEST_F(PtyTest, SwitchTwiceMultiline) { std::string kExpected = "GO\nBLUE\n!"; // Write each line. - for (std::string input : kInputs) { + for (const std::string& input : kInputs) { ASSERT_THAT(WriteFd(master_.get(), input.c_str(), input.size()), SyscallSucceedsWithValue(input.size())); } -- cgit v1.2.3 From ea98693d915ebb55bb6b93797bc58d7675ffbe9d Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 3 Apr 2020 11:37:16 -0700 Subject: Add missing newline PiperOrigin-RevId: 304659346 --- test/syscalls/linux/exec.cc | 10 ++++++---- test/syscalls/linux/poll.cc | 2 +- test/syscalls/linux/proc_pid_smaps.cc | 4 ++-- test/syscalls/linux/ptrace.cc | 2 +- test/syscalls/linux/sendfile_socket.cc | 2 +- test/syscalls/linux/socket_inet_loopback.cc | 2 +- test/syscalls/linux/socket_netlink_route.cc | 2 +- test/util/capability_util.cc | 4 ++-- 8 files changed, 15 insertions(+), 13 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index 07bd527e6..12c9b05ca 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -812,26 +812,28 @@ void ExecFromThread() { bool ValidateProcCmdlineVsArgv(const int argc, const char* const* argv) { auto contents_or = GetContents("/proc/self/cmdline"); if (!contents_or.ok()) { - std::cerr << "Unable to get /proc/self/cmdline: " << contents_or.error(); + std::cerr << "Unable to get /proc/self/cmdline: " << contents_or.error() + << std::endl; return false; } auto contents = contents_or.ValueOrDie(); if (contents.back() != '\0') { - std::cerr << "Non-null terminated /proc/self/cmdline!"; + std::cerr << "Non-null terminated /proc/self/cmdline!" 
<< std::endl; return false; } contents.pop_back(); std::vector procfs_cmdline = absl::StrSplit(contents, '\0'); if (static_cast(procfs_cmdline.size()) != argc) { - std::cerr << "argc = " << argc << " != " << procfs_cmdline.size(); + std::cerr << "argc = " << argc << " != " << procfs_cmdline.size() + << std::endl; return false; } for (int i = 0; i < argc; ++i) { if (procfs_cmdline[i] != argv[i]) { std::cerr << "Procfs command line argument " << i << " mismatch " - << procfs_cmdline[i] << " != " << argv[i]; + << procfs_cmdline[i] << " != " << argv[i] << std::endl; return false; } } diff --git a/test/syscalls/linux/poll.cc b/test/syscalls/linux/poll.cc index c42472474..1e35a4a8b 100644 --- a/test/syscalls/linux/poll.cc +++ b/test/syscalls/linux/poll.cc @@ -266,7 +266,7 @@ TEST_F(PollTest, Nfds) { } rlim_t max_fds = rlim.rlim_cur; - std::cout << "Using limit: " << max_fds; + std::cout << "Using limit: " << max_fds << std::endl; // Create an eventfd. Since its value is initially zero, it is writable. FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()); diff --git a/test/syscalls/linux/proc_pid_smaps.cc b/test/syscalls/linux/proc_pid_smaps.cc index 7f2e8f203..9fb1b3a2c 100644 --- a/test/syscalls/linux/proc_pid_smaps.cc +++ b/test/syscalls/linux/proc_pid_smaps.cc @@ -173,7 +173,7 @@ PosixErrorOr> ParseProcPidSmaps( return; } unknown_fields.insert(std::string(key)); - std::cerr << "skipping unknown smaps field " << key; + std::cerr << "skipping unknown smaps field " << key << std::endl; }; auto lines = absl::StrSplit(contents, '\n', absl::SkipEmpty()); @@ -191,7 +191,7 @@ PosixErrorOr> ParseProcPidSmaps( // amount of whitespace). if (!entry) { std::cerr << "smaps line not considered a maps line: " - << maybe_maps_entry.error_message(); + << maybe_maps_entry.error_message() << std::endl; return PosixError( EINVAL, absl::StrCat("smaps field line without preceding maps line: ", l)); diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc index bfe3e2603..cb828ff88 100644 --- a/test/syscalls/linux/ptrace.cc +++ b/test/syscalls/linux/ptrace.cc @@ -1188,7 +1188,7 @@ TEST(PtraceTest, SeizeSetOptions) { // gVisor is not susceptible to this race because // kernel.Task.waitCollectTraceeStopLocked() checks specifically for an // active ptraceStop, which is not initiated if SIGKILL is pending. - std::cout << "Observed syscall-exit after SIGKILL"; + std::cout << "Observed syscall-exit after SIGKILL" << std::endl; ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceedsWithValue(child_pid)); } diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc index 8f7ee4163..e94672679 100644 --- a/test/syscalls/linux/sendfile_socket.cc +++ b/test/syscalls/linux/sendfile_socket.cc @@ -149,7 +149,7 @@ TEST_P(SendFileTest, SendMultiple) { for (size_t sent = 0; sent < data.size(); cnt++) { const size_t remain = data.size() - sent; std::cout << "sendfile, size=" << data.size() << ", sent=" << sent - << ", remain=" << remain; + << ", remain=" << remain << std::endl; // Send data and verify that sendfile returns the correct value. 
int res = sendfile(client.get(), inf.get(), nullptr, remain); diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 2ffc86382..1b34e4ef7 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -2212,7 +2212,7 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, PortReuseTwoSockets) { setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &portreuse2, sizeof(int)), SyscallSucceeds()); - std::cout << portreuse1 << " " << portreuse2; + std::cout << portreuse1 << " " << portreuse2 << std::endl; int ret = bind(fd2, reinterpret_cast(&addr), addrlen); // Verify that two sockets can be bound to the same port only if diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index e5aed1eec..2efb96bc3 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -152,7 +152,7 @@ TEST(NetlinkRouteTest, GetLinkDump) { const struct ifinfomsg* msg = reinterpret_cast(NLMSG_DATA(hdr)); std::cout << "Found interface idx=" << msg->ifi_index - << ", type=" << std::hex << msg->ifi_type; + << ", type=" << std::hex << msg->ifi_type << std::endl; if (msg->ifi_type == ARPHRD_LOOPBACK) { loopbackFound = true; EXPECT_NE(msg->ifi_flags & IFF_LOOPBACK, 0); diff --git a/test/util/capability_util.cc b/test/util/capability_util.cc index 9fee52fbb..a1b994c45 100644 --- a/test/util/capability_util.cc +++ b/test/util/capability_util.cc @@ -63,13 +63,13 @@ PosixErrorOr CanCreateUserNamespace() { // is in a chroot environment (i.e., the caller's root directory does // not match the root directory of the mount namespace in which it // resides)." - std::cerr << "clone(CLONE_NEWUSER) failed with EPERM"; + std::cerr << "clone(CLONE_NEWUSER) failed with EPERM" << std::endl; return false; } else if (errno == EUSERS) { // "(since Linux 3.11) CLONE_NEWUSER was specified in flags, and the call // would cause the limit on the number of nested user namespaces to be // exceeded. See user_namespaces(7)." - std::cerr << "clone(CLONE_NEWUSER) failed with EUSERS"; + std::cerr << "clone(CLONE_NEWUSER) failed with EUSERS" << std::endl; return false; } else { // Unexpected error code; indicate an actual error. 
-- cgit v1.2.3 From 4032cf06e4fe5f952f145427dec34629939df980 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 3 Apr 2020 12:28:00 -0700 Subject: Deflake //third_party/gvisor/test/perf:getdents_benchmark_runsc_ptrace * Increase a buffer size for getdents64 * Increase a number of shards PiperOrigin-RevId: 304670004 --- test/perf/BUILD | 1 + test/perf/linux/getdents_benchmark.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/perf/BUILD b/test/perf/BUILD index 0a0def6a3..471d8c2ab 100644 --- a/test/perf/BUILD +++ b/test/perf/BUILD @@ -30,6 +30,7 @@ syscall_test( syscall_test( size = "enormous", + shard_count = 10, tags = ["nogotsan"], test = "//test/perf/linux:getdents_benchmark", ) diff --git a/test/perf/linux/getdents_benchmark.cc b/test/perf/linux/getdents_benchmark.cc index afc599ad2..d8e81fa8c 100644 --- a/test/perf/linux/getdents_benchmark.cc +++ b/test/perf/linux/getdents_benchmark.cc @@ -38,7 +38,7 @@ namespace testing { namespace { -constexpr int kBufferSize = 16384; +constexpr int kBufferSize = 65536; PosixErrorOr CreateDirectory(int count, std::vector* files) { -- cgit v1.2.3 From a94309628ebbc2e6c4997890f1b966fa7a16be20 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Fri, 3 Apr 2020 13:39:45 -0700 Subject: Ensure EOF is handled propertly during splice. PiperOrigin-RevId: 304684417 --- pkg/sentry/kernel/pipe/pipe.go | 13 ++++++++++--- test/syscalls/linux/sendfile.cc | 28 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 725e9db7d..62c8691f1 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -255,7 +255,8 @@ func (p *Pipe) write(ctx context.Context, ops writeOps) (int64, error) { // POSIX requires that a write smaller than atomicIOBytes (PIPE_BUF) be // atomic, but requires no atomicity for writes larger than this. wanted := ops.left() - if avail := p.max - p.view.Size(); wanted > avail { + avail := p.max - p.view.Size() + if wanted > avail { if wanted <= p.atomicIOBytes { return 0, syserror.ErrWouldBlock } @@ -268,8 +269,14 @@ func (p *Pipe) write(ctx context.Context, ops writeOps) (int64, error) { return done, err } - if wanted > done { - // Partial write due to full pipe. + if done < avail { + // Non-failure, but short write. + return done, nil + } + if done < wanted { + // Partial write due to full pipe. Note that this could also be + // the short write case above, we would expect a second call + // and the write to return zero bytes in this case. return done, syserror.ErrWouldBlock } diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc index 580ab5193..ebaafe47e 100644 --- a/test/syscalls/linux/sendfile.cc +++ b/test/syscalls/linux/sendfile.cc @@ -530,6 +530,34 @@ TEST(SendFileTest, SendToSpecialFile) { SyscallSucceedsWithValue(kSize & (~7))); } +TEST(SendFileTest, SendFileToPipe) { + // Create temp file. + constexpr char kData[] = ""; + constexpr int kDataSize = sizeof(kData) - 1; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Create a pipe for sending to a pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Expect to read up to the given size. 
+ std::vector buf(kDataSize); + ScopedThread t([&]() { + absl::SleepFor(absl::Milliseconds(100)); + ASSERT_THAT(read(rfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kDataSize)); + }); + + // Send with twice the size of the file, which should hit EOF. + EXPECT_THAT(sendfile(wfd.get(), inf.get(), nullptr, kDataSize * 2), + SyscallSucceedsWithValue(kDataSize)); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 748290236408f2c3e33b5f208352f8fd2ecbfa1e Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Mon, 6 Apr 2020 06:24:20 -0700 Subject: Implement Stringer for Layer Tested: Sample output for printing 3 different Layer structs: &testbench.Ether{SrcAddr:02:42:c4:77:5d:14 DstAddr:02:42:c4:77:5d:0a} &testbench.IPv4{SrcAddr:196.119.93.20 DstAddr:224.0.0.1} &testbench.UDP{SrcPort:0xc00033b260 DstPort:0xc00033b280} Sample output for printing a Layers struct (word-wrapped): [&testbench.Ether{SrcAddr:02:42:c4:77:5d:14 DstAddr:02:42:c4:77:5d:0a} &testbench.IPv4{SrcAddr:196.119.93.20 DstAddr:224.0.0.1} &testbench.UDP{SrcPort:0xc00033b260 DstPort:0xc00033b280} &testbench.Payload{Bytes:[104 101 108 108 111 32 119 111 114 108 100]}] PiperOrigin-RevId: 305014376 --- test/packetimpact/testbench/BUILD | 9 ++++- test/packetimpact/testbench/layers.go | 61 ++++++++++++++++++++++++++---- test/packetimpact/testbench/layers_test.go | 49 ++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 test/packetimpact/testbench/layers_test.go (limited to 'test') diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index 4a9d8efa6..199823419 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package( default_visibility = ["//test/packetimpact:__subpackages__"], @@ -30,3 +30,10 @@ go_library( "@org_golang_x_sys//unix:go_default_library", ], ) + +go_test( + name = "testbench_test", + size = "small", + srcs = ["layers_test.go"], + library = ":testbench", +) diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index d7434c3d2..4d6625941 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -17,6 +17,7 @@ package testbench import ( "fmt" "reflect" + "strings" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -32,6 +33,8 @@ import ( // Layer contains all the fields of the encapsulation. Each field is a pointer // and may be nil. type Layer interface { + fmt.Stringer + // toBytes converts the Layer into bytes. In places where the Layer's field // isn't nil, the value that is pointed to is used. When the field is nil, a // reasonable default for the Layer is used. For example, "64" for IPv4 TTL @@ -43,7 +46,8 @@ type Layer interface { // match checks if the current Layer matches the provided Layer. If either // Layer has a nil in a given field, that field is considered matching. - // Otherwise, the values pointed to by the fields must match. + // Otherwise, the values pointed to by the fields must match. The LayerBase is + // ignored. match(Layer) bool // length in bytes of the current encapsulation @@ -84,18 +88,39 @@ func (lb *LayerBase) setPrev(l Layer) { lb.prevLayer = l } +// equalLayer compares that two Layer structs match while ignoring field in +// which either input has a nil and also ignoring the LayerBase of the inputs. 
func equalLayer(x, y Layer) bool { + // opt ignores comparison pairs where either of the inputs is a nil. opt := cmp.FilterValues(func(x, y interface{}) bool { - if reflect.ValueOf(x).Kind() == reflect.Ptr && reflect.ValueOf(x).IsNil() { - return true - } - if reflect.ValueOf(y).Kind() == reflect.Ptr && reflect.ValueOf(y).IsNil() { - return true + for _, l := range []interface{}{x, y} { + v := reflect.ValueOf(l) + if (v.Kind() == reflect.Ptr || v.Kind() == reflect.Slice) && v.IsNil() { + return true + } } return false - }, cmp.Ignore()) - return cmp.Equal(x, y, opt, cmpopts.IgnoreUnexported(LayerBase{})) + return cmp.Equal(x, y, opt, cmpopts.IgnoreTypes(LayerBase{})) +} + +func stringLayer(l Layer) string { + v := reflect.ValueOf(l).Elem() + t := v.Type() + var ret []string + for i := 0; i < v.NumField(); i++ { + t := t.Field(i) + if t.Anonymous { + // Ignore the LayerBase in the Layer struct. + continue + } + v := v.Field(i) + if v.IsNil() { + continue + } + ret = append(ret, fmt.Sprintf("%s:%v", t.Name, v)) + } + return fmt.Sprintf("&%s{%s}", t, strings.Join(ret, " ")) } // Ether can construct and match an ethernet encapsulation. @@ -106,6 +131,10 @@ type Ether struct { Type *tcpip.NetworkProtocolNumber } +func (l *Ether) String() string { + return stringLayer(l) +} + func (l *Ether) toBytes() ([]byte, error) { b := make([]byte, header.EthernetMinimumSize) h := header.Ethernet(b) @@ -190,6 +219,10 @@ type IPv4 struct { DstAddr *tcpip.Address } +func (l *IPv4) String() string { + return stringLayer(l) +} + func (l *IPv4) toBytes() ([]byte, error) { b := make([]byte, header.IPv4MinimumSize) h := header.IPv4(b) @@ -339,6 +372,10 @@ type TCP struct { UrgentPointer *uint16 } +func (l *TCP) String() string { + return stringLayer(l) +} + func (l *TCP) toBytes() ([]byte, error) { b := make([]byte, header.TCPMinimumSize) h := header.TCP(b) @@ -480,6 +517,10 @@ type UDP struct { Checksum *uint16 } +func (l *UDP) String() string { + return stringLayer(l) +} + func (l *UDP) toBytes() ([]byte, error) { b := make([]byte, header.UDPMinimumSize) h := header.UDP(b) @@ -556,6 +597,10 @@ type Payload struct { Bytes []byte } +func (l *Payload) String() string { + return stringLayer(l) +} + // ParsePayload parses the bytes assuming that they start with a payload and // continue to the end. There can be no further encapsulations. func ParsePayload(b []byte) (Layers, error) { diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go new file mode 100644 index 000000000..b39839625 --- /dev/null +++ b/test/packetimpact/testbench/layers_test.go @@ -0,0 +1,49 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package testbench + +import "testing" + +func TestLayerMatch(t *testing.T) { + var nilPayload *Payload + noPayload := &Payload{} + emptyPayload := &Payload{Bytes: []byte{}} + fullPayload := &Payload{Bytes: []byte{1, 2, 3}} + emptyTCP := &TCP{SrcPort: Uint16(1234), LayerBase: LayerBase{nextLayer: emptyPayload}} + fullTCP := &TCP{SrcPort: Uint16(1234), LayerBase: LayerBase{nextLayer: fullPayload}} + for _, tt := range []struct { + a, b Layer + want bool + }{ + {nilPayload, nilPayload, true}, + {nilPayload, noPayload, true}, + {nilPayload, emptyPayload, true}, + {nilPayload, fullPayload, true}, + {noPayload, noPayload, true}, + {noPayload, emptyPayload, true}, + {noPayload, fullPayload, true}, + {emptyPayload, emptyPayload, true}, + {emptyPayload, fullPayload, false}, + {fullPayload, fullPayload, true}, + {emptyTCP, fullTCP, true}, + } { + if got := tt.a.match(tt.b); got != tt.want { + t.Errorf("%s.match(%s) = %t, want %t", tt.a, tt.b, got, tt.want) + } + if got := tt.b.match(tt.a); got != tt.want { + t.Errorf("%s.match(%s) = %t, want %t", tt.b, tt.a, got, tt.want) + } + } +} -- cgit v1.2.3 From 4baa7e70795edbb350d55a9365807341515d3af4 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 6 Apr 2020 09:50:13 -0700 Subject: Bump up acceptable sample count for flaky itimer test. Running the test 1000x almost always produces 1+ test failures where the sample count is slightly more than 60. PiperOrigin-RevId: 305051754 --- test/syscalls/linux/itimer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc index 8b48f0804..dd981a278 100644 --- a/test/syscalls/linux/itimer.cc +++ b/test/syscalls/linux/itimer.cc @@ -246,7 +246,7 @@ int TestSIGPROFFairness(absl::Duration sleep) { // The number of samples on the main thread should be very low as it did // nothing. - TEST_CHECK(result.main_thread_samples < 60); + TEST_CHECK(result.main_thread_samples < 80); // Both workers should get roughly equal number of samples. 
TEST_CHECK(result.worker_samples.size() == 2); -- cgit v1.2.3 From 32fc11ee3e39b7ef1152825090112f4b239887c4 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Mon, 6 Apr 2020 17:52:25 -0700 Subject: Sort posix service functions PiperOrigin-RevId: 305157179 --- test/packetimpact/dut/posix_server.cc | 42 ++-- test/packetimpact/proto/posix_server.proto | 74 +++---- test/packetimpact/testbench/dut.go | 299 +++++++++++++++-------------- 3 files changed, 210 insertions(+), 205 deletions(-) (limited to 'test') diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index 4a71c54c6..b8177f5b1 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -61,13 +61,15 @@ } class PosixImpl final : public posix_server::Posix::Service { - ::grpc::Status Socket(grpc_impl::ServerContext *context, - const ::posix_server::SocketRequest *request, - ::posix_server::SocketResponse *response) override { - response->set_fd( - socket(request->domain(), request->type(), request->protocol())); + ::grpc::Status Accept(grpc_impl::ServerContext *context, + const ::posix_server::AcceptRequest *request, + ::posix_server::AcceptResponse *response) override { + sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + response->set_fd(accept(request->sockfd(), + reinterpret_cast(&addr), &addrlen)); response->set_errno_(errno); - return ::grpc::Status::OK; + return sockaddr_to_proto(addr, addrlen, response->mutable_addr()); } ::grpc::Status Bind(grpc_impl::ServerContext *context, @@ -119,6 +121,14 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } + ::grpc::Status Close(grpc_impl::ServerContext *context, + const ::posix_server::CloseRequest *request, + ::posix_server::CloseResponse *response) override { + response->set_ret(close(request->fd())); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + ::grpc::Status GetSockName( grpc_impl::ServerContext *context, const ::posix_server::GetSockNameRequest *request, @@ -139,17 +149,6 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } - ::grpc::Status Accept(grpc_impl::ServerContext *context, - const ::posix_server::AcceptRequest *request, - ::posix_server::AcceptResponse *response) override { - sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - response->set_fd(accept(request->sockfd(), - reinterpret_cast(&addr), &addrlen)); - response->set_errno_(errno); - return sockaddr_to_proto(addr, addrlen, response->mutable_addr()); - } - ::grpc::Status SetSockOpt( grpc_impl::ServerContext *context, const ::posix_server::SetSockOptRequest *request, @@ -174,10 +173,11 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } - ::grpc::Status Close(grpc_impl::ServerContext *context, - const ::posix_server::CloseRequest *request, - ::posix_server::CloseResponse *response) override { - response->set_ret(close(request->fd())); + ::grpc::Status Socket(grpc_impl::ServerContext *context, + const ::posix_server::SocketRequest *request, + ::posix_server::SocketResponse *response) override { + response->set_fd( + socket(request->domain(), request->type(), request->protocol())); response->set_errno_(errno); return ::grpc::Status::OK; } diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto index 53ec49410..1565f31fa 100644 --- a/test/packetimpact/proto/posix_server.proto +++ b/test/packetimpact/proto/posix_server.proto @@ -16,17 +16,6 @@ 
syntax = "proto3"; package posix_server; -message SocketRequest { - int32 domain = 1; - int32 type = 2; - int32 protocol = 3; -} - -message SocketResponse { - int32 fd = 1; - int32 errno_ = 2; // "errno" may fail to compile in c++. -} - message SockaddrIn { int32 family = 1; uint32 port = 2; @@ -48,6 +37,23 @@ message Sockaddr { } } +message Timeval { + int64 seconds = 1; + int64 microseconds = 2; +} + +// Request and Response pairs for each Posix service RPC call, sorted. + +message AcceptRequest { + int32 sockfd = 1; +} + +message AcceptResponse { + int32 fd = 1; + int32 errno_ = 2; // "errno" may fail to compile in c++. + Sockaddr addr = 3; +} + message BindRequest { int32 sockfd = 1; Sockaddr addr = 2; @@ -58,6 +64,15 @@ message BindResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } +message CloseRequest { + int32 fd = 1; +} + +message CloseResponse { + int32 ret = 1; + int32 errno_ = 2; // "errno" may fail to compile in c++. +} + message GetSockNameRequest { int32 sockfd = 1; } @@ -78,16 +93,6 @@ message ListenResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } -message AcceptRequest { - int32 sockfd = 1; -} - -message AcceptResponse { - int32 fd = 1; - int32 errno_ = 2; // "errno" may fail to compile in c++. - Sockaddr addr = 3; -} - message SetSockOptRequest { int32 sockfd = 1; int32 level = 2; @@ -100,11 +105,6 @@ message SetSockOptResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } -message Timeval { - int64 seconds = 1; - int64 microseconds = 2; -} - message SetSockOptTimevalRequest { int32 sockfd = 1; int32 level = 2; @@ -117,12 +117,14 @@ message SetSockOptTimevalResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } -message CloseRequest { - int32 fd = 1; +message SocketRequest { + int32 domain = 1; + int32 type = 2; + int32 protocol = 3; } -message CloseResponse { - int32 ret = 1; +message SocketResponse { + int32 fd = 1; int32 errno_ = 2; // "errno" may fail to compile in c++. } @@ -139,16 +141,16 @@ message RecvResponse { } service Posix { - // Call socket() on the DUT. - rpc Socket(SocketRequest) returns (SocketResponse); + // Call accept() on the DUT. + rpc Accept(AcceptRequest) returns (AcceptResponse); // Call bind() on the DUT. rpc Bind(BindRequest) returns (BindResponse); + // Call close() on the DUT. + rpc Close(CloseRequest) returns (CloseResponse); // Call getsockname() on the DUT. rpc GetSockName(GetSockNameRequest) returns (GetSockNameResponse); // Call listen() on the DUT. rpc Listen(ListenRequest) returns (ListenResponse); - // Call accept() on the DUT. - rpc Accept(AcceptRequest) returns (AcceptResponse); // Call setsockopt() on the DUT. You should prefer one of the other // SetSockOpt* functions with a more structured optval or else you may get the // encoding wrong, such as making a bad assumption about the server's word @@ -157,8 +159,8 @@ service Posix { // Call setsockopt() on the DUT with a Timeval optval. rpc SetSockOptTimeval(SetSockOptTimevalRequest) returns (SetSockOptTimevalResponse); - // Call close() on the DUT. - rpc Close(CloseRequest) returns (CloseResponse); + // Call socket() on the DUT. + rpc Socket(SocketRequest) returns (SocketResponse); // Call recv() on the DUT. 
rpc Recv(RecvRequest) returns (RecvResponse); } diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index d102dc7bb..f342aee01 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -65,33 +65,6 @@ func (dut *DUT) TearDown() { dut.conn.Close() } -// SocketWithErrno calls socket on the DUT and returns the fd and errno. -func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) { - dut.t.Helper() - req := pb.SocketRequest{ - Domain: domain, - Type: typ, - Protocol: proto, - } - ctx := context.Background() - resp, err := dut.posixServer.Socket(ctx, &req) - if err != nil { - dut.t.Fatalf("failed to call Socket: %s", err) - } - return resp.GetFd(), syscall.Errno(resp.GetErrno_()) -} - -// Socket calls socket on the DUT and returns the file descriptor. If socket -// fails on the DUT, the test ends. -func (dut *DUT) Socket(domain, typ, proto int32) int32 { - dut.t.Helper() - fd, err := dut.SocketWithErrno(domain, typ, proto) - if fd < 0 { - dut.t.Fatalf("failed to create socket: %s", err) - } - return fd -} - func (dut *DUT) sockaddrToProto(sa unix.Sockaddr) *pb.Sockaddr { dut.t.Helper() switch s := sa.(type) { @@ -142,6 +115,88 @@ func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr { return nil } +// CreateBoundSocket makes a new socket on the DUT, with type typ and protocol +// proto, and bound to the IP address addr. Returns the new file descriptor and +// the port that was selected on the DUT. +func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) { + dut.t.Helper() + var fd int32 + if addr.To4() != nil { + fd = dut.Socket(unix.AF_INET, typ, proto) + sa := unix.SockaddrInet4{} + copy(sa.Addr[:], addr.To4()) + dut.Bind(fd, &sa) + } else if addr.To16() != nil { + fd = dut.Socket(unix.AF_INET6, typ, proto) + sa := unix.SockaddrInet6{} + copy(sa.Addr[:], addr.To16()) + dut.Bind(fd, &sa) + } else { + dut.t.Fatal("unknown ip addr type for remoteIP") + } + sa := dut.GetSockName(fd) + var port int + switch s := sa.(type) { + case *unix.SockaddrInet4: + port = s.Port + case *unix.SockaddrInet6: + port = s.Port + default: + dut.t.Fatalf("unknown sockaddr type from getsockname: %t", sa) + } + return fd, uint16(port) +} + +// CreateListener makes a new TCP connection. If it fails, the test ends. +func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { + fd, remotePort := dut.CreateBoundSocket(typ, proto, net.ParseIP(*remoteIPv4)) + dut.Listen(fd, backlog) + return fd, remotePort +} + +// All the functions that make gRPC calls to the Posix service are below, sorted +// alphabetically. + +// Accept calls accept on the DUT and causes a fatal test failure if it doesn't +// succeed. If more control over the timeout or error handling is needed, use +// AcceptWithErrno. +func (dut *DUT) Accept(sockfd int32) (int32, unix.Sockaddr) { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + fd, sa, err := dut.AcceptWithErrno(ctx, sockfd) + if fd < 0 { + dut.t.Fatalf("failed to accept: %s", err) + } + return fd, sa +} + +// AcceptWithErrno calls accept on the DUT. 
+func (dut *DUT) AcceptWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) { + dut.t.Helper() + req := pb.AcceptRequest{ + Sockfd: sockfd, + } + resp, err := dut.posixServer.Accept(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Accept: %s", err) + } + return resp.GetFd(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) +} + +// Bind calls bind on the DUT and causes a fatal test failure if it doesn't +// succeed. If more control over the timeout or error handling is +// needed, use BindWithErrno. +func (dut *DUT) Bind(fd int32, sa unix.Sockaddr) { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.BindWithErrno(ctx, fd, sa) + if ret != 0 { + dut.t.Fatalf("failed to bind socket: %s", err) + } +} + // BindWithErrno calls bind on the DUT. func (dut *DUT) BindWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) (int32, error) { dut.t.Helper() @@ -156,30 +211,30 @@ func (dut *DUT) BindWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) ( return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } -// Bind calls bind on the DUT and causes a fatal test failure if it doesn't -// succeed. If more control over the timeout or error handling is -// needed, use BindWithErrno. -func (dut *DUT) Bind(fd int32, sa unix.Sockaddr) { +// Close calls close on the DUT and causes a fatal test failure if it doesn't +// succeed. If more control over the timeout or error handling is needed, use +// CloseWithErrno. +func (dut *DUT) Close(fd int32) { dut.t.Helper() ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) defer cancel() - ret, err := dut.BindWithErrno(ctx, fd, sa) + ret, err := dut.CloseWithErrno(ctx, fd) if ret != 0 { - dut.t.Fatalf("failed to bind socket: %s", err) + dut.t.Fatalf("failed to close: %s", err) } } -// GetSockNameWithErrno calls getsockname on the DUT. -func (dut *DUT) GetSockNameWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) { +// CloseWithErrno calls close on the DUT. +func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) { dut.t.Helper() - req := pb.GetSockNameRequest{ - Sockfd: sockfd, + req := pb.CloseRequest{ + Fd: fd, } - resp, err := dut.posixServer.GetSockName(ctx, &req) + resp, err := dut.posixServer.Close(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Bind: %s", err) + dut.t.Fatalf("failed to call Close: %s", err) } - return resp.GetRet(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } // GetSockName calls getsockname on the DUT and causes a fatal test failure if @@ -196,18 +251,17 @@ func (dut *DUT) GetSockName(sockfd int32) unix.Sockaddr { return sa } -// ListenWithErrno calls listen on the DUT. -func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int32, error) { +// GetSockNameWithErrno calls getsockname on the DUT. 
+func (dut *DUT) GetSockNameWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) { dut.t.Helper() - req := pb.ListenRequest{ - Sockfd: sockfd, - Backlog: backlog, + req := pb.GetSockNameRequest{ + Sockfd: sockfd, } - resp, err := dut.posixServer.Listen(ctx, &req) + resp, err := dut.posixServer.GetSockName(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Listen: %s", err) + dut.t.Fatalf("failed to call Bind: %s", err) } - return resp.GetRet(), syscall.Errno(resp.GetErrno_()) + return resp.GetRet(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) } // Listen calls listen on the DUT and causes a fatal test failure if it doesn't @@ -223,31 +277,33 @@ func (dut *DUT) Listen(sockfd, backlog int32) { } } -// AcceptWithErrno calls accept on the DUT. -func (dut *DUT) AcceptWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) { +// ListenWithErrno calls listen on the DUT. +func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int32, error) { dut.t.Helper() - req := pb.AcceptRequest{ - Sockfd: sockfd, + req := pb.ListenRequest{ + Sockfd: sockfd, + Backlog: backlog, } - resp, err := dut.posixServer.Accept(ctx, &req) + resp, err := dut.posixServer.Listen(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Accept: %s", err) + dut.t.Fatalf("failed to call Listen: %s", err) } - return resp.GetFd(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } -// Accept calls accept on the DUT and causes a fatal test failure if it doesn't -// succeed. If more control over the timeout or error handling is needed, use -// AcceptWithErrno. -func (dut *DUT) Accept(sockfd int32) (int32, unix.Sockaddr) { +// SetSockOpt calls setsockopt on the DUT and causes a fatal test failure if it +// doesn't succeed. If more control over the timeout or error handling is +// needed, use SetSockOptWithErrno. Because endianess and the width of values +// might differ between the testbench and DUT architectures, prefer to use a +// more specific SetSockOptXxx function. +func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) { dut.t.Helper() ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) defer cancel() - fd, sa, err := dut.AcceptWithErrno(ctx, sockfd) - if fd < 0 { - dut.t.Fatalf("failed to accept: %s", err) + ret, err := dut.SetSockOptWithErrno(ctx, sockfd, level, optname, optval) + if ret != 0 { + dut.t.Fatalf("failed to SetSockOpt: %s", err) } - return fd, sa } // SetSockOptWithErrno calls setsockopt on the DUT. Because endianess and the @@ -268,18 +324,16 @@ func (dut *DUT) SetSockOptWithErrno(ctx context.Context, sockfd, level, optname return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } -// SetSockOpt calls setsockopt on the DUT and causes a fatal test failure if it -// doesn't succeed. If more control over the timeout or error handling is -// needed, use SetSockOptWithErrno. Because endianess and the width of values -// might differ between the testbench and DUT architectures, prefer to use a -// more specific SetSockOptXxx function. -func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) { +// SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure +// if it doesn't succeed. If more control over the timeout or error handling is +// needed, use SetSockOptTimevalWithErrno. 
+func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval) { dut.t.Helper() ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) defer cancel() - ret, err := dut.SetSockOptWithErrno(ctx, sockfd, level, optname, optval) + ret, err := dut.SetSockOptTimevalWithErrno(ctx, sockfd, level, optname, tv) if ret != 0 { - dut.t.Fatalf("failed to SetSockOpt: %s", err) + dut.t.Fatalf("failed to SetSockOptTimeval: %s", err) } } @@ -304,32 +358,31 @@ func (dut *DUT) SetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, o return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } -// SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure -// if it doesn't succeed. If more control over the timeout or error handling is -// needed, use SetSockOptTimevalWithErrno. -func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval) { +// Socket calls socket on the DUT and returns the file descriptor. If socket +// fails on the DUT, the test ends. +func (dut *DUT) Socket(domain, typ, proto int32) int32 { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) - defer cancel() - ret, err := dut.SetSockOptTimevalWithErrno(ctx, sockfd, level, optname, tv) - if ret != 0 { - dut.t.Fatalf("failed to SetSockOptTimeval: %s", err) + fd, err := dut.SocketWithErrno(domain, typ, proto) + if fd < 0 { + dut.t.Fatalf("failed to create socket: %s", err) } + return fd } -// RecvWithErrno calls recv on the DUT. -func (dut *DUT) RecvWithErrno(ctx context.Context, sockfd, len, flags int32) (int32, []byte, error) { +// SocketWithErrno calls socket on the DUT and returns the fd and errno. +func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) { dut.t.Helper() - req := pb.RecvRequest{ - Sockfd: sockfd, - Len: len, - Flags: flags, + req := pb.SocketRequest{ + Domain: domain, + Type: typ, + Protocol: proto, } - resp, err := dut.posixServer.Recv(ctx, &req) + ctx := context.Background() + resp, err := dut.posixServer.Socket(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Recv: %s", err) + dut.t.Fatalf("failed to call Socket: %s", err) } - return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_()) + return resp.GetFd(), syscall.Errno(resp.GetErrno_()) } // Recv calls recv on the DUT and causes a fatal test failure if it doesn't @@ -346,67 +399,17 @@ func (dut *DUT) Recv(sockfd, len, flags int32) []byte { return buf } -// CloseWithErrno calls close on the DUT. -func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) { +// RecvWithErrno calls recv on the DUT. +func (dut *DUT) RecvWithErrno(ctx context.Context, sockfd, len, flags int32) (int32, []byte, error) { dut.t.Helper() - req := pb.CloseRequest{ - Fd: fd, + req := pb.RecvRequest{ + Sockfd: sockfd, + Len: len, + Flags: flags, } - resp, err := dut.posixServer.Close(ctx, &req) + resp, err := dut.posixServer.Recv(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Close: %s", err) - } - return resp.GetRet(), syscall.Errno(resp.GetErrno_()) -} - -// Close calls close on the DUT and causes a fatal test failure if it doesn't -// succeed. If more control over the timeout or error handling is needed, use -// CloseWithErrno. 
-func (dut *DUT) Close(fd int32) { - dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) - defer cancel() - ret, err := dut.CloseWithErrno(ctx, fd) - if ret != 0 { - dut.t.Fatalf("failed to close: %s", err) - } -} - -// CreateBoundSocket makes a new socket on the DUT, with type typ and protocol -// proto, and bound to the IP address addr. Returns the new file descriptor and -// the port that was selected on the DUT. -func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) { - dut.t.Helper() - var fd int32 - if addr.To4() != nil { - fd = dut.Socket(unix.AF_INET, typ, proto) - sa := unix.SockaddrInet4{} - copy(sa.Addr[:], addr.To4()) - dut.Bind(fd, &sa) - } else if addr.To16() != nil { - fd = dut.Socket(unix.AF_INET6, typ, proto) - sa := unix.SockaddrInet6{} - copy(sa.Addr[:], addr.To16()) - dut.Bind(fd, &sa) - } else { - dut.t.Fatal("unknown ip addr type for remoteIP") - } - sa := dut.GetSockName(fd) - var port int - switch s := sa.(type) { - case *unix.SockaddrInet4: - port = s.Port - case *unix.SockaddrInet6: - port = s.Port - default: - dut.t.Fatalf("unknown sockaddr type from getsockname: %t", sa) + dut.t.Fatalf("failed to call Recv: %s", err) } - return fd, uint16(port) -} - -// CreateListener makes a new TCP connection. If it fails, the test ends. -func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { - fd, remotePort := dut.CreateBoundSocket(typ, proto, net.ParseIP(*remoteIPv4)) - dut.Listen(fd, backlog) - return fd, remotePort + return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_()) } -- cgit v1.2.3 From 94319a8241cb299edc812024d6132b7a3819a4dc Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 7 Apr 2020 09:40:38 -0700 Subject: Make gofer.dentry.destroyLocked idempotent gofer operations accumulate dentries touched in a slice to call checkCachingLocked on them when the operation is over. In case the same dentry is touched multiple times during the operation, checkCachingLocked, and consequently destroyLocked, may be called more than once for the same dentry. Updates #1198 PiperOrigin-RevId: 305276819 --- pkg/sentry/fsimpl/gofer/BUILD | 12 ++++++- pkg/sentry/fsimpl/gofer/gofer.go | 36 +++++++++++++++++--- pkg/sentry/fsimpl/gofer/gofer_test.go | 64 +++++++++++++++++++++++++++++++++++ test/syscalls/linux/open.cc | 22 ++++++++++++ 4 files changed, 129 insertions(+), 5 deletions(-) create mode 100644 pkg/sentry/fsimpl/gofer/gofer_test.go (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index d15a36709..99d1e3f8f 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") licenses(["notice"]) @@ -54,3 +54,13 @@ go_library( "//pkg/usermem", ], ) + +go_test( + name = "gofer_test", + srcs = ["gofer_test.go"], + library = ":gofer", + deps = [ + "//pkg/p9", + "//pkg/sentry/contexttest", + ], +) diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index adee8bb60..20edaf643 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -444,7 +444,8 @@ type dentry struct { // refs is the reference count. Each dentry holds a reference on its // parent, even if disowned. refs is accessed using atomic memory - // operations. + // operations. 
When refs reaches 0, the dentry may be added to the cache or + // destroyed. If refs==-1 the dentry has already been destroyed. refs int64 // fs is the owning filesystem. fs is immutable. @@ -860,7 +861,7 @@ func (d *dentry) IncRef() { func (d *dentry) TryIncRef() bool { for { refs := atomic.LoadInt64(&d.refs) - if refs == 0 { + if refs <= 0 { return false } if atomic.CompareAndSwapInt64(&d.refs, refs, refs+1) { @@ -883,13 +884,20 @@ func (d *dentry) DecRef() { // checkCachingLocked should be called after d's reference count becomes 0 or it // becomes disowned. // +// It may be called on a destroyed dentry. For example, +// renameMu[R]UnlockAndCheckCaching may call checkCachingLocked multiple times +// for the same dentry when the dentry is visited more than once in the same +// operation. One of the calls may destroy the dentry, so subsequent calls will +// do nothing. +// // Preconditions: d.fs.renameMu must be locked for writing. func (d *dentry) checkCachingLocked() { // Dentries with a non-zero reference count must be retained. (The only way // to obtain a reference on a dentry with zero references is via path // resolution, which requires renameMu, so if d.refs is zero then it will // remain zero while we hold renameMu for writing.) - if atomic.LoadInt64(&d.refs) != 0 { + refs := atomic.LoadInt64(&d.refs) + if refs > 0 { if d.cached { d.fs.cachedDentries.Remove(d) d.fs.cachedDentriesLen-- @@ -897,6 +905,10 @@ func (d *dentry) checkCachingLocked() { } return } + if refs == -1 { + // Dentry has already been destroyed. + return + } // Non-child dentries with zero references are no longer reachable by path // resolution and should be dropped immediately. if d.vfsd.Parent() == nil || d.vfsd.IsDisowned() { @@ -949,9 +961,22 @@ func (d *dentry) checkCachingLocked() { } } +// destroyLocked destroys the dentry. It may flushes dirty pages from cache, +// close p9 file and remove reference on parent dentry. +// // Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0. d is // not a child dentry. func (d *dentry) destroyLocked() { + switch atomic.LoadInt64(&d.refs) { + case 0: + // Mark the dentry destroyed. + atomic.StoreInt64(&d.refs, -1) + case -1: + panic("dentry.destroyLocked() called on already destroyed dentry") + default: + panic("dentry.destroyLocked() called with references on the dentry") + } + ctx := context.Background() d.handleMu.Lock() if !d.handle.file.isNil() { @@ -971,7 +996,10 @@ func (d *dentry) destroyLocked() { d.handle.close(ctx) } d.handleMu.Unlock() - d.file.close(ctx) + if !d.file.isNil() { + d.file.close(ctx) + d.file = p9file{} + } // Remove d from the set of all dentries. d.fs.syncMu.Lock() delete(d.fs.dentries, d) diff --git a/pkg/sentry/fsimpl/gofer/gofer_test.go b/pkg/sentry/fsimpl/gofer/gofer_test.go new file mode 100644 index 000000000..82bc239db --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/gofer_test.go @@ -0,0 +1,64 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package gofer + +import ( + "sync/atomic" + "testing" + + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/contexttest" +) + +func TestDestroyIdempotent(t *testing.T) { + fs := filesystem{ + dentries: make(map[*dentry]struct{}), + opts: filesystemOptions{ + // Test relies on no dentry being held in the cache. + maxCachedDentries: 0, + }, + } + + ctx := contexttest.Context(t) + attr := &p9.Attr{ + Mode: p9.ModeRegular, + } + mask := p9.AttrMask{ + Mode: true, + Size: true, + } + parent, err := fs.newDentry(ctx, p9file{}, p9.QID{}, mask, attr) + if err != nil { + t.Fatalf("fs.newDentry(): %v", err) + } + + child, err := fs.newDentry(ctx, p9file{}, p9.QID{}, mask, attr) + if err != nil { + t.Fatalf("fs.newDentry(): %v", err) + } + parent.IncRef() // reference held by child on its parent. + parent.vfsd.InsertChild(&child.vfsd, "child") + + child.checkCachingLocked() + if got := atomic.LoadInt64(&child.refs); got != -1 { + t.Fatalf("child.refs=%d, want: -1", got) + } + // Parent will also be destroyed when child reference is removed. + if got := atomic.LoadInt64(&parent.refs); got != -1 { + t.Fatalf("parent.refs=%d, want: -1", got) + } + child.checkCachingLocked() + child.checkCachingLocked() +} diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index 267ae19f6..640fe6bfc 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -186,6 +186,28 @@ TEST_F(OpenTest, OpenNoFollowStillFollowsLinksInPath) { ASSERT_NO_ERRNO_AND_VALUE(Open(path_via_symlink, O_RDONLY | O_NOFOLLOW)); } +// Test that open(2) can follow symlinks that point back to the same tree. +// Test sets up files as follows: +// root/child/symlink => redirects to ../.. +// root/child/target => regular file +// +// open("root/child/symlink/root/child/file") +TEST_F(OpenTest, SymlinkRecurse) { + auto root = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(GetAbsoluteTestTmpdir())); + auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + auto symlink = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(child.path(), "../..")); + auto target = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(child.path(), "abc", 0644)); + auto path_via_symlink = + JoinPath(symlink.path(), Basename(root.path()), Basename(child.path()), + Basename(target.path())); + const auto contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContents(path_via_symlink)); + ASSERT_EQ(contents, "abc"); +} + TEST_F(OpenTest, Fault) { char* totally_not_null = nullptr; ASSERT_THAT(open(totally_not_null, O_RDONLY), SyscallFailsWithErrno(EFAULT)); -- cgit v1.2.3 From 71770e56629339c9853466e994b78b172bc668a9 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Tue, 7 Apr 2020 13:27:26 -0700 Subject: mkdir test: Address TODOs and re-enable a test. PiperOrigin-RevId: 305328184 --- test/syscalls/linux/mkdir.cc | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/mkdir.cc b/test/syscalls/linux/mkdir.cc index def4c50a4..4036a9275 100644 --- a/test/syscalls/linux/mkdir.cc +++ b/test/syscalls/linux/mkdir.cc @@ -36,21 +36,12 @@ class MkdirTest : public ::testing::Test { // TearDown unlinks created files. void TearDown() override { - // FIXME(edahlgren): We don't currently implement rmdir. - // We do this unconditionally because there's no harm in trying. 
- rmdir(dirname_.c_str()); + EXPECT_THAT(rmdir(dirname_.c_str()), SyscallSucceeds()); } std::string dirname_; }; -TEST_F(MkdirTest, DISABLED_CanCreateReadbleDir) { - ASSERT_THAT(mkdir(dirname_.c_str(), 0444), SyscallSucceeds()); - ASSERT_THAT( - open(JoinPath(dirname_, "anything").c_str(), O_RDWR | O_CREAT, 0666), - SyscallFailsWithErrno(EACCES)); -} - TEST_F(MkdirTest, CanCreateWritableDir) { ASSERT_THAT(mkdir(dirname_.c_str(), 0777), SyscallSucceeds()); std::string filename = JoinPath(dirname_, "anything"); @@ -84,10 +75,11 @@ TEST_F(MkdirTest, FailsOnDirWithoutWritePerms) { ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); - auto parent = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateDirWith(GetAbsoluteTestTmpdir(), 0555)); - auto dir = JoinPath(parent.path(), "foo"); - ASSERT_THAT(mkdir(dir.c_str(), 0777), SyscallFailsWithErrno(EACCES)); + ASSERT_THAT(mkdir(dirname_.c_str(), 0555), SyscallSucceeds()); + auto dir = JoinPath(dirname_.c_str(), "foo"); + EXPECT_THAT(mkdir(dir.c_str(), 0777), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(open(JoinPath(dirname_, "file").c_str(), O_RDWR | O_CREAT, 0666), + SyscallFailsWithErrno(EACCES)); } } // namespace -- cgit v1.2.3 From 47db097773f9c0badb9f7b5866e697a7e7d0da13 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 7 Apr 2020 14:28:23 -0700 Subject: Internal change. PiperOrigin-RevId: 305341059 --- test/packetimpact/testbench/rawsockets.go | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'test') diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index 0c7d0f979..0074484f7 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -47,6 +47,12 @@ func NewSniffer(t *testing.T) (Sniffer, error) { if err != nil { return Sniffer{}, err } + if err := unix.SetsockoptInt(snifferFd, unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, 1); err != nil { + t.Fatalf("can't set sockopt SO_RCVBUFFORCE to 1: %s", err) + } + if err := unix.SetsockoptInt(snifferFd, unix.SOL_SOCKET, unix.SO_RCVBUF, 1e7); err != nil { + t.Fatalf("can't setsockopt SO_RCVBUF to 10M: %s", err) + } return Sniffer{ t: t, fd: snifferFd, -- cgit v1.2.3 From dbcc59af0b834b6295589a594fe4cc1c360e66f7 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 7 Apr 2020 17:48:06 -0700 Subject: Test TCP sender behavior against window shrinking RFC 1122 Section 3.7: A sending TCP MUST be robust against window shrinking, which may cause the "useable window" to become negative. 
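The test added below drives this behavior from the packetimpact testbench. A rough sketch only (this is not the contents of tcp_window_shrink_test.go; the NewDUT and NewTCPIPv4 constructors, the TCP WindowSize and Flags fields, and the Uint8 helper are assumed here, as they are not shown in this change):

    package tcp_window_shrink_test

    import (
    	"testing"
    	"time"

    	"golang.org/x/sys/unix"
    	"gvisor.dev/gvisor/pkg/tcpip/header"
    	tb "gvisor.dev/gvisor/test/packetimpact/testbench"
    )

    func TestWindowShrink(t *testing.T) {
    	dut := tb.NewDUT(t) // assumed constructor
    	defer dut.TearDown()
    	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
    	defer dut.Close(listenFd)

    	// Assumed constructor; the connection is assumed to track and fill in
    	// sequence/ack numbers on outgoing segments.
    	conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
    	conn.Handshake()
    	acceptFd, _ := dut.Accept(listenFd)
    	defer dut.Close(acceptFd)

    	sample := []byte("sample data 1234")

    	// The DUT sends while the testbench advertises a normal window.
    	dut.Send(acceptFd, sample, 0)
    	conn.ExpectData(tb.TCP{}, sample, time.Second)

    	// The testbench ACKs the data but shrinks the advertised window to 4
    	// bytes (WindowSize, Flags and Uint8 are assumed names).
    	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(4)})

    	// A robust sender must not transmit past the shrunken right edge, so
    	// only a 4-byte segment should be observed for the next send.
    	dut.Send(acceptFd, sample, 0)
    	conn.ExpectData(tb.TCP{}, sample[:4], time.Second)
    }

A fuller test would typically reopen the window afterwards and check that the held-back bytes are then released.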
PiperOrigin-RevId: 305377072 --- test/packetimpact/dut/posix_server.cc | 20 ++++++++ test/packetimpact/proto/posix_server.proto | 27 +++++++++++ test/packetimpact/testbench/connections.go | 52 +++++++++++++++++--- test/packetimpact/testbench/dut.go | 58 +++++++++++++++++++++++ test/packetimpact/tests/BUILD | 12 +++++ test/packetimpact/tests/tcp_window_shrink_test.go | 58 +++++++++++++++++++++++ 6 files changed, 220 insertions(+), 7 deletions(-) create mode 100644 test/packetimpact/tests/tcp_window_shrink_test.go (limited to 'test') diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index b8177f5b1..86e580c6f 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -149,6 +149,15 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } + ::grpc::Status Send(::grpc::ServerContext *context, + const ::posix_server::SendRequest *request, + ::posix_server::SendResponse *response) override { + response->set_ret(::send(request->sockfd(), request->buf().data(), + request->buf().size(), request->flags())); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + ::grpc::Status SetSockOpt( grpc_impl::ServerContext *context, const ::posix_server::SetSockOptRequest *request, @@ -160,6 +169,17 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } + ::grpc::Status SetSockOptInt( + ::grpc::ServerContext *context, + const ::posix_server::SetSockOptIntRequest *request, + ::posix_server::SetSockOptIntResponse *response) override { + int opt = request->intval(); + response->set_ret(::setsockopt(request->sockfd(), request->level(), + request->optname(), &opt, sizeof(opt))); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + ::grpc::Status SetSockOptTimeval( ::grpc::ServerContext *context, const ::posix_server::SetSockOptTimevalRequest *request, diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto index 1565f31fa..4035e1ee6 100644 --- a/test/packetimpact/proto/posix_server.proto +++ b/test/packetimpact/proto/posix_server.proto @@ -93,6 +93,17 @@ message ListenResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } +message SendRequest { + int32 sockfd = 1; + bytes buf = 2; + int32 flags = 3; +} + +message SendResponse { + int32 ret = 1; + int32 errno_ = 2; +} + message SetSockOptRequest { int32 sockfd = 1; int32 level = 2; @@ -105,6 +116,18 @@ message SetSockOptResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } +message SetSockOptIntRequest { + int32 sockfd = 1; + int32 level = 2; + int32 optname = 3; + int32 intval = 4; +} + +message SetSockOptIntResponse { + int32 ret = 1; + int32 errno_ = 2; +} + message SetSockOptTimevalRequest { int32 sockfd = 1; int32 level = 2; @@ -151,11 +174,15 @@ service Posix { rpc GetSockName(GetSockNameRequest) returns (GetSockNameResponse); // Call listen() on the DUT. rpc Listen(ListenRequest) returns (ListenResponse); + // Call send() on the DUT. + rpc Send(SendRequest) returns (SendResponse); // Call setsockopt() on the DUT. You should prefer one of the other // SetSockOpt* functions with a more structured optval or else you may get the // encoding wrong, such as making a bad assumption about the server's word // sizes or endianness. rpc SetSockOpt(SetSockOptRequest) returns (SetSockOptResponse); + // Call setsockopt() on the DUT with an int optval. 
+ rpc SetSockOptInt(SetSockOptIntRequest) returns (SetSockOptIntResponse); // Call setsockopt() on the DUT with a Timeval optval. rpc SetSockOptTimeval(SetSockOptTimevalRequest) returns (SetSockOptTimevalResponse); diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 8d1f562ee..579da59c3 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -187,9 +187,19 @@ func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { conn.SendFrame(conn.CreateFrame(tcp, additionalLayers...)) } -// Recv gets a packet from the sniffer within the timeout provided. If no packet -// arrives before the timeout, it returns nil. +// Recv gets a packet from the sniffer within the timeout provided. +// If no packet arrives before the timeout, it returns nil. func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP { + layers := conn.RecvFrame(timeout) + if tcpLayerIndex < len(layers) { + return layers[tcpLayerIndex].(*TCP) + } + return nil +} + +// RecvFrame gets a frame (of type Layers) within the timeout provided. +// If no frame arrives before the timeout, it returns nil. +func (conn *TCPIPv4) RecvFrame(timeout time.Duration) Layers { deadline := time.Now().Add(timeout) for { timeout = time.Until(deadline) @@ -216,14 +226,16 @@ func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP { for i := tcpLayerIndex + 1; i < len(layers); i++ { conn.RemoteSeqNum.UpdateForward(seqnum.Size(layers[i].length())) } - return tcpHeader + return layers } return nil } // Expect a packet that matches the provided tcp within the timeout specified. -// If it doesn't arrive in time, the test fails. +// If it doesn't arrive in time, it returns nil. func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) *TCP { + // We cannot implement this directly using ExpectFrame as we cannot specify + // the Payload part. deadline := time.Now().Add(timeout) for { timeout = time.Until(deadline) @@ -231,15 +243,41 @@ func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) *TCP { return nil } gotTCP := conn.Recv(timeout) - if gotTCP == nil { - return nil - } if tcp.match(gotTCP) { return gotTCP } } } +// ExpectFrame expects a frame that matches the specified layers within the +// timeout specified. If it doesn't arrive in time, it returns nil. +func (conn *TCPIPv4) ExpectFrame(layers Layers, timeout time.Duration) Layers { + deadline := time.Now().Add(timeout) + for { + timeout = time.Until(deadline) + if timeout <= 0 { + return nil + } + gotLayers := conn.RecvFrame(timeout) + if layers.match(gotLayers) { + return gotLayers + } + } +} + +// ExpectData is a convenient method that expects a TCP packet along with +// the payload to arrive within the timeout specified. If it doesn't arrive +// in time, it causes a fatal test failure. +func (conn *TCPIPv4) ExpectData(tcp TCP, data []byte, timeout time.Duration) { + expected := []Layer{&Ether{}, &IPv4{}, &tcp} + if len(data) > 0 { + expected = append(expected, &Payload{Bytes: data}) + } + if conn.ExpectFrame(expected, timeout) == nil { + conn.t.Fatalf("expected to get a TCP frame %s with payload %x", &tcp, data) + } +} + // Handshake performs a TCP 3-way handshake. func (conn *TCPIPv4) Handshake() { // Send the SYN. 
diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index f342aee01..9335909c0 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -291,6 +291,35 @@ func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } +// Send calls send on the DUT and causes a fatal test failure if it doesn't +// succeed. If more control over the timeout or error handling is needed, use +// SendWithErrno. +func (dut *DUT) Send(sockfd int32, buf []byte, flags int32) int32 { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.SendWithErrno(ctx, sockfd, buf, flags) + if ret == -1 { + dut.t.Fatalf("failed to send: %s", err) + } + return ret +} + +// SendWithErrno calls send on the DUT. +func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, flags int32) (int32, error) { + dut.t.Helper() + req := pb.SendRequest{ + Sockfd: sockfd, + Buf: buf, + Flags: flags, + } + resp, err := dut.posixServer.Send(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Send: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + // SetSockOpt calls setsockopt on the DUT and causes a fatal test failure if it // doesn't succeed. If more control over the timeout or error handling is // needed, use SetSockOptWithErrno. Because endianess and the width of values @@ -324,6 +353,35 @@ func (dut *DUT) SetSockOptWithErrno(ctx context.Context, sockfd, level, optname return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } +// SetSockOptInt calls setsockopt on the DUT and causes a fatal test failure +// if it doesn't succeed. If more control over the int optval or error handling +// is needed, use SetSockOptIntWithErrno. +func (dut *DUT) SetSockOptInt(sockfd, level, optname, optval int32) { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.SetSockOptIntWithErrno(ctx, sockfd, level, optname, optval) + if ret != 0 { + dut.t.Fatalf("failed to SetSockOptInt: %s", err) + } +} + +// SetSockOptIntWithErrno calls setsockopt with an integer optval. +func (dut *DUT) SetSockOptIntWithErrno(ctx context.Context, sockfd, level, optname, optval int32) (int32, error) { + dut.t.Helper() + req := pb.SetSockOptIntRequest{ + Sockfd: sockfd, + Level: level, + Optname: optname, + Intval: optval, + } + resp, err := dut.posixServer.SetSockOptInt(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call SetSockOptInt: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + // SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure // if it doesn't succeed. If more control over the timeout or error handling is // needed, use SetSockOptTimevalWithErrno. diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 9a4d66ea9..a9b2de9b9 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -28,6 +28,18 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_window_shrink", + srcs = ["tcp_window_shrink_test.go"], + # TODO(b/153202472): Fix netstack then remove the line below. 
+ netstack = False, + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/tcp_window_shrink_test.go b/test/packetimpact/tests/tcp_window_shrink_test.go new file mode 100644 index 000000000..b48cc6491 --- /dev/null +++ b/test/packetimpact/tests/tcp_window_shrink_test.go @@ -0,0 +1,58 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_window_shrink_test + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func TestWindowShrink(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + acceptFd, _ := dut.Accept(listenFd) + defer dut.Close(acceptFd) + + dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + + sampleData := []byte("Sample Data") + + dut.Send(acceptFd, sampleData, 0) + conn.ExpectData(tb.TCP{}, sampleData, time.Second) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + + dut.Send(acceptFd, sampleData, 0) + dut.Send(acceptFd, sampleData, 0) + conn.ExpectData(tb.TCP{}, sampleData, time.Second) + conn.ExpectData(tb.TCP{}, sampleData, time.Second) + // We close our receiving window here + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + + dut.Send(acceptFd, []byte("Sample Data"), 0) + // Note: There is another kind of zero-window probing which Windows uses (by sending one + // new byte at `RemoteSeqNum`), if netstack wants to go that way, we may want to change + // the following lines. 
+ conn.ExpectData(tb.TCP{SeqNum: tb.Uint32(uint32(conn.RemoteSeqNum - 1))}, nil, time.Second) +} -- cgit v1.2.3 From c7d841ac6e0be2aaacd6a3a81786508be797f667 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 8 Apr 2020 00:25:16 -0700 Subject: tests: Specify NoRandomSave for PortReuse tests SO_REUSEPORT is not properly restored: https://github.com/google/gvisor/issues/873 PiperOrigin-RevId: 305422775 --- test/syscalls/linux/socket_inet_loopback.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 1b34e4ef7..030c3b835 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -1157,7 +1157,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { EquivalentWithin((kConnectAttempts / kThreadCount), 0.10)); } -TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThread) { +TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThread_NoRandomSave) { auto const& param = GetParam(); TestAddress const& listener = param.listener; @@ -1270,7 +1270,7 @@ TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThread) { EquivalentWithin((kConnectAttempts / kThreadCount), 0.10)); } -TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort) { +TEST_P(SocketInetReusePortTest, UdpPortReuseMultiThreadShort_NoRandomSave) { auto const& param = GetParam(); TestAddress const& listener = param.listener; @@ -2146,8 +2146,9 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReservedReuseAddr) { &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceeds()); - ASSERT_THAT(connect(connected_fd.get(), - reinterpret_cast(&bound_addr), bound_addr_len), + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast(&bound_addr), + bound_addr_len), SyscallSucceeds()); // Get the ephemeral port. -- cgit v1.2.3 From a86ffefd3f52dede3ffd6ae3c20d67734ecc2616 Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Wed, 8 Apr 2020 04:06:14 -0400 Subject: Enable exec_binary syscall test on Arm64 Signed-off-by: Bin Lu --- test/syscalls/linux/BUILD | 5 +- test/syscalls/linux/exec_binary.cc | 164 ++++++++++++++++++++++++++++++++----- 2 files changed, 143 insertions(+), 26 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index d0c431234..9447b06a8 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -663,10 +663,7 @@ cc_binary( cc_binary( name = "exec_binary_test", testonly = 1, - srcs = select_arch( - amd64 = ["exec_binary.cc"], - arm64 = [], - ), + srcs = ["exec_binary.cc"], linkstatic = 1, deps = [ "//test/util:cleanup", diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc index 736452b0c..ae2683256 100644 --- a/test/syscalls/linux/exec_binary.cc +++ b/test/syscalls/linux/exec_binary.cc @@ -48,10 +48,17 @@ namespace { using ::testing::AnyOf; using ::testing::Eq; -#ifndef __x86_64__ +#if !defined(__x86_64__) && !defined(__aarch64__) // The assembly stub and ELF internal details must be ported to other arches. -#error "Test only supported on x86-64" -#endif // __x86_64__ +#error "Test only supported on x86-64/arm64" +#endif // __x86_64__ || __aarch64__ + +#if defined(__x86_64__) +#define EM_TYPE EM_X86_64 +#define IP_REG(p) ((p).rip) +#define RAX_REG(p) ((p).rax) +#define RDI_REG(p) ((p).rdi) +#define RETURN_REG(p) ((p).rax) // amd64 stub that calls PTRACE_TRACEME and sends itself SIGSTOP. 
const char kPtraceCode[] = { @@ -139,6 +146,76 @@ const char kPtraceCode[] = { // Size of a syscall instruction. constexpr int kSyscallSize = 2; +#elif defined(__aarch64__) +#define EM_TYPE EM_AARCH64 +#define IP_REG(p) ((p).pc) +#define RAX_REG(p) ((p).regs[8]) +#define RDI_REG(p) ((p).regs[0]) +#define RETURN_REG(p) ((p).regs[0]) + +const char kPtraceCode[] = { + // MOVD $117, R8 /* ptrace */ + '\xa8', + '\x0e', + '\x80', + '\xd2', + // MOVD $0, R0 /* PTRACE_TRACEME */ + '\x00', + '\x00', + '\x80', + '\xd2', + // MOVD $0, R1 /* pid */ + '\x01', + '\x00', + '\x80', + '\xd2', + // MOVD $0, R2 /* addr */ + '\x02', + '\x00', + '\x80', + '\xd2', + // MOVD $0, R3 /* data */ + '\x03', + '\x00', + '\x80', + '\xd2', + // SVC + '\x01', + '\x00', + '\x00', + '\xd4', + // MOVD $172, R8 /* getpid */ + '\x88', + '\x15', + '\x80', + '\xd2', + // SVC + '\x01', + '\x00', + '\x00', + '\xd4', + // MOVD $129, R8 /* kill, R0=pid */ + '\x28', + '\x10', + '\x80', + '\xd2', + // MOVD $19, R1 /* SIGSTOP */ + '\x61', + '\x02', + '\x80', + '\xd2', + // SVC + '\x01', + '\x00', + '\x00', + '\xd4', +}; +// Size of a syscall instruction. +constexpr int kSyscallSize = 4; +#else +#error "Unknown architecture" +#endif + // This test suite tests executable loading in the kernel (ELF and interpreter // scripts). @@ -281,7 +358,7 @@ ElfBinary<64> StandardElf() { elf.header.e_ident[EI_DATA] = ELFDATA2LSB; elf.header.e_ident[EI_VERSION] = EV_CURRENT; elf.header.e_type = ET_EXEC; - elf.header.e_machine = EM_X86_64; + elf.header.e_machine = EM_TYPE; elf.header.e_version = EV_CURRENT; elf.header.e_phoff = sizeof(elf.header); elf.header.e_phentsize = sizeof(decltype(elf)::ElfPhdr); @@ -327,9 +404,15 @@ TEST(ElfTest, Execute) { ASSERT_NO_ERRNO(WaitStopped(child)); struct user_regs_struct regs; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); - // RIP is just beyond the final syscall instruction. - EXPECT_EQ(regs.rip, elf.header.e_entry + sizeof(kPtraceCode)); + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + // RIP/PC is just beyond the final syscall instruction. + EXPECT_EQ(IP_REG(regs), elf.header.e_entry + sizeof(kPtraceCode)); EXPECT_THAT(child, ContainsMappings(std::vector({ {0x40000, 0x41000, true, false, true, true, 0, 0, 0, 0, @@ -718,9 +801,16 @@ TEST(ElfTest, PIE) { // RIP tells us which page the first segment was loaded into. struct user_regs_struct regs; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); - const uint64_t load_addr = regs.rip & ~(kPageSize - 1); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); + + const uint64_t load_addr = IP_REG(regs) & ~(kPageSize - 1); EXPECT_THAT(child, ContainsMappings(std::vector({ // text page. @@ -787,9 +877,15 @@ TEST(ElfTest, PIENonZeroStart) { // RIP tells us which page the first segment was loaded into. struct user_regs_struct regs; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. 
+ EXPECT_EQ(iov.iov_len, sizeof(regs)); - const uint64_t load_addr = regs.rip & ~(kPageSize - 1); + const uint64_t load_addr = IP_REG(regs) & ~(kPageSize - 1); // The ELF is loaded at an arbitrary address, not the first PT_LOAD vaddr. // @@ -910,9 +1006,15 @@ TEST(ElfTest, ELFInterpreter) { // RIP tells us which page the first segment of the interpreter was loaded // into. struct user_regs_struct regs; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); - const uint64_t interp_load_addr = regs.rip & ~(kPageSize - 1); + const uint64_t interp_load_addr = IP_REG(regs) & ~(kPageSize - 1); EXPECT_THAT( child, ContainsMappings(std::vector({ @@ -1084,9 +1186,15 @@ TEST(ElfTest, ELFInterpreterRelative) { // RIP tells us which page the first segment of the interpreter was loaded // into. struct user_regs_struct regs; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); - const uint64_t interp_load_addr = regs.rip & ~(kPageSize - 1); + const uint64_t interp_load_addr = IP_REG(regs) & ~(kPageSize - 1); EXPECT_THAT( child, ContainsMappings(std::vector({ @@ -1480,14 +1588,21 @@ TEST(ExecveTest, BrkAfterBinary) { ASSERT_NO_ERRNO(WaitStopped(child)); struct user_regs_struct regs; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + // Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); // RIP is just beyond the final syscall instruction. Rewind to execute a brk // syscall. - regs.rip -= kSyscallSize; - regs.rax = __NR_brk; - regs.rdi = 0; - ASSERT_THAT(ptrace(PTRACE_SETREGS, child, 0, ®s), SyscallSucceeds()); + IP_REG(regs) -= kSyscallSize; + RAX_REG(regs) = __NR_brk; + RDI_REG(regs) = 0; + ASSERT_THAT(ptrace(PTRACE_SETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); // Resume the child, waiting for syscall entry. ASSERT_THAT(ptrace(PTRACE_SYSCALL, child, 0, 0), SyscallSucceeds()); @@ -1504,7 +1619,12 @@ TEST(ExecveTest, BrkAfterBinary) { ASSERT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) << "status = " << status; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child, 0, ®s), SyscallSucceeds()); + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov), + SyscallSucceeds()); + //Read exactly the full register set. + EXPECT_EQ(iov.iov_len, sizeof(regs)); // brk is after the text page. // @@ -1512,7 +1632,7 @@ TEST(ExecveTest, BrkAfterBinary) { // address will be, but it is always beyond the final page in the binary. // i.e., it does not start immediately after memsz in the middle of a page. // Userspace may expect to use that space. - EXPECT_GE(regs.rax, 0x41000); + EXPECT_GE(RETURN_REG(regs), 0x41000); } } // namespace -- cgit v1.2.3 From 71c7e24e5cb8641f4cb98b5fc848ae2033b29eac Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 8 Apr 2020 06:41:52 -0700 Subject: Return all packets when Expect fails. 
PiperOrigin-RevId: 305466309 --- test/packetimpact/testbench/connections.go | 39 +++++++++++++---------- test/packetimpact/tests/fin_wait2_timeout_test.go | 12 +++---- 2 files changed, 29 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 579da59c3..ed8689fd3 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -21,6 +21,7 @@ import ( "fmt" "math/rand" "net" + "strings" "testing" "time" @@ -233,19 +234,23 @@ func (conn *TCPIPv4) RecvFrame(timeout time.Duration) Layers { // Expect a packet that matches the provided tcp within the timeout specified. // If it doesn't arrive in time, it returns nil. -func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) *TCP { +func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) { // We cannot implement this directly using ExpectFrame as we cannot specify // the Payload part. deadline := time.Now().Add(timeout) + var allTCP []string for { - timeout = time.Until(deadline) - if timeout <= 0 { - return nil + var gotTCP *TCP + if timeout = time.Until(deadline); timeout > 0 { + gotTCP = conn.Recv(timeout) + } + if gotTCP == nil { + return nil, fmt.Errorf("got %d packets:\n%s", len(allTCP), strings.Join(allTCP, "\n")) } - gotTCP := conn.Recv(timeout) if tcp.match(gotTCP) { - return gotTCP + return gotTCP, nil } + allTCP = append(allTCP, gotTCP.String()) } } @@ -284,10 +289,11 @@ func (conn *TCPIPv4) Handshake() { conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn)}) // Wait for the SYN-ACK. - conn.SynAck = conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) - if conn.SynAck == nil { - conn.t.Fatalf("didn't get synack during handshake") + synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) + if synAck == nil { + conn.t.Fatalf("didn't get synack during handshake: %s", err) } + conn.SynAck = synAck // Send an ACK. conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)}) @@ -427,19 +433,20 @@ func (conn *UDPIPv4) Recv(timeout time.Duration) *UDP { // Expect a packet that matches the provided udp within the timeout specified. // If it doesn't arrive in time, the test fails. 
-func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) *UDP { +func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) { deadline := time.Now().Add(timeout) + var allUDP []string for { - timeout = time.Until(deadline) - if timeout <= 0 { - return nil + var gotUDP *UDP + if timeout = time.Until(deadline); timeout > 0 { + gotUDP = conn.Recv(timeout) } - gotUDP := conn.Recv(timeout) if gotUDP == nil { - return nil + return nil, fmt.Errorf("got %d packets:\n%s", len(allUDP), strings.Join(allUDP, "\n")) } if udp.match(gotUDP) { - return gotUDP + return gotUDP, nil } + allUDP = append(allUDP, gotUDP.String()) } } diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go index 2b3f39045..90e16ef65 100644 --- a/test/packetimpact/tests/fin_wait2_timeout_test.go +++ b/test/packetimpact/tests/fin_wait2_timeout_test.go @@ -47,20 +47,20 @@ func TestFinWait2Timeout(t *testing.T) { } dut.Close(acceptFd) - if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); gotOne == nil { - t.Fatal("expected a FIN-ACK within 1 second but got none") + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { + t.Fatalf("expected a FIN-ACK within 1 second but got none: %s", err) } conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) time.Sleep(5 * time.Second) conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) if tt.linger2 { - if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); gotOne == nil { - t.Fatal("expected a RST packet within a second but got none") + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); err != nil { + t.Fatalf("expected a RST packet within a second but got none: %s", err) } } else { - if gotOne := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, 10*time.Second); gotOne != nil { - t.Fatal("expected no RST packets within ten seconds but got one") + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, 10*time.Second); err == nil { + t.Fatalf("expected no RST packets within ten seconds but got one: %s", err) } } }) -- cgit v1.2.3 From 928a7c60b8f02811e9c0fcbed0077efd55471cc4 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Wed, 25 Mar 2020 17:15:05 -0700 Subject: Fix all printf formatting errors. 
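One representative instance of the class of fix, excerpted from test/runtimes/blacklist_test.go in the diff below: the %q verb was handed a *string, so the argument is dereferenced to match the format string:

  t.Errorf("got empty blacklist for file %q", blacklistFile)   // before: %q given a *string
  t.Errorf("got empty blacklist for file %q", *blacklistFile)  // after: dereference to match %q
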
Updates #2243 --- pkg/eventchannel/event_test.go | 4 ++-- pkg/segment/test/segment_test.go | 2 +- pkg/sentry/fs/fdpipe/pipe_test.go | 4 ++-- pkg/sentry/fsimpl/tmpfs/stat_test.go | 8 ++++---- pkg/tcpip/header/eth_test.go | 2 +- pkg/tcpip/header/ndp_test.go | 2 +- pkg/tcpip/link/fdbased/endpoint.go | 2 +- pkg/tcpip/stack/ndp_test.go | 12 ++++++------ pkg/tcpip/stack/stack_test.go | 32 ++++++++++++++++---------------- pkg/tcpip/tcpip_test.go | 2 +- pkg/tcpip/transport/tcp/tcp_test.go | 4 ++-- pkg/tcpip/transport/udp/udp_test.go | 2 +- runsc/container/test_app/test_app.go | 2 +- test/root/cgroup_test.go | 4 ++-- test/runtimes/blacklist_test.go | 2 +- test/runtimes/runner.go | 2 +- tools/nogo.json | 31 ------------------------------- 17 files changed, 43 insertions(+), 74 deletions(-) (limited to 'test') diff --git a/pkg/eventchannel/event_test.go b/pkg/eventchannel/event_test.go index 7f41b4a27..43750360b 100644 --- a/pkg/eventchannel/event_test.go +++ b/pkg/eventchannel/event_test.go @@ -78,7 +78,7 @@ func TestMultiEmitter(t *testing.T) { for _, name := range names { m := testMessage{name: name} if _, err := me.Emit(m); err != nil { - t.Fatal("me.Emit(%v) failed: %v", m, err) + t.Fatalf("me.Emit(%v) failed: %v", m, err) } } @@ -96,7 +96,7 @@ func TestMultiEmitter(t *testing.T) { // Close multiEmitter. if err := me.Close(); err != nil { - t.Fatal("me.Close() failed: %v", err) + t.Fatalf("me.Close() failed: %v", err) } // All testEmitters should be closed. diff --git a/pkg/segment/test/segment_test.go b/pkg/segment/test/segment_test.go index f19a005f3..97b16c158 100644 --- a/pkg/segment/test/segment_test.go +++ b/pkg/segment/test/segment_test.go @@ -63,7 +63,7 @@ func checkSet(s *Set, expectedSegments int) error { return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) } if got, want := seg.Value(), seg.Start()+valueOffset; got != want { - return fmt.Errorf("segment %d has key %d, value %d (expected %d)", nrSegments, seg.Start, got, want) + return fmt.Errorf("segment %d has key %d, value %d (expected %d)", nrSegments, seg.Start(), got, want) } prev = next havePrev = true diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index 5aff0cc95..a0082ecca 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -119,7 +119,7 @@ func TestNewPipe(t *testing.T) { continue } if flags := p.flags; test.flags != flags { - t.Errorf("%s: got file flags %s, want %s", test.desc, flags, test.flags) + t.Errorf("%s: got file flags %v, want %v", test.desc, flags, test.flags) continue } if len(test.readAheadBuffer) != len(p.readAheadBuffer) { @@ -136,7 +136,7 @@ func TestNewPipe(t *testing.T) { continue } if !fdnotifier.HasFD(int32(f.FD())) { - t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD) + t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD()) } } } diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go index 3e02e7190..d4f59ee5b 100644 --- a/pkg/sentry/fsimpl/tmpfs/stat_test.go +++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go @@ -140,7 +140,7 @@ func TestSetStatAtime(t *testing.T) { Mask: 0, Atime: linux.NsecToStatxTimestamp(100), }}); err != nil { - t.Errorf("SetStat atime without mask failed: %v") + t.Errorf("SetStat atime without mask failed: %v", err) } // Atime should be unchanged. 
if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { @@ -155,7 +155,7 @@ func TestSetStatAtime(t *testing.T) { Atime: linux.NsecToStatxTimestamp(100), } if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil { - t.Errorf("SetStat atime with mask failed: %v") + t.Errorf("SetStat atime with mask failed: %v", err) } if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { t.Errorf("Stat got error: %v", err) @@ -205,7 +205,7 @@ func TestSetStat(t *testing.T) { Mask: 0, Atime: linux.NsecToStatxTimestamp(100), }}); err != nil { - t.Errorf("SetStat atime without mask failed: %v") + t.Errorf("SetStat atime without mask failed: %v", err) } // Atime should be unchanged. if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { @@ -220,7 +220,7 @@ func TestSetStat(t *testing.T) { Atime: linux.NsecToStatxTimestamp(100), } if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil { - t.Errorf("SetStat atime with mask failed: %v") + t.Errorf("SetStat atime with mask failed: %v", err) } if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { t.Errorf("Stat got error: %v", err) diff --git a/pkg/tcpip/header/eth_test.go b/pkg/tcpip/header/eth_test.go index 7a0014ad9..14413f2ce 100644 --- a/pkg/tcpip/header/eth_test.go +++ b/pkg/tcpip/header/eth_test.go @@ -88,7 +88,7 @@ func TestEthernetAddressFromMulticastIPv4Address(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { if got := EthernetAddressFromMulticastIPv4Address(test.addr); got != test.expectedLinkAddr { - t.Fatalf("got EthernetAddressFromMulticastIPv4Address(%s) = %s, want = %s", got, test.expectedLinkAddr) + t.Fatalf("got EthernetAddressFromMulticastIPv4Address(%s) = %s, want = %s", test.addr, got, test.expectedLinkAddr) } }) } diff --git a/pkg/tcpip/header/ndp_test.go b/pkg/tcpip/header/ndp_test.go index 1cb9f5dc8..969f8f1e8 100644 --- a/pkg/tcpip/header/ndp_test.go +++ b/pkg/tcpip/header/ndp_test.go @@ -115,7 +115,7 @@ func TestNDPNeighborAdvert(t *testing.T) { // Make sure flags got updated in the backing buffer. if got := b[ndpNAFlagsOffset]; got != 64 { - t.Errorf("got flags byte = %d, want = 64") + t.Errorf("got flags byte = %d, want = 64", got) } } diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 7198742b7..b857ce9d0 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -91,7 +91,7 @@ func (p PacketDispatchMode) String() string { case PacketMMap: return "PacketMMap" default: - return fmt.Sprintf("unknown packet dispatch mode %v", p) + return fmt.Sprintf("unknown packet dispatch mode '%d'", p) } } diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 27dc8baf9..bc822e3b1 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -1959,7 +1959,7 @@ func TestAutoGenAddrDeprecateFromPI(t *testing.T) { // addr2 is deprecated but if explicitly requested, it should be used. 
fullAddr2 := tcpip.FullAddress{Addr: addr2.Address, NIC: nicID} if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr2.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) } // Another PI w/ 0 preferred lifetime should not result in a deprecation @@ -1972,7 +1972,7 @@ func TestAutoGenAddrDeprecateFromPI(t *testing.T) { } expectPrimaryAddr(addr1) if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr2.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) } // Refresh lifetimes of addr generated from prefix2. @@ -2084,7 +2084,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { // addr1 is deprecated but if explicitly requested, it should be used. fullAddr1 := tcpip.FullAddress{Addr: addr1.Address, NIC: nicID} if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Refresh valid lifetime for addr of prefix1, w/ 0 preferred lifetime to make @@ -2097,7 +2097,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { } expectPrimaryAddr(addr2) if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Refresh lifetimes for addr of prefix1. @@ -2121,7 +2121,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { // addr2 should be the primary endpoint now since it is not deprecated. expectPrimaryAddr(addr2) if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Wait for addr of prefix1 to be invalidated. @@ -2564,7 +2564,7 @@ func TestAutoGenAddrAfterRemoval(t *testing.T) { AddressWithPrefix: addr2, } if err := s.AddProtocolAddressWithOptions(nicID, protoAddr2, stack.FirstPrimaryEndpoint); err != nil { - t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d, %s) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err) + t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err) } // addr2 should be more preferred now since it is at the front of the primary // list. 
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 3f8a2a095..c7634ceb1 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -1445,19 +1445,19 @@ func TestOutgoingBroadcastWithEmptyRouteTable(t *testing.T) { protoAddr := tcpip.ProtocolAddress{Protocol: fakeNetNumber, AddressWithPrefix: tcpip.AddressWithPrefix{header.IPv4Any, 0}} if err := s.AddProtocolAddress(1, protoAddr); err != nil { - t.Fatalf("AddProtocolAddress(1, %s) failed: %s", protoAddr, err) + t.Fatalf("AddProtocolAddress(1, %v) failed: %v", protoAddr, err) } r, err := s.FindRoute(1, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */) if err != nil { - t.Fatalf("FindRoute(1, %s, %s, %d) failed: %s", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) + t.Fatalf("FindRoute(1, %v, %v, %d) failed: %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) } if err := verifyRoute(r, stack.Route{LocalAddress: header.IPv4Any, RemoteAddress: header.IPv4Broadcast}); err != nil { - t.Errorf("FindRoute(1, %s, %s, %d) returned unexpected Route: %s)", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) + t.Errorf("FindRoute(1, %v, %v, %d) returned unexpected Route: %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) } // If the NIC doesn't exist, it won't work. if _, err := s.FindRoute(2, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */); err != tcpip.ErrNetworkUnreachable { - t.Fatalf("got FindRoute(2, %s, %s, %d) = %s want = %s", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, tcpip.ErrNetworkUnreachable) + t.Fatalf("got FindRoute(2, %v, %v, %d) = %v want = %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, tcpip.ErrNetworkUnreachable) } } @@ -1483,12 +1483,12 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) { } nic1ProtoAddr := tcpip.ProtocolAddress{fakeNetNumber, nic1Addr} if err := s.AddProtocolAddress(1, nic1ProtoAddr); err != nil { - t.Fatalf("AddProtocolAddress(1, %s) failed: %s", nic1ProtoAddr, err) + t.Fatalf("AddProtocolAddress(1, %v) failed: %v", nic1ProtoAddr, err) } nic2ProtoAddr := tcpip.ProtocolAddress{fakeNetNumber, nic2Addr} if err := s.AddProtocolAddress(2, nic2ProtoAddr); err != nil { - t.Fatalf("AddAddress(2, %s) failed: %s", nic2ProtoAddr, err) + t.Fatalf("AddAddress(2, %v) failed: %v", nic2ProtoAddr, err) } // Set the initial route table. @@ -1503,10 +1503,10 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) { // When an interface is given, the route for a broadcast goes through it. r, err := s.FindRoute(1, nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */) if err != nil { - t.Fatalf("FindRoute(1, %s, %s, %d) failed: %s", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) + t.Fatalf("FindRoute(1, %v, %v, %d) failed: %v", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) } if err := verifyRoute(r, stack.Route{LocalAddress: nic1Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil { - t.Errorf("FindRoute(1, %s, %s, %d) returned unexpected Route: %s)", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) + t.Errorf("FindRoute(1, %v, %v, %d) returned unexpected Route: %v", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) } // When an interface is not given, it consults the route table. 
@@ -2399,7 +2399,7 @@ func TestNICContextPreservation(t *testing.T) { t.Fatalf("got nicinfos[%d] = _, %t, want _, true; nicinfos = %+v", id, ok, nicinfos) } if got, want := nicinfo.Context == test.want, true; got != want { - t.Fatal("got nicinfo.Context == ctx = %t, want %t; nicinfo.Context = %p, ctx = %p", got, want, nicinfo.Context, test.want) + t.Fatalf("got nicinfo.Context == ctx = %t, want %t; nicinfo.Context = %p, ctx = %p", got, want, nicinfo.Context, test.want) } }) } @@ -2768,7 +2768,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { { subnet, err := tcpip.NewSubnet("\x00", "\x00") if err != nil { - t.Fatalf("NewSubnet failed:", err) + t.Fatalf("NewSubnet failed: %v", err) } s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}}) } @@ -2782,11 +2782,11 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // permanentExpired kind. r, err := s.FindRoute(1, "\x01", "\x02", fakeNetNumber, false) if err != nil { - t.Fatal("FindRoute failed:", err) + t.Fatalf("FindRoute failed: %v", err) } defer r.Release() if err := s.RemoveAddress(1, "\x01"); err != nil { - t.Fatalf("RemoveAddress failed:", err) + t.Fatalf("RemoveAddress failed: %v", err) } // @@ -2798,7 +2798,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // Add some other address with peb set to // FirstPrimaryEndpoint. if err := s.AddAddressWithOptions(1, fakeNetNumber, "\x03", stack.FirstPrimaryEndpoint); err != nil { - t.Fatal("AddAddressWithOptions failed:", err) + t.Fatalf("AddAddressWithOptions failed: %v", err) } @@ -2806,7 +2806,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // make sure the new peb was respected. // (The address should just be promoted now). if err := s.AddAddressWithOptions(1, fakeNetNumber, "\x01", ps); err != nil { - t.Fatal("AddAddressWithOptions failed:", err) + t.Fatalf("AddAddressWithOptions failed: %v", err) } var primaryAddrs []tcpip.Address for _, pa := range s.NICInfo()[1].ProtocolAddresses { @@ -2839,11 +2839,11 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // GetMainNICAddress; else, our original address // should be returned. if err := s.RemoveAddress(1, "\x03"); err != nil { - t.Fatalf("RemoveAddress failed:", err) + t.Fatalf("RemoveAddress failed: %v", err) } addr, err = s.GetMainNICAddress(1, fakeNetNumber) if err != nil { - t.Fatal("s.GetMainNICAddress failed:", err) + t.Fatalf("s.GetMainNICAddress failed: %v", err) } if ps == stack.NeverPrimaryEndpoint { if want := (tcpip.AddressWithPrefix{}); addr != want { diff --git a/pkg/tcpip/tcpip_test.go b/pkg/tcpip/tcpip_test.go index 8c0aacffa..1c8e2bc34 100644 --- a/pkg/tcpip/tcpip_test.go +++ b/pkg/tcpip/tcpip_test.go @@ -218,7 +218,7 @@ func TestAddressWithPrefixSubnet(t *testing.T) { gotSubnet := ap.Subnet() wantSubnet, err := NewSubnet(tt.subnetAddr, tt.subnetMask) if err != nil { - t.Error("NewSubnet(%q, %q) failed: %s", tt.subnetAddr, tt.subnetMask, err) + t.Errorf("NewSubnet(%q, %q) failed: %s", tt.subnetAddr, tt.subnetMask, err) continue } if gotSubnet != wantSubnet { diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index ce3df7478..8a87fa0a8 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -284,7 +284,7 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { // are released instantly on Close. 
tcpTW := tcpip.TCPTimeWaitTimeoutOption(1 * time.Millisecond) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpTW); err != nil { - t.Fatalf("e.stack.SetTransportProtocolOption(%d, %s) = %s", tcp.ProtocolNumber, tcpTW, err) + t.Fatalf("e.stack.SetTransportProtocolOption(%d, %v) = %v", tcp.ProtocolNumber, tcpTW, err) } c.EP.Close() @@ -5609,7 +5609,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) { return } if w := tcp.WindowSize(); w == 0 || w > uint16(wantRcvWnd) { - t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w, wantRcvWnd) + t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w) } }, )) diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 0905726c1..b3fe557ca 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -607,7 +607,7 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe // Check the peer address. h := flow.header4Tuple(incoming) if addr.Addr != h.srcAddr.Addr { - c.t.Fatalf("unexpected remote address: got %s, want %s", addr.Addr, h.srcAddr) + c.t.Fatalf("unexpected remote address: got %s, want %v", addr.Addr, h.srcAddr) } // Check the payload. diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go index 01c47c79f..5f1c4b7d6 100644 --- a/runsc/container/test_app/test_app.go +++ b/runsc/container/test_app/test_app.go @@ -96,7 +96,7 @@ func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) listener, err := net.Listen("unix", c.socketPath) if err != nil { - log.Fatal("error listening on socket %q:", c.socketPath, err) + log.Fatalf("error listening on socket %q: %v", c.socketPath, err) } go server(listener, outputFile) diff --git a/test/root/cgroup_test.go b/test/root/cgroup_test.go index 4038661cb..679342def 100644 --- a/test/root/cgroup_test.go +++ b/test/root/cgroup_test.go @@ -53,7 +53,7 @@ func verifyPid(pid int, path string) error { if scanner.Err() != nil { return scanner.Err() } - return fmt.Errorf("got: %s, want: %d", gots, pid) + return fmt.Errorf("got: %v, want: %d", gots, pid) } // TestCgroup sets cgroup options and checks that cgroup was properly configured. @@ -106,7 +106,7 @@ func TestMemCGroup(t *testing.T) { time.Sleep(100 * time.Millisecond) } - t.Fatalf("%vMB is less than %vMB: %v", memUsage>>20, allocMemSize>>20) + t.Fatalf("%vMB is less than %vMB", memUsage>>20, allocMemSize>>20) } // TestCgroup sets cgroup options and checks that cgroup was properly configured. diff --git a/test/runtimes/blacklist_test.go b/test/runtimes/blacklist_test.go index 52f49b984..0ff69ab18 100644 --- a/test/runtimes/blacklist_test.go +++ b/test/runtimes/blacklist_test.go @@ -32,6 +32,6 @@ func TestBlacklists(t *testing.T) { t.Fatalf("error parsing blacklist: %v", err) } if *blacklistFile != "" && len(bl) == 0 { - t.Errorf("got empty blacklist for file %q", blacklistFile) + t.Errorf("got empty blacklist for file %q", *blacklistFile) } } diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go index ddb890dbc..3c98f4570 100644 --- a/test/runtimes/runner.go +++ b/test/runtimes/runner.go @@ -114,7 +114,7 @@ func getTests(d dockerutil.Docker, blacklist map[string]struct{}) ([]testing.Int F: func(t *testing.T) { // Is the test blacklisted? 
if _, ok := blacklist[tc]; ok { - t.Skip("SKIP: blacklisted test %q", tc) + t.Skipf("SKIP: blacklisted test %q", tc) } var ( diff --git a/tools/nogo.json b/tools/nogo.json index cc05ba027..f69999e50 100644 --- a/tools/nogo.json +++ b/tools/nogo.json @@ -27,37 +27,6 @@ "/external/io_opencensus_go/tag/map_codec.go": "allowed: false positive" } }, - "printf": { - "exclude_files": { - ".*_abi_autogen_test.go": "fix: Sprintf format has insufficient args", - "/pkg/segment/test/segment_test.go": "fix: Errorf format %d arg seg.Start is a func value, not called", - "/pkg/tcpip/tcpip_test.go": "fix: Error call has possible formatting directive %q", - "/pkg/tcpip/header/eth_test.go": "fix: Fatalf format %s reads arg #3, but call has 2 args", - "/pkg/tcpip/header/ndp_test.go": "fix: Errorf format %d reads arg #1, but call has 0 args", - "/pkg/eventchannel/event_test.go": "fix: Fatal call has possible formatting directive %v", - "/pkg/tcpip/stack/ndp.go": "fix: Fatalf format %s has arg protocolAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/sentry/fs/fdpipe/pipe_test.go": "fix: Errorf format %s has arg flags of wrong type gvisor.dev/gvisor/pkg/sentry/fs.FileFlags", - "/pkg/sentry/fs/fdpipe/pipe_test.go": "fix: Errorf format %d arg f.FD is a func value, not called", - "/pkg/tcpip/link/fdbased/endpoint.go": "fix: Sprintf format %v with arg p causes recursive String method call", - "/pkg/tcpip/transport/udp/udp_test.go": "fix: Fatalf format %s has arg h.srcAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.FullAddress", - "/pkg/tcpip/transport/tcp/tcp_test.go": "fix: Fatalf format %s has arg tcpTW of wrong type gvisor.dev/gvisor/pkg/tcpip.TCPTimeWaitTimeoutOption", - "/pkg/tcpip/transport/tcp/tcp_test.go": "fix: Errorf call needs 1 arg but has 2 args", - "/pkg/tcpip/stack/ndp_test.go": "fix: Errorf format %s reads arg #3, but call has 2 args", - "/pkg/tcpip/stack/ndp_test.go": "fix: Fatalf format %s reads arg #5, but call has 4 args", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg protoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg nic1ProtoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg nic2ProtoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatal call has possible formatting directive %t", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf call has arguments but no formatting directives", - "/pkg/tcpip/link/fdbased/endpoint.go": "fix: Sprintf format %v with arg p causes recursive String method call", - "/pkg/sentry/fsimpl/tmpfs/stat_test.go": "fix: Errorf format %v reads arg #1, but call has 0 args", - "/runsc/container/test_app/test_app.go": "fix: Fatal call has possible formatting directive %q", - "/test/root/cgroup_test.go": "fix: Errorf format %s has arg gots of wrong type []int", - "/test/root/cgroup_test.go": "fix: Fatalf format %v reads arg #3, but call has 2 args", - "/test/runtimes/runner.go": "fix: Skip call has possible formatting directive %q", - "/test/runtimes/blacklist_test.go": "fix: Errorf format %q has arg blacklistFile of wrong type *string" - } - }, "structtag": { "exclude_files": { "/external/": "allowed: may use arbitrary tags" -- cgit v1.2.3 From b30130567d81157e39b692e0116f86015f0bcc71 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 8 Apr 2020 13:33:44 -0700 Subject: Enable SubprocessExited and 
SubprocessZombie for gVisor Updates #164 PiperOrigin-RevId: 305544029 --- pkg/sentry/fs/proc/task.go | 28 ++++++++++++++++-- pkg/sentry/fsimpl/proc/task.go | 16 ----------- pkg/sentry/fsimpl/proc/task_files.go | 56 ++++++++++++++++++++++++++++++++++-- test/syscalls/linux/proc.cc | 31 ++++++++------------ 4 files changed, 90 insertions(+), 41 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index d6c5dd2c1..4d42eac83 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -57,6 +57,16 @@ func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) { return m, nil } +func checkTaskState(t *kernel.Task) error { + switch t.ExitState() { + case kernel.TaskExitZombie: + return syserror.EACCES + case kernel.TaskExitDead: + return syserror.ESRCH + } + return nil +} + // taskDir represents a task-level directory. // // +stateify savable @@ -254,11 +264,12 @@ func newExe(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { } func (e *exe) executable() (file fsbridge.File, err error) { + if err := checkTaskState(e.t); err != nil { + return nil, err + } e.t.WithMuLocked(func(t *kernel.Task) { mm := t.MemoryManager() if mm == nil { - // TODO(b/34851096): Check shouldn't allow Readlink once the - // Task is zombied. err = syserror.EACCES return } @@ -268,7 +279,7 @@ func (e *exe) executable() (file fsbridge.File, err error) { // (with locks held). file = mm.Executable() if file == nil { - err = syserror.ENOENT + err = syserror.ESRCH } }) return @@ -313,11 +324,22 @@ func newNamespaceSymlink(t *kernel.Task, msrc *fs.MountSource, name string) *fs. return newProcInode(t, n, msrc, fs.Symlink, t) } +// Readlink reads the symlink value. +func (n *namespaceSymlink) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { + if err := checkTaskState(n.t); err != nil { + return "", err + } + return n.Symlink.Readlink(ctx, inode) +} + // Getlink implements fs.InodeOperations.Getlink. func (n *namespaceSymlink) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Dirent, error) { if !kernel.ContextCanTrace(ctx, n.t, false) { return nil, syserror.EACCES } + if err := checkTaskState(n.t); err != nil { + return nil, err + } // Create a new regular file to fake the namespace file. iops := fsutil.NewNoReadWriteFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0777), linux.PROC_SUPER_MAGIC) diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index aee2a4392..888afc0fd 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -214,22 +214,6 @@ func newIO(t *kernel.Task, isThreadGroup bool) *ioData { return &ioData{ioUsage: t} } -func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry { - // Namespace symlinks should contain the namespace name and the inode number - // for the namespace instance, so for example user:[123456]. We currently fake - // the inode number by sticking the symlink inode in its place. - target := fmt.Sprintf("%s:[%d]", ns, ino) - - inode := &kernfs.StaticSymlink{} - // Note: credentials are overridden by taskOwnedInode. - inode.Init(task.Credentials(), ino, target) - - taskInode := &taskOwnedInode{Inode: inode, owner: task} - d := &kernfs.Dentry{} - d.Init(taskInode) - return d -} - // newCgroupData creates inode that shows cgroup information. 
// From man 7 cgroups: "For each cgroup hierarchy of which the process is a // member, there is one entry containing three colon-separated fields: diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 88ea6a6d8..2c6f8bdfc 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -64,6 +64,16 @@ func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) { return m, nil } +func checkTaskState(t *kernel.Task) error { + switch t.ExitState() { + case kernel.TaskExitZombie: + return syserror.EACCES + case kernel.TaskExitDead: + return syserror.ESRCH + } + return nil +} + type bufferWriter struct { buf *bytes.Buffer } @@ -628,11 +638,13 @@ func (s *exeSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, er } func (s *exeSymlink) executable() (file fsbridge.File, err error) { + if err := checkTaskState(s.task); err != nil { + return nil, err + } + s.task.WithMuLocked(func(t *kernel.Task) { mm := t.MemoryManager() if mm == nil { - // TODO(b/34851096): Check shouldn't allow Readlink once the - // Task is zombied. err = syserror.EACCES return } @@ -642,7 +654,7 @@ func (s *exeSymlink) executable() (file fsbridge.File, err error) { // (with locks held). file = mm.Executable() if file == nil { - err = syserror.ENOENT + err = syserror.ESRCH } }) return @@ -709,3 +721,41 @@ func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error { i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf) return nil } + +type namespaceSymlink struct { + kernfs.StaticSymlink + + task *kernel.Task +} + +func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry { + // Namespace symlinks should contain the namespace name and the inode number + // for the namespace instance, so for example user:[123456]. We currently fake + // the inode number by sticking the symlink inode in its place. + target := fmt.Sprintf("%s:[%d]", ns, ino) + + inode := &namespaceSymlink{task: task} + // Note: credentials are overridden by taskOwnedInode. + inode.Init(task.Credentials(), ino, target) + + taskInode := &taskOwnedInode{Inode: inode, owner: task} + d := &kernfs.Dentry{} + d.Init(taskInode) + return d +} + +// Readlink implements Inode. +func (s *namespaceSymlink) Readlink(ctx context.Context) (string, error) { + if err := checkTaskState(s.task); err != nil { + return "", err + } + return s.StaticSymlink.Readlink(ctx) +} + +// Getlink implements Inode.Getlink. +func (s *namespaceSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) { + if err := checkTaskState(s.task); err != nil { + return vfs.VirtualDentry{}, "", err + } + return s.StaticSymlink.Getlink(ctx) +} diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 5a70f6c3b..da98e1f66 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1326,8 +1326,6 @@ TEST(ProcPidSymlink, SubprocessRunning) { SyscallSucceedsWithValue(sizeof(buf))); } -// FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux -// on proc files. 
TEST(ProcPidSymlink, SubprocessZombied) { ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -1337,7 +1335,7 @@ TEST(ProcPidSymlink, SubprocessZombied) { int want = EACCES; if (!IsRunningOnGvisor()) { auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); - if (version.major == 4 && version.minor > 3) { + if (version.major > 4 || (version.major == 4 && version.minor > 3)) { want = ENOENT; } } @@ -1350,30 +1348,25 @@ TEST(ProcPidSymlink, SubprocessZombied) { SyscallFailsWithErrno(want)); } - // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux - // on proc files. + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between linux on proc + // files. // // ~4.3: Syscall fails with EACCES. - // 4.17 & gVisor: Syscall succeeds and returns 1. + // 4.17: Syscall succeeds and returns 1. // - // EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), - // SyscallFailsWithErrno(EACCES)); + if (!IsRunningOnGvisor()) { + return; + } - // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux - // on proc files. - // - // ~4.3: Syscall fails with EACCES. - // 4.17 & gVisor: Syscall succeeds and returns 1. - // - // EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), - // SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + + EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); } // Test whether /proc/PID/ symlinks can be read for an exited process. TEST(ProcPidSymlink, SubprocessExited) { - // FIXME(gvisor.dev/issue/164): These all succeed on gVisor. - SKIP_IF(IsRunningOnGvisor()); - char buf[1]; EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)), -- cgit v1.2.3 From 2907e6da5e9fc7eeda51644db7bec4d15691b384 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Wed, 8 Apr 2020 13:46:51 -0700 Subject: file test: Remove FIXME about FIFO. It is already tested in mknod test. PiperOrigin-RevId: 305546584 --- test/syscalls/linux/file_base.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h index 6f80bc97c..25fdd7106 100644 --- a/test/syscalls/linux/file_base.h +++ b/test/syscalls/linux/file_base.h @@ -52,17 +52,6 @@ class FileTest : public ::testing::Test { test_file_fd_ = ASSERT_NO_ERRNO_AND_VALUE( Open(test_file_name_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR)); - // FIXME(edahlgren): enable when mknod syscall is supported. - // test_fifo_name_ = NewTempAbsPath(); - // ASSERT_THAT(mknod(test_fifo_name_.c_str()), S_IFIFO|0644, 0, - // SyscallSucceeds()); - // ASSERT_THAT(test_fifo_[1] = open(test_fifo_name_.c_str(), - // O_WRONLY), - // SyscallSucceeds()); - // ASSERT_THAT(test_fifo_[0] = open(test_fifo_name_.c_str(), - // O_RDONLY), - // SyscallSucceeds()); - ASSERT_THAT(pipe(test_pipe_), SyscallSucceeds()); ASSERT_THAT(fcntl(test_pipe_[0], F_SETFL, O_NONBLOCK), SyscallSucceeds()); } @@ -96,18 +85,11 @@ class FileTest : public ::testing::Test { CloseFile(); UnlinkFile(); ClosePipes(); - - // FIXME(edahlgren): enable when mknod syscall is supported. 
- // close(test_fifo_[0]); - // close(test_fifo_[1]); - // unlink(test_fifo_name_.c_str()); } std::string test_file_name_; - std::string test_fifo_name_; FileDescriptor test_file_fd_; - int test_fifo_[2]; int test_pipe_[2]; }; -- cgit v1.2.3 From 357f136e42de81b033b65b7f39a4a555275a17e3 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 8 Apr 2020 14:38:09 -0700 Subject: Handle utimes correctly for shared gofer filesystems. Determine system time from within the sentry rather than relying on the remote filesystem to prevent inconsistencies. Resolve related TODOs; the time discrepancies in question don't exist anymore. PiperOrigin-RevId: 305557099 --- pkg/sentry/fs/gofer/util.go | 16 ++++++++++++++-- test/syscalls/linux/utimes.cc | 18 +----------------- 2 files changed, 15 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/gofer/util.go b/pkg/sentry/fs/gofer/util.go index 2d8d3a2ea..47a6c69bf 100644 --- a/pkg/sentry/fs/gofer/util.go +++ b/pkg/sentry/fs/gofer/util.go @@ -20,17 +20,29 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/fs" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" ) func utimes(ctx context.Context, file contextFile, ts fs.TimeSpec) error { if ts.ATimeOmit && ts.MTimeOmit { return nil } + + // Replace requests to use the "system time" with the current time to + // ensure that timestamps remain consistent with the remote + // filesystem. + now := ktime.NowFromContext(ctx) + if ts.ATimeSetSystemTime { + ts.ATime = now + } + if ts.MTimeSetSystemTime { + ts.MTime = now + } mask := p9.SetAttrMask{ ATime: !ts.ATimeOmit, - ATimeNotSystemTime: !ts.ATimeSetSystemTime, + ATimeNotSystemTime: true, MTime: !ts.MTimeOmit, - MTimeNotSystemTime: !ts.MTimeSetSystemTime, + MTimeNotSystemTime: true, } as, ans := ts.ATime.Unix() ms, mns := ts.MTime.Unix() diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc index 3a927a430..22e6d1a85 100644 --- a/test/syscalls/linux/utimes.cc +++ b/test/syscalls/linux/utimes.cc @@ -34,17 +34,10 @@ namespace testing { namespace { -// TODO(b/36516566): utimes(nullptr) does not pick the "now" time in the -// application's time domain, so when asserting that times are within a window, -// we expand the window to allow for differences between the time domains. -constexpr absl::Duration kClockSlack = absl::Milliseconds(100); - // TimeBoxed runs fn, setting before and after to (coarse realtime) times // guaranteed* to come before and after fn started and completed, respectively. // // fn may be called more than once if the clock is adjusted. -// -// * See the comment on kClockSlack. gVisor breaks this guarantee. void TimeBoxed(absl::Time* before, absl::Time* after, std::function const& fn) { do { @@ -69,12 +62,6 @@ void TimeBoxed(absl::Time* before, absl::Time* after, // which could lead to test failures, but that is very unlikely to happen. continue; } - - if (IsRunningOnGvisor()) { - // See comment on kClockSlack. - *before -= kClockSlack; - *after += kClockSlack; - } } while (*after < *before); } @@ -235,10 +222,7 @@ void TestUtimensat(int dirFd, std::string const& path) { EXPECT_GE(mtime3, before); EXPECT_LE(mtime3, after); - if (!IsRunningOnGvisor()) { - // FIXME(b/36516566): Gofers set atime and mtime to different "now" times. 
- EXPECT_EQ(atime3, mtime3); - } + EXPECT_EQ(atime3, mtime3); } TEST(UtimensatTest, OnAbsPath) { -- cgit v1.2.3 From 6dd5a1f3fe55daa8510b1ee5e3a59219aad92af6 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 8 Apr 2020 17:56:55 -0700 Subject: Clean up TODOs PiperOrigin-RevId: 305592245 --- pkg/sentry/fs/tmpfs/fs.go | 3 --- pkg/sentry/fsimpl/kernfs/filesystem.go | 2 +- pkg/sentry/kernel/ptrace.go | 1 - pkg/sentry/vfs/filesystem.go | 2 +- pkg/sentry/vfs/mount.go | 12 ++++++------ pkg/sentry/vfs/mount_test.go | 2 +- runsc/cmd/gofer.go | 5 ++--- test/syscalls/linux/epoll.cc | 4 ---- test/syscalls/linux/file_base.h | 1 + test/syscalls/linux/pwrite64.cc | 9 +-------- test/syscalls/linux/tuntap.cc | 7 ++++--- test/syscalls/linux/write.cc | 10 ++-------- tools/go_generics/defs.bzl | 1 - 13 files changed, 19 insertions(+), 40 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go index d5be56c3f..bc117ca6a 100644 --- a/pkg/sentry/fs/tmpfs/fs.go +++ b/pkg/sentry/fs/tmpfs/fs.go @@ -44,9 +44,6 @@ const ( // lookup. cacheRevalidate = "revalidate" - // TODO(edahlgren/mpratt): support a tmpfs size limit. - // size = "size" - // Permissions that exceed modeMask will be rejected. modeMask = 01777 diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 16a3c18ae..4433071aa 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -682,7 +682,7 @@ func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu if err != nil { return linux.Statfs{}, err } - // TODO: actually implement statfs + // TODO(gvisor.dev/issue/1193): actually implement statfs. return linux.Statfs{}, syserror.ENOSYS } diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index 35ad97d5d..e23e796ef 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -184,7 +184,6 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { if targetCreds.PermittedCaps&^callerCreds.PermittedCaps != 0 { return false } - // TODO: Yama LSM return true } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index cd34782ff..bef1bd312 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -497,7 +497,7 @@ type FilesystemImpl interface { // Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl. PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error - // TODO: inotify_add_watch() + // TODO(gvisor.dev/issue/1479): inotify_add_watch() } // PrependPathAtVFSRootError is returned by implementations of diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 1b8ecc415..f06946103 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -233,9 +233,9 @@ func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentia } vd.dentry.mu.Lock() } - // TODO: Linux requires that either both the mount point and the mount root - // are directories, or neither are, and returns ENOTDIR if this is not the - // case. + // TODO(gvisor.dev/issue/1035): Linux requires that either both the mount + // point and the mount root are directories, or neither are, and returns + // ENOTDIR if this is not the case. 
mntns := vd.mount.ns mnt := newMount(vfs, fs, root, mntns, opts) vfs.mounts.seq.BeginWrite() @@ -274,9 +274,9 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti } } - // TODO(jamieliu): Linux special-cases umount of the caller's root, which - // we don't implement yet (we'll just fail it since the caller holds a - // reference on it). + // TODO(gvisor.dev/issue/1035): Linux special-cases umount of the caller's + // root, which we don't implement yet (we'll just fail it since the caller + // holds a reference on it). vfs.mounts.seq.BeginWrite() if opts.Flags&linux.MNT_DETACH == 0 { diff --git a/pkg/sentry/vfs/mount_test.go b/pkg/sentry/vfs/mount_test.go index 3b933468d..3335e4057 100644 --- a/pkg/sentry/vfs/mount_test.go +++ b/pkg/sentry/vfs/mount_test.go @@ -55,7 +55,7 @@ func TestMountTableInsertLookup(t *testing.T) { } } -// TODO: concurrent lookup/insertion/removal +// TODO(gvisor.dev/issue/1035): concurrent lookup/insertion/removal. // must be powers of 2 var benchNumMounts = []int{1 << 2, 1 << 5, 1 << 8} diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 02e5af3d3..28f0d54b9 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -272,9 +272,8 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error { root := spec.Root.Path if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - // FIXME: runsc can't be re-executed without - // /proc, so we create a tmpfs mount, mount ./proc and ./root - // there, then move this mount to the root and after + // runsc can't be re-executed without /proc, so we create a tmpfs mount, + // mount ./proc and ./root there, then move this mount to the root and after // setCapsAndCallSelf, runsc will chroot into /root. // // We need a directory to construct a new root and we know that diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc index a4f8f3cec..f57d38dc7 100644 --- a/test/syscalls/linux/epoll.cc +++ b/test/syscalls/linux/epoll.cc @@ -56,10 +56,6 @@ TEST(EpollTest, AllWritable) { struct epoll_event result[kFDsPerEpoll]; ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), SyscallSucceedsWithValue(kFDsPerEpoll)); - // TODO(edahlgren): Why do some tests check epoll_event::data, and others - // don't? Does Linux actually guarantee that, in any of these test cases, - // epoll_wait will necessarily write out the epoll_events in the order that - // they were registered? for (int i = 0; i < kFDsPerEpoll; i++) { ASSERT_EQ(result[i].events, EPOLLOUT); } diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h index 25fdd7106..fb418e052 100644 --- a/test/syscalls/linux/file_base.h +++ b/test/syscalls/linux/file_base.h @@ -87,6 +87,7 @@ class FileTest : public ::testing::Test { ClosePipes(); } + protected: std::string test_file_name_; FileDescriptor test_file_fd_; diff --git a/test/syscalls/linux/pwrite64.cc b/test/syscalls/linux/pwrite64.cc index b48fe540d..c2f72e010 100644 --- a/test/syscalls/linux/pwrite64.cc +++ b/test/syscalls/linux/pwrite64.cc @@ -27,14 +27,7 @@ namespace testing { namespace { -// This test is currently very rudimentary. -// -// TODO(edahlgren): -// * bad buffer states (EFAULT). -// * bad fds (wrong permission, wrong type of file, EBADF). -// * check offset is not incremented. -// * check for EOF. -// * writing to pipes, symlinks, special files. +// TODO(gvisor.dev/issue/2370): This test is currently very rudimentary. 
class Pwrite64 : public ::testing::Test { void SetUp() override { name_ = NewTempAbsPath(); diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index 53ad2dda3..3a8ba37eb 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -242,7 +242,7 @@ TEST_F(TuntapTest, InvalidReadWrite) { TEST_F(TuntapTest, WriteToDownDevice) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); - // FIXME: gVisor always creates enabled/up'd interfaces. + // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces. SKIP_IF(IsRunningOnGvisor()); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); @@ -280,10 +280,11 @@ PosixErrorOr OpenAndAttachTap( &addr, sizeof(addr))); if (!IsRunningOnGvisor()) { - // FIXME: gVisor doesn't support setting MAC address on interfaces yet. + // FIXME(b/110961832): gVisor doesn't support setting MAC address on + // interfaces yet. RETURN_IF_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA))); - // FIXME: gVisor always creates enabled/up'd interfaces. + // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces. RETURN_IF_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP)); } diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc index 9b219cfd6..39b5b2f56 100644 --- a/test/syscalls/linux/write.cc +++ b/test/syscalls/linux/write.cc @@ -31,14 +31,8 @@ namespace gvisor { namespace testing { namespace { -// This test is currently very rudimentary. -// -// TODO(edahlgren): -// * bad buffer states (EFAULT). -// * bad fds (wrong permission, wrong type of file, EBADF). -// * check offset is incremented. -// * check for EOF. -// * writing to pipes, symlinks, special files. + +// TODO(gvisor.dev/issue/2370): This test is currently very rudimentary. class WriteTest : public ::testing::Test { public: ssize_t WriteBytes(int fd, int bytes) { diff --git a/tools/go_generics/defs.bzl b/tools/go_generics/defs.bzl index c5be52ecd..8c9995fd4 100644 --- a/tools/go_generics/defs.bzl +++ b/tools/go_generics/defs.bzl @@ -105,7 +105,6 @@ def _go_template_instance_impl(ctx): executable = ctx.executable._tool, ) - # TODO: How can we get the dependencies out? return struct( files = depset([output]), ) -- cgit v1.2.3 From 7297fd7238e17803e073fb5a5ef85edf992bdf6b Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Wed, 8 Apr 2020 19:40:15 -0700 Subject: Bump proc_test's kRSSTolerance to 10MB. PiperOrigin-RevId: 305604557 --- test/syscalls/linux/proc.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index da98e1f66..79a625ebc 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -994,7 +994,7 @@ constexpr uint64_t kMappingSize = 100 << 20; // Tolerance on RSS comparisons to account for background thread mappings, // reclaimed pages, newly faulted pages, etc. -constexpr uint64_t kRSSTolerance = 5 << 20; +constexpr uint64_t kRSSTolerance = 10 << 20; // Capture RSS before and after an anonymous mapping with passed prot. void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) { -- cgit v1.2.3 From a10389e783aab5f530641394ef44c8a1dede9372 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 8 Apr 2020 23:02:09 -0700 Subject: splice: cap splice calls to MAX_RW_COUNT The Linux does the same. 
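For reference, the cap described above amounts to clamping the requested
transfer length before the copy loop runs. A minimal Go sketch of that check
(the helper name is illustrative only and not part of the diff below; the
constant is Linux's MAX_RW_COUNT, i.e. INT_MAX rounded down to the page size,
0x7ffff000 with 4 KiB pages):

    // clampSpliceLength limits a single splice/sendfile transfer to
    // MAX_RW_COUNT, matching Linux behavior.
    func clampSpliceLength(length int64) int64 {
        const maxRWCount = 0x7ffff000
        if length > maxRWCount {
            return maxRWCount
        }
        return length
    }
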
Reported-by: syzbot+e81716e8956e92e9d56b@syzkaller.appspotmail.com PiperOrigin-RevId: 305625439 --- pkg/sentry/syscalls/linux/sys_splice.go | 4 ++ test/syscalls/linux/BUILD | 2 + test/syscalls/linux/sendfile_socket.cc | 105 ++++++++++++++------------------ 3 files changed, 53 insertions(+), 58 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index fd642834b..fbc6cf15f 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -29,6 +29,10 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB return 0, syserror.EINVAL } + if opts.Length > int64(kernel.MAX_RW_COUNT) { + opts.Length = int64(kernel.MAX_RW_COUNT) + } + var ( total int64 n int64 diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index d0c431234..ae3017608 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2026,6 +2026,8 @@ cc_binary( "//test/util:file_descriptor", "@com_google_absl//absl/strings", gtest, + ":ip_socket_test_util", + ":unix_domain_socket_test_util", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", diff --git a/test/syscalls/linux/sendfile_socket.cc b/test/syscalls/linux/sendfile_socket.cc index e94672679..c101fe9d2 100644 --- a/test/syscalls/linux/sendfile_socket.cc +++ b/test/syscalls/linux/sendfile_socket.cc @@ -23,6 +23,7 @@ #include "gtest/gtest.h" #include "absl/strings/string_view.h" +#include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/temp_path.h" @@ -35,61 +36,39 @@ namespace { class SendFileTest : public ::testing::TestWithParam { protected: - PosixErrorOr> Sockets() { + PosixErrorOr> Sockets(int type) { // Bind a server socket. int family = GetParam(); - struct sockaddr server_addr = {}; switch (family) { case AF_INET: { - struct sockaddr_in* server_addr_in = - reinterpret_cast(&server_addr); - server_addr_in->sin_family = family; - server_addr_in->sin_addr.s_addr = INADDR_ANY; - break; + if (type == SOCK_STREAM) { + return SocketPairKind{ + "TCP", AF_INET, type, 0, + TCPAcceptBindSocketPairCreator(AF_INET, type, 0, false)} + .Create(); + } else { + return SocketPairKind{ + "UDP", AF_INET, type, 0, + UDPBidirectionalBindSocketPairCreator(AF_INET, type, 0, false)} + .Create(); + } } case AF_UNIX: { - struct sockaddr_un* server_addr_un = - reinterpret_cast(&server_addr); - server_addr_un->sun_family = family; - server_addr_un->sun_path[0] = '\0'; - break; + if (type == SOCK_STREAM) { + return SocketPairKind{ + "UNIX", AF_UNIX, type, 0, + FilesystemAcceptBindSocketPairCreator(AF_UNIX, type, 0)} + .Create(); + } else { + return SocketPairKind{ + "UNIX", AF_UNIX, type, 0, + FilesystemBidirectionalBindSocketPairCreator(AF_UNIX, type, 0)} + .Create(); + } } default: return PosixError(EINVAL); } - int server = socket(family, SOCK_STREAM, 0); - if (bind(server, &server_addr, sizeof(server_addr)) < 0) { - return PosixError(errno); - } - if (listen(server, 1) < 0) { - close(server); - return PosixError(errno); - } - - // Fetch the address; both are anonymous. - socklen_t length = sizeof(server_addr); - if (getsockname(server, &server_addr, &length) < 0) { - close(server); - return PosixError(errno); - } - - // Connect the client. 
- int client = socket(family, SOCK_STREAM, 0); - if (connect(client, &server_addr, length) < 0) { - close(server); - close(client); - return PosixError(errno); - } - - // Accept on the server. - int server_client = accept(server, nullptr, 0); - if (server_client < 0) { - close(server); - close(client); - return PosixError(errno); - } - close(server); - return std::make_tuple(client, server_client); } }; @@ -106,9 +85,7 @@ TEST_P(SendFileTest, SendMultiple) { const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); // Create sockets. - std::tuple fds = ASSERT_NO_ERRNO_AND_VALUE(Sockets()); - const FileDescriptor server(std::get<0>(fds)); - FileDescriptor client(std::get<1>(fds)); // non-const, reset is used. + auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_STREAM)); // Thread that reads data from socket and dumps to a file. ScopedThread th([&] { @@ -118,7 +95,7 @@ TEST_P(SendFileTest, SendMultiple) { // Read until socket is closed. char buf[10240]; for (int cnt = 0;; cnt++) { - int r = RetryEINTR(read)(server.get(), buf, sizeof(buf)); + int r = RetryEINTR(read)(socks->first_fd(), buf, sizeof(buf)); // We cannot afford to save on every read() call. if (cnt % 1000 == 0) { ASSERT_THAT(r, SyscallSucceeds()); @@ -152,7 +129,7 @@ TEST_P(SendFileTest, SendMultiple) { << ", remain=" << remain << std::endl; // Send data and verify that sendfile returns the correct value. - int res = sendfile(client.get(), inf.get(), nullptr, remain); + int res = sendfile(socks->second_fd(), inf.get(), nullptr, remain); // We cannot afford to save on every sendfile() call. if (cnt % 120 == 0) { MaybeSave(); @@ -169,7 +146,7 @@ TEST_P(SendFileTest, SendMultiple) { } // Close socket to stop thread. - client.reset(); + close(socks->release_second_fd()); th.Join(); // Verify that the output file has the correct data. @@ -183,9 +160,7 @@ TEST_P(SendFileTest, SendMultiple) { TEST_P(SendFileTest, Shutdown) { // Create a socket. - std::tuple fds = ASSERT_NO_ERRNO_AND_VALUE(Sockets()); - const FileDescriptor client(std::get<0>(fds)); - FileDescriptor server(std::get<1>(fds)); // non-const, reset below. + auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_STREAM)); // If this is a TCP socket, then turn off linger. if (GetParam() == AF_INET) { @@ -193,7 +168,7 @@ TEST_P(SendFileTest, Shutdown) { sl.l_onoff = 1; sl.l_linger = 0; ASSERT_THAT( - setsockopt(server.get(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), + setsockopt(socks->first_fd(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), SyscallSucceeds()); } @@ -212,12 +187,12 @@ TEST_P(SendFileTest, Shutdown) { ScopedThread t([&]() { size_t done = 0; while (done < data.size()) { - int n = RetryEINTR(read)(server.get(), data.data(), data.size()); + int n = RetryEINTR(read)(socks->first_fd(), data.data(), data.size()); ASSERT_THAT(n, SyscallSucceeds()); done += n; } // Close the server side socket. - server.reset(); + close(socks->release_first_fd()); }); // Continuously stream from the file to the socket. Note we do not assert @@ -225,7 +200,7 @@ TEST_P(SendFileTest, Shutdown) { // data is written. Eventually, we should get a connection reset error. while (1) { off_t offset = 0; // Always read from the start. 
- int n = sendfile(client.get(), inf.get(), &offset, data.size()); + int n = sendfile(socks->second_fd(), inf.get(), &offset, data.size()); EXPECT_THAT(n, AnyOf(SyscallFailsWithErrno(ECONNRESET), SyscallFailsWithErrno(EPIPE), SyscallSucceeds())); if (n <= 0) { @@ -234,6 +209,20 @@ TEST_P(SendFileTest, Shutdown) { } } +TEST_P(SendFileTest, SendpageFromEmptyFileToUDP) { + auto socks = ASSERT_NO_ERRNO_AND_VALUE(Sockets(SOCK_DGRAM)); + + TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + // The value to the count argument has to be so that it is impossible to + // allocate a buffer of this size. In Linux, sendfile transfer at most + // 0x7ffff000 (MAX_RW_COUNT) bytes. + EXPECT_THAT(sendfile(socks->first_fd(), fd.get(), 0x0, 0x8000000000004), + SyscallSucceedsWithValue(0)); +} + INSTANTIATE_TEST_SUITE_P(AddressFamily, SendFileTest, ::testing::Values(AF_UNIX, AF_INET)); -- cgit v1.2.3 From 21e438d257861eadc1dafcee914e4a51cffd3852 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Wed, 8 Apr 2020 23:28:52 -0700 Subject: Dereference pointers in Layer's Stringer impl Dereference any fields which are pointers before string formatting so that the value pointed to ends up in the string representation. Tested: Added TestLayerStringFormat to //third_party/gvisor/test/packetimpact/testbench:testbench_test PiperOrigin-RevId: 305627821 --- test/packetimpact/testbench/BUILD | 1 + test/packetimpact/testbench/layers.go | 2 +- test/packetimpact/testbench/layers_test.go | 95 ++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index 199823419..838a10ffe 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -36,4 +36,5 @@ go_test( size = "small", srcs = ["layers_test.go"], library = ":testbench", + deps = ["//pkg/tcpip"], ) diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 4d6625941..093a46e23 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -118,7 +118,7 @@ func stringLayer(l Layer) string { if v.IsNil() { continue } - ret = append(ret, fmt.Sprintf("%s:%v", t.Name, v)) + ret = append(ret, fmt.Sprintf("%s:%v", t.Name, reflect.Indirect(v))) } return fmt.Sprintf("&%s{%s}", t, strings.Join(ret, " ")) } diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index b39839625..8ffc26bf9 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -16,6 +16,8 @@ package testbench import "testing" +import "gvisor.dev/gvisor/pkg/tcpip" + func TestLayerMatch(t *testing.T) { var nilPayload *Payload noPayload := &Payload{} @@ -47,3 +49,96 @@ func TestLayerMatch(t *testing.T) { } } } + +func TestLayerStringFormat(t *testing.T) { + for _, tt := range []struct { + name string + l Layer + want string + }{ + { + name: "TCP", + l: &TCP{ + SrcPort: Uint16(34785), + DstPort: Uint16(47767), + SeqNum: Uint32(3452155723), + AckNum: Uint32(2596996163), + DataOffset: Uint8(5), + Flags: Uint8(20), + WindowSize: Uint16(64240), + Checksum: Uint16(0x2e2b), + }, + want: "&testbench.TCP{" + + "SrcPort:34785 " + + "DstPort:47767 " + + "SeqNum:3452155723 " + + "AckNum:2596996163 " + + "DataOffset:5 " + + "Flags:20 " + + "WindowSize:64240 " + + "Checksum:11819" + + "}", + }, + { + name: "UDP", + l: 
&UDP{ + SrcPort: Uint16(34785), + DstPort: Uint16(47767), + Length: Uint16(12), + }, + want: "&testbench.UDP{" + + "SrcPort:34785 " + + "DstPort:47767 " + + "Length:12" + + "}", + }, + { + name: "IPv4", + l: &IPv4{ + IHL: Uint8(5), + TOS: Uint8(0), + TotalLength: Uint16(44), + ID: Uint16(0), + Flags: Uint8(2), + FragmentOffset: Uint16(0), + TTL: Uint8(64), + Protocol: Uint8(6), + Checksum: Uint16(0x2e2b), + SrcAddr: Address(tcpip.Address([]byte{197, 34, 63, 10})), + DstAddr: Address(tcpip.Address([]byte{197, 34, 63, 20})), + }, + want: "&testbench.IPv4{" + + "IHL:5 " + + "TOS:0 " + + "TotalLength:44 " + + "ID:0 " + + "Flags:2 " + + "FragmentOffset:0 " + + "TTL:64 " + + "Protocol:6 " + + "Checksum:11819 " + + "SrcAddr:197.34.63.10 " + + "DstAddr:197.34.63.20" + + "}", + }, + { + name: "Ether", + l: &Ether{ + SrcAddr: LinkAddress(tcpip.LinkAddress([]byte{0x02, 0x42, 0xc5, 0x22, 0x3f, 0x0a})), + DstAddr: LinkAddress(tcpip.LinkAddress([]byte{0x02, 0x42, 0xc5, 0x22, 0x3f, 0x14})), + Type: NetworkProtocolNumber(4), + }, + want: "&testbench.Ether{" + + "SrcAddr:02:42:c5:22:3f:0a " + + "DstAddr:02:42:c5:22:3f:14 " + + "Type:4" + + "}", + }, + } { + t.Run(tt.name, func(t *testing.T) { + if got := tt.l.String(); got != tt.want { + t.Errorf("%s.String() = %s, want: %s", tt.name, got, tt.want) + } + }) + } +} -- cgit v1.2.3 From 64c2b490671852aaa024a4bb4757eef309fadf18 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Thu, 9 Apr 2020 13:33:18 -0700 Subject: Dedup netlink utility functions in tests. PiperOrigin-RevId: 305749697 --- test/syscalls/linux/BUILD | 3 +- test/syscalls/linux/socket_netlink_route.cc | 75 ++++-------------------- test/syscalls/linux/socket_netlink_route_util.cc | 7 +-- test/syscalls/linux/socket_netlink_route_util.h | 4 +- test/syscalls/linux/tuntap.cc | 19 +++--- 5 files changed, 24 insertions(+), 84 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ae3017608..96ca39583 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -138,7 +138,6 @@ cc_library( hdrs = ["socket_netlink_route_util.h"], deps = [ ":socket_netlink_util", - "@com_google_absl//absl/types:optional", ], ) @@ -2804,13 +2803,13 @@ cc_binary( srcs = ["socket_netlink_route.cc"], linkstatic = 1, deps = [ + ":socket_netlink_route_util", ":socket_netlink_util", ":socket_test_util", "//test/util:capability_util", "//test/util:cleanup", "//test/util:file_descriptor", "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/types:optional", gtest, "//test/util:test_main", "//test/util:test_util", diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index 2efb96bc3..fbe61c5a0 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -26,7 +26,7 @@ #include "gtest/gtest.h" #include "absl/strings/str_format.h" -#include "absl/types/optional.h" +#include "test/syscalls/linux/socket_netlink_route_util.h" #include "test/syscalls/linux/socket_netlink_util.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" @@ -118,24 +118,6 @@ void CheckGetLinkResponse(const struct nlmsghdr* hdr, int seq, int port) { // TODO(mpratt): Check ifinfomsg contents and following attrs. 
} -PosixError DumpLinks( - const FileDescriptor& fd, uint32_t seq, - const std::function& fn) { - struct request { - struct nlmsghdr hdr; - struct ifinfomsg ifm; - }; - - struct request req = {}; - req.hdr.nlmsg_len = sizeof(req); - req.hdr.nlmsg_type = RTM_GETLINK; - req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; - req.hdr.nlmsg_seq = seq; - req.ifm.ifi_family = AF_UNSPEC; - - return NetlinkRequestResponse(fd, &req, sizeof(req), fn, false); -} - TEST(NetlinkRouteTest, GetLinkDump) { FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); @@ -161,37 +143,6 @@ TEST(NetlinkRouteTest, GetLinkDump) { EXPECT_TRUE(loopbackFound); } -struct Link { - int index; - std::string name; -}; - -PosixErrorOr> FindLoopbackLink() { - ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); - - absl::optional link; - RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { - if (hdr->nlmsg_type != RTM_NEWLINK || - hdr->nlmsg_len < NLMSG_SPACE(sizeof(struct ifinfomsg))) { - return; - } - const struct ifinfomsg* msg = - reinterpret_cast(NLMSG_DATA(hdr)); - if (msg->ifi_type == ARPHRD_LOOPBACK) { - const auto* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); - if (rta == nullptr) { - // Ignore links that do not have a name. - return; - } - - link = Link(); - link->index = msg->ifi_index; - link->name = std::string(reinterpret_cast(RTA_DATA(rta))); - } - })); - return link; -} - // CheckLinkMsg checks a netlink message against an expected link. void CheckLinkMsg(const struct nlmsghdr* hdr, const Link& link) { ASSERT_THAT(hdr->nlmsg_type, Eq(RTM_NEWLINK)); @@ -209,9 +160,7 @@ void CheckLinkMsg(const struct nlmsghdr* hdr, const Link& link) { } TEST(NetlinkRouteTest, GetLinkByIndex) { - absl::optional loopback_link = - ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); - ASSERT_TRUE(loopback_link.has_value()); + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); @@ -227,13 +176,13 @@ TEST(NetlinkRouteTest, GetLinkByIndex) { req.hdr.nlmsg_flags = NLM_F_REQUEST; req.hdr.nlmsg_seq = kSeq; req.ifm.ifi_family = AF_UNSPEC; - req.ifm.ifi_index = loopback_link->index; + req.ifm.ifi_index = loopback_link.index; bool found = false; ASSERT_NO_ERRNO(NetlinkRequestResponse( fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) { - CheckLinkMsg(hdr, *loopback_link); + CheckLinkMsg(hdr, loopback_link); found = true; }, false)); @@ -241,9 +190,7 @@ TEST(NetlinkRouteTest, GetLinkByIndex) { } TEST(NetlinkRouteTest, GetLinkByName) { - absl::optional loopback_link = - ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); - ASSERT_TRUE(loopback_link.has_value()); + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); @@ -262,8 +209,8 @@ TEST(NetlinkRouteTest, GetLinkByName) { req.hdr.nlmsg_seq = kSeq; req.ifm.ifi_family = AF_UNSPEC; req.rtattr.rta_type = IFLA_IFNAME; - req.rtattr.rta_len = RTA_LENGTH(loopback_link->name.size() + 1); - strncpy(req.ifname, loopback_link->name.c_str(), sizeof(req.ifname)); + req.rtattr.rta_len = RTA_LENGTH(loopback_link.name.size() + 1); + strncpy(req.ifname, loopback_link.name.c_str(), sizeof(req.ifname)); req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifm)) + NLMSG_ALIGN(req.rtattr.rta_len); @@ -271,7 +218,7 @@ TEST(NetlinkRouteTest, GetLinkByName) { ASSERT_NO_ERRNO(NetlinkRequestResponse( fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) { - CheckLinkMsg(hdr, 
*loopback_link); + CheckLinkMsg(hdr, loopback_link); found = true; }, false)); @@ -523,9 +470,7 @@ TEST(NetlinkRouteTest, LookupAll) { TEST(NetlinkRouteTest, AddAddr) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); - absl::optional loopback_link = - ASSERT_NO_ERRNO_AND_VALUE(FindLoopbackLink()); - ASSERT_TRUE(loopback_link.has_value()); + Link loopback_link = ASSERT_NO_ERRNO_AND_VALUE(LoopbackLink()); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); @@ -545,7 +490,7 @@ TEST(NetlinkRouteTest, AddAddr) { req.ifa.ifa_prefixlen = 24; req.ifa.ifa_flags = 0; req.ifa.ifa_scope = 0; - req.ifa.ifa_index = loopback_link->index; + req.ifa.ifa_index = loopback_link.index; req.rtattr.rta_type = IFA_LOCAL; req.rtattr.rta_len = RTA_LENGTH(sizeof(req.addr)); inet_pton(AF_INET, "10.0.0.1", &req.addr); diff --git a/test/syscalls/linux/socket_netlink_route_util.cc b/test/syscalls/linux/socket_netlink_route_util.cc index 53eb3b6b2..bde1dbb4d 100644 --- a/test/syscalls/linux/socket_netlink_route_util.cc +++ b/test/syscalls/linux/socket_netlink_route_util.cc @@ -18,7 +18,6 @@ #include #include -#include "absl/types/optional.h" #include "test/syscalls/linux/socket_netlink_util.h" namespace gvisor { @@ -73,14 +72,14 @@ PosixErrorOr> DumpLinks() { return links; } -PosixErrorOr> FindLoopbackLink() { +PosixErrorOr LoopbackLink() { ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); for (const auto& link : links) { if (link.type == ARPHRD_LOOPBACK) { - return absl::optional(link); + return link; } } - return absl::optional(); + return PosixError(ENOENT, "loopback link not found"); } PosixError LinkAddLocalAddr(int index, int family, int prefixlen, diff --git a/test/syscalls/linux/socket_netlink_route_util.h b/test/syscalls/linux/socket_netlink_route_util.h index 2c018e487..149c4a7f6 100644 --- a/test/syscalls/linux/socket_netlink_route_util.h +++ b/test/syscalls/linux/socket_netlink_route_util.h @@ -20,7 +20,6 @@ #include -#include "absl/types/optional.h" #include "test/syscalls/linux/socket_netlink_util.h" namespace gvisor { @@ -37,7 +36,8 @@ PosixError DumpLinks(const FileDescriptor& fd, uint32_t seq, PosixErrorOr> DumpLinks(); -PosixErrorOr> FindLoopbackLink(); +// Returns the loopback link on the system. ENOENT if not found. +PosixErrorOr LoopbackLink(); // LinkAddLocalAddr sets IFA_LOCAL attribute on the interface. PosixError LinkAddLocalAddr(int index, int family, int prefixlen, diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index 3a8ba37eb..6195b11e1 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -56,14 +56,14 @@ PosixErrorOr> DumpLinkNames() { return names; } -PosixErrorOr> GetLinkByName(const std::string& name) { +PosixErrorOr GetLinkByName(const std::string& name) { ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); for (const auto& link : links) { if (link.name == name) { - return absl::optional(link); + return link; } } - return absl::optional(); + return PosixError(ENOENT, "interface not found"); } struct pihdr { @@ -268,24 +268,21 @@ PosixErrorOr OpenAndAttachTap( return PosixError(errno); } - ASSIGN_OR_RETURN_ERRNO(absl::optional link, GetLinkByName(dev_name)); - if (!link.has_value()) { - return PosixError(ENOENT, "no link"); - } + ASSIGN_OR_RETURN_ERRNO(auto link, GetLinkByName(dev_name)); // Interface setup. 
struct in_addr addr; inet_pton(AF_INET, dev_ipv4_addr.c_str(), &addr); - EXPECT_NO_ERRNO(LinkAddLocalAddr(link->index, AF_INET, /*prefixlen=*/24, - &addr, sizeof(addr))); + EXPECT_NO_ERRNO(LinkAddLocalAddr(link.index, AF_INET, /*prefixlen=*/24, &addr, + sizeof(addr))); if (!IsRunningOnGvisor()) { // FIXME(b/110961832): gVisor doesn't support setting MAC address on // interfaces yet. - RETURN_IF_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA))); + RETURN_IF_ERRNO(LinkSetMacAddr(link.index, kMacA, sizeof(kMacA))); // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces. - RETURN_IF_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP)); + RETURN_IF_ERRNO(LinkChangeFlags(link.index, IFF_UP, IFF_UP)); } return fd; -- cgit v1.2.3 From ace90f823cf33d1c1180dcd0d2061c702270a0d6 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Thu, 9 Apr 2020 16:21:02 -0700 Subject: Make some functions in IfAddrHelper const. PiperOrigin-RevId: 305782490 --- test/syscalls/linux/ip_socket_test_util.cc | 10 ++--- test/syscalls/linux/ip_socket_test_util.h | 6 +-- .../socket_ipv4_udp_unbound_external_networking.cc | 49 ++++++++++------------ .../socket_ipv4_udp_unbound_external_networking.h | 6 +-- 4 files changed, 32 insertions(+), 39 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index bba022a41..d28dc0db6 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -177,17 +177,17 @@ SocketKind IPv6TCPUnboundSocket(int type) { PosixError IfAddrHelper::Load() { Release(); RETURN_ERROR_IF_SYSCALL_FAIL(getifaddrs(&ifaddr_)); - return PosixError(0); + return NoError(); } void IfAddrHelper::Release() { if (ifaddr_) { freeifaddrs(ifaddr_); + ifaddr_ = nullptr; } - ifaddr_ = nullptr; } -std::vector IfAddrHelper::InterfaceList(int family) { +std::vector IfAddrHelper::InterfaceList(int family) const { std::vector names; for (auto ifa = ifaddr_; ifa != NULL; ifa = ifa->ifa_next) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family) { @@ -198,7 +198,7 @@ std::vector IfAddrHelper::InterfaceList(int family) { return names; } -sockaddr* IfAddrHelper::GetAddr(int family, std::string name) { +const sockaddr* IfAddrHelper::GetAddr(int family, std::string name) const { for (auto ifa = ifaddr_; ifa != NULL; ifa = ifa->ifa_next) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != family) { continue; @@ -210,7 +210,7 @@ sockaddr* IfAddrHelper::GetAddr(int family, std::string name) { return nullptr; } -PosixErrorOr IfAddrHelper::GetIndex(std::string name) { +PosixErrorOr IfAddrHelper::GetIndex(std::string name) const { return InterfaceIndex(name); } diff --git a/test/syscalls/linux/ip_socket_test_util.h b/test/syscalls/linux/ip_socket_test_util.h index 39fd6709d..9c3859fcd 100644 --- a/test/syscalls/linux/ip_socket_test_util.h +++ b/test/syscalls/linux/ip_socket_test_util.h @@ -110,10 +110,10 @@ class IfAddrHelper { PosixError Load(); void Release(); - std::vector InterfaceList(int family); + std::vector InterfaceList(int family) const; - struct sockaddr* GetAddr(int family, std::string name); - PosixErrorOr GetIndex(std::string name); + const sockaddr* GetAddr(int family, std::string name) const; + PosixErrorOr GetIndex(std::string name) const; private: struct ifaddrs* ifaddr_; diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc index 40e673625..d690d9564 100644 --- 
a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc @@ -45,37 +45,31 @@ void IPv4UDPUnboundExternalNetworkingSocketTest::SetUp() { got_if_infos_ = false; // Get interface list. - std::vector if_names; ASSERT_NO_ERRNO(if_helper_.Load()); - if_names = if_helper_.InterfaceList(AF_INET); + std::vector if_names = if_helper_.InterfaceList(AF_INET); if (if_names.size() != 2) { return; } // Figure out which interface is where. - int lo = 0, eth = 1; - if (if_names[lo] != "lo") { - lo = 1; - eth = 0; - } - - if (if_names[lo] != "lo") { - return; - } - - lo_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(if_names[lo])); - lo_if_addr_ = if_helper_.GetAddr(AF_INET, if_names[lo]); - if (lo_if_addr_ == nullptr) { + std::string lo = if_names[0]; + std::string eth = if_names[1]; + if (lo != "lo") std::swap(lo, eth); + if (lo != "lo") return; + + lo_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(lo)); + auto lo_if_addr = if_helper_.GetAddr(AF_INET, lo); + if (lo_if_addr == nullptr) { return; } - lo_if_sin_addr_ = reinterpret_cast(lo_if_addr_)->sin_addr; + lo_if_addr_ = *reinterpret_cast(lo_if_addr); - eth_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(if_names[eth])); - eth_if_addr_ = if_helper_.GetAddr(AF_INET, if_names[eth]); - if (eth_if_addr_ == nullptr) { + eth_if_idx_ = ASSERT_NO_ERRNO_AND_VALUE(if_helper_.GetIndex(eth)); + auto eth_if_addr = if_helper_.GetAddr(AF_INET, eth); + if (eth_if_addr == nullptr) { return; } - eth_if_sin_addr_ = reinterpret_cast(eth_if_addr_)->sin_addr; + eth_if_addr_ = *reinterpret_cast(eth_if_addr); got_if_infos_ = true; } @@ -242,7 +236,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // Bind the non-receiving socket to the unicast ethernet address. auto norecv_addr = rcv1_addr; reinterpret_cast(&norecv_addr.addr)->sin_addr = - eth_if_sin_addr_; + eth_if_addr_.sin_addr; ASSERT_THAT(bind(norcv->get(), reinterpret_cast(&norecv_addr.addr), norecv_addr.addr_len), SyscallSucceedsWithValue(0)); @@ -1028,7 +1022,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); ip_mreqn iface = {}; iface.imr_ifindex = lo_if_idx_; - iface.imr_address = eth_if_sin_addr_; + iface.imr_address = eth_if_addr_.sin_addr; ASSERT_THAT(setsockopt(sender->get(), IPPROTO_IP, IP_MULTICAST_IF, &iface, sizeof(iface)), SyscallSucceeds()); @@ -1058,7 +1052,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, SKIP_IF(IsRunningOnGvisor()); // Verify the received source address. - EXPECT_EQ(eth_if_sin_addr_.s_addr, src_addr_in->sin_addr.s_addr); + EXPECT_EQ(eth_if_addr_.sin_addr.s_addr, src_addr_in->sin_addr.s_addr); } // Check that when we are bound to one interface we can set IP_MULTICAST_IF to @@ -1075,7 +1069,8 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // Create sender and bind to eth interface. 
auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); - ASSERT_THAT(bind(sender->get(), eth_if_addr_, sizeof(sockaddr_in)), + ASSERT_THAT(bind(sender->get(), reinterpret_cast(ð_if_addr_), + sizeof(eth_if_addr_)), SyscallSucceeds()); // Run through all possible combinations of index and address for @@ -1085,9 +1080,9 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, struct in_addr imr_address; } test_data[] = { {lo_if_idx_, {}}, - {0, lo_if_sin_addr_}, - {lo_if_idx_, lo_if_sin_addr_}, - {lo_if_idx_, eth_if_sin_addr_}, + {0, lo_if_addr_.sin_addr}, + {lo_if_idx_, lo_if_addr_.sin_addr}, + {lo_if_idx_, eth_if_addr_.sin_addr}, }; for (auto t : test_data) { ip_mreqn iface = {}; diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h index bec2e96ee..10b90b1e0 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h @@ -36,10 +36,8 @@ class IPv4UDPUnboundExternalNetworkingSocketTest : public SimpleSocketTest { // Interface infos. int lo_if_idx_; int eth_if_idx_; - sockaddr* lo_if_addr_; - sockaddr* eth_if_addr_; - in_addr lo_if_sin_addr_; - in_addr eth_if_sin_addr_; + sockaddr_in lo_if_addr_; + sockaddr_in eth_if_addr_; }; } // namespace testing -- cgit v1.2.3 From 9f87502b4619b60779ce19c41ea0e6bd6582e8e4 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 9 Apr 2020 16:40:12 -0700 Subject: Remove TODOs from Async IO Block and drain requests in io_destroy(2). Note the reason to create read-only mapping. PiperOrigin-RevId: 305786312 --- pkg/sentry/mm/aio_context.go | 101 ++++++++++++++++++++++++----------- pkg/sentry/mm/aio_context_state.go | 2 +- pkg/sentry/syscalls/linux/sys_aio.go | 34 +++++++++--- test/syscalls/linux/aio.cc | 12 +++-- 4 files changed, 107 insertions(+), 42 deletions(-) (limited to 'test') diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index cb29d94b0..379148903 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -59,25 +59,27 @@ func (a *aioManager) newAIOContext(events uint32, id uint64) bool { } a.contexts[id] = &AIOContext{ - done: make(chan struct{}, 1), + requestReady: make(chan struct{}, 1), maxOutstanding: events, } return true } -// destroyAIOContext destroys an asynchronous I/O context. +// destroyAIOContext destroys an asynchronous I/O context. It doesn't wait for +// for pending requests to complete. Returns the destroyed AIOContext so it can +// be drained. // -// False is returned if the context does not exist. -func (a *aioManager) destroyAIOContext(id uint64) bool { +// Nil is returned if the context does not exist. +func (a *aioManager) destroyAIOContext(id uint64) *AIOContext { a.mu.Lock() defer a.mu.Unlock() ctx, ok := a.contexts[id] if !ok { - return false + return nil } delete(a.contexts, id) ctx.destroy() - return true + return ctx } // lookupAIOContext looks up the given context. @@ -102,8 +104,8 @@ type ioResult struct { // // +stateify savable type AIOContext struct { - // done is the notification channel used for all requests. - done chan struct{} `state:"nosave"` + // requestReady is the notification channel used for all requests. + requestReady chan struct{} `state:"nosave"` // mu protects below. 
mu sync.Mutex `state:"nosave"` @@ -129,8 +131,14 @@ func (ctx *AIOContext) destroy() { ctx.mu.Lock() defer ctx.mu.Unlock() ctx.dead = true - if ctx.outstanding == 0 { - close(ctx.done) + ctx.checkForDone() +} + +// Preconditions: ctx.mu must be held by caller. +func (ctx *AIOContext) checkForDone() { + if ctx.dead && ctx.outstanding == 0 { + close(ctx.requestReady) + ctx.requestReady = nil } } @@ -154,11 +162,12 @@ func (ctx *AIOContext) PopRequest() (interface{}, bool) { // Is there anything ready? if e := ctx.results.Front(); e != nil { - ctx.results.Remove(e) - ctx.outstanding-- - if ctx.outstanding == 0 && ctx.dead { - close(ctx.done) + if ctx.outstanding == 0 { + panic("AIOContext outstanding is going negative") } + ctx.outstanding-- + ctx.results.Remove(e) + ctx.checkForDone() return e.data, true } return nil, false @@ -172,26 +181,58 @@ func (ctx *AIOContext) FinishRequest(data interface{}) { // Push to the list and notify opportunistically. The channel notify // here is guaranteed to be safe because outstanding must be non-zero. - // The done channel is only closed when outstanding reaches zero. + // The requestReady channel is only closed when outstanding reaches zero. ctx.results.PushBack(&ioResult{data: data}) select { - case ctx.done <- struct{}{}: + case ctx.requestReady <- struct{}{}: default: } } // WaitChannel returns a channel that is notified when an AIO request is -// completed. -// -// The boolean return value indicates whether or not the context is active. -func (ctx *AIOContext) WaitChannel() (chan struct{}, bool) { +// completed. Returns nil if the context is destroyed and there are no more +// outstanding requests. +func (ctx *AIOContext) WaitChannel() chan struct{} { ctx.mu.Lock() defer ctx.mu.Unlock() - if ctx.outstanding == 0 && ctx.dead { - return nil, false + return ctx.requestReady +} + +// Dead returns true if the context has been destroyed. +func (ctx *AIOContext) Dead() bool { + ctx.mu.Lock() + defer ctx.mu.Unlock() + return ctx.dead +} + +// CancelPendingRequest forgets about a request that hasn't yet completed. +func (ctx *AIOContext) CancelPendingRequest() { + ctx.mu.Lock() + defer ctx.mu.Unlock() + + if ctx.outstanding == 0 { + panic("AIOContext outstanding is going negative") } - return ctx.done, true + ctx.outstanding-- + ctx.checkForDone() +} + +// Drain drops all completed requests. Pending requests remain untouched. +func (ctx *AIOContext) Drain() { + ctx.mu.Lock() + defer ctx.mu.Unlock() + + if ctx.outstanding == 0 { + return + } + size := uint32(ctx.results.Len()) + if ctx.outstanding < size { + panic("AIOContext outstanding is going negative") + } + ctx.outstanding -= size + ctx.results.Reset() + ctx.checkForDone() } // aioMappable implements memmap.MappingIdentity and memmap.Mappable for AIO @@ -332,9 +373,9 @@ func (mm *MemoryManager) NewAIOContext(ctx context.Context, events uint32) (uint Length: aioRingBufferSize, MappingIdentity: m, Mappable: m, - // TODO(fvoznika): Linux does "do_mmap_pgoff(..., PROT_READ | - // PROT_WRITE, ...)" in fs/aio.c:aio_setup_ring(); why do we make this - // mapping read-only? + // Linux uses "do_mmap_pgoff(..., PROT_READ | PROT_WRITE, ...)" in + // fs/aio.c:aio_setup_ring(). Since we don't implement AIO_RING_MAGIC, + // user mode should not write to this page. Perms: usermem.Read, MaxPerms: usermem.Read, }) @@ -349,11 +390,11 @@ func (mm *MemoryManager) NewAIOContext(ctx context.Context, events uint32) (uint return id, nil } -// DestroyAIOContext destroys an asynchronous I/O context. 
It returns false if -// the context does not exist. -func (mm *MemoryManager) DestroyAIOContext(ctx context.Context, id uint64) bool { +// DestroyAIOContext destroys an asynchronous I/O context. It returns the +// destroyed context. nil if the context does not exist. +func (mm *MemoryManager) DestroyAIOContext(ctx context.Context, id uint64) *AIOContext { if _, ok := mm.LookupAIOContext(ctx, id); !ok { - return false + return nil } // Only unmaps after it assured that the address is a valid aio context to diff --git a/pkg/sentry/mm/aio_context_state.go b/pkg/sentry/mm/aio_context_state.go index c37fc9f7b..3dabac1af 100644 --- a/pkg/sentry/mm/aio_context_state.go +++ b/pkg/sentry/mm/aio_context_state.go @@ -16,5 +16,5 @@ package mm // afterLoad is invoked by stateify. func (a *AIOContext) afterLoad() { - a.done = make(chan struct{}, 1) + a.requestReady = make(chan struct{}, 1) } diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index b401978db..38cbeba5a 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -114,14 +114,28 @@ func IoSetup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca func IoDestroy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { id := args[0].Uint64() - // Destroy the given context. - if !t.MemoryManager().DestroyAIOContext(t, id) { + ctx := t.MemoryManager().DestroyAIOContext(t, id) + if ctx == nil { // Does not exist. return 0, nil, syserror.EINVAL } - // FIXME(fvoznika): Linux blocks until all AIO to the destroyed context is - // done. - return 0, nil, nil + + // Drain completed requests amd wait for pending requests until there are no + // more. + for { + ctx.Drain() + + ch := ctx.WaitChannel() + if ch == nil { + // No more requests, we're done. + return 0, nil, nil + } + // The task cannot be interrupted during the wait. Equivalent to + // TASK_UNINTERRUPTIBLE in Linux. + t.UninterruptibleSleepStart(true /* deactivate */) + <-ch + t.UninterruptibleSleepFinish(true /* activate */) + } } // IoGetevents implements linux syscall io_getevents(2). @@ -200,13 +214,13 @@ func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S func waitForRequest(ctx *mm.AIOContext, t *kernel.Task, haveDeadline bool, deadline ktime.Time) (interface{}, error) { for { if v, ok := ctx.PopRequest(); ok { - // Request was readly available. Just return it. + // Request was readily available. Just return it. return v, nil } // Need to wait for request completion. - done, active := ctx.WaitChannel() - if !active { + done := ctx.WaitChannel() + if done == nil { // Context has been destroyed. return nil, syserror.EINVAL } @@ -248,6 +262,10 @@ func memoryFor(t *kernel.Task, cb *ioCallback) (usermem.IOSequence, error) { } func performCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *ioCallback, ioseq usermem.IOSequence, ctx *mm.AIOContext, eventFile *fs.File) { + if ctx.Dead() { + ctx.CancelPendingRequest() + return + } ev := &ioEvent{ Data: cb.Data, Obj: uint64(cbAddr), diff --git a/test/syscalls/linux/aio.cc b/test/syscalls/linux/aio.cc index a33daff17..806d5729e 100644 --- a/test/syscalls/linux/aio.cc +++ b/test/syscalls/linux/aio.cc @@ -89,6 +89,7 @@ class AIOTest : public FileTest { FileTest::TearDown(); if (ctx_ != 0) { ASSERT_THAT(DestroyContext(), SyscallSucceeds()); + ctx_ = 0; } } @@ -188,14 +189,19 @@ TEST_F(AIOTest, BadWrite) { } TEST_F(AIOTest, ExitWithPendingIo) { - // Setup a context that is 5 entries deep. 
- ASSERT_THAT(SetupContext(5), SyscallSucceeds()); + // Setup a context that is 100 entries deep. + ASSERT_THAT(SetupContext(100), SyscallSucceeds()); struct iocb cb = CreateCallback(); struct iocb* cbs[] = {&cb}; // Submit a request but don't complete it to make it pending. - EXPECT_THAT(Submit(1, cbs), SyscallSucceeds()); + for (int i = 0; i < 100; ++i) { + EXPECT_THAT(Submit(1, cbs), SyscallSucceeds()); + } + + ASSERT_THAT(DestroyContext(), SyscallSucceeds()); + ctx_ = 0; } int Submitter(void* arg) { -- cgit v1.2.3 From 35e6b6bf1aeb909a12fb80cc99d5695408a9eaa5 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Tue, 17 Mar 2020 06:59:54 +0000 Subject: Enable syscall fork_test on arm64. Signed-off-by: Haibo Xu Change-Id: I033692bcf4f8139df29e369a12b150d10fccbe32 --- test/syscalls/linux/fork.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index ff8bdfeb0..853f6231a 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -431,7 +431,6 @@ TEST(CloneTest, NewUserNamespacePermitsAllOtherNamespaces) { << "status = " << status; } -#ifdef __x86_64__ // Clone with CLONE_SETTLS and a non-canonical TLS address is rejected. TEST(CloneTest, NonCanonicalTLS) { constexpr uintptr_t kNonCanonical = 1ull << 48; @@ -440,11 +439,25 @@ TEST(CloneTest, NonCanonicalTLS) { // on this. char stack; + // The raw system call interface on x86-64 is: + // long clone(unsigned long flags, void *stack, + // int *parent_tid, int *child_tid, + // unsigned long tls); + // + // While on arm64, the order of the last two arguments is reversed: + // long clone(unsigned long flags, void *stack, + // int *parent_tid, unsigned long tls, + // int *child_tid); +#if defined(__x86_64__) EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, nullptr, kNonCanonical), SyscallFailsWithErrno(EPERM)); -} +#elif defined(__aarch64__) + EXPECT_THAT(syscall(__NR_clone, SIGCHLD | CLONE_SETTLS, &stack, nullptr, + kNonCanonical, nullptr), + SyscallFailsWithErrno(EPERM)); #endif +} } // namespace } // namespace testing -- cgit v1.2.3 From 7aa5caae71c29b0be9047a7c156a9daaa435ebb8 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Wed, 11 Mar 2020 03:21:34 +0000 Subject: Enable syscall ptrace test on arm64. Signed-off-by: Haibo Xu Change-Id: I5bb8fa7d580d173b1438d6465e1adb442216c8fa --- pkg/sentry/arch/arch.go | 3 +++ pkg/sentry/arch/syscalls_amd64.go | 7 +++++++ pkg/sentry/arch/syscalls_arm64.go | 13 ++++++++++++- pkg/sentry/kernel/task_syscall.go | 14 ++++++++++++++ test/syscalls/linux/ptrace.cc | 31 ++++++++++++++++++++++++------- 5 files changed, 60 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go index 1d11cc472..a903d031c 100644 --- a/pkg/sentry/arch/arch.go +++ b/pkg/sentry/arch/arch.go @@ -88,6 +88,9 @@ type Context interface { // SyscallNo returns the syscall number. SyscallNo() uintptr + // SyscallSaveOrig save orignal register value. + SyscallSaveOrig() + // SyscallArgs returns the syscall arguments in an array. SyscallArgs() SyscallArguments diff --git a/pkg/sentry/arch/syscalls_amd64.go b/pkg/sentry/arch/syscalls_amd64.go index 8b4f23007..3859f41ee 100644 --- a/pkg/sentry/arch/syscalls_amd64.go +++ b/pkg/sentry/arch/syscalls_amd64.go @@ -18,6 +18,13 @@ package arch const restartSyscallNr = uintptr(219) +// SyscallSaveOrig save the value of the register which is clobbered in +// syscall handler(doSyscall()). +// +// Noop on x86. 
+func (c *context64) SyscallSaveOrig() { +} + // SyscallNo returns the syscall number according to the 64-bit convention. func (c *context64) SyscallNo() uintptr { return uintptr(c.Regs.Orig_rax) diff --git a/pkg/sentry/arch/syscalls_arm64.go b/pkg/sentry/arch/syscalls_arm64.go index dc13b6124..92d062513 100644 --- a/pkg/sentry/arch/syscalls_arm64.go +++ b/pkg/sentry/arch/syscalls_arm64.go @@ -18,6 +18,17 @@ package arch const restartSyscallNr = uintptr(128) +// SyscallSaveOrig save the value of the register R0 which is clobbered in +// syscall handler(doSyscall()). +// +// In linux, at the entry of the syscall handler(el0_svc_common()), value of R0 +// is saved to the pt_regs.orig_x0 in kernel code. But currently, the orig_x0 +// was not accessible to the user space application, so we have to do the same +// operation in the sentry code to save the R0 value into the App context. +func (c *context64) SyscallSaveOrig() { + c.OrigR0 = c.Regs.Regs[0] +} + // SyscallNo returns the syscall number according to the 64-bit convention. func (c *context64) SyscallNo() uintptr { return uintptr(c.Regs.Regs[8]) @@ -40,7 +51,7 @@ func (c *context64) SyscallNo() uintptr { // R30: the link register. func (c *context64) SyscallArgs() SyscallArguments { return SyscallArguments{ - SyscallArgument{Value: uintptr(c.Regs.Regs[0])}, + SyscallArgument{Value: uintptr(c.OrigR0)}, SyscallArgument{Value: uintptr(c.Regs.Regs[1])}, SyscallArgument{Value: uintptr(c.Regs.Regs[2])}, SyscallArgument{Value: uintptr(c.Regs.Regs[3])}, diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index d555d69a8..3d7a734ef 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -194,6 +194,19 @@ func (t *Task) executeSyscall(sysno uintptr, args arch.SyscallArguments) (rval u // // The syscall path is very hot; avoid defer. func (t *Task) doSyscall() taskRunState { + // Save value of the register which is clobbered in the following + // t.Arch().SetReturn(-ENOSYS) operation. This is dedicated to arm64. + // + // On x86, register rax was shared by syscall number and return + // value, and at the entry of the syscall handler, the rax was + // saved to regs.orig_rax which was exposed to user space. + // But on arm64, syscall number was passed through X8, and the X0 + // was shared by the first syscall argument and return value. The + // X0 was saved to regs.orig_x0 which was not exposed to user space. + // So we have to do the same operation here to save the X0 value + // into the task context. + t.Arch().SyscallSaveOrig() + sysno := t.Arch().SyscallNo() args := t.Arch().SyscallArgs() @@ -269,6 +282,7 @@ func (*runSyscallAfterSyscallEnterStop) execute(t *Task) taskRunState { return (*runSyscallExit)(nil) } args := t.Arch().SyscallArgs() + return t.doSyscallInvoke(sysno, args) } diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc index cb828ff88..926690eb8 100644 --- a/test/syscalls/linux/ptrace.cc +++ b/test/syscalls/linux/ptrace.cc @@ -400,9 +400,11 @@ TEST(PtraceTest, GetRegSet) { // Read exactly the full register set. EXPECT_EQ(iov.iov_len, sizeof(regs)); -#ifdef __x86_64__ +#if defined(__x86_64__) // Child called kill(2), with SIGSTOP as arg 2. EXPECT_EQ(regs.rsi, SIGSTOP); +#elif defined(__aarch64__) + EXPECT_EQ(regs.regs[1], SIGSTOP); #endif // Suppress SIGSTOP and resume the child. 
@@ -752,15 +754,23 @@ TEST(PtraceTest, SyscallSucceeds()); EXPECT_TRUE(siginfo.si_code == SIGTRAP || siginfo.si_code == (SIGTRAP | 0x80)) << "si_code = " << siginfo.si_code; -#ifdef __x86_64__ + { struct user_regs_struct regs = {}; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, ®s), SyscallSucceeds()); + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov), + SyscallSucceeds()); +#if defined(__x86_64__) EXPECT_TRUE(regs.orig_rax == SYS_vfork || regs.orig_rax == SYS_clone) << "orig_rax = " << regs.orig_rax; EXPECT_EQ(grandchild_pid, regs.rax); - } +#elif defined(__aarch64__) + EXPECT_TRUE(regs.regs[8] == SYS_clone) << "regs[8] = " << regs.regs[8]; + EXPECT_EQ(grandchild_pid, regs.regs[0]); #endif // defined(__x86_64__) + } // After this point, the child will be making wait4 syscalls that will be // interrupted by saving, so saving is not permitted. Note that this is @@ -805,14 +815,21 @@ TEST(PtraceTest, SyscallSucceedsWithValue(child_pid)); EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) << " status " << status; -#ifdef __x86_64__ { struct user_regs_struct regs = {}; - ASSERT_THAT(ptrace(PTRACE_GETREGS, child_pid, 0, ®s), SyscallSucceeds()); + struct iovec iov; + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_THAT(ptrace(PTRACE_GETREGSET, child_pid, NT_PRSTATUS, &iov), + SyscallSucceeds()); +#if defined(__x86_64__) EXPECT_EQ(SYS_wait4, regs.orig_rax); EXPECT_EQ(grandchild_pid, regs.rax); - } +#elif defined(__aarch64__) + EXPECT_EQ(SYS_wait4, regs.regs[8]); + EXPECT_EQ(grandchild_pid, regs.regs[0]); #endif // defined(__x86_64__) + } // Detach from the child and wait for it to exit. ASSERT_THAT(ptrace(PTRACE_DETACH, child_pid, 0, 0), SyscallSucceeds()); -- cgit v1.2.3 From 935007937cee1e2867cc4fc5c00b7f370864e241 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 10 Apr 2020 07:13:16 -0700 Subject: test: remove 1s delay after non-blocking socket pair accept It was added in cl/201419897 to deflake socket_ip_tcp_loopback_non_blocking_test_gvisor. It seems we don't need this hack, because the origin issue isn't reproducible without this hack. PiperOrigin-RevId: 305871748 --- test/syscalls/linux/socket_test_util.cc | 5 ----- 1 file changed, 5 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_test_util.cc b/test/syscalls/linux/socket_test_util.cc index 5d3a39868..53b678e94 100644 --- a/test/syscalls/linux/socket_test_util.cc +++ b/test/syscalls/linux/socket_test_util.cc @@ -364,11 +364,6 @@ CreateTCPConnectAcceptSocketPair(int bound, int connected, int type, } MaybeSave(); // Successful accept. - // FIXME(b/110484944) - if (connect_result == -1) { - absl::SleepFor(absl::Seconds(1)); - } - T extra_addr = {}; LocalhostAddr(&extra_addr, dual_stack); return absl::make_unique(connected, accepted, bind_addr, -- cgit v1.2.3 From 12b00c815638a28943c23d3be6ef09b955c6149e Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Fri, 10 Apr 2020 08:20:56 -0700 Subject: Test that RST is sent after ABORT in ESTABLISHED TCP state. 
PiperOrigin-RevId: 305879441 --- test/packetimpact/tests/BUILD | 12 +++++++ .../tests/tcp_noaccept_close_rst_test.go | 37 ++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 test/packetimpact/tests/tcp_noaccept_close_rst_test.go (limited to 'test') diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index a9b2de9b9..956b1addf 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -40,6 +40,18 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_noaccept_close_rst", + srcs = ["tcp_noaccept_close_rst_test.go"], + # TODO(b/153380909): Fix netstack then remove the line below. + netstack = False, + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go new file mode 100644 index 000000000..7ebdd1950 --- /dev/null +++ b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go @@ -0,0 +1,37 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_noaccept_close_rst_test + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func TestTcpNoAcceptCloseReset(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn.Handshake() + defer conn.Close() + dut.Close(listenFd) + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil { + t.Fatalf("expected a RST-ACK packet but got none: %s", err) + } +} -- cgit v1.2.3 From 09ddb5a4262c39744643b612109dd12dcce176a8 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 10 Apr 2020 19:01:39 -0700 Subject: Port extended attributes to VFS2. As in VFS1, we only support the user.* namespace. Plumbing is added to tmpfs and goferfs. Note that because of the slightly different order of checks between VFS2 and Linux, one of the xattr tests needs to be relaxed slightly. Fixes #2363. 
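For illustration only (not part of this change): a minimal sketch of how the user.* restriction described above is visible to applications, using the golang.org/x/sys/unix xattr wrappers. The file path and attribute names below are hypothetical.

package main

import (
	"fmt"
	"log"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	// Assumes a file on a filesystem with user.* xattr support (e.g. tmpfs
	// or goferfs after this change).
	const path = "/tmp/xattr-demo"
	f, err := os.Create(path)
	if err != nil {
		log.Fatalf("create: %v", err)
	}
	f.Close()

	// Attributes in the user.* namespace are supported.
	if err := unix.Setxattr(path, "user.demo", []byte("value"), 0); err != nil {
		log.Fatalf("setxattr(user.demo): %v", err)
	}
	buf := make([]byte, 64)
	n, err := unix.Getxattr(path, "user.demo", buf)
	if err != nil {
		log.Fatalf("getxattr(user.demo): %v", err)
	}
	fmt.Printf("user.demo = %q\n", buf[:n])

	// Attributes outside user.* are rejected (e.g. EOPNOTSUPP or EPERM,
	// depending on the filesystem and privileges).
	if err := unix.Setxattr(path, "trusted.demo", []byte("value"), 0); err != nil {
		fmt.Printf("setxattr(trusted.demo): %v\n", err)
	}
}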
PiperOrigin-RevId: 305985121 --- pkg/sentry/fsimpl/ext/filesystem.go | 4 +- pkg/sentry/fsimpl/gofer/filesystem.go | 12 ++-- pkg/sentry/fsimpl/gofer/gofer.go | 58 +++++++++++---- pkg/sentry/fsimpl/gofer/p9file.go | 14 ++++ pkg/sentry/fsimpl/kernfs/filesystem.go | 4 +- pkg/sentry/fsimpl/tmpfs/BUILD | 1 + pkg/sentry/fsimpl/tmpfs/filesystem.go | 24 +++---- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 77 ++++++++++++++++++++ pkg/sentry/syscalls/linux/vfs2/xattr.go | 13 ++-- pkg/sentry/vfs/anonfs.go | 4 +- pkg/sentry/vfs/file_description.go | 32 ++++++--- pkg/sentry/vfs/file_description_impl_util.go | 4 +- pkg/sentry/vfs/filesystem.go | 24 ++++++- pkg/sentry/vfs/memxattr/BUILD | 15 ++++ pkg/sentry/vfs/memxattr/xattr.go | 102 +++++++++++++++++++++++++++ pkg/sentry/vfs/options.go | 14 ++++ pkg/sentry/vfs/vfs.go | 8 +-- test/syscalls/linux/xattr.cc | 8 +-- 18 files changed, 350 insertions(+), 68 deletions(-) create mode 100644 pkg/sentry/vfs/memxattr/BUILD create mode 100644 pkg/sentry/vfs/memxattr/xattr.go (limited to 'test') diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index 48eaccdbc..afea58f65 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -476,7 +476,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { _, _, err := fs.walk(rp, false) if err != nil { return nil, err @@ -485,7 +485,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([ } // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { _, _, err := fs.walk(rp, false) if err != nil { return "", err diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 137260898..cd744bf5e 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -1080,7 +1080,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { var ds *[]*dentry fs.renameMu.RLock() defer fs.renameMuRUnlockAndCheckCaching(&ds) @@ -1088,11 +1088,11 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([ if err != nil { return nil, err } - return d.listxattr(ctx) + return d.listxattr(ctx, rp.Credentials(), size) } // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. 
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { var ds *[]*dentry fs.renameMu.RLock() defer fs.renameMuRUnlockAndCheckCaching(&ds) @@ -1100,7 +1100,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, nam if err != nil { return "", err } - return d.getxattr(ctx, name) + return d.getxattr(ctx, rp.Credentials(), &opts) } // SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. @@ -1112,7 +1112,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt if err != nil { return err } - return d.setxattr(ctx, &opts) + return d.setxattr(ctx, rp.Credentials(), &opts) } // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. @@ -1124,7 +1124,7 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, if err != nil { return err } - return d.removexattr(ctx, name) + return d.removexattr(ctx, rp.Credentials(), name) } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index bdf11fa65..2485cdb53 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -34,6 +34,7 @@ package gofer import ( "fmt" "strconv" + "strings" "sync" "sync/atomic" "syscall" @@ -1024,21 +1025,50 @@ func (d *dentry) setDeleted() { atomic.StoreUint32(&d.deleted, 1) } -func (d *dentry) listxattr(ctx context.Context) ([]string, error) { - return nil, syserror.ENOTSUP +// We only support xattrs prefixed with "user." (see b/148380782). Currently, +// there is no need to expose any other xattrs through a gofer. +func (d *dentry) listxattr(ctx context.Context, creds *auth.Credentials, size uint64) ([]string, error) { + xattrMap, err := d.file.listXattr(ctx, size) + if err != nil { + return nil, err + } + xattrs := make([]string, 0, len(xattrMap)) + for x := range xattrMap { + if strings.HasPrefix(x, linux.XATTR_USER_PREFIX) { + xattrs = append(xattrs, x) + } + } + return xattrs, nil } -func (d *dentry) getxattr(ctx context.Context, name string) (string, error) { - // TODO(jamieliu): add vfs.GetxattrOptions.Size - return d.file.getXattr(ctx, name, linux.XATTR_SIZE_MAX) +func (d *dentry) getxattr(ctx context.Context, creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) { + if err := d.checkPermissions(creds, vfs.MayRead); err != nil { + return "", err + } + if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { + return "", syserror.EOPNOTSUPP + } + return d.file.getXattr(ctx, opts.Name, opts.Size) } -func (d *dentry) setxattr(ctx context.Context, opts *vfs.SetxattrOptions) error { +func (d *dentry) setxattr(ctx context.Context, creds *auth.Credentials, opts *vfs.SetxattrOptions) error { + if err := d.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags) } -func (d *dentry) removexattr(ctx context.Context, name string) error { - return syserror.ENOTSUP +func (d *dentry) removexattr(ctx context.Context, creds *auth.Credentials, name string) error { + if err := d.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + return d.file.removeXattr(ctx, name) } // 
Preconditions: d.isRegularFile() || d.isDirectory(). @@ -1189,21 +1219,21 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) } // Listxattr implements vfs.FileDescriptionImpl.Listxattr. -func (fd *fileDescription) Listxattr(ctx context.Context) ([]string, error) { - return fd.dentry().listxattr(ctx) +func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) { + return fd.dentry().listxattr(ctx, auth.CredentialsFromContext(ctx), size) } // Getxattr implements vfs.FileDescriptionImpl.Getxattr. -func (fd *fileDescription) Getxattr(ctx context.Context, name string) (string, error) { - return fd.dentry().getxattr(ctx, name) +func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) { + return fd.dentry().getxattr(ctx, auth.CredentialsFromContext(ctx), &opts) } // Setxattr implements vfs.FileDescriptionImpl.Setxattr. func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { - return fd.dentry().setxattr(ctx, &opts) + return fd.dentry().setxattr(ctx, auth.CredentialsFromContext(ctx), &opts) } // Removexattr implements vfs.FileDescriptionImpl.Removexattr. func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { - return fd.dentry().removexattr(ctx, name) + return fd.dentry().removexattr(ctx, auth.CredentialsFromContext(ctx), name) } diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go index 755ac2985..87f0b877f 100644 --- a/pkg/sentry/fsimpl/gofer/p9file.go +++ b/pkg/sentry/fsimpl/gofer/p9file.go @@ -85,6 +85,13 @@ func (f p9file) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9.SetAt return err } +func (f p9file) listXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { + ctx.UninterruptibleSleepStart(false) + xattrs, err := f.file.ListXattr(size) + ctx.UninterruptibleSleepFinish(false) + return xattrs, err +} + func (f p9file) getXattr(ctx context.Context, name string, size uint64) (string, error) { ctx.UninterruptibleSleepStart(false) val, err := f.file.GetXattr(name, size) @@ -99,6 +106,13 @@ func (f p9file) setXattr(ctx context.Context, name, value string, flags uint32) return err } +func (f p9file) removeXattr(ctx context.Context, name string) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.RemoveXattr(name) + ctx.UninterruptibleSleepFinish(false) + return err +} + func (f p9file) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { ctx.UninterruptibleSleepStart(false) err := f.file.Allocate(mode, offset, length) diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 4433071aa..baf81b4db 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -763,7 +763,7 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { +func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() @@ -776,7 +776,7 @@ func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([ } // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. 
-func (fs *Filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { +func (fs *Filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index f2ac23c88..4e6cd3491 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -51,6 +51,7 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/vfs", "//pkg/sentry/vfs/lock", + "//pkg/sentry/vfs/memxattr", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 5339d7072..f4d50d64f 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -696,51 +696,47 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath } // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) + d, err := resolveLocked(rp) if err != nil { return nil, err } - // TODO(b/127675828): support extended attributes - return nil, syserror.ENOTSUP + return d.inode.listxattr(size) } // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) + d, err := resolveLocked(rp) if err != nil { return "", err } - // TODO(b/127675828): support extended attributes - return "", syserror.ENOTSUP + return d.inode.getxattr(rp.Credentials(), &opts) } // SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) + d, err := resolveLocked(rp) if err != nil { return err } - // TODO(b/127675828): support extended attributes - return syserror.ENOTSUP + return d.inode.setxattr(rp.Credentials(), &opts) } // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) + d, err := resolveLocked(rp) if err != nil { return err } - // TODO(b/127675828): support extended attributes - return syserror.ENOTSUP + return d.inode.removexattr(rp.Credentials(), name) } // PrependPath implements vfs.FilesystemImpl.PrependPath. 
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 654e788e3..9fa8637d5 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -27,6 +27,7 @@ package tmpfs import ( "fmt" "math" + "strings" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -37,6 +38,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/vfs/lock" + "gvisor.dev/gvisor/pkg/sentry/vfs/memxattr" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -186,6 +188,11 @@ type inode struct { // filesystem.RmdirAt() drops the reference. refs int64 + // xattrs implements extended attributes. + // + // TODO(b/148380782): Support xattrs other than user.* + xattrs memxattr.SimpleExtendedAttributes + // Inode metadata. Writing multiple fields atomically requires holding // mu, othewise atomic operations can be used. mu sync.Mutex @@ -535,6 +542,56 @@ func (i *inode) touchCMtimeLocked() { atomic.StoreInt64(&i.ctime, now) } +func (i *inode) listxattr(size uint64) ([]string, error) { + return i.xattrs.Listxattr(size) +} + +func (i *inode) getxattr(creds *auth.Credentials, opts *vfs.GetxattrOptions) (string, error) { + if err := i.checkPermissions(creds, vfs.MayRead); err != nil { + return "", err + } + if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { + return "", syserror.EOPNOTSUPP + } + if !i.userXattrSupported() { + return "", syserror.ENODATA + } + return i.xattrs.Getxattr(opts) +} + +func (i *inode) setxattr(creds *auth.Credentials, opts *vfs.SetxattrOptions) error { + if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + if !strings.HasPrefix(opts.Name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + if !i.userXattrSupported() { + return syserror.EPERM + } + return i.xattrs.Setxattr(opts) +} + +func (i *inode) removexattr(creds *auth.Credentials, name string) error { + if err := i.checkPermissions(creds, vfs.MayWrite); err != nil { + return err + } + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + if !i.userXattrSupported() { + return syserror.EPERM + } + return i.xattrs.Removexattr(name) +} + +// Extended attributes in the user.* namespace are only supported for regular +// files and directories. +func (i *inode) userXattrSupported() bool { + filetype := linux.S_IFMT & atomic.LoadUint32(&i.mode) + return filetype == linux.S_IFREG || filetype == linux.S_IFDIR +} + // fileDescription is embedded by tmpfs implementations of // vfs.FileDescriptionImpl. type fileDescription struct { @@ -562,3 +619,23 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) creds := auth.CredentialsFromContext(ctx) return fd.inode().setStat(ctx, creds, &opts.Stat) } + +// Listxattr implements vfs.FileDescriptionImpl.Listxattr. +func (fd *fileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) { + return fd.inode().listxattr(size) +} + +// Getxattr implements vfs.FileDescriptionImpl.Getxattr. +func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOptions) (string, error) { + return fd.inode().getxattr(auth.CredentialsFromContext(ctx), &opts) +} + +// Setxattr implements vfs.FileDescriptionImpl.Setxattr. +func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { + return fd.inode().setxattr(auth.CredentialsFromContext(ctx), &opts) +} + +// Removexattr implements vfs.FileDescriptionImpl.Removexattr. 
+func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { + return fd.inode().removexattr(auth.CredentialsFromContext(ctx), name) +} diff --git a/pkg/sentry/syscalls/linux/vfs2/xattr.go b/pkg/sentry/syscalls/linux/vfs2/xattr.go index 89e9ff4d7..af455d5c1 100644 --- a/pkg/sentry/syscalls/linux/vfs2/xattr.go +++ b/pkg/sentry/syscalls/linux/vfs2/xattr.go @@ -51,7 +51,7 @@ func listxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSyml } defer tpop.Release() - names, err := t.Kernel().VFS().ListxattrAt(t, t.Credentials(), &tpop.pop) + names, err := t.Kernel().VFS().ListxattrAt(t, t.Credentials(), &tpop.pop, uint64(size)) if err != nil { return 0, nil, err } @@ -74,7 +74,7 @@ func Flistxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy } defer file.DecRef() - names, err := file.Listxattr(t) + names, err := file.Listxattr(t, uint64(size)) if err != nil { return 0, nil, err } @@ -116,7 +116,10 @@ func getxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSymli return 0, nil, err } - value, err := t.Kernel().VFS().GetxattrAt(t, t.Credentials(), &tpop.pop, name) + value, err := t.Kernel().VFS().GetxattrAt(t, t.Credentials(), &tpop.pop, &vfs.GetxattrOptions{ + Name: name, + Size: uint64(size), + }) if err != nil { return 0, nil, err } @@ -145,7 +148,7 @@ func Fgetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, err } - value, err := file.Getxattr(t, name) + value, err := file.Getxattr(t, &vfs.GetxattrOptions{Name: name, Size: uint64(size)}) if err != nil { return 0, nil, err } @@ -230,7 +233,7 @@ func Fsetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, err } - return 0, nil, file.Setxattr(t, vfs.SetxattrOptions{ + return 0, nil, file.Setxattr(t, &vfs.SetxattrOptions{ Name: name, Value: value, Flags: uint32(flags), diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index d1f6dfb45..a64d86122 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -245,7 +245,7 @@ func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath } // ListxattrAt implements FilesystemImpl.ListxattrAt. -func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath) ([]string, error) { +func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) { if !rp.Done() { return nil, syserror.ENOTDIR } @@ -253,7 +253,7 @@ func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath) ([ } // GetxattrAt implements FilesystemImpl.GetxattrAt. -func (fs *anonFilesystem) GetxattrAt(ctx context.Context, rp *ResolvingPath, name string) (string, error) { +func (fs *anonFilesystem) GetxattrAt(ctx context.Context, rp *ResolvingPath, opts GetxattrOptions) (string, error) { if !rp.Done() { return "", syserror.ENOTDIR } diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 20c545fca..4fb9aea87 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -401,11 +401,11 @@ type FileDescriptionImpl interface { Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) // Listxattr returns all extended attribute names for the file. - Listxattr(ctx context.Context) ([]string, error) + Listxattr(ctx context.Context, size uint64) ([]string, error) // Getxattr returns the value associated with the given extended attribute // for the file. 
- Getxattr(ctx context.Context, name string) (string, error) + Getxattr(ctx context.Context, opts GetxattrOptions) (string, error) // Setxattr changes the value associated with the given extended attribute // for the file. @@ -605,18 +605,23 @@ func (fd *FileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch. // Listxattr returns all extended attribute names for the file represented by // fd. -func (fd *FileDescription) Listxattr(ctx context.Context) ([]string, error) { +// +// If the size of the list (including a NUL terminating byte after every entry) +// would exceed size, ERANGE may be returned. Note that implementations +// are free to ignore size entirely and return without error). In all cases, +// if size is 0, the list should be returned without error, regardless of size. +func (fd *FileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) { if fd.opts.UseDentryMetadata { vfsObj := fd.vd.mount.vfs rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ Root: fd.vd, Start: fd.vd, }) - names, err := fd.vd.mount.fs.impl.ListxattrAt(ctx, rp) + names, err := fd.vd.mount.fs.impl.ListxattrAt(ctx, rp, size) vfsObj.putResolvingPath(rp) return names, err } - names, err := fd.impl.Listxattr(ctx) + names, err := fd.impl.Listxattr(ctx, size) if err == syserror.ENOTSUP { // Linux doesn't actually return ENOTSUP in this case; instead, // fs/xattr.c:vfs_listxattr() falls back to allowing the security @@ -629,34 +634,39 @@ func (fd *FileDescription) Listxattr(ctx context.Context) ([]string, error) { // Getxattr returns the value associated with the given extended attribute for // the file represented by fd. -func (fd *FileDescription) Getxattr(ctx context.Context, name string) (string, error) { +// +// If the size of the return value exceeds opts.Size, ERANGE may be returned +// (note that implementations are free to ignore opts.Size entirely and return +// without error). In all cases, if opts.Size is 0, the value should be +// returned without error, regardless of size. +func (fd *FileDescription) Getxattr(ctx context.Context, opts *GetxattrOptions) (string, error) { if fd.opts.UseDentryMetadata { vfsObj := fd.vd.mount.vfs rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ Root: fd.vd, Start: fd.vd, }) - val, err := fd.vd.mount.fs.impl.GetxattrAt(ctx, rp, name) + val, err := fd.vd.mount.fs.impl.GetxattrAt(ctx, rp, *opts) vfsObj.putResolvingPath(rp) return val, err } - return fd.impl.Getxattr(ctx, name) + return fd.impl.Getxattr(ctx, *opts) } // Setxattr changes the value associated with the given extended attribute for // the file represented by fd. 
-func (fd *FileDescription) Setxattr(ctx context.Context, opts SetxattrOptions) error { +func (fd *FileDescription) Setxattr(ctx context.Context, opts *SetxattrOptions) error { if fd.opts.UseDentryMetadata { vfsObj := fd.vd.mount.vfs rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ Root: fd.vd, Start: fd.vd, }) - err := fd.vd.mount.fs.impl.SetxattrAt(ctx, rp, opts) + err := fd.vd.mount.fs.impl.SetxattrAt(ctx, rp, *opts) vfsObj.putResolvingPath(rp) return err } - return fd.impl.Setxattr(ctx, opts) + return fd.impl.Setxattr(ctx, *opts) } // Removexattr removes the given extended attribute from the file represented diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index d45e602ce..f4c111926 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -130,14 +130,14 @@ func (FileDescriptionDefaultImpl) Ioctl(ctx context.Context, uio usermem.IO, arg // Listxattr implements FileDescriptionImpl.Listxattr analogously to // inode_operations::listxattr == NULL in Linux. -func (FileDescriptionDefaultImpl) Listxattr(ctx context.Context) ([]string, error) { +func (FileDescriptionDefaultImpl) Listxattr(ctx context.Context, size uint64) ([]string, error) { // This isn't exactly accurate; see FileDescription.Listxattr. return nil, syserror.ENOTSUP } // Getxattr implements FileDescriptionImpl.Getxattr analogously to // inode::i_opflags & IOP_XATTR == 0 in Linux. -func (FileDescriptionDefaultImpl) Getxattr(ctx context.Context, name string) (string, error) { +func (FileDescriptionDefaultImpl) Getxattr(ctx context.Context, opts GetxattrOptions) (string, error) { return "", syserror.ENOTSUP } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index bef1bd312..a537a29d1 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -442,7 +442,13 @@ type FilesystemImpl interface { // - If extended attributes are not supported by the filesystem, // ListxattrAt returns nil. (See FileDescription.Listxattr for an // explanation.) - ListxattrAt(ctx context.Context, rp *ResolvingPath) ([]string, error) + // + // - If the size of the list (including a NUL terminating byte after every + // entry) would exceed size, ERANGE may be returned. Note that + // implementations are free to ignore size entirely and return without + // error). In all cases, if size is 0, the list should be returned without + // error, regardless of size. + ListxattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) // GetxattrAt returns the value associated with the given extended // attribute for the file at rp. @@ -451,7 +457,15 @@ type FilesystemImpl interface { // // - If extended attributes are not supported by the filesystem, GetxattrAt // returns ENOTSUP. - GetxattrAt(ctx context.Context, rp *ResolvingPath, name string) (string, error) + // + // - If an extended attribute named opts.Name does not exist, ENODATA is + // returned. + // + // - If the size of the return value exceeds opts.Size, ERANGE may be + // returned (note that implementations are free to ignore opts.Size entirely + // and return without error). In all cases, if opts.Size is 0, the value + // should be returned without error, regardless of size. + GetxattrAt(ctx context.Context, rp *ResolvingPath, opts GetxattrOptions) (string, error) // SetxattrAt changes the value associated with the given extended // attribute for the file at rp. 
@@ -460,6 +474,10 @@ type FilesystemImpl interface { // // - If extended attributes are not supported by the filesystem, SetxattrAt // returns ENOTSUP. + // + // - If XATTR_CREATE is set in opts.Flag and opts.Name already exists, + // EEXIST is returned. If XATTR_REPLACE is set and opts.Name does not exist, + // ENODATA is returned. SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error // RemovexattrAt removes the given extended attribute from the file at rp. @@ -468,6 +486,8 @@ type FilesystemImpl interface { // // - If extended attributes are not supported by the filesystem, // RemovexattrAt returns ENOTSUP. + // + // - If name does not exist, ENODATA is returned. RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error // BoundEndpointAt returns the Unix socket endpoint bound at the path rp. diff --git a/pkg/sentry/vfs/memxattr/BUILD b/pkg/sentry/vfs/memxattr/BUILD new file mode 100644 index 000000000..d8c4d27b9 --- /dev/null +++ b/pkg/sentry/vfs/memxattr/BUILD @@ -0,0 +1,15 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "memxattr", + srcs = ["xattr.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/sentry/vfs", + "//pkg/sync", + "//pkg/syserror", + ], +) diff --git a/pkg/sentry/vfs/memxattr/xattr.go b/pkg/sentry/vfs/memxattr/xattr.go new file mode 100644 index 000000000..cc1e7d764 --- /dev/null +++ b/pkg/sentry/vfs/memxattr/xattr.go @@ -0,0 +1,102 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package memxattr provides a default, in-memory extended attribute +// implementation. +package memxattr + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" +) + +// SimpleExtendedAttributes implements extended attributes using a map of +// names to values. +// +// +stateify savable +type SimpleExtendedAttributes struct { + // mu protects the below fields. + mu sync.RWMutex `state:"nosave"` + xattrs map[string]string +} + +// Getxattr returns the value at 'name'. +func (x *SimpleExtendedAttributes) Getxattr(opts *vfs.GetxattrOptions) (string, error) { + x.mu.RLock() + value, ok := x.xattrs[opts.Name] + x.mu.RUnlock() + if !ok { + return "", syserror.ENODATA + } + // Check that the size of the buffer provided in getxattr(2) is large enough + // to contain the value. + if opts.Size != 0 && uint64(len(value)) > opts.Size { + return "", syserror.ERANGE + } + return value, nil +} + +// Setxattr sets 'value' at 'name'. 
+func (x *SimpleExtendedAttributes) Setxattr(opts *vfs.SetxattrOptions) error { + x.mu.Lock() + defer x.mu.Unlock() + if x.xattrs == nil { + if opts.Flags&linux.XATTR_REPLACE != 0 { + return syserror.ENODATA + } + x.xattrs = make(map[string]string) + } + + _, ok := x.xattrs[opts.Name] + if ok && opts.Flags&linux.XATTR_CREATE != 0 { + return syserror.EEXIST + } + if !ok && opts.Flags&linux.XATTR_REPLACE != 0 { + return syserror.ENODATA + } + + x.xattrs[opts.Name] = opts.Value + return nil +} + +// Listxattr returns all names in xattrs. +func (x *SimpleExtendedAttributes) Listxattr(size uint64) ([]string, error) { + // Keep track of the size of the buffer needed in listxattr(2) for the list. + listSize := 0 + x.mu.RLock() + names := make([]string, 0, len(x.xattrs)) + for n := range x.xattrs { + names = append(names, n) + // Add one byte per null terminator. + listSize += len(n) + 1 + } + x.mu.RUnlock() + if size != 0 && uint64(listSize) > size { + return nil, syserror.ERANGE + } + return names, nil +} + +// Removexattr removes the xattr at 'name'. +func (x *SimpleExtendedAttributes) Removexattr(name string) error { + x.mu.Lock() + defer x.mu.Unlock() + if _, ok := x.xattrs[name]; !ok { + return syserror.ENODATA + } + delete(x.xattrs, name) + return nil +} diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go index 2f04bf882..534528ce6 100644 --- a/pkg/sentry/vfs/options.go +++ b/pkg/sentry/vfs/options.go @@ -132,6 +132,20 @@ type SetStatOptions struct { Stat linux.Statx } +// GetxattrOptions contains options to VirtualFilesystem.GetxattrAt(), +// FilesystemImpl.GetxattrAt(), FileDescription.Getxattr(), and +// FileDescriptionImpl.Getxattr(). +type GetxattrOptions struct { + // Name is the name of the extended attribute to retrieve. + Name string + + // Size is the maximum value size that the caller will tolerate. If the value + // is larger than size, getxattr methods may return ERANGE, but they are also + // free to ignore the hint entirely (i.e. the value returned may be larger + // than size). All size checking is done independently at the syscall layer. + Size uint64 +} + // SetxattrOptions contains options to VirtualFilesystem.SetxattrAt(), // FilesystemImpl.SetxattrAt(), FileDescription.Setxattr(), and // FileDescriptionImpl.Setxattr(). diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 720b90d8f..f592913d5 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -680,10 +680,10 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti // ListxattrAt returns all extended attribute names for the file at the given // path. -func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) ([]string, error) { +func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) { rp := vfs.getResolvingPath(creds, pop) for { - names, err := rp.mount.fs.impl.ListxattrAt(ctx, rp) + names, err := rp.mount.fs.impl.ListxattrAt(ctx, rp, size) if err == nil { vfs.putResolvingPath(rp) return names, nil @@ -705,10 +705,10 @@ func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Crede // GetxattrAt returns the value associated with the given extended attribute // for the file at the given path. 
-func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) (string, error) { +func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetxattrOptions) (string, error) { rp := vfs.getResolvingPath(creds, pop) for { - val, err := rp.mount.fs.impl.GetxattrAt(ctx, rp, name) + val, err := rp.mount.fs.impl.GetxattrAt(ctx, rp, *opts) if err == nil { vfs.putResolvingPath(rp) return val, nil diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 8b00ef44c..3231732ec 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -41,12 +41,12 @@ class XattrTest : public FileTest {}; TEST_F(XattrTest, XattrNonexistentFile) { const char* path = "/does/not/exist"; - EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), - SyscallFailsWithErrno(ENOENT)); - EXPECT_THAT(getxattr(path, nullptr, nullptr, 0), + const char* name = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENOENT)); EXPECT_THAT(listxattr(path, nullptr, 0), SyscallFailsWithErrno(ENOENT)); - EXPECT_THAT(removexattr(path, nullptr), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(ENOENT)); } TEST_F(XattrTest, XattrNullName) { -- cgit v1.2.3 From 20203494680f869669ab5318b36e9470ad4b3e7b Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Sat, 11 Apr 2020 06:45:15 -0700 Subject: Improve error messages when parsing headers. Tested: Looked at output of failing tests. PiperOrigin-RevId: 306031407 --- test/packetimpact/testbench/connections.go | 2 +- test/packetimpact/testbench/layers.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index ed8689fd3..b11a534ac 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -213,7 +213,7 @@ func (conn *TCPIPv4) RecvFrame(timeout time.Duration) Layers { } layers, err := ParseEther(b) if err != nil { - conn.t.Logf("can't parse frame: %s", err) + conn.t.Logf("debug: can't parse frame, ignoring: %s", err) continue // Ignore packets that can't be parsed. } if !conn.incoming.match(layers) { diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 093a46e23..ff800377e 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -153,7 +153,7 @@ func (l *Ether) toBytes() ([]byte, error) { fields.Type = header.IPv4ProtocolNumber default: // TODO(b/150301488): Support more protocols, like IPv6. - return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", n) + return nil, fmt.Errorf("ethernet header's next layer is unrecognized: %#v", n) } } h.Encode(fields) @@ -191,7 +191,7 @@ func ParseEther(b []byte) (Layers, error) { return append(layers, moreLayers...), nil default: // TODO(b/150301488): Support more protocols, like IPv6. - return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %#v", b) + return nil, fmt.Errorf("ethernet header's type field is unrecognized: %#04x", h.Type()) } } @@ -274,7 +274,7 @@ func (l *IPv4) toBytes() ([]byte, error) { fields.Protocol = uint8(header.UDPProtocolNumber) default: // TODO(b/150301488): Support more protocols as needed. 
- return nil, fmt.Errorf("can't deduce the ip header's next protocol: %#v", n) + return nil, fmt.Errorf("ipv4 header's next layer is unrecognized: %#v", n) } } if l.SrcAddr != nil { @@ -344,7 +344,7 @@ func ParseIPv4(b []byte) (Layers, error) { } return append(layers, moreLayers...), nil } - return nil, fmt.Errorf("can't deduce the ethernet header's next protocol: %d", h.Protocol()) + return nil, fmt.Errorf("ipv4 header's protocol field is unrecognized: %#02x", h.Protocol()) } func (l *IPv4) match(other Layer) bool { -- cgit v1.2.3 From ef0b5584e5389cc392e03d20976a15974f277251 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Sun, 12 Apr 2020 18:32:18 -0700 Subject: Refactor parser to use a for loop instead of recursion. This makes the code shorter and less repetitive. TESTED: All unit tests still pass. PiperOrigin-RevId: 306161475 --- test/packetimpact/testbench/connections.go | 12 +--- test/packetimpact/testbench/layers.go | 93 +++++++++++++++++------------- 2 files changed, 55 insertions(+), 50 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index b11a534ac..79c0ccf5c 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -211,11 +211,7 @@ func (conn *TCPIPv4) RecvFrame(timeout time.Duration) Layers { if b == nil { break } - layers, err := ParseEther(b) - if err != nil { - conn.t.Logf("debug: can't parse frame, ignoring: %s", err) - continue // Ignore packets that can't be parsed. - } + layers := Parse(ParseEther, b) if !conn.incoming.match(layers) { continue // Ignore packets that don't match the expected incoming. } @@ -418,11 +414,7 @@ func (conn *UDPIPv4) Recv(timeout time.Duration) *UDP { if b == nil { break } - layers, err := ParseEther(b) - if err != nil { - conn.t.Logf("can't parse frame: %s", err) - continue // Ignore packets that can't be parsed. - } + layers := Parse(ParseEther, b) if !conn.incoming.match(layers) { continue // Ignore packets that don't match the expected incoming. } diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index ff800377e..b467c15cc 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -172,27 +172,46 @@ func NetworkProtocolNumber(v tcpip.NetworkProtocolNumber) *tcpip.NetworkProtocol return &v } +// LayerParser parses the input bytes and returns a Layer along with the next +// LayerParser to run. If there is no more parsing to do, the returned +// LayerParser is nil. +type LayerParser func([]byte) (Layer, LayerParser) + +// Parse parses bytes starting with the first LayerParser and using successive +// LayerParsers until all the bytes are parsed. +func Parse(parser LayerParser, b []byte) Layers { + var layers Layers + for { + var layer Layer + layer, parser = parser(b) + layers = append(layers, layer) + if parser == nil { + break + } + b = b[layer.length():] + } + layers.linkLayers() + return layers +} + // ParseEther parses the bytes assuming that they start with an ethernet header // and continues parsing further encapsulations. 
-func ParseEther(b []byte) (Layers, error) { +func ParseEther(b []byte) (Layer, LayerParser) { h := header.Ethernet(b) ether := Ether{ SrcAddr: LinkAddress(h.SourceAddress()), DstAddr: LinkAddress(h.DestinationAddress()), Type: NetworkProtocolNumber(h.Type()), } - layers := Layers{ðer} + var nextParser LayerParser switch h.Type() { case header.IPv4ProtocolNumber: - moreLayers, err := ParseIPv4(b[ether.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + nextParser = ParseIPv4 default: - // TODO(b/150301488): Support more protocols, like IPv6. - return nil, fmt.Errorf("ethernet header's type field is unrecognized: %#04x", h.Type()) + // Assume that the rest is a payload. + nextParser = ParsePayload } + return ðer, nextParser } func (l *Ether) match(other Layer) bool { @@ -313,7 +332,7 @@ func Address(v tcpip.Address) *tcpip.Address { // ParseIPv4 parses the bytes assuming that they start with an ipv4 header and // continues parsing further encapsulations. -func ParseIPv4(b []byte) (Layers, error) { +func ParseIPv4(b []byte) (Layer, LayerParser) { h := header.IPv4(b) tos, _ := h.TOS() ipv4 := IPv4{ @@ -329,22 +348,17 @@ func ParseIPv4(b []byte) (Layers, error) { SrcAddr: Address(h.SourceAddress()), DstAddr: Address(h.DestinationAddress()), } - layers := Layers{&ipv4} + var nextParser LayerParser switch h.TransportProtocol() { case header.TCPProtocolNumber: - moreLayers, err := ParseTCP(b[ipv4.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + nextParser = ParseTCP case header.UDPProtocolNumber: - moreLayers, err := ParseUDP(b[ipv4.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + nextParser = ParseUDP + default: + // Assume that the rest is a payload. + nextParser = ParsePayload } - return nil, fmt.Errorf("ipv4 header's protocol field is unrecognized: %#02x", h.Protocol()) + return &ipv4, nextParser } func (l *IPv4) match(other Layer) bool { @@ -470,7 +484,7 @@ func Uint32(v uint32) *uint32 { // ParseTCP parses the bytes assuming that they start with a tcp header and // continues parsing further encapsulations. -func ParseTCP(b []byte) (Layers, error) { +func ParseTCP(b []byte) (Layer, LayerParser) { h := header.TCP(b) tcp := TCP{ SrcPort: Uint16(h.SourcePort()), @@ -483,12 +497,7 @@ func ParseTCP(b []byte) (Layers, error) { Checksum: Uint16(h.Checksum()), UrgentPointer: Uint16(h.UrgentPointer()), } - layers := Layers{&tcp} - moreLayers, err := ParsePayload(b[tcp.length():]) - if err != nil { - return nil, err - } - return append(layers, moreLayers...), nil + return &tcp, ParsePayload } func (l *TCP) match(other Layer) bool { @@ -557,8 +566,8 @@ func setUDPChecksum(h *header.UDP, udp *UDP) error { } // ParseUDP parses the bytes assuming that they start with a udp header and -// continues parsing further encapsulations. -func ParseUDP(b []byte) (Layers, error) { +// returns the parsed layer and the next parser to use. 
+func ParseUDP(b []byte) (Layer, LayerParser) {
 	h := header.UDP(b)
 	udp := UDP{
 		SrcPort:  Uint16(h.SourcePort()),
@@ -566,12 +575,7 @@ func ParseUDP(b []byte) (Layers, error) {
 		Length:   Uint16(h.Length()),
 		Checksum: Uint16(h.Checksum()),
 	}
-	layers := Layers{&udp}
-	moreLayers, err := ParsePayload(b[udp.length():])
-	if err != nil {
-		return nil, err
-	}
-	return append(layers, moreLayers...), nil
+	return &udp, ParsePayload
 }

 func (l *UDP) match(other Layer) bool {
@@ -603,11 +607,11 @@ func (l *Payload) String() string {

 // ParsePayload parses the bytes assuming that they start with a payload and
 // continue to the end. There can be no further encapsulations.
-func ParsePayload(b []byte) (Layers, error) {
+func ParsePayload(b []byte) (Layer, LayerParser) {
 	payload := Payload{
 		Bytes: b,
 	}
-	return Layers{&payload}, nil
+	return &payload, nil
 }

 func (l *Payload) toBytes() ([]byte, error) {
@@ -625,15 +629,24 @@ func (l *Payload) length() int {
 // Layers is an array of Layer and supports similar functions to Layer.
 type Layers []Layer

-func (ls *Layers) toBytes() ([]byte, error) {
+// linkLayers sets the linked-list pointers in ls.
+func (ls *Layers) linkLayers() {
 	for i, l := range *ls {
 		if i > 0 {
 			l.setPrev((*ls)[i-1])
+		} else {
+			l.setPrev(nil)
 		}
 		if i+1 < len(*ls) {
 			l.setNext((*ls)[i+1])
+		} else {
+			l.setNext(nil)
 		}
 	}
+}
+
+func (ls *Layers) toBytes() ([]byte, error) {
+	ls.linkLayers()
 	outBytes := []byte{}
 	for _, l := range *ls {
 		layerBytes, err := l.toBytes()
-- cgit v1.2.3

From d303684d7ab9b8a3961398fcf12560956ee9e2e3 Mon Sep 17 00:00:00 2001
From: Tamir Duberstein
Date: Mon, 13 Apr 2020 16:59:45 -0700
Subject: Remove unnecessary threads

The work being done in these threads is not asynchronous with respect to
the test; that is, it is equivalent to issuing non-blocking `connect`
calls serially, since the work is done asynchronously with respect to the
caller.

Furthermore, this test was added to test closing a listener with completed
but not delivered connections, which never required threading in the first
place.

PiperOrigin-RevId: 306339486
---
 test/syscalls/linux/socket_inet_loopback.cc | 40 ++++++++---------------------
 1 file changed, 11 insertions(+), 29 deletions(-)
(limited to 'test')

diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc
index 030c3b835..71bd7c14d 100644
--- a/test/syscalls/linux/socket_inet_loopback.cc
+++ b/test/syscalls/linux/socket_inet_loopback.cc
@@ -325,11 +325,9 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) {
   TestAddress const& listener = param.listener;
   TestAddress const& connector = param.connector;

-  constexpr int kAcceptCount = 32;
-  constexpr int kBacklog = kAcceptCount * 2;
-  constexpr int kFDs = 128;
-  constexpr int kThreadCount = 4;
-  constexpr int kFDsPerThread = kFDs / kThreadCount;
+  constexpr int kAcceptCount = 2;
+  constexpr int kBacklog = kAcceptCount + 2;
+  constexpr int kFDs = kBacklog * 3;

   // Create the listening socket.
   FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE(
@@ -348,39 +346,23 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) {
   uint16_t const port =
       ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr));

-  DisableSave ds;  // Too many system calls.
sockaddr_storage conn_addr = connector.addr; ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); - FileDescriptor clients[kFDs]; - std::unique_ptr threads[kThreadCount]; + std::vector clients; for (int i = 0; i < kFDs; i++) { - clients[i] = ASSERT_NO_ERRNO_AND_VALUE( + auto client = ASSERT_NO_ERRNO_AND_VALUE( Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); - } - for (int i = 0; i < kThreadCount; i++) { - threads[i] = absl::make_unique([&connector, &conn_addr, - &clients, i]() { - for (int j = 0; j < kFDsPerThread; j++) { - int k = i * kFDsPerThread + j; - int ret = - connect(clients[k].get(), reinterpret_cast(&conn_addr), - connector.addr_len); - if (ret != 0) { - EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); - } - } - }); - } - for (int i = 0; i < kThreadCount; i++) { - threads[i]->Join(); + int ret = connect(client.get(), reinterpret_cast(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); + } + clients.push_back(std::move(client)); } for (int i = 0; i < kAcceptCount; i++) { auto accepted = ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); } - // TODO(b/138400178): Fix cooperative S/R failure when ds.reset() is invoked - // before function end. - // ds.reset(); } TEST_P(SocketInetLoopbackTest, TCPbacklog) { -- cgit v1.2.3 From 71e6ac3e1f551cf52166bf501de114f06502b994 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Mon, 13 Apr 2020 17:58:52 -0700 Subject: Don't allow read/write when offset+size overflows. PiperOrigin-RevId: 306348346 --- pkg/sentry/syscalls/linux/sys_read.go | 8 ++++---- pkg/sentry/syscalls/linux/sys_splice.go | 4 +++- pkg/sentry/syscalls/linux/sys_write.go | 4 ++-- pkg/sentry/syscalls/linux/vfs2/read_write.go | 8 ++++---- test/syscalls/linux/memfd.cc | 1 + test/syscalls/linux/pread64.cc | 16 ++++++++++++++++ test/syscalls/linux/pwrite64.cc | 12 ++++++++++++ test/syscalls/linux/sendfile.cc | 23 +++++++++++++++++++++++ test/syscalls/linux/splice.cc | 1 + 9 files changed, 66 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go index 78a2cb750..071b4bacc 100644 --- a/pkg/sentry/syscalls/linux/sys_read.go +++ b/pkg/sentry/syscalls/linux/sys_read.go @@ -96,8 +96,8 @@ func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, syserror.EINVAL } - // Check that the offset is legitimate. - if offset < 0 { + // Check that the offset is legitimate and does not overflow. + if offset < 0 || offset+int64(size) < 0 { return 0, nil, syserror.EINVAL } @@ -120,8 +120,8 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca } defer file.DecRef() - // Check that the offset is legitimate. - if offset < 0 { + // Check that the offset is legitimate and does not overflow. + if offset < 0 || offset+int64(size) < 0 { return 0, nil, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index fbc6cf15f..df0d0f461 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -16,6 +16,7 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -25,7 +26,8 @@ import ( // doSplice implements a blocking splice operation. 
func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) { - if opts.Length < 0 || opts.SrcStart < 0 || opts.DstStart < 0 { + log.Infof("NLAC: doSplice opts: %+v", opts) + if opts.Length < 0 || opts.SrcStart < 0 || opts.DstStart < 0 || (opts.SrcStart+opts.Length < 0) { return 0, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go index 506ee54ce..6ec0de96e 100644 --- a/pkg/sentry/syscalls/linux/sys_write.go +++ b/pkg/sentry/syscalls/linux/sys_write.go @@ -87,8 +87,8 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } defer file.DecRef() - // Check that the offset is legitimate. - if offset < 0 { + // Check that the offset is legitimate and does not overflow. + if offset < 0 || offset+int64(size) < 0 { return 0, nil, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index 35f6308d6..898b190fd 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -130,8 +130,8 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca } defer file.DecRef() - // Check that the offset is legitimate. - if offset < 0 { + // Check that the offset is legitimate and does not overflow. + if offset < 0 || offset+int64(size) < 0 { return 0, nil, syserror.EINVAL } @@ -362,8 +362,8 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } defer file.DecRef() - // Check that the offset is legitimate. - if offset < 0 { + // Check that the offset is legitimate and does not overflow. + if offset < 0 || offset+int64(size) < 0 { return 0, nil, syserror.EINVAL } diff --git a/test/syscalls/linux/memfd.cc b/test/syscalls/linux/memfd.cc index e57b49a4a..f8b7f7938 100644 --- a/test/syscalls/linux/memfd.cc +++ b/test/syscalls/linux/memfd.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/test/syscalls/linux/pread64.cc b/test/syscalls/linux/pread64.cc index 2cecf2e5f..bcdbbb044 100644 --- a/test/syscalls/linux/pread64.cc +++ b/test/syscalls/linux/pread64.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -118,6 +119,21 @@ TEST_F(Pread64Test, EndOfFile) { EXPECT_THAT(pread64(fd.get(), buf, 1024, 0), SyscallSucceedsWithValue(0)); } +int memfd_create(const std::string& name, unsigned int flags) { + return syscall(__NR_memfd_create, name.c_str(), flags); +} + +TEST_F(Pread64Test, Overflow) { + int f = memfd_create("negative", 0); + const FileDescriptor fd(f); + + EXPECT_THAT(ftruncate(fd.get(), 0x7fffffffffffffffull), SyscallSucceeds()); + + char buf[10]; + EXPECT_THAT(pread64(fd.get(), buf, sizeof(buf), 0x7fffffffffffffffull), + SyscallFailsWithErrno(EINVAL)); +} + TEST(Pread64TestNoTempFile, CantReadSocketPair_NoRandomSave) { int sock_fds[2]; EXPECT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds), SyscallSucceeds()); diff --git a/test/syscalls/linux/pwrite64.cc b/test/syscalls/linux/pwrite64.cc index c2f72e010..e69794910 100644 --- a/test/syscalls/linux/pwrite64.cc +++ b/test/syscalls/linux/pwrite64.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -65,6 +66,17 @@ TEST_F(Pwrite64, InvalidArgs) { EXPECT_THAT(close(fd), SyscallSucceeds()); } +TEST_F(Pwrite64, Overflow) { + int fd; + ASSERT_THAT(fd = open(name_.c_str(), O_APPEND | O_RDWR), SyscallSucceeds()); + constexpr int64_t kBufSize = 1024; + std::vector 
buf(kBufSize); + std::fill(buf.begin(), buf.end(), 'a'); + EXPECT_THAT(PwriteFd(fd, buf.data(), buf.size(), 0x7fffffffffffffffull), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc index ebaafe47e..64123e904 100644 --- a/test/syscalls/linux/sendfile.cc +++ b/test/syscalls/linux/sendfile.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -70,6 +71,28 @@ TEST(SendFileTest, InvalidOffset) { SyscallFailsWithErrno(EINVAL)); } +int memfd_create(const std::string& name, unsigned int flags) { + return syscall(__NR_memfd_create, name.c_str(), flags); +} + +TEST(SendFileTest, Overflow) { + // Create input file. + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open the output file. + int fd; + EXPECT_THAT(fd = memfd_create("overflow", 0), SyscallSucceeds()); + const FileDescriptor outf(fd); + + // out_offset + kSize overflows INT64_MAX. + loff_t out_offset = 0x7ffffffffffffffeull; + constexpr int kSize = 3; + EXPECT_THAT(sendfile(outf.get(), inf.get(), &out_offset, kSize), + SyscallFailsWithErrno(EINVAL)); +} + TEST(SendFileTest, SendTrivially) { // Create temp files. constexpr char kData[] = "To be, or not to be, that is the question:"; diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index faa1247f6..f103e2e56 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include -- cgit v1.2.3 From c230d12b5ce540239df06e517f3b1b72722dcc14 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 13 Apr 2020 23:04:01 -0700 Subject: Add Sniffer.Drain() draining socket receive buffer Add Sniffer.Drain() which drains the socket's receive buffer by temporarily setting the socket to non-blocking, and receiving in a loop until EINTR, EWOULDBLOCK or EAGAIN. This method should be used when long periods of time elapses without receiving on the socket, because uninteresting packets may have piled up in the receive buffer, filling it up and causing packets critical to test operation to be dropped. PiperOrigin-RevId: 306380480 --- test/packetimpact/testbench/connections.go | 12 ++++++++++++ test/packetimpact/testbench/rawsockets.go | 23 +++++++++++++++++++++++ test/packetimpact/tests/fin_wait2_timeout_test.go | 2 ++ 3 files changed, 37 insertions(+) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 79c0ccf5c..2b8e2f005 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -228,6 +228,12 @@ func (conn *TCPIPv4) RecvFrame(timeout time.Duration) Layers { return nil } +// Drain drains the sniffer's receive buffer by receiving packets until there's +// nothing else to receive. +func (conn *TCPIPv4) Drain() { + conn.sniffer.Drain() +} + // Expect a packet that matches the provided tcp within the timeout specified. // If it doesn't arrive in time, it returns nil. func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) { @@ -423,6 +429,12 @@ func (conn *UDPIPv4) Recv(timeout time.Duration) *UDP { return nil } +// Drain drains the sniffer's receive buffer by receiving packets until there's +// nothing else to receive. 
+func (conn *UDPIPv4) Drain() { + conn.sniffer.Drain() +} + // Expect a packet that matches the provided udp within the timeout specified. // If it doesn't arrive in time, the test fails. func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) { diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index 0074484f7..09bfa43c5 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -97,6 +97,29 @@ func (s *Sniffer) Recv(timeout time.Duration) []byte { } } +// Drain drains the Sniffer's socket receive buffer by receiving until there's +// nothing else to receive. +func (s *Sniffer) Drain() { + s.t.Helper() + flags, err := unix.FcntlInt(uintptr(s.fd), unix.F_GETFL, 0) + if err != nil { + s.t.Fatalf("failed to get sniffer socket fd flags: %s", err) + } + if _, err := unix.FcntlInt(uintptr(s.fd), unix.F_SETFL, flags|unix.O_NONBLOCK); err != nil { + s.t.Fatalf("failed to make sniffer socket non-blocking: %s", err) + } + for { + buf := make([]byte, maxReadSize) + _, _, err := unix.Recvfrom(s.fd, buf, unix.MSG_TRUNC) + if err == unix.EINTR || err == unix.EAGAIN || err == unix.EWOULDBLOCK { + break + } + } + if _, err := unix.FcntlInt(uintptr(s.fd), unix.F_SETFL, flags); err != nil { + s.t.Fatalf("failed to restore sniffer socket fd flags: %s", err) + } +} + // Close the socket that Sniffer is using. func (s *Sniffer) Close() { if err := unix.Close(s.fd); err != nil { diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go index 90e16ef65..b98594f94 100644 --- a/test/packetimpact/tests/fin_wait2_timeout_test.go +++ b/test/packetimpact/tests/fin_wait2_timeout_test.go @@ -53,6 +53,8 @@ func TestFinWait2Timeout(t *testing.T) { conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) time.Sleep(5 * time.Second) + conn.Drain() + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) if tt.linger2 { if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); err != nil { -- cgit v1.2.3 From 81c44c4cd7cfa121d9ef028db18b3ee550845811 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 14 Apr 2020 11:04:30 -0700 Subject: Test TCP should piggyback ACK in ESTAB state TCP, in ESTABLISHED state, SHOULD piggyback acknowledgement with a segment being transmitted (whenever possible) without incurring undue delay PiperOrigin-RevId: 306474550 --- test/packetimpact/tests/BUILD | 12 +++++ .../tests/tcp_should_piggyback_test.go | 53 ++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 test/packetimpact/tests/tcp_should_piggyback_test.go (limited to 'test') diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 956b1addf..308590162 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -52,6 +52,18 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_should_piggyback", + srcs = ["tcp_should_piggyback_test.go"], + # TODO(b/153680566): Fix netstack then remove the line below. 
+ netstack = False, + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/tcp_should_piggyback_test.go b/test/packetimpact/tests/tcp_should_piggyback_test.go new file mode 100644 index 000000000..f2ab49e51 --- /dev/null +++ b/test/packetimpact/tests/tcp_should_piggyback_test.go @@ -0,0 +1,53 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_should_piggyback_test + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func TestPiggyback(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort, WindowSize: tb.Uint16(12)}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + acceptFd, _ := dut.Accept(listenFd) + defer dut.Close(acceptFd) + + dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + + sampleData := []byte("Sample Data") + + dut.Send(acceptFd, sampleData, 0) + conn.ExpectData(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, sampleData, time.Second) + + // Cause DUT to send us more data as soon as we ACK their first data segment because we have + // a small window. + dut.Send(acceptFd, sampleData, 0) + + // DUT should ACK our segment by piggybacking ACK to their outstanding data segment instead of + // sending a separate ACK packet. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &tb.Payload{Bytes: sampleData}) + conn.ExpectData(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, sampleData, time.Second) +} -- cgit v1.2.3 From 9c918340e4e6126cca1dfedbf28fec8c8f836e1a Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Wed, 15 Apr 2020 01:10:38 -0700 Subject: Reset pending connections on listener close Attempt to redeliver TCP segments that are enqueued into a closing TCP endpoint. This was being done for Established endpoints but not for those that are listening or performing connection handshake. 
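For illustration, the redelivery added in this change boils down to draining
the closing endpoint's segment queue and handing each segment back to the
demuxer for re-matching (see drainClosingSegmentQueue below):

    for {
        s := e.segmentQueue.dequeue()
        if s == nil {
            break
        }
        e.tryDeliverSegmentFromClosedEndpoint(s)
    }

The helper is now invoked from the listen loop and the connecting path as
well, not only when an established endpoint shuts down.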
Fixes #2417 PiperOrigin-RevId: 306598155 --- pkg/tcpip/transport/tcp/accept.go | 7 +++- pkg/tcpip/transport/tcp/connect.go | 30 ++++++++------ pkg/tcpip/transport/tcp/endpoint.go | 30 +++++++------- pkg/tcpip/transport/tcp/tcp_test.go | 37 +++++++++++++++++ test/packetimpact/tests/BUILD | 2 - test/syscalls/linux/socket_inet_loopback.cc | 62 +++++++++++++++++++++++++++++ 6 files changed, 138 insertions(+), 30 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 7a9dea4ac..e07b436c4 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -330,6 +330,9 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head if l.listenEP != nil { l.removePendingEndpoint(ep) } + + ep.drainClosingSegmentQueue() + return nil, err } ep.isConnectNotified = true @@ -378,7 +381,7 @@ func (e *endpoint) deliverAccepted(n *endpoint) { for { if e.acceptedChan == nil { e.acceptMu.Unlock() - n.Close() + n.notifyProtocolGoroutine(notifyReset) return } select { @@ -656,6 +659,8 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error { } e.mu.Unlock() + e.drainClosingSegmentQueue() + // Notify waiters that the endpoint is shutdown. e.waiterQueue.Notify(waiter.EventIn | waiter.EventOut | waiter.EventHUp | waiter.EventErr) }() diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 2ca3fb809..994ac52a3 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -1062,6 +1062,20 @@ func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) { } } +// Drain segment queue from the endpoint and try to re-match the segment to a +// different endpoint. This is used when the current endpoint is transitioned to +// StateClose and has been unregistered from the transport demuxer. +func (e *endpoint) drainClosingSegmentQueue() { + for { + s := e.segmentQueue.dequeue() + if s == nil { + break + } + + e.tryDeliverSegmentFromClosedEndpoint(s) + } +} + func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { if e.rcv.acceptable(s.sequenceNumber, 0) { // RFC 793, page 37 states that "in all states @@ -1315,6 +1329,9 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ } e.mu.Unlock() + + e.drainClosingSegmentQueue() + // When the protocol loop exits we should wake up our waiters. e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } @@ -1565,19 +1582,6 @@ loop: // Lock released below. epilogue() - // epilogue removes the endpoint from the transport-demuxer and - // unlocks e.mu. Now that no new segments can get enqueued to this - // endpoint, try to re-match the segment to a different endpoint - // as the current endpoint is closed. - for { - s := e.segmentQueue.dequeue() - if s == nil { - break - } - - e.tryDeliverSegmentFromClosedEndpoint(s) - } - // A new SYN was received during TIME_WAIT and we need to abort // the timewait and redirect the segment to the listener queue if reuseTW != nil { diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index a8d443f73..7ed78d57f 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -980,25 +980,22 @@ func (e *endpoint) closeNoShutdownLocked() { // Mark endpoint as closed. 
e.closed = true + + switch e.EndpointState() { + case StateClose, StateError: + return + } + // Either perform the local cleanup or kick the worker to make sure it // knows it needs to cleanup. - switch e.EndpointState() { - // Sockets in StateSynRecv state(passive connections) are closed when - // the handshake fails or if the listening socket is closed while - // handshake was in progress. In such cases the handshake goroutine - // is already gone by the time Close is called and we need to cleanup - // here. - case StateInitial, StateBound, StateSynRecv: - e.cleanupLocked() - e.setEndpointState(StateClose) - case StateError, StateClose: - // do nothing. - default: + if e.workerRunning { e.workerCleanup = true tcpip.AddDanglingEndpoint(e) // Worker will remove the dangling endpoint when the endpoint // goroutine terminates. e.notifyProtocolGoroutine(notifyClose) + } else { + e.transitionToStateCloseLocked() } } @@ -1010,13 +1007,18 @@ func (e *endpoint) closePendingAcceptableConnectionsLocked() { e.acceptMu.Unlock() return } - close(e.acceptedChan) + ch := e.acceptedChan e.acceptedChan = nil e.acceptCond.Broadcast() e.acceptMu.Unlock() - // Wait for all pending endpoints to close. + // Reset all connections that are waiting to be accepted. + for n := range ch { + n.notifyProtocolGoroutine(notifyReset) + } + // Wait for reset of all endpoints that are still waiting to be delivered to + // the now closed acceptedChan. e.pendingAccepted.Wait() } diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 41caa9ed4..a9f121c17 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -1068,6 +1068,43 @@ func TestListenShutdown(t *testing.T) { c.CheckNoPacket("Packet received when listening socket was shutdown") } +// TestListenCloseWhileConnect tests for the listening endpoint to +// drain the accept-queue when closed. This should reset all of the +// pending connections that are waiting to be accepted. +func TestListenCloseWhileConnect(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + c.Create(-1 /* epRcvBuf */) + + if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatal("Bind failed:", err) + } + + if err := c.EP.Listen(1 /* backlog */); err != nil { + t.Fatal("Listen failed:", err) + } + + waitEntry, notifyCh := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&waitEntry, waiter.EventIn) + defer c.WQ.EventUnregister(&waitEntry) + + executeHandshake(t, c, context.TestPort, false /* synCookiesInUse */) + // Wait for the new endpoint created because of handshake to be delivered + // to the listening endpoint's accept queue. + <-notifyCh + + // Close the listening endpoint. + c.EP.Close() + + // Expect the listening endpoint to reset the connection. + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst), + )) +} + func TestTOSV4(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 308590162..1274d9f60 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -43,8 +43,6 @@ packetimpact_go_test( packetimpact_go_test( name = "tcp_noaccept_close_rst", srcs = ["tcp_noaccept_close_rst_test.go"], - # TODO(b/153380909): Fix netstack then remove the line below. 
- netstack = False, deps = [ "//pkg/tcpip/header", "//test/packetimpact/testbench", diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 71bd7c14d..cd84e633a 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -365,6 +365,68 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { } } +TEST_P(SocketInetLoopbackTest, TCPListenCloseWhileConnect) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + constexpr int kBacklog = 2; + constexpr int kClients = kBacklog + 1; + + // Create the listening socket. + FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + std::vector clients; + for (int i = 0; i < kClients; i++) { + FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + int ret = connect(client.get(), reinterpret_cast(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); + clients.push_back(std::move(client)); + } + } + // Close the listening socket. + listen_fd.reset(); + + for (auto& client : clients) { + const int kTimeout = 10000; + struct pollfd pfd = { + .fd = client.get(), + .events = POLLIN, + }; + // When the listening socket is closed, then we expect the remote to reset + // the connection. + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + ASSERT_EQ(pfd.revents, POLLIN | POLLHUP | POLLERR); + char c; + // Subsequent read can fail with: + // ECONNRESET: If the client connection was established and was reset by the + // remote. ECONNREFUSED: If the client connection failed to be established. + ASSERT_THAT(read(client.get(), &c, sizeof(c)), + AnyOf(SyscallFailsWithErrno(ECONNRESET), + SyscallFailsWithErrno(ECONNREFUSED))); + } +} + TEST_P(SocketInetLoopbackTest, TCPbacklog) { auto const& param = GetParam(); -- cgit v1.2.3 From 1bcc2bf17f0e2ccf8e98e934cb9f9ce66711d27a Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 15 Apr 2020 12:59:58 -0700 Subject: Refactor connections.go to make it easier to add new connection types. Rather than have a struct for the state of each type of connection, such as TCP/IPv4, UDP/IPv4, TCP/IPv6, etc, have a state for each layer, such as UDP, TCP, IPv4, IPv6. Those states can be composed into connections. Tested: Existing unit tests still pass/fail as expected. 
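As a sketch of the new shape (mirroring the NewTCPIPv4 constructor in the diff
below), a TCP/IPv4 connection becomes an ordered list of per-layer states plus
the injector and sniffer sockets:

    conn := TCPIPv4{
        layerStates: []layerState{etherState, ipv4State, tcpState},
        injector:    injector,
        sniffer:     sniffer,
        t:           t,
    }

Adding a new connection type (for example TCP/IPv6) then only requires
composing a different list of layer states.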
PiperOrigin-RevId: 306703180 --- test/packetimpact/testbench/connections.go | 743 +++++++++++++-------- test/packetimpact/testbench/layers.go | 97 ++- test/packetimpact/testbench/rawsockets.go | 15 +- .../tests/tcp_should_piggyback_test.go | 12 +- test/packetimpact/tests/tcp_window_shrink_test.go | 18 +- test/packetimpact/tests/udp_recv_multicast_test.go | 2 +- 6 files changed, 566 insertions(+), 321 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 2b8e2f005..169db01b0 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -63,229 +63,469 @@ func pickPort() (int, uint16, error) { return fd, uint16(newSockAddrInet4.Port), nil } -// TCPIPv4 maintains state about a TCP/IPv4 connection. -type TCPIPv4 struct { - outgoing Layers - incoming Layers - LocalSeqNum seqnum.Value - RemoteSeqNum seqnum.Value - SynAck *TCP - sniffer Sniffer - injector Injector - portPickerFD int - t *testing.T +// layerState stores the state of a layer of a connection. +type layerState interface { + // outgoing returns an outgoing layer to be sent in a frame. + outgoing() Layer + + // incoming creates an expected Layer for comparing against a received Layer. + // Because the expectation can depend on values in the received Layer, it is + // an input to incoming. For example, the ACK number needs to be checked in a + // TCP packet but only if the ACK flag is set in the received packet. + incoming(received Layer) Layer + + // sent updates the layerState based on the Layer that was sent. The input is + // a Layer with all prev and next pointers populated so that the entire frame + // as it was sent is available. + sent(sent Layer) error + + // received updates the layerState based on a Layer that is receieved. The + // input is a Layer with all prev and next pointers populated so that the + // entire frame as it was receieved is available. + received(received Layer) error + + // close frees associated resources held by the LayerState. + close() error } -// tcpLayerIndex is the position of the TCP layer in the TCPIPv4 connection. It -// is the third, after Ethernet and IPv4. -const tcpLayerIndex int = 2 +// etherState maintains state about an Ethernet connection. +type etherState struct { + out, in Ether +} -// NewTCPIPv4 creates a new TCPIPv4 connection with reasonable defaults. -func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { +var _ layerState = (*etherState)(nil) + +// newEtherState creates a new etherState. 
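+// The local and remote link addresses default to the localMAC and remoteMAC
+// test flags; any fields set in out or in are merged on top of those defaults.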
+func newEtherState(out, in Ether) (*etherState, error) { lMAC, err := tcpip.ParseMACAddress(*localMAC) if err != nil { - t.Fatalf("can't parse localMAC %q: %s", *localMAC, err) + return nil, err } rMAC, err := tcpip.ParseMACAddress(*remoteMAC) if err != nil { - t.Fatalf("can't parse remoteMAC %q: %s", *remoteMAC, err) + return nil, err } - - portPickerFD, localPort, err := pickPort() - if err != nil { - t.Fatalf("can't pick a port: %s", err) + s := etherState{ + out: Ether{SrcAddr: &lMAC, DstAddr: &rMAC}, + in: Ether{SrcAddr: &rMAC, DstAddr: &lMAC}, } + if err := s.out.merge(&out); err != nil { + return nil, err + } + if err := s.in.merge(&in); err != nil { + return nil, err + } + return &s, nil +} + +func (s *etherState) outgoing() Layer { + return &s.out +} + +func (s *etherState) incoming(Layer) Layer { + return deepcopy.Copy(&s.in).(Layer) +} + +func (*etherState) sent(Layer) error { + return nil +} + +func (*etherState) received(Layer) error { + return nil +} + +func (*etherState) close() error { + return nil +} + +// ipv4State maintains state about an IPv4 connection. +type ipv4State struct { + out, in IPv4 +} + +var _ layerState = (*ipv4State)(nil) + +// newIPv4State creates a new ipv4State. +func newIPv4State(out, in IPv4) (*ipv4State, error) { lIP := tcpip.Address(net.ParseIP(*localIPv4).To4()) rIP := tcpip.Address(net.ParseIP(*remoteIPv4).To4()) - - sniffer, err := NewSniffer(t) - if err != nil { - t.Fatalf("can't make new sniffer: %s", err) + s := ipv4State{ + out: IPv4{SrcAddr: &lIP, DstAddr: &rIP}, + in: IPv4{SrcAddr: &rIP, DstAddr: &lIP}, + } + if err := s.out.merge(&out); err != nil { + return nil, err } + if err := s.in.merge(&in); err != nil { + return nil, err + } + return &s, nil +} - injector, err := NewInjector(t) +func (s *ipv4State) outgoing() Layer { + return &s.out +} + +func (s *ipv4State) incoming(Layer) Layer { + return deepcopy.Copy(&s.in).(Layer) +} + +func (*ipv4State) sent(Layer) error { + return nil +} + +func (*ipv4State) received(Layer) error { + return nil +} + +func (*ipv4State) close() error { + return nil +} + +// tcpState maintains state about a TCP connection. +type tcpState struct { + out, in TCP + localSeqNum, remoteSeqNum *seqnum.Value + synAck *TCP + portPickerFD int +} + +var _ layerState = (*tcpState)(nil) + +// SeqNumValue is a helper routine that allocates a new seqnum.Value value to +// store v and returns a pointer to it. +func SeqNumValue(v seqnum.Value) *seqnum.Value { + return &v +} + +// newTCPState creates a new TCPState. 
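+// A local port is reserved with pickPort and the local sequence number starts
+// at a random value; fields set in out or in override these defaults.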
+func newTCPState(out, in TCP) (*tcpState, error) { + portPickerFD, localPort, err := pickPort() if err != nil { - t.Fatalf("can't make new injector: %s", err) + return nil, err + } + s := tcpState{ + out: TCP{SrcPort: &localPort}, + in: TCP{DstPort: &localPort}, + localSeqNum: SeqNumValue(seqnum.Value(rand.Uint32())), + portPickerFD: portPickerFD, + } + if err := s.out.merge(&out); err != nil { + return nil, err } + if err := s.in.merge(&in); err != nil { + return nil, err + } + return &s, nil +} - newOutgoingTCP := &TCP{ - SrcPort: &localPort, +func (s *tcpState) outgoing() Layer { + newOutgoing := deepcopy.Copy(s.out).(TCP) + if s.localSeqNum != nil { + newOutgoing.SeqNum = Uint32(uint32(*s.localSeqNum)) } - if err := newOutgoingTCP.merge(outgoingTCP); err != nil { - t.Fatalf("can't merge %+v into %+v: %s", outgoingTCP, newOutgoingTCP, err) + if s.remoteSeqNum != nil { + newOutgoing.AckNum = Uint32(uint32(*s.remoteSeqNum)) } - newIncomingTCP := &TCP{ - DstPort: &localPort, + return &newOutgoing +} + +func (s *tcpState) incoming(received Layer) Layer { + tcpReceived, ok := received.(*TCP) + if !ok { + return nil } - if err := newIncomingTCP.merge(incomingTCP); err != nil { - t.Fatalf("can't merge %+v into %+v: %s", incomingTCP, newIncomingTCP, err) + newIn := deepcopy.Copy(s.in).(TCP) + if s.remoteSeqNum != nil { + newIn.SeqNum = Uint32(uint32(*s.remoteSeqNum)) } - return TCPIPv4{ - outgoing: Layers{ - &Ether{SrcAddr: &lMAC, DstAddr: &rMAC}, - &IPv4{SrcAddr: &lIP, DstAddr: &rIP}, - newOutgoingTCP}, - incoming: Layers{ - &Ether{SrcAddr: &rMAC, DstAddr: &lMAC}, - &IPv4{SrcAddr: &rIP, DstAddr: &lIP}, - newIncomingTCP}, - sniffer: sniffer, - injector: injector, - portPickerFD: portPickerFD, - t: t, - LocalSeqNum: seqnum.Value(rand.Uint32()), + if s.localSeqNum != nil && (*tcpReceived.Flags&header.TCPFlagAck) != 0 { + // The caller didn't specify an AckNum so we'll expect the calculated one, + // but only if the ACK flag is set because the AckNum is not valid in a + // header if ACK is not set. + newIn.AckNum = Uint32(uint32(*s.localSeqNum)) } + return &newIn } -// Close the injector and sniffer associated with this connection. -func (conn *TCPIPv4) Close() { - conn.sniffer.Close() - conn.injector.Close() - if err := unix.Close(conn.portPickerFD); err != nil { - conn.t.Fatalf("can't close portPickerFD: %s", err) +func (s *tcpState) sent(sent Layer) error { + tcp, ok := sent.(*TCP) + if !ok { + return fmt.Errorf("can't update tcpState with %T Layer", sent) } - conn.portPickerFD = -1 + for current := tcp.next(); current != nil; current = current.next() { + s.localSeqNum.UpdateForward(seqnum.Size(current.length())) + } + if tcp.Flags != nil && *tcp.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 { + s.localSeqNum.UpdateForward(1) + } + return nil } -// CreateFrame builds a frame for the connection with tcp overriding defaults -// and additionalLayers added after the TCP header. 
-func (conn *TCPIPv4) CreateFrame(tcp TCP, additionalLayers ...Layer) Layers { - if tcp.SeqNum == nil { - tcp.SeqNum = Uint32(uint32(conn.LocalSeqNum)) +func (s *tcpState) received(l Layer) error { + tcp, ok := l.(*TCP) + if !ok { + return fmt.Errorf("can't update tcpState with %T Layer", l) } - if tcp.AckNum == nil { - tcp.AckNum = Uint32(uint32(conn.RemoteSeqNum)) + s.remoteSeqNum = SeqNumValue(seqnum.Value(*tcp.SeqNum)) + if *tcp.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 { + s.remoteSeqNum.UpdateForward(1) } - layersToSend := deepcopy.Copy(conn.outgoing).(Layers) - if err := layersToSend[tcpLayerIndex].(*TCP).merge(tcp); err != nil { - conn.t.Fatalf("can't merge %+v into %+v: %s", tcp, layersToSend[tcpLayerIndex], err) + for current := tcp.next(); current != nil; current = current.next() { + s.remoteSeqNum.UpdateForward(seqnum.Size(current.length())) } - layersToSend = append(layersToSend, additionalLayers...) - return layersToSend + return nil } -// SendFrame sends a frame with reasonable defaults. -func (conn *TCPIPv4) SendFrame(frame Layers) { - outBytes, err := frame.toBytes() - if err != nil { - conn.t.Fatalf("can't build outgoing TCP packet: %s", err) +// close frees the port associated with this connection. +func (s *tcpState) close() error { + if err := unix.Close(s.portPickerFD); err != nil { + return err } - conn.injector.Send(outBytes) + s.portPickerFD = -1 + return nil +} + +// udpState maintains state about a UDP connection. +type udpState struct { + out, in UDP + portPickerFD int +} - // Compute the next TCP sequence number. - for i := tcpLayerIndex + 1; i < len(frame); i++ { - conn.LocalSeqNum.UpdateForward(seqnum.Size(frame[i].length())) +var _ layerState = (*udpState)(nil) + +// newUDPState creates a new udpState. +func newUDPState(out, in UDP) (*udpState, error) { + portPickerFD, localPort, err := pickPort() + if err != nil { + return nil, err } - tcp := frame[tcpLayerIndex].(*TCP) - if tcp.Flags != nil && *tcp.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 { - conn.LocalSeqNum.UpdateForward(1) + s := udpState{ + out: UDP{SrcPort: &localPort}, + in: UDP{DstPort: &localPort}, + portPickerFD: portPickerFD, + } + if err := s.out.merge(&out); err != nil { + return nil, err + } + if err := s.in.merge(&in); err != nil { + return nil, err } + return &s, nil } -// Send a packet with reasonable defaults and override some fields by tcp. -func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { - conn.SendFrame(conn.CreateFrame(tcp, additionalLayers...)) +func (s *udpState) outgoing() Layer { + return &s.out +} + +func (s *udpState) incoming(Layer) Layer { + return deepcopy.Copy(&s.in).(Layer) +} + +func (*udpState) sent(l Layer) error { + return nil } -// Recv gets a packet from the sniffer within the timeout provided. -// If no packet arrives before the timeout, it returns nil. -func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP { - layers := conn.RecvFrame(timeout) - if tcpLayerIndex < len(layers) { - return layers[tcpLayerIndex].(*TCP) +func (*udpState) received(l Layer) error { + return nil +} + +// close frees the port associated with this connection. +func (s *udpState) close() error { + if err := unix.Close(s.portPickerFD); err != nil { + return err } + s.portPickerFD = -1 return nil } -// RecvFrame gets a frame (of type Layers) within the timeout provided. -// If no frame arrives before the timeout, it returns nil. 
-func (conn *TCPIPv4) RecvFrame(timeout time.Duration) Layers { - deadline := time.Now().Add(timeout) - for { - timeout = time.Until(deadline) - if timeout <= 0 { - break - } - b := conn.sniffer.Recv(timeout) - if b == nil { - break +// Connection holds a collection of layer states for maintaining a connection +// along with sockets for sniffer and injecting packets. +type Connection struct { + layerStates []layerState + injector Injector + sniffer Sniffer + t *testing.T +} + +// match tries to match each Layer in received against the incoming filter. If +// received is longer than layerStates then that may still count as a match. The +// reverse is never a match. override overrides the default matchers for each +// Layer. +func (conn *Connection) match(override, received Layers) bool { + if len(received) < len(conn.layerStates) { + return false + } + for i, s := range conn.layerStates { + toMatch := s.incoming(received[i]) + if toMatch == nil { + return false } - layers := Parse(ParseEther, b) - if !conn.incoming.match(layers) { - continue // Ignore packets that don't match the expected incoming. + if i < len(override) { + toMatch.merge(override[i]) } - tcpHeader := (layers[tcpLayerIndex]).(*TCP) - conn.RemoteSeqNum = seqnum.Value(*tcpHeader.SeqNum) - if *tcpHeader.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 { - conn.RemoteSeqNum.UpdateForward(1) + if !toMatch.match(received[i]) { + return false } - for i := tcpLayerIndex + 1; i < len(layers); i++ { - conn.RemoteSeqNum.UpdateForward(seqnum.Size(layers[i].length())) + } + return true +} + +// Close frees associated resources held by the Connection. +func (conn *Connection) Close() { + if err := conn.sniffer.close(); err != nil { + conn.t.Fatal(err) + } + if err := conn.injector.close(); err != nil { + conn.t.Fatal(err) + } + for _, s := range conn.layerStates { + if err := s.close(); err != nil { + conn.t.Fatalf("unable to close %+v: %s", s, err) } - return layers } - return nil } -// Drain drains the sniffer's receive buffer by receiving packets until there's -// nothing else to receive. -func (conn *TCPIPv4) Drain() { - conn.sniffer.Drain() +// CreateFrame builds a frame for the connection with layer overriding defaults +// of the innermost layer and additionalLayers added after it. +func (conn *Connection) CreateFrame(layer Layer, additionalLayers ...Layer) Layers { + var layersToSend Layers + for _, s := range conn.layerStates { + layersToSend = append(layersToSend, s.outgoing()) + } + if err := layersToSend[len(layersToSend)-1].merge(layer); err != nil { + conn.t.Fatalf("can't merge %+v into %+v: %s", layer, layersToSend[len(layersToSend)-1], err) + } + layersToSend = append(layersToSend, additionalLayers...) + return layersToSend } -// Expect a packet that matches the provided tcp within the timeout specified. -// If it doesn't arrive in time, it returns nil. -func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) { - // We cannot implement this directly using ExpectFrame as we cannot specify - // the Payload part. - deadline := time.Now().Add(timeout) - var allTCP []string - for { - var gotTCP *TCP - if timeout = time.Until(deadline); timeout > 0 { - gotTCP = conn.Recv(timeout) - } - if gotTCP == nil { - return nil, fmt.Errorf("got %d packets:\n%s", len(allTCP), strings.Join(allTCP, "\n")) - } - if tcp.match(gotTCP) { - return gotTCP, nil +// SendFrame sends a frame on the wire and updates the state of all layers. 
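+// The frame is serialized, injected, and then re-parsed so that each
+// layerState is updated with the values that were actually sent rather than
+// only the caller-supplied ones.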
+func (conn *Connection) SendFrame(frame Layers) { + outBytes, err := frame.toBytes() + if err != nil { + conn.t.Fatalf("can't build outgoing TCP packet: %s", err) + } + conn.injector.Send(outBytes) + + // frame might have nil values where the caller wanted to use default values. + // sentFrame will have no nil values in it because it comes from parsing the + // bytes that were actually sent. + sentFrame := parse(parseEther, outBytes) + // Update the state of each layer based on what was sent. + for i, s := range conn.layerStates { + if err := s.sent(sentFrame[i]); err != nil { + conn.t.Fatalf("Unable to update the state of %+v with %s: %s", s, sentFrame[i], err) } - allTCP = append(allTCP, gotTCP.String()) } } -// ExpectFrame expects a frame that matches the specified layers within the +// Send a packet with reasonable defaults. Potentially override the final layer +// in the connection with the provided layer and add additionLayers. +func (conn *Connection) Send(layer Layer, additionalLayers ...Layer) { + conn.SendFrame(conn.CreateFrame(layer, additionalLayers...)) +} + +// recvFrame gets the next successfully parsed frame (of type Layers) within the +// timeout provided. If no parsable frame arrives before the timeout, it returns +// nil. +func (conn *Connection) recvFrame(timeout time.Duration) Layers { + if timeout <= 0 { + return nil + } + b := conn.sniffer.Recv(timeout) + if b == nil { + return nil + } + return parse(parseEther, b) +} + +// Expect a frame with the final layerStates layer matching the provided Layer +// within the timeout specified. If it doesn't arrive in time, it returns nil. +func (conn *Connection) Expect(layer Layer, timeout time.Duration) (Layer, error) { + // Make a frame that will ignore all but the final layer. + layers := make([]Layer, len(conn.layerStates)) + layers[len(layers)-1] = layer + + gotFrame, err := conn.ExpectFrame(layers, timeout) + if err != nil { + return nil, err + } + if len(conn.layerStates)-1 < len(gotFrame) { + return gotFrame[len(conn.layerStates)-1], nil + } + conn.t.Fatal("the received frame should be at least as long as the expected layers") + return nil, fmt.Errorf("the received frame should be at least as long as the expected layers") +} + +// ExpectFrame expects a frame that matches the provided Layers within the // timeout specified. If it doesn't arrive in time, it returns nil. -func (conn *TCPIPv4) ExpectFrame(layers Layers, timeout time.Duration) Layers { +func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layers, error) { deadline := time.Now().Add(timeout) + var allLayers []string for { - timeout = time.Until(deadline) - if timeout <= 0 { - return nil + var gotLayers Layers + if timeout = time.Until(deadline); timeout > 0 { + gotLayers = conn.recvFrame(timeout) + } + if gotLayers == nil { + return nil, fmt.Errorf("got %d packets:\n%s", len(allLayers), strings.Join(allLayers, "\n")) } - gotLayers := conn.RecvFrame(timeout) - if layers.match(gotLayers) { - return gotLayers + if conn.match(layers, gotLayers) { + for i, s := range conn.layerStates { + if err := s.received(gotLayers[i]); err != nil { + conn.t.Fatal(err) + } + } + return gotLayers, nil } + allLayers = append(allLayers, fmt.Sprintf("%s", gotLayers)) } } -// ExpectData is a convenient method that expects a TCP packet along with -// the payload to arrive within the timeout specified. If it doesn't arrive -// in time, it causes a fatal test failure. 
-func (conn *TCPIPv4) ExpectData(tcp TCP, data []byte, timeout time.Duration) { - expected := []Layer{&Ether{}, &IPv4{}, &tcp} - if len(data) > 0 { - expected = append(expected, &Payload{Bytes: data}) +// Drain drains the sniffer's receive buffer by receiving packets until there's +// nothing else to receive. +func (conn *Connection) Drain() { + conn.sniffer.Drain() +} + +// TCPIPv4 maintains the state for all the layers in a TCP/IPv4 connection. +type TCPIPv4 Connection + +// NewTCPIPv4 creates a new TCPIPv4 connection with reasonable defaults. +func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { + etherState, err := newEtherState(Ether{}, Ether{}) + if err != nil { + t.Fatalf("can't make etherState: %s", err) + } + ipv4State, err := newIPv4State(IPv4{}, IPv4{}) + if err != nil { + t.Fatalf("can't make ipv4State: %s", err) + } + tcpState, err := newTCPState(outgoingTCP, incomingTCP) + if err != nil { + t.Fatalf("can't make tcpState: %s", err) } - if conn.ExpectFrame(expected, timeout) == nil { - conn.t.Fatalf("expected to get a TCP frame %s with payload %x", &tcp, data) + injector, err := NewInjector(t) + if err != nil { + t.Fatalf("can't make injector: %s", err) + } + sniffer, err := NewSniffer(t) + if err != nil { + t.Fatalf("can't make sniffer: %s", err) + } + + return TCPIPv4{ + layerStates: []layerState{etherState, ipv4State, tcpState}, + injector: injector, + sniffer: sniffer, + t: t, } } -// Handshake performs a TCP 3-way handshake. +// Handshake performs a TCP 3-way handshake. The input Connection should have a +// final TCP Layer. func (conn *TCPIPv4) Handshake() { // Send the SYN. conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn)}) @@ -295,138 +535,111 @@ func (conn *TCPIPv4) Handshake() { if synAck == nil { conn.t.Fatalf("didn't get synack during handshake: %s", err) } - conn.SynAck = synAck + conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck // Send an ACK. conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)}) } -// UDPIPv4 maintains state about a UDP/IPv4 connection. -type UDPIPv4 struct { - outgoing Layers - incoming Layers - sniffer Sniffer - injector Injector - portPickerFD int - t *testing.T +// ExpectData is a convenient method that expects a Layer and the Layer after +// it. If it doens't arrive in time, it returns nil. +func (conn *TCPIPv4) ExpectData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { + expected := make([]Layer, len(conn.layerStates)) + expected[len(expected)-1] = tcp + if payload != nil { + expected = append(expected, payload) + } + return (*Connection)(conn).ExpectFrame(expected, timeout) +} + +// Send a packet with reasonable defaults. Potentially override the TCP layer in +// the connection with the provided layer and add additionLayers. +func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { + (*Connection)(conn).Send(&tcp, additionalLayers...) } -// udpLayerIndex is the position of the UDP layer in the UDPIPv4 connection. It -// is the third, after Ethernet and IPv4. -const udpLayerIndex int = 2 +// Close frees associated resources held by the TCPIPv4 connection. +func (conn *TCPIPv4) Close() { + (*Connection)(conn).Close() +} + +// Expect a frame with the TCP layer matching the provided TCP within the +// timeout specified. If it doesn't arrive in time, it returns nil. 
+func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) { + layer, err := (*Connection)(conn).Expect(&tcp, timeout) + if layer == nil { + return nil, err + } + gotTCP, ok := layer.(*TCP) + if !ok { + conn.t.Fatalf("expected %s to be TCP", layer) + } + return gotTCP, err +} + +// RemoteSeqNum returns the next expected sequence number from the DUT. +func (conn *TCPIPv4) RemoteSeqNum() *seqnum.Value { + state, ok := conn.layerStates[len(conn.layerStates)-1].(*tcpState) + if !ok { + conn.t.Fatalf("expected final state of %v to be tcpState", conn.layerStates) + } + return state.remoteSeqNum +} + +// Drain drains the sniffer's receive buffer by receiving packets until there's +// nothing else to receive. +func (conn *TCPIPv4) Drain() { + conn.sniffer.Drain() +} + +// UDPIPv4 maintains the state for all the layers in a UDP/IPv4 connection. +type UDPIPv4 Connection // NewUDPIPv4 creates a new UDPIPv4 connection with reasonable defaults. func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { - lMAC, err := tcpip.ParseMACAddress(*localMAC) + etherState, err := newEtherState(Ether{}, Ether{}) if err != nil { - t.Fatalf("can't parse localMAC %q: %s", *localMAC, err) + t.Fatalf("can't make etherState: %s", err) } - - rMAC, err := tcpip.ParseMACAddress(*remoteMAC) + ipv4State, err := newIPv4State(IPv4{}, IPv4{}) if err != nil { - t.Fatalf("can't parse remoteMAC %q: %s", *remoteMAC, err) + t.Fatalf("can't make ipv4State: %s", err) } - - portPickerFD, localPort, err := pickPort() + tcpState, err := newUDPState(outgoingUDP, incomingUDP) if err != nil { - t.Fatalf("can't pick a port: %s", err) + t.Fatalf("can't make udpState: %s", err) } - lIP := tcpip.Address(net.ParseIP(*localIPv4).To4()) - rIP := tcpip.Address(net.ParseIP(*remoteIPv4).To4()) - - sniffer, err := NewSniffer(t) + injector, err := NewInjector(t) if err != nil { - t.Fatalf("can't make new sniffer: %s", err) + t.Fatalf("can't make injector: %s", err) } - - injector, err := NewInjector(t) + sniffer, err := NewSniffer(t) if err != nil { - t.Fatalf("can't make new injector: %s", err) + t.Fatalf("can't make sniffer: %s", err) } - newOutgoingUDP := &UDP{ - SrcPort: &localPort, - } - if err := newOutgoingUDP.merge(outgoingUDP); err != nil { - t.Fatalf("can't merge %+v into %+v: %s", outgoingUDP, newOutgoingUDP, err) - } - newIncomingUDP := &UDP{ - DstPort: &localPort, - } - if err := newIncomingUDP.merge(incomingUDP); err != nil { - t.Fatalf("can't merge %+v into %+v: %s", incomingUDP, newIncomingUDP, err) - } return UDPIPv4{ - outgoing: Layers{ - &Ether{SrcAddr: &lMAC, DstAddr: &rMAC}, - &IPv4{SrcAddr: &lIP, DstAddr: &rIP}, - newOutgoingUDP}, - incoming: Layers{ - &Ether{SrcAddr: &rMAC, DstAddr: &lMAC}, - &IPv4{SrcAddr: &rIP, DstAddr: &lIP}, - newIncomingUDP}, - sniffer: sniffer, - injector: injector, - portPickerFD: portPickerFD, - t: t, + layerStates: []layerState{etherState, ipv4State, tcpState}, + injector: injector, + sniffer: sniffer, + t: t, } } -// Close the injector and sniffer associated with this connection. -func (conn *UDPIPv4) Close() { - conn.sniffer.Close() - conn.injector.Close() - if err := unix.Close(conn.portPickerFD); err != nil { - conn.t.Fatalf("can't close portPickerFD: %s", err) - } - conn.portPickerFD = -1 -} - -// CreateFrame builds a frame for the connection with the provided udp -// overriding defaults and the additionalLayers added after the UDP header. 
-func (conn *UDPIPv4) CreateFrame(udp UDP, additionalLayers ...Layer) Layers { - layersToSend := deepcopy.Copy(conn.outgoing).(Layers) - if err := layersToSend[udpLayerIndex].(*UDP).merge(udp); err != nil { - conn.t.Fatalf("can't merge %+v into %+v: %s", udp, layersToSend[udpLayerIndex], err) - } - layersToSend = append(layersToSend, additionalLayers...) - return layersToSend +// CreateFrame builds a frame for the connection with layer overriding defaults +// of the innermost layer and additionalLayers added after it. +func (conn *UDPIPv4) CreateFrame(layer Layer, additionalLayers ...Layer) Layers { + return (*Connection)(conn).CreateFrame(layer, additionalLayers...) } -// SendFrame sends a frame with reasonable defaults. +// SendFrame sends a frame on the wire and updates the state of all layers. func (conn *UDPIPv4) SendFrame(frame Layers) { - outBytes, err := frame.toBytes() - if err != nil { - conn.t.Fatalf("can't build outgoing UDP packet: %s", err) - } - conn.injector.Send(outBytes) -} - -// Send a packet with reasonable defaults and override some fields by udp. -func (conn *UDPIPv4) Send(udp UDP, additionalLayers ...Layer) { - conn.SendFrame(conn.CreateFrame(udp, additionalLayers...)) + (*Connection)(conn).SendFrame(frame) } -// Recv gets a packet from the sniffer within the timeout provided. If no packet -// arrives before the timeout, it returns nil. -func (conn *UDPIPv4) Recv(timeout time.Duration) *UDP { - deadline := time.Now().Add(timeout) - for { - timeout = time.Until(deadline) - if timeout <= 0 { - break - } - b := conn.sniffer.Recv(timeout) - if b == nil { - break - } - layers := Parse(ParseEther, b) - if !conn.incoming.match(layers) { - continue // Ignore packets that don't match the expected incoming. - } - return (layers[udpLayerIndex]).(*UDP) - } - return nil +// Close frees associated resources held by the UDPIPv4 connection. +func (conn *UDPIPv4) Close() { + (*Connection)(conn).Close() } // Drain drains the sniffer's receive buffer by receiving packets until there's @@ -434,23 +647,3 @@ func (conn *UDPIPv4) Recv(timeout time.Duration) *UDP { func (conn *UDPIPv4) Drain() { conn.sniffer.Drain() } - -// Expect a packet that matches the provided udp within the timeout specified. -// If it doesn't arrive in time, the test fails. -func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) { - deadline := time.Now().Add(timeout) - var allUDP []string - for { - var gotUDP *UDP - if timeout = time.Until(deadline); timeout > 0 { - gotUDP = conn.Recv(timeout) - } - if gotUDP == nil { - return nil, fmt.Errorf("got %d packets:\n%s", len(allUDP), strings.Join(allUDP, "\n")) - } - if udp.match(gotUDP) { - return gotUDP, nil - } - allUDP = append(allUDP, gotUDP.String()) - } -} diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index b467c15cc..1ec94ce17 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -64,6 +64,9 @@ type Layer interface { // setPrev sets the pointer to the Layer encapsulating this one. setPrev(Layer) + + // merge overrides the values in the interface with the provided values. + merge(Layer) error } // LayerBase is the common elements of all layers. @@ -91,6 +94,9 @@ func (lb *LayerBase) setPrev(l Layer) { // equalLayer compares that two Layer structs match while ignoring field in // which either input has a nil and also ignoring the LayerBase of the inputs. 
func equalLayer(x, y Layer) bool { + if x == nil || y == nil { + return true + } // opt ignores comparison pairs where either of the inputs is a nil. opt := cmp.FilterValues(func(x, y interface{}) bool { for _, l := range []interface{}{x, y} { @@ -104,6 +110,15 @@ func equalLayer(x, y Layer) bool { return cmp.Equal(x, y, opt, cmpopts.IgnoreTypes(LayerBase{})) } +// mergeLayer merges other in layer. Any non-nil value in other overrides the +// corresponding value in layer. If other is nil, no action is performed. +func mergeLayer(layer, other Layer) error { + if other == nil { + return nil + } + return mergo.Merge(layer, other, mergo.WithOverride) +} + func stringLayer(l Layer) string { v := reflect.ValueOf(l).Elem() t := v.Type() @@ -172,14 +187,14 @@ func NetworkProtocolNumber(v tcpip.NetworkProtocolNumber) *tcpip.NetworkProtocol return &v } -// LayerParser parses the input bytes and returns a Layer along with the next -// LayerParser to run. If there is no more parsing to do, the returned -// LayerParser is nil. -type LayerParser func([]byte) (Layer, LayerParser) +// layerParser parses the input bytes and returns a Layer along with the next +// layerParser to run. If there is no more parsing to do, the returned +// layerParser is nil. +type layerParser func([]byte) (Layer, layerParser) -// Parse parses bytes starting with the first LayerParser and using successive -// LayerParsers until all the bytes are parsed. -func Parse(parser LayerParser, b []byte) Layers { +// parse parses bytes starting with the first layerParser and using successive +// layerParsers until all the bytes are parsed. +func parse(parser layerParser, b []byte) Layers { var layers Layers for { var layer Layer @@ -194,22 +209,22 @@ func Parse(parser LayerParser, b []byte) Layers { return layers } -// ParseEther parses the bytes assuming that they start with an ethernet header +// parseEther parses the bytes assuming that they start with an ethernet header // and continues parsing further encapsulations. -func ParseEther(b []byte) (Layer, LayerParser) { +func parseEther(b []byte) (Layer, layerParser) { h := header.Ethernet(b) ether := Ether{ SrcAddr: LinkAddress(h.SourceAddress()), DstAddr: LinkAddress(h.DestinationAddress()), Type: NetworkProtocolNumber(h.Type()), } - var nextParser LayerParser + var nextParser layerParser switch h.Type() { case header.IPv4ProtocolNumber: - nextParser = ParseIPv4 + nextParser = parseIPv4 default: // Assume that the rest is a payload. - nextParser = ParsePayload + nextParser = parsePayload } return ðer, nextParser } @@ -222,6 +237,12 @@ func (l *Ether) length() int { return header.EthernetMinimumSize } +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *Ether) merge(other Layer) error { + return mergeLayer(l, other) +} + // IPv4 can construct and match an IPv4 encapsulation. type IPv4 struct { LayerBase @@ -330,9 +351,9 @@ func Address(v tcpip.Address) *tcpip.Address { return &v } -// ParseIPv4 parses the bytes assuming that they start with an ipv4 header and +// parseIPv4 parses the bytes assuming that they start with an ipv4 header and // continues parsing further encapsulations. 
-func ParseIPv4(b []byte) (Layer, LayerParser) { +func parseIPv4(b []byte) (Layer, layerParser) { h := header.IPv4(b) tos, _ := h.TOS() ipv4 := IPv4{ @@ -348,15 +369,15 @@ func ParseIPv4(b []byte) (Layer, LayerParser) { SrcAddr: Address(h.SourceAddress()), DstAddr: Address(h.DestinationAddress()), } - var nextParser LayerParser + var nextParser layerParser switch h.TransportProtocol() { case header.TCPProtocolNumber: - nextParser = ParseTCP + nextParser = parseTCP case header.UDPProtocolNumber: - nextParser = ParseUDP + nextParser = parseUDP default: // Assume that the rest is a payload. - nextParser = ParsePayload + nextParser = parsePayload } return &ipv4, nextParser } @@ -372,6 +393,12 @@ func (l *IPv4) length() int { return int(*l.IHL) } +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *IPv4) merge(other Layer) error { + return mergeLayer(l, other) +} + // TCP can construct and match a TCP encapsulation. type TCP struct { LayerBase @@ -482,9 +509,9 @@ func Uint32(v uint32) *uint32 { return &v } -// ParseTCP parses the bytes assuming that they start with a tcp header and +// parseTCP parses the bytes assuming that they start with a tcp header and // continues parsing further encapsulations. -func ParseTCP(b []byte) (Layer, LayerParser) { +func parseTCP(b []byte) (Layer, layerParser) { h := header.TCP(b) tcp := TCP{ SrcPort: Uint16(h.SourcePort()), @@ -497,7 +524,7 @@ func ParseTCP(b []byte) (Layer, LayerParser) { Checksum: Uint16(h.Checksum()), UrgentPointer: Uint16(h.UrgentPointer()), } - return &tcp, ParsePayload + return &tcp, parsePayload } func (l *TCP) match(other Layer) bool { @@ -513,8 +540,8 @@ func (l *TCP) length() int { // merge overrides the values in l with the values from other but only in fields // where the value is not nil. -func (l *TCP) merge(other TCP) error { - return mergo.Merge(l, other, mergo.WithOverride) +func (l *TCP) merge(other Layer) error { + return mergeLayer(l, other) } // UDP can construct and match a UDP encapsulation. @@ -565,9 +592,9 @@ func setUDPChecksum(h *header.UDP, udp *UDP) error { return nil } -// ParseUDP parses the bytes assuming that they start with a udp header and +// parseUDP parses the bytes assuming that they start with a udp header and // returns the parsed layer and the next parser to use. -func ParseUDP(b []byte) (Layer, LayerParser) { +func parseUDP(b []byte) (Layer, layerParser) { h := header.UDP(b) udp := UDP{ SrcPort: Uint16(h.SourcePort()), @@ -575,7 +602,7 @@ func ParseUDP(b []byte) (Layer, LayerParser) { Length: Uint16(h.Length()), Checksum: Uint16(h.Checksum()), } - return &udp, ParsePayload + return &udp, parsePayload } func (l *UDP) match(other Layer) bool { @@ -591,8 +618,8 @@ func (l *UDP) length() int { // merge overrides the values in l with the values from other but only in fields // where the value is not nil. -func (l *UDP) merge(other UDP) error { - return mergo.Merge(l, other, mergo.WithOverride) +func (l *UDP) merge(other Layer) error { + return mergeLayer(l, other) } // Payload has bytes beyond OSI layer 4. @@ -605,9 +632,9 @@ func (l *Payload) String() string { return stringLayer(l) } -// ParsePayload parses the bytes assuming that they start with a payload and +// parsePayload parses the bytes assuming that they start with a payload and // continue to the end. There can be no further encapsulations. 
-func ParsePayload(b []byte) (Layer, LayerParser) { +func parsePayload(b []byte) (Layer, layerParser) { payload := Payload{ Bytes: b, } @@ -626,6 +653,12 @@ func (l *Payload) length() int { return len(l.Bytes) } +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *Payload) merge(other Layer) error { + return mergeLayer(l, other) +} + // Layers is an array of Layer and supports similar functions to Layer. type Layers []Layer @@ -662,8 +695,8 @@ func (ls *Layers) match(other Layers) bool { if len(*ls) > len(other) { return false } - for i := 0; i < len(*ls); i++ { - if !equalLayer((*ls)[i], other[i]) { + for i, l := range *ls { + if !equalLayer(l, other[i]) { return false } } diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index 09bfa43c5..ff722d4a6 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -17,6 +17,7 @@ package testbench import ( "encoding/binary" "flag" + "fmt" "math" "net" "testing" @@ -120,12 +121,13 @@ func (s *Sniffer) Drain() { } } -// Close the socket that Sniffer is using. -func (s *Sniffer) Close() { +// close the socket that Sniffer is using. +func (s *Sniffer) close() error { if err := unix.Close(s.fd); err != nil { - s.t.Fatalf("can't close sniffer socket: %s", err) + return fmt.Errorf("can't close sniffer socket: %w", err) } s.fd = -1 + return nil } // Injector can inject raw frames. @@ -171,10 +173,11 @@ func (i *Injector) Send(b []byte) { } } -// Close the underlying socket. -func (i *Injector) Close() { +// close the underlying socket. +func (i *Injector) close() error { if err := unix.Close(i.fd); err != nil { - i.t.Fatalf("can't close sniffer socket: %s", err) + return fmt.Errorf("can't close sniffer socket: %w", err) } i.fd = -1 + return nil } diff --git a/test/packetimpact/tests/tcp_should_piggyback_test.go b/test/packetimpact/tests/tcp_should_piggyback_test.go index f2ab49e51..b0be6ba23 100644 --- a/test/packetimpact/tests/tcp_should_piggyback_test.go +++ b/test/packetimpact/tests/tcp_should_piggyback_test.go @@ -40,7 +40,11 @@ func TestPiggyback(t *testing.T) { sampleData := []byte("Sample Data") dut.Send(acceptFd, sampleData, 0) - conn.ExpectData(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, sampleData, time.Second) + expectedTCP := tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)} + expectedPayload := tb.Payload{Bytes: sampleData} + if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { + t.Fatalf("Expected %v but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err) + } // Cause DUT to send us more data as soon as we ACK their first data segment because we have // a small window. @@ -48,6 +52,8 @@ func TestPiggyback(t *testing.T) { // DUT should ACK our segment by piggybacking ACK to their outstanding data segment instead of // sending a separate ACK packet. 
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &tb.Payload{Bytes: sampleData}) - conn.ExpectData(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, sampleData, time.Second) + conn.Send(expectedTCP, &expectedPayload) + if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { + t.Fatalf("Expected %v but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err) + } } diff --git a/test/packetimpact/tests/tcp_window_shrink_test.go b/test/packetimpact/tests/tcp_window_shrink_test.go index b48cc6491..c9354074e 100644 --- a/test/packetimpact/tests/tcp_window_shrink_test.go +++ b/test/packetimpact/tests/tcp_window_shrink_test.go @@ -38,15 +38,22 @@ func TestWindowShrink(t *testing.T) { dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") + samplePayload := &tb.Payload{Bytes: sampleData} dut.Send(acceptFd, sampleData, 0) - conn.ExpectData(tb.TCP{}, sampleData, time.Second) + if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) dut.Send(acceptFd, sampleData, 0) dut.Send(acceptFd, sampleData, 0) - conn.ExpectData(tb.TCP{}, sampleData, time.Second) - conn.ExpectData(tb.TCP{}, sampleData, time.Second) + if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } // We close our receiving window here conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) @@ -54,5 +61,8 @@ func TestWindowShrink(t *testing.T) { // Note: There is another kind of zero-window probing which Windows uses (by sending one // new byte at `RemoteSeqNum`), if netstack wants to go that way, we may want to change // the following lines. - conn.ExpectData(tb.TCP{SeqNum: tb.Uint32(uint32(conn.RemoteSeqNum - 1))}, nil, time.Second) + expectedRemoteSeqNum := *conn.RemoteSeqNum() - 1 + if _, err := conn.ExpectData(&tb.TCP{SeqNum: tb.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil { + t.Fatalf("expected a packet with sequence number %v: %s", expectedRemoteSeqNum, err) + } } diff --git a/test/packetimpact/tests/udp_recv_multicast_test.go b/test/packetimpact/tests/udp_recv_multicast_test.go index bc1b0be49..61fd17050 100644 --- a/test/packetimpact/tests/udp_recv_multicast_test.go +++ b/test/packetimpact/tests/udp_recv_multicast_test.go @@ -30,7 +30,7 @@ func TestUDPRecvMulticast(t *testing.T) { defer dut.Close(boundFD) conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) defer conn.Close() - frame := conn.CreateFrame(tb.UDP{}, &tb.Payload{Bytes: []byte("hello world")}) + frame := conn.CreateFrame(&tb.UDP{}, &tb.Payload{Bytes: []byte("hello world")}) frame[1].(*tb.IPv4).DstAddr = tb.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4())) conn.SendFrame(frame) dut.Recv(boundFD, 100, 0) -- cgit v1.2.3 From ea5b8e9633cd2731bb5656dea523beaf3d643472 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Wed, 15 Apr 2020 14:30:20 -0700 Subject: Use if_nametoindex to get interface index. Removed the TODO to use netlink. 
PiperOrigin-RevId: 306721468 --- test/syscalls/linux/ip_socket_test_util.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index d28dc0db6..98d07ae85 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -16,7 +16,6 @@ #include #include -#include #include #include @@ -35,12 +34,11 @@ uint16_t PortFromInetSockaddr(const struct sockaddr* addr) { } PosixErrorOr InterfaceIndex(std::string name) { - // TODO(igudger): Consider using netlink. - ifreq req = {}; - memcpy(req.ifr_name, name.c_str(), name.size()); - ASSIGN_OR_RETURN_ERRNO(auto sock, Socket(AF_INET, SOCK_DGRAM, 0)); - RETURN_ERROR_IF_SYSCALL_FAIL(ioctl(sock.get(), SIOCGIFINDEX, &req)); - return req.ifr_ifindex; + int index = if_nametoindex(name.c_str()); + if (index) { + return index; + } + return PosixError(errno); } namespace { -- cgit v1.2.3 From 3d3bf9603d9a933b4bf19c38190c583894b75d66 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 15 Apr 2020 14:57:56 -0700 Subject: Use hex.Dump for Layer.String() of byte slices. PiperOrigin-RevId: 306726587 --- test/packetimpact/testbench/layers.go | 8 +++++++- test/packetimpact/testbench/layers_test.go | 16 ++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 1ec94ce17..5ce324f0d 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -15,6 +15,7 @@ package testbench import ( + "encoding/hex" "fmt" "reflect" "strings" @@ -133,7 +134,12 @@ func stringLayer(l Layer) string { if v.IsNil() { continue } - ret = append(ret, fmt.Sprintf("%s:%v", t.Name, reflect.Indirect(v))) + v = reflect.Indirect(v) + if v.Kind() == reflect.Slice && v.Type().Elem().Kind() == reflect.Uint8 { + ret = append(ret, fmt.Sprintf("%s:\n%v", t.Name, hex.Dump(v.Bytes()))) + } else { + ret = append(ret, fmt.Sprintf("%s:%v", t.Name, v)) + } } return fmt.Sprintf("&%s{%s}", t, strings.Join(ret, " ")) } diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index 8ffc26bf9..b32efda93 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -14,9 +14,11 @@ package testbench -import "testing" +import ( + "testing" -import "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip" +) func TestLayerMatch(t *testing.T) { var nilPayload *Payload @@ -134,6 +136,16 @@ func TestLayerStringFormat(t *testing.T) { "Type:4" + "}", }, + { + name: "Payload", + l: &Payload{ + Bytes: []byte("Hooray for packetimpact."), + }, + want: "&testbench.Payload{Bytes:\n" + + "00000000 48 6f 6f 72 61 79 20 66 6f 72 20 70 61 63 6b 65 |Hooray for packe|\n" + + "00000010 74 69 6d 70 61 63 74 2e |timpact.|\n" + + "}", + }, } { t.Run(tt.name, func(t *testing.T) { if got := tt.l.String(); got != tt.want { -- cgit v1.2.3 From 09c7e3f6e497f4ae267772e7357763ac7c18659f Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 15 Apr 2020 19:36:03 -0700 Subject: Add tests for segments outside the receive window. The tests are based on RFC 793 page 69. 
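The new test (added below) drives segments whose sequence numbers sit at the edge of the advertised receive window plus an offset of 0, 1, or 2, and expects the DUT to answer each out-of-window segment with a plain ACK. As a rough standalone sketch of the acceptability rule from RFC 793 page 69 that these cases probe (this is neither testbench nor netstack code; sequence wraparound is handled with unsigned subtraction):

package main

import "fmt"

// inWindow reports whether seq lies in [nxt, nxt+wnd) modulo 2^32.
func inWindow(seq, nxt, wnd uint32) bool {
	return seq-nxt < wnd
}

// segmentAcceptable applies the four-case test from RFC 793 page 69 to a
// segment with sequence number seq and payload length segLen, arriving when
// the receiver expects nxt and advertises a window of wnd bytes.
func segmentAcceptable(seq, segLen, nxt, wnd uint32) bool {
	switch {
	case segLen == 0 && wnd == 0:
		return seq == nxt
	case segLen == 0:
		return inWindow(seq, nxt, wnd)
	case wnd == 0:
		return false
	default:
		return inWindow(seq, nxt, wnd) || inWindow(seq+segLen-1, nxt, wnd)
	}
}

func main() {
	// An empty segment exactly one byte past the window is unacceptable, so
	// the receiver should respond with an ACK rather than accept it.
	fmt.Println(segmentAcceptable(1100, 0, 1000, 100)) // false
	fmt.Println(segmentAcceptable(1099, 0, 1000, 100)) // true
}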
Updates #1607 PiperOrigin-RevId: 306768847 --- test/packetimpact/testbench/connections.go | 20 ++++- test/packetimpact/tests/BUILD | 13 ++++ .../tests/tcp_outside_the_window_test.go | 88 ++++++++++++++++++++++ 3 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 test/packetimpact/tests/tcp_outside_the_window_test.go (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 169db01b0..be62d051d 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -577,13 +577,27 @@ func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) { return gotTCP, err } -// RemoteSeqNum returns the next expected sequence number from the DUT. -func (conn *TCPIPv4) RemoteSeqNum() *seqnum.Value { +func (conn *TCPIPv4) state() *tcpState { state, ok := conn.layerStates[len(conn.layerStates)-1].(*tcpState) if !ok { conn.t.Fatalf("expected final state of %v to be tcpState", conn.layerStates) } - return state.remoteSeqNum + return state +} + +// RemoteSeqNum returns the next expected sequence number from the DUT. +func (conn *TCPIPv4) RemoteSeqNum() *seqnum.Value { + return conn.state().remoteSeqNum +} + +// LocalSeqNum returns the next expected sequence number from the DUT. +func (conn *TCPIPv4) LocalSeqNum() *seqnum.Value { + return conn.state().localSeqNum +} + +// SynAck returns the SynAck that was part of the handshake. +func (conn *TCPIPv4) SynAck() *TCP { + return conn.state().synAck } // Drain drains the sniffer's receive buffer by receiving packets until there's diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 1274d9f60..4f8c8bdc0 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -40,6 +40,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_outside_the_window", + srcs = ["tcp_outside_the_window_test.go"], + # TODO(eyalsoha): Fix #1607 then remove the line below. + netstack = False, + deps = [ + "//pkg/tcpip/header", + "//pkg/tcpip/seqnum", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "tcp_noaccept_close_rst", srcs = ["tcp_noaccept_close_rst_test.go"], diff --git a/test/packetimpact/tests/tcp_outside_the_window_test.go b/test/packetimpact/tests/tcp_outside_the_window_test.go new file mode 100644 index 000000000..db3d3273b --- /dev/null +++ b/test/packetimpact/tests/tcp_outside_the_window_test.go @@ -0,0 +1,88 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_outside_the_window_test + +import ( + "fmt" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +// TestTCPOutsideTheWindows tests the behavior of the DUT when packets arrive +// that are inside or outside the TCP window. 
Packets that are outside the +// window should force an extra ACK, as described in RFC793 page 69: +// https://tools.ietf.org/html/rfc793#page-69 +func TestTCPOutsideTheWindow(t *testing.T) { + for _, tt := range []struct { + description string + tcpFlags uint8 + payload []tb.Layer + seqNumOffset seqnum.Size + expectACK bool + }{ + {"SYN", header.TCPFlagSyn, nil, 0, true}, + {"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 0, true}, + {"ACK", header.TCPFlagAck, nil, 0, false}, + {"FIN", header.TCPFlagFin, nil, 0, false}, + {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 0, true}, + + {"SYN", header.TCPFlagSyn, nil, 1, true}, + {"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 1, true}, + {"ACK", header.TCPFlagAck, nil, 1, true}, + {"FIN", header.TCPFlagFin, nil, 1, false}, + {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 1, true}, + + {"SYN", header.TCPFlagSyn, nil, 2, true}, + {"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 2, true}, + {"ACK", header.TCPFlagAck, nil, 2, true}, + {"FIN", header.TCPFlagFin, nil, 2, false}, + {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 2, true}, + } { + t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + conn.Handshake() + acceptFD, _ := dut.Accept(listenFD) + defer dut.Close(acceptFD) + + windowSize := seqnum.Size(*conn.SynAck().WindowSize) + tt.seqNumOffset + conn.Drain() + // Ignore whatever incrementing that this out-of-order packet might cause + // to the AckNum. + localSeqNum := tb.Uint32(uint32(*conn.LocalSeqNum())) + conn.Send(tb.TCP{ + Flags: tb.Uint8(tt.tcpFlags), + SeqNum: tb.Uint32(uint32(conn.LocalSeqNum().Add(windowSize))), + }, tt.payload...) + timeout := 3 * time.Second + gotACK, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout) + if tt.expectACK && err != nil { + t.Fatalf("expected an ACK packet within %s but got none: %s", timeout, err) + } + if !tt.expectACK && gotACK != nil { + t.Fatalf("expected no ACK packet within %s but got one: %s", timeout, gotACK) + } + }) + } +} -- cgit v1.2.3 From eb7b1903e00eda9248da59991d80594590c9aab6 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 16 Apr 2020 12:21:06 -0700 Subject: Test TCP behavior when receiving unacceptable segment in CLOSE_WAIT TCP, in CLOSE-WAIT state, MUST return ACK with proper SEQ and ACK numbers after recv a seg with OTW SEQ or unacc ACK number, and remain in same state. If the connection is in a synchronized state, any unacceptable segment (out of window sequence number or unacceptable acknowledgment number) must elicit only an empty acknowledgment segment containing the current send-sequence number and an acknowledgment indicating the next sequence number expected to be received, and the connection remains in the same state. 
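Put differently: in any synchronized state, CLOSE-WAIT included, an out-of-window sequence number or an unacceptable acknowledgment number must be answered with an empty challenge ACK built from the current state, and the state itself is left alone. A minimal sketch of that reply, assuming only that the current SND.NXT and RCV.NXT are at hand (the type and field names below are illustrative, not netstack's):

package main

import "fmt"

// tcpState carries just the two sequence variables the reply needs.
type tcpState struct {
	sndNxt uint32 // SND.NXT: next sequence number we would send
	rcvNxt uint32 // RCV.NXT: next sequence number we expect to receive
}

// ackSegment describes the empty acknowledgment sent in response.
type ackSegment struct {
	seq   uint32 // carries SND.NXT
	ack   uint32 // acknowledges RCV.NXT
	flags string
}

// challengeACK builds the required reply without mutating the connection.
func challengeACK(s tcpState) ackSegment {
	return ackSegment{seq: s.sndNxt, ack: s.rcvNxt, flags: "ACK"}
}

func main() {
	s := tcpState{sndNxt: 5000, rcvNxt: 7000}
	fmt.Printf("reply: %+v, state unchanged: %+v\n", challengeACK(s), s)
}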
PiperOrigin-RevId: 306897984 --- test/packetimpact/testbench/connections.go | 14 ++- test/packetimpact/tests/BUILD | 13 +++ test/packetimpact/tests/tcp_close_wait_ack_test.go | 102 +++++++++++++++++++++ 3 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 test/packetimpact/tests/tcp_close_wait_ack_test.go (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index be62d051d..c1b3c4380 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -189,6 +189,7 @@ type tcpState struct { localSeqNum, remoteSeqNum *seqnum.Value synAck *TCP portPickerFD int + finSent bool } var _ layerState = (*tcpState)(nil) @@ -210,6 +211,7 @@ func newTCPState(out, in TCP) (*tcpState, error) { in: TCP{DstPort: &localPort}, localSeqNum: SeqNumValue(seqnum.Value(rand.Uint32())), portPickerFD: portPickerFD, + finSent: false, } if err := s.out.merge(&out); err != nil { return nil, err @@ -254,12 +256,18 @@ func (s *tcpState) sent(sent Layer) error { if !ok { return fmt.Errorf("can't update tcpState with %T Layer", sent) } - for current := tcp.next(); current != nil; current = current.next() { - s.localSeqNum.UpdateForward(seqnum.Size(current.length())) + if !s.finSent { + // update localSeqNum by the payload only when FIN is not yet sent by us + for current := tcp.next(); current != nil; current = current.next() { + s.localSeqNum.UpdateForward(seqnum.Size(current.length())) + } } if tcp.Flags != nil && *tcp.Flags&(header.TCPFlagSyn|header.TCPFlagFin) != 0 { s.localSeqNum.UpdateForward(1) } + if *tcp.Flags&(header.TCPFlagFin) != 0 { + s.finSent = true + } return nil } @@ -590,7 +598,7 @@ func (conn *TCPIPv4) RemoteSeqNum() *seqnum.Value { return conn.state().remoteSeqNum } -// LocalSeqNum returns the next expected sequence number from the DUT. +// LocalSeqNum returns the next sequence number to send from the testbench. func (conn *TCPIPv4) LocalSeqNum() *seqnum.Value { return conn.state().localSeqNum } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 4f8c8bdc0..690cee140 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -75,6 +75,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_close_wait_ack", + srcs = ["tcp_close_wait_ack_test.go"], + # TODO(b/153574037): Fix netstack then remove the line below. + netstack = False, + deps = [ + "//pkg/tcpip/header", + "//pkg/tcpip/seqnum", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/tcp_close_wait_ack_test.go b/test/packetimpact/tests/tcp_close_wait_ack_test.go new file mode 100644 index 000000000..eb4cc7a65 --- /dev/null +++ b/test/packetimpact/tests/tcp_close_wait_ack_test.go @@ -0,0 +1,102 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tcp_close_wait_ack_test + +import ( + "fmt" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func TestCloseWaitAck(t *testing.T) { + for _, tt := range []struct { + description string + makeTestingTCP func(conn *tb.TCPIPv4, seqNumOffset seqnum.Size) tb.TCP + seqNumOffset seqnum.Size + expectAck bool + }{ + {"OTW", GenerateOTWSeqSegment, 0, false}, + {"OTW", GenerateOTWSeqSegment, 1, true}, + {"OTW", GenerateOTWSeqSegment, 2, true}, + {"ACK", GenerateUnaccACKSegment, 0, false}, + {"ACK", GenerateUnaccACKSegment, 1, true}, + {"ACK", GenerateUnaccACKSegment, 2, true}, + } { + t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + acceptFd, _ := dut.Accept(listenFd) + + // Send a FIN to DUT to intiate the active close + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagFin)}) + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + t.Fatalf("expected an ACK for our fin and DUT should enter CLOSE_WAIT: %s", err) + } + + // Send a segment with OTW Seq / unacc ACK and expect an ACK back + conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset), &tb.Payload{Bytes: []byte("Sample Data")}) + gotAck, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) + if tt.expectAck && err != nil { + t.Fatalf("expected an ack but got none: %s", err) + } + if !tt.expectAck && gotAck != nil { + t.Fatalf("expected no ack but got one: %s", gotAck) + } + + // Now let's verify DUT is indeed in CLOSE_WAIT + dut.Close(acceptFd) + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil { + t.Fatalf("expected DUT to send a FIN: %s", err) + } + // Ack the FIN from DUT + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + // Send some extra data to DUT + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, &tb.Payload{Bytes: []byte("Sample Data")}) + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); err != nil { + t.Fatalf("expected DUT to send an RST: %s", err) + } + }) + } +} + +// This generates an segment with seqnum = RCV.NXT + RCV.WND + seqNumOffset, the +// generated segment is only acceptable when seqNumOffset is 0, otherwise an ACK +// is expected from the receiver. +func GenerateOTWSeqSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size) tb.TCP { + windowSize := seqnum.Size(*conn.SynAck().WindowSize) + lastAcceptable := conn.LocalSeqNum().Add(windowSize - 1) + otwSeq := uint32(lastAcceptable.Add(seqNumOffset)) + return tb.TCP{SeqNum: tb.Uint32(otwSeq), Flags: tb.Uint8(header.TCPFlagAck)} +} + +// This generates an segment with acknum = SND.NXT + seqNumOffset, the generated +// segment is only acceptable when seqNumOffset is 0, otherwise an ACK is +// expected from the receiver. 
+func GenerateUnaccACKSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size) tb.TCP { + lastAcceptable := conn.RemoteSeqNum() + unaccAck := uint32(lastAcceptable.Add(seqNumOffset)) + return tb.TCP{AckNum: tb.Uint32(unaccAck), Flags: tb.Uint8(header.TCPFlagAck)} +} -- cgit v1.2.3 From 75e864fc7529bf71484ecabbb2ce2264e96399cf Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Thu, 16 Apr 2020 15:14:44 -0700 Subject: Use multierr in packetimpact Connection.Close() PiperOrigin-RevId: 306930652 --- WORKSPACE | 14 ++++++++++++++ test/packetimpact/testbench/BUILD | 1 + test/packetimpact/testbench/connections.go | 13 ++++++------- 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/WORKSPACE b/WORKSPACE index bca63c0d9..c40e03ad2 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -400,6 +400,20 @@ go_repository( version = "v0.20.0", ) +go_repository( + name = "org_uber_go_atomic", + importpath = "go.uber.org/atomic", + version = "v1.6.0", + sum = "h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=", +) + +go_repository( + name = "org_uber_go_multierr", + importpath = "go.uber.org/multierr", + version = "v1.5.0", + sum = "h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A=", +) + # BigQuery Dependencies for Benchmarks go_repository( name = "com_google_cloud_go", diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index 838a10ffe..b6a254882 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -28,6 +28,7 @@ go_library( "@org_golang_google_grpc//:go_default_library", "@org_golang_google_grpc//keepalive:go_default_library", "@org_golang_x_sys//unix:go_default_library", + "@org_uber_go_multierr//:go_default_library", ], ) diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index c1b3c4380..f84fd8ba7 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -26,6 +26,7 @@ import ( "time" "github.com/mohae/deepcopy" + "go.uber.org/multierr" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -382,17 +383,15 @@ func (conn *Connection) match(override, received Layers) bool { // Close frees associated resources held by the Connection. func (conn *Connection) Close() { - if err := conn.sniffer.close(); err != nil { - conn.t.Fatal(err) - } - if err := conn.injector.close(); err != nil { - conn.t.Fatal(err) - } + errs := multierr.Combine(conn.sniffer.close(), conn.injector.close()) for _, s := range conn.layerStates { if err := s.close(); err != nil { - conn.t.Fatalf("unable to close %+v: %s", s, err) + errs = multierr.Append(errs, fmt.Errorf("unable to close %+v: %s", s, err)) } } + if errs != nil { + conn.t.Fatalf("unable to close %+v: %s", conn, errs) + } } // CreateFrame builds a frame for the connection with layer overriding defaults -- cgit v1.2.3 From 3b05f576d73be644daa17203d9ed64481c45b4a8 Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Thu, 16 Apr 2020 17:57:06 -0700 Subject: Reset pending connections on listener shutdown. When the listening socket is read shutdown, we need to reset all pending and incoming connections. Ensure that the endpoint is not cleaned up from the demuxer and subsequent bind to same port does not go through. 
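The user-visible contract checked by the TCPListenShutdown test added below can also be probed from ordinary userspace. The following hypothetical Go probe (not part of the gVisor test suite) binds a loopback listener, shuts down its read side, and observes that accept(2) then fails with EINVAL while the port stays reserved by the still-registered endpoint:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	lfd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(lfd)

	// Bind to an ephemeral loopback port and start listening.
	if err := unix.Bind(lfd, &unix.SockaddrInet4{Addr: [4]byte{127, 0, 0, 1}}); err != nil {
		panic(err)
	}
	if err := unix.Listen(lfd, 1); err != nil {
		panic(err)
	}
	sa, err := unix.Getsockname(lfd)
	if err != nil {
		panic(err)
	}
	port := sa.(*unix.SockaddrInet4).Port

	// Read-shutdown the listener; pending and future connections get reset.
	if err := unix.Shutdown(lfd, unix.SHUT_RD); err != nil {
		panic(err)
	}

	// accept(2) now fails with EINVAL, matching the assertion in the test.
	if _, _, err := unix.Accept(lfd); err != nil {
		fmt.Println("accept after SHUT_RD:", err)
	}

	// The port is not released: a fresh bind to it fails with EADDRINUSE.
	nfd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(nfd)
	err = unix.Bind(nfd, &unix.SockaddrInet4{Port: port, Addr: [4]byte{127, 0, 0, 1}})
	fmt.Println("re-bind to port", port, ":", err)
}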
PiperOrigin-RevId: 306958038 --- pkg/tcpip/transport/tcp/accept.go | 20 +++--- pkg/tcpip/transport/tcp/connect.go | 7 ++- pkg/tcpip/transport/tcp/endpoint.go | 30 ++++++--- pkg/tcpip/transport/tcp/forwarder.go | 2 +- pkg/tcpip/transport/tcp/protocol.go | 8 +-- pkg/tcpip/transport/tcp/tcp_test.go | 16 ++--- test/syscalls/linux/socket_inet_loopback.cc | 94 +++++++++++++++++++++++++++-- 7 files changed, 138 insertions(+), 39 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index b61c2a8c3..5bb243e3b 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sleep" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -433,19 +432,16 @@ func (e *endpoint) acceptQueueIsFull() bool { // handleListenSegment is called when a listening endpoint receives a segment // and needs to handle it. func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { - if s.flagsAreSet(header.TCPFlagSyn | header.TCPFlagAck) { + e.rcvListMu.Lock() + rcvClosed := e.rcvClosed + e.rcvListMu.Unlock() + if rcvClosed || s.flagsAreSet(header.TCPFlagSyn|header.TCPFlagAck) { + // If the endpoint is shutdown, reply with reset. + // // RFC 793 section 3.4 page 35 (figure 12) outlines that a RST // must be sent in response to a SYN-ACK while in the listen // state to prevent completing a handshake from an old SYN. - e.sendTCP(&s.route, tcpFields{ - id: s.id, - ttl: e.ttl, - tos: e.sendTOS, - flags: header.TCPFlagRst, - seq: s.ackNumber, - ack: 0, - rcvWnd: 0, - }, buffer.VectorisedView{}, nil) + replyWithReset(s, e.sendTOS, e.ttl) return } @@ -534,7 +530,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { // The only time we should reach here when a connection // was opened and closed really quickly and a delayed // ACK was received from the sender. - replyWithReset(s) + replyWithReset(s, e.sendTOS, e.ttl) return } diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 994ac52a3..368865911 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -1053,10 +1053,15 @@ func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) { ep = e.stack.FindTransportEndpoint(header.IPv4ProtocolNumber, e.TransProto, e.ID, &s.route) } if ep == nil { - replyWithReset(s) + replyWithReset(s, stack.DefaultTOS, s.route.DefaultTTL()) s.decRef() return } + + if e == ep { + panic("current endpoint not removed from demuxer, enqueing segments to itself") + } + if ep.(*endpoint).enqueueSegment(s) { ep.(*endpoint).newSegmentWaker.Assert() } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index bffc59e9f..5d0ea9e93 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2101,7 +2101,7 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { switch { case e.EndpointState().connected(): // Close for read. - if (e.shutdownFlags & tcpip.ShutdownRead) != 0 { + if e.shutdownFlags&tcpip.ShutdownRead != 0 { // Mark read side as closed. 
e.rcvListMu.Lock() e.rcvClosed = true @@ -2110,7 +2110,7 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { // If we're fully closed and we have unread data we need to abort // the connection with a RST. - if (e.shutdownFlags&tcpip.ShutdownWrite) != 0 && rcvBufUsed > 0 { + if e.shutdownFlags&tcpip.ShutdownWrite != 0 && rcvBufUsed > 0 { e.resetConnectionLocked(tcpip.ErrConnectionAborted) // Wake up worker to terminate loop. e.notifyProtocolGoroutine(notifyTickleWorker) @@ -2119,7 +2119,7 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { } // Close for write. - if (e.shutdownFlags & tcpip.ShutdownWrite) != 0 { + if e.shutdownFlags&tcpip.ShutdownWrite != 0 { e.sndBufMu.Lock() if e.sndClosed { // Already closed. @@ -2142,12 +2142,23 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { return nil case e.EndpointState() == StateListen: - // Tell protocolListenLoop to stop. - if flags&tcpip.ShutdownRead != 0 { - e.notifyProtocolGoroutine(notifyClose) + if e.shutdownFlags&tcpip.ShutdownRead != 0 { + // Reset all connections from the accept queue and keep the + // worker running so that it can continue handling incoming + // segments by replying with RST. + // + // By not removing this endpoint from the demuxer mapping, we + // ensure that any other bind to the same port fails, as on Linux. + // TODO(gvisor.dev/issue/2468): We need to enable applications to + // start listening on this endpoint again similar to Linux. + e.rcvListMu.Lock() + e.rcvClosed = true + e.rcvListMu.Unlock() + e.closePendingAcceptableConnectionsLocked() + // Notify waiters that the endpoint is shutdown. + e.waiterQueue.Notify(waiter.EventIn | waiter.EventOut | waiter.EventHUp | waiter.EventErr) } return nil - default: return tcpip.ErrNotConnected } @@ -2251,8 +2262,11 @@ func (e *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { e.LockUser() defer e.UnlockUser() + e.rcvListMu.Lock() + rcvClosed := e.rcvClosed + e.rcvListMu.Unlock() // Endpoint must be in listen state before it can accept connections. - if e.EndpointState() != StateListen { + if rcvClosed || e.EndpointState() != StateListen { return nil, nil, tcpip.ErrInvalidEndpointState } diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index 808410c92..704d01c64 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -130,7 +130,7 @@ func (r *ForwarderRequest) Complete(sendReset bool) { // If the caller requested, send a reset. if sendReset { - replyWithReset(r.segment) + replyWithReset(r.segment, stack.DefaultTOS, r.segment.route.DefaultTTL()) } // Release all resources. diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index effbf203f..cfd9a4e8e 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -223,12 +223,12 @@ func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Transpo return true } - replyWithReset(s) + replyWithReset(s, stack.DefaultTOS, s.route.DefaultTTL()) return true } // replyWithReset replies to the given segment with a reset segment. -func replyWithReset(s *segment) { +func replyWithReset(s *segment, tos, ttl uint8) { // Get the seqnum from the packet if the ack flag is set. 
seq := seqnum.Value(0) ack := seqnum.Value(0) @@ -252,8 +252,8 @@ func replyWithReset(s *segment) { } sendTCP(&s.route, tcpFields{ id: s.id, - ttl: s.route.DefaultTTL(), - tos: stack.DefaultTOS, + ttl: ttl, + tos: tos, flags: flags, seq: seq, ack: ack, diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 74fb6e064..ab1014c7f 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -1034,8 +1034,8 @@ func TestSendRstOnListenerRxAckV6(t *testing.T) { checker.SeqNum(200))) } -// TestListenShutdown tests for the listening endpoint not processing -// any receive when it is on read shutdown. +// TestListenShutdown tests for the listening endpoint replying with RST +// on read shutdown. func TestListenShutdown(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() @@ -1046,7 +1046,7 @@ func TestListenShutdown(t *testing.T) { t.Fatal("Bind failed:", err) } - if err := c.EP.Listen(10 /* backlog */); err != nil { + if err := c.EP.Listen(1 /* backlog */); err != nil { t.Fatal("Listen failed:", err) } @@ -1054,9 +1054,6 @@ func TestListenShutdown(t *testing.T) { t.Fatal("Shutdown failed:", err) } - // Wait for the endpoint state to be propagated. - time.Sleep(10 * time.Millisecond) - c.SendPacket(nil, &context.Headers{ SrcPort: context.TestPort, DstPort: context.StackPort, @@ -1065,7 +1062,12 @@ func TestListenShutdown(t *testing.T) { AckNum: 200, }) - c.CheckNoPacket("Packet received when listening socket was shutdown") + // Expect the listening endpoint to reset the connection. + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck|header.TCPFlagRst), + )) } // TestListenCloseWhileConnect tests for the listening endpoint to diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index cd84e633a..d3000dbc6 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -319,6 +319,75 @@ TEST_P(SocketInetLoopbackTest, TCPListenUnbound) { tcpSimpleConnectTest(listener, connector, false); } +TEST_P(SocketInetLoopbackTest, TCPListenShutdown) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + constexpr int kBacklog = 2; + constexpr int kFDs = kBacklog + 1; + + // Create the listening socket. + FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + + // Shutdown the write of the listener, expect to not have any effect. 
+ ASSERT_THAT(shutdown(listen_fd.get(), SHUT_WR), SyscallSucceeds()); + + for (int i = 0; i < kFDs; i++) { + auto client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(connect(client.get(), reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), SyscallSucceeds()); + } + + // Shutdown the read of the listener, expect to fail subsequent + // server accepts, binds and client connects. + ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RD), SyscallSucceeds()); + + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), + SyscallFailsWithErrno(EINVAL)); + + // Check that shutdown did not release the port. + FileDescriptor new_listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT( + bind(new_listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Check that subsequent connection attempts receive a RST. + auto client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + for (int i = 0; i < kFDs; i++) { + auto client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(connect(client.get(), reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallFailsWithErrno(ECONNREFUSED)); + } +} + TEST_P(SocketInetLoopbackTest, TCPListenClose) { auto const& param = GetParam(); @@ -365,9 +434,8 @@ TEST_P(SocketInetLoopbackTest, TCPListenClose) { } } -TEST_P(SocketInetLoopbackTest, TCPListenCloseWhileConnect) { - auto const& param = GetParam(); - +void TestListenWhileConnect(const TestParam& param, + void (*stopListen)(FileDescriptor&)) { TestAddress const& listener = param.listener; TestAddress const& connector = param.connector; @@ -404,8 +472,8 @@ TEST_P(SocketInetLoopbackTest, TCPListenCloseWhileConnect) { clients.push_back(std::move(client)); } } - // Close the listening socket. - listen_fd.reset(); + + stopListen(listen_fd); for (auto& client : clients) { const int kTimeout = 10000; @@ -420,13 +488,26 @@ TEST_P(SocketInetLoopbackTest, TCPListenCloseWhileConnect) { char c; // Subsequent read can fail with: // ECONNRESET: If the client connection was established and was reset by the - // remote. ECONNREFUSED: If the client connection failed to be established. + // remote. + // ECONNREFUSED: If the client connection failed to be established. ASSERT_THAT(read(client.get(), &c, sizeof(c)), AnyOf(SyscallFailsWithErrno(ECONNRESET), SyscallFailsWithErrno(ECONNREFUSED))); } } +TEST_P(SocketInetLoopbackTest, TCPListenCloseWhileConnect) { + TestListenWhileConnect(GetParam(), [](FileDescriptor& f) { + ASSERT_THAT(close(f.release()), SyscallSucceeds()); + }); +} + +TEST_P(SocketInetLoopbackTest, TCPListenShutdownWhileConnect) { + TestListenWhileConnect(GetParam(), [](FileDescriptor& f) { + ASSERT_THAT(shutdown(f.get(), SHUT_RD), SyscallSucceeds()); + }); +} + TEST_P(SocketInetLoopbackTest, TCPbacklog) { auto const& param = GetParam(); @@ -1134,6 +1215,7 @@ TEST_P(SocketInetReusePortTest, TcpPortReuseMultiThread_NoRandomSave) { if (connects_received >= kConnectAttempts) { // Another thread have shutdown our read side causing the // accept to fail. + ASSERT_EQ(errno, EINVAL); break; } ASSERT_NO_ERRNO(fd); -- cgit v1.2.3 From f03996c5e9803934226e4b3a10827501cb936ab9 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Thu, 16 Apr 2020 19:26:02 -0700 Subject: Implement pipe(2) and pipe2(2) for VFS2. 
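The changes below add a pipefs filesystem and wire pipe(2) and pipe2(2) into VFS2. As a quick userspace illustration of the contract being implemented, namely a connected read/write FD pair, O_NONBLOCK/O_CLOEXEC handling, and ESPIPE for offset based I/O (which the updated pipe.cc test also checks), here is a small Go sketch using golang.org/x/sys/unix; it is not sentry code:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	// pipe2(2): returns a connected pair, read end first.
	p := make([]int, 2)
	if err := unix.Pipe2(p, unix.O_NONBLOCK|unix.O_CLOEXEC); err != nil {
		panic(err)
	}
	r, w := p[0], p[1]
	defer unix.Close(r)
	defer unix.Close(w)

	// Data written to the write end comes out of the read end.
	if _, err := unix.Write(w, []byte("hello")); err != nil {
		panic(err)
	}
	buf := make([]byte, 8)
	n, err := unix.Read(r, buf)
	if err != nil {
		panic(err)
	}
	fmt.Printf("read %q\n", buf[:n])

	// Offset based I/O is rejected on pipes with ESPIPE.
	if _, err := unix.Pread(r, buf, 0); err == unix.ESPIPE {
		fmt.Println("pread on a pipe: ESPIPE, as expected")
	}
}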
Updates #1035 PiperOrigin-RevId: 306968644 --- pkg/sentry/fsimpl/pipefs/BUILD | 20 +++ pkg/sentry/fsimpl/pipefs/pipefs.go | 148 +++++++++++++++++++ pkg/sentry/fsimpl/tmpfs/filesystem.go | 2 +- pkg/sentry/fsimpl/tmpfs/named_pipe.go | 23 +-- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 2 +- pkg/sentry/kernel/BUILD | 1 + pkg/sentry/kernel/kernel.go | 30 +++- pkg/sentry/kernel/pipe/vfs.go | 162 ++++++++++++--------- pkg/sentry/syscalls/linux/sys_pipe.go | 14 +- pkg/sentry/syscalls/linux/vfs2/BUILD | 3 + pkg/sentry/syscalls/linux/vfs2/fd.go | 17 +++ .../syscalls/linux/vfs2/linux64_override_amd64.go | 4 +- pkg/sentry/syscalls/linux/vfs2/pipe.go | 63 ++++++++ pkg/sentry/syscalls/linux/vfs2/read_write.go | 8 +- pkg/sentry/vfs/vfs.go | 2 +- test/syscalls/linux/pipe.cc | 2 + 16 files changed, 389 insertions(+), 112 deletions(-) create mode 100644 pkg/sentry/fsimpl/pipefs/BUILD create mode 100644 pkg/sentry/fsimpl/pipefs/pipefs.go create mode 100644 pkg/sentry/syscalls/linux/vfs2/pipe.go (limited to 'test') diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD new file mode 100644 index 000000000..0d411606f --- /dev/null +++ b/pkg/sentry/fsimpl/pipefs/BUILD @@ -0,0 +1,20 @@ +load("//tools:defs.bzl", "go_library") + +licenses(["notice"]) + +go_library( + name = "pipefs", + srcs = ["pipefs.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/pipe", + "//pkg/sentry/kernel/time", + "//pkg/sentry/vfs", + "//pkg/syserror", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go new file mode 100644 index 000000000..faf3179bc --- /dev/null +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -0,0 +1,148 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package pipefs provides the filesystem implementation backing +// Kernel.PipeMount. +package pipefs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +type filesystemType struct{} + +// Name implements vfs.FilesystemType.Name. +func (filesystemType) Name() string { + return "pipefs" +} + +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (filesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + panic("pipefs.filesystemType.GetFilesystem should never be called") +} + +// filesystem implements vfs.FilesystemImpl. 
+type filesystem struct { + kernfs.Filesystem + + // TODO(gvisor.dev/issue/1193): + // + // - kernfs does not provide a way to implement statfs, from which we + // should indicate PIPEFS_MAGIC. + // + // - kernfs does not provide a way to override names for + // vfs.FilesystemImpl.PrependPath(); pipefs inodes should use synthetic + // name fmt.Sprintf("pipe:[%d]", inode.ino). +} + +// NewFilesystem sets up and returns a new vfs.Filesystem implemented by +// pipefs. +func NewFilesystem(vfsObj *vfs.VirtualFilesystem) *vfs.Filesystem { + fs := &filesystem{} + fs.Init(vfsObj, filesystemType{}) + return fs.VFSFilesystem() +} + +// inode implements kernfs.Inode. +type inode struct { + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink + kernfs.InodeNoopRefCount + + pipe *pipe.VFSPipe + + ino uint64 + uid auth.KUID + gid auth.KGID + // We use the creation timestamp for all of atime, mtime, and ctime. + ctime ktime.Time +} + +func newInode(ctx context.Context, fs *kernfs.Filesystem) *inode { + creds := auth.CredentialsFromContext(ctx) + return &inode{ + pipe: pipe.NewVFSPipe(false /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize), + ino: fs.NextIno(), + uid: creds.EffectiveKUID, + gid: creds.EffectiveKGID, + ctime: ktime.NowFromContext(ctx), + } +} + +const pipeMode = 0600 | linux.S_IFIFO + +// CheckPermissions implements kernfs.Inode.CheckPermissions. +func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { + return vfs.GenericCheckPermissions(creds, ats, pipeMode, i.uid, i.gid) +} + +// Mode implements kernfs.Inode.Mode. +func (i *inode) Mode() linux.FileMode { + return pipeMode +} + +// Stat implements kernfs.Inode.Stat. +func (i *inode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + ts := linux.NsecToStatxTimestamp(i.ctime.Nanoseconds()) + return linux.Statx{ + Mask: linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS, + Blksize: usermem.PageSize, + Nlink: 1, + UID: uint32(i.uid), + GID: uint32(i.gid), + Mode: pipeMode, + Ino: i.ino, + Size: 0, + Blocks: 0, + Atime: ts, + Ctime: ts, + Mtime: ts, + // TODO(gvisor.dev/issue/1197): Device number. + }, nil +} + +// SetStat implements kernfs.Inode.SetStat. +func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { + if opts.Stat.Mask == 0 { + return nil + } + return syserror.EPERM +} + +// Open implements kernfs.Inode.Open. +func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + // FIXME(b/38173783): kernfs does not plumb Context here. + return i.pipe.Open(context.Background(), rp.Mount(), vfsd, opts.Flags) +} + +// NewConnectedPipeFDs returns a pair of FileDescriptions representing the read +// and write ends of a newly-created pipe, as for pipe(2) and pipe2(2). +// +// Preconditions: mnt.Filesystem() must have been returned by NewFilesystem(). 
+func NewConnectedPipeFDs(ctx context.Context, mnt *vfs.Mount, flags uint32) (*vfs.FileDescription, *vfs.FileDescription) { + fs := mnt.Filesystem().Impl().(*kernfs.Filesystem) + inode := newInode(ctx, fs) + var d kernfs.Dentry + d.Init(inode) + defer d.DecRef() + return inode.pipe.ReaderWriterPair(mnt, d.VFSDentry(), flags) +} diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index f4d50d64f..660f5a29b 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -392,7 +392,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open // Can't open symlinks without O_PATH (which is unimplemented). return nil, syserror.ELOOP case *namedPipe: - return newNamedPipeFD(ctx, impl, rp, &d.vfsd, opts.Flags) + return impl.pipe.Open(ctx, rp.Mount(), &d.vfsd, opts.Flags) case *deviceFile: return rp.VirtualFilesystem().OpenDeviceSpecialFile(ctx, rp.Mount(), &d.vfsd, impl.kind, impl.major, impl.minor, opts) case *socketFile: diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go index 2c5c739df..8d77b3fa8 100644 --- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go +++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go @@ -16,10 +16,8 @@ package tmpfs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" - "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/usermem" ) @@ -33,27 +31,8 @@ type namedPipe struct { // * fs.mu must be locked. // * rp.Mount().CheckBeginWrite() has been called successfully. func (fs *filesystem) newNamedPipe(creds *auth.Credentials, mode linux.FileMode) *inode { - file := &namedPipe{pipe: pipe.NewVFSPipe(pipe.DefaultPipeSize, usermem.PageSize)} + file := &namedPipe{pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize)} file.inode.init(file, fs, creds, linux.S_IFIFO|mode) file.inode.nlink = 1 // Only the parent has a link. return &file.inode } - -// namedPipeFD implements vfs.FileDescriptionImpl. Methods are implemented -// entirely via struct embedding. 
-type namedPipeFD struct { - fileDescription - - *pipe.VFSPipeFD -} - -func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - var err error - var fd namedPipeFD - fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, vfsd, &fd.vfsfd, flags) - if err != nil { - return nil, err - } - fd.vfsfd.Init(&fd, flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}) - return &fd.vfsfd, nil -} diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 9fa8637d5..a59b24d45 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -357,6 +357,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linu return err } i.mu.Lock() + defer i.mu.Unlock() var ( needsMtimeBump bool needsCtimeBump bool @@ -427,7 +428,6 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linu atomic.StoreInt64(&i.ctime, now) } - i.mu.Unlock() return nil } diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index e0ff58d8c..e47af66d6 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -170,6 +170,7 @@ go_library( "//pkg/sentry/fs/timerfd", "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/fsimpl/pipefs", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/hostcpu", "//pkg/sentry/inet", diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index de8a95854..fef60e636 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -50,6 +50,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" "gvisor.dev/gvisor/pkg/sentry/fsbridge" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/pipefs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/hostcpu" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -254,6 +255,10 @@ type Kernel struct { // VFS keeps the filesystem state used across the kernel. vfs vfs.VirtualFilesystem + // pipeMount is the Mount used for pipes created by the pipe() and pipe2() + // syscalls (as opposed to named pipes created by mknod()). + pipeMount *vfs.Mount + // If set to true, report address space activation waits as if the task is in // external wait so that the watchdog doesn't report the task stuck. SleepForAddressSpaceActivation bool @@ -354,19 +359,29 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic} k.futexes = futex.NewManager() k.netlinkPorts = port.New() + if VFS2Enabled { if err := k.vfs.Init(); err != nil { return fmt.Errorf("failed to initialize VFS: %v", err) } - fs := sockfs.NewFilesystem(&k.vfs) - // NewDisconnectedMount will take an additional reference on fs. 
- defer fs.DecRef() - sm, err := k.vfs.NewDisconnectedMount(fs, nil, &vfs.MountOptions{}) + + pipeFilesystem := pipefs.NewFilesystem(&k.vfs) + defer pipeFilesystem.DecRef() + pipeMount, err := k.vfs.NewDisconnectedMount(pipeFilesystem, nil, &vfs.MountOptions{}) + if err != nil { + return fmt.Errorf("failed to create pipefs mount: %v", err) + } + k.pipeMount = pipeMount + + socketFilesystem := sockfs.NewFilesystem(&k.vfs) + defer socketFilesystem.DecRef() + socketMount, err := k.vfs.NewDisconnectedMount(socketFilesystem, nil, &vfs.MountOptions{}) if err != nil { return fmt.Errorf("failed to initialize socket mount: %v", err) } - k.socketMount = sm + k.socketMount = socketMount } + return nil } @@ -1613,3 +1628,8 @@ func (k *Kernel) EmitUnimplementedEvent(ctx context.Context) { func (k *Kernel) VFS() *vfs.VirtualFilesystem { return &k.vfs } + +// PipeMount returns the pipefs mount. +func (k *Kernel) PipeMount() *vfs.Mount { + return k.pipeMount +} diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index a5675bd70..b54f08a30 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -49,38 +49,42 @@ type VFSPipe struct { } // NewVFSPipe returns an initialized VFSPipe. -func NewVFSPipe(sizeBytes, atomicIOBytes int64) *VFSPipe { +func NewVFSPipe(isNamed bool, sizeBytes, atomicIOBytes int64) *VFSPipe { var vp VFSPipe - initPipe(&vp.pipe, true /* isNamed */, sizeBytes, atomicIOBytes) + initPipe(&vp.pipe, isNamed, sizeBytes, atomicIOBytes) return &vp } -// NewVFSPipeFD opens a named pipe. Named pipes have special blocking semantics -// during open: +// ReaderWriterPair returns read-only and write-only FDs for vp. // -// "Normally, opening the FIFO blocks until the other end is opened also. A -// process can open a FIFO in nonblocking mode. In this case, opening for -// read-only will succeed even if no-one has opened on the write side yet, -// opening for write-only will fail with ENXIO (no such device or address) -// unless the other end has already been opened. Under Linux, opening a FIFO -// for read and write will succeed both in blocking and nonblocking mode. POSIX -// leaves this behavior undefined. This can be used to open a FIFO for writing -// while there are no readers available." - fifo(7) -func (vp *VFSPipe) NewVFSPipeFD(ctx context.Context, vfsd *vfs.Dentry, vfsfd *vfs.FileDescription, flags uint32) (*VFSPipeFD, error) { +// Preconditions: statusFlags should not contain an open access mode. +func (vp *VFSPipe) ReaderWriterPair(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription) { + return vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags), vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags) +} + +// Open opens the pipe represented by vp. +func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, error) { vp.mu.Lock() defer vp.mu.Unlock() - readable := vfs.MayReadFileWithOpenFlags(flags) - writable := vfs.MayWriteFileWithOpenFlags(flags) + readable := vfs.MayReadFileWithOpenFlags(statusFlags) + writable := vfs.MayWriteFileWithOpenFlags(statusFlags) if !readable && !writable { return nil, syserror.EINVAL } - vfd, err := vp.open(vfsd, vfsfd, flags) - if err != nil { - return nil, err - } + fd := vp.newFD(mnt, vfsd, statusFlags) + // Named pipes have special blocking semantics during open: + // + // "Normally, opening the FIFO blocks until the other end is opened also. A + // process can open a FIFO in nonblocking mode. 
In this case, opening for + // read-only will succeed even if no-one has opened on the write side yet, + // opening for write-only will fail with ENXIO (no such device or address) + // unless the other end has already been opened. Under Linux, opening a + // FIFO for read and write will succeed both in blocking and nonblocking + // mode. POSIX leaves this behavior undefined. This can be used to open a + // FIFO for writing while there are no readers available." - fifo(7) switch { case readable && writable: // Pipes opened for read-write always succeed without blocking. @@ -89,23 +93,26 @@ func (vp *VFSPipe) NewVFSPipeFD(ctx context.Context, vfsd *vfs.Dentry, vfsfd *vf case readable: newHandleLocked(&vp.rWakeup) - // If this pipe is being opened as nonblocking and there's no + // If this pipe is being opened as blocking and there's no // writer, we have to wait for a writer to open the other end. - if flags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() && !waitFor(&vp.mu, &vp.wWakeup, ctx) { + if vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() && !waitFor(&vp.mu, &vp.wWakeup, ctx) { + fd.DecRef() return nil, syserror.EINTR } case writable: newHandleLocked(&vp.wWakeup) - if !vp.pipe.HasReaders() { - // Nonblocking, write-only opens fail with ENXIO when - // the read side isn't open yet. - if flags&linux.O_NONBLOCK != 0 { + if vp.pipe.isNamed && !vp.pipe.HasReaders() { + // Non-blocking, write-only opens fail with ENXIO when the read + // side isn't open yet. + if statusFlags&linux.O_NONBLOCK != 0 { + fd.DecRef() return nil, syserror.ENXIO } // Wait for a reader to open the other end. if !waitFor(&vp.mu, &vp.rWakeup, ctx) { + fd.DecRef() return nil, syserror.EINTR } } @@ -114,96 +121,93 @@ func (vp *VFSPipe) NewVFSPipeFD(ctx context.Context, vfsd *vfs.Dentry, vfsfd *vf panic("invalid pipe flags: must be readable, writable, or both") } - return vfd, nil + return fd, nil } // Preconditions: vp.mu must be held. -func (vp *VFSPipe) open(vfsd *vfs.Dentry, vfsfd *vfs.FileDescription, flags uint32) (*VFSPipeFD, error) { - var fd VFSPipeFD - fd.flags = flags - fd.readable = vfs.MayReadFileWithOpenFlags(flags) - fd.writable = vfs.MayWriteFileWithOpenFlags(flags) - fd.vfsfd = vfsfd - fd.pipe = &vp.pipe +func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) *vfs.FileDescription { + fd := &VFSPipeFD{ + pipe: &vp.pipe, + } + fd.vfsfd.Init(fd, statusFlags, mnt, vfsd, &vfs.FileDescriptionOptions{ + DenyPRead: true, + DenyPWrite: true, + UseDentryMetadata: true, + }) switch { - case fd.readable && fd.writable: + case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable(): vp.pipe.rOpen() vp.pipe.wOpen() - case fd.readable: + case fd.vfsfd.IsReadable(): vp.pipe.rOpen() - case fd.writable: + case fd.vfsfd.IsWritable(): vp.pipe.wOpen() default: panic("invalid pipe flags: must be readable, writable, or both") } - return &fd, nil + return &fd.vfsfd } -// VFSPipeFD implements a subset of vfs.FileDescriptionImpl for pipes. It is -// expected that filesystesm will use this in a struct implementing -// vfs.FileDescriptionImpl. +// VFSPipeFD implements vfs.FileDescriptionImpl for pipes. type VFSPipeFD struct { - pipe *Pipe - flags uint32 - readable bool - writable bool - vfsfd *vfs.FileDescription + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + vfs.DentryMetadataFileDescriptionImpl + + pipe *Pipe } // Release implements vfs.FileDescriptionImpl.Release. 
func (fd *VFSPipeFD) Release() { var event waiter.EventMask - if fd.readable { + if fd.vfsfd.IsReadable() { fd.pipe.rClose() - event |= waiter.EventIn + event |= waiter.EventOut } - if fd.writable { + if fd.vfsfd.IsWritable() { fd.pipe.wClose() - event |= waiter.EventOut + event |= waiter.EventIn | waiter.EventHUp } if event == 0 { panic("invalid pipe flags: must be readable, writable, or both") } - if fd.writable { - fd.vfsfd.VirtualDentry().Mount().EndWrite() - } - fd.pipe.Notify(event) } -// OnClose implements vfs.FileDescriptionImpl.OnClose. -func (fd *VFSPipeFD) OnClose(_ context.Context) error { - return nil +// Readiness implements waiter.Waitable.Readiness. +func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask { + switch { + case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable(): + return fd.pipe.rwReadiness() + case fd.vfsfd.IsReadable(): + return fd.pipe.rReadiness() + case fd.vfsfd.IsWritable(): + return fd.pipe.wReadiness() + default: + panic("pipe FD is neither readable nor writable") + } } -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *VFSPipeFD) PRead(_ context.Context, _ usermem.IOSequence, _ int64, _ vfs.ReadOptions) (int64, error) { - return 0, syserror.ESPIPE +// EventRegister implements waiter.Waitable.EventRegister. +func (fd *VFSPipeFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + fd.pipe.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (fd *VFSPipeFD) EventUnregister(e *waiter.Entry) { + fd.pipe.EventUnregister(e) } // Read implements vfs.FileDescriptionImpl.Read. func (fd *VFSPipeFD) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) { - if !fd.readable { - return 0, syserror.EINVAL - } - return fd.pipe.Read(ctx, dst) } -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *VFSPipeFD) PWrite(_ context.Context, _ usermem.IOSequence, _ int64, _ vfs.WriteOptions) (int64, error) { - return 0, syserror.ESPIPE -} - // Write implements vfs.FileDescriptionImpl.Write. func (fd *VFSPipeFD) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) { - if !fd.writable { - return 0, syserror.EINVAL - } - return fd.pipe.Write(ctx, src) } @@ -211,3 +215,17 @@ func (fd *VFSPipeFD) Write(ctx context.Context, src usermem.IOSequence, _ vfs.Wr func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { return fd.pipe.Ioctl(ctx, uio, args) } + +// PipeSize implements fcntl(F_GETPIPE_SZ). +func (fd *VFSPipeFD) PipeSize() int64 { + // Inline Pipe.FifoSize() rather than calling it with nil Context and + // fs.File and ignoring the returned error (which is always nil). + fd.pipe.mu.Lock() + defer fd.pipe.mu.Unlock() + return fd.pipe.max +} + +// SetPipeSize implements fcntl(F_SETPIPE_SZ). +func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) { + return fd.pipe.SetFifoSize(size) +} diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go index 798344042..43c510930 100644 --- a/pkg/sentry/syscalls/linux/sys_pipe.go +++ b/pkg/sentry/syscalls/linux/sys_pipe.go @@ -24,6 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// LINT.IfChange + // pipe2 implements the actual system call with flags. 
func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) { if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 { @@ -45,10 +47,12 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) { } if _, err := t.CopyOut(addr, fds); err != nil { - // The files are not closed in this case, the exact semantics - // of this error case are not well defined, but they could have - // already been observed by user space. - return 0, syserror.EFAULT + for _, fd := range fds { + if file, _ := t.FDTable().Remove(fd); file != nil { + file.DecRef() + } + } + return 0, err } return 0, nil } @@ -69,3 +73,5 @@ func Pipe2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall n, err := pipe2(t, addr, flags) return n, nil, err } + +// LINT.ThenChange(vfs2/pipe.go) diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index b32abfe59..6ff2d84d2 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -18,6 +18,7 @@ go_library( "linux64_override_arm64.go", "mmap.go", "path.go", + "pipe.go", "poll.go", "read_write.go", "setstat.go", @@ -39,8 +40,10 @@ go_library( "//pkg/gohacks", "//pkg/sentry/arch", "//pkg/sentry/fsbridge", + "//pkg/sentry/fsimpl/pipefs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/limits", "//pkg/sentry/loader", diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 3afcea665..8181d80f4 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -18,6 +18,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/syserror" ) @@ -140,6 +141,22 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return uintptr(file.StatusFlags()), nil, nil case linux.F_SETFL: return 0, nil, file.SetStatusFlags(t, t.Credentials(), args[2].Uint()) + case linux.F_SETPIPE_SZ: + pipefile, ok := file.Impl().(*pipe.VFSPipeFD) + if !ok { + return 0, nil, syserror.EBADF + } + n, err := pipefile.SetPipeSize(int64(args[2].Int())) + if err != nil { + return 0, nil, err + } + return uintptr(n), nil, nil + case linux.F_GETPIPE_SZ: + pipefile, ok := file.Impl().(*pipe.VFSPipeFD) + if !ok { + return 0, nil, syserror.EBADF + } + return uintptr(pipefile.PipeSize()), nil, nil default: // TODO(gvisor.dev/issue/1623): Everything else is not yet supported. 
return 0, nil, syserror.EINVAL diff --git a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go index 645e0bcb8..21eb98444 100644 --- a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go +++ b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go @@ -39,7 +39,7 @@ func Override(table map[uintptr]kernel.Syscall) { table[19] = syscalls.Supported("readv", Readv) table[20] = syscalls.Supported("writev", Writev) table[21] = syscalls.Supported("access", Access) - delete(table, 22) // pipe + table[22] = syscalls.Supported("pipe", Pipe) table[23] = syscalls.Supported("select", Select) table[32] = syscalls.Supported("dup", Dup) table[33] = syscalls.Supported("dup2", Dup2) @@ -151,7 +151,7 @@ func Override(table map[uintptr]kernel.Syscall) { delete(table, 290) // eventfd2 table[291] = syscalls.Supported("epoll_create1", EpollCreate1) table[292] = syscalls.Supported("dup3", Dup3) - delete(table, 293) // pipe2 + table[293] = syscalls.Supported("pipe2", Pipe2) delete(table, 294) // inotify_init1 table[295] = syscalls.Supported("preadv", Preadv) table[296] = syscalls.Supported("pwritev", Pwritev) diff --git a/pkg/sentry/syscalls/linux/vfs2/pipe.go b/pkg/sentry/syscalls/linux/vfs2/pipe.go new file mode 100644 index 000000000..4a01e4209 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/pipe.go @@ -0,0 +1,63 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/pipefs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// Pipe implements Linux syscall pipe(2). +func Pipe(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + addr := args[0].Pointer() + return 0, nil, pipe2(t, addr, 0) +} + +// Pipe2 implements Linux syscall pipe2(2). 
+func Pipe2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + addr := args[0].Pointer() + flags := args[1].Int() + return 0, nil, pipe2(t, addr, flags) +} + +func pipe2(t *kernel.Task, addr usermem.Addr, flags int32) error { + if flags&^(linux.O_NONBLOCK|linux.O_CLOEXEC) != 0 { + return syserror.EINVAL + } + r, w := pipefs.NewConnectedPipeFDs(t, t.Kernel().PipeMount(), uint32(flags&linux.O_NONBLOCK)) + defer r.DecRef() + defer w.DecRef() + + fds, err := t.NewFDsVFS2(0, []*vfs.FileDescription{r, w}, kernel.FDFlags{ + CloseOnExec: flags&linux.O_CLOEXEC != 0, + }) + if err != nil { + return err + } + if _, err := t.CopyOut(addr, fds); err != nil { + for _, fd := range fds { + if _, file := t.FDTable().Remove(fd); file != nil { + file.DecRef() + } + } + return err + } + return nil +} diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index 898b190fd..6c6998f45 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -103,7 +103,7 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt // Issue the request and break out if it completes with anything other than // "would block". - n, err := file.Read(t, dst, opts) + n, err = file.Read(t, dst, opts) total += n if err != syserror.ErrWouldBlock { break @@ -248,7 +248,7 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of // Issue the request and break out if it completes with anything other than // "would block". - n, err := file.PRead(t, dst, offset+total, opts) + n, err = file.PRead(t, dst, offset+total, opts) total += n if err != syserror.ErrWouldBlock { break @@ -335,7 +335,7 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op // Issue the request and break out if it completes with anything other than // "would block". - n, err := file.Write(t, src, opts) + n, err = file.Write(t, src, opts) total += n if err != syserror.ErrWouldBlock { break @@ -480,7 +480,7 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o // Issue the request and break out if it completes with anything other than // "would block". - n, err := file.PWrite(t, src, offset+total, opts) + n, err = file.PWrite(t, src, offset+total, opts) total += n if err != syserror.ErrWouldBlock { break diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 053c6e1d1..cb5bbd781 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -335,7 +335,7 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.MknodAt(ctx, rp, *opts) - if err != nil { + if err == nil { vfs.putResolvingPath(rp) return nil } diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc index d8e19e910..67228b66b 100644 --- a/test/syscalls/linux/pipe.cc +++ b/test/syscalls/linux/pipe.cc @@ -265,6 +265,8 @@ TEST_P(PipeTest, OffsetCalls) { SyscallFailsWithErrno(ESPIPE)); struct iovec iov; + iov.iov_base = &buf; + iov.iov_len = sizeof(buf); EXPECT_THAT(preadv(wfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); EXPECT_THAT(pwritev(rfd_.get(), &iov, 1, 0), SyscallFailsWithErrno(ESPIPE)); } -- cgit v1.2.3 From 4a818d64378f16f3738ba51c7804cff90f753b1d Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Fri, 17 Apr 2020 10:33:54 -0700 Subject: proc net test: Annotate disable-save test with NoRandomSave. 
PiperOrigin-RevId: 307069884 --- test/syscalls/linux/proc_net.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index 4e23d1e78..cac394910 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -353,7 +353,7 @@ TEST(ProcNetSnmp, UdpNoPorts_NoRandomSave) { EXPECT_EQ(oldNoPorts, newNoPorts - 1); } -TEST(ProcNetSnmp, UdpIn) { +TEST(ProcNetSnmp, UdpIn_NoRandomSave) { // TODO(gvisor.dev/issue/866): epsocket metrics are not savable. const DisableSave ds; -- cgit v1.2.3 From a80cd4302337f1c3a807e127f5a6edc2f014f431 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Fri, 17 Apr 2020 13:27:35 -0700 Subject: Add test name to boot and gofer log files This is to make easier to find corresponding logs in case test fails. PiperOrigin-RevId: 307104283 --- runsc/cmd/capability_test.go | 2 +- runsc/container/console_test.go | 6 +-- runsc/container/container_test.go | 72 ++++++++++++++++----------------- runsc/container/multi_container_test.go | 44 ++++++++++---------- runsc/container/shared_volume_test.go | 4 +- runsc/main.go | 4 +- runsc/testutil/testutil.go | 7 ++-- test/root/oom_score_adj_test.go | 4 +- 8 files changed, 70 insertions(+), 73 deletions(-) (limited to 'test') diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go index 0c27f7313..9360d7442 100644 --- a/runsc/cmd/capability_test.go +++ b/runsc/cmd/capability_test.go @@ -85,7 +85,7 @@ func TestCapabilities(t *testing.T) { Inheritable: caps, } - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) // Use --network=host to make sandbox use spec's capabilities. conf.Network = boot.NetworkHost diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 651615d4c..af245b6d8 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -118,7 +118,7 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) { // Test that an pty FD is sent over the console socket if one is provided. func TestConsoleSocket(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) spec := testutil.NewSpecWithArgs("true") rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) @@ -163,7 +163,7 @@ func TestConsoleSocket(t *testing.T) { // Test that job control signals work on a console created with "exec -ti". func TestJobControlSignalExec(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { @@ -286,7 +286,7 @@ func TestJobControlSignalExec(t *testing.T) { // Test that job control signals work on a console created with "run -ti". func TestJobControlSignalRootContainer(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) // Don't let bash execute from profile or rc files, otherwise our PID // counts get messed up. spec := testutil.NewSpecWithArgs("/bin/bash", "--noprofile", "--norc") diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 24f9ecc35..5db6d64aa 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -251,12 +251,12 @@ var noOverlay = []configOption{kvm, nonExclusiveFS} var all = append(noOverlay, overlay) // configs generates different configurations to run tests. 
-func configs(opts ...configOption) []*boot.Config { +func configs(t *testing.T, opts ...configOption) []*boot.Config { // Always load the default config. - cs := []*boot.Config{testutil.TestConfig()} + cs := []*boot.Config{testutil.TestConfig(t)} for _, o := range opts { - c := testutil.TestConfig() + c := testutil.TestConfig(t) switch o { case overlay: c.Overlay = true @@ -285,7 +285,7 @@ func TestLifecycle(t *testing.T) { childReaper.Start() defer childReaper.Stop() - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) // The container will just sleep for a long time. We will kill it before // it finishes sleeping. @@ -457,7 +457,7 @@ func TestExePath(t *testing.T) { t.Fatal(err) } - for _, conf := range configs(overlay) { + for _, conf := range configs(t, overlay) { t.Logf("Running test with conf: %+v", conf) for _, test := range []struct { path string @@ -521,21 +521,19 @@ func TestExePath(t *testing.T) { // Test the we can retrieve the application exit status from the container. func TestAppExitStatus(t *testing.T) { - conf := testutil.TestConfig() - conf.VFS2 = false - doAppExitStatus(t, conf) + doAppExitStatus(t, false) } // This is TestAppExitStatus for VFSv2. func TestAppExitStatusVFS2(t *testing.T) { - conf := testutil.TestConfig() - conf.VFS2 = true - doAppExitStatus(t, conf) + doAppExitStatus(t, true) } -func doAppExitStatus(t *testing.T, conf *boot.Config) { +func doAppExitStatus(t *testing.T, vfs2 bool) { // First container will succeed. succSpec := testutil.NewSpecWithArgs("true") + conf := testutil.TestConfig(t) + conf.VFS2 = vfs2 rootDir, bundleDir, err := testutil.SetupContainer(succSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -585,7 +583,7 @@ func doAppExitStatus(t *testing.T, conf *boot.Config) { // TestExec verifies that a container can exec a new program. func TestExec(t *testing.T) { - for _, conf := range configs(overlay) { + for _, conf := range configs(t, overlay) { t.Logf("Running test with conf: %+v", conf) const uid = 343 @@ -679,7 +677,7 @@ func TestExec(t *testing.T) { // TestKillPid verifies that we can signal individual exec'd processes. func TestKillPid(t *testing.T) { - for _, conf := range configs(overlay) { + for _, conf := range configs(t, overlay) { t.Logf("Running test with conf: %+v", conf) app, err := testutil.FindFile("runsc/container/test_app/test_app") @@ -755,7 +753,7 @@ func TestKillPid(t *testing.T) { // be the next consecutive number after the last number from the checkpointed container. func TestCheckpointRestore(t *testing.T) { // Skip overlay because test requires writing to host file. - for _, conf := range configs(noOverlay...) { + for _, conf := range configs(t, noOverlay...) { t.Logf("Running test with conf: %+v", conf) dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test") @@ -916,7 +914,7 @@ func TestCheckpointRestore(t *testing.T) { // with filesystem Unix Domain Socket use. func TestUnixDomainSockets(t *testing.T) { // Skip overlay because test requires writing to host file. - for _, conf := range configs(noOverlay...) { + for _, conf := range configs(t, noOverlay...) { t.Logf("Running test with conf: %+v", conf) // UDS path is limited to 108 chars for compatibility with older systems. @@ -1054,7 +1052,7 @@ func TestUnixDomainSockets(t *testing.T) { // recreated. Then it resumes the container, verify that the file gets created // again. 
func TestPauseResume(t *testing.T) { - for _, conf := range configs(noOverlay...) { + for _, conf := range configs(t, noOverlay...) { t.Run(fmt.Sprintf("conf: %+v", conf), func(t *testing.T) { t.Logf("Running test with conf: %+v", conf) @@ -1135,7 +1133,7 @@ func TestPauseResume(t *testing.T) { // occurs given the correct state. func TestPauseResumeStatus(t *testing.T) { spec := testutil.NewSpecWithArgs("sleep", "20") - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1201,7 +1199,7 @@ func TestCapabilities(t *testing.T) { uid := auth.KUID(os.Getuid() + 1) gid := auth.KGID(os.Getgid() + 1) - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) spec := testutil.NewSpecWithArgs("sleep", "100") @@ -1290,7 +1288,7 @@ func TestCapabilities(t *testing.T) { // TestRunNonRoot checks that sandbox can be configured when running as // non-privileged user. func TestRunNonRoot(t *testing.T) { - for _, conf := range configs(noOverlay...) { + for _, conf := range configs(t, noOverlay...) { t.Logf("Running test with conf: %+v", conf) spec := testutil.NewSpecWithArgs("/bin/true") @@ -1334,7 +1332,7 @@ func TestRunNonRoot(t *testing.T) { // TestMountNewDir checks that runsc will create destination directory if it // doesn't exit. func TestMountNewDir(t *testing.T) { - for _, conf := range configs(overlay) { + for _, conf := range configs(t, overlay) { t.Logf("Running test with conf: %+v", conf) root, err := ioutil.TempDir(testutil.TmpDir(), "root") @@ -1363,7 +1361,7 @@ func TestMountNewDir(t *testing.T) { } func TestReadonlyRoot(t *testing.T) { - for _, conf := range configs(overlay) { + for _, conf := range configs(t, overlay) { t.Logf("Running test with conf: %+v", conf) spec := testutil.NewSpecWithArgs("/bin/touch", "/foo") @@ -1401,7 +1399,7 @@ func TestReadonlyRoot(t *testing.T) { } func TestUIDMap(t *testing.T) { - for _, conf := range configs(noOverlay...) { + for _, conf := range configs(t, noOverlay...) { t.Logf("Running test with conf: %+v", conf) testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") if err != nil { @@ -1482,7 +1480,7 @@ func TestUIDMap(t *testing.T) { } func TestReadonlyMount(t *testing.T) { - for _, conf := range configs(overlay) { + for _, conf := range configs(t, overlay) { t.Logf("Running test with conf: %+v", conf) dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount") @@ -1539,7 +1537,7 @@ func TestAbbreviatedIDs(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir cids := []string{ @@ -1597,7 +1595,7 @@ func TestAbbreviatedIDs(t *testing.T) { func TestGoferExits(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1666,7 +1664,7 @@ func TestRootNotMount(t *testing.T) { spec.Root.Readonly = true spec.Mounts = nil - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) if err := run(spec, conf); err != nil { t.Fatalf("error running sandbox: %v", err) } @@ -1680,7 +1678,7 @@ func TestUserLog(t *testing.T) { // sched_rr_get_interval = 148 - not implemented in gvisor. 
spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall=148") - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1720,7 +1718,7 @@ func TestUserLog(t *testing.T) { } func TestWaitOnExitedSandbox(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) // Run a shell that sleeps for 1 second and then exits with a @@ -1775,7 +1773,7 @@ func TestWaitOnExitedSandbox(t *testing.T) { func TestDestroyNotStarted(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/sleep", "100") - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1802,7 +1800,7 @@ func TestDestroyNotStarted(t *testing.T) { func TestDestroyStarting(t *testing.T) { for i := 0; i < 10; i++ { spec := testutil.NewSpecWithArgs("/bin/sleep", "100") - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1847,7 +1845,7 @@ func TestDestroyStarting(t *testing.T) { } func TestCreateWorkingDir(t *testing.T) { - for _, conf := range configs(overlay) { + for _, conf := range configs(t, overlay) { t.Logf("Running test with conf: %+v", conf) tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") @@ -1920,7 +1918,7 @@ func TestMountPropagation(t *testing.T) { }, } - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1971,7 +1969,7 @@ func TestMountPropagation(t *testing.T) { } func TestMountSymlink(t *testing.T) { - for _, conf := range configs(overlay) { + for _, conf := range configs(t, overlay) { t.Logf("Running test with conf: %+v", conf) dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") @@ -2051,7 +2049,7 @@ func TestNetRaw(t *testing.T) { } for _, enableRaw := range []bool{true, false} { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.EnableRaw = enableRaw test := "--enabled" @@ -2068,7 +2066,7 @@ func TestNetRaw(t *testing.T) { // TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works. func TestOverlayfsStaleRead(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.OverlayfsStaleRead = true in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in") @@ -2132,7 +2130,7 @@ func TestTTYField(t *testing.T) { for _, test := range testCases { t.Run(test.name, func(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) // We will run /bin/sleep, possibly with an open TTY. cmd := []string{"/bin/sleep", "10000"} diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 2da93ec5b..dc2fb42ce 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -135,7 +135,7 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { // TestMultiContainerSanity checks that it is possible to run 2 dead-simple // containers in the same sandbox. func TestMultiContainerSanity(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) 
{ t.Logf("Running test with conf: %+v", conf) rootDir, err := testutil.SetupRootDir() @@ -173,7 +173,7 @@ func TestMultiContainerSanity(t *testing.T) { // TestMultiPIDNS checks that it is possible to run 2 dead-simple // containers in the same sandbox with different pidns. func TestMultiPIDNS(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) rootDir, err := testutil.SetupRootDir() @@ -218,7 +218,7 @@ func TestMultiPIDNS(t *testing.T) { // TestMultiPIDNSPath checks the pidns path. func TestMultiPIDNSPath(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) rootDir, err := testutil.SetupRootDir() @@ -289,7 +289,7 @@ func TestMultiContainerWait(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // The first container should run the entire duration of the test. @@ -367,7 +367,7 @@ func TestExecWait(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // The first container should run the entire duration of the test. @@ -463,7 +463,7 @@ func TestMultiContainerMount(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir containers, cleanup, err := startContainers(conf, sps, ids) @@ -484,7 +484,7 @@ func TestMultiContainerMount(t *testing.T) { // TestMultiContainerSignal checks that it is possible to signal individual // containers without killing the entire sandbox. func TestMultiContainerSignal(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) rootDir, err := testutil.SetupRootDir() @@ -585,7 +585,7 @@ func TestMultiContainerDestroy(t *testing.T) { t.Fatal("error finding test_app:", err) } - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) rootDir, err := testutil.SetupRootDir() @@ -653,7 +653,7 @@ func TestMultiContainerProcesses(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Note: use curly braces to keep 'sh' process around. Otherwise, shell @@ -712,7 +712,7 @@ func TestMultiContainerKillAll(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir for _, tc := range []struct { @@ -804,7 +804,7 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { []string{"/bin/sleep", "100"}, []string{"/bin/sleep", "100"}) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -858,7 +858,7 @@ func TestMultiContainerDestroyStarting(t *testing.T) { } specs, ids := createSpecs(cmds...) 
- conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -943,7 +943,7 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Make sure overlay is enabled, and none of the root filesystems are @@ -1006,7 +1006,7 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { childrenSpecs := allSpecs[1:] childrenIDs := allIDs[1:] - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) rootDir, bundleDir, err := testutil.SetupContainer(rootSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) @@ -1080,7 +1080,7 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { // Test that pod shared mounts are properly mounted in 2 containers and that // changes from one container is reflected in the other. func TestMultiContainerSharedMount(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) rootDir, err := testutil.SetupRootDir() @@ -1195,7 +1195,7 @@ func TestMultiContainerSharedMount(t *testing.T) { // Test that pod mounts are mounted as readonly when requested. func TestMultiContainerSharedMountReadonly(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) rootDir, err := testutil.SetupRootDir() @@ -1262,7 +1262,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) { // Test that shared pod mounts continue to work after container is restarted. func TestMultiContainerSharedMountRestart(t *testing.T) { - for _, conf := range configs(all...) { + for _, conf := range configs(t, all...) { t.Logf("Running test with conf: %+v", conf) rootDir, err := testutil.SetupRootDir() @@ -1381,7 +1381,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Setup the containers. @@ -1463,7 +1463,7 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Create the specs. @@ -1500,7 +1500,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir sleep := []string{"sleep", "100"} @@ -1587,7 +1587,7 @@ func TestMultiContainerLoadSandbox(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir // Create containers for the sandbox. @@ -1687,7 +1687,7 @@ func TestMultiContainerRunNonRoot(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir pod, cleanup, err := startContainers(conf, podSpecs, ids) diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index dc4194134..f80852414 100644 --- a/runsc/container/shared_volume_test.go +++ b/runsc/container/shared_volume_test.go @@ -31,7 +31,7 @@ import ( // TestSharedVolume checks that modifications to a volume mount are propagated // into and out of the sandbox. 
func TestSharedVolume(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.FileAccess = boot.FileAccessShared t.Logf("Running test with conf: %+v", conf) @@ -190,7 +190,7 @@ func checkFile(c *Container, filename string, want []byte) error { // TestSharedVolumeFile tests that changes to file content outside the sandbox // is reflected inside. func TestSharedVolumeFile(t *testing.T) { - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.FileAccess = boot.FileAccessShared t.Logf("Running test with conf: %+v", conf) diff --git a/runsc/main.go b/runsc/main.go index 9d52f3006..2baba90f8 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -296,9 +296,7 @@ func main() { if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) } - } - - if *alsoLogToStderr { + } else if *alsoLogToStderr { e = &log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)} } diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go index 51e487715..5e09f8f16 100644 --- a/runsc/testutil/testutil.go +++ b/runsc/testutil/testutil.go @@ -31,11 +31,13 @@ import ( "os" "os/exec" "os/signal" + "path" "path/filepath" "strconv" "strings" "sync/atomic" "syscall" + "testing" "time" "github.com/cenkalti/backoff" @@ -81,17 +83,16 @@ func ConfigureExePath() error { // TestConfig returns the default configuration to use in tests. Note that // 'RootDir' must be set by caller if required. -func TestConfig() *boot.Config { +func TestConfig(t *testing.T) *boot.Config { logDir := "" if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { logDir = dir + "/" } return &boot.Config{ Debug: true, - DebugLog: logDir, + DebugLog: path.Join(logDir, "runsc.log."+t.Name()+".%TIMESTAMP%.%COMMAND%"), LogFormat: "text", DebugLogFormat: "text", - AlsoLogToStderr: true, LogPackets: true, Network: boot.NetworkNone, Strace: true, diff --git a/test/root/oom_score_adj_test.go b/test/root/oom_score_adj_test.go index 126f0975a..22488b05d 100644 --- a/test/root/oom_score_adj_test.go +++ b/test/root/oom_score_adj_test.go @@ -46,7 +46,7 @@ func TestOOMScoreAdjSingle(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir ppid, err := specutils.GetParentPid(os.Getpid()) @@ -137,7 +137,7 @@ func TestOOMScoreAdjMulti(t *testing.T) { } defer os.RemoveAll(rootDir) - conf := testutil.TestConfig() + conf := testutil.TestConfig(t) conf.RootDir = rootDir ppid, err := specutils.GetParentPid(os.Getpid()) -- cgit v1.2.3 From e838290e671c9d72dbaa3aba13bf0c35f1147de4 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 17 Apr 2020 15:31:51 -0700 Subject: prlimit: don't check credentials on self prlimit was erroneously comparing UIDs and GIDs when getting/setting a process' own limits. From the manpage: To set or get the resources of a process other than itself, the caller must have the CAP_SYS_RESOURCE capability, or the real, effective, and saved set user IDs of the target process must match the real user ID of the caller and the real, effective, and saved set group IDs of the target process must match the real group ID of the caller. 
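The rule quoted above can be restated as a small decision function. The sketch below is illustrative only — the names creds and mayPrlimit are invented for this example and simplify the sentry's real kernel.Task/auth.Credentials types — but it captures the manpage rule and the short-circuit this change adds for a task operating on itself.

package main

import "fmt"

// creds is a simplified stand-in for a task's credential set; the real
// sentry code compares auth.KUID/auth.KGID values held by kernel.Task.
type creds struct {
	realUID, effUID, savedUID uint32
	realGID, effGID, savedGID uint32
}

// mayPrlimit mirrors the manpage rule quoted above: targeting yourself is
// always allowed; otherwise the caller needs CAP_SYS_RESOURCE, or the
// target's real/effective/saved IDs must all match the caller's real IDs.
func mayPrlimit(self, hasCapSysResource bool, caller, target creds) bool {
	if self {
		return true // the fix: no credential comparison when ot == t
	}
	if hasCapSysResource {
		return true
	}
	return target.realUID == caller.realUID &&
		target.effUID == caller.realUID &&
		target.savedUID == caller.realUID &&
		target.realGID == caller.realGID &&
		target.effGID == caller.realGID &&
		target.savedGID == caller.realGID
}

func main() {
	// A root task that dropped its effective UID (as in the new
	// Setuid_prlimit test below) can still read its own limits.
	c := creds{realUID: 0, effUID: 65534, savedUID: 0}
	fmt.Println(mayPrlimit(true, false, c, c))  // true: self access skips the check
	fmt.Println(mayPrlimit(false, false, c, c)) // false: effUID != caller's realUID
}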
PiperOrigin-RevId: 307127266 --- pkg/sentry/syscalls/linux/sys_rlimit.go | 2 +- test/syscalls/linux/uidgid.cc | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go index e08c333d6..d5d5b6959 100644 --- a/pkg/sentry/syscalls/linux/sys_rlimit.go +++ b/pkg/sentry/syscalls/linux/sys_rlimit.go @@ -197,7 +197,7 @@ func Prlimit64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // saved set user IDs of the target process must match the real user ID of // the caller and the real, effective, and saved set group IDs of the // target process must match the real group ID of the caller." - if !t.HasCapabilityIn(linux.CAP_SYS_RESOURCE, t.PIDNamespace().UserNamespace()) { + if ot != t && !t.HasCapabilityIn(linux.CAP_SYS_RESOURCE, t.PIDNamespace().UserNamespace()) { cred, tcred := t.Credentials(), ot.Credentials() if cred.RealKUID != tcred.RealKUID || cred.RealKUID != tcred.EffectiveKUID || diff --git a/test/syscalls/linux/uidgid.cc b/test/syscalls/linux/uidgid.cc index 6218fbce1..ff66a79f4 100644 --- a/test/syscalls/linux/uidgid.cc +++ b/test/syscalls/linux/uidgid.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -249,6 +250,17 @@ TEST(UidGidRootTest, Setgroups) { SyscallFailsWithErrno(EFAULT)); } +TEST(UidGidRootTest, Setuid_prlimit) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); + + // Change our UID. + EXPECT_THAT(seteuid(65534), SyscallSucceeds()); + + // Despite the UID change, we should be able to get our own limits. + struct rlimit rl = {}; + ASSERT_THAT(prlimit(0, RLIMIT_NOFILE, NULL, &rl), SyscallSucceeds()); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 08b2fd9bc2a963ea15821b782cf6d80c15dbdf42 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Sun, 19 Apr 2020 20:47:55 -0700 Subject: Convert tcp_user_timeout test from packetdrill to packetimpact. 
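For context on what the converted test exercises: TCP_USER_TIMEOUT bounds how long transmitted data may remain unacknowledged before the kernel aborts the connection. The snippet below is a standalone, hypothetical usage sketch (the peer address and the 3000 ms value are placeholders echoing the 3-second timeout used by the deleted packetdrill scripts); the actual test drives the option through the packetimpact DUT's SetSockOptInt instead.

package main

import (
	"log"
	"net"

	"golang.org/x/sys/unix"
)

func main() {
	conn, err := net.Dial("tcp", "example.com:80") // placeholder peer
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	raw, err := conn.(*net.TCPConn).SyscallConn()
	if err != nil {
		log.Fatal(err)
	}
	// Abort the connection if sent data stays unacked for more than 3000 ms.
	if err := raw.Control(func(fd uintptr) {
		if serr := unix.SetsockoptInt(int(fd), unix.SOL_TCP, unix.TCP_USER_TIMEOUT, 3000); serr != nil {
			log.Fatal(serr)
		}
	}); err != nil {
		log.Fatal(err)
	}
}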
PiperOrigin-RevId: 307328289 --- test/packetdrill/BUILD | 12 +-- test/packetdrill/linux/tcp_user_timeout.pkt | 39 --------- test/packetdrill/netstack/tcp_user_timeout.pkt | 38 --------- test/packetimpact/tests/BUILD | 10 +++ test/packetimpact/tests/tcp_user_timeout_test.go | 100 +++++++++++++++++++++++ 5 files changed, 111 insertions(+), 88 deletions(-) delete mode 100644 test/packetdrill/linux/tcp_user_timeout.pkt delete mode 100644 test/packetdrill/netstack/tcp_user_timeout.pkt create mode 100644 test/packetimpact/tests/tcp_user_timeout_test.go (limited to 'test') diff --git a/test/packetdrill/BUILD b/test/packetdrill/BUILD index fb0b2db41..dfcd55f60 100644 --- a/test/packetdrill/BUILD +++ b/test/packetdrill/BUILD @@ -1,4 +1,4 @@ -load("defs.bzl", "packetdrill_linux_test", "packetdrill_netstack_test", "packetdrill_test") +load("defs.bzl", "packetdrill_test") package(licenses = ["notice"]) @@ -17,16 +17,6 @@ packetdrill_test( scripts = ["fin_wait2_timeout.pkt"], ) -packetdrill_linux_test( - name = "tcp_user_timeout_test_linux_test", - scripts = ["linux/tcp_user_timeout.pkt"], -) - -packetdrill_netstack_test( - name = "tcp_user_timeout_test_netstack_test", - scripts = ["netstack/tcp_user_timeout.pkt"], -) - packetdrill_test( name = "listen_close_before_handshake_complete_test", scripts = ["listen_close_before_handshake_complete.pkt"], diff --git a/test/packetdrill/linux/tcp_user_timeout.pkt b/test/packetdrill/linux/tcp_user_timeout.pkt deleted file mode 100644 index 38018cb42..000000000 --- a/test/packetdrill/linux/tcp_user_timeout.pkt +++ /dev/null @@ -1,39 +0,0 @@ -// Test that a socket w/ TCP_USER_TIMEOUT set aborts the connection -// if there is pending unacked data after the user specified timeout. - -0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 -+0 bind(3, ..., ...) = 0 - -+0 listen(3, 1) = 0 - -// Establish a connection without timestamps. -+0 < S 0:0(0) win 32792 -+0 > S. 0:0(0) ack 1 <...> -+0.1 < . 1:1(0) ack 1 win 32792 - -+0.100 accept(3, ..., ...) = 4 - -// Okay, we received nothing, and decide to close this idle socket. -// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth -// trying hard to cleanly close this flow, at the price of keeping -// a TCP structure in kernel for about 1 minute! -+2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 - -// The write/ack is required mainly for netstack as netstack does -// not update its RTO during the handshake. -+0 write(4, ..., 100) = 100 -+0 > P. 1:101(100) ack 1 <...> -+0 < . 1:1(0) ack 101 win 32792 - -+0 close(4) = 0 - -+0 > F. 101:101(0) ack 1 <...> -+.3~+.400 > F. 101:101(0) ack 1 <...> -+.3~+.400 > F. 101:101(0) ack 1 <...> -+.6~+.800 > F. 101:101(0) ack 1 <...> -+1.2~+1.300 > F. 101:101(0) ack 1 <...> - -// We finally receive something from the peer, but it is way too late -// Our socket vanished because TCP_USER_TIMEOUT was really small. -+.1 < . 1:2(1) ack 102 win 32792 -+0 > R 102:102(0) win 0 diff --git a/test/packetdrill/netstack/tcp_user_timeout.pkt b/test/packetdrill/netstack/tcp_user_timeout.pkt deleted file mode 100644 index 60103adba..000000000 --- a/test/packetdrill/netstack/tcp_user_timeout.pkt +++ /dev/null @@ -1,38 +0,0 @@ -// Test that a socket w/ TCP_USER_TIMEOUT set aborts the connection -// if there is pending unacked data after the user specified timeout. - -0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 -+0 bind(3, ..., ...) = 0 - -+0 listen(3, 1) = 0 - -// Establish a connection without timestamps. -+0 < S 0:0(0) win 32792 -+0 > S. 0:0(0) ack 1 <...> -+0.1 < . 
1:1(0) ack 1 win 32792 - -+0.100 accept(3, ..., ...) = 4 - -// Okay, we received nothing, and decide to close this idle socket. -// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth -// trying hard to cleanly close this flow, at the price of keeping -// a TCP structure in kernel for about 1 minute! -+2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 - -// The write/ack is required mainly for netstack as netstack does -// not update its RTO during the handshake. -+0 write(4, ..., 100) = 100 -+0 > P. 1:101(100) ack 1 <...> -+0 < . 1:1(0) ack 101 win 32792 - -+0 close(4) = 0 - -+0 > F. 101:101(0) ack 1 <...> -+.2~+.300 > F. 101:101(0) ack 1 <...> -+.4~+.500 > F. 101:101(0) ack 1 <...> -+.8~+.900 > F. 101:101(0) ack 1 <...> - -// We finally receive something from the peer, but it is way too late -// Our socket vanished because TCP_USER_TIMEOUT was really small. -+1.61 < . 1:2(1) ack 102 win 32792 -+0 > R 102:102(0) win 0 diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 690cee140..1410512e6 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -88,6 +88,16 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_user_timeout", + srcs = ["tcp_user_timeout_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/tcp_user_timeout_test.go b/test/packetimpact/tests/tcp_user_timeout_test.go new file mode 100644 index 000000000..3cf82badb --- /dev/null +++ b/test/packetimpact/tests/tcp_user_timeout_test.go @@ -0,0 +1,100 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tcp_user_timeout_test + +import ( + "fmt" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func sendPayload(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error { + sampleData := make([]byte, 100) + for i := range sampleData { + sampleData[i] = uint8(i) + } + conn.Drain() + dut.Send(fd, sampleData, 0) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &tb.Payload{Bytes: sampleData}, time.Second); err != nil { + return fmt.Errorf("expected data but got none: %w", err) + } + return nil +} + +func sendFIN(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error { + dut.Close(fd) + return nil +} + +func TestTCPUserTimeout(t *testing.T) { + for _, tt := range []struct { + description string + userTimeout time.Duration + sendDelay time.Duration + }{ + {"NoUserTimeout", 0, 3 * time.Second}, + {"ACKBeforeUserTimeout", 5 * time.Second, 4 * time.Second}, + {"ACKAfterUserTimeout", 5 * time.Second, 7 * time.Second}, + } { + for _, ttf := range []struct { + description string + f func(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error + }{ + {"AfterPayload", sendPayload}, + {"AfterFIN", sendFIN}, + } { + t.Run(tt.description+ttf.description, func(t *testing.T) { + // Create a socket, listen, TCP handshake, and accept. + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + conn.Handshake() + acceptFD, _ := dut.Accept(listenFD) + + if tt.userTimeout != 0 { + dut.SetSockOptInt(acceptFD, unix.SOL_TCP, unix.TCP_USER_TIMEOUT, int32(tt.userTimeout.Milliseconds())) + } + + if err := ttf.f(&conn, &dut, acceptFD); err != nil { + t.Fatal(err) + } + + time.Sleep(tt.sendDelay) + conn.Drain() + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + + // If TCP_USER_TIMEOUT was set and the above delay was longer than the + // TCP_USER_TIMEOUT then the DUT should send a RST in response to the + // testbench's packet. + expectRST := tt.userTimeout != 0 && tt.sendDelay > tt.userTimeout + expectTimeout := 5 * time.Second + got, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, expectTimeout) + if expectRST && err != nil { + t.Errorf("expected RST packet within %s but got none: %s", expectTimeout, err) + } + if !expectRST && got != nil { + t.Errorf("expected no RST packet within %s but got one: %s", expectTimeout, got) + } + }) + } + } +} -- cgit v1.2.3 From db2a60be67f0e869a58eb12d253a0d7fe13ebfa3 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Sun, 19 Apr 2020 22:14:53 -0700 Subject: Don't accept segments outside the receive window Fixed to match RFC 793 page 69. 
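The rule being adopted can be summarized as: a zero-length segment is acceptable anywhere in roughly [RCV.NXT, RCV.NXT+RCV.WND], while a data segment only has to overlap that window (out-of-window bytes are trimmed). The sketch below is a simplified, non-wrapping restatement for readers skimming the diff; the authoritative version is the new tcp.Acceptable function and its table-driven test in the hunks that follow, which use seqnum.Value to handle 32-bit sequence-number wraparound.

package main

import "fmt"

// acceptable restates the window check with plain integers (no sequence-number
// wraparound), treating rcvNxt..rcvAcc as the receive window bounds.
func acceptable(segSeq, segLen, rcvNxt, rcvAcc uint32) bool {
	if rcvNxt == rcvAcc { // zero-width window
		return segLen == 0 && segSeq == rcvNxt
	}
	if segLen == 0 { // pure ACK or window probe
		return rcvNxt <= segSeq && segSeq <= rcvAcc
	}
	// Data only needs to overlap the window; excess bytes can be trimmed.
	return segSeq+segLen > rcvNxt && segSeq < rcvAcc
}

func main() {
	fmt.Println(acceptable(105, 2, 102, 106)) // true: segment lies inside the window
	fmt.Println(acceptable(105, 2, 107, 111)) // false: entirely before the window, dropped
}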
Fixes #1607 PiperOrigin-RevId: 307334892 --- pkg/tcpip/seqnum/seqnum.go | 5 -- pkg/tcpip/transport/tcp/BUILD | 10 +++ pkg/tcpip/transport/tcp/accept.go | 14 ++--- pkg/tcpip/transport/tcp/connect.go | 15 +---- pkg/tcpip/transport/tcp/endpoint.go | 13 ++++ pkg/tcpip/transport/tcp/rcv.go | 23 +++++-- pkg/tcpip/transport/tcp/rcv_test.go | 74 +++++++++++++++++++++++ pkg/tcpip/transport/tcpconntrack/BUILD | 1 + pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go | 13 +--- test/packetimpact/tests/BUILD | 2 - 10 files changed, 125 insertions(+), 45 deletions(-) create mode 100644 pkg/tcpip/transport/tcp/rcv_test.go (limited to 'test') diff --git a/pkg/tcpip/seqnum/seqnum.go b/pkg/tcpip/seqnum/seqnum.go index b40a3c212..d3bea7de4 100644 --- a/pkg/tcpip/seqnum/seqnum.go +++ b/pkg/tcpip/seqnum/seqnum.go @@ -46,11 +46,6 @@ func (v Value) InWindow(first Value, size Size) bool { return v.InRange(first, first.Add(size)) } -// Overlap checks if the window [a,a+b) overlaps with the window [x, x+y). -func Overlap(a Value, b Size, x Value, y Size) bool { - return a.LessThan(x.Add(y)) && x.LessThan(a.Add(b)) -} - // Add calculates the sequence number following the [v, v+s) window. func (v Value) Add(s Size) Value { return v + Value(s) diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index edb7718a6..61426623c 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -109,3 +109,13 @@ go_test( "//runsc/testutil", ], ) + +go_test( + name = "rcv_test", + size = "small", + srcs = ["rcv_test.go"], + deps = [ + ":tcp", + "//pkg/tcpip/seqnum", + ], +) diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 5bb243e3b..e6a23c978 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -101,7 +101,7 @@ type listenContext struct { // v6Only is true if listenEP is a dual stack socket and has the // IPV6_V6ONLY option set. - v6only bool + v6Only bool // netProto indicates the network protocol(IPv4/v6) for the listening // endpoint. @@ -126,12 +126,12 @@ func timeStamp() uint32 { } // newListenContext creates a new listen context. -func newListenContext(stk *stack.Stack, listenEP *endpoint, rcvWnd seqnum.Size, v6only bool, netProto tcpip.NetworkProtocolNumber) *listenContext { +func newListenContext(stk *stack.Stack, listenEP *endpoint, rcvWnd seqnum.Size, v6Only bool, netProto tcpip.NetworkProtocolNumber) *listenContext { l := &listenContext{ stack: stk, rcvWnd: rcvWnd, hasher: sha1.New(), - v6only: v6only, + v6Only: v6Only, netProto: netProto, listenEP: listenEP, pendingEndpoints: make(map[stack.TransportEndpointID]*endpoint), @@ -207,7 +207,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i netProto = s.route.NetProto } n := newEndpoint(l.stack, netProto, queue) - n.v6only = l.v6only + n.v6only = l.v6Only n.ID = s.id n.boundNICID = s.route.NICID() n.route = s.route.Clone() @@ -293,7 +293,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head } // Perform the 3-way handshake. - h := newPassiveHandshake(ep, seqnum.Size(ep.initialReceiveWindow()), isn, irs, opts, deferAccept) + h := newPassiveHandshake(ep, ep.rcv.rcvWnd, isn, irs, opts, deferAccept) if err := h.execute(); err != nil { ep.mu.Unlock() ep.Close() @@ -613,8 +613,8 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { // its own goroutine and is responsible for handling connection requests. 
func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error { e.mu.Lock() - v6only := e.v6only - ctx := newListenContext(e.stack, e, rcvWnd, v6only, e.NetProto) + v6Only := e.v6only + ctx := newListenContext(e.stack, e, rcvWnd, v6Only, e.NetProto) defer func() { // Mark endpoint as closed. This will prevent goroutines running diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 368865911..76e27bf26 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -105,24 +105,11 @@ type handshake struct { } func newHandshake(ep *endpoint, rcvWnd seqnum.Size) handshake { - rcvWndScale := ep.rcvWndScaleForHandshake() - - // Round-down the rcvWnd to a multiple of wndScale. This ensures that the - // window offered in SYN won't be reduced due to the loss of precision if - // window scaling is enabled after the handshake. - rcvWnd = (rcvWnd >> uint8(rcvWndScale)) << uint8(rcvWndScale) - - // Ensure we can always accept at least 1 byte if the scale specified - // was too high for the provided rcvWnd. - if rcvWnd == 0 { - rcvWnd = 1 - } - h := handshake{ ep: ep, active: true, rcvWnd: rcvWnd, - rcvWndScale: int(rcvWndScale), + rcvWndScale: ep.rcvWndScaleForHandshake(), } h.resetState() return h diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 7e8def82d..45f2aa78b 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1062,6 +1062,19 @@ func (e *endpoint) initialReceiveWindow() int { if rcvWnd > routeWnd { rcvWnd = routeWnd } + rcvWndScale := e.rcvWndScaleForHandshake() + + // Round-down the rcvWnd to a multiple of wndScale. This ensures that the + // window offered in SYN won't be reduced due to the loss of precision if + // window scaling is enabled after the handshake. + rcvWnd = (rcvWnd >> uint8(rcvWndScale)) << uint8(rcvWndScale) + + // Ensure we can always accept at least 1 byte if the scale specified + // was too high for the provided rcvWnd. + if rcvWnd == 0 { + rcvWnd = 1 + } + return rcvWnd } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index caf8977b3..a4b73b588 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -70,13 +70,24 @@ func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale // acceptable checks if the segment sequence number range is acceptable // according to the table on page 26 of RFC 793. func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { - rcvWnd := r.rcvNxt.Size(r.rcvAcc) - if rcvWnd == 0 { - return segLen == 0 && segSeq == r.rcvNxt - } + return Acceptable(segSeq, segLen, r.rcvNxt, r.rcvAcc) +} - return segSeq.InWindow(r.rcvNxt, rcvWnd) || - seqnum.Overlap(r.rcvNxt, rcvWnd, segSeq, segLen) +// Acceptable checks if a segment that starts at segSeq and has length segLen is +// "acceptable" for arriving in a receive window that starts at rcvNxt and ends +// before rcvAcc, according to the table on page 26 and 69 of RFC 793. +func Acceptable(segSeq seqnum.Value, segLen seqnum.Size, rcvNxt, rcvAcc seqnum.Value) bool { + if rcvNxt == rcvAcc { + return segLen == 0 && segSeq == rcvNxt + } + if segLen == 0 { + // rcvWnd is incremented by 1 because that is Linux's behavior despite the + // RFC. + return segSeq.InRange(rcvNxt, rcvAcc.Add(1)) + } + // Page 70 of RFC 793 allows packets that can be made "acceptable" by trimming + // the payload, so we'll accept any payload that overlaps the receieve window. 
+ return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThan(rcvAcc) } // getSendParams returns the parameters needed by the sender when building diff --git a/pkg/tcpip/transport/tcp/rcv_test.go b/pkg/tcpip/transport/tcp/rcv_test.go new file mode 100644 index 000000000..dc02729ce --- /dev/null +++ b/pkg/tcpip/transport/tcp/rcv_test.go @@ -0,0 +1,74 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package rcv_test + +import ( + "testing" + + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" +) + +func TestAcceptable(t *testing.T) { + for _, tt := range []struct { + segSeq seqnum.Value + segLen seqnum.Size + rcvNxt, rcvAcc seqnum.Value + want bool + }{ + // The segment is smaller than the window. + {105, 2, 100, 104, false}, + {105, 2, 101, 105, false}, + {105, 2, 102, 106, true}, + {105, 2, 103, 107, true}, + {105, 2, 104, 108, true}, + {105, 2, 105, 109, true}, + {105, 2, 106, 110, true}, + {105, 2, 107, 111, false}, + + // The segment is larger than the window. + {105, 4, 103, 105, false}, + {105, 4, 104, 106, true}, + {105, 4, 105, 107, true}, + {105, 4, 106, 108, true}, + {105, 4, 107, 109, true}, + {105, 4, 108, 110, true}, + {105, 4, 109, 111, false}, + {105, 4, 110, 112, false}, + + // The segment has no width. + {105, 0, 100, 102, false}, + {105, 0, 101, 103, false}, + {105, 0, 102, 104, false}, + {105, 0, 103, 105, true}, + {105, 0, 104, 106, true}, + {105, 0, 105, 107, true}, + {105, 0, 106, 108, false}, + {105, 0, 107, 109, false}, + + // The receive window has no width. + {105, 2, 103, 103, false}, + {105, 2, 104, 104, false}, + {105, 2, 105, 105, false}, + {105, 2, 106, 106, false}, + {105, 2, 107, 107, false}, + {105, 2, 108, 108, false}, + {105, 2, 109, 109, false}, + } { + if got := tcp.Acceptable(tt.segSeq, tt.segLen, tt.rcvNxt, tt.rcvAcc); got != tt.want { + t.Errorf("tcp.Acceptable(%d, %d, %d, %d) = %t, want %t", tt.segSeq, tt.segLen, tt.rcvNxt, tt.rcvAcc, got, tt.want) + } + } +} diff --git a/pkg/tcpip/transport/tcpconntrack/BUILD b/pkg/tcpip/transport/tcpconntrack/BUILD index 3ad6994a7..2025ff757 100644 --- a/pkg/tcpip/transport/tcpconntrack/BUILD +++ b/pkg/tcpip/transport/tcpconntrack/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/tcpip/header", "//pkg/tcpip/seqnum", + "//pkg/tcpip/transport/tcp", ], ) diff --git a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go index 93712cd45..30d05200f 100644 --- a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go +++ b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go @@ -20,6 +20,7 @@ package tcpconntrack import ( "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" ) // Result is returned when the state of a TCB is updated in response to an @@ -311,17 +312,7 @@ type stream struct { // the window is zero, if it's a packet with no payload and sequence number // equal to una. 
func (s *stream) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { - wnd := s.una.Size(s.end) - if wnd == 0 { - return segLen == 0 && segSeq == s.una - } - - // Make sure [segSeq, seqSeq+segLen) is non-empty. - if segLen == 0 { - segLen = 1 - } - - return seqnum.Overlap(s.una, wnd, segSeq, segLen) + return tcp.Acceptable(segSeq, segLen, s.una, s.end) } // closed determines if the stream has already been closed. This happens when diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 1410512e6..47c722ccd 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -43,8 +43,6 @@ packetimpact_go_test( packetimpact_go_test( name = "tcp_outside_the_window", srcs = ["tcp_outside_the_window_test.go"], - # TODO(eyalsoha): Fix #1607 then remove the line below. - netstack = False, deps = [ "//pkg/tcpip/header", "//pkg/tcpip/seqnum", -- cgit v1.2.3 From 1a597e01bed5d5fb30b3d444e0a23669c5587235 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 20 Apr 2020 15:47:16 -0700 Subject: Add a functional vm_test for root_test. This change renames the tools/images directory to tools/vm for clarity, and adds a functional vm_test. Sharding is also added to the same test, and some documentation added around key flags & variables to describe how they work. Subsequent changes will add vm_tests for other cases, such as the runtime tests. PiperOrigin-RevId: 307492245 --- benchmarks/BUILD | 4 +- benchmarks/runner/commands.py | 5 +- runsc/dockerutil/dockerutil.go | 12 +- test/root/BUILD | 14 ++- tools/images/BUILD | 63 ---------- tools/images/README.md | 42 ------- tools/images/build.sh | 113 ------------------ tools/images/defs.bzl | 191 ----------------------------- tools/images/execute.sh | 152 ------------------------ tools/images/test.cc | 23 ---- tools/images/ubuntu1604/10_core.sh | 43 ------- tools/images/ubuntu1604/20_bazel.sh | 38 ------ tools/images/ubuntu1604/25_docker.sh | 54 --------- tools/images/ubuntu1604/30_containerd.sh | 86 -------------- tools/images/ubuntu1604/40_kokoro.sh | 72 ----------- tools/images/ubuntu1604/BUILD | 7 -- tools/images/ubuntu1804/BUILD | 7 -- tools/images/zone.sh | 17 --- tools/installers/head.sh | 2 +- tools/vm/BUILD | 57 +++++++++ tools/vm/README.md | 42 +++++++ tools/vm/build.sh | 117 ++++++++++++++++++ tools/vm/defs.bzl | 198 +++++++++++++++++++++++++++++++ tools/vm/execute.sh | 160 +++++++++++++++++++++++++ tools/vm/test.cc | 27 +++++ tools/vm/ubuntu1604/10_core.sh | 43 +++++++ tools/vm/ubuntu1604/20_bazel.sh | 38 ++++++ tools/vm/ubuntu1604/25_docker.sh | 54 +++++++++ tools/vm/ubuntu1604/30_containerd.sh | 86 ++++++++++++++ tools/vm/ubuntu1604/40_kokoro.sh | 72 +++++++++++ tools/vm/ubuntu1604/BUILD | 7 ++ tools/vm/ubuntu1804/BUILD | 7 ++ tools/vm/zone.sh | 17 +++ 33 files changed, 954 insertions(+), 916 deletions(-) delete mode 100644 tools/images/BUILD delete mode 100644 tools/images/README.md delete mode 100755 tools/images/build.sh delete mode 100644 tools/images/defs.bzl delete mode 100755 tools/images/execute.sh delete mode 100644 tools/images/test.cc delete mode 100755 tools/images/ubuntu1604/10_core.sh delete mode 100755 tools/images/ubuntu1604/20_bazel.sh delete mode 100755 tools/images/ubuntu1604/25_docker.sh delete mode 100755 tools/images/ubuntu1604/30_containerd.sh delete mode 100755 tools/images/ubuntu1604/40_kokoro.sh delete mode 100644 tools/images/ubuntu1604/BUILD delete mode 100644 tools/images/ubuntu1804/BUILD delete mode 100755 tools/images/zone.sh create mode 100644 tools/vm/BUILD create mode 
100644 tools/vm/README.md create mode 100755 tools/vm/build.sh create mode 100644 tools/vm/defs.bzl create mode 100755 tools/vm/execute.sh create mode 100644 tools/vm/test.cc create mode 100755 tools/vm/ubuntu1604/10_core.sh create mode 100755 tools/vm/ubuntu1604/20_bazel.sh create mode 100755 tools/vm/ubuntu1604/25_docker.sh create mode 100755 tools/vm/ubuntu1604/30_containerd.sh create mode 100755 tools/vm/ubuntu1604/40_kokoro.sh create mode 100644 tools/vm/ubuntu1604/BUILD create mode 100644 tools/vm/ubuntu1804/BUILD create mode 100755 tools/vm/zone.sh (limited to 'test') diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 2a2d15d7e..ac44f479d 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -13,8 +13,8 @@ py_binary( data = select({ ":gcloud_rule": [], "//conditions:default": [ - "//tools/images:ubuntu1604", - "//tools/images:zone", + "//tools/vm:ubuntu1604", + "//tools/vm:zone", ], }), main = "run.py", diff --git a/benchmarks/runner/commands.py b/benchmarks/runner/commands.py index e8289f6c5..9a391eb01 100644 --- a/benchmarks/runner/commands.py +++ b/benchmarks/runner/commands.py @@ -103,13 +103,12 @@ class GCPCommand(RunCommand): ("--image_file",), help="The binary that emits the GCP image.", default=os.path.join( - os.path.dirname(__file__), "../../tools/images/ubuntu1604"), + os.path.dirname(__file__), "../../tools/vm/ubuntu1604"), ) zone_file = click.core.Option( ("--zone_file",), help="The binary that emits the GCP zone.", - default=os.path.join( - os.path.dirname(__file__), "../../tools/images/zone"), + default=os.path.join(os.path.dirname(__file__), "../../tools/vm/zone"), ) internal = click.core.Option( ("--internal/--no-internal",), diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go index 1ff5e8cc3..f009486bc 100644 --- a/runsc/dockerutil/dockerutil.go +++ b/runsc/dockerutil/dockerutil.go @@ -36,8 +36,18 @@ import ( ) var ( + // runtime is the runtime to use for tests. This will be applied to all + // containers. Note that the default here ("runsc") corresponds to the + // default used by the installations. This is important, because the + // default installer for vm_tests (in tools/installers:head, invoked + // via tools/vm:defs.bzl) will install with this name. So without + // changing anything, tests should have a runsc runtime available to + // them. Otherwise installers should update the existing runtime + // instead of installing a new one. runtime = flag.String("runtime", "runsc", "specify which runtime to use") - config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths") + + // config is the default Docker daemon configuration path. + config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths") ) // EnsureSupportedDockerVersion checks if correct docker is installed. diff --git a/test/root/BUILD b/test/root/BUILD index ddc9b4955..05166673a 100644 --- a/test/root/BUILD +++ b/test/root/BUILD @@ -1,4 +1,5 @@ load("//tools:defs.bzl", "go_library", "go_test") +load("//tools/vm:defs.bzl", "vm_test") package(licenses = ["notice"]) @@ -24,7 +25,9 @@ go_test( library = ":root", tags = [ # Requires docker and runsc to be configured before the test runs. - # Also test only runs as root. + # Also, the test needs to be run as root. Note that below, the + # root_vm_test relies on the default runtime 'runsc' being installed by + # the default installer. 
"manual", "local", ], @@ -44,3 +47,12 @@ go_test( "@org_golang_x_sys//unix:go_default_library", ], ) + +vm_test( + name = "root_vm_test", + shard_count = 1, + targets = [ + "//tools/installers:shim", + ":root_test", + ], +) diff --git a/tools/images/BUILD b/tools/images/BUILD deleted file mode 100644 index 8d319e3e4..000000000 --- a/tools/images/BUILD +++ /dev/null @@ -1,63 +0,0 @@ -load("//tools:defs.bzl", "cc_binary", "gtest") -load("//tools/images:defs.bzl", "vm_image", "vm_test") - -package( - default_visibility = ["//:sandbox"], - licenses = ["notice"], -) - -sh_binary( - name = "zone", - srcs = ["zone.sh"], -) - -sh_binary( - name = "builder", - srcs = ["build.sh"], -) - -sh_binary( - name = "executer", - srcs = ["execute.sh"], -) - -cc_binary( - name = "test", - testonly = 1, - srcs = ["test.cc"], - linkstatic = 1, - deps = [ - gtest, - "//test/util:test_main", - ], -) - -vm_image( - name = "ubuntu1604", - family = "ubuntu-1604-lts", - project = "ubuntu-os-cloud", - scripts = [ - "//tools/images/ubuntu1604", - ], -) - -vm_test( - name = "ubuntu1604_test", - image = ":ubuntu1604", - targets = [":test"], -) - -vm_image( - name = "ubuntu1804", - family = "ubuntu-1804-lts", - project = "ubuntu-os-cloud", - scripts = [ - "//tools/images/ubuntu1804", - ], -) - -vm_test( - name = "ubuntu1804_test", - image = ":ubuntu1804", - targets = [":test"], -) diff --git a/tools/images/README.md b/tools/images/README.md deleted file mode 100644 index 26c0f84f2..000000000 --- a/tools/images/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# Images - -All commands in this directory require the `gcloud` project to be set. - -For example: `gcloud config set project gvisor-kokoro-testing`. - -Images can be generated by using the `vm_image` rule. This rule will generate a -binary target that builds an image in an idempotent way, and can be referenced -from other rules. - -For example: - -``` -vm_image( - name = "ubuntu", - project = "ubuntu-1604-lts", - family = "ubuntu-os-cloud", - scripts = [ - "script.sh", - "other.sh", - ], -) -``` - -These images can be built manually by executing the target. The output on -`stdout` will be the image id (in the current project). - -Images are always named per the hash of all the hermetic input scripts. This -allows images to be memoized quickly and easily. - -The `vm_test` rule can be used to execute a command remotely. This is still -under development however, and will likely change over time. - -For example: - -``` -vm_test( - name = "mycommand", - image = ":ubuntu", - targets = [":test"], -) -``` diff --git a/tools/images/build.sh b/tools/images/build.sh deleted file mode 100755 index f39f723b8..000000000 --- a/tools/images/build.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script is responsible for building a new GCP image that: 1) has nested -# virtualization enabled, and 2) has been completely set up with the -# image_setup.sh script. 
This script should be idempotent, as we memoize the -# setup script with a hash and check for that name. - -set -eou pipefail - -# Parameters. -declare -r USERNAME=${USERNAME:-test} -declare -r IMAGE_PROJECT=${IMAGE_PROJECT:-ubuntu-os-cloud} -declare -r IMAGE_FAMILY=${IMAGE_FAMILY:-ubuntu-1604-lts} -declare -r ZONE=${ZONE:-us-central1-f} - -# Random names. -declare -r DISK_NAME=$(mktemp -u disk-XXXXXX | tr A-Z a-z) -declare -r SNAPSHOT_NAME=$(mktemp -u snapshot-XXXXXX | tr A-Z a-z) -declare -r INSTANCE_NAME=$(mktemp -u build-XXXXXX | tr A-Z a-z) - -# Hash inputs in order to memoize the produced image. -declare -r SETUP_HASH=$( (echo ${USERNAME} ${IMAGE_PROJECT} ${IMAGE_FAMILY} && cat "$@") | sha256sum - | cut -d' ' -f1 | cut -c 1-16) -declare -r IMAGE_NAME=${IMAGE_FAMILY:-image}-${SETUP_HASH} - -# Does the image already exist? Skip the build. -declare -r existing=$(set -x; gcloud compute images list --filter="name=(${IMAGE_NAME})" --format="value(name)") -if ! [[ -z "${existing}" ]]; then - echo "${existing}" - exit 0 -fi - -# gcloud has path errors; is this a result of being a genrule? -export PATH=${PATH:-/bin:/usr/bin:/usr/local/bin} - -# Start a unique instance. Note that this instance will have a unique persistent -# disk as it's boot disk with the same name as the instance. -(set -x; gcloud compute instances create \ - --quiet \ - --image-project "${IMAGE_PROJECT}" \ - --image-family "${IMAGE_FAMILY}" \ - --boot-disk-size "200GB" \ - --zone "${ZONE}" \ - "${INSTANCE_NAME}" >/dev/null) -function cleanup { - (set -x; gcloud compute instances delete --quiet --zone "${ZONE}" "${INSTANCE_NAME}") -} -trap cleanup EXIT - -# Wait for the instance to become available (up to 5 minutes). -echo -n "Waiting for ${INSTANCE_NAME}" -declare timeout=300 -declare success=0 -declare internal="" -declare -r start=$(date +%s) -declare -r end=$((${start}+${timeout})) -while [[ "$(date +%s)" -lt "${end}" ]] && [[ "${success}" -lt 3 ]]; do - echo -n "." - if gcloud compute ssh --zone "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- env - true 2>/dev/null; then - success=$((${success}+1)) - elif gcloud compute ssh --internal-ip --zone "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- env - true 2>/dev/null; then - success=$((${success}+1)) - internal="--internal-ip" - fi -done - -if [[ "${success}" -eq "0" ]]; then - echo "connect timed out after ${timeout} seconds." - exit 1 -else - echo "done." -fi - -# Run the install scripts provided. -for arg; do - (set -x; gcloud compute ssh ${internal} \ - --zone "${ZONE}" \ - "${USERNAME}"@"${INSTANCE_NAME}" -- \ - sudo bash - <"${arg}" >/dev/null) -done - -# Stop the instance; required before creating an image. -(set -x; gcloud compute instances stop --quiet --zone "${ZONE}" "${INSTANCE_NAME}" >/dev/null) - -# Create a snapshot of the instance disk. -(set -x; gcloud compute disks snapshot \ - --quiet \ - --zone "${ZONE}" \ - --snapshot-names="${SNAPSHOT_NAME}" \ - "${INSTANCE_NAME}" >/dev/null) - -# Create the disk image. -(set -x; gcloud compute images create \ - --quiet \ - --source-snapshot="${SNAPSHOT_NAME}" \ - --licenses="https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx" \ - "${IMAGE_NAME}" >/dev/null) - -# Finish up. -echo "${IMAGE_NAME}" diff --git a/tools/images/defs.bzl b/tools/images/defs.bzl deleted file mode 100644 index 2847e1847..000000000 --- a/tools/images/defs.bzl +++ /dev/null @@ -1,191 +0,0 @@ -"""Image configuration. 
See README.md.""" - -load("//tools:defs.bzl", "default_installer") - -# vm_image_builder is a rule that will construct a shell script that actually -# generates a given VM image. Note that this does not _run_ the shell script -# (although it can be run manually). It will be run manually during generation -# of the vm_image target itself. This level of indirection is used so that the -# build system itself only runs the builder once when multiple targets depend -# on it, avoiding a set of races and conflicts. -def _vm_image_builder_impl(ctx): - # Generate a binary that actually builds the image. - builder = ctx.actions.declare_file(ctx.label.name) - script_paths = [] - for script in ctx.files.scripts: - script_paths.append(script.short_path) - builder_content = "\n".join([ - "#!/bin/bash", - "export ZONE=$(%s)" % ctx.files.zone[0].short_path, - "export USERNAME=%s" % ctx.attr.username, - "export IMAGE_PROJECT=%s" % ctx.attr.project, - "export IMAGE_FAMILY=%s" % ctx.attr.family, - "%s %s" % (ctx.files._builder[0].short_path, " ".join(script_paths)), - "", - ]) - ctx.actions.write(builder, builder_content, is_executable = True) - - # Note that the scripts should only be files, and should not include any - # indirect transitive dependencies. The build script wouldn't work. - return [DefaultInfo( - executable = builder, - runfiles = ctx.runfiles( - files = ctx.files.scripts + ctx.files._builder + ctx.files.zone, - ), - )] - -vm_image_builder = rule( - attrs = { - "_builder": attr.label( - executable = True, - default = "//tools/images:builder", - cfg = "host", - ), - "username": attr.string(default = "$(whoami)"), - "zone": attr.label( - executable = True, - default = "//tools/images:zone", - cfg = "host", - ), - "family": attr.string(mandatory = True), - "project": attr.string(mandatory = True), - "scripts": attr.label_list(allow_files = True), - }, - executable = True, - implementation = _vm_image_builder_impl, -) - -# See vm_image_builder above. -def _vm_image_impl(ctx): - # Run the builder to generate our output. - echo = ctx.actions.declare_file(ctx.label.name) - resolved_inputs, argv, runfiles_manifests = ctx.resolve_command( - command = "echo -ne \"#!/bin/bash\\necho $(%s)\\n\" > %s && chmod 0755 %s" % ( - ctx.files.builder[0].path, - echo.path, - echo.path, - ), - tools = [ctx.attr.builder], - ) - ctx.actions.run_shell( - tools = resolved_inputs, - outputs = [echo], - progress_message = "Building image...", - execution_requirements = {"local": "true"}, - command = argv, - input_manifests = runfiles_manifests, - ) - - # Return just the echo command. All of the builder runfiles have been - # resolved and consumed in the generation of the trivial echo script. - return [DefaultInfo(executable = echo)] - -_vm_image = rule( - attrs = { - "builder": attr.label( - executable = True, - cfg = "host", - ), - }, - executable = True, - implementation = _vm_image_impl, -) - -def vm_image(name, **kwargs): - vm_image_builder( - name = name + "_builder", - **kwargs - ) - _vm_image( - name = name, - builder = ":" + name + "_builder", - ) - -def _vm_test_impl(ctx): - runner = ctx.actions.declare_file("%s-executer" % ctx.label.name) - - # Note that the remote execution case must actually generate an - # intermediate target in order to collect all the relevant runfiles so that - # they can be copied over for remote execution. 
- runner_content = "\n".join([ - "#!/bin/bash", - "export ZONE=$(cat %s)" % ctx.files.zone[0].short_path, - "export USERNAME=%s" % ctx.attr.username, - "export IMAGE=$(cat %s)" % ctx.files.image[0].short_path, - "export SUDO=%s" % "true" if ctx.attr.sudo else "false", - "%s %s" % ( - ctx.executable.executer.short_path, - " ".join([ - target.files_to_run.executable.short_path - for target in ctx.attr.targets - ]), - ), - "", - ]) - ctx.actions.write(runner, runner_content, is_executable = True) - - # Return with all transitive files. - runfiles = ctx.runfiles( - transitive_files = depset(transitive = [ - depset(target.data_runfiles.files) - for target in ctx.attr.targets - if hasattr(target, "data_runfiles") - ]), - files = ctx.files.executer + ctx.files.zone + ctx.files.image + - ctx.files.targets, - collect_default = True, - collect_data = True, - ) - return [DefaultInfo(executable = runner, runfiles = runfiles)] - -_vm_test = rule( - attrs = { - "image": attr.label( - mandatory = True, - cfg = "host", - ), - "executer": attr.label( - executable = True, - default = "//tools/images:executer", - cfg = "host", - ), - "username": attr.string(default = "$(whoami)"), - "zone": attr.label( - default = "//tools/images:zone", - cfg = "host", - ), - "sudo": attr.bool(default = True), - "machine": attr.string(default = "n1-standard-1"), - "targets": attr.label_list( - mandatory = True, - allow_empty = False, - cfg = "target", - ), - }, - test = True, - implementation = _vm_test_impl, -) - -def vm_test( - installer = "//tools/installers:head", - **kwargs): - """Runs the given targets as a remote test. - - Args: - installer: Script to run before all targets. - **kwargs: All test arguments. Should include targets and image. - """ - targets = kwargs.pop("targets", []) - if installer: - targets = [installer] + targets - if default_installer(): - targets = [default_installer()] + targets - _vm_test( - tags = [ - "local", - "manual", - ], - targets = targets, - local = 1, - **kwargs - ) diff --git a/tools/images/execute.sh b/tools/images/execute.sh deleted file mode 100755 index ba4b1ac0e..000000000 --- a/tools/images/execute.sh +++ /dev/null @@ -1,152 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -xeo pipefail - -# Required input. -if ! [[ -v IMAGE ]]; then - echo "no image provided: set IMAGE." - exit 1 -fi - -# Parameters. -declare -r USERNAME=${USERNAME:-test} -declare -r KEYNAME=$(mktemp --tmpdir -u key-XXXXXX) -declare -r SSHKEYS=$(mktemp --tmpdir -u sshkeys-XXXXXX) -declare -r INSTANCE_NAME=$(mktemp -u test-XXXXXX | tr A-Z a-z) -declare -r MACHINE=${MACHINE:-n1-standard-1} -declare -r ZONE=${ZONE:-us-central1-f} -declare -r SUDO=${SUDO:-false} - -# This script is executed as a test rule, which will reset the value of HOME. -# Unfortunately, it is needed to load the gconfig credentials. We will reset -# HOME when we actually execute in the remote environment, defined below. 
-export HOME=$(eval echo ~$(whoami)) - -# Generate unique keys for this test. -[[ -f "${KEYNAME}" ]] || ssh-keygen -t rsa -N "" -f "${KEYNAME}" -C "${USERNAME}" -cat > "${SSHKEYS}" </dev/null; then - success=$((${success}+1)) - fi -done -if [[ "${success}" -eq "0" ]]; then - echo "connect timed out after ${timeout} seconds." - exit 1 -fi - -# Copy the local directory over. -tar czf - --dereference --exclude=.git . | - gcloud compute ssh \ - --ssh-key-file="${KEYNAME}" \ - --zone "${ZONE}" \ - "${USERNAME}"@"${INSTANCE_NAME}" -- tar xzf - - -# Execute the command remotely. -for cmd; do - # Setup relevant environment. - # - # N.B. This is not a complete test environment, but is complete enough to - # provide rudimentary sharding and test output support. - declare -a PREFIX=( "env" ) - if [[ -v TEST_SHARD_INDEX ]]; then - PREFIX+=( "TEST_SHARD_INDEX=${TEST_SHARD_INDEX}" ) - fi - if [[ -v TEST_SHARD_STATUS_FILE ]]; then - SHARD_STATUS_FILE=$(mktemp -u test-shard-status-XXXXXX) - PREFIX+=( "TEST_SHARD_STATUS_FILE=/tmp/${SHARD_STATUS_FILE}" ) - fi - if [[ -v TEST_TOTAL_SHARDS ]]; then - PREFIX+=( "TEST_TOTAL_SHARDS=${TEST_TOTAL_SHARDS}" ) - fi - if [[ -v TEST_TMPDIR ]]; then - REMOTE_TMPDIR=$(mktemp -u test-XXXXXX) - PREFIX+=( "TEST_TMPDIR=/tmp/${REMOTE_TMPDIR}" ) - # Create remotely. - gcloud compute ssh \ - --ssh-key-file="${KEYNAME}" \ - --zone "${ZONE}" \ - "${USERNAME}"@"${INSTANCE_NAME}" -- \ - mkdir -p "/tmp/${REMOTE_TMPDIR}" - fi - if [[ -v XML_OUTPUT_FILE ]]; then - TEST_XML_OUTPUT=$(mktemp -u xml-output-XXXXXX) - PREFIX+=( "XML_OUTPUT_FILE=/tmp/${TEST_XML_OUTPUT}" ) - fi - if [[ "${SUDO}" == "true" ]]; then - PREFIX+=( "sudo" "-E" ) - fi - - # Execute the command. - gcloud compute ssh \ - --ssh-key-file="${KEYNAME}" \ - --zone "${ZONE}" \ - "${USERNAME}"@"${INSTANCE_NAME}" -- \ - "${PREFIX[@]}" "${cmd}" - - # Collect relevant results. - if [[ -v TEST_SHARD_STATUS_FILE ]]; then - gcloud compute scp \ - --ssh-key-file="${KEYNAME}" \ - --zone "${ZONE}" \ - "${USERNAME}"@"${INSTANCE_NAME}":/tmp/"${SHARD_STATUS_FILE}" \ - "${TEST_SHARD_STATUS_FILE}" 2>/dev/null || true # Allowed to fail. - fi - if [[ -v XML_OUTPUT_FILE ]]; then - gcloud compute scp \ - --ssh-key-file="${KEYNAME}" \ - --zone "${ZONE}" \ - "${USERNAME}"@"${INSTANCE_NAME}":/tmp/"${TEST_XML_OUTPUT}" \ - "${XML_OUTPUT_FILE}" 2>/dev/null || true # Allowed to fail. - fi - - # Clean up the temporary directory. - if [[ -v TEST_TMPDIR ]]; then - gcloud compute ssh \ - --ssh-key-file="${KEYNAME}" \ - --zone "${ZONE}" \ - "${USERNAME}"@"${INSTANCE_NAME}" -- \ - rm -rf "/tmp/${REMOTE_TMPDIR}" - fi -done diff --git a/tools/images/test.cc b/tools/images/test.cc deleted file mode 100644 index 4f31d93c5..000000000 --- a/tools/images/test.cc +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "gtest/gtest.h" - -namespace { - -TEST(Image, Sanity) { - // Do nothing. 
-} - -} // namespace diff --git a/tools/images/ubuntu1604/10_core.sh b/tools/images/ubuntu1604/10_core.sh deleted file mode 100755 index cd518d6ac..000000000 --- a/tools/images/ubuntu1604/10_core.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -xeo pipefail - -# Install all essential build tools. -while true; do - if (apt-get update && apt-get install -y \ - make \ - git-core \ - build-essential \ - linux-headers-$(uname -r) \ - pkg-config); then - break - fi - result=$? - if [[ $result -ne 100 ]]; then - exit $result - fi -done - -# Install a recent go toolchain. -if ! [[ -d /usr/local/go ]]; then - wget https://dl.google.com/go/go1.13.5.linux-amd64.tar.gz - tar -xvf go1.13.5.linux-amd64.tar.gz - mv go /usr/local -fi - -# Link the Go binary from /usr/bin; replacing anything there. -(cd /usr/bin && rm -f go && sudo ln -fs /usr/local/go/bin/go go) diff --git a/tools/images/ubuntu1604/20_bazel.sh b/tools/images/ubuntu1604/20_bazel.sh deleted file mode 100755 index bb7afa676..000000000 --- a/tools/images/ubuntu1604/20_bazel.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -xeo pipefail - -declare -r BAZEL_VERSION=2.0.0 - -# Install bazel dependencies. -while true; do - if (apt-get update && apt-get install -y \ - openjdk-8-jdk-headless \ - unzip); then - break - fi - result=$? - if [[ $result -ne 100 ]]; then - exit $result - fi -done - -# Use the release installer. -curl -L -o bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh -chmod a+x bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh -./bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh -rm -f bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh diff --git a/tools/images/ubuntu1604/25_docker.sh b/tools/images/ubuntu1604/25_docker.sh deleted file mode 100755 index 11eea2d72..000000000 --- a/tools/images/ubuntu1604/25_docker.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Add dependencies. -while true; do - if (apt-get update && apt-get install -y \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common); then - break - fi - result=$? - if [[ $result -ne 100 ]]; then - exit $result - fi -done - -# Install the key. -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - - -# Add the repository. -add-apt-repository \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ - stable" - -# Install docker. -while true; do - if (apt-get update && apt-get install -y \ - docker-ce \ - docker-ce-cli \ - containerd.io); then - break - fi - result=$? - if [[ $result -ne 100 ]]; then - exit $result - fi -done diff --git a/tools/images/ubuntu1604/30_containerd.sh b/tools/images/ubuntu1604/30_containerd.sh deleted file mode 100755 index fb3699c12..000000000 --- a/tools/images/ubuntu1604/30_containerd.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -xeo pipefail - -# Helper for Go packages below. -install_helper() { - PACKAGE="${1}" - TAG="${2}" - GOPATH="${3}" - - # Clone the repository. - mkdir -p "${GOPATH}"/src/$(dirname "${PACKAGE}") && \ - git clone https://"${PACKAGE}" "${GOPATH}"/src/"${PACKAGE}" - - # Checkout and build the repository. - (cd "${GOPATH}"/src/"${PACKAGE}" && \ - git checkout "${TAG}" && \ - GOPATH="${GOPATH}" make && \ - GOPATH="${GOPATH}" make install) -} - -# Install dependencies for the crictl tests. -while true; do - if (apt-get update && apt-get install -y \ - btrfs-tools \ - libseccomp-dev); then - break - fi - result=$? - if [[ $result -ne 100 ]]; then - exit $result - fi -done - -# Install containerd & cri-tools. -GOPATH=$(mktemp -d --tmpdir gopathXXXXX) -install_helper github.com/containerd/containerd v1.2.2 "${GOPATH}" -install_helper github.com/kubernetes-sigs/cri-tools v1.11.0 "${GOPATH}" - -# Install gvisor-containerd-shim. -declare -r base="https://storage.googleapis.com/cri-containerd-staging/gvisor-containerd-shim" -declare -r latest=$(mktemp --tmpdir gvisor-containerd-shim-latest.XXXXXX) -declare -r shim_path=$(mktemp --tmpdir gvisor-containerd-shim.XXXXXX) -wget --no-verbose "${base}"/latest -O ${latest} -wget --no-verbose "${base}"/gvisor-containerd-shim-$(cat ${latest}) -O ${shim_path} -chmod +x ${shim_path} -mv ${shim_path} /usr/local/bin - -# Configure containerd-shim. 
-declare -r shim_config_path=/etc/containerd -declare -r shim_config_tmp_path=$(mktemp --tmpdir gvisor-containerd-shim.XXXXXX.toml) -mkdir -p ${shim_config_path} -cat > ${shim_config_tmp_path} <<-EOF - runc_shim = "/usr/local/bin/containerd-shim" - -[runsc_config] - debug = "true" - debug-log = "/tmp/runsc-logs/" - strace = "true" - file-access = "shared" -EOF -mv ${shim_config_tmp_path} ${shim_config_path} - -# Configure CNI. -(cd "${GOPATH}" && GOPATH="${GOPATH}" \ - src/github.com/containerd/containerd/script/setup/install-cni) - -# Cleanup the above. -rm -rf "${GOPATH}" -rm -rf "${latest}" -rm -rf "${shim_path}" -rm -rf "${shim_config_tmp_path}" diff --git a/tools/images/ubuntu1604/40_kokoro.sh b/tools/images/ubuntu1604/40_kokoro.sh deleted file mode 100755 index 06a1e6c48..000000000 --- a/tools/images/ubuntu1604/40_kokoro.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -xeo pipefail - -# Declare kokoro's required public keys. -declare -r ssh_public_keys=( - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDg7L/ZaEauETWrPklUTky3kvxqQfe2Ax/2CsSqhNIGNMnK/8d79CHlmY9+dE1FFQ/RzKNCaltgy7XcN/fCYiCZr5jm2ZtnLuGNOTzupMNhaYiPL419qmL+5rZXt4/dWTrsHbFRACxT8j51PcRMO5wgbL0Bg2XXimbx8kDFaurL2gqduQYqlu4lxWCaJqOL71WogcimeL63Nq/yeH5PJPWpqE4P9VUQSwAzBWFK/hLeds/AiP3MgVS65qHBnhq0JsHy8JQsqjZbG7Iidt/Ll0+gqzEbi62gDIcczG4KC0iOVzDDP/1BxDtt1lKeA23ll769Fcm3rJyoBMYxjvdw1TDx sabujp@trigger.mtv.corp.google.com" - "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNgGK/hCdjmulHfRE3hp4rZs38NCR8yAh0eDsztxqGcuXnuSnL7jOlRrbcQpremJ84omD4eKrIpwJUs+YokMdv4= sabujp@trigger.svl.corp.google.com" -) - -# Install dependencies. -while true; do - if (apt-get update && apt-get install -y \ - rsync \ - coreutils \ - python-psutil \ - qemu-kvm \ - python-pip \ - python3-pip \ - zip); then - break - fi - result=$? - if [[ $result -ne 100 ]]; then - exit $result - fi -done - -# junitparser is used to merge junit xml files. -pip install junitparser - -# We need a kbuilder user. -if useradd -c "kbuilder user" -m -s /bin/bash kbuilder; then - # User was added successfully; we add the relevant SSH keys here. - mkdir -p ~kbuilder/.ssh - (IFS=$'\n'; echo "${ssh_public_keys[*]}") > ~kbuilder/.ssh/authorized_keys - chmod 0600 ~kbuilder/.ssh/authorized_keys - chown -R kbuilder ~kbuilder/.ssh -fi - -# Give passwordless sudo access. -cat > /etc/sudoers.d/kokoro </dev/null) +function cleanup { + (set -x; gcloud compute instances delete --quiet --zone "${ZONE}" "${INSTANCE_NAME}") +} +trap cleanup EXIT + +# Wait for the instance to become available (up to 5 minutes). +echo -n "Waiting for ${INSTANCE_NAME}" +declare timeout=300 +declare success=0 +declare internal="" +declare -r start=$(date +%s) +declare -r end=$((${start}+${timeout})) +while [[ "$(date +%s)" -lt "${end}" ]] && [[ "${success}" -lt 3 ]]; do + echo -n "." 
+ if gcloud compute ssh --zone "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- true 2>/dev/null; then + success=$((${success}+1)) + elif gcloud compute ssh --internal-ip --zone "${ZONE}" "${USERNAME}"@"${INSTANCE_NAME}" -- true 2>/dev/null; then + success=$((${success}+1)) + internal="--internal-ip" + fi +done + +if [[ "${success}" -eq "0" ]]; then + echo "connect timed out after ${timeout} seconds." + exit 1 +else + echo "done." +fi + +# Run the install scripts provided. +for arg; do + (set -x; gcloud compute ssh ${internal} \ + --zone "${ZONE}" \ + "${USERNAME}"@"${INSTANCE_NAME}" -- \ + "${SSH_ARGS[@]}" \ + sudo bash - <"${arg}" >/dev/null) +done + +# Stop the instance; required before creating an image. +(set -x; gcloud compute instances stop --quiet --zone "${ZONE}" "${INSTANCE_NAME}" >/dev/null) + +# Create a snapshot of the instance disk. +(set -x; gcloud compute disks snapshot \ + --quiet \ + --zone "${ZONE}" \ + --snapshot-names="${SNAPSHOT_NAME}" \ + "${INSTANCE_NAME}" >/dev/null) + +# Create the disk image. +(set -x; gcloud compute images create \ + --quiet \ + --source-snapshot="${SNAPSHOT_NAME}" \ + --licenses="https://www.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx" \ + "${IMAGE_NAME}" >/dev/null) + +# Finish up. +echo "${IMAGE_NAME}" diff --git a/tools/vm/defs.bzl b/tools/vm/defs.bzl new file mode 100644 index 000000000..24bf0aabc --- /dev/null +++ b/tools/vm/defs.bzl @@ -0,0 +1,198 @@ +"""Image configuration. See README.md.""" + +load("//tools:defs.bzl", "default_installer") + +# vm_image_builder is a rule that will construct a shell script that actually +# generates a given VM image. Note that this does not _run_ the shell script +# (although it can be run manually). It will be run manually during generation +# of the vm_image target itself. This level of indirection is used so that the +# build system itself only runs the builder once when multiple targets depend +# on it, avoiding a set of races and conflicts. +def _vm_image_builder_impl(ctx): + # Generate a binary that actually builds the image. + builder = ctx.actions.declare_file(ctx.label.name) + script_paths = [] + for script in ctx.files.scripts: + script_paths.append(script.short_path) + builder_content = "\n".join([ + "#!/bin/bash", + "export ZONE=$(%s)" % ctx.files.zone[0].short_path, + "export USERNAME=%s" % ctx.attr.username, + "export IMAGE_PROJECT=%s" % ctx.attr.project, + "export IMAGE_FAMILY=%s" % ctx.attr.family, + "%s %s" % (ctx.files._builder[0].short_path, " ".join(script_paths)), + "", + ]) + ctx.actions.write(builder, builder_content, is_executable = True) + + # Note that the scripts should only be files, and should not include any + # indirect transitive dependencies. The build script wouldn't work. + return [DefaultInfo( + executable = builder, + runfiles = ctx.runfiles( + files = ctx.files.scripts + ctx.files._builder + ctx.files.zone, + ), + )] + +vm_image_builder = rule( + attrs = { + "_builder": attr.label( + executable = True, + default = "//tools/vm:builder", + cfg = "host", + ), + "username": attr.string(default = "$(whoami)"), + "zone": attr.label( + executable = True, + default = "//tools/vm:zone", + cfg = "host", + ), + "family": attr.string(mandatory = True), + "project": attr.string(mandatory = True), + "scripts": attr.label_list(allow_files = True), + }, + executable = True, + implementation = _vm_image_builder_impl, +) + +# See vm_image_builder above. +def _vm_image_impl(ctx): + # Run the builder to generate our output. 
+ echo = ctx.actions.declare_file(ctx.label.name) + resolved_inputs, argv, runfiles_manifests = ctx.resolve_command( + command = "echo -ne \"#!/bin/bash\\necho $(%s)\\n\" > %s && chmod 0755 %s" % ( + ctx.files.builder[0].path, + echo.path, + echo.path, + ), + tools = [ctx.attr.builder], + ) + ctx.actions.run_shell( + tools = resolved_inputs, + outputs = [echo], + progress_message = "Building image...", + execution_requirements = {"local": "true"}, + command = argv, + input_manifests = runfiles_manifests, + ) + + # Return just the echo command. All of the builder runfiles have been + # resolved and consumed in the generation of the trivial echo script. + return [DefaultInfo(executable = echo)] + +_vm_image_test = rule( + attrs = { + "builder": attr.label( + executable = True, + cfg = "host", + ), + }, + test = True, + implementation = _vm_image_impl, +) + +def vm_image(name, **kwargs): + vm_image_builder( + name = name + "_builder", + **kwargs + ) + _vm_image_test( + name = name, + builder = ":" + name + "_builder", + tags = [ + "local", + "manual", + ], + ) + +def _vm_test_impl(ctx): + runner = ctx.actions.declare_file("%s-executer" % ctx.label.name) + + # Note that the remote execution case must actually generate an + # intermediate target in order to collect all the relevant runfiles so that + # they can be copied over for remote execution. + runner_content = "\n".join([ + "#!/bin/bash", + "export ZONE=$(%s)" % ctx.files.zone[0].short_path, + "export USERNAME=%s" % ctx.attr.username, + "export IMAGE=$(%s)" % ctx.files.image[0].short_path, + "export SUDO=%s" % "true" if ctx.attr.sudo else "false", + "%s %s" % ( + ctx.executable.executer.short_path, + " ".join([ + target.files_to_run.executable.short_path + for target in ctx.attr.targets + ]), + ), + "", + ]) + ctx.actions.write(runner, runner_content, is_executable = True) + + # Return with all transitive files. + runfiles = ctx.runfiles( + transitive_files = depset(transitive = [ + depset(target.data_runfiles.files) + for target in ctx.attr.targets + if hasattr(target, "data_runfiles") + ]), + files = ctx.files.executer + ctx.files.zone + ctx.files.image + + ctx.files.targets, + collect_default = True, + collect_data = True, + ) + return [DefaultInfo(executable = runner, runfiles = runfiles)] + +_vm_test = rule( + attrs = { + "image": attr.label( + executable = True, + default = "//tools/vm:ubuntu1804", + cfg = "host", + ), + "executer": attr.label( + executable = True, + default = "//tools/vm:executer", + cfg = "host", + ), + "username": attr.string(default = "$(whoami)"), + "zone": attr.label( + executable = True, + default = "//tools/vm:zone", + cfg = "host", + ), + "sudo": attr.bool(default = True), + "machine": attr.string(default = "n1-standard-1"), + "targets": attr.label_list( + mandatory = True, + allow_empty = False, + cfg = "target", + ), + }, + test = True, + implementation = _vm_test_impl, +) + +def vm_test( + installers = None, + **kwargs): + """Runs the given targets as a remote test. + + Args: + installer: Script to run before all targets. + **kwargs: All test arguments. Should include targets and image. 
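    A minimal usage sketch (not taken from this change; the target names are
    hypothetical, and the image falls back to this rule's default when it is
    not supplied), in the same style as the root_vm_test target added to
    test/root/BUILD above:

        vm_test(
            name = "example_vm_test",
            shard_count = 1,
            targets = [
                "//tools/installers:shim",
                ":example_test",
            ],
        )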
+ """ + targets = kwargs.pop("targets", []) + if installers == None: + installers = ["//tools/installers:head"] + targets = installers + targets + if default_installer(): + targets = [default_installer()] + targets + _vm_test( + tags = [ + "local", + "manual", + ], + targets = targets, + local = 1, + **kwargs + ) diff --git a/tools/vm/execute.sh b/tools/vm/execute.sh new file mode 100755 index 000000000..1f1f3ce01 --- /dev/null +++ b/tools/vm/execute.sh @@ -0,0 +1,160 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -xeo pipefail + +# Required input. +if ! [[ -v IMAGE ]]; then + echo "no image provided: set IMAGE." + exit 1 +fi + +# Parameters. +declare -r USERNAME=${USERNAME:-test} +declare -r KEYNAME=$(mktemp --tmpdir -u key-XXXXXX) +declare -r SSHKEYS=$(mktemp --tmpdir -u sshkeys-XXXXXX) +declare -r INSTANCE_NAME=$(mktemp -u test-XXXXXX | tr A-Z a-z) +declare -r MACHINE=${MACHINE:-n1-standard-1} +declare -r ZONE=${ZONE:-us-central1-f} +declare -r SUDO=${SUDO:-false} + +# Standard arguments (applies only on script execution). +declare -ar SSH_ARGS=("-o" "ConnectTimeout=60" "--") + +# This script is executed as a test rule, which will reset the value of HOME. +# Unfortunately, it is needed to load the gconfig credentials. We will reset +# HOME when we actually execute in the remote environment, defined below. +export HOME=$(eval echo ~$(whoami)) + +# Generate unique keys for this test. +[[ -f "${KEYNAME}" ]] || ssh-keygen -t rsa -N "" -f "${KEYNAME}" -C "${USERNAME}" +cat > "${SSHKEYS}" </dev/null; then + success=$((${success}+1)) + fi +done +if [[ "${success}" -eq "0" ]]; then + echo "connect timed out after ${timeout} seconds." + exit 1 +fi + +# Copy the local directory over. +tar czf - --dereference --exclude=.git . | + gcloud compute ssh \ + --ssh-key-file="${KEYNAME}" \ + --zone "${ZONE}" \ + "${USERNAME}"@"${INSTANCE_NAME}" -- \ + "${SSH_ARGS[@]}" \ + tar xzf - + +# Execute the command remotely. +for cmd; do + # Setup relevant environment. + # + # N.B. This is not a complete test environment, but is complete enough to + # provide rudimentary sharding and test output support. + declare -a PREFIX=( "env" ) + if [[ -v TEST_SHARD_INDEX ]]; then + PREFIX+=( "TEST_SHARD_INDEX=${TEST_SHARD_INDEX}" ) + fi + if [[ -v TEST_SHARD_STATUS_FILE ]]; then + SHARD_STATUS_FILE=$(mktemp -u test-shard-status-XXXXXX) + PREFIX+=( "TEST_SHARD_STATUS_FILE=/tmp/${SHARD_STATUS_FILE}" ) + fi + if [[ -v TEST_TOTAL_SHARDS ]]; then + PREFIX+=( "TEST_TOTAL_SHARDS=${TEST_TOTAL_SHARDS}" ) + fi + if [[ -v TEST_TMPDIR ]]; then + REMOTE_TMPDIR=$(mktemp -u test-XXXXXX) + PREFIX+=( "TEST_TMPDIR=/tmp/${REMOTE_TMPDIR}" ) + # Create remotely. 
+ gcloud compute ssh \ + --ssh-key-file="${KEYNAME}" \ + --zone "${ZONE}" \ + "${USERNAME}"@"${INSTANCE_NAME}" -- \ + "${SSH_ARGS[@]}" \ + mkdir -p "/tmp/${REMOTE_TMPDIR}" + fi + if [[ -v XML_OUTPUT_FILE ]]; then + TEST_XML_OUTPUT=$(mktemp -u xml-output-XXXXXX) + PREFIX+=( "XML_OUTPUT_FILE=/tmp/${TEST_XML_OUTPUT}" ) + fi + if [[ "${SUDO}" == "true" ]]; then + PREFIX+=( "sudo" "-E" ) + fi + + # Execute the command. + gcloud compute ssh \ + --ssh-key-file="${KEYNAME}" \ + --zone "${ZONE}" \ + "${USERNAME}"@"${INSTANCE_NAME}" -- \ + "${SSH_ARGS[@]}" \ + "${PREFIX[@]}" "${cmd}" + + # Collect relevant results. + if [[ -v TEST_SHARD_STATUS_FILE ]]; then + gcloud compute scp \ + --ssh-key-file="${KEYNAME}" \ + --zone "${ZONE}" \ + "${USERNAME}"@"${INSTANCE_NAME}":/tmp/"${SHARD_STATUS_FILE}" \ + "${TEST_SHARD_STATUS_FILE}" 2>/dev/null || true # Allowed to fail. + fi + if [[ -v XML_OUTPUT_FILE ]]; then + gcloud compute scp \ + --ssh-key-file="${KEYNAME}" \ + --zone "${ZONE}" \ + "${USERNAME}"@"${INSTANCE_NAME}":/tmp/"${TEST_XML_OUTPUT}" \ + "${XML_OUTPUT_FILE}" 2>/dev/null || true # Allowed to fail. + fi + + # Clean up the temporary directory. + if [[ -v TEST_TMPDIR ]]; then + gcloud compute ssh \ + --ssh-key-file="${KEYNAME}" \ + --zone "${ZONE}" \ + "${USERNAME}"@"${INSTANCE_NAME}" -- \ + "${SSH_ARGS[@]}" \ + rm -rf "/tmp/${REMOTE_TMPDIR}" + fi +done diff --git a/tools/vm/test.cc b/tools/vm/test.cc new file mode 100644 index 000000000..c0ceacda1 --- /dev/null +++ b/tools/vm/test.cc @@ -0,0 +1,27 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" + +namespace { + +TEST(Image, Sanity0) { + // Do nothing (in shard 0). +} + +TEST(Image, Sanity1) { + // Do nothing (in shard 1). +} + +} // namespace diff --git a/tools/vm/ubuntu1604/10_core.sh b/tools/vm/ubuntu1604/10_core.sh new file mode 100755 index 000000000..cd518d6ac --- /dev/null +++ b/tools/vm/ubuntu1604/10_core.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -xeo pipefail + +# Install all essential build tools. +while true; do + if (apt-get update && apt-get install -y \ + make \ + git-core \ + build-essential \ + linux-headers-$(uname -r) \ + pkg-config); then + break + fi + result=$? + if [[ $result -ne 100 ]]; then + exit $result + fi +done + +# Install a recent go toolchain. +if ! 
[[ -d /usr/local/go ]]; then + wget https://dl.google.com/go/go1.13.5.linux-amd64.tar.gz + tar -xvf go1.13.5.linux-amd64.tar.gz + mv go /usr/local +fi + +# Link the Go binary from /usr/bin; replacing anything there. +(cd /usr/bin && rm -f go && sudo ln -fs /usr/local/go/bin/go go) diff --git a/tools/vm/ubuntu1604/20_bazel.sh b/tools/vm/ubuntu1604/20_bazel.sh new file mode 100755 index 000000000..bb7afa676 --- /dev/null +++ b/tools/vm/ubuntu1604/20_bazel.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -xeo pipefail + +declare -r BAZEL_VERSION=2.0.0 + +# Install bazel dependencies. +while true; do + if (apt-get update && apt-get install -y \ + openjdk-8-jdk-headless \ + unzip); then + break + fi + result=$? + if [[ $result -ne 100 ]]; then + exit $result + fi +done + +# Use the release installer. +curl -L -o bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh +chmod a+x bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh +./bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh +rm -f bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh diff --git a/tools/vm/ubuntu1604/25_docker.sh b/tools/vm/ubuntu1604/25_docker.sh new file mode 100755 index 000000000..11eea2d72 --- /dev/null +++ b/tools/vm/ubuntu1604/25_docker.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Add dependencies. +while true; do + if (apt-get update && apt-get install -y \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg-agent \ + software-properties-common); then + break + fi + result=$? + if [[ $result -ne 100 ]]; then + exit $result + fi +done + +# Install the key. +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - + +# Add the repository. +add-apt-repository \ + "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) \ + stable" + +# Install docker. +while true; do + if (apt-get update && apt-get install -y \ + docker-ce \ + docker-ce-cli \ + containerd.io); then + break + fi + result=$? + if [[ $result -ne 100 ]]; then + exit $result + fi +done diff --git a/tools/vm/ubuntu1604/30_containerd.sh b/tools/vm/ubuntu1604/30_containerd.sh new file mode 100755 index 000000000..fb3699c12 --- /dev/null +++ b/tools/vm/ubuntu1604/30_containerd.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -xeo pipefail + +# Helper for Go packages below. +install_helper() { + PACKAGE="${1}" + TAG="${2}" + GOPATH="${3}" + + # Clone the repository. + mkdir -p "${GOPATH}"/src/$(dirname "${PACKAGE}") && \ + git clone https://"${PACKAGE}" "${GOPATH}"/src/"${PACKAGE}" + + # Checkout and build the repository. + (cd "${GOPATH}"/src/"${PACKAGE}" && \ + git checkout "${TAG}" && \ + GOPATH="${GOPATH}" make && \ + GOPATH="${GOPATH}" make install) +} + +# Install dependencies for the crictl tests. +while true; do + if (apt-get update && apt-get install -y \ + btrfs-tools \ + libseccomp-dev); then + break + fi + result=$? + if [[ $result -ne 100 ]]; then + exit $result + fi +done + +# Install containerd & cri-tools. +GOPATH=$(mktemp -d --tmpdir gopathXXXXX) +install_helper github.com/containerd/containerd v1.2.2 "${GOPATH}" +install_helper github.com/kubernetes-sigs/cri-tools v1.11.0 "${GOPATH}" + +# Install gvisor-containerd-shim. +declare -r base="https://storage.googleapis.com/cri-containerd-staging/gvisor-containerd-shim" +declare -r latest=$(mktemp --tmpdir gvisor-containerd-shim-latest.XXXXXX) +declare -r shim_path=$(mktemp --tmpdir gvisor-containerd-shim.XXXXXX) +wget --no-verbose "${base}"/latest -O ${latest} +wget --no-verbose "${base}"/gvisor-containerd-shim-$(cat ${latest}) -O ${shim_path} +chmod +x ${shim_path} +mv ${shim_path} /usr/local/bin + +# Configure containerd-shim. +declare -r shim_config_path=/etc/containerd +declare -r shim_config_tmp_path=$(mktemp --tmpdir gvisor-containerd-shim.XXXXXX.toml) +mkdir -p ${shim_config_path} +cat > ${shim_config_tmp_path} <<-EOF + runc_shim = "/usr/local/bin/containerd-shim" + +[runsc_config] + debug = "true" + debug-log = "/tmp/runsc-logs/" + strace = "true" + file-access = "shared" +EOF +mv ${shim_config_tmp_path} ${shim_config_path} + +# Configure CNI. +(cd "${GOPATH}" && GOPATH="${GOPATH}" \ + src/github.com/containerd/containerd/script/setup/install-cni) + +# Cleanup the above. +rm -rf "${GOPATH}" +rm -rf "${latest}" +rm -rf "${shim_path}" +rm -rf "${shim_config_tmp_path}" diff --git a/tools/vm/ubuntu1604/40_kokoro.sh b/tools/vm/ubuntu1604/40_kokoro.sh new file mode 100755 index 000000000..06a1e6c48 --- /dev/null +++ b/tools/vm/ubuntu1604/40_kokoro.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -xeo pipefail + +# Declare kokoro's required public keys. 
+declare -r ssh_public_keys=( + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDg7L/ZaEauETWrPklUTky3kvxqQfe2Ax/2CsSqhNIGNMnK/8d79CHlmY9+dE1FFQ/RzKNCaltgy7XcN/fCYiCZr5jm2ZtnLuGNOTzupMNhaYiPL419qmL+5rZXt4/dWTrsHbFRACxT8j51PcRMO5wgbL0Bg2XXimbx8kDFaurL2gqduQYqlu4lxWCaJqOL71WogcimeL63Nq/yeH5PJPWpqE4P9VUQSwAzBWFK/hLeds/AiP3MgVS65qHBnhq0JsHy8JQsqjZbG7Iidt/Ll0+gqzEbi62gDIcczG4KC0iOVzDDP/1BxDtt1lKeA23ll769Fcm3rJyoBMYxjvdw1TDx sabujp@trigger.mtv.corp.google.com" + "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNgGK/hCdjmulHfRE3hp4rZs38NCR8yAh0eDsztxqGcuXnuSnL7jOlRrbcQpremJ84omD4eKrIpwJUs+YokMdv4= sabujp@trigger.svl.corp.google.com" +) + +# Install dependencies. +while true; do + if (apt-get update && apt-get install -y \ + rsync \ + coreutils \ + python-psutil \ + qemu-kvm \ + python-pip \ + python3-pip \ + zip); then + break + fi + result=$? + if [[ $result -ne 100 ]]; then + exit $result + fi +done + +# junitparser is used to merge junit xml files. +pip install junitparser + +# We need a kbuilder user. +if useradd -c "kbuilder user" -m -s /bin/bash kbuilder; then + # User was added successfully; we add the relevant SSH keys here. + mkdir -p ~kbuilder/.ssh + (IFS=$'\n'; echo "${ssh_public_keys[*]}") > ~kbuilder/.ssh/authorized_keys + chmod 0600 ~kbuilder/.ssh/authorized_keys + chown -R kbuilder ~kbuilder/.ssh +fi + +# Give passwordless sudo access. +cat > /etc/sudoers.d/kokoro < Date: Tue, 21 Apr 2020 09:41:40 -0700 Subject: Internal change. PiperOrigin-RevId: 307622320 --- test/runner/defs.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 56743a526..0a75b158f 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -23,7 +23,7 @@ def _runner_test_impl(ctx): # Return with all transitive files. runfiles = ctx.runfiles( transitive_files = depset(transitive = [ - depset(target.data_runfiles.files) + target.data_runfiles.files for target in (ctx.attr.runner, ctx.attr.test) if hasattr(target, "data_runfiles") ]), -- cgit v1.2.3 From 639c8dd80870133f61465588e717b725417a0c41 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Tue, 21 Apr 2020 10:56:04 -0700 Subject: Restore euid upon test finish PiperOrigin-RevId: 307638329 --- test/syscalls/linux/uidgid.cc | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/uidgid.cc b/test/syscalls/linux/uidgid.cc index ff66a79f4..64d6d0b8f 100644 --- a/test/syscalls/linux/uidgid.cc +++ b/test/syscalls/linux/uidgid.cc @@ -253,12 +253,21 @@ TEST(UidGidRootTest, Setgroups) { TEST(UidGidRootTest, Setuid_prlimit) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsRoot())); - // Change our UID. - EXPECT_THAT(seteuid(65534), SyscallSucceeds()); + // Do seteuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. + ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this seteuid + // call to only apply to this task. POSIX threads, however, require that all + // threads have the same UIDs, so using the seteuid wrapper sets all + // threads' UID. + EXPECT_THAT(syscall(SYS_setreuid, -1, 65534), SyscallSucceeds()); - // Despite the UID change, we should be able to get our own limits. 
- struct rlimit rl = {}; - ASSERT_THAT(prlimit(0, RLIMIT_NOFILE, NULL, &rl), SyscallSucceeds()); + // Despite the UID change, we should be able to get our own limits. + struct rlimit rl = {}; + EXPECT_THAT(prlimit(0, RLIMIT_NOFILE, NULL, &rl), SyscallSucceeds()); + }); } } // namespace -- cgit v1.2.3 From 0e013d8b00dbc3ad96e98bc0405ec2e21887308e Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 21 Apr 2020 16:54:08 -0700 Subject: Don't ignore override if it is longer than layerStates PiperOrigin-RevId: 307708653 --- test/packetimpact/testbench/connections.go | 33 +++++++++++++++----- test/packetimpact/testbench/layers_test.go | 50 ++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index f84fd8ba7..00a366894 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -363,16 +363,33 @@ type Connection struct { // reverse is never a match. override overrides the default matchers for each // Layer. func (conn *Connection) match(override, received Layers) bool { - if len(received) < len(conn.layerStates) { + var layersToMatch int + if len(override) < len(conn.layerStates) { + layersToMatch = len(conn.layerStates) + } else { + layersToMatch = len(override) + } + if len(received) < layersToMatch { return false } - for i, s := range conn.layerStates { - toMatch := s.incoming(received[i]) - if toMatch == nil { - return false - } - if i < len(override) { - toMatch.merge(override[i]) + for i := 0; i < layersToMatch; i++ { + var toMatch Layer + if i < len(conn.layerStates) { + s := conn.layerStates[i] + toMatch = s.incoming(received[i]) + if toMatch == nil { + return false + } + if i < len(override) { + if err := toMatch.merge(override[i]); err != nil { + conn.t.Fatalf("failed to merge: %s", err) + } + } + } else { + toMatch = override[i] + if toMatch == nil { + conn.t.Fatalf("expect the overriding layers to be non-nil") + } } if !toMatch.match(received[i]) { return false diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index b32efda93..c99cf6312 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -154,3 +154,53 @@ func TestLayerStringFormat(t *testing.T) { }) } } + +func TestConnectionMatch(t *testing.T) { + conn := Connection{ + layerStates: []layerState{ðerState{}}, + } + protoNum0 := tcpip.NetworkProtocolNumber(0) + protoNum1 := tcpip.NetworkProtocolNumber(1) + for _, tt := range []struct { + description string + override, received Layers + wantMatch bool + }{ + { + description: "shorter override", + override: []Layer{&Ether{}}, + received: []Layer{&Ether{}, &Payload{Bytes: []byte("hello")}}, + wantMatch: true, + }, + { + description: "longer override", + override: []Layer{&Ether{}, &Payload{Bytes: []byte("hello")}}, + received: []Layer{&Ether{}}, + wantMatch: false, + }, + { + description: "ether layer mismatch", + override: []Layer{&Ether{Type: &protoNum0}}, + received: []Layer{&Ether{Type: &protoNum1}}, + wantMatch: false, + }, + { + description: "both nil", + override: nil, + received: nil, + wantMatch: false, + }, + { + description: "nil override", + override: nil, + received: []Layer{&Ether{}}, + wantMatch: true, + }, + } { + t.Run(tt.description, func(t *testing.T) { + if gotMatch := conn.match(tt.override, tt.received); gotMatch != tt.wantMatch { + t.Fatalf("conn.match(%s, %s) = %t, 
want %t", tt.override, tt.received, gotMatch, tt.wantMatch) + } + }) + } +} -- cgit v1.2.3 From 6d23673e10bca2fb573809ff78506fc0566817dd Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Wed, 22 Apr 2020 07:27:34 -0700 Subject: Add comments about deepcopy in Layer.incoming() PiperOrigin-RevId: 307812340 --- test/packetimpact/testbench/connections.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 00a366894..952a717e0 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -72,7 +72,8 @@ type layerState interface { // incoming creates an expected Layer for comparing against a received Layer. // Because the expectation can depend on values in the received Layer, it is // an input to incoming. For example, the ACK number needs to be checked in a - // TCP packet but only if the ACK flag is set in the received packet. + // TCP packet but only if the ACK flag is set in the received packet. The + // calles takes ownership of the returned Layer. incoming(received Layer) Layer // sent updates the layerState based on the Layer that was sent. The input is @@ -124,6 +125,7 @@ func (s *etherState) outgoing() Layer { return &s.out } +// incoming implements layerState.incoming. func (s *etherState) incoming(Layer) Layer { return deepcopy.Copy(&s.in).(Layer) } @@ -168,6 +170,7 @@ func (s *ipv4State) outgoing() Layer { return &s.out } +// incoming implements layerState.incoming. func (s *ipv4State) incoming(Layer) Layer { return deepcopy.Copy(&s.in).(Layer) } @@ -234,6 +237,7 @@ func (s *tcpState) outgoing() Layer { return &newOutgoing } +// incoming implements layerState.incoming. func (s *tcpState) incoming(received Layer) Layer { tcpReceived, ok := received.(*TCP) if !ok { @@ -328,6 +332,7 @@ func (s *udpState) outgoing() Layer { return &s.out } +// incoming implements layerState.incoming. func (s *udpState) incoming(Layer) Layer { return deepcopy.Copy(&s.in).(Layer) } -- cgit v1.2.3 From 37f863f62813f76b05979494c1bc2fe102629321 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 22 Apr 2020 14:15:33 -0700 Subject: tcp: handle listen after shutdown properly Right now, sentry panics in this case: panic: close of nil channel goroutine 67 [running]: pkg/tcpip/transport/tcp/tcp.(*endpoint).listen(0xc0000ce000, 0x9, 0x0) pkg/tcpip/transport/tcp/endpoint.go:2208 +0x170 pkg/tcpip/transport/tcp/tcp.(*endpoint).Listen(0xc0000ce000, 0x9, 0xc0003a1ad0) pkg/tcpip/transport/tcp/endpoint.go:2179 +0x50 Fixes #2468 PiperOrigin-RevId: 307896725 --- pkg/tcpip/transport/tcp/endpoint.go | 43 +++++++++++++++-------------- pkg/tcpip/transport/tcp/endpoint_state.go | 5 ++++ test/syscalls/linux/socket_inet_loopback.cc | 43 +++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 20 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 45f2aa78b..07d3e64c8 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2158,8 +2158,6 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error { // // By not removing this endpoint from the demuxer mapping, we // ensure that any other bind to the same port fails, as on Linux. - // TODO(gvisor.dev/issue/2468): We need to enable applications to - // start listening on this endpoint again similar to Linux. 
e.rcvListMu.Lock() e.rcvClosed = true e.rcvListMu.Unlock() @@ -2188,26 +2186,31 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { e.LockUser() defer e.UnlockUser() - // Allow the backlog to be adjusted if the endpoint is not shutting down. - // When the endpoint shuts down, it sets workerCleanup to true, and from - // that point onward, acceptedChan is the responsibility of the cleanup() - // method (and should not be touched anywhere else, including here). - if e.EndpointState() == StateListen && !e.workerCleanup { - // Adjust the size of the channel iff we can fix existing - // pending connections into the new one. + if e.EndpointState() == StateListen && !e.closed { e.acceptMu.Lock() defer e.acceptMu.Unlock() - if len(e.acceptedChan) > backlog { - return tcpip.ErrInvalidEndpointState - } - if cap(e.acceptedChan) == backlog { - return nil - } - origChan := e.acceptedChan - e.acceptedChan = make(chan *endpoint, backlog) - close(origChan) - for ep := range origChan { - e.acceptedChan <- ep + if e.acceptedChan == nil { + // listen is called after shutdown. + e.acceptedChan = make(chan *endpoint, backlog) + e.shutdownFlags = 0 + e.rcvListMu.Lock() + e.rcvClosed = false + e.rcvListMu.Unlock() + } else { + // Adjust the size of the channel iff we can fix + // existing pending connections into the new one. + if len(e.acceptedChan) > backlog { + return tcpip.ErrInvalidEndpointState + } + if cap(e.acceptedChan) == backlog { + return nil + } + origChan := e.acceptedChan + e.acceptedChan = make(chan *endpoint, backlog) + close(origChan) + for ep := range origChan { + e.acceptedChan <- ep + } } // Notify any blocked goroutines that they can attempt to diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index c3c692555..8b7562396 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -247,6 +247,11 @@ func (e *endpoint) Resume(s *stack.Stack) { if err := e.Listen(backlog); err != nil { panic("endpoint listening failed: " + err.String()) } + e.LockUser() + if e.shutdownFlags != 0 { + e.shutdownLocked(e.shutdownFlags) + } + e.UnlockUser() listenLoading.Done() tcpip.AsyncLoading.Done() }() diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index d3000dbc6..9400ffaeb 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -319,6 +319,49 @@ TEST_P(SocketInetLoopbackTest, TCPListenUnbound) { tcpSimpleConnectTest(listener, connector, false); } +TEST_P(SocketInetLoopbackTest, TCPListenShutdownListen) { + const auto& param = GetParam(); + + const TestAddress& listener = param.listener; + const TestAddress& connector = param.connector; + + constexpr int kBacklog = 5; + + // Create the listening socket. + FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + ASSERT_THAT(shutdown(listen_fd.get(), SHUT_RD), SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. 
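+  // (getsockname below reports the port the stack actually assigned; the
+  // client connections that follow target that port.)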
+ socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + const uint16_t port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + + for (int i = 0; i < kBacklog; i++) { + auto client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + ASSERT_THAT(connect(client.get(), reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + } + for (int i = 0; i < kBacklog; i++) { + ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), SyscallSucceeds()); + } +} + TEST_P(SocketInetLoopbackTest, TCPListenShutdown) { auto const& param = GetParam(); -- cgit v1.2.3 From a2925a079fa04ff4c891016a0eea1818bdb2cf4b Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Thu, 23 Apr 2020 08:34:42 -0700 Subject: Run failing packetimpact test and expect failure. This will make it easier to notice if a code change causes an existing test to pass. PiperOrigin-RevId: 308057978 --- test/packetimpact/tests/defs.bzl | 35 ++++++++++++++++++++++++++-------- test/packetimpact/tests/test_runner.sh | 19 +++++++++++++++--- 2 files changed, 43 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl index 8c0d058b2..27c5de375 100644 --- a/test/packetimpact/tests/defs.bzl +++ b/test/packetimpact/tests/defs.bzl @@ -59,7 +59,11 @@ _packetimpact_test = rule( PACKETIMPACT_TAGS = ["local", "manual"] -def packetimpact_linux_test(name, testbench_binary, **kwargs): +def packetimpact_linux_test( + name, + testbench_binary, + expect_failure = False, + **kwargs): """Add a packetimpact test on linux. Args: @@ -67,28 +71,37 @@ def packetimpact_linux_test(name, testbench_binary, **kwargs): testbench_binary: the testbench binary **kwargs: all the other args, forwarded to _packetimpact_test """ + expect_failure_flag = ["--expect_failure"] if expect_failure else [] _packetimpact_test( name = name + "_linux_test", testbench_binary = testbench_binary, - flags = ["--dut_platform", "linux"], + flags = ["--dut_platform", "linux"] + expect_failure_flag, tags = PACKETIMPACT_TAGS + ["packetimpact"], **kwargs ) -def packetimpact_netstack_test(name, testbench_binary, **kwargs): +def packetimpact_netstack_test( + name, + testbench_binary, + expect_failure = False, + **kwargs): """Add a packetimpact test on netstack. Args: name: name of the test testbench_binary: the testbench binary + expect_failure: the test must fail **kwargs: all the other args, forwarded to _packetimpact_test """ + expect_failure_flag = [] + if expect_failure: + expect_failure_flag = ["--expect_failure"] _packetimpact_test( name = name + "_netstack_test", testbench_binary = testbench_binary, # This is the default runtime unless # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. 
- flags = ["--dut_platform", "netstack", "--runtime=runsc-d"], + flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, tags = PACKETIMPACT_TAGS + ["packetimpact"], **kwargs ) @@ -112,7 +125,13 @@ def packetimpact_go_test(name, size = "small", pure = True, linux = True, netsta tags = PACKETIMPACT_TAGS, **kwargs ) - if linux: - packetimpact_linux_test(name = name, testbench_binary = testbench_binary) - if netstack: - packetimpact_netstack_test(name = name, testbench_binary = testbench_binary) + packetimpact_linux_test( + name = name, + expect_failure = not linux, + testbench_binary = testbench_binary, + ) + packetimpact_netstack_test( + name = name, + expect_failure = not netstack, + testbench_binary = testbench_binary, + ) diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh index e99fc7d09..2be3c17c3 100755 --- a/test/packetimpact/tests/test_runner.sh +++ b/test/packetimpact/tests/test_runner.sh @@ -29,7 +29,7 @@ function failure() { } trap 'failure ${LINENO} "$BASH_COMMAND"' ERR -declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark,extra_test_arg:" +declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark,extra_test_arg:,expect_failure" # Don't use declare below so that the error from getopt will end the script. PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") @@ -68,6 +68,10 @@ while true; do EXTRA_TEST_ARGS+="$2" shift 2 ;; + --expect_failure) + declare -r EXPECT_FAILURE="1" + shift 1 + ;; --) shift break @@ -263,6 +267,15 @@ docker exec -t "${TESTBENCH}" \ --local_ipv4=${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX} \ --remote_mac=${REMOTE_MAC} \ --local_mac=${LOCAL_MAC} \ - --device=${TEST_DEVICE}" - + --device=${TEST_DEVICE}" && true +declare -r TEST_RESULT="${?}" +if [[ -z "${EXPECT_FAILURE-}" && "${TEST_RESULT}" != 0 ]]; then + echo 'FAIL: This test was expected to pass.' + exit ${TEST_RESULT} +fi +if [[ ! -z "${EXPECT_FAILURE-}" && "${TEST_RESULT}" == 0 ]]; then + echo 'FAIL: This test was expected to fail but passed. Enable the test and' \ + 'mark the corresponding bug as fixed.' + exit 1 +fi echo PASS: No errors. -- cgit v1.2.3 From 1481499fe27157ad2716c00682f6ad819115a6c7 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 23 Apr 2020 11:32:08 -0700 Subject: Simplify Docker test infrastructure. This change adds a layer of abstraction around the internal Docker APIs, and eliminates all direct dependencies on Dockerfiles in the infrastructure. A subsequent change will automated the generation of local images (with efficient caching). Note that this change drops the use of bazel container rules, as that experiment does not seem to be viable. 
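As a rough sketch of what the new abstraction looks like to a test author (the
test below is illustrative only: the package name, test name, and expected
output are invented, while MakeDocker, RunOpts, Run, and CleanUp are the APIs
added in pkg/test/dockerutil by this change):

    package integration_test

    import (
        "strings"
        "testing"

        "gvisor.dev/gvisor/pkg/test/dockerutil"
    )

    func TestAlpineEcho(t *testing.T) {
        // *testing.T satisfies testutil.Logger, so it names and logs the container.
        d := dockerutil.MakeDocker(t)
        defer d.CleanUp() // Best-effort kill and remove.

        // "basic/alpine" is resolved to a concrete image via testutil.ImageByName.
        out, err := d.Run(dockerutil.RunOpts{Image: "basic/alpine"}, "echo", "hello")
        if err != nil {
            t.Fatalf("docker run failed: %v", err)
        }
        if !strings.Contains(out, "hello") {
            t.Errorf("unexpected output: %q", out)
        }
    }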
PiperOrigin-RevId: 308095430 --- WORKSPACE | 39 - pkg/sentry/fsimpl/ext/BUILD | 2 +- pkg/sentry/fsimpl/ext/ext_test.go | 3 +- pkg/tcpip/transport/tcp/BUILD | 2 +- pkg/tcpip/transport/tcp/tcp_noracedetector_test.go | 2 +- pkg/test/criutil/BUILD | 14 + pkg/test/criutil/criutil.go | 306 +++ pkg/test/dockerutil/BUILD | 14 + pkg/test/dockerutil/dockerutil.go | 581 ++++++ pkg/test/testutil/BUILD | 20 + pkg/test/testutil/testutil.go | 550 ++++++ pkg/test/testutil/testutil_runfiles.go | 75 + runsc/boot/BUILD | 1 + runsc/cmd/BUILD | 2 +- runsc/cmd/capability_test.go | 9 +- runsc/container/BUILD | 6 +- runsc/container/console_test.go | 115 +- runsc/container/container.go | 2 +- runsc/container/container_norace_test.go | 20 + runsc/container/container_race_test.go | 20 + runsc/container/container_test.go | 2046 ++++++++++---------- runsc/container/multi_container_test.go | 1161 +++++------ runsc/container/shared_volume_test.go | 18 +- runsc/container/test_app/BUILD | 21 - runsc/container/test_app/fds.go | 185 -- runsc/container/test_app/test_app.go | 394 ---- runsc/criutil/BUILD | 11 - runsc/criutil/criutil.go | 277 --- runsc/dockerutil/BUILD | 14 - runsc/dockerutil/dockerutil.go | 486 ----- runsc/testutil/BUILD | 21 - runsc/testutil/testutil.go | 433 ----- runsc/testutil/testutil_runfiles.go | 75 - scripts/iptables_tests.sh | 13 +- test/cmd/test_app/BUILD | 21 + test/cmd/test_app/fds.go | 185 ++ test/cmd/test_app/test_app.go | 394 ++++ test/e2e/BUILD | 4 +- test/e2e/exec_test.go | 193 +- test/e2e/integration_test.go | 233 ++- test/e2e/regression_test.go | 18 +- test/image/BUILD | 4 +- test/image/image_test.go | 195 +- test/image/ruby.sh | 0 test/iptables/BUILD | 8 +- test/iptables/README.md | 2 +- test/iptables/iptables.go | 7 + test/iptables/iptables_test.go | 271 +-- test/iptables/iptables_util.go | 2 +- test/iptables/runner/BUILD | 17 +- test/iptables/runner/main.go | 3 + test/packetdrill/packetdrill_test.sh | 25 +- test/packetimpact/testbench/dut.go | 2 +- test/packetimpact/tests/test_runner.sh | 24 +- test/root/BUILD | 8 +- test/root/cgroup_test.go | 114 +- test/root/chroot_test.go | 20 +- test/root/crictl_test.go | 192 +- test/root/main_test.go | 2 +- test/root/oom_score_adj_test.go | 78 +- test/root/runsc_test.go | 2 +- test/root/testdata/BUILD | 18 - test/root/testdata/busybox.go | 32 - test/root/testdata/containerd_config.go | 39 - test/root/testdata/httpd.go | 32 - test/root/testdata/httpd_mount_paths.go | 53 - test/root/testdata/sandbox.go | 30 - test/root/testdata/simple.go | 41 - test/runner/BUILD | 2 +- test/runner/runner.go | 12 +- test/runtimes/BUILD | 22 +- test/runtimes/README.md | 56 - test/runtimes/blacklist_test.go | 37 - test/runtimes/build_defs.bzl | 75 - test/runtimes/defs.bzl | 79 + test/runtimes/images/proctor/BUILD | 26 - test/runtimes/images/proctor/go.go | 90 - test/runtimes/images/proctor/java.go | 71 - test/runtimes/images/proctor/nodejs.go | 46 - test/runtimes/images/proctor/php.go | 42 - test/runtimes/images/proctor/proctor.go | 163 -- test/runtimes/images/proctor/proctor_test.go | 127 -- test/runtimes/images/proctor/python.go | 49 - test/runtimes/proctor/BUILD | 27 + test/runtimes/proctor/go.go | 90 + test/runtimes/proctor/java.go | 71 + test/runtimes/proctor/nodejs.go | 46 + test/runtimes/proctor/php.go | 42 + test/runtimes/proctor/proctor.go | 163 ++ test/runtimes/proctor/proctor_test.go | 127 ++ test/runtimes/proctor/python.go | 49 + test/runtimes/runner.go | 196 -- test/runtimes/runner.sh | 35 - test/runtimes/runner/BUILD | 21 + 
test/runtimes/runner/blacklist_test.go | 37 + test/runtimes/runner/main.go | 189 ++ tools/bazeldefs/defs.bzl | 4 - tools/defs.bzl | 4 +- 98 files changed, 5512 insertions(+), 5693 deletions(-) create mode 100644 pkg/test/criutil/BUILD create mode 100644 pkg/test/criutil/criutil.go create mode 100644 pkg/test/dockerutil/BUILD create mode 100644 pkg/test/dockerutil/dockerutil.go create mode 100644 pkg/test/testutil/BUILD create mode 100644 pkg/test/testutil/testutil.go create mode 100644 pkg/test/testutil/testutil_runfiles.go create mode 100644 runsc/container/container_norace_test.go create mode 100644 runsc/container/container_race_test.go delete mode 100644 runsc/container/test_app/BUILD delete mode 100644 runsc/container/test_app/fds.go delete mode 100644 runsc/container/test_app/test_app.go delete mode 100644 runsc/criutil/BUILD delete mode 100644 runsc/criutil/criutil.go delete mode 100644 runsc/dockerutil/BUILD delete mode 100644 runsc/dockerutil/dockerutil.go delete mode 100644 runsc/testutil/BUILD delete mode 100644 runsc/testutil/testutil.go delete mode 100644 runsc/testutil/testutil_runfiles.go create mode 100644 test/cmd/test_app/BUILD create mode 100644 test/cmd/test_app/fds.go create mode 100644 test/cmd/test_app/test_app.go mode change 100644 => 100755 test/image/ruby.sh delete mode 100644 test/root/testdata/BUILD delete mode 100644 test/root/testdata/busybox.go delete mode 100644 test/root/testdata/containerd_config.go delete mode 100644 test/root/testdata/httpd.go delete mode 100644 test/root/testdata/httpd_mount_paths.go delete mode 100644 test/root/testdata/sandbox.go delete mode 100644 test/root/testdata/simple.go delete mode 100644 test/runtimes/README.md delete mode 100644 test/runtimes/blacklist_test.go delete mode 100644 test/runtimes/build_defs.bzl create mode 100644 test/runtimes/defs.bzl delete mode 100644 test/runtimes/images/proctor/BUILD delete mode 100644 test/runtimes/images/proctor/go.go delete mode 100644 test/runtimes/images/proctor/java.go delete mode 100644 test/runtimes/images/proctor/nodejs.go delete mode 100644 test/runtimes/images/proctor/php.go delete mode 100644 test/runtimes/images/proctor/proctor.go delete mode 100644 test/runtimes/images/proctor/proctor_test.go delete mode 100644 test/runtimes/images/proctor/python.go create mode 100644 test/runtimes/proctor/BUILD create mode 100644 test/runtimes/proctor/go.go create mode 100644 test/runtimes/proctor/java.go create mode 100644 test/runtimes/proctor/nodejs.go create mode 100644 test/runtimes/proctor/php.go create mode 100644 test/runtimes/proctor/proctor.go create mode 100644 test/runtimes/proctor/proctor_test.go create mode 100644 test/runtimes/proctor/python.go delete mode 100644 test/runtimes/runner.go delete mode 100755 test/runtimes/runner.sh create mode 100644 test/runtimes/runner/BUILD create mode 100644 test/runtimes/runner/blacklist_test.go create mode 100644 test/runtimes/runner/main.go (limited to 'test') diff --git a/WORKSPACE b/WORKSPACE index b895647fb..3bf5cc9c1 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -127,45 +127,6 @@ load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") rules_pkg_dependencies() -# Container rules. 
-http_archive( - name = "io_bazel_rules_docker", - sha256 = "14ac30773fdb393ddec90e158c9ec7ebb3f8a4fd533ec2abbfd8789ad81a284b", - strip_prefix = "rules_docker-0.12.1", - urls = ["https://github.com/bazelbuild/rules_docker/releases/download/v0.12.1/rules_docker-v0.12.1.tar.gz"], -) - -load( - "@io_bazel_rules_docker//repositories:repositories.bzl", - container_repositories = "repositories", -) - -container_repositories() - -load("@io_bazel_rules_docker//repositories:deps.bzl", container_deps = "deps") - -container_deps() - -load( - "@io_bazel_rules_docker//container:container.bzl", - "container_pull", -) - -# This container is built from the Dockerfile in test/iptables/runner. -container_pull( - name = "iptables-test", - digest = "sha256:a137d692a2eb9fc7bf95c5f4a568da090e2c31098e93634421ed88f3a3f1db65", - registry = "gcr.io", - repository = "gvisor-presubmit/iptables-test", -) - -load( - "@io_bazel_rules_docker//go:image.bzl", - _go_image_repos = "repositories", -) - -_go_image_repos() - # Load C++ grpc rules. http_archive( name = "com_github_grpc_grpc", diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index a4947c480..ff861d0fe 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -93,8 +93,8 @@ go_test( "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", "//pkg/syserror", + "//pkg/test/testutil", "//pkg/usermem", - "//runsc/testutil", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", ], diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go index 29bb73765..64e9a579f 100644 --- a/pkg/sentry/fsimpl/ext/ext_test.go +++ b/pkg/sentry/fsimpl/ext/ext_test.go @@ -32,9 +32,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/pkg/usermem" - - "gvisor.dev/gvisor/runsc/testutil" ) const ( diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 61426623c..f2aa69069 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -105,8 +105,8 @@ go_test( "//pkg/tcpip/seqnum", "//pkg/tcpip/stack", "//pkg/tcpip/transport/tcp/testing/context", + "//pkg/test/testutil", "//pkg/waiter", - "//runsc/testutil", ], ) diff --git a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go index 359a75e73..5fe23113b 100644 --- a/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go +++ b/pkg/tcpip/transport/tcp/tcp_noracedetector_test.go @@ -31,7 +31,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context" - "gvisor.dev/gvisor/runsc/testutil" + "gvisor.dev/gvisor/pkg/test/testutil" ) func TestFastRecovery(t *testing.T) { diff --git a/pkg/test/criutil/BUILD b/pkg/test/criutil/BUILD new file mode 100644 index 000000000..a7b082cee --- /dev/null +++ b/pkg/test/criutil/BUILD @@ -0,0 +1,14 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "criutil", + testonly = 1, + srcs = ["criutil.go"], + visibility = ["//:sandbox"], + deps = [ + "//pkg/test/dockerutil", + "//pkg/test/testutil", + ], +) diff --git a/pkg/test/criutil/criutil.go b/pkg/test/criutil/criutil.go new file mode 100644 index 000000000..bebebb48e --- /dev/null +++ b/pkg/test/criutil/criutil.go @@ -0,0 +1,306 @@ +// Copyright 2018 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package criutil contains utility functions for interacting with the +// Container Runtime Interface (CRI), principally via the crictl command line +// tool. This requires critools to be installed on the local system. +package criutil + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "strings" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +// Crictl contains information required to run the crictl utility. +type Crictl struct { + logger testutil.Logger + endpoint string + cleanup []func() +} + +// resolvePath attempts to find binary paths. It may set the path to invalid, +// which will cause the execution to fail with a sensible error. +func resolvePath(executable string) string { + guess, err := exec.LookPath(executable) + if err != nil { + guess = fmt.Sprintf("/usr/local/bin/%s", executable) + } + return guess +} + +// NewCrictl returns a Crictl configured with a timeout and an endpoint over +// which it will talk to containerd. +func NewCrictl(logger testutil.Logger, endpoint string) *Crictl { + // Attempt to find the executable, but don't bother propagating the + // error at this point. The first command executed will return with a + // binary not found error. + return &Crictl{ + logger: logger, + endpoint: endpoint, + } +} + +// CleanUp executes cleanup functions. +func (cc *Crictl) CleanUp() { + for _, c := range cc.cleanup { + c() + } + cc.cleanup = nil +} + +// RunPod creates a sandbox. It corresponds to `crictl runp`. +func (cc *Crictl) RunPod(sbSpecFile string) (string, error) { + podID, err := cc.run("runp", sbSpecFile) + if err != nil { + return "", fmt.Errorf("runp failed: %v", err) + } + // Strip the trailing newline from crictl output. + return strings.TrimSpace(podID), nil +} + +// Create creates a container within a sandbox. It corresponds to `crictl +// create`. +func (cc *Crictl) Create(podID, contSpecFile, sbSpecFile string) (string, error) { + podID, err := cc.run("create", podID, contSpecFile, sbSpecFile) + if err != nil { + return "", fmt.Errorf("create failed: %v", err) + } + // Strip the trailing newline from crictl output. + return strings.TrimSpace(podID), nil +} + +// Start starts a container. It corresponds to `crictl start`. +func (cc *Crictl) Start(contID string) (string, error) { + output, err := cc.run("start", contID) + if err != nil { + return "", fmt.Errorf("start failed: %v", err) + } + return output, nil +} + +// Stop stops a container. It corresponds to `crictl stop`. +func (cc *Crictl) Stop(contID string) error { + _, err := cc.run("stop", contID) + return err +} + +// Exec execs a program inside a container. It corresponds to `crictl exec`. +func (cc *Crictl) Exec(contID string, args ...string) (string, error) { + a := []string{"exec", contID} + a = append(a, args...) + output, err := cc.run(a...) + if err != nil { + return "", fmt.Errorf("exec failed: %v", err) + } + return output, nil +} + +// Rm removes a container. 
It corresponds to `crictl rm`.
+func (cc *Crictl) Rm(contID string) error {
+ _, err := cc.run("rm", contID)
+ return err
+}
+
+// StopPod stops a pod. It corresponds to `crictl stopp`.
+func (cc *Crictl) StopPod(podID string) error {
+ _, err := cc.run("stopp", podID)
+ return err
+}
+
+// containerConfig is a minimal copy of
+// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto
+// It only contains fields needed for testing.
+type containerConfig struct {
+ Status containerStatus
+}
+
+type containerStatus struct {
+ Network containerNetwork
+}
+
+type containerNetwork struct {
+ IP string
+}
+
+// PodIP returns a pod's IP address.
+func (cc *Crictl) PodIP(podID string) (string, error) {
+ output, err := cc.run("inspectp", podID)
+ if err != nil {
+ return "", err
+ }
+ conf := &containerConfig{}
+ if err := json.Unmarshal([]byte(output), conf); err != nil {
+ return "", fmt.Errorf("failed to unmarshal JSON: %v, %s", err, output)
+ }
+ if conf.Status.Network.IP == "" {
+ return "", fmt.Errorf("no IP found in config: %s", output)
+ }
+ return conf.Status.Network.IP, nil
+}
+
+// RmPod removes a pod. It corresponds to `crictl rmp`.
+func (cc *Crictl) RmPod(podID string) error {
+ _, err := cc.run("rmp", podID)
+ return err
+}
+
+// Import imports the given image from the local Docker instance.
+func (cc *Crictl) Import(image string) error {
+ // Note that we provide a 10 minute timeout after connect because we may
+ // be pushing a lot of bytes in order to import the image. The connect
+ // timeout stays the same and is inherited from the Crictl instance.
+ cmd := testutil.Command(cc.logger,
+ resolvePath("ctr"),
+ fmt.Sprintf("--connect-timeout=%s", 30*time.Second),
+ fmt.Sprintf("--address=%s", cc.endpoint),
+ "-n", "k8s.io", "images", "import", "-")
+ cmd.Stderr = os.Stderr // Pass through errors.
+
+ // Create a pipe and start the program.
+ w, err := cmd.StdinPipe()
+ if err != nil {
+ return err
+ }
+ if err := cmd.Start(); err != nil {
+ return err
+ }
+
+ // Save the image on the other end.
+ if err := dockerutil.Save(cc.logger, image, w); err != nil {
+ cmd.Wait()
+ return err
+ }
+
+ // Close our pipe reference & see if it was loaded.
+ if err := w.Close(); err != nil {
+ return err
+ }
+
+ return cmd.Wait()
+}
+
+// StartContainer pulls the given image and starts the container in the
+// sandbox with the given podID.
+//
+// Note that the image will always be imported from the local docker daemon.
+func (cc *Crictl) StartContainer(podID, image, sbSpec, contSpec string) (string, error) {
+ if err := cc.Import(image); err != nil {
+ return "", err
+ }
+
+ // Write the specs to files that can be read by crictl.
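+ // (Both spec files are created under testutil.TmpDir; their removal is
+ // registered on cc.cleanup and performed later by CleanUp.)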
+ sbSpecFile, cleanup, err := testutil.WriteTmpFile("sbSpec", sbSpec) + if err != nil { + return "", fmt.Errorf("failed to write sandbox spec: %v", err) + } + cc.cleanup = append(cc.cleanup, cleanup) + contSpecFile, cleanup, err := testutil.WriteTmpFile("contSpec", contSpec) + if err != nil { + return "", fmt.Errorf("failed to write container spec: %v", err) + } + cc.cleanup = append(cc.cleanup, cleanup) + + return cc.startContainer(podID, image, sbSpecFile, contSpecFile) +} + +func (cc *Crictl) startContainer(podID, image, sbSpecFile, contSpecFile string) (string, error) { + contID, err := cc.Create(podID, contSpecFile, sbSpecFile) + if err != nil { + return "", fmt.Errorf("failed to create container in pod %q: %v", podID, err) + } + + if _, err := cc.Start(contID); err != nil { + return "", fmt.Errorf("failed to start container %q in pod %q: %v", contID, podID, err) + } + + return contID, nil +} + +// StopContainer stops and deletes the container with the given container ID. +func (cc *Crictl) StopContainer(contID string) error { + if err := cc.Stop(contID); err != nil { + return fmt.Errorf("failed to stop container %q: %v", contID, err) + } + + if err := cc.Rm(contID); err != nil { + return fmt.Errorf("failed to remove container %q: %v", contID, err) + } + + return nil +} + +// StartPodAndContainer starts a sandbox and container in that sandbox. It +// returns the pod ID and container ID. +func (cc *Crictl) StartPodAndContainer(image, sbSpec, contSpec string) (string, string, error) { + if err := cc.Import(image); err != nil { + return "", "", err + } + + // Write the specs to files that can be read by crictl. + sbSpecFile, cleanup, err := testutil.WriteTmpFile("sbSpec", sbSpec) + if err != nil { + return "", "", fmt.Errorf("failed to write sandbox spec: %v", err) + } + cc.cleanup = append(cc.cleanup, cleanup) + contSpecFile, cleanup, err := testutil.WriteTmpFile("contSpec", contSpec) + if err != nil { + return "", "", fmt.Errorf("failed to write container spec: %v", err) + } + cc.cleanup = append(cc.cleanup, cleanup) + + podID, err := cc.RunPod(sbSpecFile) + if err != nil { + return "", "", err + } + + contID, err := cc.startContainer(podID, image, sbSpecFile, contSpecFile) + + return podID, contID, err +} + +// StopPodAndContainer stops a container and pod. +func (cc *Crictl) StopPodAndContainer(podID, contID string) error { + if err := cc.StopContainer(contID); err != nil { + return fmt.Errorf("failed to stop container %q in pod %q: %v", contID, podID, err) + } + + if err := cc.StopPod(podID); err != nil { + return fmt.Errorf("failed to stop pod %q: %v", podID, err) + } + + if err := cc.RmPod(podID); err != nil { + return fmt.Errorf("failed to remove pod %q: %v", podID, err) + } + + return nil +} + +// run runs crictl with the given args. +func (cc *Crictl) run(args ...string) (string, error) { + defaultArgs := []string{ + resolvePath("crictl"), + "--image-endpoint", fmt.Sprintf("unix://%s", cc.endpoint), + "--runtime-endpoint", fmt.Sprintf("unix://%s", cc.endpoint), + } + fullArgs := append(defaultArgs, args...) 
+ out, err := testutil.Command(cc.logger, fullArgs...).CombinedOutput() + return string(out), err +} diff --git a/pkg/test/dockerutil/BUILD b/pkg/test/dockerutil/BUILD new file mode 100644 index 000000000..7c8758e35 --- /dev/null +++ b/pkg/test/dockerutil/BUILD @@ -0,0 +1,14 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "dockerutil", + testonly = 1, + srcs = ["dockerutil.go"], + visibility = ["//:sandbox"], + deps = [ + "//pkg/test/testutil", + "@com_github_kr_pty//:go_default_library", + ], +) diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go new file mode 100644 index 000000000..baa8fc2f2 --- /dev/null +++ b/pkg/test/dockerutil/dockerutil.go @@ -0,0 +1,581 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package dockerutil is a collection of utility functions. +package dockerutil + +import ( + "encoding/json" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "net" + "os" + "os/exec" + "path" + "regexp" + "strconv" + "strings" + "syscall" + "time" + + "github.com/kr/pty" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +var ( + // runtime is the runtime to use for tests. This will be applied to all + // containers. Note that the default here ("runsc") corresponds to the + // default used by the installations. This is important, because the + // default installer for vm_tests (in tools/installers:head, invoked + // via tools/vm:defs.bzl) will install with this name. So without + // changing anything, tests should have a runsc runtime available to + // them. Otherwise installers should update the existing runtime + // instead of installing a new one. + runtime = flag.String("runtime", "runsc", "specify which runtime to use") + + // config is the default Docker daemon configuration path. + config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths") +) + +// EnsureSupportedDockerVersion checks if correct docker is installed. +// +// This logs directly to stderr, as it is typically called from a Main wrapper. +func EnsureSupportedDockerVersion() { + cmd := exec.Command("docker", "version") + out, err := cmd.CombinedOutput() + if err != nil { + log.Fatalf("error running %q: %v", "docker version", err) + } + re := regexp.MustCompile(`Version:\s+(\d+)\.(\d+)\.\d.*`) + matches := re.FindStringSubmatch(string(out)) + if len(matches) != 3 { + log.Fatalf("Invalid docker output: %s", out) + } + major, _ := strconv.Atoi(matches[1]) + minor, _ := strconv.Atoi(matches[2]) + if major < 17 || (major == 17 && minor < 9) { + log.Fatalf("Docker version 17.09.0 or greater is required, found: %02d.%02d", major, minor) + } +} + +// RuntimePath returns the binary path for the current runtime. +func RuntimePath() (string, error) { + // Read the configuration data; the file must exist. + configBytes, err := ioutil.ReadFile(*config) + if err != nil { + return "", err + } + + // Unmarshal the configuration. 
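+	// The daemon configuration read above is expected to look like:
+	// {"runtimes": {"<runtime-name>": {"path": "/path/to/binary", ...}}}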
+ c := make(map[string]interface{}) + if err := json.Unmarshal(configBytes, &c); err != nil { + return "", err + } + + // Decode the expected configuration. + r, ok := c["runtimes"] + if !ok { + return "", fmt.Errorf("no runtimes declared: %v", c) + } + rs, ok := r.(map[string]interface{}) + if !ok { + // The runtimes are not a map. + return "", fmt.Errorf("unexpected format: %v", c) + } + r, ok = rs[*runtime] + if !ok { + // The expected runtime is not declared. + return "", fmt.Errorf("runtime %q not found: %v", *runtime, c) + } + rs, ok = r.(map[string]interface{}) + if !ok { + // The runtime is not a map. + return "", fmt.Errorf("unexpected format: %v", c) + } + p, ok := rs["path"].(string) + if !ok { + // The runtime does not declare a path. + return "", fmt.Errorf("unexpected format: %v", c) + } + return p, nil +} + +// Save exports a container image to the given Writer. +// +// Note that the writer should be actively consuming the output, otherwise it +// is not guaranteed that the Save will make any progress and the call may +// stall indefinitely. +// +// This is called by criutil in order to import imports. +func Save(logger testutil.Logger, image string, w io.Writer) error { + cmd := testutil.Command(logger, "docker", "save", testutil.ImageByName(image)) + cmd.Stdout = w // Send directly to the writer. + return cmd.Run() +} + +// MountMode describes if the mount should be ro or rw. +type MountMode int + +const ( + // ReadOnly is what the name says. + ReadOnly MountMode = iota + // ReadWrite is what the name says. + ReadWrite +) + +// String returns the mount mode argument for this MountMode. +func (m MountMode) String() string { + switch m { + case ReadOnly: + return "ro" + case ReadWrite: + return "rw" + } + panic(fmt.Sprintf("invalid mode: %d", m)) +} + +// Docker contains the name and the runtime of a docker container. +type Docker struct { + logger testutil.Logger + Runtime string + Name string + copyErr error + mounts []string + cleanups []func() +} + +// MakeDocker sets up the struct for a Docker container. +// +// Names of containers will be unique. +func MakeDocker(logger testutil.Logger) *Docker { + return &Docker{ + logger: logger, + Name: testutil.RandomID(logger.Name()), + Runtime: *runtime, + } +} + +// Mount mounts the given source and makes it available in the container. +func (d *Docker) Mount(target, source string, mode MountMode) { + d.mounts = append(d.mounts, fmt.Sprintf("-v=%s:%s:%v", source, target, mode)) +} + +// CopyFiles copies in and mounts the given files. They are always ReadOnly. +func (d *Docker) CopyFiles(target string, sources ...string) { + dir, err := ioutil.TempDir("", d.Name) + if err != nil { + d.copyErr = fmt.Errorf("ioutil.TempDir failed: %v", err) + return + } + d.cleanups = append(d.cleanups, func() { os.RemoveAll(dir) }) + if err := os.Chmod(dir, 0755); err != nil { + d.copyErr = fmt.Errorf("os.Chmod(%q, 0755) failed: %v", dir, err) + return + } + for _, name := range sources { + src, err := testutil.FindFile(name) + if err != nil { + d.copyErr = fmt.Errorf("testutil.FindFile(%q) failed: %v", name, err) + return + } + dst := path.Join(dir, path.Base(name)) + if err := testutil.Copy(src, dst); err != nil { + d.copyErr = fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err) + return + } + d.logger.Logf("copy: %s -> %s", src, dst) + } + d.Mount(target, dir, ReadOnly) +} + +// Link links the given target. 
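+// The link is expressed as a --link flag, queued alongside the mount
+// arguments and applied on the next create/run of this container.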
+func (d *Docker) Link(target string, source *Docker) { + d.mounts = append(d.mounts, fmt.Sprintf("--link=%s:%s", source.Name, target)) +} + +// RunOpts are options for running a container. +type RunOpts struct { + // Image is the image relative to images/. This will be mangled + // appropriately, to ensure that only first-party images are used. + Image string + + // Memory is the memory limit in kB. + Memory int + + // Ports are the ports to be allocated. + Ports []int + + // WorkDir sets the working directory. + WorkDir string + + // ReadOnly sets the read-only flag. + ReadOnly bool + + // Env are additional environment variables. + Env []string + + // User is the user to use. + User string + + // Privileged enables privileged mode. + Privileged bool + + // CapAdd are the extra set of capabilities to add. + CapAdd []string + + // CapDrop are the extra set of capabilities to drop. + CapDrop []string + + // Pty indicates that a pty will be allocated. If this is non-nil, then + // this will run after start-up with the *exec.Command and Pty file + // passed in to the function. + Pty func(*exec.Cmd, *os.File) + + // Foreground indicates that the container should be run in the + // foreground. If this is true, then the output will be available as a + // return value from the Run function. + Foreground bool + + // Extra are extra arguments that may be passed. + Extra []string +} + +// args returns common arguments. +// +// Note that this does not define the complete behavior. +func (d *Docker) argsFor(r *RunOpts, command string, p []string) (rv []string) { + isExec := command == "exec" + isRun := command == "run" + + if isRun || isExec { + rv = append(rv, "-i") + } + if r.Pty != nil { + rv = append(rv, "-t") + } + if r.User != "" { + rv = append(rv, fmt.Sprintf("--user=%s", r.User)) + } + if r.Privileged { + rv = append(rv, "--privileged") + } + for _, c := range r.CapAdd { + rv = append(rv, fmt.Sprintf("--cap-add=%s", c)) + } + for _, c := range r.CapDrop { + rv = append(rv, fmt.Sprintf("--cap-drop=%s", c)) + } + for _, e := range r.Env { + rv = append(rv, fmt.Sprintf("--env=%s", e)) + } + if r.WorkDir != "" { + rv = append(rv, fmt.Sprintf("--workdir=%s", r.WorkDir)) + } + if !isExec { + if r.Memory != 0 { + rv = append(rv, fmt.Sprintf("--memory=%dk", r.Memory)) + } + for _, p := range r.Ports { + rv = append(rv, fmt.Sprintf("--publish=%d", p)) + } + if r.ReadOnly { + rv = append(rv, fmt.Sprintf("--read-only")) + } + if len(p) > 0 { + rv = append(rv, "--entrypoint=") + } + } + + // Always attach the test environment & Extra. + rv = append(rv, fmt.Sprintf("--env=RUNSC_TEST_NAME=%s", d.Name)) + rv = append(rv, r.Extra...) + + // Attach necessary bits. + if isExec { + rv = append(rv, d.Name) + } else { + rv = append(rv, d.mounts...) + rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) + rv = append(rv, fmt.Sprintf("--name=%s", d.Name)) + rv = append(rv, testutil.ImageByName(r.Image)) + } + + // Attach other arguments. + rv = append(rv, p...) + return rv +} + +// run runs a complete command. +func (d *Docker) run(r RunOpts, command string, p ...string) (string, error) { + if d.copyErr != nil { + return "", d.copyErr + } + basicArgs := []string{"docker"} + if command == "spawn" { + command = "run" + basicArgs = append(basicArgs, command) + basicArgs = append(basicArgs, "-d") + } else { + basicArgs = append(basicArgs, command) + } + customArgs := d.argsFor(&r, command, p) + cmd := testutil.Command(d.logger, append(basicArgs, customArgs...)...) 
+ if r.Pty != nil { + // If allocating a terminal, then we just ignore the output + // from the command. + ptmx, err := pty.Start(cmd.Cmd) + if err != nil { + return "", err + } + defer cmd.Wait() // Best effort. + r.Pty(cmd.Cmd, ptmx) + } else { + // Can't support PTY or streaming. + out, err := cmd.CombinedOutput() + return string(out), err + } + return "", nil +} + +// Create calls 'docker create' with the arguments provided. +func (d *Docker) Create(r RunOpts, args ...string) error { + _, err := d.run(r, "create", args...) + return err +} + +// Start calls 'docker start'. +func (d *Docker) Start() error { + return testutil.Command(d.logger, "docker", "start", d.Name).Run() +} + +// Stop calls 'docker stop'. +func (d *Docker) Stop() error { + return testutil.Command(d.logger, "docker", "stop", d.Name).Run() +} + +// Run calls 'docker run' with the arguments provided. +func (d *Docker) Run(r RunOpts, args ...string) (string, error) { + return d.run(r, "run", args...) +} + +// Spawn starts the container and detaches. +func (d *Docker) Spawn(r RunOpts, args ...string) error { + _, err := d.run(r, "spawn", args...) + return err +} + +// Logs calls 'docker logs'. +func (d *Docker) Logs() (string, error) { + // Don't capture the output; since it will swamp the logs. + out, err := exec.Command("docker", "logs", d.Name).CombinedOutput() + return string(out), err +} + +// Exec calls 'docker exec' with the arguments provided. +func (d *Docker) Exec(r RunOpts, args ...string) (string, error) { + return d.run(r, "exec", args...) +} + +// Pause calls 'docker pause'. +func (d *Docker) Pause() error { + return testutil.Command(d.logger, "docker", "pause", d.Name).Run() +} + +// Unpause calls 'docker pause'. +func (d *Docker) Unpause() error { + return testutil.Command(d.logger, "docker", "unpause", d.Name).Run() +} + +// Checkpoint calls 'docker checkpoint'. +func (d *Docker) Checkpoint(name string) error { + return testutil.Command(d.logger, "docker", "checkpoint", "create", d.Name, name).Run() +} + +// Restore calls 'docker start --checkname [name]'. +func (d *Docker) Restore(name string) error { + return testutil.Command(d.logger, "docker", "start", fmt.Sprintf("--checkpoint=%s", name), d.Name).Run() +} + +// Kill calls 'docker kill'. +func (d *Docker) Kill() error { + // Skip logging this command, it will likely be an error. + out, err := exec.Command("docker", "kill", d.Name).CombinedOutput() + if err != nil && !strings.Contains(string(out), "is not running") { + return err + } + return nil +} + +// Remove calls 'docker rm'. +func (d *Docker) Remove() error { + return testutil.Command(d.logger, "docker", "rm", d.Name).Run() +} + +// CleanUp kills and deletes the container (best effort). +func (d *Docker) CleanUp() { + // Kill the container. + if err := d.Kill(); err != nil { + // Just log; can't do anything here. + d.logger.Logf("error killing container %q: %v", d.Name, err) + } + // Remove the image. + if err := d.Remove(); err != nil { + d.logger.Logf("error removing container %q: %v", d.Name, err) + } + // Forget all mounts. + d.mounts = nil + // Execute all cleanups. + for _, c := range d.cleanups { + c() + } + d.cleanups = nil +} + +// FindPort returns the host port that is mapped to 'sandboxPort'. This calls +// docker to allocate a free port in the host and prevent conflicts. 
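+// The mapping is read back via 'docker inspect' using a template over
+// NetworkSettings.Ports.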
+func (d *Docker) FindPort(sandboxPort int) (int, error) { + format := fmt.Sprintf(`{{ (index (index .NetworkSettings.Ports "%d/tcp") 0).HostPort }}`, sandboxPort) + out, err := testutil.Command(d.logger, "docker", "inspect", "-f", format, d.Name).CombinedOutput() + if err != nil { + return -1, fmt.Errorf("error retrieving port: %v", err) + } + port, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) + if err != nil { + return -1, fmt.Errorf("error parsing port %q: %v", out, err) + } + return port, nil +} + +// FindIP returns the IP address of the container. +func (d *Docker) FindIP() (net.IP, error) { + const format = `{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}` + out, err := testutil.Command(d.logger, "docker", "inspect", "-f", format, d.Name).CombinedOutput() + if err != nil { + return net.IP{}, fmt.Errorf("error retrieving IP: %v", err) + } + ip := net.ParseIP(strings.TrimSpace(string(out))) + if ip == nil { + return net.IP{}, fmt.Errorf("invalid IP: %q", string(out)) + } + return ip, nil +} + +// SandboxPid returns the PID to the sandbox process. +func (d *Docker) SandboxPid() (int, error) { + out, err := testutil.Command(d.logger, "docker", "inspect", "-f={{.State.Pid}}", d.Name).CombinedOutput() + if err != nil { + return -1, fmt.Errorf("error retrieving pid: %v", err) + } + pid, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) + if err != nil { + return -1, fmt.Errorf("error parsing pid %q: %v", out, err) + } + return pid, nil +} + +// ID returns the container ID. +func (d *Docker) ID() (string, error) { + out, err := testutil.Command(d.logger, "docker", "inspect", "-f={{.Id}}", d.Name).CombinedOutput() + if err != nil { + return "", fmt.Errorf("error retrieving ID: %v", err) + } + return strings.TrimSpace(string(out)), nil +} + +// Wait waits for container to exit, up to the given timeout. Returns error if +// wait fails or timeout is hit. Returns the application return code otherwise. +// Note that the application may have failed even if err == nil, always check +// the exit code. +func (d *Docker) Wait(timeout time.Duration) (syscall.WaitStatus, error) { + timeoutChan := time.After(timeout) + waitChan := make(chan (syscall.WaitStatus)) + errChan := make(chan (error)) + + go func() { + out, err := testutil.Command(d.logger, "docker", "wait", d.Name).CombinedOutput() + if err != nil { + errChan <- fmt.Errorf("error waiting for container %q: %v", d.Name, err) + } + exit, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) + if err != nil { + errChan <- fmt.Errorf("error parsing exit code %q: %v", out, err) + } + waitChan <- syscall.WaitStatus(uint32(exit)) + }() + + select { + case ws := <-waitChan: + return ws, nil + case err := <-errChan: + return syscall.WaitStatus(1), err + case <-timeoutChan: + return syscall.WaitStatus(1), fmt.Errorf("timeout waiting for container %q", d.Name) + } +} + +// WaitForOutput calls 'docker logs' to retrieve containers output and searches +// for the given pattern. +func (d *Docker) WaitForOutput(pattern string, timeout time.Duration) (string, error) { + matches, err := d.WaitForOutputSubmatch(pattern, timeout) + if err != nil { + return "", err + } + if len(matches) == 0 { + return "", nil + } + return matches[0], nil +} + +// WaitForOutputSubmatch calls 'docker logs' to retrieve containers output and +// searches for the given pattern. It returns any regexp submatches as well. 
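+// It polls the logs roughly every 100ms until the timeout expires, and gives
+// up early if the sandbox stops without producing new output.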
+func (d *Docker) WaitForOutputSubmatch(pattern string, timeout time.Duration) ([]string, error) { + re := regexp.MustCompile(pattern) + var ( + lastOut string + stopped bool + ) + for exp := time.Now().Add(timeout); time.Now().Before(exp); { + out, err := d.Logs() + if err != nil { + return nil, err + } + if out != lastOut { + if lastOut == "" { + d.logger.Logf("output (start): %s", out) + } else if strings.HasPrefix(out, lastOut) { + d.logger.Logf("output (contn): %s", out[len(lastOut):]) + } else { + d.logger.Logf("output (trunc): %s", out) + } + lastOut = out // Save for future. + if matches := re.FindStringSubmatch(lastOut); matches != nil { + return matches, nil // Success! + } + } else if stopped { + // The sandbox stopped and we looked at the + // logs at least once since determining that. + return nil, fmt.Errorf("no longer running: %v", err) + } else if pid, err := d.SandboxPid(); pid == 0 || err != nil { + // The sandbox may have stopped, but it's + // possible that it has emitted the terminal + // line between the last call to Logs and here. + stopped = true + } + time.Sleep(100 * time.Millisecond) + } + return nil, fmt.Errorf("timeout waiting for output %q: %s", re.String(), lastOut) +} diff --git a/pkg/test/testutil/BUILD b/pkg/test/testutil/BUILD new file mode 100644 index 000000000..03b1b4677 --- /dev/null +++ b/pkg/test/testutil/BUILD @@ -0,0 +1,20 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "testutil", + testonly = 1, + srcs = [ + "testutil.go", + "testutil_runfiles.go", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/sync", + "//runsc/boot", + "//runsc/specutils", + "@com_github_cenkalti_backoff//:go_default_library", + "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + ], +) diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go new file mode 100644 index 000000000..d75ceca3d --- /dev/null +++ b/pkg/test/testutil/testutil.go @@ -0,0 +1,550 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package testutil contains utility functions for runsc tests. +package testutil + +import ( + "bufio" + "context" + "debug/elf" + "encoding/base32" + "encoding/json" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math" + "math/rand" + "net/http" + "os" + "os/exec" + "os/signal" + "path" + "path/filepath" + "strconv" + "strings" + "sync/atomic" + "syscall" + "testing" + "time" + + "github.com/cenkalti/backoff" + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/specutils" +) + +var ( + checkpoint = flag.Bool("checkpoint", true, "control checkpoint/restore support") +) + +// IsCheckpointSupported returns the relevant command line flag. +func IsCheckpointSupported() bool { + return *checkpoint +} + +// nameToActual is used by ImageByName (for now). 
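+// It maps test-local image names (e.g. "basic/alpine") to the upstream images
+// they currently stand in for.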
+var nameToActual = map[string]string{ + "basic/alpine": "alpine", + "basic/busybox": "busybox:1.31.1", + "basic/httpd": "httpd", + "basic/mysql": "mysql", + "basic/nginx": "nginx", + "basic/python": "gcr.io/gvisor-presubmit/python-hello", + "basic/resolv": "k8s.gcr.io/busybox", + "basic/ruby": "ruby", + "basic/tomcat": "tomcat:8.0", + "basic/ubuntu": "ubuntu:trusty", + "iptables": "gcr.io/gvisor-presubmit/iptables-test", + "packetdrill": "gcr.io/gvisor-presubmit/packetdrill", + "packetimpact": "gcr.io/gvisor-presubmit/packetimpact", + "runtimes/go1.12": "gcr.io/gvisor-presubmit/go1.12", + "runtimes/java11": "gcr.io/gvisor-presubmit/java11", + "runtimes/nodejs12.4.0": "gcr.io/gvisor-presubmit/nodejs12.4.0", + "runtimes/php7.3.6": "gcr.io/gvisor-presubmit/php7.3.6", + "runtimes/python3.7.3": "gcr.io/gvisor-presubmit/python3.7.3", +} + +// ImageByName mangles the image name used locally. +// +// For now, this is implemented as a static lookup table. In a subsequent +// change, this will be used to reference a locally-generated image. +func ImageByName(name string) string { + actual, ok := nameToActual[name] + if !ok { + panic(fmt.Sprintf("unknown image: %v", name)) + } + // A terrible hack, for now execute a manual pull. + if out, err := exec.Command("docker", "pull", actual).CombinedOutput(); err != nil { + panic(fmt.Sprintf("error pulling image %q -> %q: %v, out: %s", name, actual, err, string(out))) + } + return actual +} + +// ConfigureExePath configures the executable for runsc in the test environment. +func ConfigureExePath() error { + path, err := FindFile("runsc/runsc") + if err != nil { + return err + } + specutils.ExePath = path + return nil +} + +// TmpDir returns the absolute path to a writable directory that can be used as +// scratch by the test. +func TmpDir() string { + dir := os.Getenv("TEST_TMPDIR") + if dir == "" { + dir = "/tmp" + } + return dir +} + +// Logger is a simple logging wrapper. +// +// This is designed to be implemented by *testing.T. +type Logger interface { + Name() string + Logf(fmt string, args ...interface{}) +} + +// DefaultLogger logs using the log package. +type DefaultLogger string + +// Name implements Logger.Name. +func (d DefaultLogger) Name() string { + return string(d) +} + +// Logf implements Logger.Logf. +func (d DefaultLogger) Logf(fmt string, args ...interface{}) { + log.Printf(fmt, args...) +} + +// Cmd is a simple wrapper. +type Cmd struct { + logger Logger + *exec.Cmd +} + +// CombinedOutput returns the output and logs. +func (c *Cmd) CombinedOutput() ([]byte, error) { + out, err := c.Cmd.CombinedOutput() + if len(out) > 0 { + c.logger.Logf("output: %s", string(out)) + } + if err != nil { + c.logger.Logf("error: %v", err) + } + return out, err +} + +// Command is a simple wrapper around exec.Command, that logs. +func Command(logger Logger, args ...string) *Cmd { + logger.Logf("command: %s", strings.Join(args, " ")) + return &Cmd{ + logger: logger, + Cmd: exec.Command(args[0], args[1:]...), + } +} + +// TestConfig returns the default configuration to use in tests. Note that +// 'RootDir' must be set by caller if required. 
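+// The returned config enables debug logging and strace, uses the ptrace
+// platform, and disables networking.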
+func TestConfig(t *testing.T) *boot.Config { + logDir := os.TempDir() + if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { + logDir = dir + "/" + } + return &boot.Config{ + Debug: true, + DebugLog: path.Join(logDir, "runsc.log."+t.Name()+".%TIMESTAMP%.%COMMAND%"), + LogFormat: "text", + DebugLogFormat: "text", + LogPackets: true, + Network: boot.NetworkNone, + Strace: true, + Platform: "ptrace", + FileAccess: boot.FileAccessExclusive, + NumNetworkChannels: 1, + + TestOnlyAllowRunAsCurrentUserWithoutChroot: true, + } +} + +// NewSpecWithArgs creates a simple spec with the given args suitable for use +// in tests. +func NewSpecWithArgs(args ...string) *specs.Spec { + return &specs.Spec{ + // The host filesystem root is the container root. + Root: &specs.Root{ + Path: "/", + Readonly: true, + }, + Process: &specs.Process{ + Args: args, + Env: []string{ + "PATH=" + os.Getenv("PATH"), + }, + Capabilities: specutils.AllCapabilities(), + }, + Mounts: []specs.Mount{ + // Hide the host /etc to avoid any side-effects. + // For example, bash reads /etc/passwd and if it is + // very big, tests can fail by timeout. + { + Type: "tmpfs", + Destination: "/etc", + }, + // Root is readonly, but many tests want to write to tmpdir. + // This creates a writable mount inside the root. Also, when tmpdir points + // to "/tmp", it makes the the actual /tmp to be mounted and not a tmpfs + // inside the sentry. + { + Type: "bind", + Destination: TmpDir(), + Source: TmpDir(), + }, + }, + Hostname: "runsc-test-hostname", + } +} + +// SetupRootDir creates a root directory for containers. +func SetupRootDir() (string, func(), error) { + rootDir, err := ioutil.TempDir(TmpDir(), "containers") + if err != nil { + return "", nil, fmt.Errorf("error creating root dir: %v", err) + } + return rootDir, func() { os.RemoveAll(rootDir) }, nil +} + +// SetupContainer creates a bundle and root dir for the container, generates a +// test config, and writes the spec to config.json in the bundle dir. +func SetupContainer(spec *specs.Spec, conf *boot.Config) (rootDir, bundleDir string, cleanup func(), err error) { + rootDir, rootCleanup, err := SetupRootDir() + if err != nil { + return "", "", nil, err + } + conf.RootDir = rootDir + bundleDir, bundleCleanup, err := SetupBundleDir(spec) + if err != nil { + rootCleanup() + return "", "", nil, err + } + return rootDir, bundleDir, func() { + bundleCleanup() + rootCleanup() + }, err +} + +// SetupBundleDir creates a bundle dir and writes the spec to config.json. +func SetupBundleDir(spec *specs.Spec) (string, func(), error) { + bundleDir, err := ioutil.TempDir(TmpDir(), "bundle") + if err != nil { + return "", nil, fmt.Errorf("error creating bundle dir: %v", err) + } + cleanup := func() { os.RemoveAll(bundleDir) } + if err := writeSpec(bundleDir, spec); err != nil { + cleanup() + return "", nil, fmt.Errorf("error writing spec: %v", err) + } + return bundleDir, cleanup, nil +} + +// writeSpec writes the spec to disk in the given directory. +func writeSpec(dir string, spec *specs.Spec) error { + b, err := json.Marshal(spec) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755) +} + +// RandomID returns 20 random bytes following the given prefix. +func RandomID(prefix string) string { + // Read 20 random bytes. + b := make([]byte, 20) + // "[Read] always returns len(p) and a nil error." 
--godoc + if _, err := rand.Read(b); err != nil { + panic("rand.Read failed: " + err.Error()) + } + return fmt.Sprintf("%s-%s", prefix, base32.StdEncoding.EncodeToString(b)) +} + +// RandomContainerID generates a random container id for each test. +// +// The container id is used to create an abstract unix domain socket, which +// must be unique. While the container forbids creating two containers with the +// same name, sometimes between test runs the socket does not get cleaned up +// quickly enough, causing container creation to fail. +func RandomContainerID() string { + return RandomID("test-container-") +} + +// Copy copies file from src to dst. +func Copy(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + st, err := in.Stat() + if err != nil { + return err + } + + out, err := os.OpenFile(dst, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, st.Mode().Perm()) + if err != nil { + return err + } + defer out.Close() + + // Mirror the local user's permissions across all users. This is + // because as we inject things into the container, the UID/GID will + // change. Also, the build system may generate artifacts with different + // modes. At the top-level (volume mapping) we have a big read-only + // knob that can be applied to prevent modifications. + // + // Note that this must be done via a separate Chmod call, otherwise the + // current process's umask will get in the way. + var mode os.FileMode + if st.Mode()&0100 != 0 { + mode |= 0111 + } + if st.Mode()&0200 != 0 { + mode |= 0222 + } + if st.Mode()&0400 != 0 { + mode |= 0444 + } + if err := os.Chmod(dst, mode); err != nil { + return err + } + + _, err = io.Copy(out, in) + return err +} + +// Poll is a shorthand function to poll for something with given timeout. +func Poll(cb func() error, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) + return backoff.Retry(cb, b) +} + +// WaitForHTTP tries GET requests on a port until the call succeeds or timeout. +func WaitForHTTP(port int, timeout time.Duration) error { + cb := func() error { + c := &http.Client{ + // Calculate timeout to be able to do minimum 5 attempts. + Timeout: timeout / 5, + } + url := fmt.Sprintf("http://localhost:%d/", port) + resp, err := c.Get(url) + if err != nil { + log.Printf("Waiting %s: %v", url, err) + return err + } + resp.Body.Close() + return nil + } + return Poll(cb, timeout) +} + +// Reaper reaps child processes. +type Reaper struct { + // mu protects ch, which will be nil if the reaper is not running. + mu sync.Mutex + ch chan os.Signal +} + +// Start starts reaping child processes. +func (r *Reaper) Start() { + r.mu.Lock() + defer r.mu.Unlock() + + if r.ch != nil { + panic("reaper.Start called on a running reaper") + } + + r.ch = make(chan os.Signal, 1) + signal.Notify(r.ch, syscall.SIGCHLD) + + go func() { + for { + r.mu.Lock() + ch := r.ch + r.mu.Unlock() + if ch == nil { + return + } + + _, ok := <-ch + if !ok { + // Channel closed. + return + } + for { + cpid, _ := syscall.Wait4(-1, nil, syscall.WNOHANG, nil) + if cpid < 1 { + break + } + } + } + }() +} + +// Stop stops reaping child processes. 
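+// It panics if the reaper is not currently running.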
+func (r *Reaper) Stop() { + r.mu.Lock() + defer r.mu.Unlock() + + if r.ch == nil { + panic("reaper.Stop called on a stopped reaper") + } + + signal.Stop(r.ch) + close(r.ch) + r.ch = nil +} + +// StartReaper is a helper that starts a new Reaper and returns a function to +// stop it. +func StartReaper() func() { + r := &Reaper{} + r.Start() + return r.Stop +} + +// WaitUntilRead reads from the given reader until the wanted string is found +// or until timeout. +func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error { + sc := bufio.NewScanner(r) + if split != nil { + sc.Split(split) + } + // done must be accessed atomically. A value greater than 0 indicates + // that the read loop can exit. + var done uint32 + doneCh := make(chan struct{}) + go func() { + for sc.Scan() { + t := sc.Text() + if strings.Contains(t, want) { + atomic.StoreUint32(&done, 1) + close(doneCh) + break + } + if atomic.LoadUint32(&done) > 0 { + break + } + } + }() + select { + case <-time.After(timeout): + atomic.StoreUint32(&done, 1) + return fmt.Errorf("timeout waiting to read %q", want) + case <-doneCh: + return nil + } +} + +// KillCommand kills the process running cmd unless it hasn't been started. It +// returns an error if it cannot kill the process unless the reason is that the +// process has already exited. +// +// KillCommand will also reap the process. +func KillCommand(cmd *exec.Cmd) error { + if cmd.Process == nil { + return nil + } + if err := cmd.Process.Kill(); err != nil { + if !strings.Contains(err.Error(), "process already finished") { + return fmt.Errorf("failed to kill process %v: %v", cmd, err) + } + } + return cmd.Wait() +} + +// WriteTmpFile writes text to a temporary file, closes the file, and returns +// the name of the file. A cleanup function is also returned. +func WriteTmpFile(pattern, text string) (string, func(), error) { + file, err := ioutil.TempFile(TmpDir(), pattern) + if err != nil { + return "", nil, err + } + defer file.Close() + if _, err := file.Write([]byte(text)); err != nil { + return "", nil, err + } + return file.Name(), func() { os.RemoveAll(file.Name()) }, nil +} + +// IsStatic returns true iff the given file is a static binary. +func IsStatic(filename string) (bool, error) { + f, err := elf.Open(filename) + if err != nil { + return false, err + } + for _, prog := range f.Progs { + if prog.Type == elf.PT_INTERP { + return false, nil // Has interpreter. + } + } + return true, nil +} + +// TestIndicesForShard returns indices for this test shard based on the +// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. +// +// If either of the env vars are not present, then the function will return all +// tests. If there are more shards than there are tests, then the returned list +// may be empty. +func TestIndicesForShard(numTests int) ([]int, error) { + var ( + shardIndex = 0 + shardTotal = 1 + ) + + indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS") + if indexStr != "" && totalStr != "" { + // Parse index and total to ints. + var err error + shardIndex, err = strconv.Atoi(indexStr) + if err != nil { + return nil, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err) + } + shardTotal, err = strconv.Atoi(totalStr) + if err != nil { + return nil, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err) + } + } + + // Calculate! 
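	// Editor's note, an illustration that is not part of the patch: the loop
	// below picks every shardTotal-th test starting at shardIndex. For
	// example, with numTests=10, TEST_TOTAL_SHARDS=4 and TEST_SHARD_INDEX=1,
	// numBlocks is ceil(10/4) = 3 and this shard runs tests {1, 5, 9}; shard
	// index 3 would get only {3, 7}, because 11 falls past the end of the
	// test list.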
+ var indices []int + numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal))) + for i := 0; i < numBlocks; i++ { + pick := i*shardTotal + shardIndex + if pick < numTests { + indices = append(indices, pick) + } + } + return indices, nil +} diff --git a/pkg/test/testutil/testutil_runfiles.go b/pkg/test/testutil/testutil_runfiles.go new file mode 100644 index 000000000..ece9ea9a1 --- /dev/null +++ b/pkg/test/testutil/testutil_runfiles.go @@ -0,0 +1,75 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "fmt" + "os" + "path/filepath" +) + +// FindFile searchs for a file inside the test run environment. It returns the +// full path to the file. It fails if none or more than one file is found. +func FindFile(path string) (string, error) { + wd, err := os.Getwd() + if err != nil { + return "", err + } + + // The test root is demarcated by a path element called "__main__". Search for + // it backwards from the working directory. + root := wd + for { + dir, name := filepath.Split(root) + if name == "__main__" { + break + } + if len(dir) == 0 { + return "", fmt.Errorf("directory __main__ not found in %q", wd) + } + // Remove ending slash to loop around. + root = dir[:len(dir)-1] + } + + // Annoyingly, bazel adds the build type to the directory path for go + // binaries, but not for c++ binaries. We use two different patterns to + // to find our file. + patterns := []string{ + // Try the obvious path first. + filepath.Join(root, path), + // If it was a go binary, use a wildcard to match the build + // type. The pattern is: /test-path/__main__/directories/*/file. + filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)), + } + + for _, p := range patterns { + matches, err := filepath.Glob(p) + if err != nil { + // "The only possible returned error is ErrBadPattern, + // when pattern is malformed." -godoc + return "", fmt.Errorf("error globbing %q: %v", p, err) + } + switch len(matches) { + case 0: + // Try the next pattern. + case 1: + // We found it. 
+ return matches[0], nil + default: + return "", fmt.Errorf("more than one match found for %q: %s", path, matches) + } + } + return "", fmt.Errorf("file %q not found", path) +} diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 72c2fe381..69dcc74f2 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -23,6 +23,7 @@ go_library( "vfs.go", ], visibility = [ + "//pkg/test:__subpackages__", "//runsc:__subpackages__", "//test:__subpackages__", ], diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 4900fbe16..af3538ef0 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -82,11 +82,11 @@ go_test( "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/kernel/auth", + "//pkg/test/testutil", "//pkg/urpc", "//runsc/boot", "//runsc/container", "//runsc/specutils", - "//runsc/testutil", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go index 9360d7442..a84067112 100644 --- a/runsc/cmd/capability_test.go +++ b/runsc/cmd/capability_test.go @@ -23,10 +23,10 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) func init() { @@ -90,16 +90,15 @@ func TestCapabilities(t *testing.T) { // Use --network=host to make sandbox use spec's capabilities. conf.Network = boot.NetworkHost - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. 
args := container.Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 0aaeea3a8..331b8e866 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -33,13 +33,15 @@ go_test( size = "large", srcs = [ "console_test.go", + "container_norace_test.go", + "container_race_test.go", "container_test.go", "multi_container_test.go", "shared_volume_test.go", ], data = [ "//runsc", - "//runsc/container/test_app", + "//test/cmd/test_app", ], library = ":container", shard_count = 5, @@ -54,12 +56,12 @@ go_test( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sync", + "//pkg/test/testutil", "//pkg/unet", "//pkg/urpc", "//runsc/boot", "//runsc/boot/platforms", "//runsc/specutils", - "//runsc/testutil", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_kr_pty//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index af245b6d8..294dca5e7 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -29,9 +29,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/urpc" - "gvisor.dev/gvisor/runsc/testutil" ) // socketPath creates a path inside bundleDir and ensures that the returned @@ -58,25 +58,26 @@ func socketPath(bundleDir string) (string, error) { } // createConsoleSocket creates a socket at the given path that will receive a -// console fd from the sandbox. If no error occurs, it returns the server -// socket and a cleanup function. -func createConsoleSocket(path string) (*unet.ServerSocket, func() error, error) { +// console fd from the sandbox. If an error occurs, t.Fatalf will be called. +// The function returning should be deferred as cleanup. +func createConsoleSocket(t *testing.T, path string) (*unet.ServerSocket, func()) { + t.Helper() srv, err := unet.BindAndListen(path, false) if err != nil { - return nil, nil, fmt.Errorf("error binding and listening to socket %q: %v", path, err) + t.Fatalf("error binding and listening to socket %q: %v", path, err) } - cleanup := func() error { + cleanup := func() { + // Log errors; nothing can be done. if err := srv.Close(); err != nil { - return fmt.Errorf("error closing socket %q: %v", path, err) + t.Logf("error closing socket %q: %v", path, err) } if err := os.Remove(path); err != nil { - return fmt.Errorf("error removing socket %q: %v", path, err) + t.Logf("error removing socket %q: %v", path, err) } - return nil } - return srv, cleanup, nil + return srv, cleanup } // receiveConsolePTY accepts a connection on the server socket and reads fds. @@ -118,45 +119,42 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) { // Test that an pty FD is sent over the console socket if one is provided. func TestConsoleSocket(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - spec := testutil.NewSpecWithArgs("true") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configs(t, all...) 
{ + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("true") + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - sock, err := socketPath(bundleDir) - if err != nil { - t.Fatalf("error getting socket path: %v", err) - } - srv, cleanup, err := createConsoleSocket(sock) - if err != nil { - t.Fatalf("error creating socket at %q: %v", sock, err) - } - defer cleanup() - - // Create the container and pass the socket name. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - ConsoleSocket: sock, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() + sock, err := socketPath(bundleDir) + if err != nil { + t.Fatalf("error getting socket path: %v", err) + } + srv, cleanup := createConsoleSocket(t, sock) + defer cleanup() + + // Create the container and pass the socket name. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + ConsoleSocket: sock, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() - // Make sure we get a console PTY. - ptyMaster, err := receiveConsolePTY(srv) - if err != nil { - t.Fatalf("error receiving console FD: %v", err) - } - ptyMaster.Close() + // Make sure we get a console PTY. + ptyMaster, err := receiveConsolePTY(srv) + if err != nil { + t.Fatalf("error receiving console FD: %v", err) + } + ptyMaster.Close() + }) } } @@ -165,16 +163,15 @@ func TestJobControlSignalExec(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -292,26 +289,22 @@ func TestJobControlSignalRootContainer(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/bash", "--noprofile", "--norc") spec.Process.Terminal = true - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() sock, err := socketPath(bundleDir) if err != nil { t.Fatalf("error getting socket path: %v", err) } - srv, cleanup, err := createConsoleSocket(sock) - if err != nil { - t.Fatalf("error creating socket at %q: %v", sock, err) - } + srv, cleanup := createConsoleSocket(t, sock) defer cleanup() // Create the container and pass the socket name. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, ConsoleSocket: sock, @@ -368,7 +361,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { {PID: 1, Cmd: "bash", Threads: []kernel.ThreadID{1}}, } if err := waitForProcessList(c, expectedPL); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } // Execute sleep via the terminal. @@ -377,7 +370,7 @@ func TestJobControlSignalRootContainer(t *testing.T) { // Wait for sleep to start. 
expectedPL = append(expectedPL, &control.Process{PID: 2, PPID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{2}}) if err := waitForProcessList(c, expectedPL); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } // Reset the pty buffer, so there is less output for us to scan later. diff --git a/runsc/container/container.go b/runsc/container/container.go index 7233659b1..117ea7d7b 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -274,7 +274,7 @@ func New(conf *boot.Config, args Args) (*Container, error) { } if err := os.MkdirAll(conf.RootDir, 0711); err != nil { - return nil, fmt.Errorf("creating container root directory: %v", err) + return nil, fmt.Errorf("creating container root directory %q: %v", conf.RootDir, err) } c := &Container{ diff --git a/runsc/container/container_norace_test.go b/runsc/container/container_norace_test.go new file mode 100644 index 000000000..838c1e20a --- /dev/null +++ b/runsc/container/container_norace_test.go @@ -0,0 +1,20 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !race + +package container + +// Allow both kvm and ptrace for non-race builds. +var platformOptions = []configOption{ptrace, kvm} diff --git a/runsc/container/container_race_test.go b/runsc/container/container_race_test.go new file mode 100644 index 000000000..9fb4c4fc0 --- /dev/null +++ b/runsc/container/container_race_test.go @@ -0,0 +1,20 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build race + +package container + +// Only enabled ptrace with race builds. +var platformOptions = []configOption{ptrace} diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 5db6d64aa..3ff89f38c 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -39,10 +39,10 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) // waitForProcessList waits for the given process list to show up in the container. @@ -215,16 +215,15 @@ func readOutputNum(file string, position int) (int, error) { // run starts the sandbox and waits for it to exit, checking that the // application succeeded. 
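// Editor's note, not part of the patch: container_race_test.go and
// container_norace_test.go above rely on Go build constraints ("+build race"
// vs. "+build !race") to decide which platformOptions value is compiled in,
// so the configurations produced by configs() below cover only ptrace when
// the race detector is enabled and both ptrace and KVM otherwise.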
func run(spec *specs.Spec, conf *boot.Config) error { - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { return fmt.Errorf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create, start and wait for the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, Attached: true, @@ -243,35 +242,41 @@ type configOption int const ( overlay configOption = iota + ptrace kvm nonExclusiveFS ) -var noOverlay = []configOption{kvm, nonExclusiveFS} -var all = append(noOverlay, overlay) +var ( + noOverlay = append(platformOptions, nonExclusiveFS) + all = append(noOverlay, overlay) +) // configs generates different configurations to run tests. -func configs(t *testing.T, opts ...configOption) []*boot.Config { +func configs(t *testing.T, opts ...configOption) map[string]*boot.Config { // Always load the default config. - cs := []*boot.Config{testutil.TestConfig(t)} - + cs := make(map[string]*boot.Config) for _, o := range opts { - c := testutil.TestConfig(t) switch o { case overlay: + c := testutil.TestConfig(t) c.Overlay = true + cs["overlay"] = c + case ptrace: + c := testutil.TestConfig(t) + c.Platform = platforms.Ptrace + cs["ptrace"] = c case kvm: - // TODO(b/112165693): KVM tests are flaky. Disable until fixed. - continue - + c := testutil.TestConfig(t) c.Platform = platforms.KVM + cs["kvm"] = c case nonExclusiveFS: + c := testutil.TestConfig(t) c.FileAccess = boot.FileAccessShared + cs["non-exclusive"] = c default: panic(fmt.Sprintf("unknown config option %v", o)) - } - cs = append(cs, c) } return cs } @@ -285,133 +290,133 @@ func TestLifecycle(t *testing.T) { childReaper.Start() defer childReaper.Stop() - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - // The container will just sleep for a long time. We will kill it before - // it finishes sleeping. - spec := testutil.NewSpecWithArgs("sleep", "100") - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) - - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - } - // Create the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + // The container will just sleep for a long time. We will kill it before + // it finishes sleeping. + spec := testutil.NewSpecWithArgs("sleep", "100") - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Created; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // List should return the container id. 
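// Editor's note, not part of the patch: since configs() now returns a map
// keyed by a short name and each case runs under t.Run(name, ...), a single
// configuration can be selected through Go's subtest matching, for example
// "go test -run 'TestLifecycle/kvm'"; subtest order follows Go's randomized
// map iteration, so it is not fixed between runs.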
- ids, err := List(rootDir) - if err != nil { - t.Fatalf("error listing containers: %v", err) - } - if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) { - t.Errorf("container list got %v, want %v", got, want) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + } + // Create the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() - // Start the container. - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) + if err != nil { + t.Fatalf("error loading container: %v", err) + } + if got, want := c.Status, Created; got != want { + t.Errorf("container status got %v, want %v", got, want) + } - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Running; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + // List should return the container id. + ids, err := List(rootDir) + if err != nil { + t.Fatalf("error listing containers: %v", err) + } + if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) { + t.Errorf("container list got %v, want %v", got, want) + } - // Verify that "sleep 100" is running. - if err := waitForProcessList(c, expectedPL); err != nil { - t.Error(err) - } + // Start the container. + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Wait on the container. - var wg sync.WaitGroup - wg.Add(1) - ch := make(chan struct{}) - go func() { - ch <- struct{}{} - ws, err := c.Wait() + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) if err != nil { - t.Fatalf("error waiting on container: %v", err) + t.Fatalf("error loading container: %v", err) } - if got, want := ws.Signal(), syscall.SIGTERM; got != want { - t.Fatalf("got signal %v, want %v", got, want) + if got, want := c.Status, Running; got != want { + t.Errorf("container status got %v, want %v", got, want) } - wg.Done() - }() - // Wait a bit to ensure that we've started waiting on the - // container before we signal. - <-ch - time.Sleep(100 * time.Millisecond) - // Send the container a SIGTERM which will cause it to stop. - if err := c.SignalContainer(syscall.SIGTERM, false); err != nil { - t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err) - } - // Wait for it to die. - wg.Wait() + // Verify that "sleep 100" is running. + if err := waitForProcessList(c, expectedPL); err != nil { + t.Error(err) + } - // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) - if err != nil { - t.Fatalf("error loading container: %v", err) - } - if got, want := c.Status, Stopped; got != want { - t.Errorf("container status got %v, want %v", got, want) - } + // Wait on the container. + ch := make(chan error) + go func() { + ws, err := c.Wait() + if err != nil { + ch <- err + } + if got, want := ws.Signal(), syscall.SIGTERM; got != want { + ch <- fmt.Errorf("got signal %v, want %v", got, want) + } + ch <- nil + }() - // Destroy the container. 
- if err := c.Destroy(); err != nil { - t.Fatalf("error destroying container: %v", err) - } + // Wait a bit to ensure that we've started waiting on + // the container before we signal. + time.Sleep(time.Second) - // List should not return the container id. - ids, err = List(rootDir) - if err != nil { - t.Fatalf("error listing containers: %v", err) - } - if len(ids) != 0 { - t.Errorf("expected container list to be empty, but got %v", ids) - } + // Send the container a SIGTERM which will cause it to stop. + if err := c.SignalContainer(syscall.SIGTERM, false); err != nil { + t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err) + } - // Loading the container by id should fail. - if _, err = Load(rootDir, args.ID); err == nil { - t.Errorf("expected loading destroyed container to fail, but it did not") - } + // Wait for it to die. + if err := <-ch; err != nil { + t.Fatalf("error waiting for container: %v", err) + } + + // Load the container from disk and check the status. + c, err = Load(rootDir, args.ID) + if err != nil { + t.Fatalf("error loading container: %v", err) + } + if got, want := c.Status, Stopped; got != want { + t.Errorf("container status got %v, want %v", got, want) + } + + // Destroy the container. + if err := c.Destroy(); err != nil { + t.Fatalf("error destroying container: %v", err) + } + + // List should not return the container id. + ids, err = List(rootDir) + if err != nil { + t.Fatalf("error listing containers: %v", err) + } + if len(ids) != 0 { + t.Errorf("expected container list to be empty, but got %v", ids) + } + + // Loading the container by id should fail. + if _, err = Load(rootDir, args.ID); err == nil { + t.Errorf("expected loading destroyed container to fail, but it did not") + } + }) } } @@ -420,12 +425,14 @@ func TestExePath(t *testing.T) { // Create two directories that will be prepended to PATH. firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first") if err != nil { - t.Fatal(err) + t.Fatalf("error creating temporary directory: %v", err) } + defer os.RemoveAll(firstPath) secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second") if err != nil { - t.Fatal(err) + t.Fatalf("error creating temporary directory: %v", err) } + defer os.RemoveAll(secondPath) // Create two minimal executables in the second path, two of which // will be masked by files in first path. @@ -433,11 +440,11 @@ func TestExePath(t *testing.T) { path := filepath.Join(secondPath, p) f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777) if err != nil { - t.Fatal(err) + t.Fatalf("error opening path: %v", err) } defer f.Close() if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil { - t.Fatal(err) + t.Fatalf("error writing contents: %v", err) } } @@ -446,7 +453,7 @@ func TestExePath(t *testing.T) { nonExecutable := filepath.Join(firstPath, "masked1") f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666) if err != nil { - t.Fatal(err) + t.Fatalf("error opening file: %v", err) } f2.Close() @@ -454,68 +461,69 @@ func TestExePath(t *testing.T) { // executable in the second. 
nonRegular := filepath.Join(firstPath, "masked2") if err := os.Mkdir(nonRegular, 0777); err != nil { - t.Fatal(err) - } - - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) - for _, test := range []struct { - path string - success bool - }{ - {path: "true", success: true}, - {path: "bin/true", success: true}, - {path: "/bin/true", success: true}, - {path: "thisfiledoesntexit", success: false}, - {path: "bin/thisfiledoesntexit", success: false}, - {path: "/bin/thisfiledoesntexit", success: false}, - - {path: "unmasked", success: true}, - {path: filepath.Join(firstPath, "unmasked"), success: false}, - {path: filepath.Join(secondPath, "unmasked"), success: true}, - - {path: "masked1", success: true}, - {path: filepath.Join(firstPath, "masked1"), success: false}, - {path: filepath.Join(secondPath, "masked1"), success: true}, - - {path: "masked2", success: true}, - {path: filepath.Join(firstPath, "masked2"), success: false}, - {path: filepath.Join(secondPath, "masked2"), success: true}, - } { - spec := testutil.NewSpecWithArgs(test.path) - spec.Process.Env = []string{ - fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), - } - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("exec: %s, error setting up container: %v", test.path, err) - } - - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - Attached: true, - } - ws, err := Run(conf, args) + t.Fatalf("error making directory: %v", err) + } + + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + for _, test := range []struct { + path string + success bool + }{ + {path: "true", success: true}, + {path: "bin/true", success: true}, + {path: "/bin/true", success: true}, + {path: "thisfiledoesntexit", success: false}, + {path: "bin/thisfiledoesntexit", success: false}, + {path: "/bin/thisfiledoesntexit", success: false}, + + {path: "unmasked", success: true}, + {path: filepath.Join(firstPath, "unmasked"), success: false}, + {path: filepath.Join(secondPath, "unmasked"), success: true}, + + {path: "masked1", success: true}, + {path: filepath.Join(firstPath, "masked1"), success: false}, + {path: filepath.Join(secondPath, "masked1"), success: true}, + + {path: "masked2", success: true}, + {path: filepath.Join(firstPath, "masked2"), success: false}, + {path: filepath.Join(secondPath, "masked2"), success: true}, + } { + t.Run(fmt.Sprintf("path=%s,success=%t", test.path, test.success), func(t *testing.T) { + spec := testutil.NewSpecWithArgs(test.path) + spec.Process.Env = []string{ + fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), + } - os.RemoveAll(rootDir) - os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("exec: error setting up container: %v", err) + } + defer cleanup() - if test.success { - if err != nil { - t.Errorf("exec: %s, error running container: %v", test.path, err) - } - if ws.ExitStatus() != 0 { - t.Errorf("exec: %s, got exit status %v want %v", test.path, ws.ExitStatus(), 0) - } - } else { - if err == nil { - t.Errorf("exec: %s, got: no error, want: error", test.path) - } + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + Attached: true, + } + ws, err := Run(conf, args) + + if test.success { + if err != nil { + t.Errorf("exec: error running container: %v", err) + } + if ws.ExitStatus() != 0 { + t.Errorf("exec: got exit status %v want 
%v", ws.ExitStatus(), 0) + } + } else { + if err == nil { + t.Errorf("exec: got: no error, want: error") + } + } + }) } - } + }) } } @@ -534,15 +542,14 @@ func doAppExitStatus(t *testing.T, vfs2 bool) { succSpec := testutil.NewSpecWithArgs("true") conf := testutil.TestConfig(t) conf.VFS2 = vfs2 - rootDir, bundleDir, err := testutil.SetupContainer(succSpec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(succSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: succSpec, BundleDir: bundleDir, Attached: true, @@ -559,15 +566,14 @@ func doAppExitStatus(t *testing.T, vfs2 bool) { wantStatus := 123 errSpec := testutil.NewSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus)) - rootDir2, bundleDir2, err := testutil.SetupContainer(errSpec, conf) + _, bundleDir2, cleanup2, err := testutil.SetupContainer(errSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir2) - defer os.RemoveAll(bundleDir2) + defer cleanup2() args2 := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: errSpec, BundleDir: bundleDir2, Attached: true, @@ -583,166 +589,163 @@ func doAppExitStatus(t *testing.T, vfs2 bool) { // TestExec verifies that a container can exec a new program. func TestExec(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + const uid = 343 + spec := testutil.NewSpecWithArgs("sleep", "100") - const uid = 343 - spec := testutil.NewSpecWithArgs("sleep", "100") + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + { + UID: uid, + PID: 2, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{2}, + }, + } - // expectedPL lists the expected process state of the container. - expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{2}, - }, - } + // Verify that "sleep 100" is running. 
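			// Editor's note, not part of the patch: the exec status just
			// below is reported over a channel instead of calling t.Fatalf
			// inside the goroutine, because Fatalf/FailNow must run on the
			// goroutine executing the test; the same error-channel pattern
			// replaces the WaitGroup in TestLifecycle earlier in this diff.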
+ if err := waitForProcessList(cont, expectedPL[:1]); err != nil { + t.Error(err) + } - // Verify that "sleep 100" is running. - if err := waitForProcessList(cont, expectedPL[:1]); err != nil { - t.Error(err) - } + execArgs := &control.ExecArgs{ + Filename: "/bin/sleep", + Argv: []string{"/bin/sleep", "5"}, + WorkingDirectory: "/", + KUID: uid, + } - execArgs := &control.ExecArgs{ - Filename: "/bin/sleep", - Argv: []string{"/bin/sleep", "5"}, - WorkingDirectory: "/", - KUID: uid, - } + // Verify that "sleep 100" and "sleep 5" are running + // after exec. First, start running exec (whick + // blocks). + ch := make(chan error) + go func() { + exitStatus, err := cont.executeSync(execArgs) + if err != nil { + ch <- err + } else if exitStatus != 0 { + ch <- fmt.Errorf("failed with exit status: %v", exitStatus) + } else { + ch <- nil + } + }() - // Verify that "sleep 100" and "sleep 5" are running after exec. - // First, start running exec (whick blocks). - status := make(chan error, 1) - go func() { - exitStatus, err := cont.executeSync(execArgs) - if err != nil { - log.Debugf("error executing: %v", err) - status <- err - } else if exitStatus != 0 { - log.Debugf("bad status: %d", exitStatus) - status <- fmt.Errorf("failed with exit status: %v", exitStatus) - } else { - status <- nil + if err := waitForProcessList(cont, expectedPL); err != nil { + t.Fatalf("error waiting for processes: %v", err) } - }() - - if err := waitForProcessList(cont, expectedPL); err != nil { - t.Fatal(err) - } - // Ensure that exec finished without error. - select { - case <-time.After(10 * time.Second): - t.Fatalf("container timed out waiting for exec to finish.") - case st := <-status: - if st != nil { - t.Errorf("container failed to exec %v: %v", args, err) + // Ensure that exec finished without error. + select { + case <-time.After(10 * time.Second): + t.Fatalf("container timed out waiting for exec to finish.") + case err := <-ch: + if err != nil { + t.Errorf("container failed to exec %v: %v", args, err) + } } - } + }) } } // TestKillPid verifies that we can signal individual exec'd processes. func TestKillPid(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) - - app, err := testutil.FindFile("runsc/container/test_app/test_app") - if err != nil { - t.Fatal("error finding test_app:", err) - } + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + app, err := testutil.FindFile("test/cmd/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } - const nProcs = 4 - spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + const nProcs = 4 + spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true") + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. 
- args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Verify that all processes are running. - if err := waitForProcessCount(cont, nProcs); err != nil { - t.Fatalf("timed out waiting for processes to start: %v", err) - } + // Verify that all processes are running. + if err := waitForProcessCount(cont, nProcs); err != nil { + t.Fatalf("timed out waiting for processes to start: %v", err) + } - // Kill the child process with the largest PID. - procs, err := cont.Processes() - if err != nil { - t.Fatalf("failed to get process list: %v", err) - } - var pid int32 - for _, p := range procs { - if pid < int32(p.PID) { - pid = int32(p.PID) + // Kill the child process with the largest PID. + procs, err := cont.Processes() + if err != nil { + t.Fatalf("failed to get process list: %v", err) + } + var pid int32 + for _, p := range procs { + if pid < int32(p.PID) { + pid = int32(p.PID) + } + } + if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil { + t.Fatalf("failed to signal process %d: %v", pid, err) } - } - if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil { - t.Fatalf("failed to signal process %d: %v", pid, err) - } - // Verify that one process is gone. - if err := waitForProcessCount(cont, nProcs-1); err != nil { - t.Fatal(err) - } + // Verify that one process is gone. + if err := waitForProcessCount(cont, nProcs-1); err != nil { + t.Fatalf("error waiting for processes: %v", err) + } - procs, err = cont.Processes() - if err != nil { - t.Fatalf("failed to get process list: %v", err) - } - for _, p := range procs { - if pid == int32(p.PID) { - t.Fatalf("pid %d is still alive, which should be killed", pid) + procs, err = cont.Processes() + if err != nil { + t.Fatalf("failed to get process list: %v", err) } - } + for _, p := range procs { + if pid == int32(p.PID) { + t.Fatalf("pid %d is still alive, which should be killed", pid) + } + } + }) } } @@ -753,160 +756,160 @@ func TestKillPid(t *testing.T) { // be the next consecutive number after the last number from the checkpointed container. func TestCheckpointRestore(t *testing.T) { // Skip overlay because test requires writing to host file. - for _, conf := range configs(t, noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test") - if err != nil { - t.Fatalf("ioutil.TempDir failed: %v", err) - } - if err := os.Chmod(dir, 0777); err != nil { - t.Fatalf("error chmoding file: %q, %v", dir, err) - } + for name, conf := range configs(t, noOverlay...) 
{ + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test") + if err != nil { + t.Fatalf("ioutil.TempDir failed: %v", err) + } + defer os.RemoveAll(dir) + if err := os.Chmod(dir, 0777); err != nil { + t.Fatalf("error chmoding file: %q, %v", dir, err) + } - outputPath := filepath.Join(dir, "output") - outputFile, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile.Close() + outputPath := filepath.Join(dir, "output") + outputFile, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile.Close() - script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath) - spec := testutil.NewSpecWithArgs("bash", "-c", script) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath) + spec := testutil.NewSpecWithArgs("bash", "-c", script) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Set the image path, which is where the checkpoint image will be saved. - imagePath := filepath.Join(dir, "test-image-file") + // Set the image path, which is where the checkpoint image will be saved. + imagePath := filepath.Join(dir, "test-image-file") - // Create the image file and open for writing. - file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) - if err != nil { - t.Fatalf("error opening new file at imagePath: %v", err) - } - defer file.Close() + // Create the image file and open for writing. + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) + if err != nil { + t.Fatalf("error opening new file at imagePath: %v", err) + } + defer file.Close() - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(file); err != nil { - t.Fatalf("error checkpointing container to empty file: %v", err) - } - defer os.RemoveAll(imagePath) + // Checkpoint running container; save state into new file. 
+ if err := cont.Checkpoint(file); err != nil { + t.Fatalf("error checkpointing container to empty file: %v", err) + } + defer os.RemoveAll(imagePath) - lastNum, err := readOutputNum(outputPath, -1) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + lastNum, err := readOutputNum(outputPath, -1) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile2, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile2.Close() + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile2, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile2.Close() - // Restore into a new container. - args2 := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont2, err := New(conf, args2) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont2.Destroy() + // Restore into a new container. + args2 := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont2, err := New(conf, args2) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont2.Destroy() - if err := cont2.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := cont2.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile2); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - firstNum, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + firstNum, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum and that the container picks - // up from where it left off. - if lastNum+1 != firstNum { - t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum) - } - cont2.Destroy() + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. + if lastNum+1 != firstNum { + t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum) + } + cont2.Destroy() - // Restore into another container! - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile3, err := createWriteableOutputFile(outputPath) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile3.Close() + // Restore into another container! + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile3, err := createWriteableOutputFile(outputPath) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile3.Close() - // Restore into a new container. 
- args3 := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont3, err := New(conf, args3) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont3.Destroy() + // Restore into a new container. + args3 := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont3, err := New(conf, args3) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont3.Destroy() - if err := cont3.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := cont3.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile3); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile3); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - firstNum2, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + firstNum2, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum and that the container picks - // up from where it left off. - if lastNum+1 != firstNum2 { - t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2) - } - cont3.Destroy() + // Check that lastNum is one less than firstNum and that the container picks + // up from where it left off. + if lastNum+1 != firstNum2 { + t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2) + } + cont3.Destroy() + }) } } @@ -914,135 +917,134 @@ func TestCheckpointRestore(t *testing.T) { // with filesystem Unix Domain Socket use. func TestUnixDomainSockets(t *testing.T) { // Skip overlay because test requires writing to host file. - for _, conf := range configs(t, noOverlay...) { - t.Logf("Running test with conf: %+v", conf) - - // UDS path is limited to 108 chars for compatibility with older systems. - // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is - // not exceeded. Assumes '/tmp' exists in the system. - dir, err := ioutil.TempDir("/tmp", "uds-test") - if err != nil { - t.Fatalf("ioutil.TempDir failed: %v", err) - } - defer os.RemoveAll(dir) + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + // UDS path is limited to 108 chars for compatibility with older systems. + // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is + // not exceeded. Assumes '/tmp' exists in the system. 
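			// Editor's note, not part of the patch: the 108-character limit
			// comes from the size of sockaddr_un.sun_path on Linux, so a
			// socket under a deeply nested TEST_TMPDIR might not bind; hence
			// the short '/tmp' prefix used here.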
+ dir, err := ioutil.TempDir("/tmp", "uds-test") + if err != nil { + t.Fatalf("ioutil.TempDir failed: %v", err) + } + defer os.RemoveAll(dir) - outputPath := filepath.Join(dir, "uds_output") - outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile.Close() + outputPath := filepath.Join(dir, "uds_output") + outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile.Close() - app, err := testutil.FindFile("runsc/container/test_app/test_app") - if err != nil { - t.Fatal("error finding test_app:", err) - } + app, err := testutil.FindFile("test/cmd/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } - socketPath := filepath.Join(dir, "uds_socket") - defer os.Remove(socketPath) + socketPath := filepath.Join(dir, "uds_socket") + defer os.Remove(socketPath) - spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath) - spec.Process.User = specs.User{ - UID: uint32(os.Getuid()), - GID: uint32(os.Getgid()), - } - spec.Mounts = []specs.Mount{{ - Type: "bind", - Destination: dir, - Source: dir, - }} + spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath) + spec.Process.User = specs.User{ + UID: uint32(os.Getuid()), + GID: uint32(os.Getgid()), + } + spec.Mounts = []specs.Mount{{ + Type: "bind", + Destination: dir, + Source: dir, + }} - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Set the image path, the location where the checkpoint image will be saved. - imagePath := filepath.Join(dir, "test-image-file") + // Set the image path, the location where the checkpoint image will be saved. + imagePath := filepath.Join(dir, "test-image-file") - // Create the image file and open for writing. - file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) - if err != nil { - t.Fatalf("error opening new file at imagePath: %v", err) - } - defer file.Close() - defer os.RemoveAll(imagePath) + // Create the image file and open for writing. + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644) + if err != nil { + t.Fatalf("error opening new file at imagePath: %v", err) + } + defer file.Close() + defer os.RemoveAll(imagePath) - // Wait until application has ran. 
- if err := waitForFileNotEmpty(outputFile); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Checkpoint running container; save state into new file. - if err := cont.Checkpoint(file); err != nil { - t.Fatalf("error checkpointing container to empty file: %v", err) - } + // Checkpoint running container; save state into new file. + if err := cont.Checkpoint(file); err != nil { + t.Fatalf("error checkpointing container to empty file: %v", err) + } - // Read last number outputted before checkpoint. - lastNum, err := readOutputNum(outputPath, -1) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + // Read last number outputted before checkpoint. + lastNum, err := readOutputNum(outputPath, -1) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Delete and recreate file before restoring. - if err := os.Remove(outputPath); err != nil { - t.Fatalf("error removing file") - } - outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) - if err != nil { - t.Fatalf("error creating output file: %v", err) - } - defer outputFile2.Close() + // Delete and recreate file before restoring. + if err := os.Remove(outputPath); err != nil { + t.Fatalf("error removing file") + } + outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666) + if err != nil { + t.Fatalf("error creating output file: %v", err) + } + defer outputFile2.Close() - // Restore into a new container. - argsRestore := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - contRestore, err := New(conf, argsRestore) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer contRestore.Destroy() + // Restore into a new container. + argsRestore := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + contRestore, err := New(conf, argsRestore) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer contRestore.Destroy() - if err := contRestore.Restore(spec, conf, imagePath); err != nil { - t.Fatalf("error restoring container: %v", err) - } + if err := contRestore.Restore(spec, conf, imagePath); err != nil { + t.Fatalf("error restoring container: %v", err) + } - // Wait until application has ran. - if err := waitForFileNotEmpty(outputFile2); err != nil { - t.Fatalf("Failed to wait for output file: %v", err) - } + // Wait until application has ran. + if err := waitForFileNotEmpty(outputFile2); err != nil { + t.Fatalf("Failed to wait for output file: %v", err) + } - // Read first number outputted after restore. - firstNum, err := readOutputNum(outputPath, 0) - if err != nil { - t.Fatalf("error with outputFile: %v", err) - } + // Read first number outputted after restore. + firstNum, err := readOutputNum(outputPath, 0) + if err != nil { + t.Fatalf("error with outputFile: %v", err) + } - // Check that lastNum is one less than firstNum. - if lastNum+1 != firstNum { - t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum) - } - contRestore.Destroy() + // Check that lastNum is one less than firstNum. + if lastNum+1 != firstNum { + t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum) + } + contRestore.Destroy() + }) } } @@ -1052,10 +1054,8 @@ func TestUnixDomainSockets(t *testing.T) { // recreated. 
Then it resumes the container, verify that the file gets created // again. func TestPauseResume(t *testing.T) { - for _, conf := range configs(t, noOverlay...) { - t.Run(fmt.Sprintf("conf: %+v", conf), func(t *testing.T) { - t.Logf("Running test with conf: %+v", conf) - + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "lock") if err != nil { t.Fatalf("error creating temp dir: %v", err) @@ -1066,16 +1066,15 @@ func TestPauseResume(t *testing.T) { script := fmt.Sprintf("while [[ true ]]; do touch %q; sleep 0.1; done", running) spec := testutil.NewSpecWithArgs("/bin/bash", "-c", script) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1134,16 +1133,15 @@ func TestPauseResume(t *testing.T) { func TestPauseResumeStatus(t *testing.T) { spec := testutil.NewSpecWithArgs("sleep", "20") conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1199,359 +1197,356 @@ func TestCapabilities(t *testing.T) { uid := auth.KUID(os.Getuid() + 1) gid := auth.KGID(os.Getgid() + 1) - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("sleep", "100") - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("sleep", "100") + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // expectedPL lists the expected process state of the container. 
- expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "exe", - Threads: []kernel.ThreadID{2}, - }, - } - if err := waitForProcessList(cont, expectedPL[:1]); err != nil { - t.Fatalf("Failed to wait for sleep to start, err: %v", err) - } + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + Threads: []kernel.ThreadID{1}, + }, + { + UID: uid, + PID: 2, + PPID: 0, + C: 0, + Cmd: "exe", + Threads: []kernel.ThreadID{2}, + }, + } + if err := waitForProcessList(cont, expectedPL[:1]); err != nil { + t.Fatalf("Failed to wait for sleep to start, err: %v", err) + } - // Create an executable that can't be run with the specified UID:GID. - // This shouldn't be callable within the container until we add the - // CAP_DAC_OVERRIDE capability to skip the access check. - exePath := filepath.Join(rootDir, "exe") - if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil { - t.Fatalf("couldn't create executable: %v", err) - } - defer os.Remove(exePath) - - // Need to traverse the intermediate directory. - os.Chmod(rootDir, 0755) - - execArgs := &control.ExecArgs{ - Filename: exePath, - Argv: []string{exePath}, - WorkingDirectory: "/", - KUID: uid, - KGID: gid, - Capabilities: &auth.TaskCapabilities{}, - } + // Create an executable that can't be run with the specified UID:GID. + // This shouldn't be callable within the container until we add the + // CAP_DAC_OVERRIDE capability to skip the access check. + exePath := filepath.Join(rootDir, "exe") + if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil { + t.Fatalf("couldn't create executable: %v", err) + } + defer os.Remove(exePath) + + // Need to traverse the intermediate directory. + os.Chmod(rootDir, 0755) + + execArgs := &control.ExecArgs{ + Filename: exePath, + Argv: []string{exePath}, + WorkingDirectory: "/", + KUID: uid, + KGID: gid, + Capabilities: &auth.TaskCapabilities{}, + } - // "exe" should fail because we don't have the necessary permissions. - if _, err := cont.executeSync(execArgs); err == nil { - t.Fatalf("container executed without error, but an error was expected") - } + // "exe" should fail because we don't have the necessary permissions. + if _, err := cont.executeSync(execArgs); err == nil { + t.Fatalf("container executed without error, but an error was expected") + } - // Now we run with the capability enabled and should succeed. - execArgs.Capabilities = &auth.TaskCapabilities{ - EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), - } - // "exe" should not fail this time. - if _, err := cont.executeSync(execArgs); err != nil { - t.Fatalf("container failed to exec %v: %v", args, err) - } + // Now we run with the capability enabled and should succeed. + execArgs.Capabilities = &auth.TaskCapabilities{ + EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE), + } + // "exe" should not fail this time. + if _, err := cont.executeSync(execArgs); err != nil { + t.Fatalf("container failed to exec %v: %v", args, err) + } + }) } } // TestRunNonRoot checks that sandbox can be configured when running as // non-privileged user. func TestRunNonRoot(t *testing.T) { - for _, conf := range configs(t, noOverlay...) 
{ - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("/bin/true") - - // Set a random user/group with no access to "blocked" dir. - spec.Process.User.UID = 343 - spec.Process.User.GID = 2401 - spec.Process.Capabilities = nil + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("/bin/true") + + // Set a random user/group with no access to "blocked" dir. + spec.Process.User.UID = 343 + spec.Process.User.GID = 2401 + spec.Process.Capabilities = nil + + // User running inside container can't list '$TMP/blocked' and would fail to + // mount it. + dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + if err := os.Chmod(dir, 0700); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", dir, err) + } + dir = path.Join(dir, "test") + if err := os.Mkdir(dir, 0755); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", dir, err) + } - // User running inside container can't list '$TMP/blocked' and would fail to - // mount it. - dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - if err := os.Chmod(dir, 0700); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", dir, err) - } - dir = path.Join(dir, "test") - if err := os.Mkdir(dir, 0755); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", dir, err) - } + src, err := ioutil.TempDir(testutil.TmpDir(), "src") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } - src, err := ioutil.TempDir(testutil.TmpDir(), "src") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: src, + Type: "bind", + }) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: dir, - Source: src, - Type: "bind", + if err := run(spec, conf); err != nil { + t.Fatalf("error running sandbox: %v", err) + } }) - - if err := run(spec, conf); err != nil { - t.Fatalf("error running sandbox: %v", err) - } } } // TestMountNewDir checks that runsc will create destination directory if it // doesn't exit. 
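The loops rewritten in these hunks now range over named configurations and wrap each body in t.Run, so every configuration becomes its own subtest instead of a t.Logf line. A standalone sketch of the pattern, assuming a configs-style helper that returns a map keyed by name (the real configs also takes *testing.T and option arguments):

    package sketch

    import "testing"

    // config is a stand-in for boot.Config; only the shape of the loop matters here.
    type config struct{ overlay bool }

    func sketchConfigs() map[string]*config {
        return map[string]*config{
            "default": {overlay: false},
            "overlay": {overlay: true},
        }
    }

    func TestPerConfig(t *testing.T) {
        for name, conf := range sketchConfigs() {
            conf := conf // capture the loop variable for the closure
            t.Run(name, func(t *testing.T) {
                _ = conf // the test body runs once per configuration
            })
        }
    }

With this shape, `go test -run 'TestPerConfig/overlay'` selects a single configuration, and failures are reported under the configuration name.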
func TestMountNewDir(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + root, err := ioutil.TempDir(testutil.TmpDir(), "root") + if err != nil { + t.Fatal("ioutil.TempDir() failed:", err) + } - root, err := ioutil.TempDir(testutil.TmpDir(), "root") - if err != nil { - t.Fatal("ioutil.TempDir() failed:", err) - } + srcDir := path.Join(root, "src", "dir", "anotherdir") + if err := os.MkdirAll(srcDir, 0755); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err) + } - srcDir := path.Join(root, "src", "dir", "anotherdir") - if err := os.MkdirAll(srcDir, 0755); err != nil { - t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err) - } + mountDir := path.Join(root, "dir", "anotherdir") - mountDir := path.Join(root, "dir", "anotherdir") + spec := testutil.NewSpecWithArgs("/bin/ls", mountDir) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: mountDir, + Source: srcDir, + Type: "bind", + }) - spec := testutil.NewSpecWithArgs("/bin/ls", mountDir) - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: mountDir, - Source: srcDir, - Type: "bind", + if err := run(spec, conf); err != nil { + t.Fatalf("error running sandbox: %v", err) + } }) - - if err := run(spec, conf); err != nil { - t.Fatalf("error running sandbox: %v", err) - } } } func TestReadonlyRoot(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) - - spec := testutil.NewSpecWithArgs("/bin/touch", "/foo") - spec.Root.Readonly = true - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + spec := testutil.NewSpecWithArgs("/bin/touch", "/foo") + spec.Root.Readonly = true + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { - t.Fatalf("container failed, waitStatus: %v", ws) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { + t.Fatalf("container failed, waitStatus: %v", ws) + } + }) } } func TestUIDMap(t *testing.T) { - for _, conf := range configs(t, noOverlay...) 
{ - t.Logf("Running test with conf: %+v", conf) - testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(testDir) - testFile := path.Join(testDir, "testfile") - - spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile") - uid := os.Getuid() - gid := os.Getgid() - spec.Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - {Type: specs.UserNamespace}, - {Type: specs.PIDNamespace}, - {Type: specs.MountNamespace}, - }, - UIDMappings: []specs.LinuxIDMapping{ - { - ContainerID: 0, - HostID: uint32(uid), - Size: 1, + for name, conf := range configs(t, noOverlay...) { + t.Run(name, func(t *testing.T) { + testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + defer os.RemoveAll(testDir) + testFile := path.Join(testDir, "testfile") + + spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile") + uid := os.Getuid() + gid := os.Getgid() + spec.Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + {Type: specs.UserNamespace}, + {Type: specs.PIDNamespace}, + {Type: specs.MountNamespace}, }, - }, - GIDMappings: []specs.LinuxIDMapping{ - { - ContainerID: 0, - HostID: uint32(gid), - Size: 1, + UIDMappings: []specs.LinuxIDMapping{ + { + ContainerID: 0, + HostID: uint32(uid), + Size: 1, + }, }, - }, - } + GIDMappings: []specs.LinuxIDMapping{ + { + ContainerID: 0, + HostID: uint32(gid), + Size: 1, + }, + }, + } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: "/tmp", - Source: testDir, - Type: "bind", - }) + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: "/tmp", + Source: testDir, + Type: "bind", + }) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. 
+ args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || ws.ExitStatus() != 0 { - t.Fatalf("container failed, waitStatus: %v", ws) - } - st := syscall.Stat_t{} - if err := syscall.Stat(testFile, &st); err != nil { - t.Fatalf("error stat /testfile: %v", err) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || ws.ExitStatus() != 0 { + t.Fatalf("container failed, waitStatus: %v", ws) + } + st := syscall.Stat_t{} + if err := syscall.Stat(testFile, &st); err != nil { + t.Fatalf("error stat /testfile: %v", err) + } - if st.Uid != uint32(uid) || st.Gid != uint32(gid) { - t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid) - } + if st.Uid != uint32(uid) || st.Gid != uint32(gid) { + t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid) + } + }) } } func TestReadonlyMount(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount") - spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: dir, - Source: dir, - Type: "bind", - Options: []string{"ro"}, - }) - spec.Root.Readonly = false - - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount") + spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: dir, + Type: "bind", + Options: []string{"ro"}, + }) + spec.Root.Readonly = false + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create, start and wait for the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create, start and wait for the container. 
+ args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { - t.Fatalf("container failed, waitStatus: %v", ws) - } + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM { + t.Fatalf("container failed, waitStatus: %v", ws) + } + }) } } // TestAbbreviatedIDs checks that runsc supports using abbreviated container // IDs in place of full IDs. func TestAbbreviatedIDs(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir cids := []string{ - "foo-" + testutil.UniqueContainerID(), - "bar-" + testutil.UniqueContainerID(), - "baz-" + testutil.UniqueContainerID(), + "foo-" + testutil.RandomContainerID(), + "bar-" + testutil.RandomContainerID(), + "baz-" + testutil.RandomContainerID(), } for _, cid := range cids { spec := testutil.NewSpecWithArgs("sleep", "100") - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ @@ -1596,16 +1591,15 @@ func TestAbbreviatedIDs(t *testing.T) { func TestGoferExits(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1634,7 +1628,7 @@ func TestGoferExits(t *testing.T) { } func TestRootNotMount(t *testing.T) { - appSym, err := testutil.FindFile("runsc/container/test_app/test_app") + appSym, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1671,7 +1665,7 @@ func TestRootNotMount(t *testing.T) { } func TestUserLog(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1679,12 +1673,11 @@ func TestUserLog(t *testing.T) { // sched_rr_get_interval = 148 - not implemented in gvisor. 
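TestAbbreviatedIDs in this hunk asserts that a container can be addressed by any unique prefix of its ID. Illustrative only (not the runsc lookup code), the matching rule it exercises amounts to:

    package sketch

    import (
        "fmt"
        "strings"
    )

    // findByPrefix returns the one container ID that starts with prefix, or an
    // error if the prefix is unknown or ambiguous.
    func findByPrefix(ids []string, prefix string) (string, error) {
        match := ""
        for _, id := range ids {
            if !strings.HasPrefix(id, prefix) {
                continue
            }
            if match != "" {
                return "", fmt.Errorf("prefix %q is ambiguous: %q and %q", prefix, match, id)
            }
            match = id
        }
        if match == "" {
            return "", fmt.Errorf("no container ID starts with %q", prefix)
        }
        return match, nil
    }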
spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall=148") conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() dir, err := ioutil.TempDir(testutil.TmpDir(), "user_log_test") if err != nil { @@ -1694,7 +1687,7 @@ func TestUserLog(t *testing.T) { // Create, start and wait for the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, UserLog: userLog, @@ -1718,72 +1711,70 @@ func TestUserLog(t *testing.T) { } func TestWaitOnExitedSandbox(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - // Run a shell that sleeps for 1 second and then exits with a - // non-zero code. - const wantExit = 17 - cmd := fmt.Sprintf("sleep 1; exit %d", wantExit) - spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + // Run a shell that sleeps for 1 second and then exits with a + // non-zero code. + const wantExit = 17 + cmd := fmt.Sprintf("sleep 1; exit %d", wantExit) + spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - // Create and Start the container. - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - c, err := New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - defer c.Destroy() - if err := c.Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + // Create and Start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - // Wait on the sandbox. This will make an RPC to the sandbox - // and get the actual exit status of the application. - ws, err := c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if got := ws.ExitStatus(); got != wantExit { - t.Errorf("got exit status %d, want %d", got, wantExit) - } + // Wait on the sandbox. This will make an RPC to the sandbox + // and get the actual exit status of the application. + ws, err := c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if got := ws.ExitStatus(); got != wantExit { + t.Errorf("got exit status %d, want %d", got, wantExit) + } - // Now the sandbox has exited, but the zombie sandbox process - // still exists. Calling Wait() now will return the sandbox - // exit status. - ws, err = c.Wait() - if err != nil { - t.Fatalf("error waiting on container: %v", err) - } - if got := ws.ExitStatus(); got != wantExit { - t.Errorf("got exit status %d, want %d", got, wantExit) - } + // Now the sandbox has exited, but the zombie sandbox process + // still exists. 
Calling Wait() now will return the sandbox + // exit status. + ws, err = c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } + if got := ws.ExitStatus(); got != wantExit { + t.Errorf("got exit status %d, want %d", got, wantExit) + } + }) } } func TestDestroyNotStarted(t *testing.T) { spec := testutil.NewSpecWithArgs("/bin/sleep", "100") conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create the container and check that it can be destroyed. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1801,16 +1792,15 @@ func TestDestroyStarting(t *testing.T) { for i := 0; i < 10; i++ { spec := testutil.NewSpecWithArgs("/bin/sleep", "100") conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create the container and check that it can be destroyed. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1845,23 +1835,23 @@ func TestDestroyStarting(t *testing.T) { } func TestCreateWorkingDir(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) - - tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - dir := path.Join(tmpDir, "new/working/dir") + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + dir := path.Join(tmpDir, "new/working/dir") - // touch will fail if the directory doesn't exist. - spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) - spec.Process.Cwd = dir - spec.Root.Readonly = true + // touch will fail if the directory doesn't exist. 
+ spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) + spec.Process.Cwd = dir + spec.Root.Readonly = true - if err := run(spec, conf); err != nil { - t.Fatalf("Error running container: %v", err) - } + if err := run(spec, conf); err != nil { + t.Fatalf("Error running container: %v", err) + } + }) } } @@ -1919,15 +1909,14 @@ func TestMountPropagation(t *testing.T) { } conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -1969,81 +1958,81 @@ func TestMountPropagation(t *testing.T) { } func TestMountSymlink(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } + for name, conf := range configs(t, overlay) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + defer os.RemoveAll(dir) - source := path.Join(dir, "source") - target := path.Join(dir, "target") - for _, path := range []string{source, target} { - if err := os.MkdirAll(path, 0777); err != nil { - t.Fatalf("os.MkdirAll(): %v", err) + source := path.Join(dir, "source") + target := path.Join(dir, "target") + for _, path := range []string{source, target} { + if err := os.MkdirAll(path, 0777); err != nil { + t.Fatalf("os.MkdirAll(): %v", err) + } } - } - f, err := os.Create(path.Join(source, "file")) - if err != nil { - t.Fatalf("os.Create(): %v", err) - } - f.Close() + f, err := os.Create(path.Join(source, "file")) + if err != nil { + t.Fatalf("os.Create(): %v", err) + } + f.Close() - link := path.Join(dir, "link") - if err := os.Symlink(target, link); err != nil { - t.Fatalf("os.Symlink(%q, %q): %v", target, link, err) - } + link := path.Join(dir, "link") + if err := os.Symlink(target, link); err != nil { + t.Fatalf("os.Symlink(%q, %q): %v", target, link, err) + } - spec := testutil.NewSpecWithArgs("/bin/sleep", "1000") + spec := testutil.NewSpecWithArgs("/bin/sleep", "1000") - // Mount to a symlink to ensure the mount code will follow it and mount - // at the symlink target. - spec.Mounts = append(spec.Mounts, specs.Mount{ - Type: "bind", - Destination: link, - Source: source, - }) + // Mount to a symlink to ensure the mount code will follow it and mount + // at the symlink target. 
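The mount being set up here targets a symlink, and the test later checks the file through the link target, i.e. the destination is expected to be resolved before the bind mount is created. A small sketch of that resolution step (not the runsc implementation):

    package sketch

    import "path/filepath"

    // resolveMountDest follows any symlinks in the requested destination so the
    // mount lands at the real path, e.g. /dir/link becomes /dir/target.
    // The path must already exist for EvalSymlinks to succeed.
    func resolveMountDest(dest string) (string, error) {
        return filepath.EvalSymlinks(dest)
    }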
+ spec.Mounts = append(spec.Mounts, specs.Mount{ + Type: "bind", + Destination: link, + Source: source, + }) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) - if err != nil { - t.Fatalf("error setting up container: %v", err) - } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() - args := Args{ - ID: testutil.UniqueContainerID(), - Spec: spec, - BundleDir: bundleDir, - } - cont, err := New(conf, args) - if err != nil { - t.Fatalf("creating container: %v", err) - } - defer cont.Destroy() + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("creating container: %v", err) + } + defer cont.Destroy() - if err := cont.Start(conf); err != nil { - t.Fatalf("starting container: %v", err) - } + if err := cont.Start(conf); err != nil { + t.Fatalf("starting container: %v", err) + } - // Check that symlink was resolved and mount was created where the symlink - // is pointing to. - file := path.Join(target, "file") - execArgs := &control.ExecArgs{ - Filename: "/usr/bin/test", - Argv: []string{"test", "-f", file}, - } - if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { - t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) - } + // Check that symlink was resolved and mount was created where the symlink + // is pointing to. + file := path.Join(target, "file") + execArgs := &control.ExecArgs{ + Filename: "/usr/bin/test", + Argv: []string{"test", "-f", file}, + } + if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 { + t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err) + } + }) } } // Check that --net-raw disables the CAP_NET_RAW capability. func TestNetRaw(t *testing.T) { capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10) - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -2106,7 +2095,7 @@ func TestTTYField(t *testing.T) { stop := testutil.StartReaper() defer stop() - testApp, err := testutil.FindFile("runsc/container/test_app/test_app") + testApp, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -2140,16 +2129,15 @@ func TestTTYField(t *testing.T) { } spec := testutil.NewSpecWithArgs(cmd...) - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. 
args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index dc2fb42ce..e3704b453 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -30,15 +30,15 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { var specs []*specs.Spec var ids []string - rootID := testutil.UniqueContainerID() + rootID := testutil.RandomContainerID() for i, cmd := range cmds { spec := testutil.NewSpecWithArgs(cmd...) @@ -52,7 +52,7 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeContainer, specutils.ContainerdSandboxIDAnnotation: rootID, } - ids = append(ids, testutil.UniqueContainerID()) + ids = append(ids, testutil.RandomContainerID()) } specs = append(specs, spec) } @@ -64,23 +64,29 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.") } - var containers []*Container - var bundles []string - cleanup := func() { + var ( + containers []*Container + cleanups []func() + ) + cleanups = append(cleanups, func() { for _, c := range containers { c.Destroy() } - for _, b := range bundles { - os.RemoveAll(b) + }) + cleanupAll := func() { + for _, c := range cleanups { + c() } } + localClean := specutils.MakeCleanup(cleanupAll) + defer localClean.Clean() + for i, spec := range specs { - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { - cleanup() return nil, nil, fmt.Errorf("error setting up container: %v", err) } - bundles = append(bundles, bundleDir) + cleanups = append(cleanups, cleanup) args := Args{ ID: ids[i], @@ -89,17 +95,17 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C } cont, err := New(conf, args) if err != nil { - cleanup() return nil, nil, fmt.Errorf("error creating container: %v", err) } containers = append(containers, cont) if err := cont.Start(conf); err != nil { - cleanup() return nil, nil, fmt.Errorf("error starting container: %v", err) } } - return containers, cleanup, nil + + localClean.Release() + return containers, cleanupAll, nil } type execDesc struct { @@ -135,159 +141,159 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { // TestMultiContainerSanity checks that it is possible to run 2 dead-simple // containers in the same sandbox. func TestMultiContainerSanity(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir - // Setup the containers. 
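startContainers above switches to a cleanup-on-error idiom: register a deferred Clean that runs every accumulated cleanup, then Release it once all containers have started so ownership of the combined cleanup passes to the caller. A simplified standalone sketch of the same flow (the cleanup type below is a stand-in; only Clean and Release mirror the specutils.MakeCleanup calls used above):

    package sketch

    // cleanup is a simplified stand-in for what specutils.MakeCleanup returns.
    type cleanup struct {
        f        func()
        released bool
    }

    func makeCleanup(f func()) *cleanup { return &cleanup{f: f} }

    // Clean runs the registered function unless Release was called first.
    func (c *cleanup) Clean() {
        if !c.released {
            c.f()
        }
    }

    // Release disarms Clean; the caller now owns the cleanup.
    func (c *cleanup) Release() { c.released = true }

    // setupMany shows the shape of startContainers' error handling: any early
    // return runs cleanupAll via the deferred Clean, while a successful return
    // hands cleanupAll to the caller instead.
    func setupMany(steps []func() (func(), error)) (func(), error) {
        var cleanups []func()
        cleanupAll := func() {
            for _, f := range cleanups {
                f()
            }
        }
        local := makeCleanup(cleanupAll)
        defer local.Clean()

        for _, step := range steps {
            c, err := step()
            if err != nil {
                return nil, err // deferred Clean undoes earlier steps
            }
            cleanups = append(cleanups, c)
        }

        local.Release()
        return cleanupAll, nil
    }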
- sleep := []string{"sleep", "100"} - specs, ids := createSpecs(sleep, sleep) - containers, cleanup, err := startContainers(conf, specs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + // Setup the containers. + sleep := []string{"sleep", "100"} + specs, ids := createSpecs(sleep, sleep) + containers, cleanup, err := startContainers(conf, specs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - expectedPL = []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, - } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + }) } } // TestMultiPIDNS checks that it is possible to run 2 dead-simple // containers in the same sandbox with different pidns. func TestMultiPIDNS(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - testSpecs, ids := createSpecs(sleep, sleep) - testSpecs[1].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep) + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + }, }, - }, - } + } - containers, cleanup, err := startContainers(conf, testSpecs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. 
+ expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + }) } } // TestMultiPIDNSPath checks the pidns path. func TestMultiPIDNSPath(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - testSpecs, ids := createSpecs(sleep, sleep, sleep) - testSpecs[0].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/1/ns/pid", + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep, sleep) + testSpecs[0].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, }, - }, - } - testSpecs[1].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/1/ns/pid", + } + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, }, - }, - } - testSpecs[2].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - Path: "/proc/2/ns/pid", + } + testSpecs[2].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/2/ns/pid", + }, }, - }, - } + } - containers, cleanup, err := startContainers(conf, testSpecs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } - if err := waitForProcessList(containers[2], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Check via ps that multiple processes are running. 
+ expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + if err := waitForProcessList(containers[2], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - expectedPL = []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, - } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + expectedPL = []*control.Process{ + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + }) } } func TestMultiContainerWait(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -361,11 +367,11 @@ func TestMultiContainerWait(t *testing.T) { // TestExecWait ensures what we can wait containers and individual processes in the // sandbox that have already exited. func TestExecWait(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -457,11 +463,11 @@ func TestMultiContainerMount(t *testing.T) { }) // Setup the containers. - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -484,174 +490,174 @@ func TestMultiContainerMount(t *testing.T) { // TestMultiContainerSignal checks that it is possible to signal individual // containers without killing the entire sandbox. func TestMultiContainerSignal(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir - // Setup the containers. - sleep := []string{"sleep", "100"} - specs, ids := createSpecs(sleep, sleep) - containers, cleanup, err := startContainers(conf, specs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + // Setup the containers. + sleep := []string{"sleep", "100"} + specs, ids := createSpecs(sleep, sleep) + containers, cleanup, err := startContainers(conf, specs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Check via ps that container 1 process is running. - expectedPL := []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, - } + // Check via ps that container 1 process is running. 
+ expectedPL := []*control.Process{ + {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // Kill process 2. - if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil { - t.Errorf("failed to kill process 2: %v", err) - } + // Kill process 2. + if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil { + t.Errorf("failed to kill process 2: %v", err) + } - // Make sure process 1 is still running. - expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, - } - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Make sure process 1 is still running. + expectedPL = []*control.Process{ + {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // goferPid is reset when container is destroyed. - goferPid := containers[1].GoferPid + // goferPid is reset when container is destroyed. + goferPid := containers[1].GoferPid - // Destroy container and ensure container's gofer process has exited. - if err := containers[1].Destroy(); err != nil { - t.Errorf("failed to destroy container: %v", err) - } - _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) { - cpid, err := syscall.Wait4(goferPid, nil, 0, nil) - return uintptr(cpid), 0, err - }) - if err != syscall.ECHILD { - t.Errorf("error waiting for gofer to exit: %v", err) - } - // Make sure process 1 is still running. - if err := waitForProcessList(containers[0], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) - } + // Destroy container and ensure container's gofer process has exited. + if err := containers[1].Destroy(); err != nil { + t.Errorf("failed to destroy container: %v", err) + } + _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) { + cpid, err := syscall.Wait4(goferPid, nil, 0, nil) + return uintptr(cpid), 0, err + }) + if err != syscall.ECHILD { + t.Errorf("error waiting for gofer to exit: %v", err) + } + // Make sure process 1 is still running. + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } - // Now that process 2 is gone, ensure we get an error trying to - // signal it again. - if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil { - t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID) - } + // Now that process 2 is gone, ensure we get an error trying to + // signal it again. + if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil { + t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID) + } - // Kill process 1. - if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil { - t.Errorf("failed to kill process 1: %v", err) - } + // Kill process 1. + if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil { + t.Errorf("failed to kill process 1: %v", err) + } - // Ensure that container's gofer and sandbox process are no more. 
- err = blockUntilWaitable(containers[0].GoferPid) - if err != nil && err != syscall.ECHILD { - t.Errorf("error waiting for gofer to exit: %v", err) - } + // Ensure that container's gofer and sandbox process are no more. + err = blockUntilWaitable(containers[0].GoferPid) + if err != nil && err != syscall.ECHILD { + t.Errorf("error waiting for gofer to exit: %v", err) + } - err = blockUntilWaitable(containers[0].Sandbox.Pid) - if err != nil && err != syscall.ECHILD { - t.Errorf("error waiting for sandbox to exit: %v", err) - } + err = blockUntilWaitable(containers[0].Sandbox.Pid) + if err != nil && err != syscall.ECHILD { + t.Errorf("error waiting for sandbox to exit: %v", err) + } - // The sentry should be gone, so signaling should yield an error. - if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil { - t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID) - } + // The sentry should be gone, so signaling should yield an error. + if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil { + t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID) + } - if err := containers[0].Destroy(); err != nil { - t.Errorf("failed to destroy container: %v", err) - } + if err := containers[0].Destroy(); err != nil { + t.Errorf("failed to destroy container: %v", err) + } + }) } } // TestMultiContainerDestroy checks that container are properly cleaned-up when // they are destroyed. func TestMultiContainerDestroy(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // First container will remain intact while the second container is killed. - podSpecs, ids := createSpecs( - []string{"sleep", "100"}, - []string{app, "fork-bomb"}) - - // Run the fork bomb in a PID namespace to prevent processes to be - // re-parented to PID=1 in the root container. - podSpecs[1].Linux = &specs.Linux{ - Namespaces: []specs.LinuxNamespace{{Type: "pid"}}, - } - containers, cleanup, err := startContainers(conf, podSpecs, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // First container will remain intact while the second container is killed. + podSpecs, ids := createSpecs( + []string{"sleep", "100"}, + []string{app, "fork-bomb"}) + + // Run the fork bomb in a PID namespace to prevent processes to be + // re-parented to PID=1 in the root container. + podSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{{Type: "pid"}}, + } + containers, cleanup, err := startContainers(conf, podSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - // Exec more processes to ensure signal all works for exec'd processes too. 
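The gofer and sandbox waits above treat ECHILD as success: once the process has exited and been reaped, wait4 has nothing left to report. A compact sketch of that retry-on-EINTR, done-on-ECHILD loop:

    package sketch

    import "syscall"

    // waitGone blocks until pid can no longer be waited on, retrying interrupted
    // waits and treating ECHILD (nothing left to reap) as completion.
    func waitGone(pid int) error {
        for {
            _, err := syscall.Wait4(pid, nil, 0, nil)
            switch err {
            case syscall.EINTR:
                continue // interrupted by a signal; retry
            case nil, syscall.ECHILD:
                return nil // reaped now, or already gone
            default:
                return err
            }
        }
    }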
- args := &control.ExecArgs{ - Filename: app, - Argv: []string{app, "fork-bomb"}, - } - if _, err := containers[1].Execute(args); err != nil { - t.Fatalf("error exec'ing: %v", err) - } + // Exec more processes to ensure signal all works for exec'd processes too. + args := &control.ExecArgs{ + Filename: app, + Argv: []string{app, "fork-bomb"}, + } + if _, err := containers[1].Execute(args); err != nil { + t.Fatalf("error exec'ing: %v", err) + } - // Let it brew... - time.Sleep(500 * time.Millisecond) + // Let it brew... + time.Sleep(500 * time.Millisecond) - if err := containers[1].Destroy(); err != nil { - t.Fatalf("error destroying container: %v", err) - } + if err := containers[1].Destroy(); err != nil { + t.Fatalf("error destroying container: %v", err) + } - // Check that destroy killed all processes belonging to the container and - // waited for them to exit before returning. - pss, err := containers[0].Sandbox.Processes("") - if err != nil { - t.Fatalf("error getting process data from sandbox: %v", err) - } - expectedPL := []*control.Process{{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}} - if r, err := procListsEqual(pss, expectedPL); !r { - t.Errorf("container got process list: %s, want: %s: error: %v", - procListToString(pss), procListToString(expectedPL), err) - } + // Check that destroy killed all processes belonging to the container and + // waited for them to exit before returning. + pss, err := containers[0].Sandbox.Processes("") + if err != nil { + t.Fatalf("error getting process data from sandbox: %v", err) + } + expectedPL := []*control.Process{{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}} + if r, err := procListsEqual(pss, expectedPL); !r { + t.Errorf("container got process list: %s, want: %s: error: %v", + procListToString(pss), procListToString(expectedPL), err) + } - // Check that cont.Destroy is safe to call multiple times. - if err := containers[1].Destroy(); err != nil { - t.Errorf("error destroying container: %v", err) - } + // Check that cont.Destroy is safe to call multiple times. + if err := containers[1].Destroy(); err != nil { + t.Errorf("error destroying container: %v", err) + } + }) } } func TestMultiContainerProcesses(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -706,11 +712,11 @@ func TestMultiContainerProcesses(t *testing.T) { // TestMultiContainerKillAll checks that all process that belong to a container // are killed when SIGKILL is sent to *all* processes in that container. func TestMultiContainerKillAll(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -721,7 +727,7 @@ func TestMultiContainerKillAll(t *testing.T) { {killContainer: true}, {killContainer: false}, } { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -739,11 +745,11 @@ func TestMultiContainerKillAll(t *testing.T) { // Wait until all processes are created. 
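The expected counts used just below, int(math.Pow(2, 3) - 1) and int(math.Pow(2, 5) - 1), are the sizes of binary process trees three and five levels deep, i.e. 7 and 31 processes. Since these are exact integers, the same values can be computed without floating point:

    package sketch

    // fullTreeProcs returns the number of processes in a binary process tree
    // with the given number of levels: 2^levels - 1 (3 -> 7, 5 -> 31).
    func fullTreeProcs(levels uint) int {
        return (1 << levels) - 1
    }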
rootProcCount := int(math.Pow(2, 3) - 1) if err := waitForProcessCount(containers[0], rootProcCount); err != nil { - t.Fatal(err) + t.Fatalf("error waitting for processes: %v", err) } procCount := int(math.Pow(2, 5) - 1) if err := waitForProcessCount(containers[1], procCount); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } // Exec more processes to ensure signal works for exec'd processes too. @@ -757,7 +763,7 @@ func TestMultiContainerKillAll(t *testing.T) { // Wait for these new processes to start. procCount += int(math.Pow(2, 3) - 1) if err := waitForProcessCount(containers[1], procCount); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } if tc.killContainer { @@ -790,11 +796,11 @@ func TestMultiContainerKillAll(t *testing.T) { // Check that all processes are gone. if err := waitForProcessCount(containers[1], 0); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } // Check that root container was not affected. if err := waitForProcessCount(containers[0], rootProcCount); err != nil { - t.Fatal(err) + t.Fatalf("error waiting for processes: %v", err) } } } @@ -805,17 +811,16 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { []string{"/bin/sleep", "100"}) conf := testutil.TestConfig(t) - rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(rootBundleDir) + defer cleanup() rootArgs := Args{ ID: ids[0], Spec: specs[0], - BundleDir: rootBundleDir, + BundleDir: bundleDir, } root, err := New(conf, rootArgs) if err != nil { @@ -827,11 +832,11 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) { } // Create and destroy sub-container. - bundleDir, err := testutil.SetupBundleDir(specs[1]) + bundleDir, cleanupSub, err := testutil.SetupBundleDir(specs[1]) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanupSub() args := Args{ ID: ids[1], @@ -859,17 +864,16 @@ func TestMultiContainerDestroyStarting(t *testing.T) { specs, ids := createSpecs(cmds...) 
conf := testutil.TestConfig(t) - rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf) + rootDir, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(rootBundleDir) + defer cleanup() rootArgs := Args{ ID: ids[0], Spec: specs[0], - BundleDir: rootBundleDir, + BundleDir: bundleDir, } root, err := New(conf, rootArgs) if err != nil { @@ -886,16 +890,16 @@ func TestMultiContainerDestroyStarting(t *testing.T) { continue // skip root container } - bundleDir, err := testutil.SetupBundleDir(specs[i]) + bundleDir, cleanup, err := testutil.SetupBundleDir(specs[i]) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() rootArgs := Args{ ID: ids[i], Spec: specs[i], - BundleDir: rootBundleDir, + BundleDir: bundleDir, } cont, err := New(conf, rootArgs) if err != nil { @@ -937,11 +941,11 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) { script := fmt.Sprintf("if [ -f %q ]; then exit 1; else touch %q; fi", filename, filename) cmd := []string{"sh", "-c", script} - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -977,7 +981,7 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) { // TestMultiContainerContainerDestroyStress tests that IO operations continue // to work after containers have been stopped and gofers killed. func TestMultiContainerContainerDestroyStress(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1007,12 +1011,11 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { childrenIDs := allIDs[1:] conf := testutil.TestConfig(t) - rootDir, bundleDir, err := testutil.SetupContainer(rootSpec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(rootSpec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Start root container. rootArgs := Args{ @@ -1038,11 +1041,11 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { var children []*Container for j, spec := range specs { - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() args := Args{ ID: ids[j], @@ -1080,306 +1083,306 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) { // Test that pod shared mounts are properly mounted in 2 containers and that // changes from one container is reflected in the other. func TestMultiContainerSharedMount(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. 
- sleep := []string{"sleep", "100"} - podSpec, ids := createSpecs(sleep, sleep) - mnt0 := specs.Mount{ - Destination: "/mydir/test", - Source: "/some/dir", - Type: "tmpfs", - Options: nil, - } - podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: nil, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) - mnt1 := mnt0 - mnt1.Destination = "/mydir2/test2" - podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) - createSharedMount(mnt0, "test-mount", podSpec...) + createSharedMount(mnt0, "test-mount", podSpec...) - containers, cleanup, err := startContainers(conf, podSpec, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - file0 := path.Join(mnt0.Destination, "abc") - file1 := path.Join(mnt1.Destination, "abc") - execs := []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, - desc: "directory is mounted in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, - desc: "directory is mounted in container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/touch", file0}, - desc: "create file in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file appears in container1", - }, - { - c: containers[1], - cmd: []string{"/bin/rm", file1}, - desc: "file removed from container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-f", file0}, - desc: "file removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-f", file1}, - desc: "file removed from container1", - }, - { - c: containers[1], - cmd: []string{"/bin/mkdir", file1}, - desc: "create directory in container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-d", file0}, - desc: "dir appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-d", file1}, - desc: "dir appears in container1", - }, - { - c: containers[0], - cmd: []string{"/bin/rmdir", file0}, - desc: "create directory in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-d", file0}, - desc: "dir removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-d", file1}, - desc: "dir removed from container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, + desc: "directory is mounted in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, + desc: 
"directory is mounted in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + desc: "create file in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file appears in container1", + }, + { + c: containers[1], + cmd: []string{"/bin/rm", file1}, + desc: "file removed from container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-f", file0}, + desc: "file removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-f", file1}, + desc: "file removed from container1", + }, + { + c: containers[1], + cmd: []string{"/bin/mkdir", file1}, + desc: "create directory in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", file0}, + desc: "dir appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", file1}, + desc: "dir appears in container1", + }, + { + c: containers[0], + cmd: []string{"/bin/rmdir", file0}, + desc: "create directory in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-d", file0}, + desc: "dir removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-d", file1}, + desc: "dir removed from container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + }) } } // Test that pod mounts are mounted as readonly when requested. func TestMultiContainerSharedMountReadonly(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - podSpec, ids := createSpecs(sleep, sleep) - mnt0 := specs.Mount{ - Destination: "/mydir/test", - Source: "/some/dir", - Type: "tmpfs", - Options: []string{"ro"}, - } - podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: []string{"ro"}, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) - mnt1 := mnt0 - mnt1.Destination = "/mydir2/test2" - podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) - createSharedMount(mnt0, "test-mount", podSpec...) + createSharedMount(mnt0, "test-mount", podSpec...) 
- containers, cleanup, err := startContainers(conf, podSpec, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - file0 := path.Join(mnt0.Destination, "abc") - file1 := path.Join(mnt1.Destination, "abc") - execs := []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, - desc: "directory is mounted in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, - desc: "directory is mounted in container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/touch", file0}, - want: 1, - desc: "fails to write to container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/touch", file1}, - want: 1, - desc: "fails to write to container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-d", mnt0.Destination}, + desc: "directory is mounted in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-d", mnt1.Destination}, + desc: "directory is mounted in container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + want: 1, + desc: "fails to write to container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/touch", file1}, + want: 1, + desc: "fails to write to container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + }) } } // Test that shared pod mounts continue to work after container is restarted. func TestMultiContainerSharedMountRestart(t *testing.T) { - for _, conf := range configs(t, all...) { - t.Logf("Running test with conf: %+v", conf) - - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - conf.RootDir = rootDir - - // Setup the containers. - sleep := []string{"sleep", "100"} - podSpec, ids := createSpecs(sleep, sleep) - mnt0 := specs.Mount{ - Destination: "/mydir/test", - Source: "/some/dir", - Type: "tmpfs", - Options: nil, - } - podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Setup the containers. + sleep := []string{"sleep", "100"} + podSpec, ids := createSpecs(sleep, sleep) + mnt0 := specs.Mount{ + Destination: "/mydir/test", + Source: "/some/dir", + Type: "tmpfs", + Options: nil, + } + podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) - mnt1 := mnt0 - mnt1.Destination = "/mydir2/test2" - podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) + mnt1 := mnt0 + mnt1.Destination = "/mydir2/test2" + podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1) - createSharedMount(mnt0, "test-mount", podSpec...) + createSharedMount(mnt0, "test-mount", podSpec...) 
- containers, cleanup, err := startContainers(conf, podSpec, ids) - if err != nil { - t.Fatalf("error starting containers: %v", err) - } - defer cleanup() + containers, cleanup, err := startContainers(conf, podSpec, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() - file0 := path.Join(mnt0.Destination, "abc") - file1 := path.Join(mnt1.Destination, "abc") - execs := []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/touch", file0}, - desc: "create file in container0", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file appears in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file appears in container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + file0 := path.Join(mnt0.Destination, "abc") + file1 := path.Join(mnt1.Destination, "abc") + execs := []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/touch", file0}, + desc: "create file in container0", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file appears in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file appears in container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } - containers[1].Destroy() + containers[1].Destroy() - bundleDir, err := testutil.SetupBundleDir(podSpec[1]) - if err != nil { - t.Fatalf("error restarting container: %v", err) - } - defer os.RemoveAll(bundleDir) + bundleDir, cleanup, err := testutil.SetupBundleDir(podSpec[1]) + if err != nil { + t.Fatalf("error restarting container: %v", err) + } + defer cleanup() - args := Args{ - ID: ids[1], - Spec: podSpec[1], - BundleDir: bundleDir, - } - containers[1], err = New(conf, args) - if err != nil { - t.Fatalf("error creating container: %v", err) - } - if err := containers[1].Start(conf); err != nil { - t.Fatalf("error starting container: %v", err) - } + args := Args{ + ID: ids[1], + Spec: podSpec[1], + BundleDir: bundleDir, + } + containers[1], err = New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + if err := containers[1].Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } - execs = []execDesc{ - { - c: containers[0], - cmd: []string{"/usr/bin/test", "-f", file0}, - desc: "file is still in container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "-f", file1}, - desc: "file is still in container1", - }, - { - c: containers[1], - cmd: []string{"/bin/rm", file1}, - desc: "file removed from container1", - }, - { - c: containers[0], - cmd: []string{"/usr/bin/test", "!", "-f", file0}, - desc: "file removed from container0", - }, - { - c: containers[1], - cmd: []string{"/usr/bin/test", "!", "-f", file1}, - desc: "file removed from container1", - }, - } - if err := execMany(execs); err != nil { - t.Fatal(err.Error()) - } + execs = []execDesc{ + { + c: containers[0], + cmd: []string{"/usr/bin/test", "-f", file0}, + desc: "file is still in container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "-f", file1}, + desc: "file is still in container1", + }, + { + c: containers[1], + cmd: []string{"/bin/rm", file1}, + desc: "file removed from container1", + }, + { + c: containers[0], + cmd: []string{"/usr/bin/test", "!", "-f", file0}, + desc: "file removed from container0", + }, + { + c: containers[1], + cmd: []string{"/usr/bin/test", "!", "-f", file1}, + desc: "file removed 
from container1", + }, + } + if err := execMany(execs); err != nil { + t.Fatal(err.Error()) + } + }) } } // Test that unsupported pod mounts options are ignored when matching master and // slave mounts. func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -1428,7 +1431,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { // Test that one container can send an FD to another container, even though // they have distinct MountNamespaces. func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { - app, err := testutil.FindFile("runsc/container/test_app/test_app") + app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } @@ -1457,11 +1460,11 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { Type: "tmpfs", } - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -1494,11 +1497,11 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) { // Test that container is destroyed when Gofer is killed. func TestMultiContainerGoferKilled(t *testing.T) { - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -1581,11 +1584,11 @@ func TestMultiContainerLoadSandbox(t *testing.T) { sleep := []string{"sleep", "100"} specs, ids := createSpecs(sleep, sleep, sleep) - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir @@ -1614,7 +1617,7 @@ func TestMultiContainerLoadSandbox(t *testing.T) { } // Create a valid but empty container directory. 
- randomCID := testutil.UniqueContainerID() + randomCID := testutil.RandomContainerID() dir = filepath.Join(conf.RootDir, randomCID) if err := os.MkdirAll(dir, 0755); err != nil { t.Fatalf("os.MkdirAll(%q)=%v", dir, err) @@ -1681,11 +1684,11 @@ func TestMultiContainerRunNonRoot(t *testing.T) { Type: "bind", }) - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() conf := testutil.TestConfig(t) conf.RootDir = rootDir diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index f80852414..bac177a88 100644 --- a/runsc/container/shared_volume_test.go +++ b/runsc/container/shared_volume_test.go @@ -24,8 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/boot" - "gvisor.dev/gvisor/runsc/testutil" ) // TestSharedVolume checks that modifications to a volume mount are propagated @@ -33,7 +33,6 @@ import ( func TestSharedVolume(t *testing.T) { conf := testutil.TestConfig(t) conf.FileAccess = boot.FileAccessShared - t.Logf("Running test with conf: %+v", conf) // Main process just sleeps. We will use "exec" to probe the state of // the filesystem. @@ -44,16 +43,15 @@ func TestSharedVolume(t *testing.T) { t.Fatalf("TempDir failed: %v", err) } - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } @@ -192,7 +190,6 @@ func checkFile(c *Container, filename string, want []byte) error { func TestSharedVolumeFile(t *testing.T) { conf := testutil.TestConfig(t) conf.FileAccess = boot.FileAccessShared - t.Logf("Running test with conf: %+v", conf) // Main process just sleeps. We will use "exec" to probe the state of // the filesystem. @@ -203,16 +200,15 @@ func TestSharedVolumeFile(t *testing.T) { t.Fatalf("TempDir failed: %v", err) } - rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("error setting up container: %v", err) } - defer os.RemoveAll(rootDir) - defer os.RemoveAll(bundleDir) + defer cleanup() // Create and start the container. 
args := Args{ - ID: testutil.UniqueContainerID(), + ID: testutil.RandomContainerID(), Spec: spec, BundleDir: bundleDir, } diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD deleted file mode 100644 index 0defbd9fc..000000000 --- a/runsc/container/test_app/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_binary") - -package(licenses = ["notice"]) - -go_binary( - name = "test_app", - testonly = 1, - srcs = [ - "fds.go", - "test_app.go", - ], - pure = True, - visibility = ["//runsc/container:__pkg__"], - deps = [ - "//pkg/unet", - "//runsc/flag", - "//runsc/testutil", - "@com_github_google_subcommands//:go_default_library", - "@com_github_kr_pty//:go_default_library", - ], -) diff --git a/runsc/container/test_app/fds.go b/runsc/container/test_app/fds.go deleted file mode 100644 index 2a146a2c3..000000000 --- a/runsc/container/test_app/fds.go +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "context" - "io/ioutil" - "log" - "os" - "time" - - "github.com/google/subcommands" - "gvisor.dev/gvisor/pkg/unet" - "gvisor.dev/gvisor/runsc/flag" - "gvisor.dev/gvisor/runsc/testutil" -) - -const fileContents = "foobarbaz" - -// fdSender will open a file and send the FD over a unix domain socket. -type fdSender struct { - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*fdSender) Name() string { - return "fd_sender" -} - -// Synopsis implements subcommands.Command.Synopsys. -func (*fdSender) Synopsis() string { - return "creates a file and sends the FD over the socket" -} - -// Usage implements subcommands.Command.Usage. -func (*fdSender) Usage() string { - return "fd_sender " -} - -// SetFlags implements subcommands.Command.SetFlags. -func (fds *fdSender) SetFlags(f *flag.FlagSet) { - f.StringVar(&fds.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. -func (fds *fdSender) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if fds.socketPath == "" { - log.Fatalf("socket flag must be set") - } - - dir, err := ioutil.TempDir("", "") - if err != nil { - log.Fatalf("TempDir failed: %v", err) - } - - fileToSend, err := ioutil.TempFile(dir, "") - if err != nil { - log.Fatalf("TempFile failed: %v", err) - } - defer fileToSend.Close() - - if _, err := fileToSend.WriteString(fileContents); err != nil { - log.Fatalf("Write(%q) failed: %v", fileContents, err) - } - - // Receiver may not be started yet, so try connecting in a poll loop. 
- var s *unet.Socket - if err := testutil.Poll(func() error { - var err error - s, err = unet.Connect(fds.socketPath, true /* SEQPACKET, so we can send empty message with FD */) - return err - }, 10*time.Second); err != nil { - log.Fatalf("Error connecting to socket %q: %v", fds.socketPath, err) - } - defer s.Close() - - w := s.Writer(true) - w.ControlMessage.PackFDs(int(fileToSend.Fd())) - if _, err := w.WriteVec([][]byte{[]byte{'a'}}); err != nil { - log.Fatalf("Error sending FD %q over socket %q: %v", fileToSend.Fd(), fds.socketPath, err) - } - - log.Print("FD SENDER exiting successfully") - return subcommands.ExitSuccess -} - -// fdReceiver receives an FD from a unix domain socket and does things to it. -type fdReceiver struct { - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*fdReceiver) Name() string { - return "fd_receiver" -} - -// Synopsis implements subcommands.Command.Synopsys. -func (*fdReceiver) Synopsis() string { - return "reads an FD from a unix socket, and then does things to it" -} - -// Usage implements subcommands.Command.Usage. -func (*fdReceiver) Usage() string { - return "fd_receiver " -} - -// SetFlags implements subcommands.Command.SetFlags. -func (fdr *fdReceiver) SetFlags(f *flag.FlagSet) { - f.StringVar(&fdr.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. -func (fdr *fdReceiver) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if fdr.socketPath == "" { - log.Fatalf("Flags cannot be empty, given: socket: %q", fdr.socketPath) - } - - ss, err := unet.BindAndListen(fdr.socketPath, true /* packet */) - if err != nil { - log.Fatalf("BindAndListen(%q) failed: %v", fdr.socketPath, err) - } - defer ss.Close() - - var s *unet.Socket - c := make(chan error, 1) - go func() { - var err error - s, err = ss.Accept() - c <- err - }() - - select { - case err := <-c: - if err != nil { - log.Fatalf("Accept() failed: %v", err) - } - case <-time.After(10 * time.Second): - log.Fatalf("Timeout waiting for accept") - } - - r := s.Reader(true) - r.EnableFDs(1) - b := [][]byte{{'a'}} - if n, err := r.ReadVec(b); n != 1 || err != nil { - log.Fatalf("ReadVec got n=%d err %v (wanted 0, nil)", n, err) - } - - fds, err := r.ExtractFDs() - if err != nil { - log.Fatalf("ExtractFD() got err %v", err) - } - if len(fds) != 1 { - log.Fatalf("ExtractFD() got %d FDs, wanted 1", len(fds)) - } - fd := fds[0] - - file := os.NewFile(uintptr(fd), "received file") - defer file.Close() - if _, err := file.Seek(0, os.SEEK_SET); err != nil { - log.Fatalf("Seek(0, 0) failed: %v", err) - } - - got, err := ioutil.ReadAll(file) - if err != nil { - log.Fatalf("ReadAll failed: %v", err) - } - if string(got) != fileContents { - log.Fatalf("ReadAll got %q want %q", string(got), fileContents) - } - - log.Print("FD RECEIVER exiting successfully") - return subcommands.ExitSuccess -} diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go deleted file mode 100644 index 5f1c4b7d6..000000000 --- a/runsc/container/test_app/test_app.go +++ /dev/null @@ -1,394 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary test_app is like a swiss knife for tests that need to run anything -// inside the sandbox. New functionality can be added with new commands. -package main - -import ( - "context" - "fmt" - "io" - "io/ioutil" - "log" - "net" - "os" - "os/exec" - "regexp" - "strconv" - sys "syscall" - "time" - - "github.com/google/subcommands" - "github.com/kr/pty" - "gvisor.dev/gvisor/runsc/flag" - "gvisor.dev/gvisor/runsc/testutil" -) - -func main() { - subcommands.Register(subcommands.HelpCommand(), "") - subcommands.Register(subcommands.FlagsCommand(), "") - subcommands.Register(new(capability), "") - subcommands.Register(new(fdReceiver), "") - subcommands.Register(new(fdSender), "") - subcommands.Register(new(forkBomb), "") - subcommands.Register(new(ptyRunner), "") - subcommands.Register(new(reaper), "") - subcommands.Register(new(syscall), "") - subcommands.Register(new(taskTree), "") - subcommands.Register(new(uds), "") - - flag.Parse() - - exitCode := subcommands.Execute(context.Background()) - os.Exit(int(exitCode)) -} - -type uds struct { - fileName string - socketPath string -} - -// Name implements subcommands.Command.Name. -func (*uds) Name() string { - return "uds" -} - -// Synopsis implements subcommands.Command.Synopsys. -func (*uds) Synopsis() string { - return "creates unix domain socket client and server. Client sends a contant flow of sequential numbers. Server prints them to --file" -} - -// Usage implements subcommands.Command.Usage. -func (*uds) Usage() string { - return "uds " -} - -// SetFlags implements subcommands.Command.SetFlags. -func (c *uds) SetFlags(f *flag.FlagSet) { - f.StringVar(&c.fileName, "file", "", "name of output file") - f.StringVar(&c.socketPath, "socket", "", "path to socket") -} - -// Execute implements subcommands.Command.Execute. 
-func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if c.fileName == "" || c.socketPath == "" { - log.Fatalf("Flags cannot be empty, given: fileName: %q, socketPath: %q", c.fileName, c.socketPath) - return subcommands.ExitFailure - } - outputFile, err := os.OpenFile(c.fileName, os.O_WRONLY|os.O_CREATE, 0666) - if err != nil { - log.Fatal("error opening output file:", err) - } - - defer os.Remove(c.socketPath) - - listener, err := net.Listen("unix", c.socketPath) - if err != nil { - log.Fatalf("error listening on socket %q: %v", c.socketPath, err) - } - - go server(listener, outputFile) - for i := 0; ; i++ { - conn, err := net.Dial("unix", c.socketPath) - if err != nil { - log.Fatal("error dialing:", err) - } - if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil { - log.Fatal("error writing:", err) - } - conn.Close() - time.Sleep(100 * time.Millisecond) - } -} - -func server(listener net.Listener, out *os.File) { - buf := make([]byte, 16) - - for { - c, err := listener.Accept() - if err != nil { - log.Fatal("error accepting connection:", err) - } - nr, err := c.Read(buf) - if err != nil { - log.Fatal("error reading from buf:", err) - } - data := buf[0:nr] - fmt.Fprint(out, string(data)+"\n") - } -} - -type taskTree struct { - depth int - width int - pause bool -} - -// Name implements subcommands.Command. -func (*taskTree) Name() string { - return "task-tree" -} - -// Synopsis implements subcommands.Command. -func (*taskTree) Synopsis() string { - return "creates a tree of tasks" -} - -// Usage implements subcommands.Command. -func (*taskTree) Usage() string { - return "task-tree " -} - -// SetFlags implements subcommands.Command. -func (c *taskTree) SetFlags(f *flag.FlagSet) { - f.IntVar(&c.depth, "depth", 1, "number of levels to create") - f.IntVar(&c.width, "width", 1, "number of tasks at each level") - f.BoolVar(&c.pause, "pause", false, "whether the tasks should pause perpetually") -} - -// Execute implements subcommands.Command. -func (c *taskTree) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - stop := testutil.StartReaper() - defer stop() - - if c.depth == 0 { - log.Printf("Child sleeping, PID: %d\n", os.Getpid()) - select {} - } - log.Printf("Parent %d sleeping, PID: %d\n", c.depth, os.Getpid()) - - var cmds []*exec.Cmd - for i := 0; i < c.width; i++ { - cmd := exec.Command( - "/proc/self/exe", c.Name(), - "--depth", strconv.Itoa(c.depth-1), - "--width", strconv.Itoa(c.width), - "--pause", strconv.FormatBool(c.pause)) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - if err := cmd.Start(); err != nil { - log.Fatal("failed to call self:", err) - } - cmds = append(cmds, cmd) - } - - for _, c := range cmds { - c.Wait() - } - - if c.pause { - select {} - } - - return subcommands.ExitSuccess -} - -type forkBomb struct { - delay time.Duration -} - -// Name implements subcommands.Command. -func (*forkBomb) Name() string { - return "fork-bomb" -} - -// Synopsis implements subcommands.Command. -func (*forkBomb) Synopsis() string { - return "creates child process until the end of times" -} - -// Usage implements subcommands.Command. -func (*forkBomb) Usage() string { - return "fork-bomb " -} - -// SetFlags implements subcommands.Command. -func (c *forkBomb) SetFlags(f *flag.FlagSet) { - f.DurationVar(&c.delay, "delay", 100*time.Millisecond, "amount of time to delay creation of child") -} - -// Execute implements subcommands.Command. 
-func (c *forkBomb) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - time.Sleep(c.delay) - - cmd := exec.Command("/proc/self/exe", c.Name()) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - log.Fatal("failed to call self:", err) - } - return subcommands.ExitSuccess -} - -type reaper struct{} - -// Name implements subcommands.Command. -func (*reaper) Name() string { - return "reaper" -} - -// Synopsis implements subcommands.Command. -func (*reaper) Synopsis() string { - return "reaps all children in a loop" -} - -// Usage implements subcommands.Command. -func (*reaper) Usage() string { - return "reaper " -} - -// SetFlags implements subcommands.Command. -func (*reaper) SetFlags(*flag.FlagSet) {} - -// Execute implements subcommands.Command. -func (c *reaper) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - stop := testutil.StartReaper() - defer stop() - select {} -} - -type syscall struct { - sysno uint64 -} - -// Name implements subcommands.Command. -func (*syscall) Name() string { - return "syscall" -} - -// Synopsis implements subcommands.Command. -func (*syscall) Synopsis() string { - return "syscall makes a syscall" -} - -// Usage implements subcommands.Command. -func (*syscall) Usage() string { - return "syscall " -} - -// SetFlags implements subcommands.Command. -func (s *syscall) SetFlags(f *flag.FlagSet) { - f.Uint64Var(&s.sysno, "syscall", 0, "syscall to call") -} - -// Execute implements subcommands.Command. -func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if _, _, errno := sys.Syscall(uintptr(s.sysno), 0, 0, 0); errno != 0 { - fmt.Printf("syscall(%d, 0, 0...) failed: %v\n", s.sysno, errno) - } else { - fmt.Printf("syscall(%d, 0, 0...) success\n", s.sysno) - } - return subcommands.ExitSuccess -} - -type capability struct { - enabled uint64 - disabled uint64 -} - -// Name implements subcommands.Command. -func (*capability) Name() string { - return "capability" -} - -// Synopsis implements subcommands.Command. -func (*capability) Synopsis() string { - return "checks if effective capabilities are set/unset" -} - -// Usage implements subcommands.Command. -func (*capability) Usage() string { - return "capability [--enabled=number] [--disabled=number]" -} - -// SetFlags implements subcommands.Command. -func (c *capability) SetFlags(f *flag.FlagSet) { - f.Uint64Var(&c.enabled, "enabled", 0, "") - f.Uint64Var(&c.disabled, "disabled", 0, "") -} - -// Execute implements subcommands.Command. 
-func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if c.enabled == 0 && c.disabled == 0 { - fmt.Println("One of the flags must be set") - return subcommands.ExitUsageError - } - - status, err := ioutil.ReadFile("/proc/self/status") - if err != nil { - fmt.Printf("Error reading %q: %v\n", "proc/self/status", err) - return subcommands.ExitFailure - } - re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n") - matches := re.FindStringSubmatch(string(status)) - if matches == nil || len(matches) != 2 { - fmt.Printf("Effective capabilities not found in\n%s\n", status) - return subcommands.ExitFailure - } - caps, err := strconv.ParseUint(matches[1], 16, 64) - if err != nil { - fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err) - return subcommands.ExitFailure - } - - if c.enabled != 0 && (caps&c.enabled) != c.enabled { - fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps) - return subcommands.ExitFailure - } - if c.disabled != 0 && (caps&c.disabled) != 0 { - fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps) - return subcommands.ExitFailure - } - - return subcommands.ExitSuccess -} - -type ptyRunner struct{} - -// Name implements subcommands.Command. -func (*ptyRunner) Name() string { - return "pty-runner" -} - -// Synopsis implements subcommands.Command. -func (*ptyRunner) Synopsis() string { - return "runs the given command with an open pty terminal" -} - -// Usage implements subcommands.Command. -func (*ptyRunner) Usage() string { - return "pty-runner [command]" -} - -// SetFlags implements subcommands.Command.SetFlags. -func (*ptyRunner) SetFlags(f *flag.FlagSet) {} - -// Execute implements subcommands.Command. -func (*ptyRunner) Execute(_ context.Context, fs *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus { - c := exec.Command(fs.Args()[0], fs.Args()[1:]...) - f, err := pty.Start(c) - if err != nil { - fmt.Printf("pty.Start failed: %v", err) - return subcommands.ExitFailure - } - defer f.Close() - - // Copy stdout from the command to keep this process alive until the - // subprocess exits. - io.Copy(os.Stdout, f) - - return subcommands.ExitSuccess -} diff --git a/runsc/criutil/BUILD b/runsc/criutil/BUILD deleted file mode 100644 index 8a571a000..000000000 --- a/runsc/criutil/BUILD +++ /dev/null @@ -1,11 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "criutil", - testonly = 1, - srcs = ["criutil.go"], - visibility = ["//:sandbox"], - deps = ["//runsc/testutil"], -) diff --git a/runsc/criutil/criutil.go b/runsc/criutil/criutil.go deleted file mode 100644 index 773f5a1c4..000000000 --- a/runsc/criutil/criutil.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Package criutil contains utility functions for interacting with the -// Container Runtime Interface (CRI), principally via the crictl command line -// tool. This requires critools to be installed on the local system. -package criutil - -import ( - "encoding/json" - "fmt" - "os" - "os/exec" - "strings" - "time" - - "gvisor.dev/gvisor/runsc/testutil" -) - -const endpointPrefix = "unix://" - -// Crictl contains information required to run the crictl utility. -type Crictl struct { - executable string - timeout time.Duration - imageEndpoint string - runtimeEndpoint string -} - -// NewCrictl returns a Crictl configured with a timeout and an endpoint over -// which it will talk to containerd. -func NewCrictl(timeout time.Duration, endpoint string) *Crictl { - // Bazel doesn't pass PATH through, assume the location of crictl - // unless specified by environment variable. - executable := os.Getenv("CRICTL_PATH") - if executable == "" { - executable = "/usr/local/bin/crictl" - } - return &Crictl{ - executable: executable, - timeout: timeout, - imageEndpoint: endpointPrefix + endpoint, - runtimeEndpoint: endpointPrefix + endpoint, - } -} - -// Pull pulls an container image. It corresponds to `crictl pull`. -func (cc *Crictl) Pull(imageName string) error { - _, err := cc.run("pull", imageName) - return err -} - -// RunPod creates a sandbox. It corresponds to `crictl runp`. -func (cc *Crictl) RunPod(sbSpecFile string) (string, error) { - podID, err := cc.run("runp", sbSpecFile) - if err != nil { - return "", fmt.Errorf("runp failed: %v", err) - } - // Strip the trailing newline from crictl output. - return strings.TrimSpace(podID), nil -} - -// Create creates a container within a sandbox. It corresponds to `crictl -// create`. -func (cc *Crictl) Create(podID, contSpecFile, sbSpecFile string) (string, error) { - podID, err := cc.run("create", podID, contSpecFile, sbSpecFile) - if err != nil { - return "", fmt.Errorf("create failed: %v", err) - } - // Strip the trailing newline from crictl output. - return strings.TrimSpace(podID), nil -} - -// Start starts a container. It corresponds to `crictl start`. -func (cc *Crictl) Start(contID string) (string, error) { - output, err := cc.run("start", contID) - if err != nil { - return "", fmt.Errorf("start failed: %v", err) - } - return output, nil -} - -// Stop stops a container. It corresponds to `crictl stop`. -func (cc *Crictl) Stop(contID string) error { - _, err := cc.run("stop", contID) - return err -} - -// Exec execs a program inside a container. It corresponds to `crictl exec`. -func (cc *Crictl) Exec(contID string, args ...string) (string, error) { - a := []string{"exec", contID} - a = append(a, args...) - output, err := cc.run(a...) - if err != nil { - return "", fmt.Errorf("exec failed: %v", err) - } - return output, nil -} - -// Rm removes a container. It corresponds to `crictl rm`. -func (cc *Crictl) Rm(contID string) error { - _, err := cc.run("rm", contID) - return err -} - -// StopPod stops a pod. It corresponds to `crictl stopp`. -func (cc *Crictl) StopPod(podID string) error { - _, err := cc.run("stopp", podID) - return err -} - -// containsConfig is a minimal copy of -// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto -// It only contains fields needed for testing. -type containerConfig struct { - Status containerStatus -} - -type containerStatus struct { - Network containerNetwork -} - -type containerNetwork struct { - IP string -} - -// PodIP returns a pod's IP address. 
-func (cc *Crictl) PodIP(podID string) (string, error) { - output, err := cc.run("inspectp", podID) - if err != nil { - return "", err - } - conf := &containerConfig{} - if err := json.Unmarshal([]byte(output), conf); err != nil { - return "", fmt.Errorf("failed to unmarshal JSON: %v, %s", err, output) - } - if conf.Status.Network.IP == "" { - return "", fmt.Errorf("no IP found in config: %s", output) - } - return conf.Status.Network.IP, nil -} - -// RmPod removes a container. It corresponds to `crictl rmp`. -func (cc *Crictl) RmPod(podID string) error { - _, err := cc.run("rmp", podID) - return err -} - -// StartContainer pulls the given image ands starts the container in the -// sandbox with the given podID. -func (cc *Crictl) StartContainer(podID, image, sbSpec, contSpec string) (string, error) { - // Write the specs to files that can be read by crictl. - sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec) - if err != nil { - return "", fmt.Errorf("failed to write sandbox spec: %v", err) - } - contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec) - if err != nil { - return "", fmt.Errorf("failed to write container spec: %v", err) - } - - return cc.startContainer(podID, image, sbSpecFile, contSpecFile) -} - -func (cc *Crictl) startContainer(podID, image, sbSpecFile, contSpecFile string) (string, error) { - if err := cc.Pull(image); err != nil { - return "", fmt.Errorf("failed to pull %s: %v", image, err) - } - - contID, err := cc.Create(podID, contSpecFile, sbSpecFile) - if err != nil { - return "", fmt.Errorf("failed to create container in pod %q: %v", podID, err) - } - - if _, err := cc.Start(contID); err != nil { - return "", fmt.Errorf("failed to start container %q in pod %q: %v", contID, podID, err) - } - - return contID, nil -} - -// StopContainer stops and deletes the container with the given container ID. -func (cc *Crictl) StopContainer(contID string) error { - if err := cc.Stop(contID); err != nil { - return fmt.Errorf("failed to stop container %q: %v", contID, err) - } - - if err := cc.Rm(contID); err != nil { - return fmt.Errorf("failed to remove container %q: %v", contID, err) - } - - return nil -} - -// StartPodAndContainer pulls an image, then starts a sandbox and container in -// that sandbox. It returns the pod ID and container ID. -func (cc *Crictl) StartPodAndContainer(image, sbSpec, contSpec string) (string, string, error) { - // Write the specs to files that can be read by crictl. - sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec) - if err != nil { - return "", "", fmt.Errorf("failed to write sandbox spec: %v", err) - } - contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec) - if err != nil { - return "", "", fmt.Errorf("failed to write container spec: %v", err) - } - - podID, err := cc.RunPod(sbSpecFile) - if err != nil { - return "", "", err - } - - contID, err := cc.startContainer(podID, image, sbSpecFile, contSpecFile) - - return podID, contID, err -} - -// StopPodAndContainer stops a container and pod. 
-func (cc *Crictl) StopPodAndContainer(podID, contID string) error { - if err := cc.StopContainer(contID); err != nil { - return fmt.Errorf("failed to stop container %q in pod %q: %v", contID, podID, err) - } - - if err := cc.StopPod(podID); err != nil { - return fmt.Errorf("failed to stop pod %q: %v", podID, err) - } - - if err := cc.RmPod(podID); err != nil { - return fmt.Errorf("failed to remove pod %q: %v", podID, err) - } - - return nil -} - -// run runs crictl with the given args and returns an error if it takes longer -// than cc.Timeout to run. -func (cc *Crictl) run(args ...string) (string, error) { - defaultArgs := []string{ - "--image-endpoint", cc.imageEndpoint, - "--runtime-endpoint", cc.runtimeEndpoint, - } - cmd := exec.Command(cc.executable, append(defaultArgs, args...)...) - - // Run the command with a timeout. - done := make(chan string) - errCh := make(chan error) - go func() { - output, err := cmd.CombinedOutput() - if err != nil { - errCh <- fmt.Errorf("error: \"%v\", output: %s", err, string(output)) - return - } - done <- string(output) - }() - select { - case output := <-done: - return output, nil - case err := <-errCh: - return "", err - case <-time.After(cc.timeout): - if err := testutil.KillCommand(cmd); err != nil { - return "", fmt.Errorf("timed out, then couldn't kill process %+v: %v", cmd, err) - } - return "", fmt.Errorf("timed out: %+v", cmd) - } -} diff --git a/runsc/dockerutil/BUILD b/runsc/dockerutil/BUILD deleted file mode 100644 index 8621af901..000000000 --- a/runsc/dockerutil/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "dockerutil", - testonly = 1, - srcs = ["dockerutil.go"], - visibility = ["//:sandbox"], - deps = [ - "//runsc/testutil", - "@com_github_kr_pty//:go_default_library", - ], -) diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go deleted file mode 100644 index f009486bc..000000000 --- a/runsc/dockerutil/dockerutil.go +++ /dev/null @@ -1,486 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package dockerutil is a collection of utility functions, primarily for -// testing. -package dockerutil - -import ( - "encoding/json" - "flag" - "fmt" - "io/ioutil" - "log" - "os" - "os/exec" - "path" - "regexp" - "strconv" - "strings" - "syscall" - "time" - - "github.com/kr/pty" - "gvisor.dev/gvisor/runsc/testutil" -) - -var ( - // runtime is the runtime to use for tests. This will be applied to all - // containers. Note that the default here ("runsc") corresponds to the - // default used by the installations. This is important, because the - // default installer for vm_tests (in tools/installers:head, invoked - // via tools/vm:defs.bzl) will install with this name. So without - // changing anything, tests should have a runsc runtime available to - // them. Otherwise installers should update the existing runtime - // instead of installing a new one. 
- runtime = flag.String("runtime", "runsc", "specify which runtime to use") - - // config is the default Docker daemon configuration path. - config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths") -) - -// EnsureSupportedDockerVersion checks if correct docker is installed. -func EnsureSupportedDockerVersion() { - cmd := exec.Command("docker", "version") - out, err := cmd.CombinedOutput() - if err != nil { - log.Fatalf("Error running %q: %v", "docker version", err) - } - re := regexp.MustCompile(`Version:\s+(\d+)\.(\d+)\.\d.*`) - matches := re.FindStringSubmatch(string(out)) - if len(matches) != 3 { - log.Fatalf("Invalid docker output: %s", out) - } - major, _ := strconv.Atoi(matches[1]) - minor, _ := strconv.Atoi(matches[2]) - if major < 17 || (major == 17 && minor < 9) { - log.Fatalf("Docker version 17.09.0 or greater is required, found: %02d.%02d", major, minor) - } -} - -// RuntimePath returns the binary path for the current runtime. -func RuntimePath() (string, error) { - // Read the configuration data; the file must exist. - configBytes, err := ioutil.ReadFile(*config) - if err != nil { - return "", err - } - - // Unmarshal the configuration. - c := make(map[string]interface{}) - if err := json.Unmarshal(configBytes, &c); err != nil { - return "", err - } - - // Decode the expected configuration. - r, ok := c["runtimes"] - if !ok { - return "", fmt.Errorf("no runtimes declared: %v", c) - } - rs, ok := r.(map[string]interface{}) - if !ok { - // The runtimes are not a map. - return "", fmt.Errorf("unexpected format: %v", c) - } - r, ok = rs[*runtime] - if !ok { - // The expected runtime is not declared. - return "", fmt.Errorf("runtime %q not found: %v", *runtime, c) - } - rs, ok = r.(map[string]interface{}) - if !ok { - // The runtime is not a map. - return "", fmt.Errorf("unexpected format: %v", c) - } - p, ok := rs["path"].(string) - if !ok { - // The runtime does not declare a path. - return "", fmt.Errorf("unexpected format: %v", c) - } - return p, nil -} - -// MountMode describes if the mount should be ro or rw. -type MountMode int - -const ( - // ReadOnly is what the name says. - ReadOnly MountMode = iota - // ReadWrite is what the name says. - ReadWrite -) - -// String returns the mount mode argument for this MountMode. -func (m MountMode) String() string { - switch m { - case ReadOnly: - return "ro" - case ReadWrite: - return "rw" - } - panic(fmt.Sprintf("invalid mode: %d", m)) -} - -// MountArg formats the volume argument to mount in the container. -func MountArg(source, target string, mode MountMode) string { - return fmt.Sprintf("-v=%s:%s:%v", source, target, mode) -} - -// LinkArg formats the link argument. -func LinkArg(source *Docker, target string) string { - return fmt.Sprintf("--link=%s:%s", source.Name, target) -} - -// PrepareFiles creates temp directory to copy files there. The sandbox doesn't -// have access to files in the test dir. 
-func PrepareFiles(names ...string) (string, error) { - dir, err := ioutil.TempDir("", "image-test") - if err != nil { - return "", fmt.Errorf("ioutil.TempDir failed: %v", err) - } - if err := os.Chmod(dir, 0777); err != nil { - return "", fmt.Errorf("os.Chmod(%q, 0777) failed: %v", dir, err) - } - for _, name := range names { - src, err := testutil.FindFile(name) - if err != nil { - return "", fmt.Errorf("testutil.Preparefiles(%q) failed: %v", name, err) - } - dst := path.Join(dir, path.Base(name)) - if err := testutil.Copy(src, dst); err != nil { - return "", fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err) - } - } - return dir, nil -} - -// do executes docker command. -func do(args ...string) (string, error) { - log.Printf("Running: docker %s\n", args) - cmd := exec.Command("docker", args...) - out, err := cmd.CombinedOutput() - if err != nil { - return "", fmt.Errorf("error executing docker %s: %v\nout: %s", args, err, out) - } - return string(out), nil -} - -// doWithPty executes docker command with stdio attached to a pty. -func doWithPty(args ...string) (*exec.Cmd, *os.File, error) { - log.Printf("Running with pty: docker %s\n", args) - cmd := exec.Command("docker", args...) - ptmx, err := pty.Start(cmd) - if err != nil { - return nil, nil, fmt.Errorf("error executing docker %s with a pty: %v", args, err) - } - return cmd, ptmx, nil -} - -// Pull pulls a docker image. This is used in tests to isolate the -// time to pull the image off the network from the time to actually -// start the container, to avoid timeouts over slow networks. -func Pull(image string) error { - _, err := do("pull", image) - return err -} - -// Docker contains the name and the runtime of a docker container. -type Docker struct { - Runtime string - Name string -} - -// MakeDocker sets up the struct for a Docker container. -// Names of containers will be unique. -func MakeDocker(namePrefix string) Docker { - return Docker{ - Name: testutil.RandomName(namePrefix), - Runtime: *runtime, - } -} - -// logDockerID logs a container id, which is needed to find container runsc logs. -func (d *Docker) logDockerID() { - id, err := d.ID() - if err != nil { - log.Printf("%v\n", err) - } - log.Printf("Name: %s ID: %v\n", d.Name, id) -} - -// Create calls 'docker create' with the arguments provided. -func (d *Docker) Create(args ...string) error { - a := []string{"create", "--runtime", d.Runtime, "--name", d.Name} - a = append(a, args...) - _, err := do(a...) - if err == nil { - d.logDockerID() - } - return err -} - -// Start calls 'docker start'. -func (d *Docker) Start() error { - if _, err := do("start", d.Name); err != nil { - return fmt.Errorf("error starting container %q: %v", d.Name, err) - } - return nil -} - -// Stop calls 'docker stop'. -func (d *Docker) Stop() error { - if _, err := do("stop", d.Name); err != nil { - return fmt.Errorf("error stopping container %q: %v", d.Name, err) - } - return nil -} - -// Run calls 'docker run' with the arguments provided. The container starts -// running in the background and the call returns immediately. -func (d *Docker) Run(args ...string) error { - a := d.runArgs("-d") - a = append(a, args...) - _, err := do(a...) - if err == nil { - d.logDockerID() - } - return err -} - -// RunWithPty is like Run but with an attached pty. -func (d *Docker) RunWithPty(args ...string) (*exec.Cmd, *os.File, error) { - a := d.runArgs("-it") - a = append(a, args...) - return doWithPty(a...) -} - -// RunFg calls 'docker run' with the arguments provided in the foreground. 
It -// blocks until the container exits and returns the output. -func (d *Docker) RunFg(args ...string) (string, error) { - a := d.runArgs(args...) - out, err := do(a...) - if err == nil { - d.logDockerID() - } - return string(out), err -} - -func (d *Docker) runArgs(args ...string) []string { - // Environment variable RUNSC_TEST_NAME is picked up by the runtime and added - // to the log name, so one can easily identify the corresponding logs for - // this test. - rv := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-e", "RUNSC_TEST_NAME=" + d.Name} - return append(rv, args...) -} - -// Logs calls 'docker logs'. -func (d *Docker) Logs() (string, error) { - return do("logs", d.Name) -} - -// Exec calls 'docker exec' with the arguments provided. -func (d *Docker) Exec(args ...string) (string, error) { - return d.ExecWithFlags(nil, args...) -} - -// ExecWithFlags calls 'docker exec name '. -func (d *Docker) ExecWithFlags(flags []string, args ...string) (string, error) { - a := []string{"exec"} - a = append(a, flags...) - a = append(a, d.Name) - a = append(a, args...) - return do(a...) -} - -// ExecAsUser calls 'docker exec' as the given user with the arguments -// provided. -func (d *Docker) ExecAsUser(user string, args ...string) (string, error) { - a := []string{"exec", "--user", user, d.Name} - a = append(a, args...) - return do(a...) -} - -// ExecWithTerminal calls 'docker exec -it' with the arguments provided and -// attaches a pty to stdio. -func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) { - a := []string{"exec", "-it", d.Name} - a = append(a, args...) - return doWithPty(a...) -} - -// Pause calls 'docker pause'. -func (d *Docker) Pause() error { - if _, err := do("pause", d.Name); err != nil { - return fmt.Errorf("error pausing container %q: %v", d.Name, err) - } - return nil -} - -// Unpause calls 'docker pause'. -func (d *Docker) Unpause() error { - if _, err := do("unpause", d.Name); err != nil { - return fmt.Errorf("error unpausing container %q: %v", d.Name, err) - } - return nil -} - -// Checkpoint calls 'docker checkpoint'. -func (d *Docker) Checkpoint(name string) error { - if _, err := do("checkpoint", "create", d.Name, name); err != nil { - return fmt.Errorf("error pausing container %q: %v", d.Name, err) - } - return nil -} - -// Restore calls 'docker start --checkname [name]'. -func (d *Docker) Restore(name string) error { - if _, err := do("start", "--checkpoint", name, d.Name); err != nil { - return fmt.Errorf("error starting container %q: %v", d.Name, err) - } - return nil -} - -// Remove calls 'docker rm'. -func (d *Docker) Remove() error { - if _, err := do("rm", d.Name); err != nil { - return fmt.Errorf("error deleting container %q: %v", d.Name, err) - } - return nil -} - -// CleanUp kills and deletes the container (best effort). -func (d *Docker) CleanUp() { - d.logDockerID() - if _, err := do("kill", d.Name); err != nil { - if strings.Contains(err.Error(), "is not running") { - // Nothing to kill. Don't log the error in this case. - } else { - log.Printf("error killing container %q: %v", d.Name, err) - } - } - if err := d.Remove(); err != nil { - log.Print(err) - } -} - -// FindPort returns the host port that is mapped to 'sandboxPort'. This calls -// docker to allocate a free port in the host and prevent conflicts. 
-func (d *Docker) FindPort(sandboxPort int) (int, error) { - format := fmt.Sprintf(`{{ (index (index .NetworkSettings.Ports "%d/tcp") 0).HostPort }}`, sandboxPort) - out, err := do("inspect", "-f", format, d.Name) - if err != nil { - return -1, fmt.Errorf("error retrieving port: %v", err) - } - port, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - return -1, fmt.Errorf("error parsing port %q: %v", out, err) - } - return port, nil -} - -// FindIP returns the IP address of the container as a string. -func (d *Docker) FindIP() (string, error) { - const format = `{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}` - out, err := do("inspect", "-f", format, d.Name) - if err != nil { - return "", fmt.Errorf("error retrieving IP: %v", err) - } - return strings.TrimSpace(out), nil -} - -// SandboxPid returns the PID to the sandbox process. -func (d *Docker) SandboxPid() (int, error) { - out, err := do("inspect", "-f={{.State.Pid}}", d.Name) - if err != nil { - return -1, fmt.Errorf("error retrieving pid: %v", err) - } - pid, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - return -1, fmt.Errorf("error parsing pid %q: %v", out, err) - } - return pid, nil -} - -// ID returns the container ID. -func (d *Docker) ID() (string, error) { - out, err := do("inspect", "-f={{.Id}}", d.Name) - if err != nil { - return "", fmt.Errorf("error retrieving ID: %v", err) - } - return strings.TrimSpace(string(out)), nil -} - -// Wait waits for container to exit, up to the given timeout. Returns error if -// wait fails or timeout is hit. Returns the application return code otherwise. -// Note that the application may have failed even if err == nil, always check -// the exit code. -func (d *Docker) Wait(timeout time.Duration) (syscall.WaitStatus, error) { - timeoutChan := time.After(timeout) - waitChan := make(chan (syscall.WaitStatus)) - errChan := make(chan (error)) - - go func() { - out, err := do("wait", d.Name) - if err != nil { - errChan <- fmt.Errorf("error waiting for container %q: %v", d.Name, err) - } - exit, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - errChan <- fmt.Errorf("error parsing exit code %q: %v", out, err) - } - waitChan <- syscall.WaitStatus(uint32(exit)) - }() - - select { - case ws := <-waitChan: - return ws, nil - case err := <-errChan: - return syscall.WaitStatus(1), err - case <-timeoutChan: - return syscall.WaitStatus(1), fmt.Errorf("timeout waiting for container %q", d.Name) - } -} - -// WaitForOutput calls 'docker logs' to retrieve containers output and searches -// for the given pattern. -func (d *Docker) WaitForOutput(pattern string, timeout time.Duration) (string, error) { - matches, err := d.WaitForOutputSubmatch(pattern, timeout) - if err != nil { - return "", err - } - if len(matches) == 0 { - return "", nil - } - return matches[0], nil -} - -// WaitForOutputSubmatch calls 'docker logs' to retrieve containers output and -// searches for the given pattern. It returns any regexp submatches as well. -func (d *Docker) WaitForOutputSubmatch(pattern string, timeout time.Duration) ([]string, error) { - re := regexp.MustCompile(pattern) - var out string - for exp := time.Now().Add(timeout); time.Now().Before(exp); { - var err error - out, err = d.Logs() - if err != nil { - return nil, err - } - if matches := re.FindStringSubmatch(out); matches != nil { - // Success! 
- return matches, nil - } - time.Sleep(100 * time.Millisecond) - } - return nil, fmt.Errorf("timeout waiting for output %q: %s", re.String(), out) -} diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD deleted file mode 100644 index 945405303..000000000 --- a/runsc/testutil/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "testutil", - testonly = 1, - srcs = [ - "testutil.go", - "testutil_runfiles.go", - ], - visibility = ["//:sandbox"], - deps = [ - "//pkg/log", - "//pkg/sync", - "//runsc/boot", - "//runsc/specutils", - "@com_github_cenkalti_backoff//:go_default_library", - "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", - ], -) diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go deleted file mode 100644 index 5e09f8f16..000000000 --- a/runsc/testutil/testutil.go +++ /dev/null @@ -1,433 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package testutil contains utility functions for runsc tests. -package testutil - -import ( - "bufio" - "context" - "debug/elf" - "encoding/base32" - "encoding/json" - "flag" - "fmt" - "io" - "io/ioutil" - "math" - "math/rand" - "net/http" - "os" - "os/exec" - "os/signal" - "path" - "path/filepath" - "strconv" - "strings" - "sync/atomic" - "syscall" - "testing" - "time" - - "github.com/cenkalti/backoff" - specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/runsc/boot" - "gvisor.dev/gvisor/runsc/specutils" -) - -var ( - checkpoint = flag.Bool("checkpoint", true, "control checkpoint/restore support") -) - -func init() { - rand.Seed(time.Now().UnixNano()) -} - -// IsCheckpointSupported returns the relevant command line flag. -func IsCheckpointSupported() bool { - return *checkpoint -} - -// TmpDir returns the absolute path to a writable directory that can be used as -// scratch by the test. -func TmpDir() string { - dir := os.Getenv("TEST_TMPDIR") - if dir == "" { - dir = "/tmp" - } - return dir -} - -// ConfigureExePath configures the executable for runsc in the test environment. -func ConfigureExePath() error { - path, err := FindFile("runsc/runsc") - if err != nil { - return err - } - specutils.ExePath = path - return nil -} - -// TestConfig returns the default configuration to use in tests. Note that -// 'RootDir' must be set by caller if required. 
-func TestConfig(t *testing.T) *boot.Config { - logDir := "" - if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { - logDir = dir + "/" - } - return &boot.Config{ - Debug: true, - DebugLog: path.Join(logDir, "runsc.log."+t.Name()+".%TIMESTAMP%.%COMMAND%"), - LogFormat: "text", - DebugLogFormat: "text", - LogPackets: true, - Network: boot.NetworkNone, - Strace: true, - Platform: "ptrace", - FileAccess: boot.FileAccessExclusive, - NumNetworkChannels: 1, - - TestOnlyAllowRunAsCurrentUserWithoutChroot: true, - } -} - -// NewSpecWithArgs creates a simple spec with the given args suitable for use -// in tests. -func NewSpecWithArgs(args ...string) *specs.Spec { - return &specs.Spec{ - // The host filesystem root is the container root. - Root: &specs.Root{ - Path: "/", - Readonly: true, - }, - Process: &specs.Process{ - Args: args, - Env: []string{ - "PATH=" + os.Getenv("PATH"), - }, - Capabilities: specutils.AllCapabilities(), - }, - Mounts: []specs.Mount{ - // Hide the host /etc to avoid any side-effects. - // For example, bash reads /etc/passwd and if it is - // very big, tests can fail by timeout. - { - Type: "tmpfs", - Destination: "/etc", - }, - // Root is readonly, but many tests want to write to tmpdir. - // This creates a writable mount inside the root. Also, when tmpdir points - // to "/tmp", it makes the the actual /tmp to be mounted and not a tmpfs - // inside the sentry. - { - Type: "bind", - Destination: TmpDir(), - Source: TmpDir(), - }, - }, - Hostname: "runsc-test-hostname", - } -} - -// SetupRootDir creates a root directory for containers. -func SetupRootDir() (string, error) { - rootDir, err := ioutil.TempDir(TmpDir(), "containers") - if err != nil { - return "", fmt.Errorf("error creating root dir: %v", err) - } - return rootDir, nil -} - -// SetupContainer creates a bundle and root dir for the container, generates a -// test config, and writes the spec to config.json in the bundle dir. -func SetupContainer(spec *specs.Spec, conf *boot.Config) (rootDir, bundleDir string, err error) { - rootDir, err = SetupRootDir() - if err != nil { - return "", "", err - } - conf.RootDir = rootDir - bundleDir, err = SetupBundleDir(spec) - return rootDir, bundleDir, err -} - -// SetupBundleDir creates a bundle dir and writes the spec to config.json. -func SetupBundleDir(spec *specs.Spec) (bundleDir string, err error) { - bundleDir, err = ioutil.TempDir(TmpDir(), "bundle") - if err != nil { - return "", fmt.Errorf("error creating bundle dir: %v", err) - } - - if err = writeSpec(bundleDir, spec); err != nil { - return "", fmt.Errorf("error writing spec: %v", err) - } - return bundleDir, nil -} - -// writeSpec writes the spec to disk in the given directory. -func writeSpec(dir string, spec *specs.Spec) error { - b, err := json.Marshal(spec) - if err != nil { - return err - } - return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755) -} - -// UniqueContainerID generates a unique container id for each test. -// -// The container id is used to create an abstract unix domain socket, which must -// be unique. While the container forbids creating two containers with the same -// name, sometimes between test runs the socket does not get cleaned up quickly -// enough, causing container creation to fail. -func UniqueContainerID() string { - // Read 20 random bytes. - b := make([]byte, 20) - // "[Read] always returns len(p) and a nil error." 
--godoc - if _, err := rand.Read(b); err != nil { - panic("rand.Read failed: " + err.Error()) - } - // base32 encode the random bytes, so that the name is a valid - // container id and can be used as a socket name in the filesystem. - return fmt.Sprintf("test-container-%s", base32.StdEncoding.EncodeToString(b)) -} - -// Copy copies file from src to dst. -func Copy(src, dst string) error { - in, err := os.Open(src) - if err != nil { - return err - } - defer in.Close() - - out, err := os.Create(dst) - if err != nil { - return err - } - defer out.Close() - - _, err = io.Copy(out, in) - return err -} - -// Poll is a shorthand function to poll for something with given timeout. -func Poll(cb func() error, timeout time.Duration) error { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) - return backoff.Retry(cb, b) -} - -// WaitForHTTP tries GET requests on a port until the call succeeds or timeout. -func WaitForHTTP(port int, timeout time.Duration) error { - cb := func() error { - c := &http.Client{ - // Calculate timeout to be able to do minimum 5 attempts. - Timeout: timeout / 5, - } - url := fmt.Sprintf("http://localhost:%d/", port) - resp, err := c.Get(url) - if err != nil { - log.Infof("Waiting %s: %v", url, err) - return err - } - resp.Body.Close() - return nil - } - return Poll(cb, timeout) -} - -// Reaper reaps child processes. -type Reaper struct { - // mu protects ch, which will be nil if the reaper is not running. - mu sync.Mutex - ch chan os.Signal -} - -// Start starts reaping child processes. -func (r *Reaper) Start() { - r.mu.Lock() - defer r.mu.Unlock() - - if r.ch != nil { - panic("reaper.Start called on a running reaper") - } - - r.ch = make(chan os.Signal, 1) - signal.Notify(r.ch, syscall.SIGCHLD) - - go func() { - for { - r.mu.Lock() - ch := r.ch - r.mu.Unlock() - if ch == nil { - return - } - - _, ok := <-ch - if !ok { - // Channel closed. - return - } - for { - cpid, _ := syscall.Wait4(-1, nil, syscall.WNOHANG, nil) - if cpid < 1 { - break - } - } - } - }() -} - -// Stop stops reaping child processes. -func (r *Reaper) Stop() { - r.mu.Lock() - defer r.mu.Unlock() - - if r.ch == nil { - panic("reaper.Stop called on a stopped reaper") - } - - signal.Stop(r.ch) - close(r.ch) - r.ch = nil -} - -// StartReaper is a helper that starts a new Reaper and returns a function to -// stop it. -func StartReaper() func() { - r := &Reaper{} - r.Start() - return r.Stop -} - -// WaitUntilRead reads from the given reader until the wanted string is found -// or until timeout. -func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error { - sc := bufio.NewScanner(r) - if split != nil { - sc.Split(split) - } - // done must be accessed atomically. A value greater than 0 indicates - // that the read loop can exit. - var done uint32 - doneCh := make(chan struct{}) - go func() { - for sc.Scan() { - t := sc.Text() - if strings.Contains(t, want) { - atomic.StoreUint32(&done, 1) - close(doneCh) - break - } - if atomic.LoadUint32(&done) > 0 { - break - } - } - }() - select { - case <-time.After(timeout): - atomic.StoreUint32(&done, 1) - return fmt.Errorf("timeout waiting to read %q", want) - case <-doneCh: - return nil - } -} - -// KillCommand kills the process running cmd unless it hasn't been started. It -// returns an error if it cannot kill the process unless the reason is that the -// process has already exited. 
-func KillCommand(cmd *exec.Cmd) error { - if cmd.Process == nil { - return nil - } - if err := cmd.Process.Kill(); err != nil { - if !strings.Contains(err.Error(), "process already finished") { - return fmt.Errorf("failed to kill process %v: %v", cmd, err) - } - } - return nil -} - -// WriteTmpFile writes text to a temporary file, closes the file, and returns -// the name of the file. -func WriteTmpFile(pattern, text string) (string, error) { - file, err := ioutil.TempFile(TmpDir(), pattern) - if err != nil { - return "", err - } - defer file.Close() - if _, err := file.Write([]byte(text)); err != nil { - return "", err - } - return file.Name(), nil -} - -// RandomName create a name with a 6 digit random number appended to it. -func RandomName(prefix string) string { - return fmt.Sprintf("%s-%06d", prefix, rand.Int31n(1000000)) -} - -// IsStatic returns true iff the given file is a static binary. -func IsStatic(filename string) (bool, error) { - f, err := elf.Open(filename) - if err != nil { - return false, err - } - for _, prog := range f.Progs { - if prog.Type == elf.PT_INTERP { - return false, nil // Has interpreter. - } - } - return true, nil -} - -// TestIndicesForShard returns indices for this test shard based on the -// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. -// -// If either of the env vars are not present, then the function will return all -// tests. If there are more shards than there are tests, then the returned list -// may be empty. -func TestIndicesForShard(numTests int) ([]int, error) { - var ( - shardIndex = 0 - shardTotal = 1 - ) - - indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS") - if indexStr != "" && totalStr != "" { - // Parse index and total to ints. - var err error - shardIndex, err = strconv.Atoi(indexStr) - if err != nil { - return nil, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err) - } - shardTotal, err = strconv.Atoi(totalStr) - if err != nil { - return nil, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err) - } - } - - // Calculate! - var indices []int - numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal))) - for i := 0; i < numBlocks; i++ { - pick := i*shardTotal + shardIndex - if pick < numTests { - indices = append(indices, pick) - } - } - return indices, nil -} diff --git a/runsc/testutil/testutil_runfiles.go b/runsc/testutil/testutil_runfiles.go deleted file mode 100644 index ece9ea9a1..000000000 --- a/runsc/testutil/testutil_runfiles.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testutil - -import ( - "fmt" - "os" - "path/filepath" -) - -// FindFile searchs for a file inside the test run environment. It returns the -// full path to the file. It fails if none or more than one file is found. -func FindFile(path string) (string, error) { - wd, err := os.Getwd() - if err != nil { - return "", err - } - - // The test root is demarcated by a path element called "__main__". 
Search for - // it backwards from the working directory. - root := wd - for { - dir, name := filepath.Split(root) - if name == "__main__" { - break - } - if len(dir) == 0 { - return "", fmt.Errorf("directory __main__ not found in %q", wd) - } - // Remove ending slash to loop around. - root = dir[:len(dir)-1] - } - - // Annoyingly, bazel adds the build type to the directory path for go - // binaries, but not for c++ binaries. We use two different patterns to - // to find our file. - patterns := []string{ - // Try the obvious path first. - filepath.Join(root, path), - // If it was a go binary, use a wildcard to match the build - // type. The pattern is: /test-path/__main__/directories/*/file. - filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)), - } - - for _, p := range patterns { - matches, err := filepath.Glob(p) - if err != nil { - // "The only possible returned error is ErrBadPattern, - // when pattern is malformed." -godoc - return "", fmt.Errorf("error globbing %q: %v", p, err) - } - switch len(matches) { - case 0: - // Try the next pattern. - case 1: - // We found it. - return matches[0], nil - default: - return "", fmt.Errorf("more than one match found for %q: %s", path, matches) - } - } - return "", fmt.Errorf("file %q not found", path) -} diff --git a/scripts/iptables_tests.sh b/scripts/iptables_tests.sh index 0f46909ac..c8da1f32d 100755 --- a/scripts/iptables_tests.sh +++ b/scripts/iptables_tests.sh @@ -17,14 +17,5 @@ source $(dirname $0)/common.sh install_runsc_for_test iptables --net-raw - -# Build the docker image for the test. -run //test/iptables/runner:runner-image --norun - -test //test/iptables:iptables_test \ - "--test_arg=--runtime=runc" \ - "--test_arg=--image=bazel/test/iptables/runner:runner-image" - -test //test/iptables:iptables_test \ - "--test_arg=--runtime=${RUNTIME}" \ - "--test_arg=--image=bazel/test/iptables/runner:runner-image" +test //test/iptables:iptables_test --test_arg=--runtime=runc +test //test/iptables:iptables_test --test_arg=--runtime=${RUNTIME} diff --git a/test/cmd/test_app/BUILD b/test/cmd/test_app/BUILD new file mode 100644 index 000000000..98ba5a3d9 --- /dev/null +++ b/test/cmd/test_app/BUILD @@ -0,0 +1,21 @@ +load("//tools:defs.bzl", "go_binary") + +package(licenses = ["notice"]) + +go_binary( + name = "test_app", + testonly = 1, + srcs = [ + "fds.go", + "test_app.go", + ], + pure = True, + visibility = ["//runsc/container:__pkg__"], + deps = [ + "//pkg/test/testutil", + "//pkg/unet", + "//runsc/flag", + "@com_github_google_subcommands//:go_default_library", + "@com_github_kr_pty//:go_default_library", + ], +) diff --git a/test/cmd/test_app/fds.go b/test/cmd/test_app/fds.go new file mode 100644 index 000000000..a7658eefd --- /dev/null +++ b/test/cmd/test_app/fds.go @@ -0,0 +1,185 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "context" + "io/ioutil" + "log" + "os" + "time" + + "github.com/google/subcommands" + "gvisor.dev/gvisor/pkg/test/testutil" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/runsc/flag" +) + +const fileContents = "foobarbaz" + +// fdSender will open a file and send the FD over a unix domain socket. +type fdSender struct { + socketPath string +} + +// Name implements subcommands.Command.Name. +func (*fdSender) Name() string { + return "fd_sender" +} + +// Synopsis implements subcommands.Command.Synopsys. +func (*fdSender) Synopsis() string { + return "creates a file and sends the FD over the socket" +} + +// Usage implements subcommands.Command.Usage. +func (*fdSender) Usage() string { + return "fd_sender " +} + +// SetFlags implements subcommands.Command.SetFlags. +func (fds *fdSender) SetFlags(f *flag.FlagSet) { + f.StringVar(&fds.socketPath, "socket", "", "path to socket") +} + +// Execute implements subcommands.Command.Execute. +func (fds *fdSender) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if fds.socketPath == "" { + log.Fatalf("socket flag must be set") + } + + dir, err := ioutil.TempDir("", "") + if err != nil { + log.Fatalf("TempDir failed: %v", err) + } + + fileToSend, err := ioutil.TempFile(dir, "") + if err != nil { + log.Fatalf("TempFile failed: %v", err) + } + defer fileToSend.Close() + + if _, err := fileToSend.WriteString(fileContents); err != nil { + log.Fatalf("Write(%q) failed: %v", fileContents, err) + } + + // Receiver may not be started yet, so try connecting in a poll loop. + var s *unet.Socket + if err := testutil.Poll(func() error { + var err error + s, err = unet.Connect(fds.socketPath, true /* SEQPACKET, so we can send empty message with FD */) + return err + }, 10*time.Second); err != nil { + log.Fatalf("Error connecting to socket %q: %v", fds.socketPath, err) + } + defer s.Close() + + w := s.Writer(true) + w.ControlMessage.PackFDs(int(fileToSend.Fd())) + if _, err := w.WriteVec([][]byte{[]byte{'a'}}); err != nil { + log.Fatalf("Error sending FD %q over socket %q: %v", fileToSend.Fd(), fds.socketPath, err) + } + + log.Print("FD SENDER exiting successfully") + return subcommands.ExitSuccess +} + +// fdReceiver receives an FD from a unix domain socket and does things to it. +type fdReceiver struct { + socketPath string +} + +// Name implements subcommands.Command.Name. +func (*fdReceiver) Name() string { + return "fd_receiver" +} + +// Synopsis implements subcommands.Command.Synopsys. +func (*fdReceiver) Synopsis() string { + return "reads an FD from a unix socket, and then does things to it" +} + +// Usage implements subcommands.Command.Usage. +func (*fdReceiver) Usage() string { + return "fd_receiver " +} + +// SetFlags implements subcommands.Command.SetFlags. +func (fdr *fdReceiver) SetFlags(f *flag.FlagSet) { + f.StringVar(&fdr.socketPath, "socket", "", "path to socket") +} + +// Execute implements subcommands.Command.Execute. 
+func (fdr *fdReceiver) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if fdr.socketPath == "" { + log.Fatalf("Flags cannot be empty, given: socket: %q", fdr.socketPath) + } + + ss, err := unet.BindAndListen(fdr.socketPath, true /* packet */) + if err != nil { + log.Fatalf("BindAndListen(%q) failed: %v", fdr.socketPath, err) + } + defer ss.Close() + + var s *unet.Socket + c := make(chan error, 1) + go func() { + var err error + s, err = ss.Accept() + c <- err + }() + + select { + case err := <-c: + if err != nil { + log.Fatalf("Accept() failed: %v", err) + } + case <-time.After(10 * time.Second): + log.Fatalf("Timeout waiting for accept") + } + + r := s.Reader(true) + r.EnableFDs(1) + b := [][]byte{{'a'}} + if n, err := r.ReadVec(b); n != 1 || err != nil { + log.Fatalf("ReadVec got n=%d err %v (wanted 0, nil)", n, err) + } + + fds, err := r.ExtractFDs() + if err != nil { + log.Fatalf("ExtractFD() got err %v", err) + } + if len(fds) != 1 { + log.Fatalf("ExtractFD() got %d FDs, wanted 1", len(fds)) + } + fd := fds[0] + + file := os.NewFile(uintptr(fd), "received file") + defer file.Close() + if _, err := file.Seek(0, os.SEEK_SET); err != nil { + log.Fatalf("Seek(0, 0) failed: %v", err) + } + + got, err := ioutil.ReadAll(file) + if err != nil { + log.Fatalf("ReadAll failed: %v", err) + } + if string(got) != fileContents { + log.Fatalf("ReadAll got %q want %q", string(got), fileContents) + } + + log.Print("FD RECEIVER exiting successfully") + return subcommands.ExitSuccess +} diff --git a/test/cmd/test_app/test_app.go b/test/cmd/test_app/test_app.go new file mode 100644 index 000000000..3ba4f38f8 --- /dev/null +++ b/test/cmd/test_app/test_app.go @@ -0,0 +1,394 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary test_app is like a swiss knife for tests that need to run anything +// inside the sandbox. New functionality can be added with new commands. +package main + +import ( + "context" + "fmt" + "io" + "io/ioutil" + "log" + "net" + "os" + "os/exec" + "regexp" + "strconv" + sys "syscall" + "time" + + "github.com/google/subcommands" + "github.com/kr/pty" + "gvisor.dev/gvisor/pkg/test/testutil" + "gvisor.dev/gvisor/runsc/flag" +) + +func main() { + subcommands.Register(subcommands.HelpCommand(), "") + subcommands.Register(subcommands.FlagsCommand(), "") + subcommands.Register(new(capability), "") + subcommands.Register(new(fdReceiver), "") + subcommands.Register(new(fdSender), "") + subcommands.Register(new(forkBomb), "") + subcommands.Register(new(ptyRunner), "") + subcommands.Register(new(reaper), "") + subcommands.Register(new(syscall), "") + subcommands.Register(new(taskTree), "") + subcommands.Register(new(uds), "") + + flag.Parse() + + exitCode := subcommands.Execute(context.Background()) + os.Exit(int(exitCode)) +} + +type uds struct { + fileName string + socketPath string +} + +// Name implements subcommands.Command.Name. 
+func (*uds) Name() string { + return "uds" +} + +// Synopsis implements subcommands.Command.Synopsys. +func (*uds) Synopsis() string { + return "creates unix domain socket client and server. Client sends a contant flow of sequential numbers. Server prints them to --file" +} + +// Usage implements subcommands.Command.Usage. +func (*uds) Usage() string { + return "uds " +} + +// SetFlags implements subcommands.Command.SetFlags. +func (c *uds) SetFlags(f *flag.FlagSet) { + f.StringVar(&c.fileName, "file", "", "name of output file") + f.StringVar(&c.socketPath, "socket", "", "path to socket") +} + +// Execute implements subcommands.Command.Execute. +func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if c.fileName == "" || c.socketPath == "" { + log.Fatalf("Flags cannot be empty, given: fileName: %q, socketPath: %q", c.fileName, c.socketPath) + return subcommands.ExitFailure + } + outputFile, err := os.OpenFile(c.fileName, os.O_WRONLY|os.O_CREATE, 0666) + if err != nil { + log.Fatal("error opening output file:", err) + } + + defer os.Remove(c.socketPath) + + listener, err := net.Listen("unix", c.socketPath) + if err != nil { + log.Fatalf("error listening on socket %q: %v", c.socketPath, err) + } + + go server(listener, outputFile) + for i := 0; ; i++ { + conn, err := net.Dial("unix", c.socketPath) + if err != nil { + log.Fatal("error dialing:", err) + } + if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil { + log.Fatal("error writing:", err) + } + conn.Close() + time.Sleep(100 * time.Millisecond) + } +} + +func server(listener net.Listener, out *os.File) { + buf := make([]byte, 16) + + for { + c, err := listener.Accept() + if err != nil { + log.Fatal("error accepting connection:", err) + } + nr, err := c.Read(buf) + if err != nil { + log.Fatal("error reading from buf:", err) + } + data := buf[0:nr] + fmt.Fprint(out, string(data)+"\n") + } +} + +type taskTree struct { + depth int + width int + pause bool +} + +// Name implements subcommands.Command. +func (*taskTree) Name() string { + return "task-tree" +} + +// Synopsis implements subcommands.Command. +func (*taskTree) Synopsis() string { + return "creates a tree of tasks" +} + +// Usage implements subcommands.Command. +func (*taskTree) Usage() string { + return "task-tree " +} + +// SetFlags implements subcommands.Command. +func (c *taskTree) SetFlags(f *flag.FlagSet) { + f.IntVar(&c.depth, "depth", 1, "number of levels to create") + f.IntVar(&c.width, "width", 1, "number of tasks at each level") + f.BoolVar(&c.pause, "pause", false, "whether the tasks should pause perpetually") +} + +// Execute implements subcommands.Command. 
+func (c *taskTree) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + stop := testutil.StartReaper() + defer stop() + + if c.depth == 0 { + log.Printf("Child sleeping, PID: %d\n", os.Getpid()) + select {} + } + log.Printf("Parent %d sleeping, PID: %d\n", c.depth, os.Getpid()) + + var cmds []*exec.Cmd + for i := 0; i < c.width; i++ { + cmd := exec.Command( + "/proc/self/exe", c.Name(), + "--depth", strconv.Itoa(c.depth-1), + "--width", strconv.Itoa(c.width), + "--pause", strconv.FormatBool(c.pause)) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Start(); err != nil { + log.Fatal("failed to call self:", err) + } + cmds = append(cmds, cmd) + } + + for _, c := range cmds { + c.Wait() + } + + if c.pause { + select {} + } + + return subcommands.ExitSuccess +} + +type forkBomb struct { + delay time.Duration +} + +// Name implements subcommands.Command. +func (*forkBomb) Name() string { + return "fork-bomb" +} + +// Synopsis implements subcommands.Command. +func (*forkBomb) Synopsis() string { + return "creates child process until the end of times" +} + +// Usage implements subcommands.Command. +func (*forkBomb) Usage() string { + return "fork-bomb " +} + +// SetFlags implements subcommands.Command. +func (c *forkBomb) SetFlags(f *flag.FlagSet) { + f.DurationVar(&c.delay, "delay", 100*time.Millisecond, "amount of time to delay creation of child") +} + +// Execute implements subcommands.Command. +func (c *forkBomb) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + time.Sleep(c.delay) + + cmd := exec.Command("/proc/self/exe", c.Name()) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + log.Fatal("failed to call self:", err) + } + return subcommands.ExitSuccess +} + +type reaper struct{} + +// Name implements subcommands.Command. +func (*reaper) Name() string { + return "reaper" +} + +// Synopsis implements subcommands.Command. +func (*reaper) Synopsis() string { + return "reaps all children in a loop" +} + +// Usage implements subcommands.Command. +func (*reaper) Usage() string { + return "reaper " +} + +// SetFlags implements subcommands.Command. +func (*reaper) SetFlags(*flag.FlagSet) {} + +// Execute implements subcommands.Command. +func (c *reaper) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + stop := testutil.StartReaper() + defer stop() + select {} +} + +type syscall struct { + sysno uint64 +} + +// Name implements subcommands.Command. +func (*syscall) Name() string { + return "syscall" +} + +// Synopsis implements subcommands.Command. +func (*syscall) Synopsis() string { + return "syscall makes a syscall" +} + +// Usage implements subcommands.Command. +func (*syscall) Usage() string { + return "syscall " +} + +// SetFlags implements subcommands.Command. +func (s *syscall) SetFlags(f *flag.FlagSet) { + f.Uint64Var(&s.sysno, "syscall", 0, "syscall to call") +} + +// Execute implements subcommands.Command. +func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if _, _, errno := sys.Syscall(uintptr(s.sysno), 0, 0, 0); errno != 0 { + fmt.Printf("syscall(%d, 0, 0...) failed: %v\n", s.sysno, errno) + } else { + fmt.Printf("syscall(%d, 0, 0...) success\n", s.sysno) + } + return subcommands.ExitSuccess +} + +type capability struct { + enabled uint64 + disabled uint64 +} + +// Name implements subcommands.Command. 
+func (*capability) Name() string { + return "capability" +} + +// Synopsis implements subcommands.Command. +func (*capability) Synopsis() string { + return "checks if effective capabilities are set/unset" +} + +// Usage implements subcommands.Command. +func (*capability) Usage() string { + return "capability [--enabled=number] [--disabled=number]" +} + +// SetFlags implements subcommands.Command. +func (c *capability) SetFlags(f *flag.FlagSet) { + f.Uint64Var(&c.enabled, "enabled", 0, "") + f.Uint64Var(&c.disabled, "disabled", 0, "") +} + +// Execute implements subcommands.Command. +func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if c.enabled == 0 && c.disabled == 0 { + fmt.Println("One of the flags must be set") + return subcommands.ExitUsageError + } + + status, err := ioutil.ReadFile("/proc/self/status") + if err != nil { + fmt.Printf("Error reading %q: %v\n", "proc/self/status", err) + return subcommands.ExitFailure + } + re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n") + matches := re.FindStringSubmatch(string(status)) + if matches == nil || len(matches) != 2 { + fmt.Printf("Effective capabilities not found in\n%s\n", status) + return subcommands.ExitFailure + } + caps, err := strconv.ParseUint(matches[1], 16, 64) + if err != nil { + fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err) + return subcommands.ExitFailure + } + + if c.enabled != 0 && (caps&c.enabled) != c.enabled { + fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps) + return subcommands.ExitFailure + } + if c.disabled != 0 && (caps&c.disabled) != 0 { + fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps) + return subcommands.ExitFailure + } + + return subcommands.ExitSuccess +} + +type ptyRunner struct{} + +// Name implements subcommands.Command. +func (*ptyRunner) Name() string { + return "pty-runner" +} + +// Synopsis implements subcommands.Command. +func (*ptyRunner) Synopsis() string { + return "runs the given command with an open pty terminal" +} + +// Usage implements subcommands.Command. +func (*ptyRunner) Usage() string { + return "pty-runner [command]" +} + +// SetFlags implements subcommands.Command.SetFlags. +func (*ptyRunner) SetFlags(f *flag.FlagSet) {} + +// Execute implements subcommands.Command. +func (*ptyRunner) Execute(_ context.Context, fs *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus { + c := exec.Command(fs.Args()[0], fs.Args()[1:]...) + f, err := pty.Start(c) + if err != nil { + fmt.Printf("pty.Start failed: %v", err) + return subcommands.ExitFailure + } + defer f.Close() + + // Copy stdout from the command to keep this process alive until the + // subprocess exits. 
+ io.Copy(os.Stdout, f) + + return subcommands.ExitSuccess +} diff --git a/test/e2e/BUILD b/test/e2e/BUILD index 76e04f878..44cce0e3b 100644 --- a/test/e2e/BUILD +++ b/test/e2e/BUILD @@ -20,9 +20,9 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/bits", - "//runsc/dockerutil", + "//pkg/test/dockerutil", + "//pkg/test/testutil", "//runsc/specutils", - "//runsc/testutil", ], ) diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go index 594c8e752..6a63b1232 100644 --- a/test/e2e/exec_test.go +++ b/test/e2e/exec_test.go @@ -23,6 +23,8 @@ package integration import ( "fmt" + "os" + "os/exec" "strconv" "strings" "syscall" @@ -31,23 +33,23 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bits" - "gvisor.dev/gvisor/runsc/dockerutil" + "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/runsc/specutils" ) // Test that exec uses the exact same capability set as the container. func TestExecCapabilities(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("exec-capabilities-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start the container. - if err := d.Run("alpine", "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil { + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() + // Check that capability. matches, err := d.WaitForOutputSubmatch("CapEff:\t([0-9a-f]+)\n", 5*time.Second) if err != nil { t.Fatalf("WaitForOutputSubmatch() timeout: %v", err) @@ -59,7 +61,7 @@ func TestExecCapabilities(t *testing.T) { t.Log("Root capabilities:", want) // Now check that exec'd process capabilities match the root. - got, err := d.Exec("grep", "CapEff:", "/proc/self/status") + got, err := d.Exec(dockerutil.RunOpts{}, "grep", "CapEff:", "/proc/self/status") if err != nil { t.Fatalf("docker exec failed: %v", err) } @@ -72,16 +74,16 @@ func TestExecCapabilities(t *testing.T) { // Test that 'exec --privileged' adds all capabilities, except for CAP_NET_RAW // which is removed from the container when --net-raw=false. func TestExecPrivileged(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("exec-privileged-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start the container with all capabilities dropped. - if err := d.Run("--cap-drop=all", "alpine", "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil { + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + CapDrop: []string{"all"}, + }, "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Check that all capabilities where dropped from container. matches, err := d.WaitForOutputSubmatch("CapEff:\t([0-9a-f]+)\n", 5*time.Second) @@ -100,9 +102,11 @@ func TestExecPrivileged(t *testing.T) { t.Fatalf("Container should have no capabilities: %x", containerCaps) } - // Check that 'exec --privileged' adds all capabilities, except - // for CAP_NET_RAW. - got, err := d.ExecWithFlags([]string{"--privileged"}, "grep", "CapEff:", "/proc/self/status") + // Check that 'exec --privileged' adds all capabilities, except for + // CAP_NET_RAW. 
+ got, err := d.Exec(dockerutil.RunOpts{ + Privileged: true, + }, "grep", "CapEff:", "/proc/self/status") if err != nil { t.Fatalf("docker exec failed: %v", err) } @@ -114,97 +118,99 @@ func TestExecPrivileged(t *testing.T) { } func TestExecJobControl(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("exec-job-control-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start the container. - if err := d.Run("alpine", "sleep", "1000"); err != nil { + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "sleep", "1000"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Exec 'sh' with an attached pty. - cmd, ptmx, err := d.ExecWithTerminal("sh") - if err != nil { + if _, err := d.Exec(dockerutil.RunOpts{ + Pty: func(cmd *exec.Cmd, ptmx *os.File) { + // Call "sleep 100 | cat" in the shell. We pipe to cat + // so that there will be two processes in the + // foreground process group. + if _, err := ptmx.Write([]byte("sleep 100 | cat\n")); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + + // Give shell a few seconds to start executing the sleep. + time.Sleep(2 * time.Second) + + // Send a ^C to the pty, which should kill sleep and + // cat, but not the shell. \x03 is ASCII "end of + // text", which is the same as ^C. + if _, err := ptmx.Write([]byte{'\x03'}); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + + // The shell should still be alive at this point. Sleep + // should have exited with code 2+128=130. We'll exit + // with 10 plus that number, so that we can be sure + // that the shell did not get signalled. + if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + + // Exec process should exit with code 10+130=140. + ps, err := cmd.Process.Wait() + if err != nil { + t.Fatalf("error waiting for exec process: %v", err) + } + ws := ps.Sys().(syscall.WaitStatus) + if !ws.Exited() { + t.Errorf("ws.Exited got false, want true") + } + if got, want := ws.ExitStatus(), 140; got != want { + t.Errorf("ws.ExitedStatus got %d, want %d", got, want) + } + }, + }, "sh"); err != nil { t.Fatalf("docker exec failed: %v", err) } - defer ptmx.Close() - - // Call "sleep 100 | cat" in the shell. We pipe to cat so that there - // will be two processes in the foreground process group. - if _, err := ptmx.Write([]byte("sleep 100 | cat\n")); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // Give shell a few seconds to start executing the sleep. - time.Sleep(2 * time.Second) - - // Send a ^C to the pty, which should kill sleep and cat, but not the - // shell. \x03 is ASCII "end of text", which is the same as ^C. - if _, err := ptmx.Write([]byte{'\x03'}); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // The shell should still be alive at this point. Sleep should have - // exited with code 2+128=130. We'll exit with 10 plus that number, so - // that we can be sure that the shell did not get signalled. - if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // Exec process should exit with code 10+130=140. 
- ps, err := cmd.Process.Wait() - if err != nil { - t.Fatalf("error waiting for exec process: %v", err) - } - ws := ps.Sys().(syscall.WaitStatus) - if !ws.Exited() { - t.Errorf("ws.Exited got false, want true") - } - if got, want := ws.ExitStatus(), 140; got != want { - t.Errorf("ws.ExitedStatus got %d, want %d", got, want) - } } // Test that failure to exec returns proper error message. func TestExecError(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("exec-error-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start the container. - if err := d.Run("alpine", "sleep", "1000"); err != nil { + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "sleep", "1000"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() - _, err := d.Exec("no_can_find") + // Attempt to exec a binary that doesn't exist. + out, err := d.Exec(dockerutil.RunOpts{}, "no_can_find") if err == nil { t.Fatalf("docker exec didn't fail") } - if want := `error finding executable "no_can_find" in PATH`; !strings.Contains(err.Error(), want) { - t.Fatalf("docker exec wrong error, got: %s, want: .*%s.*", err.Error(), want) + if want := `error finding executable "no_can_find" in PATH`; !strings.Contains(out, want) { + t.Fatalf("docker exec wrong error, got: %s, want: .*%s.*", out, want) } } // Test that exec inherits environment from run. func TestExecEnv(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("exec-env-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start the container with env FOO=BAR. - if err := d.Run("-e", "FOO=BAR", "alpine", "sleep", "1000"); err != nil { + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + Env: []string{"FOO=BAR"}, + }, "sleep", "1000"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Exec "echo $FOO". - got, err := d.Exec("/bin/sh", "-c", "echo $FOO") + got, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", "echo $FOO") if err != nil { t.Fatalf("docker exec failed: %v", err) } @@ -216,17 +222,19 @@ func TestExecEnv(t *testing.T) { // TestRunEnvHasHome tests that run always has HOME environment set. func TestRunEnvHasHome(t *testing.T) { // Base alpine image does not have any environment variables set. - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("run-env-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Exec "echo $HOME". The 'bin' user's home dir is '/bin'. - got, err := d.RunFg("--user", "bin", "alpine", "/bin/sh", "-c", "echo $HOME") + got, err := d.Run(dockerutil.RunOpts{ + Image: "basic/alpine", + User: "bin", + }, "/bin/sh", "-c", "echo $HOME") if err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() + + // Check that the directory matches. if got, want := strings.TrimSpace(got), "/bin"; got != want { t.Errorf("bad output from 'docker run'. Got %q; Want %q.", got, want) } @@ -235,18 +243,17 @@ func TestRunEnvHasHome(t *testing.T) { // Test that exec always has HOME environment set, even when not set in run. func TestExecEnvHasHome(t *testing.T) { // Base alpine image does not have any environment variables set. 
- if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("exec-env-home-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() - if err := d.Run("alpine", "sleep", "1000"); err != nil { + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "sleep", "1000"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Exec "echo $HOME", and expect to see "/root". - got, err := d.Exec("/bin/sh", "-c", "echo $HOME") + got, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", "echo $HOME") if err != nil { t.Fatalf("docker exec failed: %v", err) } @@ -258,12 +265,14 @@ func TestExecEnvHasHome(t *testing.T) { newUID := 1234 newHome := "/foo/bar" cmd := fmt.Sprintf("mkdir -p -m 777 %q && adduser foo -D -u %d -h %q", newHome, newUID, newHome) - if _, err := d.Exec("/bin/sh", "-c", cmd); err != nil { + if _, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", cmd); err != nil { t.Fatalf("docker exec failed: %v", err) } // Execute the same as the new user and expect newHome. - got, err = d.ExecAsUser(strconv.Itoa(newUID), "/bin/sh", "-c", "echo $HOME") + got, err = d.Exec(dockerutil.RunOpts{ + User: strconv.Itoa(newUID), + }, "/bin/sh", "-c", "echo $HOME") if err != nil { t.Fatalf("docker exec failed: %v", err) } diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index cc4fbbaed..404e37689 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -27,14 +27,15 @@ import ( "net" "net/http" "os" + "os/exec" "strconv" "strings" "syscall" "testing" "time" - "gvisor.dev/gvisor/runsc/dockerutil" - "gvisor.dev/gvisor/runsc/testutil" + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" ) // httpRequestSucceeds sends a request to a given url and checks that the status is OK. @@ -53,65 +54,66 @@ func httpRequestSucceeds(client http.Client, server string, port int) error { // TestLifeCycle tests a basic Create/Start/Stop docker container life cycle. func TestLifeCycle(t *testing.T) { - if err := dockerutil.Pull("nginx"); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("lifecycle-test") - if err := d.Create("-p", "80", "nginx"); err != nil { - t.Fatal("docker create failed:", err) + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + // Start the container. + if err := d.Create(dockerutil.RunOpts{ + Image: "basic/nginx", + Ports: []int{80}, + }); err != nil { + t.Fatalf("docker create failed: %v", err) } if err := d.Start(); err != nil { - d.CleanUp() - t.Fatal("docker start failed:", err) + t.Fatalf("docker start failed: %v", err) } - // Test that container is working + // Test that container is working. 
port, err := d.FindPort(80) if err != nil { - t.Fatal("docker.FindPort(80) failed: ", err) + t.Fatalf("docker.FindPort(80) failed: %v", err) } if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { - t.Fatal("WaitForHTTP() timeout:", err) + t.Fatalf("WaitForHTTP() timeout: %v", err) } client := http.Client{Timeout: time.Duration(2 * time.Second)} if err := httpRequestSucceeds(client, "localhost", port); err != nil { - t.Error("http request failed:", err) + t.Errorf("http request failed: %v", err) } if err := d.Stop(); err != nil { - d.CleanUp() - t.Fatal("docker stop failed:", err) + t.Fatalf("docker stop failed: %v", err) } if err := d.Remove(); err != nil { - t.Fatal("docker rm failed:", err) + t.Fatalf("docker rm failed: %v", err) } } func TestPauseResume(t *testing.T) { - const img = "gcr.io/gvisor-presubmit/python-hello" if !testutil.IsCheckpointSupported() { - t.Log("Checkpoint is not supported, skipping test.") - return + t.Skip("Checkpoint is not supported.") } - if err := dockerutil.Pull(img); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("pause-resume-test") - if err := d.Run("-p", "8080", img); err != nil { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + // Start the container. + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/python", + Ports: []int{8080}, // See Dockerfile. + }); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Find where port 8080 is mapped to. port, err := d.FindPort(8080) if err != nil { - t.Fatal("docker.FindPort(8080) failed:", err) + t.Fatalf("docker.FindPort(8080) failed: %v", err) } // Wait until it's up and running. if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { - t.Fatal("WaitForHTTP() timeout:", err) + t.Fatalf("WaitForHTTP() timeout: %v", err) } // Check that container is working. @@ -121,7 +123,7 @@ func TestPauseResume(t *testing.T) { } if err := d.Pause(); err != nil { - t.Fatal("docker pause failed:", err) + t.Fatalf("docker pause failed: %v", err) } // Check if container is paused. @@ -137,12 +139,12 @@ func TestPauseResume(t *testing.T) { } if err := d.Unpause(); err != nil { - t.Fatal("docker unpause failed:", err) + t.Fatalf("docker unpause failed: %v", err) } // Wait until it's up and running. if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { - t.Fatal("WaitForHTTP() timeout:", err) + t.Fatalf("WaitForHTTP() timeout: %v", err) } // Check if container is working again. @@ -152,43 +154,43 @@ func TestPauseResume(t *testing.T) { } func TestCheckpointRestore(t *testing.T) { - const img = "gcr.io/gvisor-presubmit/python-hello" if !testutil.IsCheckpointSupported() { - t.Log("Pause/resume is not supported, skipping test.") - return + t.Skip("Pause/resume is not supported.") } - if err := dockerutil.Pull(img); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("save-restore-test") - if err := d.Run("-p", "8080", img); err != nil { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + // Start the container. + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/python", + Ports: []int{8080}, // See Dockerfile. + }); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() + // Create a snapshot. 
if err := d.Checkpoint("test"); err != nil { - t.Fatal("docker checkpoint failed:", err) + t.Fatalf("docker checkpoint failed: %v", err) } - if _, err := d.Wait(30 * time.Second); err != nil { - t.Fatal(err) + t.Fatalf("wait failed: %v", err) } // TODO(b/143498576): Remove Poll after github.com/moby/moby/issues/38963 is fixed. if err := testutil.Poll(func() error { return d.Restore("test") }, 15*time.Second); err != nil { - t.Fatal("docker restore failed:", err) + t.Fatalf("docker restore failed: %v", err) } // Find where port 8080 is mapped to. port, err := d.FindPort(8080) if err != nil { - t.Fatal("docker.FindPort(8080) failed:", err) + t.Fatalf("docker.FindPort(8080) failed: %v", err) } // Wait until it's up and running. if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { - t.Fatal("WaitForHTTP() timeout:", err) + t.Fatalf("WaitForHTTP() timeout: %v", err) } // Check if container is working again. @@ -200,26 +202,28 @@ func TestCheckpointRestore(t *testing.T) { // Create client and server that talk to each other using the local IP. func TestConnectToSelf(t *testing.T) { - d := dockerutil.MakeDocker("connect-to-self-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Creates server that replies "server" and exists. Sleeps at the end because // 'docker exec' gets killed if the init process exists before it can finish. - if err := d.Run("ubuntu:trusty", "/bin/sh", "-c", "echo server | nc -l -p 8080 && sleep 1"); err != nil { - t.Fatal("docker run failed:", err) + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/ubuntu", + }, "/bin/sh", "-c", "echo server | nc -l -p 8080 && sleep 1"); err != nil { + t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Finds IP address for host. - ip, err := d.Exec("/bin/sh", "-c", "cat /etc/hosts | grep ${HOSTNAME} | awk '{print $1}'") + ip, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", "cat /etc/hosts | grep ${HOSTNAME} | awk '{print $1}'") if err != nil { - t.Fatal("docker exec failed:", err) + t.Fatalf("docker exec failed: %v", err) } ip = strings.TrimRight(ip, "\n") // Runs client that sends "client" to the server and exits. - reply, err := d.Exec("/bin/sh", "-c", fmt.Sprintf("echo client | nc %s 8080", ip)) + reply, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", fmt.Sprintf("echo client | nc %s 8080", ip)) if err != nil { - t.Fatal("docker exec failed:", err) + t.Fatalf("docker exec failed: %v", err) } // Ensure both client and server got the message from each other. @@ -227,21 +231,22 @@ func TestConnectToSelf(t *testing.T) { t.Errorf("Error on server, want: %q, got: %q", want, reply) } if _, err := d.WaitForOutput("^client\n$", 1*time.Second); err != nil { - t.Fatal("docker.WaitForOutput(client) timeout:", err) + t.Fatalf("docker.WaitForOutput(client) timeout: %v", err) } } func TestMemLimit(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("cgroup-test") - cmd := "cat /proc/meminfo | grep MemTotal: | awk '{print $2}'" - out, err := d.RunFg("--memory=500MB", "alpine", "sh", "-c", cmd) + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + allocMemory := 500 * 1024 + out, err := d.Run(dockerutil.RunOpts{ + Image: "basic/alpine", + Memory: allocMemory, // In kB. + }, "sh", "-c", "cat /proc/meminfo | grep MemTotal: | awk '{print $2}'") if err != nil { - t.Fatal("docker run failed:", err) + t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Remove warning message that swap isn't present. 
if strings.HasPrefix(out, "WARNING") { @@ -252,27 +257,30 @@ func TestMemLimit(t *testing.T) { out = lines[1] } + // Ensure the memory matches what we want. got, err := strconv.ParseUint(strings.TrimSpace(out), 10, 64) if err != nil { t.Fatalf("failed to parse %q: %v", out, err) } - if want := uint64(500 * 1024); got != want { + if want := uint64(allocMemory); got != want { t.Errorf("MemTotal got: %d, want: %d", got, want) } } func TestNumCPU(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("cgroup-test") - cmd := "cat /proc/cpuinfo | grep 'processor.*:' | wc -l" - out, err := d.RunFg("--cpuset-cpus=0", "alpine", "sh", "-c", cmd) + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + // Read how many cores are in the container. + out, err := d.Run(dockerutil.RunOpts{ + Image: "basic/alpine", + Extra: []string{"--cpuset-cpus=0"}, + }, "sh", "-c", "cat /proc/cpuinfo | grep 'processor.*:' | wc -l") if err != nil { - t.Fatal("docker run failed:", err) + t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() + // Ensure it matches what we want. got, err := strconv.Atoi(strings.TrimSpace(out)) if err != nil { t.Fatalf("failed to parse %q: %v", out, err) @@ -284,39 +292,39 @@ func TestNumCPU(t *testing.T) { // TestJobControl tests that job control characters are handled properly. func TestJobControl(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("job-control-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start the container with an attached PTY. - _, ptmx, err := d.RunWithPty("alpine", "sh") - if err != nil { + if _, err := d.Run(dockerutil.RunOpts{ + Image: "basic/alpine", + Pty: func(_ *exec.Cmd, ptmx *os.File) { + // Call "sleep 100" in the shell. + if _, err := ptmx.Write([]byte("sleep 100\n")); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + + // Give shell a few seconds to start executing the sleep. + time.Sleep(2 * time.Second) + + // Send a ^C to the pty, which should kill sleep, but + // not the shell. \x03 is ASCII "end of text", which + // is the same as ^C. + if _, err := ptmx.Write([]byte{'\x03'}); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + + // The shell should still be alive at this point. Sleep + // should have exited with code 2+128=130. We'll exit + // with 10 plus that number, so that we can be sure + // that the shell did not get signalled. + if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil { + t.Fatalf("error writing to pty: %v", err) + } + }, + }, "sh"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer ptmx.Close() - defer d.CleanUp() - - // Call "sleep 100" in the shell. - if _, err := ptmx.Write([]byte("sleep 100\n")); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // Give shell a few seconds to start executing the sleep. - time.Sleep(2 * time.Second) - - // Send a ^C to the pty, which should kill sleep, but not the shell. - // \x03 is ASCII "end of text", which is the same as ^C. - if _, err := ptmx.Write([]byte{'\x03'}); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // The shell should still be alive at this point. Sleep should have - // exited with code 2+128=130. We'll exit with 10 plus that number, so - // that we can be sure that the shell did not get signalled. - if _, err := ptmx.Write([]byte("exit $(expr $? 
+ 10)\n")); err != nil { - t.Fatalf("error writing to pty: %v", err) - } // Wait for the container to exit. got, err := d.Wait(5 * time.Second) @@ -332,14 +340,25 @@ func TestJobControl(t *testing.T) { // TestTmpFile checks that files inside '/tmp' are not overridden. In addition, // it checks that working dir is created if it doesn't exist. func TestTmpFile(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatal("docker pull failed:", err) + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + // Should work without ReadOnly + if _, err := d.Run(dockerutil.RunOpts{ + Image: "basic/alpine", + WorkDir: "/tmp/foo/bar", + }, "touch", "/tmp/foo/bar/file"); err != nil { + t.Fatalf("docker run failed: %v", err) } - d := dockerutil.MakeDocker("tmp-file-test") - if err := d.Run("-w=/tmp/foo/bar", "--read-only", "alpine", "touch", "/tmp/foo/bar/file"); err != nil { - t.Fatal("docker run failed:", err) + + // Expect failure. + if _, err := d.Run(dockerutil.RunOpts{ + Image: "basic/alpine", + WorkDir: "/tmp/foo/bar", + ReadOnly: true, + }, "touch", "/tmp/foo/bar/file"); err == nil { + t.Fatalf("docker run expected failure, but succeeded") } - defer d.CleanUp() } func TestMain(m *testing.M) { diff --git a/test/e2e/regression_test.go b/test/e2e/regression_test.go index 2488be383..327a2174c 100644 --- a/test/e2e/regression_test.go +++ b/test/e2e/regression_test.go @@ -18,7 +18,7 @@ import ( "strings" "testing" - "gvisor.dev/gvisor/runsc/dockerutil" + "gvisor.dev/gvisor/pkg/test/dockerutil" ) // Test that UDS can be created using overlay when parent directory is in lower @@ -27,19 +27,19 @@ import ( // Prerequisite: the directory where the socket file is created must not have // been open for write before bind(2) is called. func TestBindOverlay(t *testing.T) { - if err := dockerutil.Pull("ubuntu:trusty"); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("bind-overlay-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() - cmd := "nc -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -U /var/run/sock && wait $p" - got, err := d.RunFg("ubuntu:trusty", "bash", "-c", cmd) + // Run the container. + got, err := d.Run(dockerutil.RunOpts{ + Image: "basic/ubuntu", + }, "bash", "-c", "nc -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -U /var/run/sock && wait $p") if err != nil { - t.Fatal("docker run failed:", err) + t.Fatalf("docker run failed: %v", err) } + // Check the output contains what we want.
if want := "foobar-asdf"; !strings.Contains(got, want) { t.Fatalf("docker run output is missing %q: %s", want, got) } - defer d.CleanUp() } diff --git a/test/image/BUILD b/test/image/BUILD index 7392ac54e..e749e47d4 100644 --- a/test/image/BUILD +++ b/test/image/BUILD @@ -22,8 +22,8 @@ go_test( ], visibility = ["//:sandbox"], deps = [ - "//runsc/dockerutil", - "//runsc/testutil", + "//pkg/test/dockerutil", + "//pkg/test/testutil", ], ) diff --git a/test/image/image_test.go b/test/image/image_test.go index 0a1e19d6f..2e3543109 100644 --- a/test/image/image_test.go +++ b/test/image/image_test.go @@ -28,24 +28,29 @@ import ( "log" "net/http" "os" - "path/filepath" "strings" "testing" "time" - "gvisor.dev/gvisor/runsc/dockerutil" - "gvisor.dev/gvisor/runsc/testutil" + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" ) func TestHelloWorld(t *testing.T) { - d := dockerutil.MakeDocker("hello-test") - if err := d.Run("hello-world"); err != nil { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + // Run the basic container. + out, err := d.Run(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "echo", "Hello world!") + if err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() - if _, err := d.WaitForOutput("Hello from Docker!", 5*time.Second); err != nil { - t.Fatalf("docker didn't say hello: %v", err) + // Check the output. + if !strings.Contains(out, "Hello world!") { + t.Fatalf("docker didn't say hello: got %s", out) } } @@ -102,27 +107,22 @@ func testHTTPServer(t *testing.T, port int) { } func TestHttpd(t *testing.T) { - if err := dockerutil.Pull("httpd"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("http-test") - - dir, err := dockerutil.PrepareFiles("test/image/latin10k.txt") - if err != nil { - t.Fatalf("PrepareFiles() failed: %v", err) - } + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start the container. - mountArg := dockerutil.MountArg(dir, "/usr/local/apache2/htdocs", dockerutil.ReadOnly) - if err := d.Run("-p", "80", mountArg, "httpd"); err != nil { + d.CopyFiles("/usr/local/apache2/htdocs", "test/image/latin10k.txt") + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/httpd", + Ports: []int{80}, + }); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Find where port 80 is mapped to. port, err := d.FindPort(80) if err != nil { - t.Fatalf("docker.FindPort(80) failed: %v", err) + t.Fatalf("FindPort(80) failed: %v", err) } // Wait until it's up and running. @@ -134,27 +134,22 @@ func TestHttpd(t *testing.T) { } func TestNginx(t *testing.T) { - if err := dockerutil.Pull("nginx"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("net-test") - - dir, err := dockerutil.PrepareFiles("test/image/latin10k.txt") - if err != nil { - t.Fatalf("PrepareFiles() failed: %v", err) - } + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start the container. - mountArg := dockerutil.MountArg(dir, "/usr/share/nginx/html", dockerutil.ReadOnly) - if err := d.Run("-p", "80", mountArg, "nginx"); err != nil { + d.CopyFiles("/usr/share/nginx/html", "test/image/latin10k.txt") + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/nginx", + Ports: []int{80}, + }); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Find where port 80 is mapped to. 
port, err := d.FindPort(80) if err != nil { - t.Fatalf("docker.FindPort(80) failed: %v", err) + t.Fatalf("FindPort(80) failed: %v", err) } // Wait until it's up and running. @@ -166,99 +161,58 @@ func TestNginx(t *testing.T) { } func TestMysql(t *testing.T) { - if err := dockerutil.Pull("mysql"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("mysql-test") + server := dockerutil.MakeDocker(t) + defer server.CleanUp() // Start the container. - if err := d.Run("-e", "MYSQL_ROOT_PASSWORD=foobar123", "mysql"); err != nil { + if err := server.Spawn(dockerutil.RunOpts{ + Image: "basic/mysql", + Env: []string{"MYSQL_ROOT_PASSWORD=foobar123"}, + }); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Wait until it's up and running. - if _, err := d.WaitForOutput("port: 3306 MySQL Community Server", 3*time.Minute); err != nil { - t.Fatalf("docker.WaitForOutput() timeout: %v", err) + if _, err := server.WaitForOutput("port: 3306 MySQL Community Server", 3*time.Minute); err != nil { + t.Fatalf("WaitForOutput() timeout: %v", err) } - client := dockerutil.MakeDocker("mysql-client-test") - dir, err := dockerutil.PrepareFiles("test/image/mysql.sql") - if err != nil { - t.Fatalf("PrepareFiles() failed: %v", err) - } + // Generate the client and copy in the SQL payload. + client := dockerutil.MakeDocker(t) + defer client.CleanUp() - // Tell mysql client to connect to the server and execute the file in verbose - // mode to verify the output. - args := []string{ - dockerutil.LinkArg(&d, "mysql"), - dockerutil.MountArg(dir, "/sql", dockerutil.ReadWrite), - "mysql", - "mysql", "-hmysql", "-uroot", "-pfoobar123", "-v", "-e", "source /sql/mysql.sql", - } - if err := client.Run(args...); err != nil { + // Tell mysql client to connect to the server and execute the file in + // verbose mode to verify the output. + client.CopyFiles("/sql", "test/image/mysql.sql") + client.Link("mysql", server) + if _, err := client.Run(dockerutil.RunOpts{ + Image: "basic/mysql", + }, "mysql", "-hmysql", "-uroot", "-pfoobar123", "-v", "-e", "source /sql/mysql.sql"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer client.CleanUp() // Ensure file executed to the end and shutdown mysql. - if _, err := client.WaitForOutput("--------------\nshutdown\n--------------", 15*time.Second); err != nil { - t.Fatalf("docker.WaitForOutput() timeout: %v", err) - } - if _, err := d.WaitForOutput("mysqld: Shutdown complete", 30*time.Second); err != nil { - t.Fatalf("docker.WaitForOutput() timeout: %v", err) + if _, err := server.WaitForOutput("mysqld: Shutdown complete", 30*time.Second); err != nil { + t.Fatalf("WaitForOutput() timeout: %v", err) } } -func TestPythonHello(t *testing.T) { - // TODO(b/136503277): Once we have more complete python runtime tests, - // we can drop this one. - const img = "gcr.io/gvisor-presubmit/python-hello" - if err := dockerutil.Pull(img); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("python-hello-test") - if err := d.Run("-p", "8080", img); err != nil { - t.Fatalf("docker run failed: %v", err) - } +func TestTomcat(t *testing.T) { + d := dockerutil.MakeDocker(t) defer d.CleanUp() - // Find where port 8080 is mapped to. - port, err := d.FindPort(8080) - if err != nil { - t.Fatalf("docker.FindPort(8080) failed: %v", err) - } - - // Wait until it's up and running. 
- if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { - t.Fatalf("WaitForHTTP() timeout: %v", err) - } - - // Ensure that content is being served. - url := fmt.Sprintf("http://localhost:%d", port) - resp, err := http.Get(url) - if err != nil { - t.Errorf("Error reaching http server: %v", err) - } - if want := http.StatusOK; resp.StatusCode != want { - t.Errorf("Wrong response code, got: %d, want: %d", resp.StatusCode, want) - } -} - -func TestTomcat(t *testing.T) { - if err := dockerutil.Pull("tomcat:8.0"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("tomcat-test") - if err := d.Run("-p", "8080", "tomcat:8.0"); err != nil { + // Start the server. + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/tomcat", + Ports: []int{8080}, + }); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Find where port 8080 is mapped to. port, err := d.FindPort(8080) if err != nil { - t.Fatalf("docker.FindPort(8080) failed: %v", err) + t.Fatalf("FindPort(8080) failed: %v", err) } // Wait until it's up and running. @@ -278,28 +232,22 @@ func TestTomcat(t *testing.T) { } func TestRuby(t *testing.T) { - if err := dockerutil.Pull("ruby"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("ruby-test") - - dir, err := dockerutil.PrepareFiles("test/image/ruby.rb", "test/image/ruby.sh") - if err != nil { - t.Fatalf("PrepareFiles() failed: %v", err) - } - if err := os.Chmod(filepath.Join(dir, "ruby.sh"), 0333); err != nil { - t.Fatalf("os.Chmod(%q, 0333) failed: %v", dir, err) - } + d := dockerutil.MakeDocker(t) + defer d.CleanUp() - if err := d.Run("-p", "8080", dockerutil.MountArg(dir, "/src", dockerutil.ReadOnly), "ruby", "/src/ruby.sh"); err != nil { + // Execute the ruby workload. + d.CopyFiles("/src", "test/image/ruby.rb", "test/image/ruby.sh") + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/ruby", + Ports: []int{8080}, + }, "/src/ruby.sh"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // Find where port 8080 is mapped to. port, err := d.FindPort(8080) if err != nil { - t.Fatalf("docker.FindPort(8080) failed: %v", err) + t.Fatalf("FindPort(8080) failed: %v", err) } // Wait until it's up and running, 'gem install' can take some time. 
@@ -326,18 +274,17 @@ func TestRuby(t *testing.T) { } func TestStdio(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatalf("docker pull failed: %v", err) - } - d := dockerutil.MakeDocker("stdio-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() wantStdout := "hello stdout" wantStderr := "bonjour stderr" cmd := fmt.Sprintf("echo %q; echo %q 1>&2;", wantStdout, wantStderr) - if err := d.Run("alpine", "/bin/sh", "-c", cmd); err != nil { + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "/bin/sh", "-c", cmd); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() for _, want := range []string{wantStdout, wantStderr} { if _, err := d.WaitForOutput(want, 5*time.Second); err != nil { diff --git a/test/image/ruby.sh b/test/image/ruby.sh old mode 100644 new mode 100755 diff --git a/test/iptables/BUILD b/test/iptables/BUILD index 6bb3b82b5..3e29ca90d 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -14,7 +14,7 @@ go_library( ], visibility = ["//test/iptables:__subpackages__"], deps = [ - "//runsc/testutil", + "//pkg/test/testutil", ], ) @@ -23,14 +23,14 @@ go_test( srcs = [ "iptables_test.go", ], + data = ["//test/iptables/runner"], library = ":iptables", tags = [ "local", "manual", ], deps = [ - "//pkg/log", - "//runsc/dockerutil", - "//runsc/testutil", + "//pkg/test/dockerutil", + "//pkg/test/testutil", ], ) diff --git a/test/iptables/README.md b/test/iptables/README.md index cc8a2fcac..b9f44bd40 100644 --- a/test/iptables/README.md +++ b/test/iptables/README.md @@ -38,7 +38,7 @@ Build the testing Docker container. Re-run this when you modify the test code in this directory: ```bash -$ bazel run //test/iptables/runner:runner-image -- --norun +$ make load-iptables ``` Run an individual test via: diff --git a/test/iptables/iptables.go b/test/iptables/iptables.go index 2e565d988..16cb4f4da 100644 --- a/test/iptables/iptables.go +++ b/test/iptables/iptables.go @@ -18,12 +18,19 @@ package iptables import ( "fmt" "net" + "time" ) // IPExchangePort is the port the container listens on to receive the IP // address of the local process. const IPExchangePort = 2349 +// TerminalStatement is the last statement in the test runner. +const TerminalStatement = "Finished!" + +// TestTimeout is the timeout used for all tests. +const TestTimeout = 10 * time.Minute + // A TestCase contains one action to run in the container and one to run // locally. The actions run concurrently and each must succeed for the test // pass. diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 493d69052..334d8e676 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -15,28 +15,14 @@ package iptables import ( - "flag" "fmt" "net" - "os" - "path" "testing" - "time" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/dockerutil" - "gvisor.dev/gvisor/runsc/testutil" + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" ) -const timeout = 18 * time.Second - -var image = flag.String("image", "bazel/test/iptables/runner:runner-image", "image to run tests in") - -type result struct { - output string - err error -} - // singleTest runs a TestCase. Each test follows a pattern: // - Create a container. // - Get the container's IP. @@ -46,77 +32,45 @@ type result struct { // // Container output is logged to $TEST_UNDECLARED_OUTPUTS_DIR if it exists, or // to stderr. 
-func singleTest(test TestCase) error { +func singleTest(t *testing.T, test TestCase) { if _, ok := Tests[test.Name()]; !ok { - return fmt.Errorf("no test found with name %q. Has it been registered?", test.Name()) + t.Fatalf("no test found with name %q. Has it been registered?", test.Name()) } + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + // Create and start the container. - cont := dockerutil.MakeDocker("gvisor-iptables") - defer cont.CleanUp() - resultChan := make(chan *result) - go func() { - output, err := cont.RunFg("--cap-add=NET_ADMIN", *image, "-name", test.Name()) - logContainer(output, err) - resultChan <- &result{output, err} - }() + d.CopyFiles("/runner", "test/iptables/runner/runner") + if err := d.Spawn(dockerutil.RunOpts{ + Image: "iptables", + CapAdd: []string{"NET_ADMIN"}, + }, "/runner/runner", "-name", test.Name()); err != nil { + t.Fatalf("docker run failed: %v", err) + } // Get the container IP. - ip, err := getIP(cont) + ip, err := d.FindIP() if err != nil { - return fmt.Errorf("failed to get container IP: %v", err) + t.Fatalf("failed to get container IP: %v", err) } // Give the container our IP. if err := sendIP(ip); err != nil { - return fmt.Errorf("failed to send IP to container: %v", err) + t.Fatalf("failed to send IP to container: %v", err) } // Run our side of the test. - errChan := make(chan error) - go func() { - errChan <- test.LocalAction(ip) - }() - - // Wait for both the container and local tests to finish. - var res *result - to := time.After(timeout) - for localDone := false; res == nil || !localDone; { - select { - case res = <-resultChan: - log.Infof("Container finished.") - case err, localDone = <-errChan: - log.Infof("Local finished.") - if err != nil { - return fmt.Errorf("local test failed: %v", err) - } - case <-to: - return fmt.Errorf("timed out after %f seconds", timeout.Seconds()) - } + if err := test.LocalAction(ip); err != nil { + t.Fatalf("LocalAction failed: %v", err) } - return res.err -} - -func getIP(cont dockerutil.Docker) (net.IP, error) { - // The container might not have started yet, so retry a few times. - var ipStr string - to := time.After(timeout) - for ipStr == "" { - ipStr, _ = cont.FindIP() - select { - case <-to: - return net.IP{}, fmt.Errorf("timed out getting IP after %f seconds", timeout.Seconds()) - default: - time.Sleep(250 * time.Millisecond) - } - } - ip := net.ParseIP(ipStr) - if ip == nil { - return net.IP{}, fmt.Errorf("invalid IP: %q", ipStr) + // Wait for the final statement. This structure has the side effect + // that all container logs will appear within the individual test + // context. 
+ if _, err := d.WaitForOutput(TerminalStatement, TestTimeout); err != nil { + t.Fatalf("test failed: %v", err) } - log.Infof("Container has IP of %s", ipStr) - return ip, nil } func sendIP(ip net.IP) error { @@ -132,7 +86,7 @@ func sendIP(ip net.IP) error { conn = c return err } - if err := testutil.Poll(cb, timeout); err != nil { + if err := testutil.Poll(cb, TestTimeout); err != nil { return fmt.Errorf("timed out waiting to send IP, most recent error: %v", err) } if _, err := conn.Write([]byte{0}); err != nil { @@ -141,281 +95,184 @@ func sendIP(ip net.IP) error { return nil } -func logContainer(output string, err error) { - msg := fmt.Sprintf("Container error: %v\nContainer output:\n%v", err, output) - if artifactsDir := os.Getenv("TEST_UNDECLARED_OUTPUTS_DIR"); artifactsDir != "" { - fpath := path.Join(artifactsDir, "container.log") - if file, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE, 0644); err != nil { - log.Warningf("Failed to open log file %q: %v", fpath, err) - } else { - defer file.Close() - if _, err := file.Write([]byte(msg)); err == nil { - return - } - log.Warningf("Failed to write to log file %s: %v", fpath, err) - } - } - - // We couldn't write to the output directory -- just log to stderr. - log.Infof(msg) -} - func TestFilterInputDropUDP(t *testing.T) { - if err := singleTest(FilterInputDropUDP{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDropUDP{}) } func TestFilterInputDropUDPPort(t *testing.T) { - if err := singleTest(FilterInputDropUDPPort{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDropUDPPort{}) } func TestFilterInputDropDifferentUDPPort(t *testing.T) { - if err := singleTest(FilterInputDropDifferentUDPPort{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDropDifferentUDPPort{}) } func TestFilterInputDropAll(t *testing.T) { - if err := singleTest(FilterInputDropAll{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDropAll{}) } func TestFilterInputDropOnlyUDP(t *testing.T) { - if err := singleTest(FilterInputDropOnlyUDP{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDropOnlyUDP{}) } func TestNATRedirectUDPPort(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATRedirectUDPPort{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATRedirectUDPPort{}) } func TestNATRedirectTCPPort(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATRedirectTCPPort{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATRedirectTCPPort{}) } func TestNATDropUDP(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATDropUDP{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATDropUDP{}) } func TestNATAcceptAll(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. 
t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATAcceptAll{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATAcceptAll{}) } func TestFilterInputDropTCPDestPort(t *testing.T) { - if err := singleTest(FilterInputDropTCPDestPort{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDropTCPDestPort{}) } func TestFilterInputDropTCPSrcPort(t *testing.T) { - if err := singleTest(FilterInputDropTCPSrcPort{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDropTCPSrcPort{}) } func TestFilterInputCreateUserChain(t *testing.T) { - if err := singleTest(FilterInputCreateUserChain{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputCreateUserChain{}) } func TestFilterInputDefaultPolicyAccept(t *testing.T) { - if err := singleTest(FilterInputDefaultPolicyAccept{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDefaultPolicyAccept{}) } func TestFilterInputDefaultPolicyDrop(t *testing.T) { - if err := singleTest(FilterInputDefaultPolicyDrop{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDefaultPolicyDrop{}) } func TestFilterInputReturnUnderflow(t *testing.T) { - if err := singleTest(FilterInputReturnUnderflow{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputReturnUnderflow{}) } func TestFilterOutputDropTCPDestPort(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(FilterOutputDropTCPDestPort{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputDropTCPDestPort{}) } func TestFilterOutputDropTCPSrcPort(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(FilterOutputDropTCPSrcPort{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputDropTCPSrcPort{}) } func TestFilterOutputAcceptTCPOwner(t *testing.T) { - if err := singleTest(FilterOutputAcceptTCPOwner{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputAcceptTCPOwner{}) } func TestFilterOutputDropTCPOwner(t *testing.T) { - if err := singleTest(FilterOutputDropTCPOwner{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputDropTCPOwner{}) } func TestFilterOutputAcceptUDPOwner(t *testing.T) { - if err := singleTest(FilterOutputAcceptUDPOwner{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputAcceptUDPOwner{}) } func TestFilterOutputDropUDPOwner(t *testing.T) { - if err := singleTest(FilterOutputDropUDPOwner{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputDropUDPOwner{}) } func TestFilterOutputOwnerFail(t *testing.T) { - if err := singleTest(FilterOutputOwnerFail{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputOwnerFail{}) } func TestJumpSerialize(t *testing.T) { - if err := singleTest(FilterInputSerializeJump{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputSerializeJump{}) } func TestJumpBasic(t *testing.T) { - if err := singleTest(FilterInputJumpBasic{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputJumpBasic{}) } func TestJumpReturn(t *testing.T) { - if err := singleTest(FilterInputJumpReturn{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputJumpReturn{}) } func TestJumpReturnDrop(t *testing.T) { - if err := singleTest(FilterInputJumpReturnDrop{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputJumpReturnDrop{}) } func TestJumpBuiltin(t *testing.T) { - if err := 
singleTest(FilterInputJumpBuiltin{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputJumpBuiltin{}) } func TestJumpTwice(t *testing.T) { - if err := singleTest(FilterInputJumpTwice{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputJumpTwice{}) } func TestInputDestination(t *testing.T) { - if err := singleTest(FilterInputDestination{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputDestination{}) } func TestInputInvertDestination(t *testing.T) { - if err := singleTest(FilterInputInvertDestination{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterInputInvertDestination{}) } func TestOutputDestination(t *testing.T) { - if err := singleTest(FilterOutputDestination{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputDestination{}) } func TestOutputInvertDestination(t *testing.T) { - if err := singleTest(FilterOutputInvertDestination{}); err != nil { - t.Fatal(err) - } + singleTest(t, FilterOutputInvertDestination{}) } func TestNATOutRedirectIP(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATOutRedirectIP{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATOutRedirectIP{}) } func TestNATOutDontRedirectIP(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATOutDontRedirectIP{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATOutDontRedirectIP{}) } func TestNATOutRedirectInvert(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATOutRedirectInvert{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATOutRedirectInvert{}) } func TestNATPreRedirectIP(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATPreRedirectIP{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATPreRedirectIP{}) } func TestNATPreDontRedirectIP(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATPreDontRedirectIP{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATPreDontRedirectIP{}) } func TestNATPreRedirectInvert(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATPreRedirectInvert{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATPreRedirectInvert{}) } func TestNATRedirectRequiresProtocol(t *testing.T) { // TODO(gvisor.dev/issue/170): Enable when supported. 
t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - if err := singleTest(NATRedirectRequiresProtocol{}); err != nil { - t.Fatal(err) - } + singleTest(t, NATRedirectRequiresProtocol{}) } diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 134391e8d..2a00677be 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -20,7 +20,7 @@ import ( "os/exec" "time" - "gvisor.dev/gvisor/runsc/testutil" + "gvisor.dev/gvisor/pkg/test/testutil" ) const iptablesBinary = "iptables" diff --git a/test/iptables/runner/BUILD b/test/iptables/runner/BUILD index b9199387a..24504a1b9 100644 --- a/test/iptables/runner/BUILD +++ b/test/iptables/runner/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "container_image", "go_binary", "go_image") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) @@ -6,18 +6,7 @@ go_binary( name = "runner", testonly = 1, srcs = ["main.go"], - deps = ["//test/iptables"], -) - -container_image( - name = "iptables-base", - base = "@iptables-test//image", -) - -go_image( - name = "runner-image", - testonly = 1, - srcs = ["main.go"], - base = ":iptables-base", + pure = True, + visibility = ["//test/iptables:__subpackages__"], deps = ["//test/iptables"], ) diff --git a/test/iptables/runner/main.go b/test/iptables/runner/main.go index 3c794114e..6f77c0684 100644 --- a/test/iptables/runner/main.go +++ b/test/iptables/runner/main.go @@ -46,6 +46,9 @@ func main() { if err := test.ContainerAction(ip); err != nil { log.Fatalf("Failed running test %q: %v", *name, err) } + + // Emit the final line. + log.Printf("%s", iptables.TerminalStatement) } // getIP listens for a connection from the local process and returns the source diff --git a/test/packetdrill/packetdrill_test.sh b/test/packetdrill/packetdrill_test.sh index c8268170f..922547d65 100755 --- a/test/packetdrill/packetdrill_test.sh +++ b/test/packetdrill/packetdrill_test.sh @@ -85,23 +85,26 @@ if [[ ! -x "${INIT_SCRIPT-}" ]]; then exit 2 fi +function new_net_prefix() { + # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. + echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" +} + # Variables specific to the control network and interface start with CTRL_. # Variables specific to the test network and interface start with TEST_. # Variables specific to the DUT start with DUT_. # Variables specific to the test runner start with TEST_RUNNER_. declare -r PACKETDRILL="/packetdrill/gtests/net/packetdrill/packetdrill" # Use random numbers so that test networks don't collide. -declare -r CTRL_NET="ctrl_net-$(shuf -i 0-99999999 -n 1)" -declare -r TEST_NET="test_net-$(shuf -i 0-99999999 -n 1)" +declare CTRL_NET="ctrl_net-$(shuf -i 0-99999999 -n 1)" +declare CTRL_NET_PREFIX=$(new_net_prefix) +declare TEST_NET="test_net-$(shuf -i 0-99999999 -n 1)" +declare TEST_NET_PREFIX=$(new_net_prefix) declare -r tolerance_usecs=100000 # On both DUT and test runner, testing packets are on the eth2 interface. declare -r TEST_DEVICE="eth2" # Number of bits in the *_NET_PREFIX variables. declare -r NET_MASK="24" -function new_net_prefix() { - # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. - echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" -} # Last bits of the DUT's IP address. declare -r DUT_NET_SUFFIX=".10" # Control port. @@ -137,23 +140,21 @@ function finish { trap finish EXIT # Subnet for control packets between test runner and DUT. -declare CTRL_NET_PREFIX=$(new_net_prefix) while ! 
docker network create \ "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do sleep 0.1 - declare CTRL_NET_PREFIX=$(new_net_prefix) + CTRL_NET_PREFIX=$(new_net_prefix) + CTRL_NET="ctrl_net-$(shuf -i 0-99999999 -n 1)" done # Subnet for the packets that are part of the test. -declare TEST_NET_PREFIX=$(new_net_prefix) while ! docker network create \ "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do sleep 0.1 - declare TEST_NET_PREFIX=$(new_net_prefix) + TEST_NET_PREFIX=$(new_net_prefix) + TEST_NET="test_net-$(shuf -i 0-99999999 -n 1)" done -docker pull "${IMAGE_TAG}" - # Create the DUT container and connect to network. DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \ --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index 9335909c0..3f340c6bc 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -132,7 +132,7 @@ func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) copy(sa.Addr[:], addr.To16()) dut.Bind(fd, &sa) } else { - dut.t.Fatal("unknown ip addr type for remoteIP") + dut.t.Fatalf("unknown ip addr type for remoteIP") } sa := dut.GetSockName(fd) var port int diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh index 2be3c17c3..46d63d5e5 100755 --- a/test/packetimpact/tests/test_runner.sh +++ b/test/packetimpact/tests/test_runner.sh @@ -107,21 +107,24 @@ if [[ ! -f "${TESTBENCH_BINARY-}" ]]; then exit 2 fi +function new_net_prefix() { + # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. + echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" +} + # Variables specific to the control network and interface start with CTRL_. # Variables specific to the test network and interface start with TEST_. # Variables specific to the DUT start with DUT_. # Variables specific to the test bench start with TESTBENCH_. # Use random numbers so that test networks don't collide. -declare -r CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" -declare -r TEST_NET="test_net-${RANDOM}${RANDOM}" +declare CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" +declare CTRL_NET_PREFIX=$(new_net_prefix) +declare TEST_NET="test_net-${RANDOM}${RANDOM}" +declare TEST_NET_PREFIX=$(new_net_prefix) # On both DUT and test bench, testing packets are on the eth2 interface. declare -r TEST_DEVICE="eth2" # Number of bits in the *_NET_PREFIX variables. declare -r NET_MASK="24" -function new_net_prefix() { - # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. - echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" -} # Last bits of the DUT's IP address. declare -r DUT_NET_SUFFIX=".10" # Control port. @@ -130,6 +133,7 @@ declare -r CTRL_PORT="40000" declare -r TESTBENCH_NET_SUFFIX=".20" declare -r TIMEOUT="60" declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetimpact" + # Make sure that docker is installed. docker --version @@ -169,19 +173,19 @@ function finish { trap finish EXIT # Subnet for control packets between test bench and DUT. -declare CTRL_NET_PREFIX=$(new_net_prefix) while ! docker network create \ "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do sleep 0.1 - declare CTRL_NET_PREFIX=$(new_net_prefix) + CTRL_NET_PREFIX=$(new_net_prefix) + CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" done # Subnet for the packets that are part of the test. -declare TEST_NET_PREFIX=$(new_net_prefix) while ! 
docker network create \ "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do sleep 0.1 - declare TEST_NET_PREFIX=$(new_net_prefix) + TEST_NET_PREFIX=$(new_net_prefix) + TEST_NET="test_net-${RANDOM}${RANDOM}" done docker pull "${IMAGE_TAG}" diff --git a/test/root/BUILD b/test/root/BUILD index 05166673a..17e51e66e 100644 --- a/test/root/BUILD +++ b/test/root/BUILD @@ -33,14 +33,12 @@ go_test( ], visibility = ["//:sandbox"], deps = [ - "//runsc/boot", + "//pkg/test/criutil", + "//pkg/test/dockerutil", + "//pkg/test/testutil", "//runsc/cgroup", "//runsc/container", - "//runsc/criutil", - "//runsc/dockerutil", "//runsc/specutils", - "//runsc/testutil", - "//test/root/testdata", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", "@com_github_syndtr_gocapability//capability:go_default_library", diff --git a/test/root/cgroup_test.go b/test/root/cgroup_test.go index 679342def..8876d0d61 100644 --- a/test/root/cgroup_test.go +++ b/test/root/cgroup_test.go @@ -26,9 +26,9 @@ import ( "testing" "time" + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/cgroup" - "gvisor.dev/gvisor/runsc/dockerutil" - "gvisor.dev/gvisor/runsc/testutil" ) func verifyPid(pid int, path string) error { @@ -56,54 +56,70 @@ func verifyPid(pid int, path string) error { return fmt.Errorf("got: %v, want: %d", gots, pid) } -// TestCgroup sets cgroup options and checks that cgroup was properly configured. func TestMemCGroup(t *testing.T) { - allocMemSize := 128 << 20 - if err := dockerutil.Pull("python"); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("memusage-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // Start a new container and allocate the specified about of memory. - args := []string{ - "--memory=256MB", - "python", - "python", - "-c", - fmt.Sprintf("import time; s = 'a' * %d; time.sleep(100)", allocMemSize), - } - if err := d.Run(args...); err != nil { - t.Fatal("docker create failed:", err) + allocMemSize := 128 << 20 + allocMemLimit := 2 * allocMemSize + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/python", + Memory: allocMemLimit / 1024, // Must be in Kb. + }, "python", "-c", fmt.Sprintf("import time; s = 'a' * %d; time.sleep(100)", allocMemSize)); err != nil { + t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() + // Extract the ID to lookup the cgroup. gid, err := d.ID() if err != nil { t.Fatalf("Docker.ID() failed: %v", err) } t.Logf("cgroup ID: %s", gid) - path := filepath.Join("/sys/fs/cgroup/memory/docker", gid, "memory.usage_in_bytes") - memUsage := 0 - // Wait when the container will allocate memory. + memUsage := 0 start := time.Now() - for time.Now().Sub(start) < 30*time.Second { + for time.Since(start) < 30*time.Second { + // Sleep for a brief period of time after spawning the + // container (so that Docker can create the cgroup etc. + // or after looping below (so the application can start). + time.Sleep(100 * time.Millisecond) + + // Read the cgroup memory limit. + path := filepath.Join("/sys/fs/cgroup/memory/docker", gid, "memory.limit_in_bytes") outRaw, err := ioutil.ReadFile(path) if err != nil { - t.Fatalf("failed to read %q: %v", path, err) + // It's possible that the container does not exist yet. 
+ continue } out := strings.TrimSpace(string(outRaw)) + memLimit, err := strconv.Atoi(out) + if err != nil { + t.Fatalf("Atoi(%v): %v", out, err) + } + if memLimit != allocMemLimit { + // The group may not have had the correct limit set yet. + continue + } + + // Read the cgroup memory usage. + path = filepath.Join("/sys/fs/cgroup/memory/docker", gid, "memory.max_usage_in_bytes") + outRaw, err = ioutil.ReadFile(path) + if err != nil { + t.Fatalf("error reading usage: %v", err) + } + out = strings.TrimSpace(string(outRaw)) memUsage, err = strconv.Atoi(out) if err != nil { t.Fatalf("Atoi(%v): %v", out, err) } + t.Logf("read usage: %v, wanted: %v", memUsage, allocMemSize) - if memUsage > allocMemSize { + // Are we done? + if memUsage >= allocMemSize { return } - - time.Sleep(100 * time.Millisecond) } t.Fatalf("%vMB is less than %vMB", memUsage>>20, allocMemSize>>20) @@ -111,10 +127,8 @@ func TestMemCGroup(t *testing.T) { // TestCgroup sets cgroup options and checks that cgroup was properly configured. func TestCgroup(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("cgroup-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() // This is not a comprehensive list of attributes. // @@ -179,10 +193,11 @@ func TestCgroup(t *testing.T) { want: "5", }, { - arg: "--blkio-weight=750", - ctrl: "blkio", - file: "blkio.weight", - want: "750", + arg: "--blkio-weight=750", + ctrl: "blkio", + file: "blkio.weight", + want: "750", + skipIfNotFound: true, // blkio groups may not be available. }, } @@ -191,12 +206,15 @@ func TestCgroup(t *testing.T) { args = append(args, attr.arg) } - args = append(args, "alpine", "sleep", "10000") - if err := d.Run(args...); err != nil { - t.Fatal("docker create failed:", err) + // Start the container. + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + Extra: args, // Cgroup arguments. + }, "sleep", "10000"); err != nil { + t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() + // Lookup the relevant cgroup ID. gid, err := d.ID() if err != nil { t.Fatalf("Docker.ID() failed: %v", err) @@ -245,17 +263,21 @@ func TestCgroup(t *testing.T) { } } +// TestCgroup sets cgroup options and checks that cgroup was properly configured. func TestCgroupParent(t *testing.T) { - if err := dockerutil.Pull("alpine"); err != nil { - t.Fatal("docker pull failed:", err) - } - d := dockerutil.MakeDocker("cgroup-test") + d := dockerutil.MakeDocker(t) + defer d.CleanUp() - parent := testutil.RandomName("runsc") - if err := d.Run("--cgroup-parent", parent, "alpine", "sleep", "10000"); err != nil { - t.Fatal("docker create failed:", err) + // Construct a known cgroup name. + parent := testutil.RandomID("runsc-") + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + Extra: []string{fmt.Sprintf("--cgroup-parent=%s", parent)}, + }, "sleep", "10000"); err != nil { + t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() + + // Extract the ID to look up the cgroup. gid, err := d.ID() if err != nil { t.Fatalf("Docker.ID() failed: %v", err) diff --git a/test/root/chroot_test.go b/test/root/chroot_test.go index be0f63d18..a306132a4 100644 --- a/test/root/chroot_test.go +++ b/test/root/chroot_test.go @@ -24,17 +24,20 @@ import ( "strings" "testing" - "gvisor.dev/gvisor/runsc/dockerutil" + "gvisor.dev/gvisor/pkg/test/dockerutil" ) // TestChroot verifies that the sandbox is chroot'd and that mounts are cleaned // up after the sandbox is destroyed. 
func TestChroot(t *testing.T) { - d := dockerutil.MakeDocker("chroot-test") - if err := d.Run("alpine", "sleep", "10000"); err != nil { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "sleep", "10000"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() pid, err := d.SandboxPid() if err != nil { @@ -76,11 +79,14 @@ func TestChroot(t *testing.T) { } func TestChrootGofer(t *testing.T) { - d := dockerutil.MakeDocker("chroot-test") - if err := d.Run("alpine", "sleep", "10000"); err != nil { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + if err := d.Spawn(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "sleep", "10000"); err != nil { t.Fatalf("docker run failed: %v", err) } - defer d.CleanUp() // It's tricky to find gofers. Get sandbox PID first, then find parent. From // parent get all immediate children, remove the sandbox, and everything else diff --git a/test/root/crictl_test.go b/test/root/crictl_test.go index 3f90c4c6a..85007dcce 100644 --- a/test/root/crictl_test.go +++ b/test/root/crictl_test.go @@ -16,6 +16,7 @@ package root import ( "bytes" + "encoding/json" "fmt" "io" "io/ioutil" @@ -29,16 +30,58 @@ import ( "testing" "time" - "gvisor.dev/gvisor/runsc/criutil" - "gvisor.dev/gvisor/runsc/dockerutil" + "gvisor.dev/gvisor/pkg/test/criutil" + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" - "gvisor.dev/gvisor/test/root/testdata" ) // Tests for crictl have to be run as root (rather than in a user namespace) // because crictl creates named network namespaces in /var/run/netns/. +// SimpleSpec returns a JSON config for a simple container that runs the +// specified command in the specified image. +func SimpleSpec(name, image string, cmd []string, extra map[string]interface{}) string { + s := map[string]interface{}{ + "metadata": map[string]string{ + "name": name, + }, + "image": map[string]string{ + "image": testutil.ImageByName(image), + }, + "log_path": fmt.Sprintf("%s.log", name), + } + if len(cmd) > 0 { // Omit if empty. + s["command"] = cmd + } + for k, v := range extra { + s[k] = v // Extra settings. + } + v, err := json.Marshal(s) + if err != nil { + // This shouldn't happen. + panic(err) + } + return string(v) +} + +// Sandbox is a default JSON config for a sandbox. +var Sandbox = `{ + "metadata": { + "name": "default-sandbox", + "namespace": "default", + "attempt": 1, + "uid": "hdishd83djaidwnduwk28bcsb" + }, + "linux": { + }, + "log_directory": "/tmp" +} +` + +// Httpd is a JSON config for an httpd container. +var Httpd = SimpleSpec("httpd", "basic/httpd", nil, nil) + // TestCrictlSanity refers to b/112433158. func TestCrictlSanity(t *testing.T) { // Setup containerd and crictl. @@ -47,9 +90,9 @@ func TestCrictlSanity(t *testing.T) { t.Fatalf("failed to setup crictl: %v", err) } defer cleanup() - podID, contID, err := crictl.StartPodAndContainer("httpd", testdata.Sandbox, testdata.Httpd) + podID, contID, err := crictl.StartPodAndContainer("basic/httpd", Sandbox, Httpd) if err != nil { - t.Fatal(err) + t.Fatalf("start failed: %v", err) } // Look for the httpd page. @@ -59,10 +102,38 @@ func TestCrictlSanity(t *testing.T) { // Stop everything. if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatal(err) + t.Fatalf("stop failed: %v", err) } } +// HttpdMountPaths is a JSON config for an httpd container with additional +// mounts. 
+var HttpdMountPaths = SimpleSpec("httpd", "basic/httpd", nil, map[string]interface{}{ + "mounts": []map[string]interface{}{ + map[string]interface{}{ + "container_path": "/var/run/secrets/kubernetes.io/serviceaccount", + "host_path": "/var/lib/kubelet/pods/82bae206-cdf5-11e8-b245-8cdcd43ac064/volumes/kubernetes.io~secret/default-token-2rpfx", + "readonly": true, + }, + map[string]interface{}{ + "container_path": "/etc/hosts", + "host_path": "/var/lib/kubelet/pods/82bae206-cdf5-11e8-b245-8cdcd43ac064/etc-hosts", + "readonly": false, + }, + map[string]interface{}{ + "container_path": "/dev/termination-log", + "host_path": "/var/lib/kubelet/pods/82bae206-cdf5-11e8-b245-8cdcd43ac064/containers/httpd/d1709580", + "readonly": false, + }, + map[string]interface{}{ + "container_path": "/usr/local/apache2/htdocs/test", + "host_path": "/var/lib/kubelet/pods/82bae206-cdf5-11e8-b245-8cdcd43ac064", + "readonly": true, + }, + }, + "linux": map[string]interface{}{}, +}) + // TestMountPaths refers to b/117635704. func TestMountPaths(t *testing.T) { // Setup containerd and crictl. @@ -71,9 +142,9 @@ func TestMountPaths(t *testing.T) { t.Fatalf("failed to setup crictl: %v", err) } defer cleanup() - podID, contID, err := crictl.StartPodAndContainer("httpd", testdata.Sandbox, testdata.HttpdMountPaths) + podID, contID, err := crictl.StartPodAndContainer("basic/httpd", Sandbox, HttpdMountPaths) if err != nil { - t.Fatal(err) + t.Fatalf("start failed: %v", err) } // Look for the directory available at /test. @@ -83,7 +154,7 @@ func TestMountPaths(t *testing.T) { // Stop everything. if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatal(err) + t.Fatalf("stop failed: %v", err) } } @@ -95,14 +166,16 @@ func TestMountOverSymlinks(t *testing.T) { t.Fatalf("failed to setup crictl: %v", err) } defer cleanup() - podID, contID, err := crictl.StartPodAndContainer("k8s.gcr.io/busybox", testdata.Sandbox, testdata.MountOverSymlink) + + spec := SimpleSpec("busybox", "basic/resolv", []string{"sleep", "1000"}, nil) + podID, contID, err := crictl.StartPodAndContainer("basic/resolv", Sandbox, spec) if err != nil { - t.Fatal(err) + t.Fatalf("start failed: %v", err) } out, err := crictl.Exec(contID, "readlink", "/etc/resolv.conf") if err != nil { - t.Fatal(err) + t.Fatalf("readlink failed: %v, out: %s", err, out) } if want := "/tmp/resolv.conf"; !strings.Contains(string(out), want) { t.Fatalf("/etc/resolv.conf is not pointing to %q: %q", want, string(out)) @@ -110,11 +183,11 @@ func TestMountOverSymlinks(t *testing.T) { etc, err := crictl.Exec(contID, "cat", "/etc/resolv.conf") if err != nil { - t.Fatal(err) + t.Fatalf("cat failed: %v, out: %s", err, etc) } tmp, err := crictl.Exec(contID, "cat", "/tmp/resolv.conf") if err != nil { - t.Fatal(err) + t.Fatalf("cat failed: %v, out: %s", err, out) } if tmp != etc { t.Fatalf("file content doesn't match:\n\t/etc/resolv.conf: %s\n\t/tmp/resolv.conf: %s", string(etc), string(tmp)) @@ -122,7 +195,7 @@ func TestMountOverSymlinks(t *testing.T) { // Stop everything. 
if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatal(err) + t.Fatalf("stop failed: %v", err) } } @@ -135,16 +208,16 @@ func TestHomeDir(t *testing.T) { t.Fatalf("failed to setup crictl: %v", err) } defer cleanup() - contSpec := testdata.SimpleSpec("root", "k8s.gcr.io/busybox", []string{"sleep", "1000"}) - podID, contID, err := crictl.StartPodAndContainer("k8s.gcr.io/busybox", testdata.Sandbox, contSpec) + contSpec := SimpleSpec("root", "basic/busybox", []string{"sleep", "1000"}, nil) + podID, contID, err := crictl.StartPodAndContainer("basic/busybox", Sandbox, contSpec) if err != nil { - t.Fatal(err) + t.Fatalf("start failed: %v", err) } t.Run("root container", func(t *testing.T) { out, err := crictl.Exec(contID, "sh", "-c", "echo $HOME") if err != nil { - t.Fatal(err) + t.Fatalf("exec failed: %v, out: %s", err, out) } if got, want := strings.TrimSpace(string(out)), "/root"; got != want { t.Fatalf("Home directory invalid. Got %q, Want : %q", got, want) @@ -153,32 +226,47 @@ func TestHomeDir(t *testing.T) { t.Run("sub-container", func(t *testing.T) { // Create a sub container in the same pod. - subContSpec := testdata.SimpleSpec("subcontainer", "k8s.gcr.io/busybox", []string{"sleep", "1000"}) - subContID, err := crictl.StartContainer(podID, "k8s.gcr.io/busybox", testdata.Sandbox, subContSpec) + subContSpec := SimpleSpec("subcontainer", "basic/busybox", []string{"sleep", "1000"}, nil) + subContID, err := crictl.StartContainer(podID, "basic/busybox", Sandbox, subContSpec) if err != nil { - t.Fatal(err) + t.Fatalf("start failed: %v", err) } out, err := crictl.Exec(subContID, "sh", "-c", "echo $HOME") if err != nil { - t.Fatal(err) + t.Fatalf("exec failed: %v, out: %s", err, out) } if got, want := strings.TrimSpace(string(out)), "/root"; got != want { t.Fatalf("Home directory invalid. Got %q, Want: %q", got, want) } if err := crictl.StopContainer(subContID); err != nil { - t.Fatal(err) + t.Fatalf("stop failed: %v", err) } }) // Stop everything. if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatal(err) + t.Fatalf("stop failed: %v", err) } } +// containerdConfigTemplate is a .toml config for containerd. It contains a +// formatting verb so the runtime field can be set via fmt.Sprintf. +const containerdConfigTemplate = ` +disabled_plugins = ["restart"] +[plugins.linux] + runtime = "%s" + runtime_root = "/tmp/test-containerd/runsc" + shim = "/usr/local/bin/gvisor-containerd-shim" + shim_debug = true + +[plugins.cri.containerd.runtimes.runsc] + runtime_type = "io.containerd.runtime.v1.linux" + runtime_engine = "%s" +` + // setup sets up before a test. Specifically it: // * Creates directories and a socket for containerd to utilize. // * Runs containerd and waits for it to reach a "ready" state for testing. @@ -213,50 +301,52 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) { if err != nil { t.Fatalf("error discovering runtime path: %v", err) } - config, err := testutil.WriteTmpFile("containerd-config", testdata.ContainerdConfig(runtime)) + config, configCleanup, err := testutil.WriteTmpFile("containerd-config", fmt.Sprintf(containerdConfigTemplate, runtime, runtime)) if err != nil { t.Fatalf("failed to write containerd config") } - cleanups = append(cleanups, func() { os.RemoveAll(config) }) + cleanups = append(cleanups, configCleanup) // Start containerd. 
- containerd := exec.Command(getContainerd(), + cmd := exec.Command(getContainerd(), "--config", config, "--log-level", "debug", "--root", containerdRoot, "--state", containerdState, "--address", sockAddr) + startupR, startupW := io.Pipe() + defer startupR.Close() + defer startupW.Close() + stderr := &bytes.Buffer{} + stdout := &bytes.Buffer{} + cmd.Stderr = io.MultiWriter(startupW, stderr) + cmd.Stdout = io.MultiWriter(startupW, stdout) cleanups = append(cleanups, func() { - if err := testutil.KillCommand(containerd); err != nil { - log.Printf("error killing containerd: %v", err) - } + t.Logf("containerd stdout: %s", stdout.String()) + t.Logf("containerd stderr: %s", stderr.String()) }) - containerdStderr, err := containerd.StderrPipe() - if err != nil { - t.Fatalf("failed to get containerd stderr: %v", err) - } - containerdStdout, err := containerd.StdoutPipe() - if err != nil { - t.Fatalf("failed to get containerd stdout: %v", err) - } - if err := containerd.Start(); err != nil { + + // Start the process. + if err := cmd.Start(); err != nil { t.Fatalf("failed running containerd: %v", err) } - // Wait for containerd to boot. Then put all containerd output into a - // buffer to be logged at the end of the test. - testutil.WaitUntilRead(containerdStderr, "Start streaming server", nil, 10*time.Second) - stdoutBuf := &bytes.Buffer{} - stderrBuf := &bytes.Buffer{} - go func() { io.Copy(stdoutBuf, containerdStdout) }() - go func() { io.Copy(stderrBuf, containerdStderr) }() + // Wait for containerd to boot. + if err := testutil.WaitUntilRead(startupR, "Start streaming server", nil, 10*time.Second); err != nil { + t.Fatalf("failed to start containerd: %v", err) + } + + // Kill must be the last cleanup (as it will be executed first). + cc := criutil.NewCrictl(t, sockAddr) cleanups = append(cleanups, func() { - t.Logf("containerd stdout: %s", string(stdoutBuf.Bytes())) - t.Logf("containerd stderr: %s", string(stderrBuf.Bytes())) + cc.CleanUp() // Remove tmp files, etc. + if err := testutil.KillCommand(cmd); err != nil { + log.Printf("error killing containerd: %v", err) + } }) cleanup.Release() - return criutil.NewCrictl(20*time.Second, sockAddr), cleanupFunc, nil + return cc, cleanupFunc, nil } // httpGet GETs the contents of a file served from a pod on port 80. diff --git a/test/root/main_test.go b/test/root/main_test.go index d74dec85f..9fb17e0dd 100644 --- a/test/root/main_test.go +++ b/test/root/main_test.go @@ -21,7 +21,7 @@ import ( "testing" "github.com/syndtr/gocapability/capability" - "gvisor.dev/gvisor/runsc/dockerutil" + "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/runsc/specutils" ) diff --git a/test/root/oom_score_adj_test.go b/test/root/oom_score_adj_test.go index 22488b05d..9a3cecd97 100644 --- a/test/root/oom_score_adj_test.go +++ b/test/root/oom_score_adj_test.go @@ -20,10 +20,9 @@ import ( "testing" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) var ( @@ -40,15 +39,6 @@ var ( // TestOOMScoreAdjSingle tests that oom_score_adj is set properly in a // single container sandbox. 
func TestOOMScoreAdjSingle(t *testing.T) { - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - - conf := testutil.TestConfig(t) - conf.RootDir = rootDir - ppid, err := specutils.GetParentPid(os.Getpid()) if err != nil { t.Fatalf("getting parent pid: %v", err) } @@ -89,11 +79,11 @@ func TestOOMScoreAdjSingle(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.Name, func(t *testing.T) { - id := testutil.UniqueContainerID() + id := testutil.RandomContainerID() s := testutil.NewSpecWithArgs("sleep", "1000") s.Process.OOMScoreAdj = testCase.OOMScoreAdj - containers, cleanup, err := startContainers(conf, []*specs.Spec{s}, []string{id}) + containers, cleanup, err := startContainers(t, []*specs.Spec{s}, []string{id}) if err != nil { t.Fatalf("error starting containers: %v", err) } @@ -131,15 +121,6 @@ func TestOOMScoreAdjSingle(t *testing.T) { // TestOOMScoreAdjMulti tests that oom_score_adj is set properly in a // multi-container sandbox. func TestOOMScoreAdjMulti(t *testing.T) { - rootDir, err := testutil.SetupRootDir() - if err != nil { - t.Fatalf("error creating root dir: %v", err) - } - defer os.RemoveAll(rootDir) - - conf := testutil.TestConfig(t) - conf.RootDir = rootDir - ppid, err := specutils.GetParentPid(os.Getpid()) if err != nil { t.Fatalf("getting parent pid: %v", err) } @@ -257,7 +238,7 @@ func TestOOMScoreAdjMulti(t *testing.T) { } } - containers, cleanup, err := startContainers(conf, specs, ids) + containers, cleanup, err := startContainers(t, specs, ids) if err != nil { t.Fatalf("error starting containers: %v", err) } @@ -321,7 +302,7 @@ func TestOOMScoreAdjMulti(t *testing.T) { func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { var specs []*specs.Spec var ids []string - rootID := testutil.UniqueContainerID() + rootID := testutil.RandomContainerID() for i, cmd := range cmds { spec := testutil.NewSpecWithArgs(cmd...) @@ -335,35 +316,48 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeContainer, specutils.ContainerdSandboxIDAnnotation: rootID, } - ids = append(ids, testutil.UniqueContainerID()) + ids = append(ids, testutil.RandomContainerID()) } specs = append(specs, spec) } return specs, ids } -func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*container.Container, func(), error) { - if len(conf.RootDir) == 0 { - panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.") - } - - var containers []*container.Container - var bundles []string - cleanup := func() { +func startContainers(t *testing.T, specs []*specs.Spec, ids []string) ([]*container.Container, func(), error) { + var ( + containers []*container.Container + cleanups []func() + ) + cleanups = append(cleanups, func() { for _, c := range containers { c.Destroy() } - for _, b := range bundles { - os.RemoveAll(b) + }) + cleanupAll := func() { + for _, c := range cleanups { + c() } } + localClean := specutils.MakeCleanup(cleanupAll) + defer localClean.Clean() + + // All containers must share the same root. + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + cleanups = append(cleanups, cleanup) + + // Point the configuration at the shared root directory.
+ conf := testutil.TestConfig(t) + conf.RootDir = rootDir + for i, spec := range specs { - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { - cleanup() - return nil, nil, fmt.Errorf("error setting up container: %v", err) + return nil, nil, fmt.Errorf("error setting up bundle: %v", err) } - bundles = append(bundles, bundleDir) + cleanups = append(cleanups, cleanup) args := container.Args{ ID: ids[i], @@ -372,15 +366,15 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*c } cont, err := container.New(conf, args) if err != nil { - cleanup() return nil, nil, fmt.Errorf("error creating container: %v", err) } containers = append(containers, cont) if err := cont.Start(conf); err != nil { - cleanup() return nil, nil, fmt.Errorf("error starting container: %v", err) } } - return containers, cleanup, nil + + localClean.Release() + return containers, cleanupAll, nil } diff --git a/test/root/runsc_test.go b/test/root/runsc_test.go index 90373e2db..25204bebb 100644 --- a/test/root/runsc_test.go +++ b/test/root/runsc_test.go @@ -28,8 +28,8 @@ import ( "github.com/cenkalti/backoff" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" ) // TestDoKill checks that when "runsc do..." is killed, the sandbox process is diff --git a/test/root/testdata/BUILD b/test/root/testdata/BUILD deleted file mode 100644 index 6859541ad..000000000 --- a/test/root/testdata/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "testdata", - srcs = [ - "busybox.go", - "containerd_config.go", - "httpd.go", - "httpd_mount_paths.go", - "sandbox.go", - "simple.go", - ], - visibility = [ - "//:sandbox", - ], -) diff --git a/test/root/testdata/busybox.go b/test/root/testdata/busybox.go deleted file mode 100644 index e4dbd2843..000000000 --- a/test/root/testdata/busybox.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testdata - -// MountOverSymlink is a JSON config for a container that /etc/resolv.conf is a -// symlink to /tmp/resolv.conf. -var MountOverSymlink = ` -{ - "metadata": { - "name": "busybox" - }, - "image": { - "image": "k8s.gcr.io/busybox" - }, - "command": [ - "sleep", - "1000" - ] -} -` diff --git a/test/root/testdata/containerd_config.go b/test/root/testdata/containerd_config.go deleted file mode 100644 index e12f1ec88..000000000 --- a/test/root/testdata/containerd_config.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package testdata contains data required for root tests. -package testdata - -import "fmt" - -// containerdConfigTemplate is a .toml config for containerd. It contains a -// formatting verb so the runtime field can be set via fmt.Sprintf. -const containerdConfigTemplate = ` -disabled_plugins = ["restart"] -[plugins.linux] - runtime = "%s" - runtime_root = "/tmp/test-containerd/runsc" - shim = "/usr/local/bin/gvisor-containerd-shim" - shim_debug = true - -[plugins.cri.containerd.runtimes.runsc] - runtime_type = "io.containerd.runtime.v1.linux" - runtime_engine = "%s" -` - -// ContainerdConfig returns a containerd config file with the specified -// runtime. -func ContainerdConfig(runtime string) string { - return fmt.Sprintf(containerdConfigTemplate, runtime, runtime) -} diff --git a/test/root/testdata/httpd.go b/test/root/testdata/httpd.go deleted file mode 100644 index 45d5e33d4..000000000 --- a/test/root/testdata/httpd.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testdata - -// Httpd is a JSON config for an httpd container. -const Httpd = ` -{ - "metadata": { - "name": "httpd" - }, - "image":{ - "image": "httpd" - }, - "mounts": [ - ], - "linux": { - }, - "log_path": "httpd.log" -} -` diff --git a/test/root/testdata/httpd_mount_paths.go b/test/root/testdata/httpd_mount_paths.go deleted file mode 100644 index ac3f4446a..000000000 --- a/test/root/testdata/httpd_mount_paths.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testdata - -// HttpdMountPaths is a JSON config for an httpd container with additional -// mounts. 
-const HttpdMountPaths = ` -{ - "metadata": { - "name": "httpd" - }, - "image":{ - "image": "httpd" - }, - "mounts": [ - { - "container_path": "/var/run/secrets/kubernetes.io/serviceaccount", - "host_path": "/var/lib/kubelet/pods/82bae206-cdf5-11e8-b245-8cdcd43ac064/volumes/kubernetes.io~secret/default-token-2rpfx", - "readonly": true - }, - { - "container_path": "/etc/hosts", - "host_path": "/var/lib/kubelet/pods/82bae206-cdf5-11e8-b245-8cdcd43ac064/etc-hosts", - "readonly": false - }, - { - "container_path": "/dev/termination-log", - "host_path": "/var/lib/kubelet/pods/82bae206-cdf5-11e8-b245-8cdcd43ac064/containers/httpd/d1709580", - "readonly": false - }, - { - "container_path": "/usr/local/apache2/htdocs/test", - "host_path": "/var/lib/kubelet/pods/82bae206-cdf5-11e8-b245-8cdcd43ac064", - "readonly": true - } - ], - "linux": { - }, - "log_path": "httpd.log" -} -` diff --git a/test/root/testdata/sandbox.go b/test/root/testdata/sandbox.go deleted file mode 100644 index 0db210370..000000000 --- a/test/root/testdata/sandbox.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testdata - -// Sandbox is a default JSON config for a sandbox. -const Sandbox = ` -{ - "metadata": { - "name": "default-sandbox", - "namespace": "default", - "attempt": 1, - "uid": "hdishd83djaidwnduwk28bcsb" - }, - "linux": { - }, - "log_directory": "/tmp" -} -` diff --git a/test/root/testdata/simple.go b/test/root/testdata/simple.go deleted file mode 100644 index 1cca53f0c..000000000 --- a/test/root/testdata/simple.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package testdata - -import ( - "encoding/json" - "fmt" -) - -// SimpleSpec returns a JSON config for a simple container that runs the -// specified command in the specified image. -func SimpleSpec(name, image string, cmd []string) string { - cmds, err := json.Marshal(cmd) - if err != nil { - // This shouldn't happen. 
- panic(err) - } - return fmt.Sprintf(` -{ - "metadata": { - "name": %q - }, - "image": { - "image": %q - }, - "command": %s - } -`, name, image, cmds) -} diff --git a/test/runner/BUILD b/test/runner/BUILD index 9959ef9b0..6833c9986 100644 --- a/test/runner/BUILD +++ b/test/runner/BUILD @@ -12,8 +12,8 @@ go_binary( visibility = ["//:sandbox"], deps = [ "//pkg/log", + "//pkg/test/testutil", "//runsc/specutils", - "//runsc/testutil", "//test/runner/gtest", "//test/uds", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/test/runner/runner.go b/test/runner/runner.go index 0d3742f71..14c9cbc47 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -32,8 +32,8 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/testutil" "gvisor.dev/gvisor/test/runner/gtest" "gvisor.dev/gvisor/test/uds" ) @@ -115,20 +115,20 @@ func runTestCaseNative(testBin string, tc gtest.TestCase, t *testing.T) { // // Returns an error if the sandboxed application exits non-zero. func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { - bundleDir, err := testutil.SetupBundleDir(spec) + bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { return fmt.Errorf("SetupBundleDir failed: %v", err) } - defer os.RemoveAll(bundleDir) + defer cleanup() - rootDir, err := testutil.SetupRootDir() + rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { return fmt.Errorf("SetupRootDir failed: %v", err) } - defer os.RemoveAll(rootDir) + defer cleanup() name := tc.FullName() - id := testutil.UniqueContainerID() + id := testutil.RandomContainerID() log.Infof("Running test %q in container %q", name, id) specutils.LogSpec(spec) diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 2c472bf8d..4cd627222 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -1,20 +1,7 @@ -# These packages are used to run language runtime tests inside gVisor sandboxes. - -load("//tools:defs.bzl", "go_binary", "go_test") -load("//test/runtimes:build_defs.bzl", "runtime_test") +load("//test/runtimes:defs.bzl", "runtime_test") package(licenses = ["notice"]) -go_binary( - name = "runner", - testonly = 1, - srcs = ["runner.go"], - deps = [ - "//runsc/dockerutil", - "//runsc/testutil", - ], -) - runtime_test( name = "go1.12", blacklist_file = "blacklist_go1.12.csv", @@ -44,10 +31,3 @@ runtime_test( blacklist_file = "blacklist_python3.7.3.csv", lang = "python", ) - -go_test( - name = "blacklist_test", - size = "small", - srcs = ["blacklist_test.go"], - library = ":runner", -) diff --git a/test/runtimes/README.md b/test/runtimes/README.md deleted file mode 100644 index 42d722553..000000000 --- a/test/runtimes/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Runtimes Tests Dockerfiles - -The Dockerfiles defined under this path are configured to host the execution of -the runtimes language tests. Each Dockerfile can support the language indicated -by its directory. - -The following runtimes are currently supported: - -- Go 1.12 -- Java 11 -- Node.js 12 -- PHP 7.3 -- Python 3.7 - -### Building and pushing the images: - -The canonical source of images is the -[gvisor-presubmit container registry](https://gcr.io/gvisor-presubmit/). You can -build new images with the following command: - -```bash -$ cd images -$ docker build -f Dockerfile_$LANG [-t $NAME] . 
-``` - -To push them to our container registry, set the tag in the command above to -`gcr.io/gvisor-presubmit/$LANG`, then push them. (Note that you will need -appropriate permissions to the `gvisor-presubmit` GCP project.) - -```bash -gcloud docker -- push gcr.io/gvisor-presubmit/$LANG -``` - -#### Running in Docker locally: - -1) [Install and configure Docker](https://docs.docker.com/install/) - -2) Pull the image you want to run: - -```bash -$ docker pull gcr.io/gvisor-presubmit/$LANG -``` - -3) Run docker with the image. - -```bash -$ docker run [--runtime=runsc] --rm -it $NAME [FLAG] -``` - -Running the command with no flags will cause all the available tests to execute. - -Flags can be added for additional functionality: - -- --list: Print a list of all available tests -- --test <name>: Run a single test from the list of available tests -- --v: Print the language version diff --git a/test/runtimes/blacklist_test.go b/test/runtimes/blacklist_test.go deleted file mode 100644 index 0ff69ab18..000000000 --- a/test/runtimes/blacklist_test.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "flag" - "os" - "testing" -) - -func TestMain(m *testing.M) { - flag.Parse() - os.Exit(m.Run()) -} - -// Test that the blacklist parses without error. -func TestBlacklists(t *testing.T) { - bl, err := getBlacklist() - if err != nil { - t.Fatalf("error parsing blacklist: %v", err) - } - if *blacklistFile != "" && len(bl) == 0 { - t.Errorf("got empty blacklist for file %q", *blacklistFile) - } -} diff --git a/test/runtimes/build_defs.bzl b/test/runtimes/build_defs.bzl deleted file mode 100644 index 92e275a76..000000000 --- a/test/runtimes/build_defs.bzl +++ /dev/null @@ -1,75 +0,0 @@ -"""Defines a rule for runtime test targets.""" - -load("//tools:defs.bzl", "go_test", "loopback") - -def runtime_test( - name, - lang, - image_repo = "gcr.io/gvisor-presubmit", - image_name = None, - blacklist_file = None, - shard_count = 50, - size = "enormous"): - """Generates sh_test and blacklist test targets for a given runtime. - - Args: - name: The name of the runtime being tested. Typically, the lang + version. - This is used in the names of the generated test targets. - lang: The language being tested. - image_repo: The docker repository containing the proctor image to run. - i.e., the prefix to the fully qualified docker image id. - image_name: The name of the image in the image_repo. - Defaults to the test name. - blacklist_file: A test blacklist to pass to the runtime test's runner. - shard_count: See Bazel common test attributes. - size: See Bazel common test attributes. 
- """ - if image_name == None: - image_name = name - args = [ - "--lang", - lang, - "--image", - "/".join([image_repo, image_name]), - ] - data = [ - ":runner", - loopback, - ] - if blacklist_file: - args += ["--blacklist_file", "test/runtimes/" + blacklist_file] - data += [blacklist_file] - - # Add a test that the blacklist parses correctly. - blacklist_test(name, blacklist_file) - - sh_test( - name = name + "_test", - srcs = ["runner.sh"], - args = args, - data = data, - size = size, - shard_count = shard_count, - tags = [ - # Requires docker and runsc to be configured before the test runs. - "local", - # Don't include test target in wildcard target patterns. - "manual", - ], - ) - -def blacklist_test(name, blacklist_file): - """Test that a blacklist parses correctly.""" - go_test( - name = name + "_blacklist_test", - library = ":runner", - srcs = ["blacklist_test.go"], - args = ["--blacklist_file", "test/runtimes/" + blacklist_file], - data = [blacklist_file], - ) - -def sh_test(**kwargs): - """Wraps the standard sh_test.""" - native.sh_test( - **kwargs - ) diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl new file mode 100644 index 000000000..f836dd952 --- /dev/null +++ b/test/runtimes/defs.bzl @@ -0,0 +1,79 @@ +"""Defines a rule for runtime test targets.""" + +load("//tools:defs.bzl", "go_test") + +def _runtime_test_impl(ctx): + # Construct arguments. + args = [ + "--lang", + ctx.attr.lang, + "--image", + ctx.attr.image, + ] + if ctx.attr.blacklist_file: + args += [ + "--blacklist_file", + ctx.files.blacklist_file[0].short_path, + ] + + # Build a runner. + runner = ctx.actions.declare_file("%s-executer" % ctx.label.name) + runner_content = "\n".join([ + "#!/bin/bash", + "%s %s\n" % (ctx.files._runner[0].short_path, " ".join(args)), + ]) + ctx.actions.write(runner, runner_content, is_executable = True) + + # Return the runner. + return [DefaultInfo( + executable = runner, + runfiles = ctx.runfiles( + files = ctx.files._runner + ctx.files.blacklist_file + ctx.files._proctor, + collect_default = True, + collect_data = True, + ), + )] + +_runtime_test = rule( + implementation = _runtime_test_impl, + attrs = { + "image": attr.string( + mandatory = False, + ), + "lang": attr.string( + mandatory = True, + ), + "blacklist_file": attr.label( + mandatory = False, + allow_single_file = True, + ), + "_runner": attr.label( + default = "//test/runtimes/runner:runner", + ), + "_proctor": attr.label( + default = "//test/runtimes/proctor:proctor", + ), + }, + test = True, +) + +def runtime_test(name, **kwargs): + _runtime_test( + name = name, + image = name, # Resolved as images/runtimes/%s. 
+ tags = [ + "local", + "manual", + ], + **kwargs + ) + +def blacklist_test(name, blacklist_file): + """Test that a blacklist parses correctly.""" + go_test( + name = name + "_blacklist_test", + library = ":runner", + srcs = ["blacklist_test.go"], + args = ["--blacklist_file", "test/runtimes/" + blacklist_file], + data = [blacklist_file], + ) diff --git a/test/runtimes/images/proctor/BUILD b/test/runtimes/images/proctor/BUILD deleted file mode 100644 index 85e004c45..000000000 --- a/test/runtimes/images/proctor/BUILD +++ /dev/null @@ -1,26 +0,0 @@ -load("//tools:defs.bzl", "go_binary", "go_test") - -package(licenses = ["notice"]) - -go_binary( - name = "proctor", - srcs = [ - "go.go", - "java.go", - "nodejs.go", - "php.go", - "proctor.go", - "python.go", - ], - visibility = ["//test/runtimes/images:__subpackages__"], -) - -go_test( - name = "proctor_test", - size = "small", - srcs = ["proctor_test.go"], - library = ":proctor", - deps = [ - "//runsc/testutil", - ], -) diff --git a/test/runtimes/images/proctor/go.go b/test/runtimes/images/proctor/go.go deleted file mode 100644 index 3e2d5d8db..000000000 --- a/test/runtimes/images/proctor/go.go +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "fmt" - "os" - "os/exec" - "regexp" - "strings" -) - -var ( - goTestRegEx = regexp.MustCompile(`^.+\.go$`) - - // Directories with .dir contain helper files for tests. - // Exclude benchmarks and stress tests. - goDirFilter = regexp.MustCompile(`^(bench|stress)\/.+$|^.+\.dir.+$`) -) - -// Location of Go tests on disk. -const goTestDir = "/usr/local/go/test" - -// goRunner implements TestRunner for Go. -// -// There are two types of Go tests: "Go tool tests" and "Go tests on disk". -// "Go tool tests" are found and executed using `go tool dist test`. "Go tests -// on disk" are found in the /usr/local/go/test directory and are executed -// using `go run run.go`. -type goRunner struct{} - -var _ TestRunner = goRunner{} - -// ListTests implements TestRunner.ListTests. -func (goRunner) ListTests() ([]string, error) { - // Go tool dist test tests. - args := []string{"tool", "dist", "test", "-list"} - cmd := exec.Command("go", args...) - cmd.Stderr = os.Stderr - out, err := cmd.Output() - if err != nil { - return nil, fmt.Errorf("failed to list: %v", err) - } - var toolSlice []string - for _, test := range strings.Split(string(out), "\n") { - toolSlice = append(toolSlice, test) - } - - // Go tests on disk. - diskSlice, err := search(goTestDir, goTestRegEx) - if err != nil { - return nil, err - } - // Remove items from /bench/, /stress/ and .dir files - diskFiltered := diskSlice[:0] - for _, file := range diskSlice { - if !goDirFilter.MatchString(file) { - diskFiltered = append(diskFiltered, file) - } - } - - return append(toolSlice, diskFiltered...), nil -} - -// TestCmd implements TestRunner.TestCmd. 
-func (goRunner) TestCmd(test string) *exec.Cmd { - // Check if test exists on disk by searching for file of the same name. - // This will determine whether or not it is a Go test on disk. - if strings.HasSuffix(test, ".go") { - // Test has suffix ".go" which indicates a disk test, run it as such. - cmd := exec.Command("go", "run", "run.go", "-v", "--", test) - cmd.Dir = goTestDir - return cmd - } - - // No ".go" suffix, run as a tool test. - return exec.Command("go", "tool", "dist", "test", "-run", test) -} diff --git a/test/runtimes/images/proctor/java.go b/test/runtimes/images/proctor/java.go deleted file mode 100644 index 8b362029d..000000000 --- a/test/runtimes/images/proctor/java.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "fmt" - "os" - "os/exec" - "regexp" - "strings" -) - -// Directories to exclude from tests. -var javaExclDirs = regexp.MustCompile(`(^(sun\/security)|(java\/util\/stream)|(java\/time)| )`) - -// Location of java tests. -const javaTestDir = "/root/test/jdk" - -// javaRunner implements TestRunner for Java. -type javaRunner struct{} - -var _ TestRunner = javaRunner{} - -// ListTests implements TestRunner.ListTests. -func (javaRunner) ListTests() ([]string, error) { - args := []string{ - "-dir:" + javaTestDir, - "-ignore:quiet", - "-a", - "-listtests", - ":jdk_core", - ":jdk_svc", - ":jdk_sound", - ":jdk_imageio", - } - cmd := exec.Command("jtreg", args...) - cmd.Stderr = os.Stderr - out, err := cmd.Output() - if err != nil { - return nil, fmt.Errorf("jtreg -listtests : %v", err) - } - var testSlice []string - for _, test := range strings.Split(string(out), "\n") { - if !javaExclDirs.MatchString(test) { - testSlice = append(testSlice, test) - } - } - return testSlice, nil -} - -// TestCmd implements TestRunner.TestCmd. -func (javaRunner) TestCmd(test string) *exec.Cmd { - args := []string{ - "-noreport", - "-dir:" + javaTestDir, - test, - } - return exec.Command("jtreg", args...) -} diff --git a/test/runtimes/images/proctor/nodejs.go b/test/runtimes/images/proctor/nodejs.go deleted file mode 100644 index bd57db444..000000000 --- a/test/runtimes/images/proctor/nodejs.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "os/exec" - "path/filepath" - "regexp" -) - -var nodejsTestRegEx = regexp.MustCompile(`^test-[^-].+\.js$`) - -// Location of nodejs tests relative to working dir. -const nodejsTestDir = "test" - -// nodejsRunner implements TestRunner for NodeJS. -type nodejsRunner struct{} - -var _ TestRunner = nodejsRunner{} - -// ListTests implements TestRunner.ListTests. -func (nodejsRunner) ListTests() ([]string, error) { - testSlice, err := search(nodejsTestDir, nodejsTestRegEx) - if err != nil { - return nil, err - } - return testSlice, nil -} - -// TestCmd implements TestRunner.TestCmd. -func (nodejsRunner) TestCmd(test string) *exec.Cmd { - args := []string{filepath.Join("tools", "test.py"), test} - return exec.Command("/usr/bin/python", args...) -} diff --git a/test/runtimes/images/proctor/php.go b/test/runtimes/images/proctor/php.go deleted file mode 100644 index 9115040e1..000000000 --- a/test/runtimes/images/proctor/php.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "os/exec" - "regexp" -) - -var phpTestRegEx = regexp.MustCompile(`^.+\.phpt$`) - -// phpRunner implements TestRunner for PHP. -type phpRunner struct{} - -var _ TestRunner = phpRunner{} - -// ListTests implements TestRunner.ListTests. -func (phpRunner) ListTests() ([]string, error) { - testSlice, err := search(".", phpTestRegEx) - if err != nil { - return nil, err - } - return testSlice, nil -} - -// TestCmd implements TestRunner.TestCmd. -func (phpRunner) TestCmd(test string) *exec.Cmd { - args := []string{"test", "TESTS=" + test} - return exec.Command("make", args...) -} diff --git a/test/runtimes/images/proctor/proctor.go b/test/runtimes/images/proctor/proctor.go deleted file mode 100644 index b54abe434..000000000 --- a/test/runtimes/images/proctor/proctor.go +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary proctor runs the test for a particular runtime. It is meant to be -// included in Docker images for all runtime tests. -package main - -import ( - "flag" - "fmt" - "log" - "os" - "os/exec" - "os/signal" - "path/filepath" - "regexp" - "syscall" -) - -// TestRunner is an interface that must be implemented for each runtime -// integrated with proctor. -type TestRunner interface { - // ListTests returns a string slice of tests available to run. 
- ListTests() ([]string, error) - - // TestCmd returns an *exec.Cmd that will run the given test. - TestCmd(test string) *exec.Cmd -} - -var ( - runtime = flag.String("runtime", "", "name of runtime") - list = flag.Bool("list", false, "list all available tests") - testName = flag.String("test", "", "run a single test from the list of available tests") - pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children") -) - -func main() { - flag.Parse() - - if *pause { - pauseAndReap() - panic("pauseAndReap should never return") - } - - if *runtime == "" { - log.Fatalf("runtime flag must be provided") - } - - tr, err := testRunnerForRuntime(*runtime) - if err != nil { - log.Fatalf("%v", err) - } - - // List tests. - if *list { - tests, err := tr.ListTests() - if err != nil { - log.Fatalf("failed to list tests: %v", err) - } - for _, test := range tests { - fmt.Println(test) - } - return - } - - var tests []string - if *testName == "" { - // Run every test. - tests, err = tr.ListTests() - if err != nil { - log.Fatalf("failed to get all tests: %v", err) - } - } else { - // Run a single test. - tests = []string{*testName} - } - for _, test := range tests { - cmd := tr.TestCmd(test) - cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr - if err := cmd.Run(); err != nil { - log.Fatalf("FAIL: %v", err) - } - } -} - -// testRunnerForRuntime returns a new TestRunner for the given runtime. -func testRunnerForRuntime(runtime string) (TestRunner, error) { - switch runtime { - case "go": - return goRunner{}, nil - case "java": - return javaRunner{}, nil - case "nodejs": - return nodejsRunner{}, nil - case "php": - return phpRunner{}, nil - case "python": - return pythonRunner{}, nil - } - return nil, fmt.Errorf("invalid runtime %q", runtime) -} - -// pauseAndReap is like init. It runs forever and reaps any children. -func pauseAndReap() { - // Get notified of any new children. - ch := make(chan os.Signal, 1) - signal.Notify(ch, syscall.SIGCHLD) - - for { - if _, ok := <-ch; !ok { - // Channel closed. This should not happen. - panic("signal channel closed") - } - - // Reap the child. - for { - if cpid, _ := syscall.Wait4(-1, nil, syscall.WNOHANG, nil); cpid < 1 { - break - } - } - } -} - -// search is a helper function to find tests in the given directory that match -// the regex. -func search(root string, testFilter *regexp.Regexp) ([]string, error) { - var testSlice []string - - err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - name := filepath.Base(path) - - if info.IsDir() || !testFilter.MatchString(name) { - return nil - } - - relPath, err := filepath.Rel(root, path) - if err != nil { - return err - } - testSlice = append(testSlice, relPath) - return nil - }) - if err != nil { - return nil, fmt.Errorf("walking %q: %v", root, err) - } - - return testSlice, nil -} diff --git a/test/runtimes/images/proctor/proctor_test.go b/test/runtimes/images/proctor/proctor_test.go deleted file mode 100644 index 6bb61d142..000000000 --- a/test/runtimes/images/proctor/proctor_test.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "io/ioutil" - "os" - "path/filepath" - "reflect" - "regexp" - "strings" - "testing" - - "gvisor.dev/gvisor/runsc/testutil" -) - -func touch(t *testing.T, name string) { - t.Helper() - f, err := os.Create(name) - if err != nil { - t.Fatal(err) - } - if err := f.Close(); err != nil { - t.Fatal(err) - } -} - -func TestSearchEmptyDir(t *testing.T) { - td, err := ioutil.TempDir(testutil.TmpDir(), "searchtest") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(td) - - var want []string - - testFilter := regexp.MustCompile(`^test-[^-].+\.tc$`) - got, err := search(td, testFilter) - if err != nil { - t.Errorf("search error: %v", err) - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Found %#v; want %#v", got, want) - } -} - -func TestSearch(t *testing.T) { - td, err := ioutil.TempDir(testutil.TmpDir(), "searchtest") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(td) - - // Creating various files similar to the test filter regex. - files := []string{ - "emp/", - "tee/", - "test-foo.tc", - "test-foo.tc", - "test-bar.tc", - "test-sam.tc", - "Test-que.tc", - "test-brett", - "test--abc.tc", - "test---xyz.tc", - "test-bool.TC", - "--test-gvs.tc", - " test-pew.tc", - "dir/test_baz.tc", - "dir/testsnap.tc", - "dir/test-luk.tc", - "dir/nest/test-ok.tc", - "dir/dip/diz/goog/test-pack.tc", - "dir/dip/diz/wobble/thud/test-cas.e", - "dir/dip/diz/wobble/thud/test-cas.tc", - } - want := []string{ - "dir/dip/diz/goog/test-pack.tc", - "dir/dip/diz/wobble/thud/test-cas.tc", - "dir/nest/test-ok.tc", - "dir/test-luk.tc", - "test-bar.tc", - "test-foo.tc", - "test-sam.tc", - } - - for _, item := range files { - if strings.HasSuffix(item, "/") { - // This item is a directory, create it. - if err := os.MkdirAll(filepath.Join(td, item), 0755); err != nil { - t.Fatal(err) - } - } else { - // This item is a file, create the directory and touch file. - // Create directory in which file should be created - fullDirPath := filepath.Join(td, filepath.Dir(item)) - if err := os.MkdirAll(fullDirPath, 0755); err != nil { - t.Fatal(err) - } - // Create file with full path to file. - touch(t, filepath.Join(td, item)) - } - } - - testFilter := regexp.MustCompile(`^test-[^-].+\.tc$`) - got, err := search(td, testFilter) - if err != nil { - t.Errorf("search error: %v", err) - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Found %#v; want %#v", got, want) - } -} diff --git a/test/runtimes/images/proctor/python.go b/test/runtimes/images/proctor/python.go deleted file mode 100644 index b9e0fbe6f..000000000 --- a/test/runtimes/images/proctor/python.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "fmt" - "os" - "os/exec" - "strings" -) - -// pythonRunner implements TestRunner for Python. -type pythonRunner struct{} - -var _ TestRunner = pythonRunner{} - -// ListTests implements TestRunner.ListTests. -func (pythonRunner) ListTests() ([]string, error) { - args := []string{"-m", "test", "--list-tests"} - cmd := exec.Command("./python", args...) - cmd.Stderr = os.Stderr - out, err := cmd.Output() - if err != nil { - return nil, fmt.Errorf("failed to list: %v", err) - } - var toolSlice []string - for _, test := range strings.Split(string(out), "\n") { - toolSlice = append(toolSlice, test) - } - return toolSlice, nil -} - -// TestCmd implements TestRunner.TestCmd. -func (pythonRunner) TestCmd(test string) *exec.Cmd { - args := []string{"-m", "test", test} - return exec.Command("./python", args...) -} diff --git a/test/runtimes/proctor/BUILD b/test/runtimes/proctor/BUILD new file mode 100644 index 000000000..50a26d182 --- /dev/null +++ b/test/runtimes/proctor/BUILD @@ -0,0 +1,27 @@ +load("//tools:defs.bzl", "go_binary", "go_test") + +package(licenses = ["notice"]) + +go_binary( + name = "proctor", + srcs = [ + "go.go", + "java.go", + "nodejs.go", + "php.go", + "proctor.go", + "python.go", + ], + pure = True, + visibility = ["//test/runtimes:__pkg__"], +) + +go_test( + name = "proctor_test", + size = "small", + srcs = ["proctor_test.go"], + library = ":proctor", + deps = [ + "//pkg/test/testutil", + ], +) diff --git a/test/runtimes/proctor/go.go b/test/runtimes/proctor/go.go new file mode 100644 index 000000000..3e2d5d8db --- /dev/null +++ b/test/runtimes/proctor/go.go @@ -0,0 +1,90 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "os" + "os/exec" + "regexp" + "strings" +) + +var ( + goTestRegEx = regexp.MustCompile(`^.+\.go$`) + + // Directories with .dir contain helper files for tests. + // Exclude benchmarks and stress tests. + goDirFilter = regexp.MustCompile(`^(bench|stress)\/.+$|^.+\.dir.+$`) +) + +// Location of Go tests on disk. +const goTestDir = "/usr/local/go/test" + +// goRunner implements TestRunner for Go. +// +// There are two types of Go tests: "Go tool tests" and "Go tests on disk". +// "Go tool tests" are found and executed using `go tool dist test`. "Go tests +// on disk" are found in the /usr/local/go/test directory and are executed +// using `go run run.go`. +type goRunner struct{} + +var _ TestRunner = goRunner{} + +// ListTests implements TestRunner.ListTests. 
+func (goRunner) ListTests() ([]string, error) { + // Go tool dist test tests. + args := []string{"tool", "dist", "test", "-list"} + cmd := exec.Command("go", args...) + cmd.Stderr = os.Stderr + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to list: %v", err) + } + var toolSlice []string + for _, test := range strings.Split(string(out), "\n") { + toolSlice = append(toolSlice, test) + } + + // Go tests on disk. + diskSlice, err := search(goTestDir, goTestRegEx) + if err != nil { + return nil, err + } + // Remove items from /bench/, /stress/ and .dir files + diskFiltered := diskSlice[:0] + for _, file := range diskSlice { + if !goDirFilter.MatchString(file) { + diskFiltered = append(diskFiltered, file) + } + } + + return append(toolSlice, diskFiltered...), nil +} + +// TestCmd implements TestRunner.TestCmd. +func (goRunner) TestCmd(test string) *exec.Cmd { + // Check if test exists on disk by searching for file of the same name. + // This will determine whether or not it is a Go test on disk. + if strings.HasSuffix(test, ".go") { + // Test has suffix ".go" which indicates a disk test, run it as such. + cmd := exec.Command("go", "run", "run.go", "-v", "--", test) + cmd.Dir = goTestDir + return cmd + } + + // No ".go" suffix, run as a tool test. + return exec.Command("go", "tool", "dist", "test", "-run", test) +} diff --git a/test/runtimes/proctor/java.go b/test/runtimes/proctor/java.go new file mode 100644 index 000000000..8b362029d --- /dev/null +++ b/test/runtimes/proctor/java.go @@ -0,0 +1,71 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "os" + "os/exec" + "regexp" + "strings" +) + +// Directories to exclude from tests. +var javaExclDirs = regexp.MustCompile(`(^(sun\/security)|(java\/util\/stream)|(java\/time)| )`) + +// Location of java tests. +const javaTestDir = "/root/test/jdk" + +// javaRunner implements TestRunner for Java. +type javaRunner struct{} + +var _ TestRunner = javaRunner{} + +// ListTests implements TestRunner.ListTests. +func (javaRunner) ListTests() ([]string, error) { + args := []string{ + "-dir:" + javaTestDir, + "-ignore:quiet", + "-a", + "-listtests", + ":jdk_core", + ":jdk_svc", + ":jdk_sound", + ":jdk_imageio", + } + cmd := exec.Command("jtreg", args...) + cmd.Stderr = os.Stderr + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("jtreg -listtests : %v", err) + } + var testSlice []string + for _, test := range strings.Split(string(out), "\n") { + if !javaExclDirs.MatchString(test) { + testSlice = append(testSlice, test) + } + } + return testSlice, nil +} + +// TestCmd implements TestRunner.TestCmd. +func (javaRunner) TestCmd(test string) *exec.Cmd { + args := []string{ + "-noreport", + "-dir:" + javaTestDir, + test, + } + return exec.Command("jtreg", args...) 
+} diff --git a/test/runtimes/proctor/nodejs.go b/test/runtimes/proctor/nodejs.go new file mode 100644 index 000000000..bd57db444 --- /dev/null +++ b/test/runtimes/proctor/nodejs.go @@ -0,0 +1,46 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "os/exec" + "path/filepath" + "regexp" +) + +var nodejsTestRegEx = regexp.MustCompile(`^test-[^-].+\.js$`) + +// Location of nodejs tests relative to working dir. +const nodejsTestDir = "test" + +// nodejsRunner implements TestRunner for NodeJS. +type nodejsRunner struct{} + +var _ TestRunner = nodejsRunner{} + +// ListTests implements TestRunner.ListTests. +func (nodejsRunner) ListTests() ([]string, error) { + testSlice, err := search(nodejsTestDir, nodejsTestRegEx) + if err != nil { + return nil, err + } + return testSlice, nil +} + +// TestCmd implements TestRunner.TestCmd. +func (nodejsRunner) TestCmd(test string) *exec.Cmd { + args := []string{filepath.Join("tools", "test.py"), test} + return exec.Command("/usr/bin/python", args...) +} diff --git a/test/runtimes/proctor/php.go b/test/runtimes/proctor/php.go new file mode 100644 index 000000000..9115040e1 --- /dev/null +++ b/test/runtimes/proctor/php.go @@ -0,0 +1,42 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "os/exec" + "regexp" +) + +var phpTestRegEx = regexp.MustCompile(`^.+\.phpt$`) + +// phpRunner implements TestRunner for PHP. +type phpRunner struct{} + +var _ TestRunner = phpRunner{} + +// ListTests implements TestRunner.ListTests. +func (phpRunner) ListTests() ([]string, error) { + testSlice, err := search(".", phpTestRegEx) + if err != nil { + return nil, err + } + return testSlice, nil +} + +// TestCmd implements TestRunner.TestCmd. +func (phpRunner) TestCmd(test string) *exec.Cmd { + args := []string{"test", "TESTS=" + test} + return exec.Command("make", args...) +} diff --git a/test/runtimes/proctor/proctor.go b/test/runtimes/proctor/proctor.go new file mode 100644 index 000000000..b54abe434 --- /dev/null +++ b/test/runtimes/proctor/proctor.go @@ -0,0 +1,163 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary proctor runs the test for a particular runtime. It is meant to be +// included in Docker images for all runtime tests. +package main + +import ( + "flag" + "fmt" + "log" + "os" + "os/exec" + "os/signal" + "path/filepath" + "regexp" + "syscall" +) + +// TestRunner is an interface that must be implemented for each runtime +// integrated with proctor. +type TestRunner interface { + // ListTests returns a string slice of tests available to run. + ListTests() ([]string, error) + + // TestCmd returns an *exec.Cmd that will run the given test. + TestCmd(test string) *exec.Cmd +} + +var ( + runtime = flag.String("runtime", "", "name of runtime") + list = flag.Bool("list", false, "list all available tests") + testName = flag.String("test", "", "run a single test from the list of available tests") + pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children") +) + +func main() { + flag.Parse() + + if *pause { + pauseAndReap() + panic("pauseAndReap should never return") + } + + if *runtime == "" { + log.Fatalf("runtime flag must be provided") + } + + tr, err := testRunnerForRuntime(*runtime) + if err != nil { + log.Fatalf("%v", err) + } + + // List tests. + if *list { + tests, err := tr.ListTests() + if err != nil { + log.Fatalf("failed to list tests: %v", err) + } + for _, test := range tests { + fmt.Println(test) + } + return + } + + var tests []string + if *testName == "" { + // Run every test. + tests, err = tr.ListTests() + if err != nil { + log.Fatalf("failed to get all tests: %v", err) + } + } else { + // Run a single test. + tests = []string{*testName} + } + for _, test := range tests { + cmd := tr.TestCmd(test) + cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr + if err := cmd.Run(); err != nil { + log.Fatalf("FAIL: %v", err) + } + } +} + +// testRunnerForRuntime returns a new TestRunner for the given runtime. +func testRunnerForRuntime(runtime string) (TestRunner, error) { + switch runtime { + case "go": + return goRunner{}, nil + case "java": + return javaRunner{}, nil + case "nodejs": + return nodejsRunner{}, nil + case "php": + return phpRunner{}, nil + case "python": + return pythonRunner{}, nil + } + return nil, fmt.Errorf("invalid runtime %q", runtime) +} + +// pauseAndReap is like init. It runs forever and reaps any children. +func pauseAndReap() { + // Get notified of any new children. + ch := make(chan os.Signal, 1) + signal.Notify(ch, syscall.SIGCHLD) + + for { + if _, ok := <-ch; !ok { + // Channel closed. This should not happen. + panic("signal channel closed") + } + + // Reap the child. + for { + if cpid, _ := syscall.Wait4(-1, nil, syscall.WNOHANG, nil); cpid < 1 { + break + } + } + } +} + +// search is a helper function to find tests in the given directory that match +// the regex. 
+func search(root string, testFilter *regexp.Regexp) ([]string, error) { + var testSlice []string + + err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + name := filepath.Base(path) + + if info.IsDir() || !testFilter.MatchString(name) { + return nil + } + + relPath, err := filepath.Rel(root, path) + if err != nil { + return err + } + testSlice = append(testSlice, relPath) + return nil + }) + if err != nil { + return nil, fmt.Errorf("walking %q: %v", root, err) + } + + return testSlice, nil +} diff --git a/test/runtimes/proctor/proctor_test.go b/test/runtimes/proctor/proctor_test.go new file mode 100644 index 000000000..6ef2de085 --- /dev/null +++ b/test/runtimes/proctor/proctor_test.go @@ -0,0 +1,127 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "io/ioutil" + "os" + "path/filepath" + "reflect" + "regexp" + "strings" + "testing" + + "gvisor.dev/gvisor/pkg/test/testutil" +) + +func touch(t *testing.T, name string) { + t.Helper() + f, err := os.Create(name) + if err != nil { + t.Fatalf("error creating file %q: %v", name, err) + } + if err := f.Close(); err != nil { + t.Fatalf("error closing file %q: %v", name, err) + } +} + +func TestSearchEmptyDir(t *testing.T) { + td, err := ioutil.TempDir(testutil.TmpDir(), "searchtest") + if err != nil { + t.Fatalf("error creating searchtest: %v", err) + } + defer os.RemoveAll(td) + + var want []string + + testFilter := regexp.MustCompile(`^test-[^-].+\.tc$`) + got, err := search(td, testFilter) + if err != nil { + t.Errorf("search error: %v", err) + } + + if !reflect.DeepEqual(got, want) { + t.Errorf("Found %#v; want %#v", got, want) + } +} + +func TestSearch(t *testing.T) { + td, err := ioutil.TempDir(testutil.TmpDir(), "searchtest") + if err != nil { + t.Fatalf("error creating searchtest: %v", err) + } + defer os.RemoveAll(td) + + // Creating various files similar to the test filter regex. + files := []string{ + "emp/", + "tee/", + "test-foo.tc", + "test-foo.tc", + "test-bar.tc", + "test-sam.tc", + "Test-que.tc", + "test-brett", + "test--abc.tc", + "test---xyz.tc", + "test-bool.TC", + "--test-gvs.tc", + " test-pew.tc", + "dir/test_baz.tc", + "dir/testsnap.tc", + "dir/test-luk.tc", + "dir/nest/test-ok.tc", + "dir/dip/diz/goog/test-pack.tc", + "dir/dip/diz/wobble/thud/test-cas.e", + "dir/dip/diz/wobble/thud/test-cas.tc", + } + want := []string{ + "dir/dip/diz/goog/test-pack.tc", + "dir/dip/diz/wobble/thud/test-cas.tc", + "dir/nest/test-ok.tc", + "dir/test-luk.tc", + "test-bar.tc", + "test-foo.tc", + "test-sam.tc", + } + + for _, item := range files { + if strings.HasSuffix(item, "/") { + // This item is a directory, create it. + if err := os.MkdirAll(filepath.Join(td, item), 0755); err != nil { + t.Fatalf("error making directory: %v", err) + } + } else { + // This item is a file, create the directory and touch file. 
+ // Create directory in which file should be created + fullDirPath := filepath.Join(td, filepath.Dir(item)) + if err := os.MkdirAll(fullDirPath, 0755); err != nil { + t.Fatalf("error making directory: %v", err) + } + // Create file with full path to file. + touch(t, filepath.Join(td, item)) + } + } + + testFilter := regexp.MustCompile(`^test-[^-].+\.tc$`) + got, err := search(td, testFilter) + if err != nil { + t.Errorf("search error: %v", err) + } + + if !reflect.DeepEqual(got, want) { + t.Errorf("Found %#v; want %#v", got, want) + } +} diff --git a/test/runtimes/proctor/python.go b/test/runtimes/proctor/python.go new file mode 100644 index 000000000..b9e0fbe6f --- /dev/null +++ b/test/runtimes/proctor/python.go @@ -0,0 +1,49 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "os" + "os/exec" + "strings" +) + +// pythonRunner implements TestRunner for Python. +type pythonRunner struct{} + +var _ TestRunner = pythonRunner{} + +// ListTests implements TestRunner.ListTests. +func (pythonRunner) ListTests() ([]string, error) { + args := []string{"-m", "test", "--list-tests"} + cmd := exec.Command("./python", args...) + cmd.Stderr = os.Stderr + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to list: %v", err) + } + var toolSlice []string + for _, test := range strings.Split(string(out), "\n") { + toolSlice = append(toolSlice, test) + } + return toolSlice, nil +} + +// TestCmd implements TestRunner.TestCmd. +func (pythonRunner) TestCmd(test string) *exec.Cmd { + args := []string{"-m", "test", test} + return exec.Command("./python", args...) +} diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go deleted file mode 100644 index 3c98f4570..000000000 --- a/test/runtimes/runner.go +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary runner runs the runtime tests in a Docker container. 
-package main - -import ( - "encoding/csv" - "flag" - "fmt" - "io" - "os" - "sort" - "strings" - "testing" - "time" - - "gvisor.dev/gvisor/runsc/dockerutil" - "gvisor.dev/gvisor/runsc/testutil" -) - -var ( - lang = flag.String("lang", "", "language runtime to test") - image = flag.String("image", "", "docker image with runtime tests") - blacklistFile = flag.String("blacklist_file", "", "file containing blacklist of tests to exclude, in CSV format with fields: test name, bug id, comment") -) - -// Wait time for each test to run. -const timeout = 5 * time.Minute - -func main() { - flag.Parse() - if *lang == "" || *image == "" { - fmt.Fprintf(os.Stderr, "lang and image flags must not be empty\n") - os.Exit(1) - } - - os.Exit(runTests()) -} - -// runTests is a helper that is called by main. It exists so that we can run -// defered functions before exiting. It returns an exit code that should be -// passed to os.Exit. -func runTests() int { - // Get tests to blacklist. - blacklist, err := getBlacklist() - if err != nil { - fmt.Fprintf(os.Stderr, "Error getting blacklist: %s\n", err.Error()) - return 1 - } - - // Create a single docker container that will be used for all tests. - d := dockerutil.MakeDocker("gvisor-" + *lang) - defer d.CleanUp() - - // Get a slice of tests to run. This will also start a single Docker - // container that will be used to run each test. The final test will - // stop the Docker container. - tests, err := getTests(d, blacklist) - if err != nil { - fmt.Fprintf(os.Stderr, "%s\n", err.Error()) - return 1 - } - - m := testing.MainStart(testDeps{}, tests, nil, nil) - return m.Run() -} - -// getTests returns a slice of tests to run, subject to the shard size and -// index. -func getTests(d dockerutil.Docker, blacklist map[string]struct{}) ([]testing.InternalTest, error) { - // Pull the image. - if err := dockerutil.Pull(*image); err != nil { - return nil, fmt.Errorf("docker pull %q failed: %v", *image, err) - } - - // Run proctor with --pause flag to keep container alive forever. - if err := d.Run(*image, "--pause"); err != nil { - return nil, fmt.Errorf("docker run failed: %v", err) - } - - // Get a list of all tests in the image. - list, err := d.Exec("/proctor", "--runtime", *lang, "--list") - if err != nil { - return nil, fmt.Errorf("docker exec failed: %v", err) - } - - // Calculate a subset of tests to run corresponding to the current - // shard. - tests := strings.Fields(list) - sort.Strings(tests) - indices, err := testutil.TestIndicesForShard(len(tests)) - if err != nil { - return nil, fmt.Errorf("TestsForShard() failed: %v", err) - } - - var itests []testing.InternalTest - for _, tci := range indices { - // Capture tc in this scope. - tc := tests[tci] - itests = append(itests, testing.InternalTest{ - Name: tc, - F: func(t *testing.T) { - // Is the test blacklisted? 
- if _, ok := blacklist[tc]; ok { - t.Skipf("SKIP: blacklisted test %q", tc) - } - - var ( - now = time.Now() - done = make(chan struct{}) - output string - err error - ) - - go func() { - fmt.Printf("RUNNING %s...\n", tc) - output, err = d.Exec("/proctor", "--runtime", *lang, "--test", tc) - close(done) - }() - - select { - case <-done: - if err == nil { - fmt.Printf("PASS: %s (%v)\n\n", tc, time.Since(now)) - return - } - t.Errorf("FAIL: %s (%v):\n%s\n", tc, time.Since(now), output) - case <-time.After(timeout): - t.Errorf("TIMEOUT: %s (%v):\n%s\n", tc, time.Since(now), output) - } - }, - }) - } - return itests, nil -} - -// getBlacklist reads the blacklist file and returns a set of test names to -// exclude. -func getBlacklist() (map[string]struct{}, error) { - blacklist := make(map[string]struct{}) - if *blacklistFile == "" { - return blacklist, nil - } - file, err := testutil.FindFile(*blacklistFile) - if err != nil { - return nil, err - } - f, err := os.Open(file) - if err != nil { - return nil, err - } - defer f.Close() - - r := csv.NewReader(f) - - // First line is header. Skip it. - if _, err := r.Read(); err != nil { - return nil, err - } - - for { - record, err := r.Read() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - blacklist[record[0]] = struct{}{} - } - return blacklist, nil -} - -// testDeps implements testing.testDeps (an unexported interface), and is -// required to use testing.MainStart. -type testDeps struct{} - -func (f testDeps) MatchString(a, b string) (bool, error) { return a == b, nil } -func (f testDeps) StartCPUProfile(io.Writer) error { return nil } -func (f testDeps) StopCPUProfile() {} -func (f testDeps) WriteProfileTo(string, io.Writer, int) error { return nil } -func (f testDeps) ImportPath() string { return "" } -func (f testDeps) StartTestLog(io.Writer) {} -func (f testDeps) StopTestLog() error { return nil } diff --git a/test/runtimes/runner.sh b/test/runtimes/runner.sh deleted file mode 100755 index a8d9a3460..000000000 --- a/test/runtimes/runner.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -# Copyright 2018 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -euf -x -o pipefail - -echo -- "$@" - -# Create outputs dir if it does not exist. -if [[ -n "${TEST_UNDECLARED_OUTPUTS_DIR}" ]]; then - mkdir -p "${TEST_UNDECLARED_OUTPUTS_DIR}" - chmod a+rwx "${TEST_UNDECLARED_OUTPUTS_DIR}" -fi - -# Update the timestamp on the shard status file. Bazel looks for this. -touch "${TEST_SHARD_STATUS_FILE}" - -# Get location of runner binary. -readonly runner=$(find "${TEST_SRCDIR}" -name runner) - -# Pass the arguments of this script directly to the runner. 
-exec "${runner}" "$@" - diff --git a/test/runtimes/runner/BUILD b/test/runtimes/runner/BUILD new file mode 100644 index 000000000..63924b9c5 --- /dev/null +++ b/test/runtimes/runner/BUILD @@ -0,0 +1,21 @@ +load("//tools:defs.bzl", "go_binary", "go_test") + +package(licenses = ["notice"]) + +go_binary( + name = "runner", + testonly = 1, + srcs = ["main.go"], + visibility = ["//test/runtimes:__pkg__"], + deps = [ + "//pkg/test/dockerutil", + "//pkg/test/testutil", + ], +) + +go_test( + name = "blacklist_test", + size = "small", + srcs = ["blacklist_test.go"], + library = ":runner", +) diff --git a/test/runtimes/runner/blacklist_test.go b/test/runtimes/runner/blacklist_test.go new file mode 100644 index 000000000..0ff69ab18 --- /dev/null +++ b/test/runtimes/runner/blacklist_test.go @@ -0,0 +1,37 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "flag" + "os" + "testing" +) + +func TestMain(m *testing.M) { + flag.Parse() + os.Exit(m.Run()) +} + +// Test that the blacklist parses without error. +func TestBlacklists(t *testing.T) { + bl, err := getBlacklist() + if err != nil { + t.Fatalf("error parsing blacklist: %v", err) + } + if *blacklistFile != "" && len(bl) == 0 { + t.Errorf("got empty blacklist for file %q", *blacklistFile) + } +} diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go new file mode 100644 index 000000000..57540e00e --- /dev/null +++ b/test/runtimes/runner/main.go @@ -0,0 +1,189 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary runner runs the runtime tests in a Docker container. +package main + +import ( + "encoding/csv" + "flag" + "fmt" + "io" + "os" + "sort" + "strings" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +var ( + lang = flag.String("lang", "", "language runtime to test") + image = flag.String("image", "", "docker image with runtime tests") + blacklistFile = flag.String("blacklist_file", "", "file containing blacklist of tests to exclude, in CSV format with fields: test name, bug id, comment") +) + +// Wait time for each test to run. +const timeout = 5 * time.Minute + +func main() { + flag.Parse() + if *lang == "" || *image == "" { + fmt.Fprintf(os.Stderr, "lang and image flags must not be empty\n") + os.Exit(1) + } + os.Exit(runTests()) +} + +// runTests is a helper that is called by main. 
It exists so that we can run +// defered functions before exiting. It returns an exit code that should be +// passed to os.Exit. +func runTests() int { + // Get tests to blacklist. + blacklist, err := getBlacklist() + if err != nil { + fmt.Fprintf(os.Stderr, "Error getting blacklist: %s\n", err.Error()) + return 1 + } + + // Construct the shared docker instance. + d := dockerutil.MakeDocker(testutil.DefaultLogger(*lang)) + defer d.CleanUp() + + // Get a slice of tests to run. This will also start a single Docker + // container that will be used to run each test. The final test will + // stop the Docker container. + tests, err := getTests(d, blacklist) + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err.Error()) + return 1 + } + + m := testing.MainStart(testDeps{}, tests, nil, nil) + return m.Run() +} + +// getTests executes all tests as table tests. +func getTests(d *dockerutil.Docker, blacklist map[string]struct{}) ([]testing.InternalTest, error) { + // Start the container. + d.CopyFiles("/proctor", "test/runtimes/proctor/proctor") + if err := d.Spawn(dockerutil.RunOpts{ + Image: fmt.Sprintf("runtimes/%s", *image), + }, "/proctor/proctor", "--pause"); err != nil { + return nil, fmt.Errorf("docker run failed: %v", err) + } + + // Get a list of all tests in the image. + list, err := d.Exec(dockerutil.RunOpts{}, "/proctor/proctor", "--runtime", *lang, "--list") + if err != nil { + return nil, fmt.Errorf("docker exec failed: %v", err) + } + + // Calculate a subset of tests to run corresponding to the current + // shard. + tests := strings.Fields(list) + sort.Strings(tests) + indices, err := testutil.TestIndicesForShard(len(tests)) + if err != nil { + return nil, fmt.Errorf("TestsForShard() failed: %v", err) + } + + var itests []testing.InternalTest + for _, tci := range indices { + // Capture tc in this scope. + tc := tests[tci] + itests = append(itests, testing.InternalTest{ + Name: tc, + F: func(t *testing.T) { + // Is the test blacklisted? + if _, ok := blacklist[tc]; ok { + t.Skipf("SKIP: blacklisted test %q", tc) + } + + var ( + now = time.Now() + done = make(chan struct{}) + output string + err error + ) + + go func() { + fmt.Printf("RUNNING %s...\n", tc) + output, err = d.Exec(dockerutil.RunOpts{}, "/proctor/proctor", "--runtime", *lang, "--test", tc) + close(done) + }() + + select { + case <-done: + if err == nil { + fmt.Printf("PASS: %s (%v)\n\n", tc, time.Since(now)) + return + } + t.Errorf("FAIL: %s (%v):\n%s\n", tc, time.Since(now), output) + case <-time.After(timeout): + t.Errorf("TIMEOUT: %s (%v):\n%s\n", tc, time.Since(now), output) + } + }, + }) + } + + return itests, nil +} + +// getBlacklist reads the blacklist file and returns a set of test names to +// exclude. +func getBlacklist() (map[string]struct{}, error) { + blacklist := make(map[string]struct{}) + if *blacklistFile == "" { + return blacklist, nil + } + f, err := os.Open(*blacklistFile) + if err != nil { + return nil, err + } + defer f.Close() + + r := csv.NewReader(f) + + // First line is header. Skip it. + if _, err := r.Read(); err != nil { + return nil, err + } + + for { + record, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + blacklist[record[0]] = struct{}{} + } + return blacklist, nil +} + +// testDeps implements testing.testDeps (an unexported interface), and is +// required to use testing.MainStart. 
+type testDeps struct{} + +func (f testDeps) MatchString(a, b string) (bool, error) { return a == b, nil } +func (f testDeps) StartCPUProfile(io.Writer) error { return nil } +func (f testDeps) StopCPUProfile() {} +func (f testDeps) WriteProfileTo(string, io.Writer, int) error { return nil } +func (f testDeps) ImportPath() string { return "" } +func (f testDeps) StartTestLog(io.Writer) {} +func (f testDeps) StopTestLog() error { return nil } diff --git a/tools/bazeldefs/defs.bzl b/tools/bazeldefs/defs.bzl index 2207b9b34..3c22aec24 100644 --- a/tools/bazeldefs/defs.bzl +++ b/tools/bazeldefs/defs.bzl @@ -5,18 +5,14 @@ load("@io_bazel_rules_go//go:def.bzl", "GoLibrary", _go_binary = "go_binary", _g load("@io_bazel_rules_go//proto:def.bzl", _go_grpc_library = "go_grpc_library", _go_proto_library = "go_proto_library") load("@rules_cc//cc:defs.bzl", _cc_binary = "cc_binary", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test") load("@rules_pkg//:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar") -load("@io_bazel_rules_docker//go:image.bzl", _go_image = "go_image") -load("@io_bazel_rules_docker//container:container.bzl", _container_image = "container_image") load("@pydeps//:requirements.bzl", _py_requirement = "requirement") load("@com_github_grpc_grpc//bazel:cc_grpc_library.bzl", _cc_grpc_library = "cc_grpc_library") -container_image = _container_image cc_library = _cc_library cc_flags_supplier = _cc_flags_supplier cc_proto_library = _cc_proto_library cc_test = _cc_test cc_toolchain = "@bazel_tools//tools/cpp:current_cc_toolchain" -go_image = _go_image go_embed_data = _go_embed_data gtest = "@com_google_googletest//:gtest" grpcpp = "@com_github_grpc_grpc//:grpc++" diff --git a/tools/defs.bzl b/tools/defs.bzl index 33240e7f4..cdaf281f3 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -7,7 +7,7 @@ change for Google-internal and bazel-compatible rules. 
load("//tools/go_stateify:defs.bzl", "go_stateify") load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") -load("//tools/bazeldefs:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _grpcpp = "grpcpp", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") +load("//tools/bazeldefs:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _default_installer = "default_installer", _default_net_util = "default_net_util", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _grpcpp = "grpcpp", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") load("//tools/bazeldefs:platforms.bzl", _default_platform = "default_platform", _platforms = "platforms") load("//tools/bazeldefs:tags.bzl", "go_suffixes") load("//tools/nogo:defs.bzl", "nogo_test") @@ -19,12 +19,10 @@ cc_grpc_library = _cc_grpc_library cc_library = _cc_library cc_test = _cc_test cc_toolchain = _cc_toolchain -container_image = _container_image default_installer = _default_installer default_net_util = _default_net_util gbenchmark = _gbenchmark go_embed_data = _go_embed_data -go_image = _go_image go_test = _go_test gtest = _gtest grpcpp = _grpcpp -- cgit v1.2.3 From cc5de905e628c5e9aca7e7a333d6dd9638719b6a Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Thu, 23 Apr 2020 13:11:23 -0700 Subject: Fix test output so that filenames have the correct path. Tested: Intentionally introduce an error and then run: blaze test --test_output=streamed //third_party/gvisor/test/packetimpact/tests:tcp_outside_the_window_linux_test PiperOrigin-RevId: 308114194 --- test/packetimpact/tests/test_runner.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh index 46d63d5e5..e938de782 100755 --- a/test/packetimpact/tests/test_runner.sh +++ b/test/packetimpact/tests/test_runner.sh @@ -262,7 +262,10 @@ sleep 3 # Start a packetimpact test on the test bench. The packetimpact test sends and # receives packets and also sends POSIX socket commands to the posix_server to # be executed on the DUT. 
-docker exec -t "${TESTBENCH}" \ +docker exec \ + -e XML_OUTPUT_FILE="/test.xml" \ + -e TEST_TARGET \ + -t "${TESTBENCH}" \ /bin/bash -c "${DOCKER_TESTBENCH_BINARY} \ ${EXTRA_TEST_ARGS[@]-} \ --posix_server_ip=${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ -- cgit v1.2.3 From 79542417fe97a62ee86aa211ac559bcc5cac5e5e Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Thu, 23 Apr 2020 18:20:43 -0700 Subject: Fix Layer merge and add unit tests mergo was improperly merging nil and empty strings PiperOrigin-RevId: 308170862 --- WORKSPACE | 7 -- test/packetimpact/testbench/BUILD | 6 +- test/packetimpact/testbench/layers.go | 44 +++++++----- test/packetimpact/testbench/layers_test.go | 109 +++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 25 deletions(-) (limited to 'test') diff --git a/WORKSPACE b/WORKSPACE index 3bf5cc9c1..c86e0fcdc 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -188,13 +188,6 @@ go_repository( version = "v0.0.0-20171129191014-dec09d789f3d", ) -go_repository( - name = "com_github_imdario_mergo", - importpath = "github.com/imdario/mergo", - sum = "h1:CGgOkSJeqMRmt0D9XLWExdT4m4F1vd3FV3VPt+0VxkQ=", - version = "v0.3.8", -) - go_repository( name = "com_github_kr_pretty", importpath = "github.com/kr/pretty", diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index b6a254882..3ceceb9d7 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -23,7 +23,6 @@ go_library( "//test/packetimpact/proto:posix_server_go_proto", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", - "@com_github_imdario_mergo//:go_default_library", "@com_github_mohae_deepcopy//:go_default_library", "@org_golang_google_grpc//:go_default_library", "@org_golang_google_grpc//keepalive:go_default_library", @@ -37,5 +36,8 @@ go_test( size = "small", srcs = ["layers_test.go"], library = ":testbench", - deps = ["//pkg/tcpip"], + deps = [ + "//pkg/tcpip", + "@com_github_mohae_deepcopy//:go_default_library", + ], ) diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 5ce324f0d..01e99567d 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -22,7 +22,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/imdario/mergo" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -111,13 +110,31 @@ func equalLayer(x, y Layer) bool { return cmp.Equal(x, y, opt, cmpopts.IgnoreTypes(LayerBase{})) } -// mergeLayer merges other in layer. Any non-nil value in other overrides the -// corresponding value in layer. If other is nil, no action is performed. -func mergeLayer(layer, other Layer) error { - if other == nil { +// mergeLayer merges y into x. Any fields for which y has a non-nil value, that +// value overwrite the corresponding fields in x. +func mergeLayer(x, y Layer) error { + if y == nil { return nil } - return mergo.Merge(layer, other, mergo.WithOverride) + if reflect.TypeOf(x) != reflect.TypeOf(y) { + return fmt.Errorf("can't merge %T into %T", y, x) + } + vx := reflect.ValueOf(x).Elem() + vy := reflect.ValueOf(y).Elem() + t := vy.Type() + for i := 0; i < vy.NumField(); i++ { + t := t.Field(i) + if t.Anonymous { + // Ignore the LayerBase in the Layer struct. 
+ continue + } + v := vy.Field(i) + if v.IsNil() { + continue + } + vx.Field(i).Set(v) + } + return nil } func stringLayer(l Layer) string { @@ -243,8 +260,7 @@ func (l *Ether) length() int { return header.EthernetMinimumSize } -// merge overrides the values in l with the values from other but only in fields -// where the value is not nil. +// merge implements Layer.merge. func (l *Ether) merge(other Layer) error { return mergeLayer(l, other) } @@ -399,8 +415,7 @@ func (l *IPv4) length() int { return int(*l.IHL) } -// merge overrides the values in l with the values from other but only in fields -// where the value is not nil. +// merge implements Layer.merge. func (l *IPv4) merge(other Layer) error { return mergeLayer(l, other) } @@ -544,8 +559,7 @@ func (l *TCP) length() int { return int(*l.DataOffset) } -// merge overrides the values in l with the values from other but only in fields -// where the value is not nil. +// merge implements Layer.merge. func (l *TCP) merge(other Layer) error { return mergeLayer(l, other) } @@ -622,8 +636,7 @@ func (l *UDP) length() int { return int(*l.Length) } -// merge overrides the values in l with the values from other but only in fields -// where the value is not nil. +// merge implements Layer.merge. func (l *UDP) merge(other Layer) error { return mergeLayer(l, other) } @@ -659,8 +672,7 @@ func (l *Payload) length() int { return len(l.Bytes) } -// merge overrides the values in l with the values from other but only in fields -// where the value is not nil. +// merge implements Layer.merge. func (l *Payload) merge(other Layer) error { return mergeLayer(l, other) } diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index c99cf6312..f07ec5eb2 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -17,6 +17,7 @@ package testbench import ( "testing" + "github.com/mohae/deepcopy" "gvisor.dev/gvisor/pkg/tcpip" ) @@ -52,6 +53,114 @@ func TestLayerMatch(t *testing.T) { } } +func TestLayerMergeMismatch(t *testing.T) { + tcp := &TCP{} + otherTCP := &TCP{} + ipv4 := &IPv4{} + ether := &Ether{} + for _, tt := range []struct { + a, b Layer + success bool + }{ + {tcp, tcp, true}, + {tcp, otherTCP, true}, + {tcp, ipv4, false}, + {tcp, ether, false}, + {tcp, nil, true}, + + {otherTCP, otherTCP, true}, + {otherTCP, ipv4, false}, + {otherTCP, ether, false}, + {otherTCP, nil, true}, + + {ipv4, ipv4, true}, + {ipv4, ether, false}, + {ipv4, nil, true}, + + {ether, ether, true}, + {ether, nil, true}, + } { + if err := tt.a.merge(tt.b); (err == nil) != tt.success { + t.Errorf("%s.merge(%s) got %s, wanted the opposite", tt.a, tt.b, err) + } + if tt.b != nil { + if err := tt.b.merge(tt.a); (err == nil) != tt.success { + t.Errorf("%s.merge(%s) got %s, wanted the opposite", tt.b, tt.a, err) + } + } + } +} + +func TestLayerMerge(t *testing.T) { + zero := Uint32(0) + one := Uint32(1) + two := Uint32(2) + empty := []byte{} + foo := []byte("foo") + bar := []byte("bar") + for _, tt := range []struct { + a, b Layer + want Layer + }{ + {&TCP{AckNum: nil}, &TCP{AckNum: nil}, &TCP{AckNum: nil}}, + {&TCP{AckNum: nil}, &TCP{AckNum: zero}, &TCP{AckNum: zero}}, + {&TCP{AckNum: nil}, &TCP{AckNum: one}, &TCP{AckNum: one}}, + {&TCP{AckNum: nil}, &TCP{AckNum: two}, &TCP{AckNum: two}}, + {&TCP{AckNum: nil}, nil, &TCP{AckNum: nil}}, + + {&TCP{AckNum: zero}, &TCP{AckNum: nil}, &TCP{AckNum: zero}}, + {&TCP{AckNum: zero}, &TCP{AckNum: zero}, &TCP{AckNum: zero}}, + {&TCP{AckNum: zero}, &TCP{AckNum: 
one}, &TCP{AckNum: one}}, + {&TCP{AckNum: zero}, &TCP{AckNum: two}, &TCP{AckNum: two}}, + {&TCP{AckNum: zero}, nil, &TCP{AckNum: zero}}, + + {&TCP{AckNum: one}, &TCP{AckNum: nil}, &TCP{AckNum: one}}, + {&TCP{AckNum: one}, &TCP{AckNum: zero}, &TCP{AckNum: zero}}, + {&TCP{AckNum: one}, &TCP{AckNum: one}, &TCP{AckNum: one}}, + {&TCP{AckNum: one}, &TCP{AckNum: two}, &TCP{AckNum: two}}, + {&TCP{AckNum: one}, nil, &TCP{AckNum: one}}, + + {&TCP{AckNum: two}, &TCP{AckNum: nil}, &TCP{AckNum: two}}, + {&TCP{AckNum: two}, &TCP{AckNum: zero}, &TCP{AckNum: zero}}, + {&TCP{AckNum: two}, &TCP{AckNum: one}, &TCP{AckNum: one}}, + {&TCP{AckNum: two}, &TCP{AckNum: two}, &TCP{AckNum: two}}, + {&TCP{AckNum: two}, nil, &TCP{AckNum: two}}, + + {&Payload{Bytes: nil}, &Payload{Bytes: nil}, &Payload{Bytes: nil}}, + {&Payload{Bytes: nil}, &Payload{Bytes: empty}, &Payload{Bytes: empty}}, + {&Payload{Bytes: nil}, &Payload{Bytes: foo}, &Payload{Bytes: foo}}, + {&Payload{Bytes: nil}, &Payload{Bytes: bar}, &Payload{Bytes: bar}}, + {&Payload{Bytes: nil}, nil, &Payload{Bytes: nil}}, + + {&Payload{Bytes: empty}, &Payload{Bytes: nil}, &Payload{Bytes: empty}}, + {&Payload{Bytes: empty}, &Payload{Bytes: empty}, &Payload{Bytes: empty}}, + {&Payload{Bytes: empty}, &Payload{Bytes: foo}, &Payload{Bytes: foo}}, + {&Payload{Bytes: empty}, &Payload{Bytes: bar}, &Payload{Bytes: bar}}, + {&Payload{Bytes: empty}, nil, &Payload{Bytes: empty}}, + + {&Payload{Bytes: foo}, &Payload{Bytes: nil}, &Payload{Bytes: foo}}, + {&Payload{Bytes: foo}, &Payload{Bytes: empty}, &Payload{Bytes: empty}}, + {&Payload{Bytes: foo}, &Payload{Bytes: foo}, &Payload{Bytes: foo}}, + {&Payload{Bytes: foo}, &Payload{Bytes: bar}, &Payload{Bytes: bar}}, + {&Payload{Bytes: foo}, nil, &Payload{Bytes: foo}}, + + {&Payload{Bytes: bar}, &Payload{Bytes: nil}, &Payload{Bytes: bar}}, + {&Payload{Bytes: bar}, &Payload{Bytes: empty}, &Payload{Bytes: empty}}, + {&Payload{Bytes: bar}, &Payload{Bytes: foo}, &Payload{Bytes: foo}}, + {&Payload{Bytes: bar}, &Payload{Bytes: bar}, &Payload{Bytes: bar}}, + {&Payload{Bytes: bar}, nil, &Payload{Bytes: bar}}, + } { + a := deepcopy.Copy(tt.a).(Layer) + if err := a.merge(tt.b); err != nil { + t.Errorf("%s.merge(%s) = %s, wanted nil", tt.a, tt.b, err) + continue + } + if a.String() != tt.want.String() { + t.Errorf("%s.merge(%s) merge result got %s, want %s", tt.a, tt.b, a, tt.want) + } + } +} + func TestLayerStringFormat(t *testing.T) { for _, tt := range []struct { name string -- cgit v1.2.3 From c60613475c92185c9b15468d0de87b321ef2b4d7 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Fri, 24 Apr 2020 14:10:28 -0700 Subject: Standardize all Docker images. This change moves all Docker images to a standard location, and abstracts the build process so that they can be maintained in an automated fashion. This also allows the images to be architecture-independent. All images will now be referred to by the test framework via the canonical `gvisor.dev/images/`, where `` is a function of the path within the source tree. In a subsequent change, continuous integration will be added so that the images will always be correct and available locally. In the end, using `bazel` for Docker containers is simply not possible. Given that we already have the need to use `make` with the base container (for Docker), we extend this approach to get more flexibility. This change also adds a self-documenting and powerful Makefile that is intended to replace the collection of scripts in scripts. 
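
For illustration, the canonical scheme amounts to a simple prefix mapping from a test-facing image name to a local tag. The sketch below is not part of this change; it mirrors the ImageByName helper updated in pkg/test/testutil later in this patch, using the basic/alpine image added here as an example:

package main

import "fmt"

// imageByName mirrors the mapping used by the updated
// pkg/test/testutil.ImageByName: a test-facing name such as "basic/alpine"
// resolves to the canonical local tag under gvisor.dev/images.
func imageByName(name string) string {
	return fmt.Sprintf("gvisor.dev/images/%s", name)
}

func main() {
	fmt.Println(imageByName("basic/alpine")) // gvisor.dev/images/basic/alpine
}
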
Canonical (self-documenting) targets can be added here for targets that understand which images need to be loaded and/or built. PiperOrigin-RevId: 308322438 --- .travis.yml | 11 +- CONTRIBUTING.md | 31 +---- Dockerfile | 9 -- Makefile | 199 ++++++++++++++++++++++----- images/BUILD | 11 ++ images/Makefile | 93 +++++++++++++ images/README.md | 61 ++++++++ images/basic/alpine/Dockerfile | 1 + images/basic/busybox/Dockerfile | 1 + images/basic/httpd/Dockerfile | 1 + images/basic/mysql/Dockerfile | 1 + images/basic/nginx/Dockerfile | 1 + images/basic/python/Dockerfile | 2 + images/basic/resolv/Dockerfile | 1 + images/basic/ruby/Dockerfile | 1 + images/basic/tomcat/Dockerfile | 1 + images/basic/ubuntu/Dockerfile | 1 + images/default/Dockerfile | 11 ++ images/iptables/Dockerfile | 2 + images/packetdrill/Dockerfile | 8 ++ images/packetimpact/Dockerfile | 16 +++ images/runtimes/go1.12/Dockerfile | 4 + images/runtimes/java11/Dockerfile | 22 +++ images/runtimes/nodejs12.4.0/Dockerfile | 21 +++ images/runtimes/php7.3.6/Dockerfile | 19 +++ images/runtimes/python3.7.3/Dockerfile | 21 +++ pkg/test/testutil/testutil.go | 38 +---- scripts/build.sh | 4 - scripts/common.sh | 22 --- scripts/docker_tests.sh | 2 + scripts/hostnet_tests.sh | 2 + scripts/iptables_tests.sh | 6 +- scripts/kvm_tests.sh | 2 + scripts/make_tests.sh | 5 - scripts/overlay_tests.sh | 2 + scripts/packetdrill_tests.sh | 2 + scripts/packetimpact_tests.sh | 2 + scripts/root_tests.sh | 3 +- scripts/swgso_tests.sh | 2 + test/iptables/runner/Dockerfile | 4 - test/packetdrill/Dockerfile | 9 -- test/packetimpact/tests/Dockerfile | 17 --- test/root/BUILD | 1 + test/runtimes/images/Dockerfile_go1.12 | 10 -- test/runtimes/images/Dockerfile_java11 | 30 ---- test/runtimes/images/Dockerfile_nodejs12.4.0 | 28 ---- test/runtimes/images/Dockerfile_php7.3.6 | 27 ---- test/runtimes/images/Dockerfile_python3.7.3 | 30 ---- tools/bazel.mk | 106 ++++++++++++++ tools/installers/BUILD | 8 ++ tools/installers/images.sh | 24 ++++ tools/make_repository.sh | 31 ++++- tools/vm/defs.bzl | 5 +- 53 files changed, 665 insertions(+), 307 deletions(-) delete mode 100644 Dockerfile create mode 100644 images/BUILD create mode 100644 images/Makefile create mode 100644 images/README.md create mode 100644 images/basic/alpine/Dockerfile create mode 100644 images/basic/busybox/Dockerfile create mode 100644 images/basic/httpd/Dockerfile create mode 100644 images/basic/mysql/Dockerfile create mode 100644 images/basic/nginx/Dockerfile create mode 100644 images/basic/python/Dockerfile create mode 100644 images/basic/resolv/Dockerfile create mode 100644 images/basic/ruby/Dockerfile create mode 100644 images/basic/tomcat/Dockerfile create mode 100644 images/basic/ubuntu/Dockerfile create mode 100644 images/default/Dockerfile create mode 100644 images/iptables/Dockerfile create mode 100644 images/packetdrill/Dockerfile create mode 100644 images/packetimpact/Dockerfile create mode 100644 images/runtimes/go1.12/Dockerfile create mode 100644 images/runtimes/java11/Dockerfile create mode 100644 images/runtimes/nodejs12.4.0/Dockerfile create mode 100644 images/runtimes/php7.3.6/Dockerfile create mode 100644 images/runtimes/python3.7.3/Dockerfile delete mode 100644 test/iptables/runner/Dockerfile delete mode 100644 test/packetdrill/Dockerfile delete mode 100644 test/packetimpact/tests/Dockerfile delete mode 100644 test/runtimes/images/Dockerfile_go1.12 delete mode 100644 test/runtimes/images/Dockerfile_java11 delete mode 100644 test/runtimes/images/Dockerfile_nodejs12.4.0 delete mode 100644 
test/runtimes/images/Dockerfile_php7.3.6 delete mode 100644 test/runtimes/images/Dockerfile_python3.7.3 create mode 100644 tools/bazel.mk create mode 100755 tools/installers/images.sh (limited to 'test') diff --git a/.travis.yml b/.travis.yml index acbd3d61b..40c8773fa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,22 +1,19 @@ -language: minimal -sudo: required +language: shell dist: xenial cache: directories: - /home/travis/.cache/bazel/ +os: linux services: - docker -matrix: +jobs: include: - os: linux arch: amd64 - env: RUNSC_PATH=./bazel-bin/runsc/linux_amd64_pure_stripped/runsc - os: linux arch: arm64 - env: RUNSC_PATH=./bazel-bin/runsc/linux_arm64_pure_stripped/runsc script: - - uname -a - - make DOCKER_RUN_OPTIONS="" BAZEL_OPTIONS="build runsc:runsc" bazel && $RUNSC_PATH --alsologtostderr --network none --debug --TESTONLY-unsafe-nonroot=true --rootless do ls + - uname -a && make smoke-test branches: except: # Skip copybara branches. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ad8e710da..423cf7a34 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -108,32 +108,15 @@ ignored. ### Build and test with Docker -`scripts/dev.sh` is a convenient script that builds and installs `runsc` as a -new Docker runtime for you. The scripts tries to extract the runtime name from -your local environment and will print it at the end. You can also customize it. -The script creates one regular runtime and another with debug flags enabled. -Here are a few examples: +Running `make dev` is a convenient way to build and install `runsc` as a Docker +runtime. The output of this command will show the runtimes installed. + +You may use `make refresh` to refresh the binary after any changes. For example: ```bash -# Default case (inside branch my-branch) -$ scripts/dev.sh -... -Runtimes my-branch and my-branch-d (debug enabled) setup. -Use --runtime=my-branch with your Docker command. - docker run --rm --runtime=my-branch --rm hello-world - -If you rebuild, use scripts/dev.sh --refresh. -Logs are in: /tmp/my-branch/logs - -# --refresh just updates the runtime binary and doesn't restart docker. -$ git/my_branch> scripts/dev.sh --refresh - -# Using a custom runtime name -$ git/my_branch> scripts/dev.sh my-runtime -... -Runtimes my-runtime and my-runtime-d (debug enabled) setup. -Use --runtime=my-runtime with your Docker command. - docker run --rm --runtime=my-runtime --rm hello-world +make dev +docker run --rm --runtime=my-branch --rm hello-world +make refresh ``` ### The small print diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 0fac71710..000000000 --- a/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM fedora:31 - -RUN dnf install -y dnf-plugins-core && dnf copr enable -y vbatts/bazel - -RUN dnf install -y bazel2 git gcc make golang gcc-c++ glibc-devel python3 which python3-pip python3-devel libffi-devel openssl-devel pkg-config glibc-static libstdc++-static patch - -RUN pip install pycparser - -WORKDIR /gvisor diff --git a/Makefile b/Makefile index d9531fbd5..c56c6ed48 100644 --- a/Makefile +++ b/Makefile @@ -1,50 +1,173 @@ -UID := $(shell id -u ${USER}) -GID := $(shell id -g ${USER}) -GVISOR_BAZEL_CACHE := $(shell readlink -f ~/.cache/bazel/) +#!/usr/bin/make -f -# The --privileged is required to run tests. -DOCKER_RUN_OPTIONS ?= --privileged +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -all: runsc +# Described below. +OPTIONS := +TARGETS := //runsc +ARGS := -docker-build: - docker build -t gvisor-bazel . +default: runsc +.PHONY: default -bazel-shutdown: - docker exec -i gvisor-bazel bazel shutdown && \ - docker kill gvisor-bazel +## usage: make +## or +## make OPTIONS="..." TARGETS="..." ARGS="..." +## +## Basic targets. +## +## This Makefile wraps basic build and test targets for ease-of-use. Bazel +## is run inside a canonical Docker container in order to simplify up-front +## requirements. +## +## There are common arguments that may be passed to targets. These are: +## OPTIONS - Build or test options. +## TARGETS - The bazel targets. +## ARGS - Arguments for run or sudo. +## +## Additionally, the copy target expects a DESTINATION to be provided. +## +## For example, to build runsc using this Makefile, you can run: +## make build OPTIONS="" TARGETS="//runsc"' +## +help: ## Shows all targets and help from the Makefile (this message). + @grep --no-filename -E '^([a-z.A-Z_-]+:.*?|)##' $(MAKEFILE_LIST) | \ + awk 'BEGIN {FS = "(:.*?|)## ?"}; { \ + if (length($$1) > 0) { \ + printf " \033[36m%-20s\033[0m %s\n", $$1, $$2; \ + } else { \ + printf "%s\n", $$2; \ + } \ + }' +build: ## Builds the given $(TARGETS) with the given $(OPTIONS). E.g. make build TARGETS=runsc +test: ## Tests the given $(TARGETS) with the given $(OPTIONS). E.g. make test TARGETS=pkg/buffer:buffer_test +copy: ## Copies the given $(TARGETS) to the given $(DESTINATION). E.g. make copy TARGETS=runsc DESTINATION=/tmp +run: ## Runs the given $(TARGETS), built with $(OPTIONS), using $(ARGS). E.g. make run TARGETS=runsc ARGS=-version +sudo: ## Runs the given $(TARGETS) as per run, but using "sudo -E". E.g. make sudo TARGETS=test/root:root_test ARGS=-test.v +.PHONY: help build test copy run sudo -bazel-server-start: docker-build - mkdir -p "$(GVISOR_BAZEL_CACHE)" && \ - docker run -d --rm --name gvisor-bazel \ - --user 0:0 \ - -v "$(GVISOR_BAZEL_CACHE):$(HOME)/.cache/bazel/" \ - -v "$(CURDIR):$(CURDIR)" \ - --workdir "$(CURDIR)" \ - --tmpfs /tmp:rw,exec \ - $(DOCKER_RUN_OPTIONS) \ - gvisor-bazel \ - sh -c "while :; do sleep 100; done" && \ - docker exec --user 0:0 -i gvisor-bazel sh -c "groupadd --gid $(GID) --non-unique gvisor && useradd --uid $(UID) --non-unique --gid $(GID) -d $(HOME) gvisor" +# Load all bazel wrappers. +# +# This file should define the basic "build", "test", "run" and "sudo" rules, in +# addition to the $(BRANCH_NAME) variable. +ifneq (,$(wildcard tools/google.mk)) +include tools/google.mk +else +include tools/bazel.mk +endif -bazel-server: - docker exec gvisor-bazel true || \ - $(MAKE) bazel-server-start +## +## Docker image targets. +## +## Images used by the tests must also be built and available locally. +## The canonical test targets defined below will automatically load +## relevant images. These can be loaded or built manually via these +## targets. +## +## (*) Note that you may provide an ARCH parameter in order to build +## and load images from an alternate archiecture (using qemu). 
When +## bazel is run as a server, this has the effect of running an full +## cross-architecture chain, and can produce cross-compiled binaries. +## +define images +$(1)-%: ## Image tool: $(1) a given image (also may use 'all-images'). + @$(MAKE) -C images $$@ +endef +rebuild-...: ## Rebuild the given image. Also may use 'rebuild-all-images'. +$(eval $(call images,rebuild)) +push-...: ## Push the given image. Also may use 'push-all-images'. +$(eval $(call images,pull)) +pull-...: ## Pull the given image. Also may use 'pull-all-images'. +$(eval $(call images,push)) +load-...: ## Load (pull or rebuild) the given image. Also may use 'load-all-images'. +$(eval $(call images,load)) +list-images: ## List all available images. + @$(MAKE) -C images $$@ -BAZEL_OPTIONS := build runsc -bazel: bazel-server - docker exec -u $(UID):$(GID) -i gvisor-bazel bazel $(BAZEL_OPTIONS) +## +## Canonical build and test targets. +## +## These targets are used by continuous integration and provide +## convenient entrypoints for testing changes. If you're adding a +## new subsystem or workflow, consider adding a new target here. +## +runsc: ## Builds the runsc binary. + @$(MAKE) build TARGETS="//runsc" +.PHONY: runsc -bazel-alias: - @echo "alias bazel='docker exec -u $(UID):$(GID) -i gvisor-bazel bazel'" +smoke-test: ## Runs a simple smoke test after build runsc. + @$(MAKE) run DOCKER_RUN_OPTIONS="" ARGS="--alsologtostderr --network none --debug --TESTONLY-unsafe-nonroot=true --rootless do true" +.PHONY: smoke-tests -runsc: - $(MAKE) BAZEL_OPTIONS="build runsc" bazel +unit-tests: ## Runs all unit tests in pkg runsc and tools. + @$(MAKE) test OPTIONS="pkg/... runsc/... tools/..." +.PHONY: unit-tests -tests: - $(MAKE) BAZEL_OPTIONS="test --test_tag_filters runsc_ptrace //test/syscalls/..." bazel +tests: ## Runs all local ptrace system call tests. + @$(MAKE) test OPTIONS="--test_tag_filter runsc_ptrace test/syscalls/..." +.PHONY: tests -unit-tests: - $(MAKE) BAZEL_OPTIONS="test //pkg/... //runsc/... //tools/..." bazel +## +## Development helpers and tooling. +## +## These targets faciliate local development by automatically +## installing and configuring a runtime. Several variables may +## be used here to tweak the installation: +## RUNTIME - The name of the installed runtime (default: branch). +## RUNTIME_DIR - Where the runtime will be installed (default: temporary directory with the $RUNTIME). +## RUNTIME_BIN - The runtime binary (default: $RUNTIME_DIR/runsc). +## RUNTIME_LOG_DIR - The logs directory (default: $RUNTIME_DIR/logs). +## RUNTIME_LOGS - The log pattern (default: $RUNTIME_LOG_DIR/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND%). +## +ifeq (,$(BRANCH_NAME)) +RUNTIME := runsc +RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/runsc +else +RUNTIME := $(BRANCH_NAME) +RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/$(BRANCH_NAME) +endif +RUNTIME_BIN := $(RUNTIME_DIR)/runsc +RUNTIME_LOG_DIR := $(RUNTIME_DIR)/logs +RUNTIME_LOGS := $(RUNTIME_LOG_DIR)/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND% -.PHONY: docker-build bazel-shutdown bazel-server-start bazel-server bazel runsc tests +dev: ## Installs a set of local runtimes. Requires sudo. + @$(MAKE) refresh ARGS="--net-raw" + @$(MAKE) configure RUNTIME="$(RUNTIME)" ARGS="--net-raw" + @$(MAKE) configure RUNTIME="$(RUNTIME)-d" ARGS="--net-raw --debug --strace --log-packets" + @$(MAKE) configure RUNTIME="$(RUNTIME)-p" ARGS="--net-raw --profile" + @sudo systemctl restart docker +.PHONY: dev + +refresh: ## Refreshes the runtime binary (for development only). 
Must have called 'dev' or 'test-install' first. + @mkdir -p "$(RUNTIME_DIR)" + @$(MAKE) copy TARGETS=runsc DESTINATION="$(RUNTIME_BIN)" && chmod 0755 "$(RUNTIME_BIN)" +.PHONY: install + +test-install: ## Installs the runtime for testing. Requires sudo. + @$(MAKE) refresh ARGS="--net-raw --TESTONLY-test-name-env=RUNSC_TEST_NAME --debug --strace --log-packets $(ARGS)" + @$(MAKE) configure + @sudo systemctl restart docker +.PHONY: install-test + +configure: ## Configures a single runtime. Requires sudo. Typically called from dev or test-install. + @sudo sudo "$(RUNTIME_BIN)" install --experimental=true --runtime="$(RUNTIME)" -- --debug-log "$(RUNTIME_LOGS)" $(ARGS) + @echo "Installed runtime \"$(RUNTIME)\" @ $(RUNTIME_BIN)" + @echo "Logs are in: $(RUNTIME_LOG_DIR)" + @sudo rm -rf "$(RUNTIME_LOG_DIR)" && mkdir -p "$(RUNTIME_LOG_DIR)" +.PHONY: configure + +test-runtime: ## A convenient wrapper around test that provides the runtime argument. Target must still be provided. + @$(MAKE) test OPTIONS="$(OPTIONS) --test_arg=--runtime=$(RUNTIME)" +.PHONY: runtime-test diff --git a/images/BUILD b/images/BUILD new file mode 100644 index 000000000..a50f388e9 --- /dev/null +++ b/images/BUILD @@ -0,0 +1,11 @@ +package(licenses = ["notice"]) + +# The images filegroup is definitely not a hermetic target, and requires Make +# to do anything meaningful with. However, this will be slurped up and used by +# the tools/installer/images.sh installer, which will ensure that all required +# images are available locally when running vm_tests. +filegroup( + name = "images", + srcs = glob(["**"]), + visibility = ["//tools/installers:__pkg__"], +) diff --git a/images/Makefile b/images/Makefile new file mode 100644 index 000000000..1485607bd --- /dev/null +++ b/images/Makefile @@ -0,0 +1,93 @@ +#!/usr/bin/make -f + +# Copyright 2018 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ARCH is the architecture used for the build. This may be overriden at the +# command line in order to perform a cross-build (in a limited capacity). +ARCH := $(shell uname -m) + +# Note that the image prefixes used here must match the image mangling in +# runsc/testutil.MangleImage. Names are mangled in this way to ensure that all +# tests are using locally-defined images (that are consistent and idempotent). +REMOTE_IMAGE_PREFIX ?= gcr.io/gvisor-presubmit +LOCAL_IMAGE_PREFIX ?= gvisor.dev/images +ALL_IMAGES := $(subst /,_,$(subst ./,,$(shell find . -name Dockerfile -exec dirname {} \;))) +ifneq ($(ARCH),$(shell uname -m)) +DOCKER_PLATFORM_ARGS := --platform=$(ARCH) +else +DOCKER_PLATFORM_ARGS := +endif + +list-all-images: + @for image in $(ALL_IMAGES); do echo $${image}; done +.PHONY: list-build-images + +%-all-images: + @$(MAKE) $(patsubst %,$*-%,$(ALL_IMAGES)) + +# tag is a function that returns the tag name, given an image. +# +# The tag constructed is used to memoize the image generated (see README.md). 
+# This scheme is used to enable aggressive caching in a central repository, but +# ensuring that images will always be sourced using the local files if there +# are changes. +path = $(subst _,/,$(1)) +tag = $(shell find $(call path,$(1)) -type f -print | sort | xargs -n 1 sha256sum | sha256sum - | cut -c 1-16) +remote_image = $(REMOTE_IMAGE_PREFIX)/$(subst _,/,$(1))_$(ARCH):$(call tag,$(1)) +local_image = $(LOCAL_IMAGE_PREFIX)/$(subst _,/,$(1)) + +# rebuild builds the image locally. Only the "remote" tag will be applied. Note +# we need to explicitly repull the base layer in order to ensure that the +# architecture is correct. Note that we use the term "rebuild" here to avoid +# conflicting with the bazel "build" terminology, which is used elsewhere. +rebuild-%: register-cross + FROM=$(shell grep FROM $(call path,$*)/Dockerfile | cut -d' ' -f2-) && \ + docker pull $(DOCKER_PLATFORM_ARGS) $$FROM + T=$$(mktemp -d) && cp -a $(call path,$*)/* $$T && \ + docker build $(DOCKER_PLATFORM_ARGS) -t $(call remote_image,$*) $$T && \ + rm -rf $$T + +# pull will check the "remote" image and pull if necessary. If the remote image +# must be pulled, then it will tag with the latest local target. Note that pull +# may fail if the remote image is not available. +pull-%: + docker pull $(DOCKER_PLATFORM_ARGS) $(call remote_image,$*) + +# load will either pull the "remote" or build it locally. This is the preferred +# entrypoint, as it should never file. The local tag should always be set after +# this returns (either by the pull or the build). +load-%: + docker inspect $(call remote_image,$*) >/dev/null 2>&1 || $(MAKE) pull-$* || $(MAKE) rebuild-$* + docker tag $(call remote_image,$*) $(call local_image,$*) + +# push pushes the remote image, after either pulling (to validate that the tag +# already exists) or building manually. +push-%: load-% + docker push $(call remote_image,$*) + +# register-cross registers the necessary qemu binaries for cross-compilation. +# This may be used by any target that may execute containers that are not the +# native format. +register-cross: +ifneq ($(ARCH),$(shell uname -m)) +ifeq (,$(wildcard /proc/sys/fs/binfmt_misc/qemu-*)) + docker run --rm --privileged multiarch/qemu-user-static --reset --persistent yes +else + @true # Already registered. +endif +else + @true # No cross required. +endif +.PHONY: register-cross diff --git a/images/README.md b/images/README.md new file mode 100644 index 000000000..d2efb5db4 --- /dev/null +++ b/images/README.md @@ -0,0 +1,61 @@ +# Container Images + +This directory contains all images used by tests. + +Note that all these images must be pushed to the testing project hosted on +[Google Container Registry][gcr]. This will happen automatically as part of +continuous integration. This will speed up loading as images will not need to be +built from scratch for each test run. + +Image tooling is accessible via `make`, specifically via `tools/images.mk`. + +## Why make? + +Make is used because it can bootstrap the `default` image, which contains +`bazel` and all other parts of the toolchain. + +## Listing images + +To list all images, use `make list-all-images` from the top-level directory. + +## Loading and referencing images + +To build a specific image, use `make load-` from the top-level directory. +This will ensure that an image `gvisor.dev/images/:latest` is available. + +Images should always be referred to via the `gvisor.dev/images` canonical path. 
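
As a rough illustration of the memoization scheme described above, the tag rule in images/Makefile hashes every file under an image directory and then hashes the combined listing, keeping the first 16 hex characters. The Go sketch below computes an equivalent tag; the sha256sum output formatting is only approximated, so the digest may not match the Makefile's byte for byte, and "images/basic/alpine" is just an example path from this change:

package main

import (
	"crypto/sha256"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"sort"
)

// imageTag hashes the contents of every file under dir, hashes the sorted
// per-file listing, and returns the first 16 hex characters of the result.
func imageTag(dir string) (string, error) {
	var files []string
	err := filepath.Walk(dir, func(p string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if !info.IsDir() {
			files = append(files, p)
		}
		return nil
	})
	if err != nil {
		return "", err
	}
	sort.Strings(files)
	var combined []byte
	for _, f := range files {
		data, err := ioutil.ReadFile(f)
		if err != nil {
			return "", err
		}
		sum := sha256.Sum256(data)
		// Approximates one "sha256sum" output line: "<hash>  <path>".
		combined = append(combined, []byte(fmt.Sprintf("%x  %s\n", sum, f))...)
	}
	final := sha256.Sum256(combined)
	return fmt.Sprintf("%x", final)[:16], nil
}

func main() {
	tag, err := imageTag("images/basic/alpine")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(tag)
}
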
+This tag exists only locally, but serves to decouple tests from the underlying +image infrastructure. + +The continuous integration system can either take fine-grained dependencies on +single images via individual `load` targets, or pull all images via a single +`load-all-images` invocation. + +## Adding new images + +To add a new image, create a new directory under `images` containing a +Dockerfile and any other files that the image requires. You may choose to add to +an existing subdirectory if applicable, or create a new one. + +All images will be tagged and memoized using a hash of the directory contents. +As a result, every image should be made completely reproducible if possible. +This means using fixed tags and fixed versions whenever feasible. + +Notes that images should also be made architecture-independent if possible. The +build scripts will handling loading the appropriate architecture onto the +machine and tagging it with the single canonical tag. + +Add a `load-` dependency in the Makefile if the image is required for a +particular set of tests. This target will pull the tag from the image repository +if available. + +## Building and pushing images + +All images can be built manually by running `build-` and pushed using +`push-`. Note that you can also use `build-all-images` and +`push-all-images`. Note that pushing will require appropriate permissions in the +project. + +The continuous integration system can either take fine-grained dependencies on +individual `push` targets, or ensure all images are up-to-date with a single +`push-all-images` invocation. diff --git a/images/basic/alpine/Dockerfile b/images/basic/alpine/Dockerfile new file mode 100644 index 000000000..12b26040a --- /dev/null +++ b/images/basic/alpine/Dockerfile @@ -0,0 +1 @@ +FROM alpine:3.11.5 diff --git a/images/basic/busybox/Dockerfile b/images/basic/busybox/Dockerfile new file mode 100644 index 000000000..79b3f683a --- /dev/null +++ b/images/basic/busybox/Dockerfile @@ -0,0 +1 @@ +FROM busybox:1.31.1 diff --git a/images/basic/httpd/Dockerfile b/images/basic/httpd/Dockerfile new file mode 100644 index 000000000..83bc0ed88 --- /dev/null +++ b/images/basic/httpd/Dockerfile @@ -0,0 +1 @@ +FROM httpd:2.4.43 diff --git a/images/basic/mysql/Dockerfile b/images/basic/mysql/Dockerfile new file mode 100644 index 000000000..95da9c48d --- /dev/null +++ b/images/basic/mysql/Dockerfile @@ -0,0 +1 @@ +FROM mysql:8.0.19 diff --git a/images/basic/nginx/Dockerfile b/images/basic/nginx/Dockerfile new file mode 100644 index 000000000..af2e62526 --- /dev/null +++ b/images/basic/nginx/Dockerfile @@ -0,0 +1 @@ +FROM nginx:1.17.9 diff --git a/images/basic/python/Dockerfile b/images/basic/python/Dockerfile new file mode 100644 index 000000000..acf07cca9 --- /dev/null +++ b/images/basic/python/Dockerfile @@ -0,0 +1,2 @@ +FROM python:3 +ENTRYPOINT ["python", "-m", "http.server", "8080"] diff --git a/images/basic/resolv/Dockerfile b/images/basic/resolv/Dockerfile new file mode 100644 index 000000000..13665bdaf --- /dev/null +++ b/images/basic/resolv/Dockerfile @@ -0,0 +1 @@ +FROM k8s.gcr.io/busybox:latest diff --git a/images/basic/ruby/Dockerfile b/images/basic/ruby/Dockerfile new file mode 100644 index 000000000..d290418fb --- /dev/null +++ b/images/basic/ruby/Dockerfile @@ -0,0 +1 @@ +FROM ruby:2.7.1 diff --git a/images/basic/tomcat/Dockerfile b/images/basic/tomcat/Dockerfile new file mode 100644 index 000000000..c7db39a36 --- /dev/null +++ b/images/basic/tomcat/Dockerfile @@ -0,0 +1 @@ +FROM tomcat:8.0 diff --git 
a/images/basic/ubuntu/Dockerfile b/images/basic/ubuntu/Dockerfile new file mode 100644 index 000000000..331b71343 --- /dev/null +++ b/images/basic/ubuntu/Dockerfile @@ -0,0 +1 @@ +FROM ubuntu:trusty diff --git a/images/default/Dockerfile b/images/default/Dockerfile new file mode 100644 index 000000000..2d0bb5ba5 --- /dev/null +++ b/images/default/Dockerfile @@ -0,0 +1,11 @@ +FROM fedora:31 +RUN dnf install -y dnf-plugins-core && dnf copr enable -y vbatts/bazel +RUN dnf install -y git gcc make golang gcc-c++ glibc-devel python3 which python3-pip python3-devel libffi-devel openssl-devel pkg-config glibc-static libstdc++-static patch +RUN pip install pycparser +RUN dnf install -y bazel3 +RUN curl https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-289.0.0-linux-x86_64.tar.gz | \ + tar zxvf - google-cloud-sdk && \ + google-cloud-sdk/install.sh && \ + ln -s /google-cloud-sdk/bin/gcloud /usr/bin/gcloud +WORKDIR /workspace +ENTRYPOINT ["/usr/bin/bazel"] diff --git a/images/iptables/Dockerfile b/images/iptables/Dockerfile new file mode 100644 index 000000000..efd91cb80 --- /dev/null +++ b/images/iptables/Dockerfile @@ -0,0 +1,2 @@ +FROM ubuntu +RUN apt update && apt install -y iptables diff --git a/images/packetdrill/Dockerfile b/images/packetdrill/Dockerfile new file mode 100644 index 000000000..7a006c85f --- /dev/null +++ b/images/packetdrill/Dockerfile @@ -0,0 +1,8 @@ +FROM ubuntu:bionic +RUN apt-get update && apt-get install -y net-tools git iptables iputils-ping \ + netcat tcpdump jq tar bison flex make +RUN hash -r +RUN git clone --branch packetdrill-v2.0 \ + https://github.com/google/packetdrill.git +RUN cd packetdrill/gtests/net/packetdrill && ./configure && make +CMD /bin/bash diff --git a/images/packetimpact/Dockerfile b/images/packetimpact/Dockerfile new file mode 100644 index 000000000..87aa99ef2 --- /dev/null +++ b/images/packetimpact/Dockerfile @@ -0,0 +1,16 @@ +FROM ubuntu:bionic +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + # iptables to disable OS native packet processing. + iptables \ + # nc to check that the posix_server is running. + netcat \ + # tcpdump to log brief packet sniffing. + tcpdump \ + # ip link show to display MAC addresses. + iproute2 \ + # tshark to log verbose packet sniffing. + tshark \ + # killall for cleanup. + psmisc +RUN hash -r +CMD /bin/bash diff --git a/images/runtimes/go1.12/Dockerfile b/images/runtimes/go1.12/Dockerfile new file mode 100644 index 000000000..cb2944062 --- /dev/null +++ b/images/runtimes/go1.12/Dockerfile @@ -0,0 +1,4 @@ +# Go is easy, since we already have everything we need to compile the proctor +# binary and run the tests in the golang Docker image. +FROM golang:1.12 +RUN ["go", "tool", "dist", "test", "-compile-only"] diff --git a/images/runtimes/java11/Dockerfile b/images/runtimes/java11/Dockerfile new file mode 100644 index 000000000..03bc8aaf1 --- /dev/null +++ b/images/runtimes/java11/Dockerfile @@ -0,0 +1,22 @@ +FROM ubuntu:bionic +RUN apt-get update && apt-get install -y \ + autoconf \ + build-essential \ + curl \ + make \ + openjdk-11-jdk \ + unzip \ + zip + +# Download the JDK test library. +WORKDIR /root +RUN set -ex \ + && curl -fsSL --retry 10 -o /tmp/jdktests.tar.gz http://hg.openjdk.java.net/jdk/jdk11/archive/76072a077ee1.tar.gz/test \ + && tar -xzf /tmp/jdktests.tar.gz \ + && mv jdk11-76072a077ee1/test test \ + && rm -f /tmp/jdktests.tar.gz + +# Install jtreg and add to PATH. 
+RUN curl -o jtreg.tar.gz https://ci.adoptopenjdk.net/view/Dependencies/job/jtreg/lastSuccessfulBuild/artifact/jtreg-4.2.0-tip.tar.gz +RUN tar -xzf jtreg.tar.gz +ENV PATH="/root/jtreg/bin:$PATH" diff --git a/images/runtimes/nodejs12.4.0/Dockerfile b/images/runtimes/nodejs12.4.0/Dockerfile new file mode 100644 index 000000000..d17924b62 --- /dev/null +++ b/images/runtimes/nodejs12.4.0/Dockerfile @@ -0,0 +1,21 @@ +FROM ubuntu:bionic +RUN apt-get update && apt-get install -y \ + curl \ + dumb-init \ + g++ \ + make \ + python + +WORKDIR /root +ARG VERSION=v12.4.0 +RUN curl -o node-${VERSION}.tar.gz https://nodejs.org/dist/${VERSION}/node-${VERSION}.tar.gz +RUN tar -zxf node-${VERSION}.tar.gz + +WORKDIR /root/node-${VERSION} +RUN ./configure +RUN make +RUN make test-build + +# Including dumb-init emulates the Linux "init" process, preventing the failure +# of tests involving worker processes. +ENTRYPOINT ["/usr/bin/dumb-init"] diff --git a/images/runtimes/php7.3.6/Dockerfile b/images/runtimes/php7.3.6/Dockerfile new file mode 100644 index 000000000..e5f67f79c --- /dev/null +++ b/images/runtimes/php7.3.6/Dockerfile @@ -0,0 +1,19 @@ +FROM ubuntu:bionic +RUN apt-get update && apt-get install -y \ + autoconf \ + automake \ + bison \ + build-essential \ + curl \ + libtool \ + libxml2-dev \ + re2c + +WORKDIR /root +ARG VERSION=7.3.6 +RUN curl -o php-${VERSION}.tar.gz https://www.php.net/distributions/php-${VERSION}.tar.gz +RUN tar -zxf php-${VERSION}.tar.gz + +WORKDIR /root/php-${VERSION} +RUN ./configure +RUN make diff --git a/images/runtimes/python3.7.3/Dockerfile b/images/runtimes/python3.7.3/Dockerfile new file mode 100644 index 000000000..4d1e1e221 --- /dev/null +++ b/images/runtimes/python3.7.3/Dockerfile @@ -0,0 +1,21 @@ +FROM ubuntu:bionic +RUN apt-get update && apt-get install -y \ + curl \ + gcc \ + libbz2-dev \ + libffi-dev \ + liblzma-dev \ + libreadline-dev \ + libssl-dev \ + make \ + zlib1g-dev + +# Use flags -LJO to follow the html redirect and download .tar.gz. +WORKDIR /root +ARG VERSION=3.7.3 +RUN curl -LJO https://github.com/python/cpython/archive/v${VERSION}.tar.gz +RUN tar -zxf cpython-${VERSION}.tar.gz + +WORKDIR /root/cpython-${VERSION} +RUN ./configure --with-pydebug +RUN make -s -j2 diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go index d75ceca3d..ee8c78014 100644 --- a/pkg/test/testutil/testutil.go +++ b/pkg/test/testutil/testutil.go @@ -57,42 +57,10 @@ func IsCheckpointSupported() bool { return *checkpoint } -// nameToActual is used by ImageByName (for now). -var nameToActual = map[string]string{ - "basic/alpine": "alpine", - "basic/busybox": "busybox:1.31.1", - "basic/httpd": "httpd", - "basic/mysql": "mysql", - "basic/nginx": "nginx", - "basic/python": "gcr.io/gvisor-presubmit/python-hello", - "basic/resolv": "k8s.gcr.io/busybox", - "basic/ruby": "ruby", - "basic/tomcat": "tomcat:8.0", - "basic/ubuntu": "ubuntu:trusty", - "iptables": "gcr.io/gvisor-presubmit/iptables-test", - "packetdrill": "gcr.io/gvisor-presubmit/packetdrill", - "packetimpact": "gcr.io/gvisor-presubmit/packetimpact", - "runtimes/go1.12": "gcr.io/gvisor-presubmit/go1.12", - "runtimes/java11": "gcr.io/gvisor-presubmit/java11", - "runtimes/nodejs12.4.0": "gcr.io/gvisor-presubmit/nodejs12.4.0", - "runtimes/php7.3.6": "gcr.io/gvisor-presubmit/php7.3.6", - "runtimes/python3.7.3": "gcr.io/gvisor-presubmit/python3.7.3", -} - -// ImageByName mangles the image name used locally. -// -// For now, this is implemented as a static lookup table. 
In a subsequent -// change, this will be used to reference a locally-generated image. +// ImageByName mangles the image name used locally. This depends on the image +// build infrastructure in images/ and tools/vm. func ImageByName(name string) string { - actual, ok := nameToActual[name] - if !ok { - panic(fmt.Sprintf("unknown image: %v", name)) - } - // A terrible hack, for now execute a manual pull. - if out, err := exec.Command("docker", "pull", actual).CombinedOutput(); err != nil { - panic(fmt.Sprintf("error pulling image %q -> %q: %v, out: %s", name, actual, err, string(out))) - } - return actual + return fmt.Sprintf("gvisor.dev/images/%s", name) } // ConfigureExePath configures the executable for runsc in the test environment. diff --git a/scripts/build.sh b/scripts/build.sh index 7c9c99800..e821e8624 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -16,9 +16,6 @@ source $(dirname $0)/common.sh -# Install required packages for make_repository.sh et al. -apt_install dpkg-sig coreutils apt-utils xz-utils - # Build runsc. runsc=$(build -c opt //runsc) @@ -45,7 +42,6 @@ if [[ -v KOKORO_REPO_KEY ]]; then repo=$(tools/make_repository.sh \ "${KOKORO_KEYSTORE_DIR}/${KOKORO_REPO_KEY}" \ gvisor-bot@google.com \ - main \ "${KOKORO_ARTIFACTS_DIR}" \ ${pkgs}) fi diff --git a/scripts/common.sh b/scripts/common.sh index bc6ba71e8..3ca699e4a 100755 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -84,25 +84,3 @@ function install_runsc() { # Restart docker to pick up the new runtime configuration. sudo systemctl restart docker } - -# Installs the given packages. Note that the package names should be verified to -# be correct, otherwise this may result in a loop that spins until time out. -function apt_install() { - while true; do - sudo apt-get update && - sudo apt-get install -y "$@" && - true - result="${?}" - case $result in - 0) - break - ;; - 100) - # 100 is the error code that apt-get returns. - ;; - *) - exit $result - ;; - esac - done -} diff --git a/scripts/docker_tests.sh b/scripts/docker_tests.sh index 72ba05260..931ce1aa4 100755 --- a/scripts/docker_tests.sh +++ b/scripts/docker_tests.sh @@ -16,5 +16,7 @@ source $(dirname $0)/common.sh +make load-all-images + install_runsc_for_test docker test_runsc //test/image:image_test //test/e2e:integration_test diff --git a/scripts/hostnet_tests.sh b/scripts/hostnet_tests.sh index 41298293d..992db50dd 100755 --- a/scripts/hostnet_tests.sh +++ b/scripts/hostnet_tests.sh @@ -16,6 +16,8 @@ source $(dirname $0)/common.sh +make load-all-images + # Install the runtime and perform basic tests. install_runsc_for_test hostnet --network=host test_runsc --test_arg=-checkpoint=false //test/image:image_test //test/e2e:integration_test diff --git a/scripts/iptables_tests.sh b/scripts/iptables_tests.sh index c8da1f32d..2a8c24907 100755 --- a/scripts/iptables_tests.sh +++ b/scripts/iptables_tests.sh @@ -16,6 +16,8 @@ source $(dirname $0)/common.sh +make load-iptables + install_runsc_for_test iptables --net-raw -test //test/iptables:iptables_test --test_arg=--runtime=runc -test //test/iptables:iptables_test --test_arg=--runtime=${RUNTIME} +test //test/iptables:iptables_test "--test_arg=--runtime=runc" +test //test/iptables:iptables_test "--test_arg=--runtime=${RUNTIME}" diff --git a/scripts/kvm_tests.sh b/scripts/kvm_tests.sh index 5662401df..619571c74 100755 --- a/scripts/kvm_tests.sh +++ b/scripts/kvm_tests.sh @@ -16,6 +16,8 @@ source $(dirname $0)/common.sh +make load-all-images + # Ensure that KVM is loaded, and we can use it. 
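# The module is kvm_intel on Intel hosts and kvm_amd on AMD hosts; fall back
# to modprobe if neither is already loaded.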
(lsmod | grep -E '^(kvm_intel|kvm_amd)') || sudo modprobe kvm sudo chmod a+rw /dev/kvm diff --git a/scripts/make_tests.sh b/scripts/make_tests.sh index 79426756d..dbf1bba77 100755 --- a/scripts/make_tests.sh +++ b/scripts/make_tests.sh @@ -16,10 +16,5 @@ source $(dirname $0)/common.sh -top_level=$(git rev-parse --show-toplevel 2>/dev/null) -[[ $? -eq 0 ]] && cd "${top_level}" || exit 1 - -make make runsc -make BAZEL_OPTIONS="build //..." bazel make bazel-shutdown diff --git a/scripts/overlay_tests.sh b/scripts/overlay_tests.sh index 2a1f12c0b..448864953 100755 --- a/scripts/overlay_tests.sh +++ b/scripts/overlay_tests.sh @@ -16,6 +16,8 @@ source $(dirname $0)/common.sh +make load-all-images + # Install the runtime and perform basic tests. install_runsc_for_test overlay --overlay test_runsc //test/image:image_test //test/e2e:integration_test diff --git a/scripts/packetdrill_tests.sh b/scripts/packetdrill_tests.sh index fc6bef79c..f0fc444c8 100755 --- a/scripts/packetdrill_tests.sh +++ b/scripts/packetdrill_tests.sh @@ -16,5 +16,7 @@ source $(dirname $0)/common.sh +make load-packetdrill + install_runsc_for_test runsc-d test_runsc $(bazel query "attr(tags, manual, tests(//test/packetdrill/...))") diff --git a/scripts/packetimpact_tests.sh b/scripts/packetimpact_tests.sh index 027d11e64..17fc43f27 100755 --- a/scripts/packetimpact_tests.sh +++ b/scripts/packetimpact_tests.sh @@ -16,5 +16,7 @@ source $(dirname $0)/common.sh +make load-packetimpact + install_runsc_for_test runsc-d test_runsc $(bazel query "attr(tags, packetimpact, tests(//test/packetimpact/...))") diff --git a/scripts/root_tests.sh b/scripts/root_tests.sh index 4e4fcc76b..d629bf2aa 100755 --- a/scripts/root_tests.sh +++ b/scripts/root_tests.sh @@ -16,6 +16,8 @@ source $(dirname $0)/common.sh +make load-all-images + # Reinstall the latest containerd shim. declare -r base="https://storage.googleapis.com/cri-containerd-staging/gvisor-containerd-shim" declare -r latest=$(mktemp --tmpdir gvisor-containerd-shim-latest.XXXXXX) @@ -28,4 +30,3 @@ sudo mv ${shim_path} /usr/local/bin/gvisor-containerd-shim # Run the tests that require root. install_runsc_for_test root run_as_root //test/root:root_test --runtime=${RUNTIME} - diff --git a/scripts/swgso_tests.sh b/scripts/swgso_tests.sh index 0de2df1d2..c67f2fe5c 100755 --- a/scripts/swgso_tests.sh +++ b/scripts/swgso_tests.sh @@ -16,6 +16,8 @@ source $(dirname $0)/common.sh +make load-all-images + # Install the runtime and perform basic tests. install_runsc_for_test swgso --software-gso=true --gso=false test_runsc //test/image:image_test //test/e2e:integration_test diff --git a/test/iptables/runner/Dockerfile b/test/iptables/runner/Dockerfile deleted file mode 100644 index b77db44a1..000000000 --- a/test/iptables/runner/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -# This Dockerfile builds the image hosted at -# gcr.io/gvisor-presubmit/iptables-test. 
-FROM ubuntu -RUN apt update && apt install -y iptables diff --git a/test/packetdrill/Dockerfile b/test/packetdrill/Dockerfile deleted file mode 100644 index 4b75e9527..000000000 --- a/test/packetdrill/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM ubuntu:bionic - -RUN apt-get update && apt-get install -y net-tools git iptables iputils-ping \ - netcat tcpdump jq tar bison flex make -RUN hash -r -RUN git clone --branch packetdrill-v2.0 \ - https://github.com/google/packetdrill.git -RUN cd packetdrill/gtests/net/packetdrill && ./configure && make -CMD /bin/bash diff --git a/test/packetimpact/tests/Dockerfile b/test/packetimpact/tests/Dockerfile deleted file mode 100644 index 9075bc555..000000000 --- a/test/packetimpact/tests/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM ubuntu:bionic - -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - # iptables to disable OS native packet processing. - iptables \ - # nc to check that the posix_server is running. - netcat \ - # tcpdump to log brief packet sniffing. - tcpdump \ - # ip link show to display MAC addresses. - iproute2 \ - # tshark to log verbose packet sniffing. - tshark \ - # killall for cleanup. - psmisc -RUN hash -r -CMD /bin/bash diff --git a/test/root/BUILD b/test/root/BUILD index 17e51e66e..639e293e3 100644 --- a/test/root/BUILD +++ b/test/root/BUILD @@ -48,6 +48,7 @@ go_test( vm_test( name = "root_vm_test", + size = "large", shard_count = 1, targets = [ "//tools/installers:shim", diff --git a/test/runtimes/images/Dockerfile_go1.12 b/test/runtimes/images/Dockerfile_go1.12 deleted file mode 100644 index ab9d6abf3..000000000 --- a/test/runtimes/images/Dockerfile_go1.12 +++ /dev/null @@ -1,10 +0,0 @@ -# Go is easy, since we already have everything we need to compile the proctor -# binary and run the tests in the golang Docker image. -FROM golang:1.12 -ADD ["proctor/", "/go/src/proctor/"] -RUN ["go", "build", "-o", "/proctor", "/go/src/proctor"] - -# Pre-compile the tests so we don't need to do so in each test run. -RUN ["go", "tool", "dist", "test", "-compile-only"] - -ENTRYPOINT ["/proctor", "--runtime=go"] diff --git a/test/runtimes/images/Dockerfile_java11 b/test/runtimes/images/Dockerfile_java11 deleted file mode 100644 index 9b7c3d5a3..000000000 --- a/test/runtimes/images/Dockerfile_java11 +++ /dev/null @@ -1,30 +0,0 @@ -# Compile the proctor binary. -FROM golang:1.12 AS golang -ADD ["proctor/", "/go/src/proctor/"] -RUN ["go", "build", "-o", "/proctor", "/go/src/proctor"] - -FROM ubuntu:bionic -RUN apt-get update && apt-get install -y \ - autoconf \ - build-essential \ - curl \ - make \ - openjdk-11-jdk \ - unzip \ - zip - -# Download the JDK test library. -WORKDIR /root -RUN set -ex \ - && curl -fsSL --retry 10 -o /tmp/jdktests.tar.gz http://hg.openjdk.java.net/jdk/jdk11/archive/76072a077ee1.tar.gz/test \ - && tar -xzf /tmp/jdktests.tar.gz \ - && mv jdk11-76072a077ee1/test test \ - && rm -f /tmp/jdktests.tar.gz - -# Install jtreg and add to PATH. -RUN curl -o jtreg.tar.gz https://ci.adoptopenjdk.net/view/Dependencies/job/jtreg/lastSuccessfulBuild/artifact/jtreg-4.2.0-tip.tar.gz -RUN tar -xzf jtreg.tar.gz -ENV PATH="/root/jtreg/bin:$PATH" - -COPY --from=golang /proctor /proctor -ENTRYPOINT ["/proctor", "--runtime=java"] diff --git a/test/runtimes/images/Dockerfile_nodejs12.4.0 b/test/runtimes/images/Dockerfile_nodejs12.4.0 deleted file mode 100644 index 26f68b487..000000000 --- a/test/runtimes/images/Dockerfile_nodejs12.4.0 +++ /dev/null @@ -1,28 +0,0 @@ -# Compile the proctor binary. 
-FROM golang:1.12 AS golang -ADD ["proctor/", "/go/src/proctor/"] -RUN ["go", "build", "-o", "/proctor", "/go/src/proctor"] - -FROM ubuntu:bionic -RUN apt-get update && apt-get install -y \ - curl \ - dumb-init \ - g++ \ - make \ - python - -WORKDIR /root -ARG VERSION=v12.4.0 -RUN curl -o node-${VERSION}.tar.gz https://nodejs.org/dist/${VERSION}/node-${VERSION}.tar.gz -RUN tar -zxf node-${VERSION}.tar.gz - -WORKDIR /root/node-${VERSION} -RUN ./configure -RUN make -RUN make test-build - -COPY --from=golang /proctor /proctor - -# Including dumb-init emulates the Linux "init" process, preventing the failure -# of tests involving worker processes. -ENTRYPOINT ["/usr/bin/dumb-init", "/proctor", "--runtime=nodejs"] diff --git a/test/runtimes/images/Dockerfile_php7.3.6 b/test/runtimes/images/Dockerfile_php7.3.6 deleted file mode 100644 index e6b4c6329..000000000 --- a/test/runtimes/images/Dockerfile_php7.3.6 +++ /dev/null @@ -1,27 +0,0 @@ -# Compile the proctor binary. -FROM golang:1.12 AS golang -ADD ["proctor/", "/go/src/proctor/"] -RUN ["go", "build", "-o", "/proctor", "/go/src/proctor"] - -FROM ubuntu:bionic -RUN apt-get update && apt-get install -y \ - autoconf \ - automake \ - bison \ - build-essential \ - curl \ - libtool \ - libxml2-dev \ - re2c - -WORKDIR /root -ARG VERSION=7.3.6 -RUN curl -o php-${VERSION}.tar.gz https://www.php.net/distributions/php-${VERSION}.tar.gz -RUN tar -zxf php-${VERSION}.tar.gz - -WORKDIR /root/php-${VERSION} -RUN ./configure -RUN make - -COPY --from=golang /proctor /proctor -ENTRYPOINT ["/proctor", "--runtime=php"] diff --git a/test/runtimes/images/Dockerfile_python3.7.3 b/test/runtimes/images/Dockerfile_python3.7.3 deleted file mode 100644 index 905cd22d7..000000000 --- a/test/runtimes/images/Dockerfile_python3.7.3 +++ /dev/null @@ -1,30 +0,0 @@ -# Compile the proctor binary. -FROM golang:1.12 AS golang -ADD ["proctor/", "/go/src/proctor/"] -RUN ["go", "build", "-o", "/proctor", "/go/src/proctor"] - -FROM ubuntu:bionic - -RUN apt-get update && apt-get install -y \ - curl \ - gcc \ - libbz2-dev \ - libffi-dev \ - liblzma-dev \ - libreadline-dev \ - libssl-dev \ - make \ - zlib1g-dev - -# Use flags -LJO to follow the html redirect and download .tar.gz. -WORKDIR /root -ARG VERSION=3.7.3 -RUN curl -LJO https://github.com/python/cpython/archive/v${VERSION}.tar.gz -RUN tar -zxf cpython-${VERSION}.tar.gz - -WORKDIR /root/cpython-${VERSION} -RUN ./configure --with-pydebug -RUN make -s -j2 - -COPY --from=golang /proctor /proctor -ENTRYPOINT ["/proctor", "--runtime=python"] diff --git a/tools/bazel.mk b/tools/bazel.mk new file mode 100644 index 000000000..45fbbecca --- /dev/null +++ b/tools/bazel.mk @@ -0,0 +1,106 @@ +#!/usr/bin/make -f + +# Copyright 2018 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See base Makefile. +BRANCH_NAME := $(shell (git branch --show-current 2>/dev/null || \ + git rev-parse --abbrev-ref HEAD 2>/dev/null) | \ + xargs -n 1 basename 2>/dev/null) + +# Bazel container configuration (see below). 
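+# Each value below uses ?= so it can be overridden from the environment or the
+# make command line (see the help text further down).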
+USER ?= gvisor +DOCKER_NAME ?= gvisor-bazel +DOCKER_RUN_OPTIONS ?= --privileged +BAZEL_CACHE := $(shell readlink -m ~/.cache/bazel/) +GCLOUD_CONFIG := $(shell readlink -m ~/.config/gcloud/) +DOCKER_SOCKET := /var/run/docker.sock + +# Non-configurable. +UID := $(shell id -u ${USER}) +GID := $(shell id -g ${USER}) +FULL_DOCKER_RUN_OPTIONS := $(DOCKER_RUN_OPTIONS) +FULL_DOCKER_RUN_OPTIONS += -v "$(BAZEL_CACHE):$(BAZEL_CACHE)" +FULL_DOCKER_RUN_OPTIONS += -v "$(GCLOUD_CONFIG):$(GCLOUD_CONFIG)" +FULL_DOCKER_RUN_OPTIONS += -v "$(DOCKER_SOCKET):$(DOCKER_SOCKET)" + +## +## Bazel helpers. +## +## This file supports targets that wrap bazel in a running Docker +## container to simplify development. Some options are available to +## control the behavior of this container: +## USER - The in-container user. +## DOCKER_RUN_OPTIONS - Options for the container (default: --privileged, required for tests). +## DOCKER_NAME - The container name (default: gvisor-bazel-HASH). +## BAZEL_CACHE - The bazel cache directory (default: detected). +## GCLOUD_CONFIG - The gcloud config directory (detect: detected). +## DOCKER_SOCKET - The Docker socket (default: detected). +## +bazel-server-start: load-default ## Starts the bazel server. + docker run -d --rm \ + --name $(DOCKER_NAME) \ + --user 0:0 \ + -v "$(CURDIR):$(CURDIR)" \ + --workdir "$(CURDIR)" \ + --tmpfs /tmp:rw,exec \ + --entrypoint "" \ + $(FULL_DOCKER_RUN_OPTIONS) \ + gvisor.dev/images/default \ + sh -c "groupadd --gid $(GID) --non-unique $(USER) && \ + useradd --uid $(UID) --non-unique --no-create-home --gid $(GID) -d $(HOME) $(USER) && \ + bazel version && \ + while :; do sleep 3600; done" + @while :; do if docker logs $(DOCKER_NAME) 2>/dev/null | grep "Build label:" >/dev/null; then break; fi; sleep 1; done +.PHONY: bazel-server-start + +bazel-shutdown: ## Shuts down a running bazel server. + @docker exec --user $(UID):$(GID) $(DOCKER_NAME) bazel shutdown; rc=$$?; docker kill $(DOCKER_NAME) || [[ $$rc -ne 0 ]] +.PHONY: bazel-shutdown + +bazel-alias: ## Emits an alias that can be used within the shell. + @echo "alias bazel='docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) bazel'" +.PHONY: bazel-alias + +bazel-server: ## Ensures that the server exists. Used as an internal target. + @docker exec $(DOCKER_NAME) true || $(MAKE) bazel-server-start +.PHONY: bazel-server + +build_paths = docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) sh -c 'bazel build $(OPTIONS) $(TARGETS) 2>&1 \ + | tee /dev/fd/2 \ + | grep -E "^ bazel-bin/" \ + | awk "{print $$1;}"' \ + | xargs -n 1 -I {} sh -c "$(1)" + +build: bazel-server + @$(call build_paths,echo {}) +.PHONY: build + +copy: bazel-server +ifeq (,$(DESTINATION)) + $(error Destination not provided.) 
+endif + @$(call build_paths,cp -a {} $(DESTINATION)) + +run: bazel-server + @$(call build_paths,{} $(ARGS)) +.PHONY: run + +sudo: bazel-server + @$(call build_paths,sudo -E {} $(ARGS)) +.PHONY: sudo + +test: bazel-server + @docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) bazel test $(OPTIONS) $(TARGETS) +.PHONY: test diff --git a/tools/installers/BUILD b/tools/installers/BUILD index d78a265ca..caa7b1983 100644 --- a/tools/installers/BUILD +++ b/tools/installers/BUILD @@ -16,6 +16,14 @@ sh_binary( data = [":runsc"], ) +sh_binary( + name = "images", + srcs = ["images.sh"], + data = [ + "//images", + ], +) + sh_binary( name = "master", srcs = ["master.sh"], diff --git a/tools/installers/images.sh b/tools/installers/images.sh new file mode 100755 index 000000000..52e750f57 --- /dev/null +++ b/tools/installers/images.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Copyright 2020 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -xeuo pipefail + +# Find the images directory. +for images in $(find . -type d -name images); do + if [[ -f "${images}"/Makefile ]]; then + make -C "${images}" load-all-images + fi +done diff --git a/tools/make_repository.sh b/tools/make_repository.sh index 27ffbc9f3..c91fd283c 100755 --- a/tools/make_repository.sh +++ b/tools/make_repository.sh @@ -17,14 +17,37 @@ # Parse arguments. We require more than two arguments, which are the private # keyring, the e-mail associated with the signer, and the list of packages. if [ "$#" -le 3 ]; then - echo "usage: $0 " + echo "usage: $0 " exit 1 fi declare -r private_key=$(readlink -e "$1"); shift declare -r signer="$1"; shift -declare -r component="$1"; shift declare -r root="$1"; shift +# Ensure that we have the correct packages installed. +function apt_install() { + while true; do + sudo apt-get update && + sudo apt-get install -y "$@" && + true + result="${?}" + case $result in + 0) + break + ;; + 100) + # 100 is the error code that apt-get returns. + ;; + *) + exit $result + ;; + esac + done +} +dpkg-sig --help >/dev/null || apt_install dpkg-sig +apt-ftparchive --help >/dev/null || apt_install apt-utils +xz --help >/dev/null || apt_install xz-utils + # Verbose from this point. 
set -xeo pipefail @@ -78,7 +101,7 @@ for dir in "${root}"/pool/*/binary-*; do name=$(basename "${dir}") arch=${name##binary-} arches+=("${arch}") - repo_packages="${tmpdir}"/"${component}"/"${name}" + repo_packages="${tmpdir}"/main/"${name}" mkdir -p "${repo_packages}" (cd "${root}" && apt-ftparchive --arch "${arch}" packages pool > "${repo_packages}"/Packages) (cd "${repo_packages}" && cat Packages | gzip > Packages.gz) @@ -91,7 +114,7 @@ APT { FTPArchive { Release { Architectures "${arches[@]}"; - Components "${component}"; + Components "main"; }; }; }; diff --git a/tools/vm/defs.bzl b/tools/vm/defs.bzl index 24bf0aabc..61feefcbc 100644 --- a/tools/vm/defs.bzl +++ b/tools/vm/defs.bzl @@ -183,7 +183,10 @@ def vm_test( """ targets = kwargs.pop("targets", []) if installers == None: - installers = ["//tools/installers:head"] + installers = [ + "//tools/installers:head", + "//tools/installers:images", + ] targets = installers + targets if default_installer(): targets = [default_installer()] + targets -- cgit v1.2.3 From d5776be3fbcc9e71c449b7b41786929734ce47e2 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Fri, 24 Apr 2020 14:41:33 -0700 Subject: Improve and update packetimpact README.md PiperOrigin-RevId: 308328860 --- test/packetimpact/README.md | 304 +++++++++++++++++++++++++++++++++----------- 1 file changed, 227 insertions(+), 77 deletions(-) (limited to 'test') diff --git a/test/packetimpact/README.md b/test/packetimpact/README.md index ece4dedc6..a82ad996a 100644 --- a/test/packetimpact/README.md +++ b/test/packetimpact/README.md @@ -29,24 +29,24 @@ There are a few ways to write networking tests for gVisor currently: The right choice depends on the needs of the test. -Feature | Go unit test | syscall test | packetdrill | packetimpact -------------- | ------------ | ------------ | ----------- | ------------ -Multiplatform | no | **YES** | **YES** | **YES** -Concise | no | somewhat | somewhat | **VERY** -Control-flow | **YES** | **YES** | no | **YES** -Flexible | **VERY** | no | somewhat | **VERY** +Feature | Go unit test | syscall test | packetdrill | packetimpact +-------------- | ------------ | ------------ | ----------- | ------------ +Multi-platform | no | **YES** | **YES** | **YES** +Concise | no | somewhat | somewhat | **VERY** +Control-flow | **YES** | **YES** | no | **YES** +Flexible | **VERY** | no | somewhat | **VERY** ### Go unit tests If the test depends on the internals of gVisor and doesn't need to run on Linux or other platforms for comparison purposes, a Go unit test can be appropriate. They can observe internals of gVisor networking. The downside is that they are -**not concise** and **not multiplatform**. If you require insight on gVisor +**not concise** and **not multi-platform**. If you require insight on gVisor internals, this is the right choice. ### Syscall tests -Syscall tests are **multiplatform** but cannot examine the internals of gVisor +Syscall tests are **multi-platform** but cannot examine the internals of gVisor networking. They are **concise**. They can use **control-flow** structures like conditionals, for loops, and variables. However, they are limited to only what the POSIX interface provides so they are **not flexible**. For example, you @@ -57,7 +57,7 @@ protocols, wrong sequence numbers, etc. ### Packetdrill tests -Packetdrill tests are **multiplatform** and can run against both Linux and +Packetdrill tests are **multi-platform** and can run against both Linux and gVisor. They are **concise** and use a special packetdrill scripting language. 
They are **more flexible** than a syscall test in that they can send packets that a syscall test would have difficulty sending, like a packet with a @@ -73,7 +73,7 @@ other side supports window scaling, for example. Packetimpact tests are similar to Packetdrill tests except that they are written in Go instead of the packetdrill scripting language. That gives them all the **control-flow** abilities of Go (loops, functions, variables, etc). They are -**multiplatform** in the same way as packetdrill tests but even more +**multi-platform** in the same way as packetdrill tests but even more **flexible** because Go is more expressive than the scripting language of packetdrill. However, Go is **not as concise** as the packetdrill language. Many design decisions below are made to mitigate that. @@ -81,21 +81,27 @@ design decisions below are made to mitigate that. ## How it works ``` - +--------------+ +--------------+ - | | TEST NET | | - | | <===========> | Device | - | Test | | Under | - | Bench | | Test | - | | <===========> | (DUT) | - | | CONTROL NET | | - +--------------+ +--------------+ + Testbench Device-Under-Test (DUT) + +-------------------+ +------------------------+ + | | TEST NET | | + | rawsockets.go <-->| <===========> | <---+ | + | ^ | | | | + | | | | | | + | v | | | | + | unittest | | | | + | ^ | | | | + | | | | | | + | v | | v | + | dut.go <========gRPC========> posix server | + | | CONTROL NET | | + +-------------------+ +------------------------+ ``` -Two docker containers are created by a script, one for the test bench and the -other for the device under test (DUT). The script connects the two containers -with a control network and test network. It also does some other tasks like -waiting until the DUT is ready before starting the test and disabling Linux -networking that would interfere with the test bench. +Two docker containers are created by a "runner" script, one for the testbench +and the other for the device under test (DUT). The script connects the two +containers with a control network and test network. It also does some other +tasks like waiting until the DUT is ready before starting the test and disabling +Linux networking that would interfere with the test bench. ### DUT @@ -220,7 +226,8 @@ func (i *Injector) Send(b []byte) {...} container and in practice, the container doesn't recognize binaries built on the host if they use cgo. * Both gVisor and gopacket have the ability to read and write pcap files - without cgo but that is insufficient here. + without cgo but that is insufficient here because we can't just replay pcap + files, we need a more dynamic solution. * The sniffer and injector can't share a socket because they need to be bound differently. * Sniffing could have been done asynchronously with channels, obviating the @@ -270,11 +277,10 @@ but with a pointer for each field that may be `nil`. * Many functions, one per field, like: `filterByFlag(myBytes, SYN)`, `filterByLength(myBytes, 20)`, `filterByNextProto(myBytes, 0x8000)`, etc. - * Using pointers allows us to combine `Layer`s with a one-line call to - `mergo.Merge(...)`. So the default `Layers` can be overridden by a - `Layers` with just the TCP conection's src/dst which can be overridden - by one with just a test specific TCP window size. Each override is - specified as just one call to `mergo.Merge`. + * Using pointers allows us to combine `Layer`s with reflection. 
So the + default `Layers` can be overridden by a `Layers` with just the TCP + conection's src/dst which can be overridden by one with just a test + specific TCP window size. * It's a proven way to separate the details of a packet from the byte format as shown by scapy's success. * Use packetgo. It's more general than parsing packets with gVisor. However: @@ -334,6 +340,14 @@ type Layer interface { } ``` +The `next` and `prev` make up a link listed so that each layer can get at the +information in the layer around it. This is necessary for some protocols, like +TCP that needs the layer before and payload after to compute the checksum. Any +sequence of `Layer` structs is valid so long as the parser and `toBytes` +functions can map from type to protool number and vice-versa. When the mapping +fails, an error is emitted explaining what functionality is missing. The +solution is either to fix the ordering or implement the missing protocol. + For each `Layer` there is also a parsing function. For example, this one is for Ethernet: @@ -392,81 +406,217 @@ for { ##### Alternatives considered * Don't use previous and next pointers. - * Each layer may need to be able to interrogate the layers aroung it, like + * Each layer may need to be able to interrogate the layers around it, like for computing the next protocol number or total length. So *some* mechanism is needed for a `Layer` to see neighboring layers. * We could pass the entire array `Layers` to the `toBytes()` function. Passing an array to a method that includes in the array the function receiver itself seems wrong. -#### Connections +#### `layerState` -Using `Layers` above, we can create connection structures to maintain state -about connections. For example, here is the `TCPIPv4` struct: +`Layers` represents the different headers of a packet but a connection includes +more state. For example, a TCP connection needs to keep track of the next +expected sequence number and also the next sequence number to send. This is +stored in a `layerState` struct. This is the `layerState` for TCP: +```go +// tcpState maintains state about a TCP connection. +type tcpState struct { + out, in TCP + localSeqNum, remoteSeqNum *seqnum.Value + synAck *TCP + portPickerFD int + finSent bool +} ``` -type TCPIPv4 struct { - outgoing Layers - incoming Layers - localSeqNum uint32 - remoteSeqNum uint32 - sniffer Sniffer - injector Injector - t *testing.T + +The next sequence numbers for each side of the connection are stored. `out` and +`in` have defaults for the TCP header, such as the expected source and +destination ports for outgoing packets and incoming packets. + +##### `layerState` interface + +```go +// layerState stores the state of a layer of a connection. +type layerState interface { + // outgoing returns an outgoing layer to be sent in a frame. + outgoing() Layer + + // incoming creates an expected Layer for comparing against a received Layer. + // Because the expectation can depend on values in the received Layer, it is + // an input to incoming. For example, the ACK number needs to be checked in a + // TCP packet but only if the ACK flag is set in the received packet. + incoming(received Layer) Layer + + // sent updates the layerState based on the Layer that was sent. The input is + // a Layer with all prev and next pointers populated so that the entire frame + // as it was sent is available. + sent(sent Layer) error + + // received updates the layerState based on a Layer that is receieved. 
The + // input is a Layer with all prev and next pointers populated so that the + // entire frame as it was receieved is available. + received(received Layer) error + + // close frees associated resources held by the LayerState. + close() error } ``` -`TCPIPv4` contains an `outgoing Layers` which holds the defaults for the -connection, such as the source and destination MACs, IPs, and ports. When -`outgoing.toBytes()` is called a valid packet for this TCPIPv4 flow is built. +`outgoing` generates the default Layer for an outgoing packet. For TCP, this +would be a `TCP` with the source and destination ports populated. Because they +are static, they are stored inside the `out` member of `tcpState`. However, the +sequence numbers change frequently so the outgoing sequence number is stored in +the `localSeqNum` and put into the output of outgoing for each call. + +`incoming` does the same functions for packets that arrive but instead of +generating a packet to send, it generates an expect packet for filtering packets +that arrive. For example, if a `TCP` header arrives with the wrong ports, it can +be ignored as belonging to a different connection. `incoming` needs the received +header itself as an input because the filter may depend on the input. For +example, the expected sequence number depends on the flags in the TCP header. + +`sent` and `received` are run for each header that is actually sent or received +and used to update the internal state. `incoming` and `outgoing` should *not* be +used for these purpose. For example, `incoming` is called on every packet that +arrives but only packets that match ought to actually update the state. +`outgoing` is called to created outgoing packets and those packets are always +sent, so unlike `incoming`/`received`, there is one `outgoing` call for each +`sent` call. + +`close` cleans up after the layerState. For example, TCP and UDP need to keep a +port reserved and then release it. + +#### Connections + +Using `layerState` above, we can create connections. -It also contains `incoming Layers` which holds filter for incoming packets that -belong to this flow. `incoming.match(Layers)` is used on received bytes to check -if they are part of the flow. +```go +// Connection holds a collection of layer states for maintaining a connection +// along with sockets for sniffer and injecting packets. +type Connection struct { + layerStates []layerState + injector Injector + sniffer Sniffer + t *testing.T +} +``` -The `sniffer` and `injector` are for receiving and sending raw packet bytes. The -`localSeqNum` and `remoteSeqNum` are updated by `Send` and `Recv` so that -outgoing packets will have, by default, the correct sequence number and ack -number. +The connection stores an array of `layerState` in the order that the headers +should be present in the frame to send. For example, Ether then IPv4 then TCP. +The injector and sniffer are for writing and reading frames. A `*testing.T` is +stored so that internal errors can be reported directly without code in the unit +test. -TCPIPv4 provides some functions: +The `Connection` has some useful functions: +```go +// Close frees associated resources held by the Connection. +func (conn *Connection) Close() {...} +// CreateFrame builds a frame for the connection with layer overriding defaults +// of the innermost layer and additionalLayers added after it. +func (conn *Connection) CreateFrame(layer Layer, additionalLayers ...Layer) Layers {...} +// SendFrame sends a frame on the wire and updates the state of all layers. 
+func (conn *Connection) SendFrame(frame Layers) {...} +// Send a packet with reasonable defaults. Potentially override the final layer +// in the connection with the provided layer and add additionLayers. +func (conn *Connection) Send(layer Layer, additionalLayers ...Layer) {...} +// Expect a frame with the final layerStates layer matching the provided Layer +// within the timeout specified. If it doesn't arrive in time, it returns nil. +func (conn *Connection) Expect(layer Layer, timeout time.Duration) (Layer, error) {...} +// ExpectFrame expects a frame that matches the provided Layers within the +// timeout specified. If it doesn't arrive in time, it returns nil. +func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layers, error) {...} +// Drain drains the sniffer's receive buffer by receiving packets until there's +// nothing else to receive. +func (conn *Connection) Drain() {...} ``` -func (conn *TCPIPv4) Send(tcp TCP) {...} -func (conn *TCPIPv4) Recv(timeout time.Duration) *TCP {...} + +`CreateFrame` uses the `[]layerState` to create a frame to send. The first +argument is for overriding defaults in the last header of the frame, because +this is the most common need. For a TCPIPv4 connection, this would be the TCP +header. Optional additionalLayers can be specified to add to the frame being +created, such as a `Payload` for `TCP`. + +`SendFrame` sends the frame to the DUT. It is combined with `CreateFrame` to +make `Send`. For unittests with basic sending needs, `Send` can be used. If more +control is needed over the frame, it can be made with `CreateFrame`, modified in +the unit test, and then sent with `SendFrame`. + +On the receiving side, there is `Expect` and `ExpectFrame`. Like with the +sending side, there are two forms of each function, one for just the last header +and one for the whole frame. The expect functions use the `[]layerState` to +create a template for the expected incoming frame. That frame is then overridden +by the values in the first argument. Finally, a loop starts sniffing packets on +the wire for frames. If a matching frame is found before the timeout, it is +returned without error. If not, nil is returned and the error contains text of +all the received frames that didn't match. Exactly one of the outputs will be +non-nil, even if no frames are received at all. + +`Drain` sniffs and discards all the frames that have yet to be received. A +common way to write a test is: + +```go +conn.Drain() // Discard all outstanding frames. +conn.Send(...) // Send a frame with overrides. +// Now expect a frame with a certain header and fail if it doesn't arrive. +if _, err := conn.Expect(...); err != nil { t.Fatal(...) } ``` -`Send(tcp TCP)` uses [mergo](https://github.com/imdario/mergo) to merge the -provided `TCP` (a `Layer`) into `outgoing`. This way the user can specify -concisely just which fields of `outgoing` to modify. The packet is sent using -the `injector`. +Or for a test where we want to check that no frame arrives: -`Recv(timeout time.Duration)` reads packets from the sniffer until either the -timeout has elapsed or a packet that matches `incoming` arrives. +```go +if gotOne, _ := conn.Expect(...); gotOne != nil { t.Fatal(...) 
} +``` + +#### Specializing `Connection` -Using those, we can perform a TCP 3-way handshake without too much code: +Because there are some common combinations of `layerState` into `Connection`, +they are defined: ```go -func (conn *TCPIPv4) Handshake() { - syn := uint8(header.TCPFlagSyn) - synack := uint8(header.TCPFlagSyn) - ack := uint8(header.TCPFlagAck) - conn.Send(TCP{Flags: &syn}) // Send a packet with all defaults but set TCP-SYN. - - // Wait for the SYN-ACK response. - for { - newTCP := conn.Recv(time.Second) // This already filters by MAC, IP, and ports. - if TCP{Flags: &synack}.match(newTCP) { - break // Only if it's a SYN-ACK proceed. - } - } +// TCPIPv4 maintains the state for all the layers in a TCP/IPv4 connection. +type TCPIPv4 Connection +// UDPIPv4 maintains the state for all the layers in a UDP/IPv4 connection. +type UDPIPv4 Connection +``` + +Each has a `NewXxx` function to create a new connection with reasonable +defaults. They also have functions that call the underlying `Connection` +functions but with specialization and tighter type-checking. For example: - conn.Send(TCP{Flags: &ack}) // Send an ACK. The seq and ack numbers are set correctly. +```go +func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { + (*Connection)(conn).Send(&tcp, additionalLayers...) +} +func (conn *TCPIPv4) Drain() { + conn.sniffer.Drain() +} +``` + +They may also have some accessors to get or set the internal state of the +connection: + +```go +func (conn *TCPIPv4) state() *tcpState { + state, ok := conn.layerStates[len(conn.layerStates)-1].(*tcpState) + if !ok { + conn.t.Fatalf("expected final state of %v to be tcpState", conn.layerStates) + } + return state +} +func (conn *TCPIPv4) RemoteSeqNum() *seqnum.Value { + return conn.state().remoteSeqNum +} +func (conn *TCPIPv4) LocalSeqNum() *seqnum.Value { + return conn.state().localSeqNum } ``` -The handshake code is part of the testbench utilities so tests can share this -common sequence, making tests even more concise. +Unittests will in practice use these functions and not the functions on +`Connection`. For example, `NewTCPIPv4()` and then call `Send` on that rather +than cast is to a `Connection` and call `Send` on that cast result. ##### Alternatives considered -- cgit v1.2.3 From 3d860530a904004aea5bc95e6331b3b11cec1877 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Fri, 24 Apr 2020 15:02:33 -0700 Subject: Better error message from ExpectFrame Display the errors as diffs between the expected and wanted frame. PiperOrigin-RevId: 308333271 --- test/packetimpact/testbench/connections.go | 94 +++++---- test/packetimpact/testbench/layers.go | 245 ++++++++++++++++++++++ test/packetimpact/testbench/layers_test.go | 80 +++++++ test/packetimpact/tests/fin_wait2_timeout_test.go | 4 +- 4 files changed, 376 insertions(+), 47 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 952a717e0..42a90a859 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -21,7 +21,6 @@ import ( "fmt" "math/rand" "net" - "strings" "testing" "time" @@ -66,14 +65,16 @@ func pickPort() (int, uint16, error) { // layerState stores the state of a layer of a connection. type layerState interface { - // outgoing returns an outgoing layer to be sent in a frame. + // outgoing returns an outgoing layer to be sent in a frame. It should not + // update layerState, that is done in layerState.sent. 
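+	// Implementations typically return a deep copy of the stored defaults (for
+	// example via deepcopy.Copy) so that callers cannot mutate connection state.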
outgoing() Layer // incoming creates an expected Layer for comparing against a received Layer. // Because the expectation can depend on values in the received Layer, it is // an input to incoming. For example, the ACK number needs to be checked in a - // TCP packet but only if the ACK flag is set in the received packet. The - // calles takes ownership of the returned Layer. + // TCP packet but only if the ACK flag is set in the received packet. It + // should not update layerState, that is done in layerState.received. The + // caller takes ownership of the returned Layer. incoming(received Layer) Layer // sent updates the layerState based on the Layer that was sent. The input is @@ -363,44 +364,33 @@ type Connection struct { t *testing.T } -// match tries to match each Layer in received against the incoming filter. If -// received is longer than layerStates then that may still count as a match. The -// reverse is never a match. override overrides the default matchers for each -// Layer. -func (conn *Connection) match(override, received Layers) bool { - var layersToMatch int - if len(override) < len(conn.layerStates) { - layersToMatch = len(conn.layerStates) - } else { - layersToMatch = len(override) - } - if len(received) < layersToMatch { - return false - } - for i := 0; i < layersToMatch; i++ { - var toMatch Layer - if i < len(conn.layerStates) { - s := conn.layerStates[i] - toMatch = s.incoming(received[i]) - if toMatch == nil { - return false - } - if i < len(override) { - if err := toMatch.merge(override[i]); err != nil { - conn.t.Fatalf("failed to merge: %s", err) - } - } - } else { - toMatch = override[i] - if toMatch == nil { - conn.t.Fatalf("expect the overriding layers to be non-nil") - } - } - if !toMatch.match(received[i]) { - return false +// Returns the default incoming frame against which to match. If received is +// longer than layerStates then that may still count as a match. The reverse is +// never a match and nil is returned. +func (conn *Connection) incoming(received Layers) Layers { + if len(received) < len(conn.layerStates) { + return nil + } + in := Layers{} + for i, s := range conn.layerStates { + toMatch := s.incoming(received[i]) + if toMatch == nil { + return nil } + in = append(in, toMatch) + } + return in +} + +func (conn *Connection) match(override, received Layers) bool { + toMatch := conn.incoming(received) + if toMatch == nil { + return false // Not enough layers in gotLayers for matching. } - return true + if err := toMatch.merge(override); err != nil { + return false // Failing to merge is not matching. + } + return toMatch.match(received) } // Close frees associated resources held by the Connection. @@ -470,6 +460,16 @@ func (conn *Connection) recvFrame(timeout time.Duration) Layers { return parse(parseEther, b) } +// layersError stores the Layers that we got and the Layers that we wanted to +// match. +type layersError struct { + got, want Layers +} + +func (e *layersError) Error() string { + return e.got.diff(e.want) +} + // Expect a frame with the final layerStates layer matching the provided Layer // within the timeout specified. If it doesn't arrive in time, it returns nil. 
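// For example, a test that waits for an incoming RST segment can call:
//	conn.Expect(&TCP{Flags: Uint8(header.TCPFlagRst)}, time.Second)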
func (conn *Connection) Expect(layer Layer, timeout time.Duration) (Layer, error) { @@ -485,21 +485,25 @@ func (conn *Connection) Expect(layer Layer, timeout time.Duration) (Layer, error return gotFrame[len(conn.layerStates)-1], nil } conn.t.Fatal("the received frame should be at least as long as the expected layers") - return nil, fmt.Errorf("the received frame should be at least as long as the expected layers") + panic("unreachable") } // ExpectFrame expects a frame that matches the provided Layers within the -// timeout specified. If it doesn't arrive in time, it returns nil. +// timeout specified. If one arrives in time, the Layers is returned without an +// error. If it doesn't arrive in time, it returns nil and error is non-nil. func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layers, error) { deadline := time.Now().Add(timeout) - var allLayers []string + var errs error for { var gotLayers Layers if timeout = time.Until(deadline); timeout > 0 { gotLayers = conn.recvFrame(timeout) } if gotLayers == nil { - return nil, fmt.Errorf("got %d packets:\n%s", len(allLayers), strings.Join(allLayers, "\n")) + if errs == nil { + return nil, fmt.Errorf("got no frames matching %v during %s", layers, timeout) + } + return nil, fmt.Errorf("got no frames matching %v during %s: got %w", layers, timeout, errs) } if conn.match(layers, gotLayers) { for i, s := range conn.layerStates { @@ -509,7 +513,7 @@ func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layer } return gotLayers, nil } - allLayers = append(allLayers, fmt.Sprintf("%s", gotLayers)) + errs = multierr.Combine(errs, &layersError{got: gotLayers, want: conn.incoming(gotLayers)}) } } diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 01e99567d..2cbbbb318 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -22,6 +22,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "go.uber.org/multierr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -720,3 +721,247 @@ func (ls *Layers) match(other Layers) bool { } return true } + +// layerDiff stores the diffs for each field along with the label for the Layer. +// If rows is nil, that means that there was no diff. +type layerDiff struct { + label string + rows []layerDiffRow +} + +// layerDiffRow stores the fields and corresponding values for two got and want +// layers. If the value was nil then the string stored is the empty string. +type layerDiffRow struct { + field, got, want string +} + +// diffLayer extracts all differing fields between two layers. +func diffLayer(got, want Layer) []layerDiffRow { + vGot := reflect.ValueOf(got).Elem() + vWant := reflect.ValueOf(want).Elem() + if vGot.Type() != vWant.Type() { + return nil + } + t := vGot.Type() + var result []layerDiffRow + for i := 0; i < t.NumField(); i++ { + t := t.Field(i) + if t.Anonymous { + // Ignore the LayerBase in the Layer struct. + continue + } + vGot := vGot.Field(i) + vWant := vWant.Field(i) + gotString := "" + if !vGot.IsNil() { + gotString = fmt.Sprint(reflect.Indirect(vGot)) + } + wantString := "" + if !vWant.IsNil() { + wantString = fmt.Sprint(reflect.Indirect(vWant)) + } + result = append(result, layerDiffRow{t.Name, gotString, wantString}) + } + return result +} + +// layerType returns a concise string describing the type of the Layer, like +// "TCP", or "IPv6". 
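+// It is used by diff to label each layer in the printed comparison.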
+func layerType(l Layer) string { + return reflect.TypeOf(l).Elem().Name() +} + +// diff compares Layers and returns a representation of the difference. Each +// Layer in the Layers is pairwise compared. If an element in either is nil, it +// is considered a match with the other Layer. If two Layers have differing +// types, they don't match regardless of the contents. If two Layers have the +// same type then the fields in the Layer are pairwise compared. Fields that are +// nil always match. Two non-nil fields only match if they point to equal +// values. diff returns an empty string if and only if *ls and other match. +func (ls *Layers) diff(other Layers) string { + var allDiffs []layerDiff + // Check the cases where one list is longer than the other, where one or both + // elements are nil, where the sides have different types, and where the sides + // have the same type. + for i := 0; i < len(*ls) || i < len(other); i++ { + if i >= len(*ls) { + // Matching ls against other where other is longer than ls. missing + // matches everything so we just include a label without any rows. Having + // no rows is a sign that there was no diff. + allDiffs = append(allDiffs, layerDiff{ + label: "missing matches " + layerType(other[i]), + }) + continue + } + + if i >= len(other) { + // Matching ls against other where ls is longer than other. missing + // matches everything so we just include a label without any rows. Having + // no rows is a sign that there was no diff. + allDiffs = append(allDiffs, layerDiff{ + label: layerType((*ls)[i]) + " matches missing", + }) + continue + } + + if (*ls)[i] == nil && other[i] == nil { + // Matching ls against other where both elements are nil. nil matches + // everything so we just include a label without any rows. Having no rows + // is a sign that there was no diff. + allDiffs = append(allDiffs, layerDiff{ + label: "nil matches nil", + }) + continue + } + + if (*ls)[i] == nil { + // Matching ls against other where the element in ls is nil. nil matches + // everything so we just include a label without any rows. Having no rows + // is a sign that there was no diff. + allDiffs = append(allDiffs, layerDiff{ + label: "nil matches " + layerType(other[i]), + }) + continue + } + + if other[i] == nil { + // Matching ls against other where the element in other is nil. nil + // matches everything so we just include a label without any rows. Having + // no rows is a sign that there was no diff. + allDiffs = append(allDiffs, layerDiff{ + label: layerType((*ls)[i]) + " matches nil", + }) + continue + } + + if reflect.TypeOf((*ls)[i]) == reflect.TypeOf(other[i]) { + // Matching ls against other where both elements have the same type. Match + // each field pairwise and only report a diff if there is a mismatch, + // which is only when both sides are non-nil and have differring values. + diff := diffLayer((*ls)[i], other[i]) + var layerDiffRows []layerDiffRow + for _, d := range diff { + if d.got == "" || d.want == "" || d.got == d.want { + continue + } + layerDiffRows = append(layerDiffRows, layerDiffRow{ + d.field, + d.got, + d.want, + }) + } + if len(layerDiffRows) > 0 { + allDiffs = append(allDiffs, layerDiff{ + label: layerType((*ls)[i]), + rows: layerDiffRows, + }) + } else { + allDiffs = append(allDiffs, layerDiff{ + label: layerType((*ls)[i]) + " matches " + layerType(other[i]), + // Having no rows is a sign that there was no diff. + }) + } + continue + } + // Neither side is nil and the types are different, so we'll display one + // side then the other. 
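+		// Only the populated fields of each side are listed.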
+ allDiffs = append(allDiffs, layerDiff{ + label: layerType((*ls)[i]) + " doesn't match " + layerType(other[i]), + }) + diff := diffLayer((*ls)[i], (*ls)[i]) + layerDiffRows := []layerDiffRow{} + for _, d := range diff { + if len(d.got) == 0 { + continue + } + layerDiffRows = append(layerDiffRows, layerDiffRow{ + d.field, + d.got, + "", + }) + } + allDiffs = append(allDiffs, layerDiff{ + label: layerType((*ls)[i]), + rows: layerDiffRows, + }) + + layerDiffRows = []layerDiffRow{} + diff = diffLayer(other[i], other[i]) + for _, d := range diff { + if len(d.want) == 0 { + continue + } + layerDiffRows = append(layerDiffRows, layerDiffRow{ + d.field, + "", + d.want, + }) + } + allDiffs = append(allDiffs, layerDiff{ + label: layerType(other[i]), + rows: layerDiffRows, + }) + } + + output := "" + // These are for output formatting. + maxLabelLen, maxFieldLen, maxGotLen, maxWantLen := 0, 0, 0, 0 + foundOne := false + for _, l := range allDiffs { + if len(l.label) > maxLabelLen && len(l.rows) > 0 { + maxLabelLen = len(l.label) + } + if l.rows != nil { + foundOne = true + } + for _, r := range l.rows { + if len(r.field) > maxFieldLen { + maxFieldLen = len(r.field) + } + if l := len(fmt.Sprint(r.got)); l > maxGotLen { + maxGotLen = l + } + if l := len(fmt.Sprint(r.want)); l > maxWantLen { + maxWantLen = l + } + } + } + if !foundOne { + return "" + } + for _, l := range allDiffs { + if len(l.rows) == 0 { + output += "(" + l.label + ")\n" + continue + } + for i, r := range l.rows { + var label string + if i == 0 { + label = l.label + ":" + } + output += fmt.Sprintf( + "%*s %*s %*v %*v\n", + maxLabelLen+1, label, + maxFieldLen+1, r.field+":", + maxGotLen, r.got, + maxWantLen, r.want, + ) + } + } + return output +} + +// merge merges the other Layers into ls. If the other Layers is longer, those +// additional Layer structs are added to ls. The errors from merging are +// collected and returned. 
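+// Non-nil fields in other take precedence over the corresponding fields in ls.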
+func (ls *Layers) merge(other Layers) error { + var errs error + for i, o := range other { + if i < len(*ls) { + errs = multierr.Combine(errs, (*ls)[i].merge(o)) + } else { + *ls = append(*ls, o) + } + } + return errs +} diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index f07ec5eb2..96f72de5b 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -313,3 +313,83 @@ func TestConnectionMatch(t *testing.T) { }) } } + +func TestLayersDiff(t *testing.T) { + for _, tt := range []struct { + x, y Layers + want string + }{ + { + Layers{&Ether{Type: NetworkProtocolNumber(12)}, &TCP{DataOffset: Uint8(5), SeqNum: Uint32(5)}}, + Layers{&Ether{Type: NetworkProtocolNumber(13)}, &TCP{DataOffset: Uint8(7), SeqNum: Uint32(6)}}, + "Ether: Type: 12 13\n" + + " TCP: SeqNum: 5 6\n" + + " DataOffset: 5 7\n", + }, + { + Layers{&Ether{Type: NetworkProtocolNumber(12)}, &UDP{SrcPort: Uint16(123)}}, + Layers{&Ether{Type: NetworkProtocolNumber(13)}, &TCP{DataOffset: Uint8(7), SeqNum: Uint32(6)}}, + "Ether: Type: 12 13\n" + + "(UDP doesn't match TCP)\n" + + " UDP: SrcPort: 123 \n" + + " TCP: SeqNum: 6\n" + + " DataOffset: 7\n", + }, + { + Layers{&UDP{SrcPort: Uint16(123)}}, + Layers{&Ether{Type: NetworkProtocolNumber(13)}, &TCP{DataOffset: Uint8(7), SeqNum: Uint32(6)}}, + "(UDP doesn't match Ether)\n" + + " UDP: SrcPort: 123 \n" + + "Ether: Type: 13\n" + + "(missing matches TCP)\n", + }, + { + Layers{nil, &UDP{SrcPort: Uint16(123)}}, + Layers{&Ether{Type: NetworkProtocolNumber(13)}, &TCP{DataOffset: Uint8(7), SeqNum: Uint32(6)}}, + "(nil matches Ether)\n" + + "(UDP doesn't match TCP)\n" + + "UDP: SrcPort: 123 \n" + + "TCP: SeqNum: 6\n" + + " DataOffset: 7\n", + }, + { + Layers{&Ether{Type: NetworkProtocolNumber(13)}, &IPv4{IHL: Uint8(4)}, &TCP{DataOffset: Uint8(7), SeqNum: Uint32(6)}}, + Layers{&Ether{Type: NetworkProtocolNumber(13)}, &IPv4{IHL: Uint8(6)}, &TCP{DataOffset: Uint8(7), SeqNum: Uint32(6)}}, + "(Ether matches Ether)\n" + + "IPv4: IHL: 4 6\n" + + "(TCP matches TCP)\n", + }, + { + Layers{&Payload{Bytes: []byte("foo")}}, + Layers{&Payload{Bytes: []byte("bar")}}, + "Payload: Bytes: [102 111 111] [98 97 114]\n", + }, + { + Layers{&Payload{Bytes: []byte("")}}, + Layers{&Payload{}}, + "", + }, + { + Layers{&Payload{Bytes: []byte("")}}, + Layers{&Payload{Bytes: []byte("")}}, + "", + }, + { + Layers{&UDP{}}, + Layers{&TCP{}}, + "(UDP doesn't match TCP)\n" + + "(UDP)\n" + + "(TCP)\n", + }, + } { + if got := tt.x.diff(tt.y); got != tt.want { + t.Errorf("%s.diff(%s) = %q, want %q", tt.x, tt.y, got, tt.want) + } + if tt.x.match(tt.y) != (tt.x.diff(tt.y) == "") { + t.Errorf("match and diff of %s and %s disagree", tt.x, tt.y) + } + if tt.y.match(tt.x) != (tt.y.diff(tt.x) == "") { + t.Errorf("match and diff of %s and %s disagree", tt.y, tt.x) + } + } +} diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go index b98594f94..99dc77f9a 100644 --- a/test/packetimpact/tests/fin_wait2_timeout_test.go +++ b/test/packetimpact/tests/fin_wait2_timeout_test.go @@ -61,8 +61,8 @@ func TestFinWait2Timeout(t *testing.T) { t.Fatalf("expected a RST packet within a second but got none: %s", err) } } else { - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, 10*time.Second); err == nil { - t.Fatalf("expected no RST packets within ten seconds but got one: %s", err) + if got, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, 10*time.Second); got 
!= nil || err == nil { + t.Fatalf("expected no RST packets within ten seconds but got one: %s", got) } } }) -- cgit v1.2.3 From dfff265fe422499af3bbe7d58e8db35ba32304f5 Mon Sep 17 00:00:00 2001 From: Eyal Soha Date: Fri, 24 Apr 2020 15:55:11 -0700 Subject: Add ICMP6 param problem test Tested: When run on Linux, a correct ICMPv6 response is received. On netstack, no ICMPv6 response is received. PiperOrigin-RevId: 308343113 --- pkg/tcpip/header/ipv6.go | 10 +- test/packetimpact/testbench/connections.go | 176 ++++++++++++++--- test/packetimpact/testbench/layers.go | 212 +++++++++++++++++++-- test/packetimpact/tests/BUILD | 13 ++ .../tests/icmpv6_param_problem_test.go | 73 +++++++ test/packetimpact/tests/test_runner.sh | 41 +++- 6 files changed, 484 insertions(+), 41 deletions(-) create mode 100644 test/packetimpact/tests/icmpv6_param_problem_test.go (limited to 'test') diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go index 76e88e9b3..ba80b64a8 100644 --- a/pkg/tcpip/header/ipv6.go +++ b/pkg/tcpip/header/ipv6.go @@ -27,7 +27,9 @@ const ( // IPv6PayloadLenOffset is the offset of the PayloadLength field in // IPv6 header. IPv6PayloadLenOffset = 4 - nextHdr = 6 + // IPv6NextHeaderOffset is the offset of the NextHeader field in + // IPv6 header. + IPv6NextHeaderOffset = 6 hopLimit = 7 v6SrcAddr = 8 v6DstAddr = v6SrcAddr + IPv6AddressSize @@ -163,7 +165,7 @@ func (b IPv6) HopLimit() uint8 { // NextHeader returns the value of the "next header" field of the ipv6 header. func (b IPv6) NextHeader() uint8 { - return b[nextHdr] + return b[IPv6NextHeaderOffset] } // TransportProtocol implements Network.TransportProtocol. @@ -223,7 +225,7 @@ func (b IPv6) SetDestinationAddress(addr tcpip.Address) { // SetNextHeader sets the value of the "next header" field of the ipv6 header. func (b IPv6) SetNextHeader(v uint8) { - b[nextHdr] = v + b[IPv6NextHeaderOffset] = v } // SetChecksum implements Network.SetChecksum. Given that IPv6 doesn't have a @@ -235,7 +237,7 @@ func (IPv6) SetChecksum(uint16) { func (b IPv6) Encode(i *IPv6Fields) { b.SetTOS(i.TrafficClass, i.FlowLabel) b.SetPayloadLength(i.PayloadLength) - b[nextHdr] = i.NextHeader + b[IPv6NextHeaderOffset] = i.NextHeader b[hopLimit] = i.HopLimit b.SetSourceAddress(i.SrcAddr) b.SetDestinationAddress(i.DstAddr) diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 42a90a859..2280bd4ee 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -34,33 +34,60 @@ import ( var localIPv4 = flag.String("local_ipv4", "", "local IPv4 address for test packets") var remoteIPv4 = flag.String("remote_ipv4", "", "remote IPv4 address for test packets") +var localIPv6 = flag.String("local_ipv6", "", "local IPv6 address for test packets") +var remoteIPv6 = flag.String("remote_ipv6", "", "remote IPv6 address for test packets") var localMAC = flag.String("local_mac", "", "local mac address for test packets") var remoteMAC = flag.String("remote_mac", "", "remote mac address for test packets") -// pickPort makes a new socket and returns the socket FD and port. The caller -// must close the FD when done with the port if there is no error. -func pickPort() (int, uint16, error) { - fd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0) +// pickPort makes a new socket and returns the socket FD and port. The domain +// should be AF_INET or AF_INET6. The caller must close the FD when done with +// the port if there is no error. 
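+// newTCPState and newUDPState use it to reserve the local port for a
+// connection.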
+func pickPort(domain, typ int) (fd int, port uint16, err error) { + fd, err = unix.Socket(domain, typ, 0) if err != nil { return -1, 0, err } - var sa unix.SockaddrInet4 - copy(sa.Addr[0:4], net.ParseIP(*localIPv4).To4()) - if err := unix.Bind(fd, &sa); err != nil { - unix.Close(fd) + defer func() { + if err != nil { + err = multierr.Append(err, unix.Close(fd)) + } + }() + var sa unix.Sockaddr + switch domain { + case unix.AF_INET: + var sa4 unix.SockaddrInet4 + copy(sa4.Addr[:], net.ParseIP(*localIPv4).To4()) + sa = &sa4 + case unix.AF_INET6: + var sa6 unix.SockaddrInet6 + copy(sa6.Addr[:], net.ParseIP(*localIPv6).To16()) + sa = &sa6 + default: + return -1, 0, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) + } + if err = unix.Bind(fd, sa); err != nil { return -1, 0, err } newSockAddr, err := unix.Getsockname(fd) if err != nil { - unix.Close(fd) return -1, 0, err } - newSockAddrInet4, ok := newSockAddr.(*unix.SockaddrInet4) - if !ok { - unix.Close(fd) - return -1, 0, fmt.Errorf("can't cast Getsockname result to SockaddrInet4") + switch domain { + case unix.AF_INET: + newSockAddrInet4, ok := newSockAddr.(*unix.SockaddrInet4) + if !ok { + return -1, 0, fmt.Errorf("can't cast Getsockname result %T to SockaddrInet4", newSockAddr) + } + return fd, uint16(newSockAddrInet4.Port), nil + case unix.AF_INET6: + newSockAddrInet6, ok := newSockAddr.(*unix.SockaddrInet6) + if !ok { + return -1, 0, fmt.Errorf("can't cast Getsockname result %T to SockaddrInet6", newSockAddr) + } + return fd, uint16(newSockAddrInet6.Port), nil + default: + return -1, 0, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) } - return fd, uint16(newSockAddrInet4.Port), nil } // layerState stores the state of a layer of a connection. @@ -123,7 +150,7 @@ func newEtherState(out, in Ether) (*etherState, error) { } func (s *etherState) outgoing() Layer { - return &s.out + return deepcopy.Copy(&s.out).(Layer) } // incoming implements layerState.incoming. @@ -168,7 +195,7 @@ func newIPv4State(out, in IPv4) (*ipv4State, error) { } func (s *ipv4State) outgoing() Layer { - return &s.out + return deepcopy.Copy(&s.out).(Layer) } // incoming implements layerState.incoming. @@ -188,6 +215,54 @@ func (*ipv4State) close() error { return nil } +// ipv6State maintains state about an IPv6 connection. +type ipv6State struct { + out, in IPv6 +} + +var _ layerState = (*ipv6State)(nil) + +// newIPv6State creates a new ipv6State. +func newIPv6State(out, in IPv6) (*ipv6State, error) { + lIP := tcpip.Address(net.ParseIP(*localIPv6).To16()) + rIP := tcpip.Address(net.ParseIP(*remoteIPv6).To16()) + s := ipv6State{ + out: IPv6{SrcAddr: &lIP, DstAddr: &rIP}, + in: IPv6{SrcAddr: &rIP, DstAddr: &lIP}, + } + if err := s.out.merge(&out); err != nil { + return nil, err + } + if err := s.in.merge(&in); err != nil { + return nil, err + } + return &s, nil +} + +// outgoing returns an outgoing layer to be sent in a frame. +func (s *ipv6State) outgoing() Layer { + return deepcopy.Copy(&s.out).(Layer) +} + +func (s *ipv6State) incoming(Layer) Layer { + return deepcopy.Copy(&s.in).(Layer) +} + +func (s *ipv6State) sent(Layer) error { + // Nothing to do. + return nil +} + +func (s *ipv6State) received(Layer) error { + // Nothing to do. + return nil +} + +// close cleans up any resources held. +func (s *ipv6State) close() error { + return nil +} + // tcpState maintains state about a TCP connection. 
type tcpState struct { out, in TCP @@ -206,8 +281,8 @@ func SeqNumValue(v seqnum.Value) *seqnum.Value { } // newTCPState creates a new TCPState. -func newTCPState(out, in TCP) (*tcpState, error) { - portPickerFD, localPort, err := pickPort() +func newTCPState(domain int, out, in TCP) (*tcpState, error) { + portPickerFD, localPort, err := pickPort(domain, unix.SOCK_STREAM) if err != nil { return nil, err } @@ -310,8 +385,8 @@ type udpState struct { var _ layerState = (*udpState)(nil) // newUDPState creates a new udpState. -func newUDPState(out, in UDP) (*udpState, error) { - portPickerFD, localPort, err := pickPort() +func newUDPState(domain int, out, in UDP) (*udpState, error) { + portPickerFD, localPort, err := pickPort(domain, unix.SOCK_DGRAM) if err != nil { return nil, err } @@ -330,7 +405,7 @@ func newUDPState(out, in UDP) (*udpState, error) { } func (s *udpState) outgoing() Layer { - return &s.out + return deepcopy.Copy(&s.out).(Layer) } // incoming implements layerState.incoming. @@ -422,7 +497,7 @@ func (conn *Connection) CreateFrame(layer Layer, additionalLayers ...Layer) Laye // SendFrame sends a frame on the wire and updates the state of all layers. func (conn *Connection) SendFrame(frame Layers) { - outBytes, err := frame.toBytes() + outBytes, err := frame.ToBytes() if err != nil { conn.t.Fatalf("can't build outgoing TCP packet: %s", err) } @@ -536,7 +611,7 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { if err != nil { t.Fatalf("can't make ipv4State: %s", err) } - tcpState, err := newTCPState(outgoingTCP, incomingTCP) + tcpState, err := newTCPState(unix.AF_INET, outgoingTCP, incomingTCP) if err != nil { t.Fatalf("can't make tcpState: %s", err) } @@ -633,6 +708,59 @@ func (conn *TCPIPv4) SynAck() *TCP { return conn.state().synAck } +// IPv6Conn maintains the state for all the layers in a IPv6 connection. +type IPv6Conn Connection + +// NewIPv6Conn creates a new IPv6Conn connection with reasonable defaults. +func NewIPv6Conn(t *testing.T, outgoingIPv6, incomingIPv6 IPv6) IPv6Conn { + etherState, err := newEtherState(Ether{}, Ether{}) + if err != nil { + t.Fatalf("can't make EtherState: %s", err) + } + ipv6State, err := newIPv6State(outgoingIPv6, incomingIPv6) + if err != nil { + t.Fatalf("can't make IPv6State: %s", err) + } + + injector, err := NewInjector(t) + if err != nil { + t.Fatalf("can't make injector: %s", err) + } + sniffer, err := NewSniffer(t) + if err != nil { + t.Fatalf("can't make sniffer: %s", err) + } + + return IPv6Conn{ + layerStates: []layerState{etherState, ipv6State}, + injector: injector, + sniffer: sniffer, + t: t, + } +} + +// SendFrame sends a frame on the wire and updates the state of all layers. +func (conn *IPv6Conn) SendFrame(frame Layers) { + (*Connection)(conn).SendFrame(frame) +} + +// CreateFrame builds a frame for the connection with ipv6 overriding the ipv6 +// layer defaults and additionalLayers added after it. +func (conn *IPv6Conn) CreateFrame(ipv6 IPv6, additionalLayers ...Layer) Layers { + return (*Connection)(conn).CreateFrame(&ipv6, additionalLayers...) +} + +// Close to clean up any resources held. +func (conn *IPv6Conn) Close() { + (*Connection)(conn).Close() +} + +// ExpectFrame expects a frame that matches the provided Layers within the +// timeout specified. If it doesn't arrive in time, it returns nil. 
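//
// A sketch of typical use, following the ICMPv6 parameter-problem test added
// later in this change (t is the calling test's *testing.T):
//
//	conn := NewIPv6Conn(t, IPv6{}, IPv6{})
//	defer conn.Close()
//	frame := conn.CreateFrame(IPv6{}, &ICMPv6{Type: ICMPv6Type(header.ICMPv6EchoRequest)})
//	conn.SendFrame(frame)
//	if _, err := conn.ExpectFrame(Layers{&Ether{}, &IPv6{}, &ICMPv6{}}, time.Second); err != nil {
//		t.Fatalf("expected an ICMPv6 response: %s", err)
//	}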
+func (conn *IPv6Conn) ExpectFrame(frame Layers, timeout time.Duration) (Layers, error) { + return (*Connection)(conn).ExpectFrame(frame, timeout) +} + // Drain drains the sniffer's receive buffer by receiving packets until there's // nothing else to receive. func (conn *TCPIPv4) Drain() { @@ -652,7 +780,7 @@ func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { if err != nil { t.Fatalf("can't make ipv4State: %s", err) } - tcpState, err := newUDPState(outgoingUDP, incomingUDP) + tcpState, err := newUDPState(unix.AF_INET, outgoingUDP, incomingUDP) if err != nil { t.Fatalf("can't make udpState: %s", err) } diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 2cbbbb318..817f5c261 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -36,14 +36,14 @@ import ( type Layer interface { fmt.Stringer - // toBytes converts the Layer into bytes. In places where the Layer's field + // ToBytes converts the Layer into bytes. In places where the Layer's field // isn't nil, the value that is pointed to is used. When the field is nil, a // reasonable default for the Layer is used. For example, "64" for IPv4 TTL // and a calculated checksum for TCP or IP. Some layers require information // from the previous or next layers in order to compute a default, such as // TCP's checksum or Ethernet's type, so each Layer has a doubly-linked list // to the layer's neighbors. - toBytes() ([]byte, error) + ToBytes() ([]byte, error) // match checks if the current Layer matches the provided Layer. If either // Layer has a nil in a given field, that field is considered matching. @@ -174,7 +174,8 @@ func (l *Ether) String() string { return stringLayer(l) } -func (l *Ether) toBytes() ([]byte, error) { +// ToBytes implements Layer.ToBytes. +func (l *Ether) ToBytes() ([]byte, error) { b := make([]byte, header.EthernetMinimumSize) h := header.Ethernet(b) fields := &header.EthernetFields{} @@ -190,8 +191,9 @@ func (l *Ether) toBytes() ([]byte, error) { switch n := l.next().(type) { case *IPv4: fields.Type = header.IPv4ProtocolNumber + case *IPv6: + fields.Type = header.IPv6ProtocolNumber default: - // TODO(b/150301488): Support more protocols, like IPv6. return nil, fmt.Errorf("ethernet header's next layer is unrecognized: %#v", n) } } @@ -246,6 +248,8 @@ func parseEther(b []byte) (Layer, layerParser) { switch h.Type() { case header.IPv4ProtocolNumber: nextParser = parseIPv4 + case header.IPv6ProtocolNumber: + nextParser = parseIPv6 default: // Assume that the rest is a payload. nextParser = parsePayload @@ -286,7 +290,8 @@ func (l *IPv4) String() string { return stringLayer(l) } -func (l *IPv4) toBytes() ([]byte, error) { +// ToBytes implements Layer.ToBytes. +func (l *IPv4) ToBytes() ([]byte, error) { b := make([]byte, header.IPv4MinimumSize) h := header.IPv4(b) fields := &header.IPv4Fields{ @@ -421,6 +426,186 @@ func (l *IPv4) merge(other Layer) error { return mergeLayer(l, other) } +// IPv6 can construct and match an IPv6 encapsulation. +type IPv6 struct { + LayerBase + TrafficClass *uint8 + FlowLabel *uint32 + PayloadLength *uint16 + NextHeader *uint8 + HopLimit *uint8 + SrcAddr *tcpip.Address + DstAddr *tcpip.Address +} + +func (l *IPv6) String() string { + return stringLayer(l) +} + +// ToBytes implements Layer.ToBytes. 
+func (l *IPv6) ToBytes() ([]byte, error) { + b := make([]byte, header.IPv6MinimumSize) + h := header.IPv6(b) + fields := &header.IPv6Fields{ + HopLimit: 64, + } + if l.TrafficClass != nil { + fields.TrafficClass = *l.TrafficClass + } + if l.FlowLabel != nil { + fields.FlowLabel = *l.FlowLabel + } + if l.PayloadLength != nil { + fields.PayloadLength = *l.PayloadLength + } else { + for current := l.next(); current != nil; current = current.next() { + fields.PayloadLength += uint16(current.length()) + } + } + if l.NextHeader != nil { + fields.NextHeader = *l.NextHeader + } else { + switch n := l.next().(type) { + case *TCP: + fields.NextHeader = uint8(header.TCPProtocolNumber) + case *UDP: + fields.NextHeader = uint8(header.UDPProtocolNumber) + case *ICMPv6: + fields.NextHeader = uint8(header.ICMPv6ProtocolNumber) + default: + // TODO(b/150301488): Support more protocols as needed. + return nil, fmt.Errorf("ToBytes can't deduce the IPv6 header's next protocol: %#v", n) + } + } + if l.HopLimit != nil { + fields.HopLimit = *l.HopLimit + } + if l.SrcAddr != nil { + fields.SrcAddr = *l.SrcAddr + } + if l.DstAddr != nil { + fields.DstAddr = *l.DstAddr + } + h.Encode(fields) + return h, nil +} + +// parseIPv6 parses the bytes assuming that they start with an ipv6 header and +// continues parsing further encapsulations. +func parseIPv6(b []byte) (Layer, layerParser) { + h := header.IPv6(b) + tos, flowLabel := h.TOS() + ipv6 := IPv6{ + TrafficClass: &tos, + FlowLabel: &flowLabel, + PayloadLength: Uint16(h.PayloadLength()), + NextHeader: Uint8(h.NextHeader()), + HopLimit: Uint8(h.HopLimit()), + SrcAddr: Address(h.SourceAddress()), + DstAddr: Address(h.DestinationAddress()), + } + var nextParser layerParser + switch h.TransportProtocol() { + case header.TCPProtocolNumber: + nextParser = parseTCP + case header.UDPProtocolNumber: + nextParser = parseUDP + case header.ICMPv6ProtocolNumber: + nextParser = parseICMPv6 + default: + // Assume that the rest is a payload. + nextParser = parsePayload + } + return &ipv6, nextParser +} + +func (l *IPv6) match(other Layer) bool { + return equalLayer(l, other) +} + +func (l *IPv6) length() int { + return header.IPv6MinimumSize +} + +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *IPv6) merge(other Layer) error { + return mergeLayer(l, other) +} + +// ICMPv6 can construct and match an ICMPv6 encapsulation. +type ICMPv6 struct { + LayerBase + Type *header.ICMPv6Type + Code *byte + Checksum *uint16 + NDPPayload []byte +} + +func (l *ICMPv6) String() string { + // TODO(eyalsoha): Do something smarter here when *l.Type is ParameterProblem? + // We could parse the contents of the Payload as if it were an IPv6 packet. + return stringLayer(l) +} + +// ToBytes implements Layer.ToBytes. +func (l *ICMPv6) ToBytes() ([]byte, error) { + b := make([]byte, header.ICMPv6HeaderSize+len(l.NDPPayload)) + h := header.ICMPv6(b) + if l.Type != nil { + h.SetType(*l.Type) + } + if l.Code != nil { + h.SetCode(*l.Code) + } + copy(h.NDPPayload(), l.NDPPayload) + if l.Checksum != nil { + h.SetChecksum(*l.Checksum) + } else { + ipv6 := l.prev().(*IPv6) + h.SetChecksum(header.ICMPv6Checksum(h, *ipv6.SrcAddr, *ipv6.DstAddr, buffer.VectorisedView{})) + } + return h, nil +} + +// ICMPv6Type is a helper routine that allocates a new ICMPv6Type value to store +// v and returns a pointer to it. 
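//
// Layers built from these types can also be serialized without a Connection;
// nil fields are filled with computed defaults (Ethernet type, NextHeader,
// PayloadLength, checksum). A sketch using made-up link-local addresses:
//
//	src := Address(tcpip.Address(net.ParseIP("fe80::1").To16()))
//	dst := Address(tcpip.Address(net.ParseIP("fe80::2").To16()))
//	frame := Layers{
//		&Ether{},
//		&IPv6{SrcAddr: src, DstAddr: dst},
//		&ICMPv6{Type: ICMPv6Type(header.ICMPv6EchoRequest)},
//	}
//	b, err := frame.ToBytes() // the ICMPv6 checksum is derived from the IPv6 layer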
+func ICMPv6Type(v header.ICMPv6Type) *header.ICMPv6Type { + return &v +} + +// Byte is a helper routine that allocates a new byte value to store +// v and returns a pointer to it. +func Byte(v byte) *byte { + return &v +} + +// parseICMPv6 parses the bytes assuming that they start with an ICMPv6 header. +func parseICMPv6(b []byte) (Layer, layerParser) { + h := header.ICMPv6(b) + icmpv6 := ICMPv6{ + Type: ICMPv6Type(h.Type()), + Code: Byte(h.Code()), + Checksum: Uint16(h.Checksum()), + NDPPayload: h.NDPPayload(), + } + return &icmpv6, nil +} + +func (l *ICMPv6) match(other Layer) bool { + return equalLayer(l, other) +} + +func (l *ICMPv6) length() int { + return header.ICMPv6HeaderSize + len(l.NDPPayload) +} + +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *ICMPv6) merge(other Layer) error { + return mergeLayer(l, other) +} + // TCP can construct and match a TCP encapsulation. type TCP struct { LayerBase @@ -439,7 +624,8 @@ func (l *TCP) String() string { return stringLayer(l) } -func (l *TCP) toBytes() ([]byte, error) { +// ToBytes implements Layer.ToBytes. +func (l *TCP) ToBytes() ([]byte, error) { b := make([]byte, header.TCPMinimumSize) h := header.TCP(b) if l.SrcPort != nil { @@ -504,7 +690,7 @@ func layerChecksum(l Layer, protoNumber tcpip.TransportProtocolNumber) (uint16, } var payloadBytes buffer.VectorisedView for current := l.next(); current != nil; current = current.next() { - payload, err := current.toBytes() + payload, err := current.ToBytes() if err != nil { return 0, fmt.Errorf("can't get bytes for next header: %s", payload) } @@ -578,7 +764,8 @@ func (l *UDP) String() string { return stringLayer(l) } -func (l *UDP) toBytes() ([]byte, error) { +// ToBytes implements Layer.ToBytes. +func (l *UDP) ToBytes() ([]byte, error) { b := make([]byte, header.UDPMinimumSize) h := header.UDP(b) if l.SrcPort != nil { @@ -661,7 +848,8 @@ func parsePayload(b []byte) (Layer, layerParser) { return &payload, nil } -func (l *Payload) toBytes() ([]byte, error) { +// ToBytes implements Layer.ToBytes. +func (l *Payload) ToBytes() ([]byte, error) { return l.Bytes, nil } @@ -697,11 +885,13 @@ func (ls *Layers) linkLayers() { } } -func (ls *Layers) toBytes() ([]byte, error) { +// ToBytes converts the Layers into bytes. It creates a linked list of the Layer +// structs and then concatentates the output of ToBytes on each Layer. +func (ls *Layers) ToBytes() ([]byte, error) { ls.linkLayers() outBytes := []byte{} for _, l := range *ls { - layerBytes, err := l.toBytes() + layerBytes, err := l.ToBytes() if err != nil { return nil, err } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 47c722ccd..42f87e3f3 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -96,6 +96,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "icmpv6_param_problem", + srcs = ["icmpv6_param_problem_test.go"], + # TODO(b/153485026): Fix netstack then remove the line below. + netstack = False, + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/icmpv6_param_problem_test.go b/test/packetimpact/tests/icmpv6_param_problem_test.go new file mode 100644 index 000000000..b48e55df4 --- /dev/null +++ b/test/packetimpact/tests/icmpv6_param_problem_test.go @@ -0,0 +1,73 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package icmpv6_param_problem_test + +import ( + "encoding/binary" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +// TestICMPv6ParamProblemTest sends a packet with a bad next header. The DUT +// should respond with an ICMPv6 Parameter Problem message. +func TestICMPv6ParamProblemTest(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + conn := tb.NewIPv6Conn(t, tb.IPv6{}, tb.IPv6{}) + defer conn.Close() + ipv6 := tb.IPv6{ + // 254 is reserved and used for experimentation and testing. This should + // cause an error. + NextHeader: tb.Uint8(254), + } + icmpv6 := tb.ICMPv6{ + Type: tb.ICMPv6Type(header.ICMPv6EchoRequest), + NDPPayload: []byte("hello world"), + } + + toSend := conn.CreateFrame(ipv6, &icmpv6) + conn.SendFrame(toSend) + + // Build the expected ICMPv6 payload, which includes an index to the + // problematic byte and also the problematic packet as described in + // https://tools.ietf.org/html/rfc4443#page-12 . + ipv6Sent := toSend[1:] + expectedPayload, err := ipv6Sent.ToBytes() + if err != nil { + t.Fatalf("can't convert %s to bytes: %s", ipv6Sent, err) + } + + // The problematic field is the NextHeader. + b := make([]byte, 4) + binary.BigEndian.PutUint32(b, header.IPv6NextHeaderOffset) + expectedPayload = append(b, expectedPayload...) + expectedICMPv6 := tb.ICMPv6{ + Type: tb.ICMPv6Type(header.ICMPv6ParamProblem), + NDPPayload: expectedPayload, + } + + paramProblem := tb.Layers{ + &tb.Ether{}, + &tb.IPv6{}, + &expectedICMPv6, + } + timeout := time.Second + if _, err := conn.ExpectFrame(paramProblem, timeout); err != nil { + t.Errorf("expected %s within %s but got none: %s", paramProblem, timeout, err) + } +} diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh index e938de782..706441cce 100755 --- a/test/packetimpact/tests/test_runner.sh +++ b/test/packetimpact/tests/test_runner.sh @@ -192,6 +192,8 @@ docker pull "${IMAGE_TAG}" # Create the DUT container and connect to network. DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \ + --cap-add NET_ADMIN \ + --sysctl net.ipv6.conf.all.disable_ipv6=0 \ --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) docker network connect "${CTRL_NET}" \ --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ @@ -203,6 +205,8 @@ docker start "${DUT}" # Create the test bench container and connect to network. 
TESTBENCH=$(docker create --privileged --rm \ + --cap-add NET_ADMIN \ + --sysctl net.ipv6.conf.all.disable_ipv6=0 \ --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) docker network connect "${CTRL_NET}" \ --ip "${CTRL_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ @@ -237,6 +241,32 @@ declare -r REMOTE_MAC=$(docker exec -t "${DUT}" ip link show \ "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) declare -r LOCAL_MAC=$(docker exec -t "${TESTBENCH}" ip link show \ "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) +declare REMOTE_IPV6=$(docker exec -t "${DUT}" ip addr show scope link \ + "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) +declare -r LOCAL_IPV6=$(docker exec -t "${TESTBENCH}" ip addr show scope link \ + "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) + +# Netstack as DUT doesn't assign IPv6 addresses automatically so do it if +# needed. Convert the MAC address to an IPv6 link local address as described in +# RFC 4291 page 20: https://tools.ietf.org/html/rfc4291#page-20 +if [[ -z "${REMOTE_IPV6}" ]]; then + # Split the octets of the MAC into an array of strings. + IFS=":" read -a REMOTE_OCTETS <<< "${REMOTE_MAC}" + # Flip the global bit. + REMOTE_OCTETS[0]=$(printf '%x' "$((0x${REMOTE_OCTETS[0]} ^ 2))") + # Add the IPv6 address. + docker exec "${DUT}" \ + ip addr add $(printf 'fe80::%02x%02x:%02xff:fe%02x:%02x%02x/64' \ + "0x${REMOTE_OCTETS[0]}" "0x${REMOTE_OCTETS[1]}" "0x${REMOTE_OCTETS[2]}" \ + "0x${REMOTE_OCTETS[3]}" "0x${REMOTE_OCTETS[4]}" "0x${REMOTE_OCTETS[5]}") \ + scope link \ + dev "${TEST_DEVICE}" + # Re-extract the IPv6 address. + # TODO(eyalsoha): Add "scope link" below when netstack supports correctly + # creating link-local IPv6 addresses. + REMOTE_IPV6=$(docker exec -t "${DUT}" ip addr show \ + "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) +fi declare -r DOCKER_TESTBENCH_BINARY="/$(basename ${TESTBENCH_BINARY})" docker cp -L "${TESTBENCH_BINARY}" "${TESTBENCH}:${DOCKER_TESTBENCH_BINARY}" @@ -245,7 +275,10 @@ if [[ -z "${TSHARK-}" ]]; then # Run tcpdump in the test bench unbuffered, without dns resolution, just on # the interface with the test packets. docker exec -t "${TESTBENCH}" \ - tcpdump -S -vvv -U -n -i "${TEST_DEVICE}" net "${TEST_NET_PREFIX}/24" & + tcpdump -S -vvv -U -n -i "${TEST_DEVICE}" \ + net "${TEST_NET_PREFIX}/24" or \ + host "${REMOTE_IPV6}" or \ + host "${LOCAL_IPV6}" & else # Run tshark in the test bench unbuffered, without dns resolution, just on the # interface with the test packets. 
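# A worked example of the link-local derivation above, using a made-up MAC
# rather than a real container address:
#
#   MAC=02:42:ac:11:00:02
#   IFS=":" read -a OCTETS <<< "${MAC}"
#   OCTETS[0]=$(printf '%x' "$((0x${OCTETS[0]} ^ 2))")   # flip the universal/local bit: 02 -> 00
#   printf 'fe80::%02x%02x:%02xff:fe%02x:%02x%02x/64\n' \
#     "0x${OCTETS[0]}" "0x${OCTETS[1]}" "0x${OCTETS[2]}" \
#     "0x${OCTETS[3]}" "0x${OCTETS[4]}" "0x${OCTETS[5]}"
#   # prints fe80::0042:acff:fe11:0002/64, i.e. fe80::42:acff:fe11:2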
@@ -253,7 +286,9 @@ else tshark -V -l -n -i "${TEST_DEVICE}" \ -o tcp.check_checksum:TRUE \ -o udp.check_checksum:TRUE \ - host "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" & + net "${TEST_NET_PREFIX}/24" or \ + host "${REMOTE_IPV6}" or \ + host "${LOCAL_IPV6}" & fi # tcpdump and tshark take time to startup @@ -272,6 +307,8 @@ docker exec \ --posix_server_port=${CTRL_PORT} \ --remote_ipv4=${TEST_NET_PREFIX}${DUT_NET_SUFFIX} \ --local_ipv4=${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX} \ + --remote_ipv6=${REMOTE_IPV6} \ + --local_ipv6=${LOCAL_IPV6} \ --remote_mac=${REMOTE_MAC} \ --local_mac=${LOCAL_MAC} \ --device=${TEST_DEVICE}" && true -- cgit v1.2.3 From 4af39dd1c522f7852312ecbfd3678892fc656322 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Fri, 24 Apr 2020 18:15:26 -0700 Subject: Propagate PID limit from OCI to sandbox cgroup Closes #2489 PiperOrigin-RevId: 308362434 --- runsc/cgroup/cgroup.go | 14 ++++++++++++-- test/root/cgroup_test.go | 6 ++++++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 653ca5f52..fa40ee509 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -45,13 +45,13 @@ var controllers = map[string]controller{ "memory": &memory{}, "net_cls": &networkClass{}, "net_prio": &networkPrio{}, + "pids": &pids{}, // These controllers either don't have anything in the OCI spec or is - // irrevalant for a sandbox, e.g. pids. + // irrelevant for a sandbox. "devices": &noop{}, "freezer": &noop{}, "perf_event": &noop{}, - "pids": &noop{}, "systemd": &noop{}, } @@ -525,3 +525,13 @@ func (*networkPrio) set(spec *specs.LinuxResources, path string) error { } return nil } + +type pids struct{} + +func (*pids) set(spec *specs.LinuxResources, path string) error { + if spec.Pids == nil { + return nil + } + val := strconv.FormatInt(spec.Pids.Limit, 10) + return setValue(path, "pids.max", val) +} diff --git a/test/root/cgroup_test.go b/test/root/cgroup_test.go index 8876d0d61..d0634b5c3 100644 --- a/test/root/cgroup_test.go +++ b/test/root/cgroup_test.go @@ -199,6 +199,12 @@ func TestCgroup(t *testing.T) { want: "750", skipIfNotFound: true, // blkio groups may not be available. }, + { + arg: "--pids-limit=1000", + ctrl: "pids", + file: "pids.max", + want: "1000", + }, } args := make([]string, 0, len(attrs)) -- cgit v1.2.3 From 42822603551e5f0c0796918abf0ce92990c80da8 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Tue, 28 Apr 2020 11:27:38 -0700 Subject: Don't unlink named pipes in pipe test. TempPath's destructor runs at the end of the named pipe creation functions, deleting the named pipe. If the named pipe is backed by a "non-virtual" filesystem (!fs.Inode.IsVirtual()), this causes the following save attempt to fail because there are FDs holding the deleted named pipe open. PiperOrigin-RevId: 308861999 --- test/syscalls/linux/pipe.cc | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc index 67228b66b..34291850d 100644 --- a/test/syscalls/linux/pipe.cc +++ b/test/syscalls/linux/pipe.cc @@ -631,11 +631,14 @@ INSTANTIATE_TEST_SUITE_P( "namednonblocking", [](int fds[2], bool* is_blocking, bool* is_namedpipe) { // Create a new file-based pipe (non-blocking). 
- auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); - ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); - SKIP_IF(mkfifo(file.path().c_str(), 0644) != 0); - fds[0] = open(file.path().c_str(), O_NONBLOCK | O_RDONLY); - fds[1] = open(file.path().c_str(), O_NONBLOCK | O_WRONLY); + std::string path; + { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + path = file.path(); + } + SKIP_IF(mkfifo(path.c_str(), 0644) != 0); + fds[0] = open(path.c_str(), O_NONBLOCK | O_RDONLY); + fds[1] = open(path.c_str(), O_NONBLOCK | O_WRONLY); MaybeSave(); *is_blocking = false; *is_namedpipe = true; @@ -645,13 +648,15 @@ INSTANTIATE_TEST_SUITE_P( "namedblocking", [](int fds[2], bool* is_blocking, bool* is_namedpipe) { // Create a new file-based pipe (blocking). - auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); - ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); - SKIP_IF(mkfifo(file.path().c_str(), 0644) != 0); - ScopedThread t([&file, &fds]() { - fds[1] = open(file.path().c_str(), O_WRONLY); - }); - fds[0] = open(file.path().c_str(), O_RDONLY); + std::string path; + { + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + path = file.path(); + } + SKIP_IF(mkfifo(path.c_str(), 0644) != 0); + ScopedThread t( + [&path, &fds]() { fds[1] = open(path.c_str(), O_WRONLY); }); + fds[0] = open(path.c_str(), O_RDONLY); t.Join(); MaybeSave(); *is_blocking = true; -- cgit v1.2.3 From 24abccbc1c3b7b0dd06b6da97e5b4c90c8c13907 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 28 Apr 2020 18:49:19 -0700 Subject: Internal change. PiperOrigin-RevId: 308940886 --- pkg/tcpip/header/icmpv4.go | 1 + test/packetimpact/dut/posix_server.cc | 163 ++++++++++++---- test/packetimpact/proto/posix_server.proto | 73 +++++++ test/packetimpact/testbench/connections.go | 120 ++++++++---- test/packetimpact/testbench/dut.go | 153 +++++++++++++++ test/packetimpact/testbench/layers.go | 104 ++++++++-- test/packetimpact/tests/BUILD | 13 ++ .../tests/udp_icmp_error_propagation_test.go | 209 +++++++++++++++++++++ 8 files changed, 749 insertions(+), 87 deletions(-) create mode 100644 test/packetimpact/tests/udp_icmp_error_propagation_test.go (limited to 'test') diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go index 0cac6c0a5..7908c5744 100644 --- a/pkg/tcpip/header/icmpv4.go +++ b/pkg/tcpip/header/icmpv4.go @@ -71,6 +71,7 @@ const ( // Values for ICMP code as defined in RFC 792. 
const ( + ICMPv4TTLExceeded = 0 ICMPv4PortUnreachable = 3 ICMPv4FragmentationNeeded = 4 ) diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index 86e580c6f..cb499b0b1 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -60,6 +60,45 @@ return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "Unknown Sockaddr"); } +::grpc::Status proto_to_sockaddr(const posix_server::Sockaddr &sockaddr_proto, + sockaddr_storage *addr) { + switch (sockaddr_proto.sockaddr_case()) { + case posix_server::Sockaddr::SockaddrCase::kIn: { + auto proto_in = sockaddr_proto.in(); + if (proto_in.addr().size() != 4) { + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "IPv4 address must be 4 bytes"); + } + auto addr_in = reinterpret_cast(addr); + addr_in->sin_family = proto_in.family(); + addr_in->sin_port = htons(proto_in.port()); + proto_in.addr().copy(reinterpret_cast(&addr_in->sin_addr.s_addr), + 4); + break; + } + case posix_server::Sockaddr::SockaddrCase::kIn6: { + auto proto_in6 = sockaddr_proto.in6(); + if (proto_in6.addr().size() != 16) { + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "IPv6 address must be 16 bytes"); + } + auto addr_in6 = reinterpret_cast(addr); + addr_in6->sin6_family = proto_in6.family(); + addr_in6->sin6_port = htons(proto_in6.port()); + addr_in6->sin6_flowinfo = htonl(proto_in6.flowinfo()); + proto_in6.addr().copy( + reinterpret_cast(&addr_in6->sin6_addr.s6_addr), 16); + addr_in6->sin6_scope_id = htonl(proto_in6.scope_id()); + break; + } + case posix_server::Sockaddr::SockaddrCase::SOCKADDR_NOT_SET: + default: + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "Unknown Sockaddr"); + } + return ::grpc::Status::OK; +} + class PosixImpl final : public posix_server::Posix::Service { ::grpc::Status Accept(grpc_impl::ServerContext *context, const ::posix_server::AcceptRequest *request, @@ -79,42 +118,13 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "Missing address"); } - sockaddr_storage addr; - switch (request->addr().sockaddr_case()) { - case posix_server::Sockaddr::SockaddrCase::kIn: { - auto request_in = request->addr().in(); - if (request_in.addr().size() != 4) { - return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, - "IPv4 address must be 4 bytes"); - } - auto addr_in = reinterpret_cast(&addr); - addr_in->sin_family = request_in.family(); - addr_in->sin_port = htons(request_in.port()); - request_in.addr().copy( - reinterpret_cast(&addr_in->sin_addr.s_addr), 4); - break; - } - case posix_server::Sockaddr::SockaddrCase::kIn6: { - auto request_in6 = request->addr().in6(); - if (request_in6.addr().size() != 16) { - return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, - "IPv6 address must be 16 bytes"); - } - auto addr_in6 = reinterpret_cast(&addr); - addr_in6->sin6_family = request_in6.family(); - addr_in6->sin6_port = htons(request_in6.port()); - addr_in6->sin6_flowinfo = htonl(request_in6.flowinfo()); - request_in6.addr().copy( - reinterpret_cast(&addr_in6->sin6_addr.s6_addr), 16); - addr_in6->sin6_scope_id = htonl(request_in6.scope_id()); - break; - } - case posix_server::Sockaddr::SockaddrCase::SOCKADDR_NOT_SET: - default: - return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, - "Unknown Sockaddr"); + sockaddr_storage addr; + auto err = proto_to_sockaddr(request->addr(), &addr); + if (!err.ok()) { + return err; } + response->set_ret(bind(request->sockfd(), 
reinterpret_cast(&addr), sizeof(addr))); response->set_errno_(errno); @@ -129,6 +139,25 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } + ::grpc::Status Connect(grpc_impl::ServerContext *context, + const ::posix_server::ConnectRequest *request, + ::posix_server::ConnectResponse *response) override { + if (!request->has_addr()) { + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "Missing address"); + } + sockaddr_storage addr; + auto err = proto_to_sockaddr(request->addr(), &addr); + if (!err.ok()) { + return err; + } + + response->set_ret(connect( + request->sockfd(), reinterpret_cast(&addr), sizeof(addr))); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + ::grpc::Status GetSockName( grpc_impl::ServerContext *context, const ::posix_server::GetSockNameRequest *request, @@ -141,6 +170,48 @@ class PosixImpl final : public posix_server::Posix::Service { return sockaddr_to_proto(addr, addrlen, response->mutable_addr()); } + ::grpc::Status GetSockOpt( + grpc_impl::ServerContext *context, + const ::posix_server::GetSockOptRequest *request, + ::posix_server::GetSockOptResponse *response) override { + socklen_t optlen = request->optlen(); + std::vector buf(optlen); + response->set_ret(::getsockopt(request->sockfd(), request->level(), + request->optname(), buf.data(), &optlen)); + response->set_errno_(errno); + if (optlen >= 0) { + response->set_optval(buf.data(), optlen); + } + return ::grpc::Status::OK; + } + + ::grpc::Status GetSockOptInt( + ::grpc::ServerContext *context, + const ::posix_server::GetSockOptIntRequest *request, + ::posix_server::GetSockOptIntResponse *response) override { + int opt = 0; + socklen_t optlen = sizeof(opt); + response->set_ret(::getsockopt(request->sockfd(), request->level(), + request->optname(), &opt, &optlen)); + response->set_errno_(errno); + response->set_intval(opt); + return ::grpc::Status::OK; + } + + ::grpc::Status GetSockOptTimeval( + ::grpc::ServerContext *context, + const ::posix_server::GetSockOptTimevalRequest *request, + ::posix_server::GetSockOptTimevalResponse *response) override { + timeval tv; + socklen_t optlen = sizeof(tv); + response->set_ret(::getsockopt(request->sockfd(), request->level(), + request->optname(), &tv, &optlen)); + response->set_errno_(errno); + response->mutable_timeval()->set_seconds(tv.tv_sec); + response->mutable_timeval()->set_microseconds(tv.tv_usec); + return ::grpc::Status::OK; + } + ::grpc::Status Listen(grpc_impl::ServerContext *context, const ::posix_server::ListenRequest *request, ::posix_server::ListenResponse *response) override { @@ -158,6 +229,26 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } + ::grpc::Status SendTo(::grpc::ServerContext *context, + const ::posix_server::SendToRequest *request, + ::posix_server::SendToResponse *response) override { + if (!request->has_dest_addr()) { + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "Missing address"); + } + sockaddr_storage addr; + auto err = proto_to_sockaddr(request->dest_addr(), &addr); + if (!err.ok()) { + return err; + } + + response->set_ret(::sendto( + request->sockfd(), request->buf().data(), request->buf().size(), + request->flags(), reinterpret_cast(&addr), sizeof(addr))); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + ::grpc::Status SetSockOpt( grpc_impl::ServerContext *context, const ::posix_server::SetSockOptRequest *request, @@ -208,8 +299,10 @@ class PosixImpl final : public 
posix_server::Posix::Service { std::vector buf(request->len()); response->set_ret( recv(request->sockfd(), buf.data(), buf.size(), request->flags())); + if (response->ret() >= 0) { + response->set_buf(buf.data(), response->ret()); + } response->set_errno_(errno); - response->set_buf(buf.data(), response->ret()); return ::grpc::Status::OK; } }; diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto index 4035e1ee6..ab5ba1c85 100644 --- a/test/packetimpact/proto/posix_server.proto +++ b/test/packetimpact/proto/posix_server.proto @@ -73,6 +73,16 @@ message CloseResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } +message ConnectRequest { + int32 sockfd = 1; + Sockaddr addr = 2; +} + +message ConnectResponse { + int32 ret = 1; + int32 errno_ = 2; // "errno" may fail to compile in c++. +} + message GetSockNameRequest { int32 sockfd = 1; } @@ -83,6 +93,43 @@ message GetSockNameResponse { Sockaddr addr = 3; } +message GetSockOptRequest { + int32 sockfd = 1; + int32 level = 2; + int32 optname = 3; + int32 optlen = 4; +} + +message GetSockOptResponse { + int32 ret = 1; + int32 errno_ = 2; // "errno" may fail to compile in c++. + bytes optval = 3; +} + +message GetSockOptIntRequest { + int32 sockfd = 1; + int32 level = 2; + int32 optname = 3; +} + +message GetSockOptIntResponse { + int32 ret = 1; + int32 errno_ = 2; // "errno" may fail to compile in c++. + int32 intval = 3; +} + +message GetSockOptTimevalRequest { + int32 sockfd = 1; + int32 level = 2; + int32 optname = 3; +} + +message GetSockOptTimevalResponse { + int32 ret = 1; + int32 errno_ = 2; // "errno" may fail to compile in c++. + Timeval timeval = 3; +} + message ListenRequest { int32 sockfd = 1; int32 backlog = 2; @@ -104,6 +151,18 @@ message SendResponse { int32 errno_ = 2; } +message SendToRequest { + int32 sockfd = 1; + bytes buf = 2; + int32 flags = 3; + Sockaddr dest_addr = 4; +} + +message SendToResponse { + int32 ret = 1; + int32 errno_ = 2; // "errno" may fail to compile in c++. +} + message SetSockOptRequest { int32 sockfd = 1; int32 level = 2; @@ -170,12 +229,26 @@ service Posix { rpc Bind(BindRequest) returns (BindResponse); // Call close() on the DUT. rpc Close(CloseRequest) returns (CloseResponse); + // Call connect() on the DUT. + rpc Connect(ConnectRequest) returns (ConnectResponse); // Call getsockname() on the DUT. rpc GetSockName(GetSockNameRequest) returns (GetSockNameResponse); + // Call getsockopt() on the DUT. You should prefer one of the other + // GetSockOpt* functions with a more structured optval or else you may get the + // encoding wrong, such as making a bad assumption about the server's word + // sizes or endianness. + rpc GetSockOpt(GetSockOptRequest) returns (GetSockOptResponse); + // Call getsockopt() on the DUT with an int optval. + rpc GetSockOptInt(GetSockOptIntRequest) returns (GetSockOptIntResponse); + // Call getsockopt() on the DUT with a Timeval optval. + rpc GetSockOptTimeval(GetSockOptTimevalRequest) + returns (GetSockOptTimevalResponse); // Call listen() on the DUT. rpc Listen(ListenRequest) returns (ListenResponse); // Call send() on the DUT. rpc Send(SendRequest) returns (SendResponse); + // Call sendto() on the DUT. + rpc SendTo(SendToRequest) returns (SendToResponse); // Call setsockopt() on the DUT. 
You should prefer one of the other // SetSockOpt* functions with a more structured optval or else you may get the // encoding wrong, such as making a bad assumption about the server's word diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 2280bd4ee..56ac3fa54 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -39,20 +39,28 @@ var remoteIPv6 = flag.String("remote_ipv6", "", "remote IPv6 address for test pa var localMAC = flag.String("local_mac", "", "local mac address for test packets") var remoteMAC = flag.String("remote_mac", "", "remote mac address for test packets") -// pickPort makes a new socket and returns the socket FD and port. The domain -// should be AF_INET or AF_INET6. The caller must close the FD when done with +func portFromSockaddr(sa unix.Sockaddr) (uint16, error) { + switch sa := sa.(type) { + case *unix.SockaddrInet4: + return uint16(sa.Port), nil + case *unix.SockaddrInet6: + return uint16(sa.Port), nil + } + return 0, fmt.Errorf("sockaddr type %T does not contain port", sa) +} + +// pickPort makes a new socket and returns the socket FD and port. The domain should be AF_INET or AF_INET6. The caller must close the FD when done with // the port if there is no error. -func pickPort(domain, typ int) (fd int, port uint16, err error) { +func pickPort(domain, typ int) (fd int, sa unix.Sockaddr, err error) { fd, err = unix.Socket(domain, typ, 0) if err != nil { - return -1, 0, err + return -1, nil, err } defer func() { if err != nil { err = multierr.Append(err, unix.Close(fd)) } }() - var sa unix.Sockaddr switch domain { case unix.AF_INET: var sa4 unix.SockaddrInet4 @@ -63,31 +71,16 @@ func pickPort(domain, typ int) (fd int, port uint16, err error) { copy(sa6.Addr[:], net.ParseIP(*localIPv6).To16()) sa = &sa6 default: - return -1, 0, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) + return -1, nil, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) } if err = unix.Bind(fd, sa); err != nil { - return -1, 0, err + return -1, nil, err } - newSockAddr, err := unix.Getsockname(fd) + sa, err = unix.Getsockname(fd) if err != nil { - return -1, 0, err - } - switch domain { - case unix.AF_INET: - newSockAddrInet4, ok := newSockAddr.(*unix.SockaddrInet4) - if !ok { - return -1, 0, fmt.Errorf("can't cast Getsockname result %T to SockaddrInet4", newSockAddr) - } - return fd, uint16(newSockAddrInet4.Port), nil - case unix.AF_INET6: - newSockAddrInet6, ok := newSockAddr.(*unix.SockaddrInet6) - if !ok { - return -1, 0, fmt.Errorf("can't cast Getsockname result %T to SockaddrInet6", newSockAddr) - } - return fd, uint16(newSockAddrInet6.Port), nil - default: - return -1, 0, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) + return -1, nil, err } + return fd, sa, nil } // layerState stores the state of a layer of a connection. @@ -282,7 +275,11 @@ func SeqNumValue(v seqnum.Value) *seqnum.Value { // newTCPState creates a new TCPState. 
func newTCPState(domain int, out, in TCP) (*tcpState, error) { - portPickerFD, localPort, err := pickPort(domain, unix.SOCK_STREAM) + portPickerFD, localAddr, err := pickPort(domain, unix.SOCK_STREAM) + if err != nil { + return nil, err + } + localPort, err := portFromSockaddr(localAddr) if err != nil { return nil, err } @@ -385,10 +382,14 @@ type udpState struct { var _ layerState = (*udpState)(nil) // newUDPState creates a new udpState. -func newUDPState(domain int, out, in UDP) (*udpState, error) { - portPickerFD, localPort, err := pickPort(domain, unix.SOCK_DGRAM) +func newUDPState(domain int, out, in UDP) (*udpState, unix.Sockaddr, error) { + portPickerFD, localAddr, err := pickPort(domain, unix.SOCK_DGRAM) if err != nil { - return nil, err + return nil, nil, err + } + localPort, err := portFromSockaddr(localAddr) + if err != nil { + return nil, nil, err } s := udpState{ out: UDP{SrcPort: &localPort}, @@ -396,12 +397,12 @@ func newUDPState(domain int, out, in UDP) (*udpState, error) { portPickerFD: portPickerFD, } if err := s.out.merge(&out); err != nil { - return nil, err + return nil, nil, err } if err := s.in.merge(&in); err != nil { - return nil, err + return nil, nil, err } - return &s, nil + return &s, localAddr, nil } func (s *udpState) outgoing() Layer { @@ -436,6 +437,7 @@ type Connection struct { layerStates []layerState injector Injector sniffer Sniffer + localAddr unix.Sockaddr t *testing.T } @@ -499,7 +501,7 @@ func (conn *Connection) CreateFrame(layer Layer, additionalLayers ...Layer) Laye func (conn *Connection) SendFrame(frame Layers) { outBytes, err := frame.ToBytes() if err != nil { - conn.t.Fatalf("can't build outgoing TCP packet: %s", err) + conn.t.Fatalf("can't build outgoing packet: %s", err) } conn.injector.Send(outBytes) @@ -545,8 +547,9 @@ func (e *layersError) Error() string { return e.got.diff(e.want) } -// Expect a frame with the final layerStates layer matching the provided Layer -// within the timeout specified. If it doesn't arrive in time, it returns nil. +// Expect expects a frame with the final layerStates layer matching the +// provided Layer within the timeout specified. If it doesn't arrive in time, +// an error is returned. func (conn *Connection) Expect(layer Layer, timeout time.Duration) (Layer, error) { // Make a frame that will ignore all but the final layer. layers := make([]Layer, len(conn.layerStates)) @@ -671,8 +674,8 @@ func (conn *TCPIPv4) Close() { (*Connection)(conn).Close() } -// Expect a frame with the TCP layer matching the provided TCP within the -// timeout specified. If it doesn't arrive in time, it returns nil. +// Expect expects a frame with the TCP layer matching the provided TCP within +// the timeout specified. If it doesn't arrive in time, an error is returned. func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) { layer, err := (*Connection)(conn).Expect(&tcp, timeout) if layer == nil { @@ -756,7 +759,7 @@ func (conn *IPv6Conn) Close() { } // ExpectFrame expects a frame that matches the provided Layers within the -// timeout specified. If it doesn't arrive in time, it returns nil. +// timeout specified. If it doesn't arrive in time, an error is returned. 
func (conn *IPv6Conn) ExpectFrame(frame Layers, timeout time.Duration) (Layers, error) { return (*Connection)(conn).ExpectFrame(frame, timeout) } @@ -780,7 +783,7 @@ func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { if err != nil { t.Fatalf("can't make ipv4State: %s", err) } - tcpState, err := newUDPState(unix.AF_INET, outgoingUDP, incomingUDP) + udpState, localAddr, err := newUDPState(unix.AF_INET, outgoingUDP, incomingUDP) if err != nil { t.Fatalf("can't make udpState: %s", err) } @@ -794,24 +797,61 @@ func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { } return UDPIPv4{ - layerStates: []layerState{etherState, ipv4State, tcpState}, + layerStates: []layerState{etherState, ipv4State, udpState}, injector: injector, sniffer: sniffer, + localAddr: localAddr, t: t, } } +// LocalAddr gets the local socket address of this connection. +func (conn *UDPIPv4) LocalAddr() unix.Sockaddr { + return conn.localAddr +} + // CreateFrame builds a frame for the connection with layer overriding defaults // of the innermost layer and additionalLayers added after it. func (conn *UDPIPv4) CreateFrame(layer Layer, additionalLayers ...Layer) Layers { return (*Connection)(conn).CreateFrame(layer, additionalLayers...) } +// Send a packet with reasonable defaults. Potentially override the UDP layer in +// the connection with the provided layer and add additionLayers. +func (conn *UDPIPv4) Send(udp UDP, additionalLayers ...Layer) { + (*Connection)(conn).Send(&udp, additionalLayers...) +} + // SendFrame sends a frame on the wire and updates the state of all layers. func (conn *UDPIPv4) SendFrame(frame Layers) { (*Connection)(conn).SendFrame(frame) } +// SendIP sends a packet with additionalLayers following the IP layer in the +// connection. +func (conn *UDPIPv4) SendIP(additionalLayers ...Layer) { + var layersToSend Layers + for _, s := range conn.layerStates[:len(conn.layerStates)-1] { + layersToSend = append(layersToSend, s.outgoing()) + } + layersToSend = append(layersToSend, additionalLayers...) + conn.SendFrame(layersToSend) +} + +// Expect expects a frame with the UDP layer matching the provided UDP within +// the timeout specified. If it doesn't arrive in time, an error is returned. +func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) { + layer, err := (*Connection)(conn).Expect(&udp, timeout) + if layer == nil { + return nil, err + } + gotUDP, ok := layer.(*UDP) + if !ok { + conn.t.Fatalf("expected %s to be UDP", layer) + } + return gotUDP, err +} + // Close frees associated resources held by the UDPIPv4 connection. func (conn *UDPIPv4) Close() { (*Connection)(conn).Close() diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index 3f340c6bc..87eeeeb88 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -237,6 +237,33 @@ func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) { return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } +// Connect calls connect on the DUT and causes a fatal test failure if it +// doesn't succeed. If more control over the timeout or error handling is +// needed, use ConnectWithErrno. +func (dut *DUT) Connect(fd int32, sa unix.Sockaddr) { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.ConnectWithErrno(ctx, fd, sa) + if ret != 0 { + dut.t.Fatalf("failed to connect socket: %s", err) + } +} + +// ConnectWithErrno calls bind on the DUT. 
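//
// Together with the UDPIPv4 helpers above, a test can now drive a UDP exchange
// end to end. A sketch (dut comes from NewDUT(t); remoteFD and remotePort are
// a DUT socket and its bound port, created by helpers not shown here):
//
//	conn := NewUDPIPv4(t, UDP{DstPort: &remotePort}, UDP{SrcPort: &remotePort})
//	defer conn.Close()
//	dut.Connect(remoteFD, conn.LocalAddr())
//	conn.Send(UDP{})                               // testbench -> DUT
//	dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) // DUT -> testbench
//	if _, err := conn.Expect(UDP{}, time.Second); err != nil {
//		t.Fatalf("expected a UDP packet from the DUT: %s", err)
//	}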
+func (dut *DUT) ConnectWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) (int32, error) { + dut.t.Helper() + req := pb.ConnectRequest{ + Sockfd: fd, + Addr: dut.sockaddrToProto(sa), + } + resp, err := dut.posixServer.Connect(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Connect: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + // GetSockName calls getsockname on the DUT and causes a fatal test failure if // it doesn't succeed. If more control over the timeout or error handling is // needed, use GetSockNameWithErrno. @@ -264,6 +291,102 @@ func (dut *DUT) GetSockNameWithErrno(ctx context.Context, sockfd int32) (int32, return resp.GetRet(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) } +// GetSockOpt calls getsockopt on the DUT and causes a fatal test failure if it +// doesn't succeed. If more control over the timeout or error handling is +// needed, use GetSockOptWithErrno. Because endianess and the width of values +// might differ between the testbench and DUT architectures, prefer to use a +// more specific GetSockOptXxx function. +func (dut *DUT) GetSockOpt(sockfd, level, optname, optlen int32) []byte { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, optval, err := dut.GetSockOptWithErrno(ctx, sockfd, level, optname, optlen) + if ret != 0 { + dut.t.Fatalf("failed to GetSockOpt: %s", err) + } + return optval +} + +// GetSockOptWithErrno calls getsockopt on the DUT. Because endianess and the +// width of values might differ between the testbench and DUT architectures, +// prefer to use a more specific GetSockOptXxxWithErrno function. +func (dut *DUT) GetSockOptWithErrno(ctx context.Context, sockfd, level, optname, optlen int32) (int32, []byte, error) { + dut.t.Helper() + req := pb.GetSockOptRequest{ + Sockfd: sockfd, + Level: level, + Optname: optname, + Optlen: optlen, + } + resp, err := dut.posixServer.GetSockOpt(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call GetSockOpt: %s", err) + } + return resp.GetRet(), resp.GetOptval(), syscall.Errno(resp.GetErrno_()) +} + +// GetSockOptInt calls getsockopt on the DUT and causes a fatal test failure +// if it doesn't succeed. If more control over the int optval or error handling +// is needed, use GetSockOptIntWithErrno. +func (dut *DUT) GetSockOptInt(sockfd, level, optname int32) int32 { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, intval, err := dut.GetSockOptIntWithErrno(ctx, sockfd, level, optname) + if ret != 0 { + dut.t.Fatalf("failed to GetSockOptInt: %s", err) + } + return intval +} + +// GetSockOptIntWithErrno calls getsockopt with an integer optval. +func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, sockfd, level, optname int32) (int32, int32, error) { + dut.t.Helper() + req := pb.GetSockOptIntRequest{ + Sockfd: sockfd, + Level: level, + Optname: optname, + } + resp, err := dut.posixServer.GetSockOptInt(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call GetSockOptInt: %s", err) + } + return resp.GetRet(), resp.GetIntval(), syscall.Errno(resp.GetErrno_()) +} + +// GetSockOptTimeval calls getsockopt on the DUT and causes a fatal test failure +// if it doesn't succeed. If more control over the timeout or error handling is +// needed, use GetSockOptTimevalWithErrno. 
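//
// For example, the UDP/ICMP error test added later in this change reads a
// pending socket error with the int variant; a receive timeout could be read
// the same way with the timeval variant (SO_RCVTIMEO is only illustrative
// here):
//
//	errno := syscall.Errno(dut.GetSockOptInt(remoteFD, unix.SOL_SOCKET, unix.SO_ERROR))
//	rcvTimeo := dut.GetSockOptTimeval(remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO)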
+func (dut *DUT) GetSockOptTimeval(sockfd, level, optname int32) unix.Timeval { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, timeval, err := dut.GetSockOptTimevalWithErrno(ctx, sockfd, level, optname) + if ret != 0 { + dut.t.Fatalf("failed to GetSockOptTimeval: %s", err) + } + return timeval +} + +// GetSockOptTimevalWithErrno calls getsockopt and returns a timeval. +func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, optname int32) (int32, unix.Timeval, error) { + dut.t.Helper() + req := pb.GetSockOptTimevalRequest{ + Sockfd: sockfd, + Level: level, + Optname: optname, + } + resp, err := dut.posixServer.GetSockOptTimeval(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call GetSockOptTimeval: %s", err) + } + timeval := unix.Timeval{ + Sec: resp.GetTimeval().Seconds, + Usec: resp.GetTimeval().Microseconds, + } + return resp.GetRet(), timeval, syscall.Errno(resp.GetErrno_()) +} + // Listen calls listen on the DUT and causes a fatal test failure if it doesn't // succeed. If more control over the timeout or error handling is needed, use // ListenWithErrno. @@ -320,6 +443,36 @@ func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, fla return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } +// SendTo calls sendto on the DUT and causes a fatal test failure if it doesn't +// succeed. If more control over the timeout or error handling is needed, use +// SendToWithErrno. +func (dut *DUT) SendTo(sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) int32 { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + defer cancel() + ret, err := dut.SendToWithErrno(ctx, sockfd, buf, flags, destAddr) + if ret == -1 { + dut.t.Fatalf("failed to sendto: %s", err) + } + return ret +} + +// SendToWithErrno calls sendto on the DUT. +func (dut *DUT) SendToWithErrno(ctx context.Context, sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) (int32, error) { + dut.t.Helper() + req := pb.SendToRequest{ + Sockfd: sockfd, + Buf: buf, + Flags: flags, + DestAddr: dut.sockaddrToProto(destAddr), + } + resp, err := dut.posixServer.SendTo(ctx, &req) + if err != nil { + dut.t.Fatalf("faled to call SendTo: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + // SetSockOpt calls setsockopt on the DUT and causes a fatal test failure if it // doesn't succeed. If more control over the timeout or error handling is // needed, use SetSockOptWithErrno. Because endianess and the width of values diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 817f5c261..165f62d3b 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -58,7 +58,7 @@ type Layer interface { next() Layer // prev gets a pointer to the Layer encapsulating this one. - prev() Layer + Prev() Layer // setNext sets the pointer to the encapsulated Layer. setNext(Layer) @@ -80,7 +80,8 @@ func (lb *LayerBase) next() Layer { return lb.nextLayer } -func (lb *LayerBase) prev() Layer { +// Prev returns the previous layer. +func (lb *LayerBase) Prev() Layer { return lb.prevLayer } @@ -340,6 +341,8 @@ func (l *IPv4) ToBytes() ([]byte, error) { fields.Protocol = uint8(header.TCPProtocolNumber) case *UDP: fields.Protocol = uint8(header.UDPProtocolNumber) + case *ICMPv4: + fields.Protocol = uint8(header.ICMPv4ProtocolNumber) default: // TODO(b/150301488): Support more protocols as needed. 
return nil, fmt.Errorf("ipv4 header's next layer is unrecognized: %#v", n) @@ -403,6 +406,8 @@ func parseIPv4(b []byte) (Layer, layerParser) { nextParser = parseTCP case header.UDPProtocolNumber: nextParser = parseUDP + case header.ICMPv4ProtocolNumber: + nextParser = parseICMPv4 default: // Assume that the rest is a payload. nextParser = parsePayload @@ -562,7 +567,7 @@ func (l *ICMPv6) ToBytes() ([]byte, error) { if l.Checksum != nil { h.SetChecksum(*l.Checksum) } else { - ipv6 := l.prev().(*IPv6) + ipv6 := l.Prev().(*IPv6) h.SetChecksum(header.ICMPv6Checksum(h, *ipv6.SrcAddr, *ipv6.DstAddr, buffer.VectorisedView{})) } return h, nil @@ -606,6 +611,72 @@ func (l *ICMPv6) merge(other Layer) error { return mergeLayer(l, other) } +// ICMPv4Type is a helper routine that allocates a new header.ICMPv4Type value +// to store t and returns a pointer to it. +func ICMPv4Type(t header.ICMPv4Type) *header.ICMPv4Type { + return &t +} + +// ICMPv4 can construct and match an ICMPv4 encapsulation. +type ICMPv4 struct { + LayerBase + Type *header.ICMPv4Type + Code *uint8 + Checksum *uint16 +} + +func (l *ICMPv4) String() string { + return stringLayer(l) +} + +// ToBytes implements Layer.ToBytes. +func (l *ICMPv4) ToBytes() ([]byte, error) { + b := make([]byte, header.ICMPv4MinimumSize) + h := header.ICMPv4(b) + if l.Type != nil { + h.SetType(*l.Type) + } + if l.Code != nil { + h.SetCode(byte(*l.Code)) + } + if l.Checksum != nil { + h.SetChecksum(*l.Checksum) + return h, nil + } + payload, err := payload(l) + if err != nil { + return nil, err + } + h.SetChecksum(header.ICMPv4Checksum(h, payload)) + return h, nil +} + +// parseICMPv4 parses the bytes as an ICMPv4 header, returning a Layer and a +// parser for the encapsulated payload. +func parseICMPv4(b []byte) (Layer, layerParser) { + h := header.ICMPv4(b) + icmpv4 := ICMPv4{ + Type: ICMPv4Type(h.Type()), + Code: Uint8(h.Code()), + Checksum: Uint16(h.Checksum()), + } + return &icmpv4, parsePayload +} + +func (l *ICMPv4) match(other Layer) bool { + return equalLayer(l, other) +} + +func (l *ICMPv4) length() int { + return header.ICMPv4MinimumSize +} + +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *ICMPv4) merge(other Layer) error { + return mergeLayer(l, other) +} + // TCP can construct and match a TCP encapsulation. type TCP struct { LayerBase @@ -676,25 +747,34 @@ func totalLength(l Layer) int { return totalLength } +// payload returns a buffer.VectorisedView of l's payload. +func payload(l Layer) (buffer.VectorisedView, error) { + var payloadBytes buffer.VectorisedView + for current := l.next(); current != nil; current = current.next() { + payload, err := current.ToBytes() + if err != nil { + return buffer.VectorisedView{}, fmt.Errorf("can't get bytes for next header: %s", payload) + } + payloadBytes.AppendView(payload) + } + return payloadBytes, nil +} + // layerChecksum calculates the checksum of the Layer header, including the -// peusdeochecksum of the layer before it and all the bytes after it.. +// peusdeochecksum of the layer before it and all the bytes after it. func layerChecksum(l Layer, protoNumber tcpip.TransportProtocolNumber) (uint16, error) { totalLength := uint16(totalLength(l)) var xsum uint16 - switch s := l.prev().(type) { + switch s := l.Prev().(type) { case *IPv4: xsum = header.PseudoHeaderChecksum(protoNumber, *s.SrcAddr, *s.DstAddr, totalLength) default: // TODO(b/150301488): Support more protocols, like IPv6. 
return 0, fmt.Errorf("can't get src and dst addr from previous layer: %#v", s) } - var payloadBytes buffer.VectorisedView - for current := l.next(); current != nil; current = current.next() { - payload, err := current.ToBytes() - if err != nil { - return 0, fmt.Errorf("can't get bytes for next header: %s", payload) - } - payloadBytes.AppendView(payload) + payloadBytes, err := payload(l) + if err != nil { + return 0, err } xsum = header.ChecksumVV(payloadBytes, xsum) return xsum, nil diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 42f87e3f3..6beccbfd0 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -28,6 +28,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "udp_icmp_error_propagation", + srcs = ["udp_icmp_error_propagation_test.go"], + # TODO(b/153926291): Fix netstack then remove the line below. + netstack = False, + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "tcp_window_shrink", srcs = ["tcp_window_shrink_test.go"], diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go new file mode 100644 index 000000000..9e4810842 --- /dev/null +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -0,0 +1,209 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package udp_icmp_error_propagation_test + +import ( + "context" + "fmt" + "net" + "syscall" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +type connected bool + +func (c connected) String() string { + if c { + return "Connected" + } + return "Connectionless" +} + +type icmpError int + +const ( + portUnreachable icmpError = iota + timeToLiveExceeded +) + +func (e icmpError) String() string { + switch e { + case portUnreachable: + return "PortUnreachable" + case timeToLiveExceeded: + return "TimeToLiveExpired" + } + return "Unknown ICMP error" +} + +func (e icmpError) ToICMPv4() *tb.ICMPv4 { + switch e { + case portUnreachable: + return &tb.ICMPv4{Type: tb.ICMPv4Type(header.ICMPv4DstUnreachable), Code: tb.Uint8(header.ICMPv4PortUnreachable)} + case timeToLiveExceeded: + return &tb.ICMPv4{Type: tb.ICMPv4Type(header.ICMPv4TimeExceeded), Code: tb.Uint8(header.ICMPv4TTLExceeded)} + } + return nil +} + +type errorDetectionFunc func(context.Context, *tb.DUT, *tb.UDPIPv4, int32, syscall.Errno) error + +// testRecv tests observing the ICMP error through the recv syscall. +// A packet is sent to the DUT, and if wantErrno is non-zero, then the first +// recv should fail and the second should succeed. Otherwise if wantErrno is +// zero then the first recv should succeed immediately. 
+func testRecv(ctx context.Context, dut *tb.DUT, conn *tb.UDPIPv4, remoteFD int32, wantErrno syscall.Errno) error { + conn.Send(tb.UDP{}) + + if wantErrno != syscall.Errno(0) { + ctx, cancel := context.WithTimeout(ctx, time.Second) + defer cancel() + ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0) + if ret != -1 { + return fmt.Errorf("recv after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) + } + if err != wantErrno { + return fmt.Errorf("recv after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) + } + } + + dut.Recv(remoteFD, 100, 0) + return nil +} + +// testSendTo tests observing the ICMP error through the send syscall. +// If wantErrno is non-zero, the first send should fail and a subsequent send +// should suceed; while if wantErrno is zero then the first send should just +// succeed. +func testSendTo(ctx context.Context, dut *tb.DUT, conn *tb.UDPIPv4, remoteFD int32, wantErrno syscall.Errno) error { + if wantErrno != syscall.Errno(0) { + ctx, cancel := context.WithTimeout(ctx, time.Second) + defer cancel() + ret, err := dut.SendToWithErrno(ctx, remoteFD, nil, 0, conn.LocalAddr()) + + if ret != -1 { + return fmt.Errorf("sendto after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) + } + if err != wantErrno { + return fmt.Errorf("sendto after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) + } + } + + dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) + if _, err := conn.Expect(tb.UDP{}, time.Second); err != nil { + return fmt.Errorf("did not receive UDP packet as expected: %s", err) + } + return nil +} + +func testSockOpt(_ context.Context, dut *tb.DUT, conn *tb.UDPIPv4, remoteFD int32, wantErrno syscall.Errno) error { + errno := syscall.Errno(dut.GetSockOptInt(remoteFD, unix.SOL_SOCKET, unix.SO_ERROR)) + if errno != wantErrno { + return fmt.Errorf("SO_ERROR sockopt after ICMP error is (%[1]d) %[1]v, expected (%[2]d) %[2]v", errno, wantErrno) + } + + // Check that after clearing socket error, sending doesn't fail. + dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) + if _, err := conn.Expect(tb.UDP{}, time.Second); err != nil { + return fmt.Errorf("did not receive UDP packet as expected: %s", err) + } + return nil +} + +type testParameters struct { + connected connected + icmpErr icmpError + wantErrno syscall.Errno + f errorDetectionFunc + fName string +} + +// TestUDPICMPErrorPropagation tests that ICMP PortUnreachable error messages +// destined for a "connected" UDP socket are observable on said socket by: +// 1. causing the next send to fail with ECONNREFUSED, +// 2. causing the next recv to fail with ECONNREFUSED, or +// 3. returning ECONNREFUSED through the SO_ERROR socket option. 
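The test below derives its expectations from a single rule: only a port-unreachable ICMP error delivered to a connected UDP socket is expected to surface as an errno (ECONNREFUSED); a time-exceeded error, or any error arriving on an unconnected socket, is expected to produce no error at all. A minimal restatement of that rule against the types defined above, shown separately only to make the rule explicit:

// expectedErrno spells out the wantErrno computation used inside the test
// body below.
func expectedErrno(c connected, e icmpError) syscall.Errno {
	if c && e == portUnreachable {
		return unix.ECONNREFUSED
	}
	return syscall.Errno(0)
}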
+func TestUDPICMPErrorPropagation(t *testing.T) { + var testCases []testParameters + for _, c := range []connected{true, false} { + for _, i := range []icmpError{portUnreachable, timeToLiveExceeded} { + e := syscall.Errno(0) + if c && i == portUnreachable { + e = unix.ECONNREFUSED + } + for _, f := range []struct { + name string + f errorDetectionFunc + }{ + {"SendTo", testSendTo}, + {"Recv", testRecv}, + {"SockOpt", testSockOpt}, + } { + testCases = append(testCases, testParameters{c, i, e, f.f, f.name}) + } + } + } + + for _, tt := range testCases { + t.Run(fmt.Sprintf("%s/%s/%s", tt.connected, tt.icmpErr, tt.fName), func(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + + remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(remoteFD) + + conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + defer conn.Close() + + if tt.connected { + dut.Connect(remoteFD, conn.LocalAddr()) + } + + dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) + udp, err := conn.Expect(tb.UDP{}, time.Second) + if err != nil { + t.Fatalf("did not receive message from DUT: %s", err) + } + + if tt.icmpErr == timeToLiveExceeded { + ip, ok := udp.Prev().(*tb.IPv4) + if !ok { + t.Fatalf("expected %s to be IPv4", udp.Prev()) + } + *ip.TTL = 1 + // Let serialization recalculate the checksum since we set the + // TTL to 1. + ip.Checksum = nil + + // Note that the ICMP payload is valid in this case because the UDP + // payload is empty. If the UDP payload were not empty, the packet + // length during serialization may not be calculated correctly, + // resulting in a mal-formed packet. + conn.SendIP(tt.icmpErr.ToICMPv4(), ip, udp) + } else { + conn.SendIP(tt.icmpErr.ToICMPv4(), udp.Prev(), udp) + } + + if err := tt.f(context.Background(), &dut, &conn, remoteFD, tt.wantErrno); err != nil { + t.Fatal(err) + } + }) + } +} -- cgit v1.2.3 From ce19497c1c0829af6ba56f0cc68e3a4cb33cf1c8 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 28 Apr 2020 20:11:43 -0700 Subject: Fix Unix socket permissions. Enforce write permission checks in BoundEndpointAt, which corresponds to the permission checks in Linux (net/unix/af_unix.c:unix_find_other). Also, create bound socket files with the correct permissions in VFS2. Fixes #2324. PiperOrigin-RevId: 308949084 --- pkg/sentry/fsimpl/ext/filesystem.go | 5 ++- pkg/sentry/fsimpl/kernfs/filesystem.go | 5 ++- pkg/sentry/fsimpl/tmpfs/filesystem.go | 3 ++ pkg/sentry/socket/unix/unix.go | 5 ++- pkg/sentry/socket/unix/unix_vfs2.go | 12 ++++--- pkg/sentry/vfs/anonfs.go | 3 ++ pkg/sentry/vfs/filesystem.go | 8 ++++- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket.cc | 61 ++++++++++++++++++++++++++++++++++ 9 files changed, 94 insertions(+), 9 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index 6c882d0b6..77b644275 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -486,10 +486,13 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. 
func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { - _, _, err := fs.walk(rp, false) + _, inode, err := fs.walk(rp, false) if err != nil { return nil, err } + if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { + return nil, err + } // TODO(b/134676337): Support sockets. return nil, syserror.ECONNREFUSED diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 40c1b2104..4a12ae245 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -768,12 +768,15 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { fs.mu.RLock() - _, _, err := fs.walkExistingLocked(ctx, rp) + _, inode, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() fs.processDeferredDecRefs() if err != nil { return nil, err } + if err := inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil { + return nil, err + } return nil, syserror.ECONNREFUSED } diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 388b98bef..36ffcb592 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -704,6 +704,9 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath if err != nil { return nil, err } + if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { + return nil, err + } switch impl := d.inode.impl.(type) { case *socketFile: return impl.ep, nil diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index ddd0eda4b..5b29e9d7f 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -323,7 +323,10 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { // Create the socket. // - // TODO(gvisor.dev/issue/2324): Correctly set file permissions. + // Note that the file permissions here are not set correctly (see + // gvisor.dev/issue/2324). There is no convenient way to get permissions + // on the socket referred to by s, so we will leave this discrepancy + // unresolved until VFS2 replaces this code. childDir, err := d.Bind(t, t.FSContext().RootDirectory(), name, bep, fs.FilePermissions{User: fs.PermMask{Read: true}}) if err != nil { return syserr.ErrPortInUse diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 433cde9cb..23db93f33 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -197,11 +197,13 @@ func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { Start: start, Path: path, } - err := t.Kernel().VFS().MknodAt(t, t.Credentials(), &pop, &vfs.MknodOptions{ - // TODO(gvisor.dev/issue/2324): The file permissions should be taken - // from s and t.FSContext().Umask() (see net/unix/af_unix.c:unix_bind), - // but VFS1 just always uses 0400. Resolve this inconsistency. - Mode: linux.S_IFSOCK | 0400, + stat, err := s.vfsfd.Stat(t, vfs.StatOptions{Mask: linux.STATX_MODE}) + if err != nil { + return syserr.FromError(err) + } + err = t.Kernel().VFS().MknodAt(t, t.Credentials(), &pop, &vfs.MknodOptions{ + // File permissions correspond to net/unix/af_unix.c:unix_bind. 
+ Mode: linux.FileMode(linux.S_IFSOCK | uint(stat.Mode)&^t.FSContext().Umask()), Endpoint: bep, }) if err == syserror.EEXIST { diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index b1a998590..981bd8caa 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -241,6 +241,9 @@ func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath if !rp.Final() { return nil, syserror.ENOTDIR } + if err := GenericCheckPermissions(rp.Credentials(), MayWrite, anonFileMode, anonFileUID, anonFileGID); err != nil { + return nil, err + } return nil, syserror.ECONNREFUSED } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 70385a21f..1edd584c9 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -494,7 +494,13 @@ type FilesystemImpl interface { // BoundEndpointAt returns the Unix socket endpoint bound at the path rp. // - // - If a non-socket file exists at rp, then BoundEndpointAt returns ECONNREFUSED. + // Errors: + // + // - If the file does not have write permissions, then BoundEndpointAt + // returns EACCES. + // + // - If a non-socket file exists at rp, then BoundEndpointAt returns + // ECONNREFUSED. BoundEndpointAt(ctx context.Context, rp *ResolvingPath, opts BoundEndpointOptions) (transport.BoundEndpoint, error) // PrependPath prepends a path from vd to vd.Mount().Root() to b. diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index d9095c95f..837e56042 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -365,6 +365,7 @@ cc_binary( ":socket_test_util", "//test/util:file_descriptor", gtest, + "//test/util:temp_umask", "//test/util:test_main", "//test/util:test_util", ], diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc index 3a07ac8d2..703d594a2 100644 --- a/test/syscalls/linux/socket.cc +++ b/test/syscalls/linux/socket.cc @@ -13,11 +13,14 @@ // limitations under the License. #include +#include +#include #include #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" +#include "test/util/temp_umask.h" #include "test/util/test_util.h" namespace gvisor { @@ -58,11 +61,69 @@ TEST(SocketTest, ProtocolInet) { } } +TEST(SocketTest, UnixSocketFileMode) { + // TODO(gvisor.dev/issue/1624): Re-enable this test once VFS1 is deleted. It + // should pass in VFS2. + SKIP_IF(IsRunningOnGvisor()); + + FileDescriptor bound = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + + // The permissions of the file created with bind(2) should be defined by the + // permissions of the bound socket and the umask. + mode_t sock_perm = 0765, mask = 0123; + ASSERT_THAT(fchmod(bound.get(), sock_perm), SyscallSucceeds()); + TempUmask m(mask); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX)); + ASSERT_THAT(bind(bound.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallSucceeds()); + + struct stat statbuf = {}; + ASSERT_THAT(stat(addr.sun_path, &statbuf), SyscallSucceeds()); + EXPECT_EQ(statbuf.st_mode, S_IFSOCK | sock_perm & ~mask); +} + +TEST(SocketTest, UnixConnectNeedsWritePerm) { + // TODO(gvisor.dev/issue/1624): Re-enable this test once VFS1 is deleted. It + // should succeed in VFS2. 
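The VFS2 path above now derives the bound file's mode from the socket's own mode and the task's umask instead of hard-coding 0400, and the UnixSocketFileMode test just added checks exactly that arithmetic. A small sketch of the computation with the test's values (a socket fchmod'ed to 0765 bound under umask 0123 yields S_IFSOCK | 0644):

// boundSocketMode mirrors linux.S_IFSOCK | (mode &^ umask) as used in
// unix_vfs2.go above; 0140000 is the standard S_IFSOCK value.
func boundSocketMode(sockMode, umask uint32) uint32 {
	const sIFSOCK = 0140000
	return sIFSOCK | (sockMode &^ umask)
}

// boundSocketMode(0765, 0123) == 0140000 | 0644, matching the C++ test's
// EXPECT_EQ(statbuf.st_mode, S_IFSOCK | sock_perm & ~mask).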
+ SKIP_IF(IsRunningOnGvisor()); + + FileDescriptor bound = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX)); + ASSERT_THAT(bind(bound.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallSucceeds()); + ASSERT_THAT(listen(bound.get(), 1), SyscallSucceeds()); + + // Connect should fail without write perms. + ASSERT_THAT(chmod(addr.sun_path, 0500), SyscallSucceeds()); + FileDescriptor client = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + EXPECT_THAT(connect(client.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallFailsWithErrno(EACCES)); + + // Connect should succeed with write perms. + ASSERT_THAT(chmod(addr.sun_path, 0200), SyscallSucceeds()); + EXPECT_THAT(connect(client.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallSucceeds()); +} + using SocketOpenTest = ::testing::TestWithParam; // UDS cannot be opened. TEST_P(SocketOpenTest, Unix) { // FIXME(b/142001530): Open incorrectly succeeds on gVisor. + // + // TODO(gvisor.dev/issue/1624): Re-enable this test once VFS1 is deleted. It + // should succeed in VFS2. SKIP_IF(IsRunningOnGvisor()); FileDescriptor bound = -- cgit v1.2.3 From 4875cda8d1f237b9d552468592d036ea475e4f8a Mon Sep 17 00:00:00 2001 From: Zeling Feng Date: Thu, 30 Apr 2020 07:39:24 -0700 Subject: Make tcp_close_wait_ack_test more accurate Previously the test used an out-dated window size which is advertised during the handshake to generate testing packets, but the window size has changed since the handshake; currently it is using the most recent one which is advertised in DUT's ACK to our FIN packet to generate the testing outside-the-window packets. PiperOrigin-RevId: 309222921 --- test/packetimpact/tests/tcp_close_wait_ack_test.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/tcp_close_wait_ack_test.go b/test/packetimpact/tests/tcp_close_wait_ack_test.go index eb4cc7a65..153ce285b 100644 --- a/test/packetimpact/tests/tcp_close_wait_ack_test.go +++ b/test/packetimpact/tests/tcp_close_wait_ack_test.go @@ -28,7 +28,7 @@ import ( func TestCloseWaitAck(t *testing.T) { for _, tt := range []struct { description string - makeTestingTCP func(conn *tb.TCPIPv4, seqNumOffset seqnum.Size) tb.TCP + makeTestingTCP func(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP seqNumOffset seqnum.Size expectAck bool }{ @@ -52,12 +52,14 @@ func TestCloseWaitAck(t *testing.T) { // Send a FIN to DUT to intiate the active close conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagFin)}) - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + gotTCP, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) + if err != nil { t.Fatalf("expected an ACK for our fin and DUT should enter CLOSE_WAIT: %s", err) } + windowSize := seqnum.Size(*gotTCP.WindowSize) // Send a segment with OTW Seq / unacc ACK and expect an ACK back - conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset), &tb.Payload{Bytes: []byte("Sample Data")}) + conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset, windowSize), &tb.Payload{Bytes: []byte("Sample Data")}) gotAck, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) if tt.expectAck && err != nil { t.Fatalf("expected an ack but got none: %s", err) @@ -85,9 +87,8 @@ func TestCloseWaitAck(t *testing.T) { // This generates an 
segment with seqnum = RCV.NXT + RCV.WND + seqNumOffset, the // generated segment is only acceptable when seqNumOffset is 0, otherwise an ACK // is expected from the receiver. -func GenerateOTWSeqSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size) tb.TCP { - windowSize := seqnum.Size(*conn.SynAck().WindowSize) - lastAcceptable := conn.LocalSeqNum().Add(windowSize - 1) +func GenerateOTWSeqSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP { + lastAcceptable := conn.LocalSeqNum().Add(windowSize) otwSeq := uint32(lastAcceptable.Add(seqNumOffset)) return tb.TCP{SeqNum: tb.Uint32(otwSeq), Flags: tb.Uint8(header.TCPFlagAck)} } @@ -95,7 +96,7 @@ func GenerateOTWSeqSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size) tb.TCP { // This generates an segment with acknum = SND.NXT + seqNumOffset, the generated // segment is only acceptable when seqNumOffset is 0, otherwise an ACK is // expected from the receiver. -func GenerateUnaccACKSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size) tb.TCP { +func GenerateUnaccACKSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP { lastAcceptable := conn.RemoteSeqNum() unaccAck := uint32(lastAcceptable.Add(seqNumOffset)) return tb.TCP{AckNum: tb.Uint32(unaccAck), Flags: tb.Uint8(header.TCPFlagAck)} -- cgit v1.2.3 From e7b8a71156b53f835c8d5ea01bdb1859e740649d Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Fri, 1 May 2020 13:51:10 -0700 Subject: Internal change. PiperOrigin-RevId: 309467878 --- .../tests/udp_icmp_error_propagation_test.go | 252 ++++++++++++--------- 1 file changed, 148 insertions(+), 104 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index 9e4810842..66f64eaa2 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -27,9 +27,9 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) -type connected bool +type connectionMode bool -func (c connected) String() string { +func (c connectionMode) String() string { if c { return "Connected" } @@ -63,147 +63,191 @@ func (e icmpError) ToICMPv4() *tb.ICMPv4 { return nil } -type errorDetectionFunc func(context.Context, *tb.DUT, *tb.UDPIPv4, int32, syscall.Errno) error +type errorDetection struct { + name string + useValidConn bool + f func(context.Context, testData) error +} + +type testData struct { + dut *tb.DUT + conn *tb.UDPIPv4 + remoteFD int32 + remotePort uint16 + cleanFD int32 + cleanPort uint16 + wantErrno syscall.Errno +} -// testRecv tests observing the ICMP error through the recv syscall. -// A packet is sent to the DUT, and if wantErrno is non-zero, then the first -// recv should fail and the second should succeed. Otherwise if wantErrno is -// zero then the first recv should succeed immediately. -func testRecv(ctx context.Context, dut *tb.DUT, conn *tb.UDPIPv4, remoteFD int32, wantErrno syscall.Errno) error { - conn.Send(tb.UDP{}) +// testRecv tests observing the ICMP error through the recv syscall. A packet +// is sent to the DUT, and if wantErrno is non-zero, then the first recv should +// fail and the second should succeed. Otherwise if wantErrno is zero then the +// first recv should succeed immediately. +func testRecv(ctx context.Context, d testData) error { + // Check that receiving on the clean socket works. 
+ d.conn.Send(tb.UDP{DstPort: &d.cleanPort}) + d.dut.Recv(d.cleanFD, 100, 0) - if wantErrno != syscall.Errno(0) { + d.conn.Send(tb.UDP{}) + + if d.wantErrno != syscall.Errno(0) { ctx, cancel := context.WithTimeout(ctx, time.Second) defer cancel() - ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0) + ret, _, err := d.dut.RecvWithErrno(ctx, d.remoteFD, 100, 0) if ret != -1 { - return fmt.Errorf("recv after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) + return fmt.Errorf("recv after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno) } - if err != wantErrno { - return fmt.Errorf("recv after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) + if err != d.wantErrno { + return fmt.Errorf("recv after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno) } } - dut.Recv(remoteFD, 100, 0) + d.dut.Recv(d.remoteFD, 100, 0) return nil } -// testSendTo tests observing the ICMP error through the send syscall. -// If wantErrno is non-zero, the first send should fail and a subsequent send +// testSendTo tests observing the ICMP error through the send syscall. If +// wantErrno is non-zero, the first send should fail and a subsequent send // should suceed; while if wantErrno is zero then the first send should just // succeed. -func testSendTo(ctx context.Context, dut *tb.DUT, conn *tb.UDPIPv4, remoteFD int32, wantErrno syscall.Errno) error { - if wantErrno != syscall.Errno(0) { +func testSendTo(ctx context.Context, d testData) error { + // Check that sending on the clean socket works. + d.dut.SendTo(d.cleanFD, nil, 0, d.conn.LocalAddr()) + if _, err := d.conn.Expect(tb.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil { + return fmt.Errorf("did not receive UDP packet from clean socket on DUT: %s", err) + } + + if d.wantErrno != syscall.Errno(0) { ctx, cancel := context.WithTimeout(ctx, time.Second) defer cancel() - ret, err := dut.SendToWithErrno(ctx, remoteFD, nil, 0, conn.LocalAddr()) + ret, err := d.dut.SendToWithErrno(ctx, d.remoteFD, nil, 0, d.conn.LocalAddr()) if ret != -1 { - return fmt.Errorf("sendto after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) + return fmt.Errorf("sendto after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno) } - if err != wantErrno { - return fmt.Errorf("sendto after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) + if err != d.wantErrno { + return fmt.Errorf("sendto after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno) } } - dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) - if _, err := conn.Expect(tb.UDP{}, time.Second); err != nil { + d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr()) + if _, err := d.conn.Expect(tb.UDP{}, time.Second); err != nil { return fmt.Errorf("did not receive UDP packet as expected: %s", err) } return nil } -func testSockOpt(_ context.Context, dut *tb.DUT, conn *tb.UDPIPv4, remoteFD int32, wantErrno syscall.Errno) error { - errno := syscall.Errno(dut.GetSockOptInt(remoteFD, unix.SOL_SOCKET, unix.SO_ERROR)) - if errno != wantErrno { - return fmt.Errorf("SO_ERROR sockopt after ICMP error is (%[1]d) %[1]v, expected (%[2]d) %[2]v", errno, wantErrno) +func testSockOpt(_ context.Context, d testData) error { + // Check that there's no pending error on the clean socket. 
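testSockOpt depends on the usual SO_ERROR semantics: getsockopt(SOL_SOCKET, SO_ERROR) returns the pending asynchronous error and clears it, which is why the function can immediately send again afterwards. A minimal host-side sketch of the same check (fd is an assumed, already-connected UDP socket descriptor):

// pendingError reads and clears the socket's pending error; a zero value
// means nothing is pending.
func pendingError(fd int) (syscall.Errno, error) {
	v, err := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_ERROR)
	if err != nil {
		return 0, err
	}
	return syscall.Errno(v), nil
}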
+ if errno := syscall.Errno(d.dut.GetSockOptInt(d.cleanFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != syscall.Errno(0) { + return fmt.Errorf("unexpected error (%[1]d) %[1]v on clean socket", errno) + } + + if errno := syscall.Errno(d.dut.GetSockOptInt(d.remoteFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != d.wantErrno { + return fmt.Errorf("SO_ERROR sockopt after ICMP error is (%[1]d) %[1]v, expected (%[2]d) %[2]v", errno, d.wantErrno) } // Check that after clearing socket error, sending doesn't fail. - dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) - if _, err := conn.Expect(tb.UDP{}, time.Second); err != nil { + d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr()) + if _, err := d.conn.Expect(tb.UDP{}, time.Second); err != nil { return fmt.Errorf("did not receive UDP packet as expected: %s", err) } return nil } -type testParameters struct { - connected connected - icmpErr icmpError - wantErrno syscall.Errno - f errorDetectionFunc - fName string -} - -// TestUDPICMPErrorPropagation tests that ICMP PortUnreachable error messages -// destined for a "connected" UDP socket are observable on said socket by: -// 1. causing the next send to fail with ECONNREFUSED, -// 2. causing the next recv to fail with ECONNREFUSED, or -// 3. returning ECONNREFUSED through the SO_ERROR socket option. +// TestUDPICMPErrorPropagation tests that ICMP error messages in response to +// UDP datagrams are processed correctly. RFC 1122 section 4.1.3.3 states that: +// "UDP MUST pass to the application layer all ICMP error messages that it +// receives from the IP layer." +// +// The test cases are parametrized in 3 dimensions: 1. the UDP socket is either +// put into connection mode or left connectionless, 2. the ICMP message type +// and code, and 3. the method by which the ICMP error is observed on the +// socket: sendto, recv, or getsockopt(SO_ERROR). +// +// Linux's udp(7) man page states: "All fatal errors will be passed to the user +// as an error return even when the socket is not connected. This includes +// asynchronous errors received from the network." In practice, the only +// combination of parameters to the test that causes an error to be observable +// on the UDP socket is receiving a port unreachable message on a connected +// socket. func TestUDPICMPErrorPropagation(t *testing.T) { - var testCases []testParameters - for _, c := range []connected{true, false} { - for _, i := range []icmpError{portUnreachable, timeToLiveExceeded} { - e := syscall.Errno(0) - if c && i == portUnreachable { - e = unix.ECONNREFUSED + for _, connect := range []connectionMode{true, false} { + for _, icmpErr := range []icmpError{portUnreachable, timeToLiveExceeded} { + wantErrno := syscall.Errno(0) + if connect && icmpErr == portUnreachable { + wantErrno = unix.ECONNREFUSED } - for _, f := range []struct { - name string - f errorDetectionFunc - }{ - {"SendTo", testSendTo}, - {"Recv", testRecv}, - {"SockOpt", testSockOpt}, + for _, errDetect := range []errorDetection{ + errorDetection{"SendTo", false, testSendTo}, + // Send to an address that's different from the one that caused an ICMP + // error to be returned. 
+ errorDetection{"SendToValid", true, testSendTo}, + errorDetection{"Recv", false, testRecv}, + errorDetection{"SockOpt", false, testSockOpt}, } { - testCases = append(testCases, testParameters{c, i, e, f.f, f.name}) + t.Run(fmt.Sprintf("%s/%s/%s", connect, icmpErr, errDetect.name), func(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + + remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(remoteFD) + + // Create a second, clean socket on the DUT to ensure that the ICMP + // error messages only affect the sockets they are intended for. + cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(cleanFD) + + conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + defer conn.Close() + + if connect { + dut.Connect(remoteFD, conn.LocalAddr()) + dut.Connect(cleanFD, conn.LocalAddr()) + } + + dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) + udp, err := conn.Expect(tb.UDP{}, time.Second) + if err != nil { + t.Fatalf("did not receive message from DUT: %s", err) + } + + if icmpErr == timeToLiveExceeded { + ip, ok := udp.Prev().(*tb.IPv4) + if !ok { + t.Fatalf("expected %s to be IPv4", udp.Prev()) + } + *ip.TTL = 1 + // Let serialization recalculate the checksum since we set the TTL + // to 1. + ip.Checksum = nil + + // Note that the ICMP payload is valid in this case because the UDP + // payload is empty. If the UDP payload were not empty, the packet + // length during serialization may not be calculated correctly, + // resulting in a mal-formed packet. + conn.SendIP(icmpErr.ToICMPv4(), ip, udp) + } else { + conn.SendIP(icmpErr.ToICMPv4(), udp.Prev(), udp) + } + + errDetectConn := &conn + if errDetect.useValidConn { + // connClean is a UDP socket on the test runner that was not + // involved in the generation of the ICMP error. As such, + // interactions between it and the the DUT should be independent of + // the ICMP error at least at the port level. + connClean := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + defer connClean.Close() + + errDetectConn = &connClean + } + + if err := errDetect.f(context.Background(), testData{&dut, errDetectConn, remoteFD, remotePort, cleanFD, cleanPort, wantErrno}); err != nil { + t.Fatal(err) + } + }) } } } - - for _, tt := range testCases { - t.Run(fmt.Sprintf("%s/%s/%s", tt.connected, tt.icmpErr, tt.fName), func(t *testing.T) { - dut := tb.NewDUT(t) - defer dut.TearDown() - - remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) - defer dut.Close(remoteFD) - - conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) - defer conn.Close() - - if tt.connected { - dut.Connect(remoteFD, conn.LocalAddr()) - } - - dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) - udp, err := conn.Expect(tb.UDP{}, time.Second) - if err != nil { - t.Fatalf("did not receive message from DUT: %s", err) - } - - if tt.icmpErr == timeToLiveExceeded { - ip, ok := udp.Prev().(*tb.IPv4) - if !ok { - t.Fatalf("expected %s to be IPv4", udp.Prev()) - } - *ip.TTL = 1 - // Let serialization recalculate the checksum since we set the - // TTL to 1. - ip.Checksum = nil - - // Note that the ICMP payload is valid in this case because the UDP - // payload is empty. If the UDP payload were not empty, the packet - // length during serialization may not be calculated correctly, - // resulting in a mal-formed packet. 
- conn.SendIP(tt.icmpErr.ToICMPv4(), ip, udp) - } else { - conn.SendIP(tt.icmpErr.ToICMPv4(), udp.Prev(), udp) - } - - if err := tt.f(context.Background(), &dut, &conn, remoteFD, tt.wantErrno); err != nil { - t.Fatal(err) - } - }) - } } -- cgit v1.2.3 From b660f16d18827f0310594c80d9387de11430f15f Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Fri, 27 Mar 2020 12:18:45 -0700 Subject: Support for connection tracking of TCP packets. Connection tracking is used to track packets in prerouting and output hooks of iptables. The NAT rules modify the tuples in connections. The connection tracking code modifies the packets by looking at the modified tuples. --- pkg/sentry/socket/netfilter/netfilter.go | 14 +- pkg/sentry/socket/netfilter/targets.go | 3 +- pkg/sentry/socket/netfilter/tcp_matcher.go | 17 +- pkg/sentry/socket/netfilter/udp_matcher.go | 17 +- pkg/tcpip/header/tcp.go | 17 + pkg/tcpip/network/ipv4/ipv4.go | 46 ++- pkg/tcpip/stack/BUILD | 2 + pkg/tcpip/stack/conntrack.go | 480 ++++++++++++++++++++++ pkg/tcpip/stack/iptables.go | 51 ++- pkg/tcpip/stack/iptables_targets.go | 115 +++--- pkg/tcpip/stack/iptables_types.go | 4 +- pkg/tcpip/stack/nic.go | 4 +- pkg/tcpip/stack/packet_buffer.go | 4 + pkg/tcpip/stack/route.go | 13 + pkg/tcpip/stack/stack.go | 19 + pkg/tcpip/transport/tcp/BUILD | 2 +- pkg/tcpip/transport/tcp/rcv.go | 19 +- pkg/tcpip/transport/tcp/rcv_test.go | 6 +- pkg/tcpip/transport/tcpconntrack/BUILD | 1 - pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go | 16 +- test/iptables/filter_output.go | 2 +- test/iptables/iptables_test.go | 66 ++- test/iptables/iptables_util.go | 2 +- test/iptables/nat.go | 103 ++++- 24 files changed, 858 insertions(+), 165 deletions(-) create mode 100644 pkg/tcpip/stack/conntrack.go (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 72d093aa8..40736fb38 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -251,7 +251,7 @@ func marshalTarget(target stack.Target) []byte { case stack.ReturnTarget: return marshalStandardTarget(stack.RuleReturn) case stack.RedirectTarget: - return marshalRedirectTarget() + return marshalRedirectTarget(tg) case JumpTarget: return marshalJumpTarget(tg) default: @@ -288,7 +288,7 @@ func marshalErrorTarget(errorName string) []byte { return binary.Marshal(ret, usermem.ByteOrder, target) } -func marshalRedirectTarget() []byte { +func marshalRedirectTarget(rt stack.RedirectTarget) []byte { // This is a redirect target named redirect target := linux.XTRedirectTarget{ Target: linux.XTEntryTarget{ @@ -298,6 +298,16 @@ func marshalRedirectTarget() []byte { copy(target.Target.Name[:], redirectTargetName) ret := make([]byte, 0, linux.SizeOfXTRedirectTarget) + target.NfRange.RangeSize = 1 + if rt.RangeProtoSpecified { + target.NfRange.RangeIPV4.Flags |= linux.NF_NAT_RANGE_PROTO_SPECIFIED + } + // Convert port from little endian to big endian. 
+ port := make([]byte, 2) + binary.LittleEndian.PutUint16(port, rt.MinPort) + target.NfRange.RangeIPV4.MinPort = binary.BigEndian.Uint16(port) + binary.LittleEndian.PutUint16(port, rt.MaxPort) + target.NfRange.RangeIPV4.MaxPort = binary.BigEndian.Uint16(port) return binary.Marshal(ret, usermem.ByteOrder, target) } diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go index c948de876..84abe8d29 100644 --- a/pkg/sentry/socket/netfilter/targets.go +++ b/pkg/sentry/socket/netfilter/targets.go @@ -15,6 +15,7 @@ package netfilter import ( + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -29,6 +30,6 @@ type JumpTarget struct { } // Action implements stack.Target.Action. -func (jt JumpTarget) Action(stack.PacketBuffer) (stack.RuleVerdict, int) { +func (jt JumpTarget) Action(*stack.PacketBuffer, *stack.ConnTrackTable, stack.Hook, *stack.GSO, *stack.Route, tcpip.Address) (stack.RuleVerdict, int) { return stack.RuleJump, jt.RuleNum } diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go index 55c0f04f3..57a1e1c12 100644 --- a/pkg/sentry/socket/netfilter/tcp_matcher.go +++ b/pkg/sentry/socket/netfilter/tcp_matcher.go @@ -120,14 +120,27 @@ func (tm *TCPMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceNa if pkt.TransportHeader != nil { tcpHeader = header.TCP(pkt.TransportHeader) } else { + var length int + if hook == stack.Prerouting { + // The network header hasn't been parsed yet. We have to do it here. + hdr, ok := pkt.Data.PullUp(header.IPv4MinimumSize) + if !ok { + // There's no valid TCP header here, so we hotdrop the + // packet. + return false, true + } + h := header.IPv4(hdr) + pkt.NetworkHeader = hdr + length = int(h.HeaderLength()) + } // The TCP header hasn't been parsed yet. We have to do it here. - hdr, ok := pkt.Data.PullUp(header.TCPMinimumSize) + hdr, ok := pkt.Data.PullUp(length + header.TCPMinimumSize) if !ok { // There's no valid TCP header here, so we hotdrop the // packet. return false, true } - tcpHeader = header.TCP(hdr) + tcpHeader = header.TCP(hdr[length:]) } // Check whether the source and destination ports are within the diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go index 04d03d494..cfa9e621d 100644 --- a/pkg/sentry/socket/netfilter/udp_matcher.go +++ b/pkg/sentry/socket/netfilter/udp_matcher.go @@ -119,14 +119,27 @@ func (um *UDPMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceNa if pkt.TransportHeader != nil { udpHeader = header.UDP(pkt.TransportHeader) } else { + var length int + if hook == stack.Prerouting { + // The network header hasn't been parsed yet. We have to do it here. + hdr, ok := pkt.Data.PullUp(header.IPv4MinimumSize) + if !ok { + // There's no valid UDP header here, so we hotdrop the + // packet. + return false, true + } + h := header.IPv4(hdr) + pkt.NetworkHeader = hdr + length = int(h.HeaderLength()) + } // The UDP header hasn't been parsed yet. We have to do it here. - hdr, ok := pkt.Data.PullUp(header.UDPMinimumSize) + hdr, ok := pkt.Data.PullUp(length + header.UDPMinimumSize) if !ok { // There's no valid UDP header here, so we hotdrop the // packet. 
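In Prerouting the network header has not been parsed yet, so the TCP and UDP matchers above first pull up the IPv4 header, read its length, and only then slice out the transport header at that offset. The offset is simply IHL x 4, taken from the low nibble of the first IPv4 byte; a self-contained sketch of the computation (header.IPv4.HeaderLength() is the equivalent helper):

// ipv4HeaderLength returns the byte offset of the transport header, i.e. the
// IPv4 header length (IHL * 4), or false if the buffer is too short or the
// IHL field is invalid.
func ipv4HeaderLength(packet []byte) (int, bool) {
	const ipv4MinimumSize = 20
	if len(packet) < ipv4MinimumSize {
		return 0, false
	}
	ihl := int(packet[0]&0x0f) * 4
	if ihl < ipv4MinimumSize || ihl > len(packet) {
		return 0, false
	}
	return ihl, true
}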
return false, true } - udpHeader = header.UDP(hdr) + udpHeader = header.UDP(hdr[length:]) } // Check whether the source and destination ports are within the diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 13480687d..21581257b 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -594,3 +594,20 @@ func AddTCPOptionPadding(options []byte, offset int) int { } return paddingToAdd } + +// Acceptable checks if a segment that starts at segSeq and has length segLen is +// "acceptable" for arriving in a receive window that starts at rcvNxt and ends +// before rcvAcc, according to the table on page 26 and 69 of RFC 793. +func Acceptable(segSeq seqnum.Value, segLen seqnum.Size, rcvNxt, rcvAcc seqnum.Value) bool { + if rcvNxt == rcvAcc { + return segLen == 0 && segSeq == rcvNxt + } + if segLen == 0 { + // rcvWnd is incremented by 1 because that is Linux's behavior despite the + // RFC. + return segSeq.InRange(rcvNxt, rcvAcc.Add(1)) + } + // Page 70 of RFC 793 allows packets that can be made "acceptable" by trimming + // the payload, so we'll accept any payload that overlaps the receieve window. + return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThan(rcvAcc) +} diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 1d61fddad..9db42b2a4 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -252,11 +252,31 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw // iptables filtering. All packets that reach here are locally // generated. ipt := e.stack.IPTables() - if ok := ipt.Check(stack.Output, pkt); !ok { + if ok := ipt.Check(stack.Output, &pkt, gso, r, ""); !ok { // iptables is telling us to drop the packet. return nil } + if pkt.NatDone { + // If the packet is manipulated as per NAT Ouput rules, handle packet + // based on destination address and do not send the packet to link layer. + netHeader := header.IPv4(pkt.NetworkHeader) + ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress()) + if err == nil { + src := netHeader.SourceAddress() + dst := netHeader.DestinationAddress() + route := r.ReverseRoute(src, dst) + + views := make([]buffer.View, 1, 1+len(pkt.Data.Views())) + views[0] = pkt.Header.View() + views = append(views, pkt.Data.Views()...) + packet := stack.PacketBuffer{ + Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views)} + ep.HandlePacket(&route, packet) + return nil + } + } + if r.Loop&stack.PacketLoop != 0 { // The inbound path expects the network header to still be in // the PacketBuffer's Data field. @@ -302,8 +322,8 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe // iptables filtering. All packets that reach here are locally // generated. ipt := e.stack.IPTables() - dropped := ipt.CheckPackets(stack.Output, pkts) - if len(dropped) == 0 { + dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r) + if len(dropped) == 0 && len(natPkts) == 0 { // Fast path: If no packets are to be dropped then we can just invoke the // faster WritePackets API directly. 
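The header.Acceptable helper added above encodes the segment acceptability table from RFC 793 (pages 26 and 69). A few concrete cases, assuming imports of pkg/tcpip/header and pkg/tcpip/seqnum and a receive window of [100, 200):

func acceptableExamples() {
	nxt, acc := seqnum.Value(100), seqnum.Value(200) // RCV.NXT and RCV.NXT+RCV.WND.
	_ = header.Acceptable(150, 10, nxt, acc) // true: segment lies fully inside the window.
	_ = header.Acceptable(95, 10, nxt, acc)  // true: overlaps the window, payload can be trimmed.
	_ = header.Acceptable(250, 10, nxt, acc) // false: starts at or beyond the right edge.
	_ = header.Acceptable(100, 0, nxt, acc)  // true: zero-length segment exactly at RCV.NXT.
}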
n, err := e.linkEP.WritePackets(r, gso, pkts, ProtocolNumber) @@ -318,6 +338,24 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe if _, ok := dropped[pkt]; ok { continue } + if _, ok := natPkts[pkt]; ok { + netHeader := header.IPv4(pkt.NetworkHeader) + ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress()) + if err == nil { + src := netHeader.SourceAddress() + dst := netHeader.DestinationAddress() + route := r.ReverseRoute(src, dst) + + views := make([]buffer.View, 1, 1+len(pkt.Data.Views())) + views[0] = pkt.Header.View() + views = append(views, pkt.Data.Views()...) + packet := stack.PacketBuffer{ + Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views)} + ep.HandlePacket(&route, packet) + n++ + continue + } + } if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, *pkt); err != nil { r.Stats().IP.PacketsSent.IncrementBy(uint64(n)) return n, err @@ -407,7 +445,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) { // iptables filtering. All packets that reach here are intended for // this machine and will not be forwarded. ipt := e.stack.IPTables() - if ok := ipt.Check(stack.Input, pkt); !ok { + if ok := ipt.Check(stack.Input, &pkt, nil, nil, ""); !ok { // iptables is telling us to drop the packet. return } diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index 5e963a4af..f71073207 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -30,6 +30,7 @@ go_template_instance( go_library( name = "stack", srcs = [ + "conntrack.go", "dhcpv6configurationfromndpra_string.go", "forwarder.go", "icmp_rate_limit.go", @@ -62,6 +63,7 @@ go_library( "//pkg/tcpip/header", "//pkg/tcpip/ports", "//pkg/tcpip/seqnum", + "//pkg/tcpip/transport/tcpconntrack", "//pkg/waiter", "@org_golang_x_time//rate:go_default_library", ], diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go new file mode 100644 index 000000000..7d1ede1f2 --- /dev/null +++ b/pkg/tcpip/stack/conntrack.go @@ -0,0 +1,480 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stack + +import ( + "encoding/binary" + "sync" + "time" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack" +) + +// Connection tracking is used to track and manipulate packets for NAT rules. +// The connection is created for a packet if it does not exist. Every connection +// contains two tuples (original and reply). The tuples are manipulated if there +// is a matching NAT rule. The packet is modified by looking at the tuples in the +// Prerouting and Output hooks. + +// Direction of the tuple. +type ctDirection int + +const ( + dirOriginal ctDirection = iota + dirReply +) + +// Status of connection. +// TODO(gvisor.dev/issue/170): Add other states of connection. 
+type connStatus int + +const ( + connNew connStatus = iota + connEstablished +) + +// Manipulation type for the connection. +type manipType int + +const ( + manipDstPrerouting manipType = iota + manipDstOutput +) + +// connTrackMutable is the manipulatable part of the tuple. +type connTrackMutable struct { + // addr is source address of the tuple. + addr tcpip.Address + + // port is source port of the tuple. + port uint16 + + // protocol is network layer protocol. + protocol tcpip.NetworkProtocolNumber +} + +// connTrackImmutable is the non-manipulatable part of the tuple. +type connTrackImmutable struct { + // addr is destination address of the tuple. + addr tcpip.Address + + // direction is direction (original or reply) of the tuple. + direction ctDirection + + // port is destination port of the tuple. + port uint16 + + // protocol is transport layer protocol. + protocol tcpip.TransportProtocolNumber +} + +// connTrackTuple represents the tuple which is created from the +// packet. +type connTrackTuple struct { + // dst is non-manipulatable part of the tuple. + dst connTrackImmutable + + // src is manipulatable part of the tuple. + src connTrackMutable +} + +// connTrackTupleHolder is the container of tuple and connection. +type ConnTrackTupleHolder struct { + // conn is pointer to the connection tracking entry. + conn *connTrack + + // tuple is original or reply tuple. + tuple connTrackTuple +} + +// connTrack is the connection. +type connTrack struct { + // originalTupleHolder contains tuple in original direction. + originalTupleHolder ConnTrackTupleHolder + + // replyTupleHolder contains tuple in reply direction. + replyTupleHolder ConnTrackTupleHolder + + // status indicates connection is new or established. + status connStatus + + // timeout indicates the time connection should be active. + timeout time.Duration + + // manip indicates if the packet should be manipulated. + manip manipType + + // tcb is TCB control block. It is used to keep track of states + // of tcp connection. + tcb tcpconntrack.TCB + + // tcbHook indicates if the packet is inbound or outbound to + // update the state of tcb. + tcbHook Hook +} + +// ConnTrackTable contains a map of all existing connections created for +// NAT rules. +type ConnTrackTable struct { + // connMu protects connTrackTable. + connMu sync.RWMutex + + // connTrackTable maintains a map of tuples needed for connection tracking + // for iptables NAT rules. The key for the map is an integer calculated + // using seed, source address, destination address, source port and + // destination port. + CtMap map[uint32]ConnTrackTupleHolder + + // seed is a one-time random value initialized at stack startup + // and is used in calculation of hash key for connection tracking + // table. + Seed uint32 +} + +// parseHeaders sets headers in the packet. +func parseHeaders(pkt *PacketBuffer) { + newPkt := pkt.Clone() + + // Set network header. + hdr, ok := newPkt.Data.PullUp(header.IPv4MinimumSize) + if !ok { + return + } + netHeader := header.IPv4(hdr) + newPkt.NetworkHeader = hdr + length := int(netHeader.HeaderLength()) + + // TODO(gvisor.dev/issue/170): Need to support for other + // protocols as well. + // Set transport header. 
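Each tracked connection keeps two tuple holders, one per direction, both pointing back at the same connTrack entry, so a packet matching either direction finds the connection (and any NAT manipulation) in a single lookup. For illustration, with made-up addresses and getReplyTuple as defined just below, a TCP flow 10.0.0.1:4321 -> 10.0.0.2:80 yields:

func exampleTuples() {
	orig := connTrackTuple{
		src: connTrackMutable{addr: "\x0a\x00\x00\x01", port: 4321, protocol: header.IPv4ProtocolNumber},
		dst: connTrackImmutable{addr: "\x0a\x00\x00\x02", port: 80, protocol: header.TCPProtocolNumber},
	}
	reply := getReplyTuple(orig)
	// reply.src is 10.0.0.2:80 and reply.dst is 10.0.0.1:4321, with
	// reply.dst.direction == dirReply.
	_ = reply
}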
+ switch protocol := netHeader.TransportProtocol(); protocol { + case header.UDPProtocolNumber: + if newPkt.TransportHeader == nil { + h, ok := newPkt.Data.PullUp(length + header.UDPMinimumSize) + if !ok { + return + } + newPkt.TransportHeader = buffer.View(header.UDP(h[length:])) + } + case header.TCPProtocolNumber: + if newPkt.TransportHeader == nil { + h, ok := newPkt.Data.PullUp(length + header.TCPMinimumSize) + if !ok { + return + } + newPkt.TransportHeader = buffer.View(header.TCP(h[length:])) + } + } + pkt.NetworkHeader = newPkt.NetworkHeader + pkt.TransportHeader = newPkt.TransportHeader +} + +// packetToTuple converts packet to a tuple in original direction. +func packetToTuple(pkt PacketBuffer, hook Hook) (connTrackTuple, *tcpip.Error) { + var tuple connTrackTuple + + netHeader := header.IPv4(pkt.NetworkHeader) + // TODO(gvisor.dev/issue/170): Need to support for other + // protocols as well. + if netHeader == nil || netHeader.TransportProtocol() != header.TCPProtocolNumber { + return tuple, tcpip.ErrUnknownProtocol + } + tcpHeader := header.TCP(pkt.TransportHeader) + if tcpHeader == nil { + return tuple, tcpip.ErrUnknownProtocol + } + + tuple.src.addr = netHeader.SourceAddress() + tuple.src.port = tcpHeader.SourcePort() + tuple.src.protocol = header.IPv4ProtocolNumber + + tuple.dst.addr = netHeader.DestinationAddress() + tuple.dst.port = tcpHeader.DestinationPort() + tuple.dst.protocol = netHeader.TransportProtocol() + + return tuple, nil +} + +// getReplyTuple creates reply tuple for the given tuple. +func getReplyTuple(tuple connTrackTuple) connTrackTuple { + var replyTuple connTrackTuple + replyTuple.src.addr = tuple.dst.addr + replyTuple.src.port = tuple.dst.port + replyTuple.src.protocol = tuple.src.protocol + replyTuple.dst.addr = tuple.src.addr + replyTuple.dst.port = tuple.src.port + replyTuple.dst.protocol = tuple.dst.protocol + replyTuple.dst.direction = dirReply + + return replyTuple +} + +// makeNewConn creates new connection. +func makeNewConn(tuple, replyTuple connTrackTuple) connTrack { + var conn connTrack + conn.status = connNew + conn.originalTupleHolder.tuple = tuple + conn.originalTupleHolder.conn = &conn + conn.replyTupleHolder.tuple = replyTuple + conn.replyTupleHolder.conn = &conn + + return conn +} + +// getTupleHash returns hash of the tuple. The fields used for +// generating hash are seed (generated once for stack), source address, +// destination address, source port and destination ports. +func (ct *ConnTrackTable) getTupleHash(tuple connTrackTuple) uint32 { + h := jenkins.Sum32(ct.Seed) + h.Write([]byte(tuple.src.addr)) + h.Write([]byte(tuple.dst.addr)) + portBuf := make([]byte, 2) + binary.LittleEndian.PutUint16(portBuf, tuple.src.port) + h.Write([]byte(portBuf)) + binary.LittleEndian.PutUint16(portBuf, tuple.dst.port) + h.Write([]byte(portBuf)) + + return h.Sum32() +} + +// connTrackForPacket returns connTrack for packet. +// TODO(gvisor.dev/issue/170): Only TCP packets are supported. Need to support other +// transport protocols. +func (ct *ConnTrackTable) connTrackForPacket(pkt *PacketBuffer, hook Hook, createConn bool) (*connTrack, ctDirection) { + if hook == Prerouting { + // Headers will not be set in Prerouting. + // TODO(gvisor.dev/issue/170): Change this after parsing headers + // code is added. 
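getTupleHash above keys the connection map on a per-stack random seed plus the tuple's addresses and ports, so entries are stable for a connection's lifetime but unpredictable across stacks. A self-contained sketch of the same keying scheme using the standard library (the real code uses the jenkins hash from pkg/tcpip/hash/jenkins; FNV-1a is substituted here only to keep the example standalone):

import (
	"encoding/binary"
	"hash/fnv"
)

// tupleKey mixes the seed, addresses, and ports into a single map key.
func tupleKey(seed uint32, srcAddr, dstAddr string, srcPort, dstPort uint16) uint32 {
	h := fnv.New32a()
	var b [4]byte
	binary.LittleEndian.PutUint32(b[:], seed)
	h.Write(b[:])
	h.Write([]byte(srcAddr))
	h.Write([]byte(dstAddr))
	binary.LittleEndian.PutUint16(b[:2], srcPort)
	h.Write(b[:2])
	binary.LittleEndian.PutUint16(b[:2], dstPort)
	h.Write(b[:2])
	return h.Sum32()
}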
+ parseHeaders(pkt) + } + + var dir ctDirection + tuple, err := packetToTuple(*pkt, hook) + if err != nil { + return nil, dir + } + + ct.connMu.Lock() + defer ct.connMu.Unlock() + + connTrackTable := ct.CtMap + hash := ct.getTupleHash(tuple) + + var conn *connTrack + switch createConn { + case true: + // If connection does not exist for the hash, create a new + // connection. + replyTuple := getReplyTuple(tuple) + replyHash := ct.getTupleHash(replyTuple) + newConn := makeNewConn(tuple, replyTuple) + conn = &newConn + + // Add tupleHolders to the map. + // TODO(gvisor.dev/issue/170): Need to support collisions using linked list. + ct.CtMap[hash] = conn.originalTupleHolder + ct.CtMap[replyHash] = conn.replyTupleHolder + default: + tupleHolder, ok := connTrackTable[hash] + if !ok { + return nil, dir + } + + // If this is the reply of new connection, set the connection + // status as ESTABLISHED. + conn = tupleHolder.conn + if conn.status == connNew && tupleHolder.tuple.dst.direction == dirReply { + conn.status = connEstablished + } + if tupleHolder.conn == nil { + panic("tupleHolder has null connection tracking entry") + } + + dir = tupleHolder.tuple.dst.direction + } + return conn, dir +} + +// SetNatInfo will manipulate the tuples according to iptables NAT rules. +func (ct *ConnTrackTable) SetNatInfo(pkt *PacketBuffer, rt RedirectTarget, hook Hook) { + // Get the connection. Connection is always created before this + // function is called. + conn, _ := ct.connTrackForPacket(pkt, hook, false) + if conn == nil { + panic("connection should be created to manipulate tuples.") + } + replyTuple := conn.replyTupleHolder.tuple + replyHash := ct.getTupleHash(replyTuple) + + // TODO(gvisor.dev/issue/170): Support only redirect of ports. Need to + // support changing of address for Prerouting. + + // Change the port as per the iptables rule. This tuple will be used + // to manipulate the packet in HandlePacket. + conn.replyTupleHolder.tuple.src.addr = rt.MinIP + conn.replyTupleHolder.tuple.src.port = rt.MinPort + newHash := ct.getTupleHash(conn.replyTupleHolder.tuple) + + // Add the changed tuple to the map. + ct.connMu.Lock() + defer ct.connMu.Unlock() + ct.CtMap[newHash] = conn.replyTupleHolder + if hook == Output { + conn.replyTupleHolder.conn.manip = manipDstOutput + } + + // Delete the old tuple. + delete(ct.CtMap, replyHash) +} + +// handlePacketPrerouting manipulates ports for packets in Prerouting hook. +// TODO(gvisor.dev/issue/170): Change address for Prerouting hook.. +func handlePacketPrerouting(pkt *PacketBuffer, conn *connTrack, dir ctDirection) { + netHeader := header.IPv4(pkt.NetworkHeader) + tcpHeader := header.TCP(pkt.TransportHeader) + + // For prerouting redirection, packets going in the original direction + // have their destinations modified and replies have their sources + // modified. + switch dir { + case dirOriginal: + port := conn.replyTupleHolder.tuple.src.port + tcpHeader.SetDestinationPort(port) + netHeader.SetDestinationAddress(conn.replyTupleHolder.tuple.src.addr) + case dirReply: + port := conn.originalTupleHolder.tuple.dst.port + tcpHeader.SetSourcePort(port) + netHeader.SetSourceAddress(conn.originalTupleHolder.tuple.dst.addr) + } + + netHeader.SetChecksum(0) + netHeader.SetChecksum(^netHeader.CalculateChecksum()) +} + +// handlePacketOutput manipulates ports for packets in Output hook. 
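A worked example of the Prerouting rewrite above, for a REDIRECT rule that maps destination port 80 to 8080 (all addresses and ports illustrative):

  original direction: client:4321 -> 10.0.0.2:80    is rewritten to  client:4321 -> 10.0.0.2:8080
  reply direction:    10.0.0.2:8080 -> client:4321  is rewritten to  10.0.0.2:80 -> client:4321

Packets in the original direction take their new destination from the reply tuple's source, replies take their restored source from the original tuple's destination, and the IPv4 header checksum is recomputed after either rewrite, so the client only ever sees port 80.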
+func handlePacketOutput(pkt *PacketBuffer, conn *connTrack, gso *GSO, r *Route, dir ctDirection) { + netHeader := header.IPv4(pkt.NetworkHeader) + tcpHeader := header.TCP(pkt.TransportHeader) + + // For output redirection, packets going in the original direction + // have their destinations modified and replies have their sources + // modified. For prerouting redirection, we only reach this point + // when replying, so packet sources are modified. + if conn.manip == manipDstOutput && dir == dirOriginal { + port := conn.replyTupleHolder.tuple.src.port + tcpHeader.SetDestinationPort(port) + netHeader.SetDestinationAddress(conn.replyTupleHolder.tuple.src.addr) + } else { + port := conn.originalTupleHolder.tuple.dst.port + tcpHeader.SetSourcePort(port) + netHeader.SetSourceAddress(conn.originalTupleHolder.tuple.dst.addr) + } + + // Calculate the TCP checksum and set it. + tcpHeader.SetChecksum(0) + hdr := &pkt.Header + length := uint16(pkt.Data.Size()+hdr.UsedLength()) - uint16(netHeader.HeaderLength()) + xsum := r.PseudoHeaderChecksum(header.TCPProtocolNumber, length) + if gso != nil && gso.NeedsCsum { + tcpHeader.SetChecksum(xsum) + } else if r.Capabilities()&CapabilityTXChecksumOffload == 0 { + xsum = header.ChecksumVVWithOffset(pkt.Data, xsum, int(tcpHeader.DataOffset()), pkt.Data.Size()) + tcpHeader.SetChecksum(^tcpHeader.CalculateChecksum(xsum)) + } + + netHeader.SetChecksum(0) + netHeader.SetChecksum(^netHeader.CalculateChecksum()) +} + +// HandlePacket will manipulate the port and address of the packet if the +// connection exists. +func (ct *ConnTrackTable) HandlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Route) { + if pkt.NatDone { + return + } + + if hook != Prerouting && hook != Output { + return + } + + conn, dir := ct.connTrackForPacket(pkt, hook, false) + // Connection or Rule not found for the packet. + if conn == nil { + return + } + + netHeader := header.IPv4(pkt.NetworkHeader) + // TODO(gvisor.dev/issue/170): Need to support for other transport + // protocols as well. + if netHeader == nil || netHeader.TransportProtocol() != header.TCPProtocolNumber { + return + } + + tcpHeader := header.TCP(pkt.TransportHeader) + if tcpHeader == nil { + return + } + + switch hook { + case Prerouting: + handlePacketPrerouting(pkt, conn, dir) + case Output: + handlePacketOutput(pkt, conn, gso, r, dir) + } + pkt.NatDone = true + + // Update the state of tcb. + // TODO(gvisor.dev/issue/170): Add support in tcpcontrack to handle + // other tcp states. + var st tcpconntrack.Result + if conn.tcb.IsEmpty() { + conn.tcb.Init(tcpHeader) + conn.tcbHook = hook + } else { + switch hook { + case conn.tcbHook: + st = conn.tcb.UpdateStateOutbound(tcpHeader) + default: + st = conn.tcb.UpdateStateInbound(tcpHeader) + } + } + + // Delete conntrack if tcp connection is closed. + if st == tcpconntrack.ResultClosedByPeer || st == tcpconntrack.ResultClosedBySelf || st == tcpconntrack.ResultReset { + ct.deleteConnTrack(conn) + } +} + +// deleteConnTrack deletes the connection. 
+func (ct *ConnTrackTable) deleteConnTrack(conn *connTrack) { + if conn == nil { + return + } + + tuple := conn.originalTupleHolder.tuple + hash := ct.getTupleHash(tuple) + replyTuple := conn.replyTupleHolder.tuple + replyHash := ct.getTupleHash(replyTuple) + + ct.connMu.Lock() + defer ct.connMu.Unlock() + + delete(ct.CtMap, hash) + delete(ct.CtMap, replyHash) +} diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go index 6b91159d4..7c3c47d50 100644 --- a/pkg/tcpip/stack/iptables.go +++ b/pkg/tcpip/stack/iptables.go @@ -17,6 +17,7 @@ package stack import ( "fmt" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" ) @@ -110,6 +111,10 @@ func DefaultTables() IPTables { Prerouting: []string{TablenameMangle, TablenameNat}, Output: []string{TablenameMangle, TablenameNat, TablenameFilter}, }, + connections: ConnTrackTable{ + CtMap: make(map[uint32]ConnTrackTupleHolder), + Seed: generateRandUint32(), + }, } } @@ -173,12 +178,16 @@ const ( // dropped. // // Precondition: pkt.NetworkHeader is set. -func (it *IPTables) Check(hook Hook, pkt PacketBuffer) bool { +func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, address tcpip.Address) bool { + // Packets are manipulated only if connection and matching + // NAT rule exists. + it.connections.HandlePacket(pkt, hook, gso, r) + // Go through each table containing the hook. for _, tablename := range it.Priorities[hook] { table := it.Tables[tablename] ruleIdx := table.BuiltinChains[hook] - switch verdict := it.checkChain(hook, pkt, table, ruleIdx); verdict { + switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, address); verdict { // If the table returns Accept, move on to the next table. case chainAccept: continue @@ -189,7 +198,7 @@ func (it *IPTables) Check(hook Hook, pkt PacketBuffer) bool { // Any Return from a built-in chain means we have to // call the underflow. underflow := table.Rules[table.Underflows[hook]] - switch v, _ := underflow.Target.Action(pkt); v { + switch v, _ := underflow.Target.Action(pkt, &it.connections, hook, gso, r, address); v { case RuleAccept: continue case RuleDrop: @@ -219,26 +228,34 @@ func (it *IPTables) Check(hook Hook, pkt PacketBuffer) bool { // // NOTE: unlike the Check API the returned map contains packets that should be // dropped. -func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList) (drop map[*PacketBuffer]struct{}) { +func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *Route) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) { for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { - if ok := it.Check(hook, *pkt); !ok { - if drop == nil { - drop = make(map[*PacketBuffer]struct{}) + if !pkt.NatDone { + if ok := it.Check(hook, pkt, gso, r, ""); !ok { + if drop == nil { + drop = make(map[*PacketBuffer]struct{}) + } + drop[pkt] = struct{}{} + } + if pkt.NatDone { + if natPkts == nil { + natPkts = make(map[*PacketBuffer]struct{}) + } + natPkts[pkt] = struct{}{} } - drop[pkt] = struct{}{} } } - return drop + return drop, natPkts } // Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize. -// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a +// TODO(gvisor.dev/issue/170): pkt.NetworkHeader will always be set as a // precondition. 
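+// checkChain walks the chain starting at ruleIdx, threading the GSO options,
+// Route, and interface address through to each rule's target so that NAT
+// targets can rewrite the packet when they match.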
-func (it *IPTables) checkChain(hook Hook, pkt PacketBuffer, table Table, ruleIdx int) chainVerdict { +func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address) chainVerdict { // Start from ruleIdx and walk the list of rules until a rule gives us // a verdict. for ruleIdx < len(table.Rules) { - switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx); verdict { + switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, gso, r, address); verdict { case RuleAccept: return chainAccept @@ -255,7 +272,7 @@ func (it *IPTables) checkChain(hook Hook, pkt PacketBuffer, table Table, ruleIdx ruleIdx++ continue } - switch verdict := it.checkChain(hook, pkt, table, jumpTo); verdict { + switch verdict := it.checkChain(hook, pkt, table, jumpTo, gso, r, address); verdict { case chainAccept: return chainAccept case chainDrop: @@ -279,9 +296,9 @@ func (it *IPTables) checkChain(hook Hook, pkt PacketBuffer, table Table, ruleIdx } // Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize. -// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a +// TODO(gvisor.dev/issue/170): pkt.NetworkHeader will always be set as a // precondition. -func (it *IPTables) checkRule(hook Hook, pkt PacketBuffer, table Table, ruleIdx int) (RuleVerdict, int) { +func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address) (RuleVerdict, int) { rule := table.Rules[ruleIdx] // If pkt.NetworkHeader hasn't been set yet, it will be contained in @@ -304,7 +321,7 @@ func (it *IPTables) checkRule(hook Hook, pkt PacketBuffer, table Table, ruleIdx // Go through each rule matcher. If they all match, run // the rule target. for _, matcher := range rule.Matchers { - matches, hotdrop := matcher.Match(hook, pkt, "") + matches, hotdrop := matcher.Match(hook, *pkt, "") if hotdrop { return RuleDrop, 0 } @@ -315,7 +332,7 @@ func (it *IPTables) checkRule(hook Hook, pkt PacketBuffer, table Table, ruleIdx } // All the matchers matched, so run the target. - return rule.Target.Action(pkt) + return rule.Target.Action(pkt, &it.connections, hook, gso, r, address) } func filterMatch(filter IPHeaderFilter, hdr header.IPv4) bool { diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go index 8be61f4b1..36cc6275d 100644 --- a/pkg/tcpip/stack/iptables_targets.go +++ b/pkg/tcpip/stack/iptables_targets.go @@ -24,7 +24,7 @@ import ( type AcceptTarget struct{} // Action implements Target.Action. -func (AcceptTarget) Action(packet PacketBuffer) (RuleVerdict, int) { +func (AcceptTarget) Action(*PacketBuffer, *ConnTrackTable, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) { return RuleAccept, 0 } @@ -32,7 +32,7 @@ func (AcceptTarget) Action(packet PacketBuffer) (RuleVerdict, int) { type DropTarget struct{} // Action implements Target.Action. -func (DropTarget) Action(packet PacketBuffer) (RuleVerdict, int) { +func (DropTarget) Action(*PacketBuffer, *ConnTrackTable, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) { return RuleDrop, 0 } @@ -41,7 +41,7 @@ func (DropTarget) Action(packet PacketBuffer) (RuleVerdict, int) { type ErrorTarget struct{} // Action implements Target.Action. 
-func (ErrorTarget) Action(packet PacketBuffer) (RuleVerdict, int) { +func (ErrorTarget) Action(*PacketBuffer, *ConnTrackTable, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) { log.Debugf("ErrorTarget triggered.") return RuleDrop, 0 } @@ -52,7 +52,7 @@ type UserChainTarget struct { } // Action implements Target.Action. -func (UserChainTarget) Action(PacketBuffer) (RuleVerdict, int) { +func (UserChainTarget) Action(*PacketBuffer, *ConnTrackTable, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) { panic("UserChainTarget should never be called.") } @@ -61,7 +61,7 @@ func (UserChainTarget) Action(PacketBuffer) (RuleVerdict, int) { type ReturnTarget struct{} // Action implements Target.Action. -func (ReturnTarget) Action(PacketBuffer) (RuleVerdict, int) { +func (ReturnTarget) Action(*PacketBuffer, *ConnTrackTable, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) { return RuleReturn, 0 } @@ -75,16 +75,16 @@ type RedirectTarget struct { // redirect. RangeProtoSpecified bool - // Min address used to redirect. + // MinIP indicates address used to redirect. MinIP tcpip.Address - // Max address used to redirect. + // MaxIP indicates address used to redirect. MaxIP tcpip.Address - // Min port used to redirect. + // MinPort indicates port used to redirect. MinPort uint16 - // Max port used to redirect. + // MaxPort indicates port used to redirect. MaxPort uint16 } @@ -92,61 +92,76 @@ type RedirectTarget struct { // TODO(gvisor.dev/issue/170): Parse headers without copying. The current // implementation only works for PREROUTING and calls pkt.Clone(), neither // of which should be the case. -func (rt RedirectTarget) Action(pkt PacketBuffer) (RuleVerdict, int) { - newPkt := pkt.Clone() +func (rt RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrackTable, hook Hook, gso *GSO, r *Route, address tcpip.Address) (RuleVerdict, int) { + // Packet is already manipulated. + if pkt.NatDone { + return RuleAccept, 0 + } // Set network header. - headerView, ok := newPkt.Data.PullUp(header.IPv4MinimumSize) - if !ok { - return RuleDrop, 0 + if hook == Prerouting { + parseHeaders(pkt) } - netHeader := header.IPv4(headerView) - newPkt.NetworkHeader = headerView - hlen := int(netHeader.HeaderLength()) - tlen := int(netHeader.TotalLength()) - newPkt.Data.TrimFront(hlen) - newPkt.Data.CapLength(tlen - hlen) + // Drop the packet if network and transport header are not set. + if pkt.NetworkHeader == nil || pkt.TransportHeader == nil { + return RuleDrop, 0 + } - // TODO(gvisor.dev/issue/170): Change destination address to - // loopback or interface address on which the packet was - // received. + // Change the address to localhost (127.0.0.1) in Output and + // to primary address of the incoming interface in Prerouting. + switch hook { + case Output: + rt.MinIP = tcpip.Address([]byte{127, 0, 0, 1}) + rt.MaxIP = tcpip.Address([]byte{127, 0, 0, 1}) + case Prerouting: + rt.MinIP = address + rt.MaxIP = address + default: + panic("redirect target is supported only on output and prerouting hooks") + } // TODO(gvisor.dev/issue/170): Check Flags in RedirectTarget if // we need to change dest address (for OUTPUT chain) or ports. 
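+ // Only UDP and TCP are handled below. UDP packets are rewritten in
+ // place and marked NatDone, while TCP packets set up a conntrack
+ // entry so that HandlePacket can rewrite this and subsequent packets
+ // of the connection. Any other protocol is dropped.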
+ netHeader := header.IPv4(pkt.NetworkHeader) switch protocol := netHeader.TransportProtocol(); protocol { case header.UDPProtocolNumber: - var udpHeader header.UDP - if newPkt.TransportHeader != nil { - udpHeader = header.UDP(newPkt.TransportHeader) - } else { - if pkt.Data.Size() < header.UDPMinimumSize { - return RuleDrop, 0 - } - hdr, ok := newPkt.Data.PullUp(header.UDPMinimumSize) - if !ok { - return RuleDrop, 0 + udpHeader := header.UDP(pkt.TransportHeader) + udpHeader.SetDestinationPort(rt.MinPort) + + // Calculate UDP checksum and set it. + if hook == Output { + udpHeader.SetChecksum(0) + hdr := &pkt.Header + length := uint16(pkt.Data.Size()+hdr.UsedLength()) - uint16(netHeader.HeaderLength()) + + // Only calculate the checksum if offloading isn't supported. + if r.Capabilities()&CapabilityTXChecksumOffload == 0 { + xsum := r.PseudoHeaderChecksum(protocol, length) + for _, v := range pkt.Data.Views() { + xsum = header.Checksum(v, xsum) + } + udpHeader.SetChecksum(0) + udpHeader.SetChecksum(^udpHeader.CalculateChecksum(xsum)) } - udpHeader = header.UDP(hdr) } - udpHeader.SetDestinationPort(rt.MinPort) + // Change destination address. + netHeader.SetDestinationAddress(rt.MinIP) + netHeader.SetChecksum(0) + netHeader.SetChecksum(^netHeader.CalculateChecksum()) + pkt.NatDone = true case header.TCPProtocolNumber: - var tcpHeader header.TCP - if newPkt.TransportHeader != nil { - tcpHeader = header.TCP(newPkt.TransportHeader) - } else { - if pkt.Data.Size() < header.TCPMinimumSize { - return RuleDrop, 0 - } - hdr, ok := newPkt.Data.PullUp(header.TCPMinimumSize) - if !ok { - return RuleDrop, 0 - } - tcpHeader = header.TCP(hdr) + if ct == nil { + return RuleAccept, 0 + } + + // Set up conection for matching NAT rule. + // Only the first packet of the connection comes here. + // Other packets will be manipulated in connection tracking. + if conn, _ := ct.connTrackForPacket(pkt, hook, true); conn != nil { + ct.SetNatInfo(pkt, rt, hook) + ct.HandlePacket(pkt, hook, gso, r) } - // TODO(gvisor.dev/issue/170): Need to recompute checksum - // and implement nat connection tracking to support TCP. - tcpHeader.SetDestinationPort(rt.MinPort) default: return RuleDrop, 0 } diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go index 2ffb55f2a..1bb0ba1bd 100644 --- a/pkg/tcpip/stack/iptables_types.go +++ b/pkg/tcpip/stack/iptables_types.go @@ -82,6 +82,8 @@ type IPTables struct { // list is the order in which each table should be visited for that // hook. Priorities map[Hook][]string + + connections ConnTrackTable } // A Table defines a set of chains and hooks into the network stack. It is @@ -176,5 +178,5 @@ type Target interface { // Action takes an action on the packet and returns a verdict on how // traversal should (or should not) continue. If the return value is // Jump, it also returns the index of the rule to jump to. - Action(packet PacketBuffer) (RuleVerdict, int) + Action(packet *PacketBuffer, connections *ConnTrackTable, hook Hook, gso *GSO, r *Route, address tcpip.Address) (RuleVerdict, int) } diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 7b54919bb..8f4c1fe42 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -1230,8 +1230,10 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link // TODO(gvisor.dev/issue/170): Not supporting iptables for IPv6 yet. if protocol == header.IPv4ProtocolNumber { + // iptables filtering. 
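+ // The NIC's primary address for the protocol is passed down so that
+ // Prerouting REDIRECT rules can rewrite the destination address to
+ // this interface.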
ipt := n.stack.IPTables() - if ok := ipt.Check(Prerouting, pkt); !ok { + address := n.primaryAddress(protocol) + if ok := ipt.Check(Prerouting, &pkt, nil, nil, address.Address); !ok { // iptables is telling us to drop the packet. return } diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go index 9ff80ab24..926df4d7b 100644 --- a/pkg/tcpip/stack/packet_buffer.go +++ b/pkg/tcpip/stack/packet_buffer.go @@ -72,6 +72,10 @@ type PacketBuffer struct { EgressRoute *Route GSOOptions *GSO NetworkProtocolNumber tcpip.NetworkProtocolNumber + + // NatDone indicates if the packet has been manipulated as per NAT + // iptables rule. + NatDone bool } // Clone makes a copy of pk. It clones the Data field, which creates a new diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go index 53148dc03..150297ab9 100644 --- a/pkg/tcpip/stack/route.go +++ b/pkg/tcpip/stack/route.go @@ -261,3 +261,16 @@ func (r *Route) MakeLoopedRoute() Route { func (r *Route) Stack() *Stack { return r.ref.stack() } + +// ReverseRoute returns new route with given source and destination address. +func (r *Route) ReverseRoute(src tcpip.Address, dst tcpip.Address) Route { + return Route{ + NetProto: r.NetProto, + LocalAddress: dst, + LocalLinkAddress: r.RemoteLinkAddress, + RemoteAddress: src, + RemoteLinkAddress: r.LocalLinkAddress, + ref: r.ref, + Loop: r.Loop, + } +} diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 4a2dc3dc6..e33fae4eb 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -1885,3 +1885,22 @@ func generateRandInt64() int64 { } return v } + +// FindNetworkEndpoint returns the network endpoint for the given address. +func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, address tcpip.Address) (NetworkEndpoint, *tcpip.Error) { + s.mu.Lock() + defer s.mu.Unlock() + + for _, nic := range s.nics { + id := NetworkEndpointID{address} + + if ref, ok := nic.mu.endpoints[id]; ok { + nic.mu.RLock() + defer nic.mu.RUnlock() + + // An endpoint with this id exists, check if it can be used and return it. + return ref.ep, nil + } + } + return nil, tcpip.ErrBadAddress +} diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index f2aa69069..f38eb6833 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -115,7 +115,7 @@ go_test( size = "small", srcs = ["rcv_test.go"], deps = [ - ":tcp", + "//pkg/tcpip/header", "//pkg/tcpip/seqnum", ], ) diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index a4b73b588..6fe97fefd 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -70,24 +70,7 @@ func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale // acceptable checks if the segment sequence number range is acceptable // according to the table on page 26 of RFC 793. func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { - return Acceptable(segSeq, segLen, r.rcvNxt, r.rcvAcc) -} - -// Acceptable checks if a segment that starts at segSeq and has length segLen is -// "acceptable" for arriving in a receive window that starts at rcvNxt and ends -// before rcvAcc, according to the table on page 26 and 69 of RFC 793. -func Acceptable(segSeq seqnum.Value, segLen seqnum.Size, rcvNxt, rcvAcc seqnum.Value) bool { - if rcvNxt == rcvAcc { - return segLen == 0 && segSeq == rcvNxt - } - if segLen == 0 { - // rcvWnd is incremented by 1 because that is Linux's behavior despite the - // RFC. 
- return segSeq.InRange(rcvNxt, rcvAcc.Add(1)) - } - // Page 70 of RFC 793 allows packets that can be made "acceptable" by trimming - // the payload, so we'll accept any payload that overlaps the receieve window. - return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThan(rcvAcc) + return header.Acceptable(segSeq, segLen, r.rcvNxt, r.rcvAcc) } // getSendParams returns the parameters needed by the sender when building diff --git a/pkg/tcpip/transport/tcp/rcv_test.go b/pkg/tcpip/transport/tcp/rcv_test.go index dc02729ce..c9eeff935 100644 --- a/pkg/tcpip/transport/tcp/rcv_test.go +++ b/pkg/tcpip/transport/tcp/rcv_test.go @@ -17,8 +17,8 @@ package rcv_test import ( "testing" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" - "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" ) func TestAcceptable(t *testing.T) { @@ -67,8 +67,8 @@ func TestAcceptable(t *testing.T) { {105, 2, 108, 108, false}, {105, 2, 109, 109, false}, } { - if got := tcp.Acceptable(tt.segSeq, tt.segLen, tt.rcvNxt, tt.rcvAcc); got != tt.want { - t.Errorf("tcp.Acceptable(%d, %d, %d, %d) = %t, want %t", tt.segSeq, tt.segLen, tt.rcvNxt, tt.rcvAcc, got, tt.want) + if got := header.Acceptable(tt.segSeq, tt.segLen, tt.rcvNxt, tt.rcvAcc); got != tt.want { + t.Errorf("header.Acceptable(%d, %d, %d, %d) = %t, want %t", tt.segSeq, tt.segLen, tt.rcvNxt, tt.rcvAcc, got, tt.want) } } } diff --git a/pkg/tcpip/transport/tcpconntrack/BUILD b/pkg/tcpip/transport/tcpconntrack/BUILD index 2025ff757..3ad6994a7 100644 --- a/pkg/tcpip/transport/tcpconntrack/BUILD +++ b/pkg/tcpip/transport/tcpconntrack/BUILD @@ -9,7 +9,6 @@ go_library( deps = [ "//pkg/tcpip/header", "//pkg/tcpip/seqnum", - "//pkg/tcpip/transport/tcp", ], ) diff --git a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go index 30d05200f..12bc1b5b5 100644 --- a/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go +++ b/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go @@ -20,7 +20,6 @@ package tcpconntrack import ( "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" - "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" ) // Result is returned when the state of a TCB is updated in response to an @@ -312,7 +311,7 @@ type stream struct { // the window is zero, if it's a packet with no payload and sequence number // equal to una. func (s *stream) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { - return tcp.Acceptable(segSeq, segLen, s.una, s.end) + return header.Acceptable(segSeq, segLen, s.una, s.end) } // closed determines if the stream has already been closed. This happens when @@ -338,3 +337,16 @@ func logicalLen(tcp header.TCP) seqnum.Size { } return l } + +// IsEmpty returns true if tcb is not initialized. +func (t *TCB) IsEmpty() bool { + if t.inbound != (stream{}) || t.outbound != (stream{}) { + return false + } + + if t.firstFin != nil || t.state != ResultDrop { + return false + } + + return true +} diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go index f6d974b85..b1382d353 100644 --- a/test/iptables/filter_output.go +++ b/test/iptables/filter_output.go @@ -42,7 +42,7 @@ func (FilterOutputDropTCPDestPort) Name() string { // ContainerAction implements TestCase.ContainerAction. 
func (FilterOutputDropTCPDestPort) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", "1024:65535", "-j", "DROP"); err != nil { return err } diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 334d8e676..63a862d35 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -115,30 +115,6 @@ func TestFilterInputDropOnlyUDP(t *testing.T) { singleTest(t, FilterInputDropOnlyUDP{}) } -func TestNATRedirectUDPPort(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - singleTest(t, NATRedirectUDPPort{}) -} - -func TestNATRedirectTCPPort(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - singleTest(t, NATRedirectTCPPort{}) -} - -func TestNATDropUDP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - singleTest(t, NATDropUDP{}) -} - -func TestNATAcceptAll(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") - singleTest(t, NATAcceptAll{}) -} - func TestFilterInputDropTCPDestPort(t *testing.T) { singleTest(t, FilterInputDropTCPDestPort{}) } @@ -164,14 +140,10 @@ func TestFilterInputReturnUnderflow(t *testing.T) { } func TestFilterOutputDropTCPDestPort(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, FilterOutputDropTCPDestPort{}) } func TestFilterOutputDropTCPSrcPort(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("filter OUTPUT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, FilterOutputDropTCPSrcPort{}) } @@ -235,44 +207,54 @@ func TestOutputInvertDestination(t *testing.T) { singleTest(t, FilterOutputInvertDestination{}) } +func TestNATPreRedirectUDPPort(t *testing.T) { + singleTest(t, NATPreRedirectUDPPort{}) +} + +func TestNATPreRedirectTCPPort(t *testing.T) { + singleTest(t, NATPreRedirectTCPPort{}) +} + +func TestNATOutRedirectUDPPort(t *testing.T) { + singleTest(t, NATOutRedirectUDPPort{}) +} + +func TestNATOutRedirectTCPPort(t *testing.T) { + singleTest(t, NATOutRedirectTCPPort{}) +} + +func TestNATDropUDP(t *testing.T) { + singleTest(t, NATDropUDP{}) +} + +func TestNATAcceptAll(t *testing.T) { + singleTest(t, NATAcceptAll{}) +} + func TestNATOutRedirectIP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, NATOutRedirectIP{}) } func TestNATOutDontRedirectIP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, NATOutDontRedirectIP{}) } func TestNATOutRedirectInvert(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, NATOutRedirectInvert{}) } func TestNATPreRedirectIP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. 
- t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, NATPreRedirectIP{}) } func TestNATPreDontRedirectIP(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, NATPreDontRedirectIP{}) } func TestNATPreRedirectInvert(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, NATPreRedirectInvert{}) } func TestNATRedirectRequiresProtocol(t *testing.T) { - // TODO(gvisor.dev/issue/170): Enable when supported. - t.Skip("NAT isn't supported yet (gvisor.dev/issue/170).") singleTest(t, NATRedirectRequiresProtocol{}) } diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 2a00677be..2f988cd18 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -151,7 +151,7 @@ func connectTCP(ip net.IP, port int, timeout time.Duration) error { return err } if err := testutil.Poll(callback, timeout); err != nil { - return fmt.Errorf("timed out waiting to connect IP, most recent error: %v", err) + return fmt.Errorf("timed out waiting to connect IP on port %v, most recent error: %v", port, err) } return nil diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 40096901c..0a10ce7fe 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -26,8 +26,10 @@ const ( ) func init() { - RegisterTestCase(NATRedirectUDPPort{}) - RegisterTestCase(NATRedirectTCPPort{}) + RegisterTestCase(NATPreRedirectUDPPort{}) + RegisterTestCase(NATPreRedirectTCPPort{}) + RegisterTestCase(NATOutRedirectUDPPort{}) + RegisterTestCase(NATOutRedirectTCPPort{}) RegisterTestCase(NATDropUDP{}) RegisterTestCase(NATAcceptAll{}) RegisterTestCase(NATPreRedirectIP{}) @@ -39,16 +41,16 @@ func init() { RegisterTestCase(NATRedirectRequiresProtocol{}) } -// NATRedirectUDPPort tests that packets are redirected to different port. -type NATRedirectUDPPort struct{} +// NATPreRedirectUDPPort tests that packets are redirected to different port. +type NATPreRedirectUDPPort struct{} // Name implements TestCase.Name. -func (NATRedirectUDPPort) Name() string { - return "NATRedirectUDPPort" +func (NATPreRedirectUDPPort) Name() string { + return "NATPreRedirectUDPPort" } // ContainerAction implements TestCase.ContainerAction. -func (NATRedirectUDPPort) ContainerAction(ip net.IP) error { +func (NATPreRedirectUDPPort) ContainerAction(ip net.IP) error { if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } @@ -61,33 +63,53 @@ func (NATRedirectUDPPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATRedirectUDPPort) LocalAction(ip net.IP) error { +func (NATPreRedirectUDPPort) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } -// NATRedirectTCPPort tests that connections are redirected on specified ports. -type NATRedirectTCPPort struct{} +// NATPreRedirectTCPPort tests that connections are redirected on specified ports. +type NATPreRedirectTCPPort struct{} // Name implements TestCase.Name. -func (NATRedirectTCPPort) Name() string { - return "NATRedirectTCPPort" +func (NATPreRedirectTCPPort) Name() string { + return "NATPreRedirectTCPPort" } // ContainerAction implements TestCase.ContainerAction. 
-func (NATRedirectTCPPort) ContainerAction(ip net.IP) error { - if err := natTable("-A", "PREROUTING", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { +func (NATPreRedirectTCPPort) ContainerAction(ip net.IP) error { + if err := natTable("-A", "PREROUTING", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { return err } // Listen for TCP packets on redirect port. - return listenTCP(redirectPort, sendloopDuration) + return listenTCP(acceptPort, sendloopDuration) } // LocalAction implements TestCase.LocalAction. -func (NATRedirectTCPPort) LocalAction(ip net.IP) error { +func (NATPreRedirectTCPPort) LocalAction(ip net.IP) error { return connectTCP(ip, dropPort, sendloopDuration) } +// NATOutRedirectUDPPort tests that packets are redirected to different port. +type NATOutRedirectUDPPort struct{} + +// Name implements TestCase.Name. +func (NATOutRedirectUDPPort) Name() string { + return "NATOutRedirectUDPPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATOutRedirectUDPPort) ContainerAction(ip net.IP) error { + dest := []byte{200, 0, 0, 1} + return loopbackTest(dest, "-A", "OUTPUT", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)) +} + +// LocalAction implements TestCase.LocalAction. +func (NATOutRedirectUDPPort) LocalAction(ip net.IP) error { + // No-op. + return nil +} + // NATDropUDP tests that packets are not received in ports other than redirect // port. type NATDropUDP struct{} @@ -329,3 +351,52 @@ func loopbackTest(dest net.IP, args ...string) error { // sendCh will always take the full sendloop time. return <-sendCh } + +// NATOutRedirectTCPPort tests that connections are redirected on specified ports. +type NATOutRedirectTCPPort struct{} + +// Name implements TestCase.Name. +func (NATOutRedirectTCPPort) Name() string { + return "NATOutRedirectTCPPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATOutRedirectTCPPort) ContainerAction(ip net.IP) error { + if err := natTable("-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { + return err + } + + timeout := 20 * time.Second + dest := []byte{127, 0, 0, 1} + localAddr := net.TCPAddr{ + IP: dest, + Port: acceptPort, + } + + // Starts listening on port. + lConn, err := net.ListenTCP("tcp", &localAddr) + if err != nil { + return err + } + defer lConn.Close() + + // Accept connections on port. + lConn.SetDeadline(time.Now().Add(timeout)) + err = connectTCP(ip, dropPort, timeout) + if err != nil { + return err + } + + conn, err := lConn.AcceptTCP() + if err != nil { + return err + } + conn.Close() + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (NATOutRedirectTCPPort) LocalAction(ip net.IP) error { + return nil +} -- cgit v1.2.3 From 56c64e4bb9dc75659799f3e5df9daddf10d810e1 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Fri, 1 May 2020 18:01:58 -0700 Subject: Fix include type. 
PiperOrigin-RevId: 309506957 --- test/packetimpact/dut/posix_server.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index cb499b0b1..2ebd3b95b 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -11,6 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -25,7 +26,6 @@ #include #include -#include "arpa/inet.h" #include "include/grpcpp/security/server_credentials.h" #include "include/grpcpp/server_builder.h" #include "test/packetimpact/proto/posix_server.grpc.pb.h" -- cgit v1.2.3 From 006f9788291359fc871b2fa7b82338912af7abb7 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 4 May 2020 12:57:04 -0700 Subject: Deflake //third_party/gvisor/test/syscalls:proc_test_native There is the known issue of the linux procfs, that two consequent calls of readdir can return the same entry twice if between these calls one or more entries have been removed from this directory. PiperOrigin-RevId: 309803066 --- test/syscalls/linux/proc.cc | 89 ++++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 33 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 79a625ebc..63642880a 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1939,43 +1939,66 @@ TEST(ProcSelfMounts, RequiredFieldsArePresent) { } void CheckDuplicatesRecursively(std::string path) { - errno = 0; - DIR* dir = opendir(path.c_str()); - if (dir == nullptr) { - // Ignore any directories we can't read or missing directories as the - // directory could have been deleted/mutated from the time the parent - // directory contents were read. - return; - } - auto dir_closer = Cleanup([&dir]() { closedir(dir); }); - std::unordered_set children; - while (true) { - // Readdir(3): If the end of the directory stream is reached, NULL is - // returned and errno is not changed. If an error occurs, NULL is returned - // and errno is set appropriately. To distinguish end of stream and from an - // error, set errno to zero before calling readdir() and then check the - // value of errno if NULL is returned. + std::vector child_dirs; + + // There is the known issue of the linux procfs, that two consequent calls of + // readdir can return the same entry twice if between these calls one or more + // entries have been removed from this directory. + int max_attempts = 5; + for (int i = 0; i < max_attempts; i++) { + child_dirs.clear(); errno = 0; - struct dirent* dp = readdir(dir); - if (dp == nullptr) { - ASSERT_EQ(errno, 0) << path; - break; // We're done. + bool success = true; + DIR* dir = opendir(path.c_str()); + if (dir == nullptr) { + // Ignore any directories we can't read or missing directories as the + // directory could have been deleted/mutated from the time the parent + // directory contents were read. + return; } - - if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) { - continue; + auto dir_closer = Cleanup([&dir]() { closedir(dir); }); + std::unordered_set children; + while (true) { + // Readdir(3): If the end of the directory stream is reached, NULL is + // returned and errno is not changed. If an error occurs, NULL is + // returned and errno is set appropriately. 
To distinguish end of stream + // and from an error, set errno to zero before calling readdir() and then + // check the value of errno if NULL is returned. + errno = 0; + struct dirent* dp = readdir(dir); + if (dp == nullptr) { + ASSERT_EQ(errno, 0) << path; + break; // We're done. + } + + if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) { + continue; + } + + // Ignore a duplicate entry if it isn't the last attempt. + if (i == max_attempts - 1) { + ASSERT_EQ(children.find(std::string(dp->d_name)), children.end()) + << absl::StrCat(path, "/", dp->d_name); + } else if (children.find(std::string(dp->d_name)) != children.end()) { + std::cerr << "Duplicate entry: " << i << ":" + << absl::StrCat(path, "/", dp->d_name) << std::endl; + success = false; + break; + } + children.insert(std::string(dp->d_name)); + + ASSERT_NE(dp->d_type, DT_UNKNOWN); + + if (dp->d_type == DT_DIR) { + child_dirs.push_back(std::string(dp->d_name)); + } } - - ASSERT_EQ(children.find(std::string(dp->d_name)), children.end()) - << dp->d_name; - children.insert(std::string(dp->d_name)); - - ASSERT_NE(dp->d_type, DT_UNKNOWN); - - if (dp->d_type != DT_DIR) { - continue; + if (success) { + break; } - CheckDuplicatesRecursively(absl::StrCat(path, "/", dp->d_name)); + } + for (auto dname = child_dirs.begin(); dname != child_dirs.end(); dname++) { + CheckDuplicatesRecursively(absl::StrCat(path, "/", *dname)); } } -- cgit v1.2.3 From e7ed68d22514fffd9d9e2edd4bf11489ae7b0ac8 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 4 May 2020 15:47:15 -0700 Subject: Internal change. PiperOrigin-RevId: 309832671 --- .../tests/udp_icmp_error_propagation_test.go | 144 ++++++++++++++++++--- 1 file changed, 123 insertions(+), 21 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index 66f64eaa2..c47af9a3e 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "net" + "sync" "syscall" "testing" "time" @@ -79,6 +80,38 @@ type testData struct { wantErrno syscall.Errno } +// wantErrno computes the errno to expect given the connection mode of a UDP +// socket and the ICMP error it will receive. +func wantErrno(c connectionMode, icmpErr icmpError) syscall.Errno { + if c && icmpErr == portUnreachable { + return syscall.Errno(unix.ECONNREFUSED) + } + return syscall.Errno(0) +} + +// sendICMPError sends an ICMP error message in response to a UDP datagram. +func sendICMPError(conn *tb.UDPIPv4, icmpErr icmpError, udp *tb.UDP) error { + if icmpErr == timeToLiveExceeded { + ip, ok := udp.Prev().(*tb.IPv4) + if !ok { + return fmt.Errorf("expected %s to be IPv4", udp.Prev()) + } + *ip.TTL = 1 + // Let serialization recalculate the checksum since we set the TTL + // to 1. + ip.Checksum = nil + + // Note that the ICMP payload is valid in this case because the UDP + // payload is empty. If the UDP payload were not empty, the packet + // length during serialization may not be calculated correctly, + // resulting in a mal-formed packet. + conn.SendIP(icmpErr.ToICMPv4(), ip, udp) + } else { + conn.SendIP(icmpErr.ToICMPv4(), udp.Prev(), udp) + } + return nil +} + // testRecv tests observing the ICMP error through the recv syscall. A packet // is sent to the DUT, and if wantErrno is non-zero, then the first recv should // fail and the second should succeed. 
Otherwise if wantErrno is zero then the @@ -174,10 +207,8 @@ func testSockOpt(_ context.Context, d testData) error { func TestUDPICMPErrorPropagation(t *testing.T) { for _, connect := range []connectionMode{true, false} { for _, icmpErr := range []icmpError{portUnreachable, timeToLiveExceeded} { - wantErrno := syscall.Errno(0) - if connect && icmpErr == portUnreachable { - wantErrno = unix.ECONNREFUSED - } + wantErrno := wantErrno(connect, icmpErr) + for _, errDetect := range []errorDetection{ errorDetection{"SendTo", false, testSendTo}, // Send to an address that's different from the one that caused an ICMP @@ -212,23 +243,8 @@ func TestUDPICMPErrorPropagation(t *testing.T) { t.Fatalf("did not receive message from DUT: %s", err) } - if icmpErr == timeToLiveExceeded { - ip, ok := udp.Prev().(*tb.IPv4) - if !ok { - t.Fatalf("expected %s to be IPv4", udp.Prev()) - } - *ip.TTL = 1 - // Let serialization recalculate the checksum since we set the TTL - // to 1. - ip.Checksum = nil - - // Note that the ICMP payload is valid in this case because the UDP - // payload is empty. If the UDP payload were not empty, the packet - // length during serialization may not be calculated correctly, - // resulting in a mal-formed packet. - conn.SendIP(icmpErr.ToICMPv4(), ip, udp) - } else { - conn.SendIP(icmpErr.ToICMPv4(), udp.Prev(), udp) + if err := sendICMPError(&conn, icmpErr, udp); err != nil { + t.Fatal(err) } errDetectConn := &conn @@ -251,3 +267,89 @@ func TestUDPICMPErrorPropagation(t *testing.T) { } } } + +// TestICMPErrorDuringUDPRecv tests behavior when a UDP socket is in the middle +// of a blocking recv and receives an ICMP error. +func TestICMPErrorDuringUDPRecv(t *testing.T) { + for _, connect := range []connectionMode{true, false} { + for _, icmpErr := range []icmpError{portUnreachable, timeToLiveExceeded} { + wantErrno := wantErrno(connect, icmpErr) + + t.Run(fmt.Sprintf("%s/%s", connect, icmpErr), func(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + + remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(remoteFD) + + // Create a second, clean socket on the DUT to ensure that the ICMP + // error messages only affect the sockets they are intended for. 
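+ // Its blocking recv below is expected to succeed even after the ICMP
+ // error has been delivered to remoteFD.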
+ cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(cleanFD) + + conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + defer conn.Close() + + if connect { + dut.Connect(remoteFD, conn.LocalAddr()) + dut.Connect(cleanFD, conn.LocalAddr()) + } + + dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) + udp, err := conn.Expect(tb.UDP{}, time.Second) + if err != nil { + t.Fatalf("did not receive message from DUT: %s", err) + } + + var wg sync.WaitGroup + wg.Add(2) + go func() { + if wantErrno != syscall.Errno(0) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0) + if ret != -1 { + t.Fatalf("recv during ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) + } + if err != wantErrno { + t.Fatalf("recv during ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0); ret == -1 { + t.Fatalf("recv after ICMP error failed with (%[1]d) %[1]", err) + } + wg.Done() + }() + + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if ret, _, err := dut.RecvWithErrno(ctx, cleanFD, 100, 0); ret == -1 { + t.Fatalf("recv on clean socket failed with (%[1]d) %[1]", err) + } + wg.Done() + }() + + // TODO(b/155684889) This sleep is to allow time for the DUT to + // actually call recv since we want the ICMP error to arrive during the + // blocking recv, and should be replaced when a better synchronization + // alternative is available. + time.Sleep(2 * time.Second) + + if err := sendICMPError(&conn, icmpErr, udp); err != nil { + t.Fatal(err) + } + + conn.Send(tb.UDP{DstPort: &cleanPort}) + conn.Send(tb.UDP{}) + wg.Wait() + }) + } + } +} -- cgit v1.2.3 From da71dc7fddda387232b243c6176de21a1208ad0c Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Mon, 4 May 2020 16:00:22 -0700 Subject: Port eventfd to VFS2. And move sys_timerfd.go to just timerfd.go for consistency. Updates #1475. 
PiperOrigin-RevId: 309835029 --- pkg/sentry/syscalls/linux/sys_eventfd.go | 17 +- pkg/sentry/syscalls/linux/vfs2/BUILD | 3 +- pkg/sentry/syscalls/linux/vfs2/eventfd.go | 59 +++++ .../syscalls/linux/vfs2/linux64_override_amd64.go | 4 +- pkg/sentry/syscalls/linux/vfs2/sys_timerfd.go | 123 --------- pkg/sentry/syscalls/linux/vfs2/timerfd.go | 123 +++++++++ pkg/sentry/vfs/BUILD | 4 + pkg/sentry/vfs/eventfd.go | 282 +++++++++++++++++++++ pkg/sentry/vfs/eventfd_test.go | 96 +++++++ test/syscalls/linux/eventfd.cc | 16 ++ 10 files changed, 589 insertions(+), 138 deletions(-) create mode 100644 pkg/sentry/syscalls/linux/vfs2/eventfd.go delete mode 100644 pkg/sentry/syscalls/linux/vfs2/sys_timerfd.go create mode 100644 pkg/sentry/syscalls/linux/vfs2/timerfd.go create mode 100644 pkg/sentry/vfs/eventfd.go create mode 100644 pkg/sentry/vfs/eventfd_test.go (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_eventfd.go b/pkg/sentry/syscalls/linux/sys_eventfd.go index 8a34c4e99..ed3413ca6 100644 --- a/pkg/sentry/syscalls/linux/sys_eventfd.go +++ b/pkg/sentry/syscalls/linux/sys_eventfd.go @@ -15,6 +15,7 @@ package linux import ( + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -22,32 +23,24 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) -const ( - // EFD_SEMAPHORE is a flag used in syscall eventfd(2) and eventfd2(2). Please - // see its man page for more information. - EFD_SEMAPHORE = 1 - EFD_NONBLOCK = 0x800 - EFD_CLOEXEC = 0x80000 -) - // Eventfd2 implements linux syscall eventfd2(2). func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { initVal := args[0].Int() flags := uint(args[1].Uint()) - allOps := uint(EFD_SEMAPHORE | EFD_NONBLOCK | EFD_CLOEXEC) + allOps := uint(linux.EFD_SEMAPHORE | linux.EFD_NONBLOCK | linux.EFD_CLOEXEC) if flags & ^allOps != 0 { return 0, nil, syserror.EINVAL } - event := eventfd.New(t, uint64(initVal), flags&EFD_SEMAPHORE != 0) + event := eventfd.New(t, uint64(initVal), flags&linux.EFD_SEMAPHORE != 0) event.SetFlags(fs.SettableFileFlags{ - NonBlocking: flags&EFD_NONBLOCK != 0, + NonBlocking: flags&linux.EFD_NONBLOCK != 0, }) defer event.DecRef() fd, err := t.NewFDFrom(0, event, kernel.FDFlags{ - CloseOnExec: flags&EFD_CLOEXEC != 0, + CloseOnExec: flags&linux.EFD_CLOEXEC != 0, }) if err != nil { return 0, nil, err diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index f6fb0f219..ffca627d4 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -6,6 +6,7 @@ go_library( name = "vfs2", srcs = [ "epoll.go", + "eventfd.go", "execve.go", "fd.go", "filesystem.go", @@ -26,7 +27,7 @@ go_library( "stat_amd64.go", "stat_arm64.go", "sync.go", - "sys_timerfd.go", + "timerfd.go", "xattr.go", ], marshal = True, diff --git a/pkg/sentry/syscalls/linux/vfs2/eventfd.go b/pkg/sentry/syscalls/linux/vfs2/eventfd.go new file mode 100644 index 000000000..bd2194972 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/eventfd.go @@ -0,0 +1,59 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Eventfd2 implements linux syscall eventfd2(2). +func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + initVal := uint64(args[0].Uint()) + flags := uint(args[1].Uint()) + allOps := uint(linux.EFD_SEMAPHORE | linux.EFD_NONBLOCK | linux.EFD_CLOEXEC) + + if flags & ^allOps != 0 { + return 0, nil, syserror.EINVAL + } + + fileFlags := uint32(linux.O_RDWR) + if flags&linux.EFD_NONBLOCK != 0 { + fileFlags |= linux.O_NONBLOCK + } + semMode := flags&linux.EFD_SEMAPHORE != 0 + eventfd, err := t.Kernel().VFS().NewEventFD(initVal, semMode, fileFlags) + if err != nil { + return 0, nil, err + } + defer eventfd.DecRef() + + fd, err := t.NewFDFromVFS2(0, eventfd, kernel.FDFlags{ + CloseOnExec: flags&linux.EFD_CLOEXEC != 0, + }) + if err != nil { + return 0, nil, err + } + + return uintptr(fd), nil, nil +} + +// Eventfd implements linux syscall eventfd(2). +func Eventfd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + args[1].Value = 0 + return Eventfd2(t, args) +} diff --git a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go index 74920f785..074f58e5d 100644 --- a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go +++ b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go @@ -141,14 +141,14 @@ func Override(table map[uintptr]kernel.Syscall) { table[281] = syscalls.Supported("epoll_pwait", EpollPwait) delete(table, 282) // signalfd table[283] = syscalls.Supported("timerfd_create", TimerfdCreate) - delete(table, 284) // eventfd + table[284] = syscalls.Supported("eventfd", Eventfd) delete(table, 285) // fallocate table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime) table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime) // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. table[288] = syscalls.PartiallySupported("accept4", Accept4, "In process of porting socket syscalls to VFS2.", nil) delete(table, 289) // signalfd4 - delete(table, 290) // eventfd2 + table[290] = syscalls.Supported("eventfd2", Eventfd2) table[291] = syscalls.Supported("epoll_create1", EpollCreate1) table[292] = syscalls.Supported("dup3", Dup3) table[293] = syscalls.Supported("pipe2", Pipe2) diff --git a/pkg/sentry/syscalls/linux/vfs2/sys_timerfd.go b/pkg/sentry/syscalls/linux/vfs2/sys_timerfd.go deleted file mode 100644 index 7938a5249..000000000 --- a/pkg/sentry/syscalls/linux/vfs2/sys_timerfd.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package vfs2 - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/kernel" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// TimerfdCreate implements Linux syscall timerfd_create(2). -func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - clockID := args[0].Int() - flags := args[1].Int() - - if flags&^(linux.TFD_CLOEXEC|linux.TFD_NONBLOCK) != 0 { - return 0, nil, syserror.EINVAL - } - - var fileFlags uint32 - if flags&linux.TFD_NONBLOCK != 0 { - fileFlags = linux.O_NONBLOCK - } - - var clock ktime.Clock - switch clockID { - case linux.CLOCK_REALTIME: - clock = t.Kernel().RealtimeClock() - case linux.CLOCK_MONOTONIC, linux.CLOCK_BOOTTIME: - clock = t.Kernel().MonotonicClock() - default: - return 0, nil, syserror.EINVAL - } - file, err := t.Kernel().VFS().NewTimerFD(clock, fileFlags) - if err != nil { - return 0, nil, err - } - defer file.DecRef() - fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{ - CloseOnExec: flags&linux.TFD_CLOEXEC != 0, - }) - if err != nil { - return 0, nil, err - } - return uintptr(fd), nil, nil -} - -// TimerfdSettime implements Linux syscall timerfd_settime(2). -func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := args[0].Int() - flags := args[1].Int() - newValAddr := args[2].Pointer() - oldValAddr := args[3].Pointer() - - if flags&^(linux.TFD_TIMER_ABSTIME) != 0 { - return 0, nil, syserror.EINVAL - } - - file := t.GetFileVFS2(fd) - if file == nil { - return 0, nil, syserror.EBADF - } - defer file.DecRef() - - tfd, ok := file.Impl().(*vfs.TimerFileDescription) - if !ok { - return 0, nil, syserror.EINVAL - } - - var newVal linux.Itimerspec - if _, err := t.CopyIn(newValAddr, &newVal); err != nil { - return 0, nil, err - } - newS, err := ktime.SettingFromItimerspec(newVal, flags&linux.TFD_TIMER_ABSTIME != 0, tfd.Clock()) - if err != nil { - return 0, nil, err - } - tm, oldS := tfd.SetTime(newS) - if oldValAddr != 0 { - oldVal := ktime.ItimerspecFromSetting(tm, oldS) - if _, err := t.CopyOut(oldValAddr, &oldVal); err != nil { - return 0, nil, err - } - } - return 0, nil, nil -} - -// TimerfdGettime implements Linux syscall timerfd_gettime(2). 
-func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := args[0].Int() - curValAddr := args[1].Pointer() - - file := t.GetFileVFS2(fd) - if file == nil { - return 0, nil, syserror.EBADF - } - defer file.DecRef() - - tfd, ok := file.Impl().(*vfs.TimerFileDescription) - if !ok { - return 0, nil, syserror.EINVAL - } - - tm, s := tfd.GetTime() - curVal := ktime.ItimerspecFromSetting(tm, s) - _, err := t.CopyOut(curValAddr, &curVal) - return 0, nil, err -} diff --git a/pkg/sentry/syscalls/linux/vfs2/timerfd.go b/pkg/sentry/syscalls/linux/vfs2/timerfd.go new file mode 100644 index 000000000..7938a5249 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/timerfd.go @@ -0,0 +1,123 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// TimerfdCreate implements Linux syscall timerfd_create(2). +func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + clockID := args[0].Int() + flags := args[1].Int() + + if flags&^(linux.TFD_CLOEXEC|linux.TFD_NONBLOCK) != 0 { + return 0, nil, syserror.EINVAL + } + + var fileFlags uint32 + if flags&linux.TFD_NONBLOCK != 0 { + fileFlags = linux.O_NONBLOCK + } + + var clock ktime.Clock + switch clockID { + case linux.CLOCK_REALTIME: + clock = t.Kernel().RealtimeClock() + case linux.CLOCK_MONOTONIC, linux.CLOCK_BOOTTIME: + clock = t.Kernel().MonotonicClock() + default: + return 0, nil, syserror.EINVAL + } + file, err := t.Kernel().VFS().NewTimerFD(clock, fileFlags) + if err != nil { + return 0, nil, err + } + defer file.DecRef() + fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{ + CloseOnExec: flags&linux.TFD_CLOEXEC != 0, + }) + if err != nil { + return 0, nil, err + } + return uintptr(fd), nil, nil +} + +// TimerfdSettime implements Linux syscall timerfd_settime(2). 
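+// It validates the flags, installs the new setting on the timer file's
+// timer, and, if oldValAddr is non-zero, copies the previous setting back
+// out to userspace.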
+func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + flags := args[1].Int() + newValAddr := args[2].Pointer() + oldValAddr := args[3].Pointer() + + if flags&^(linux.TFD_TIMER_ABSTIME) != 0 { + return 0, nil, syserror.EINVAL + } + + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + tfd, ok := file.Impl().(*vfs.TimerFileDescription) + if !ok { + return 0, nil, syserror.EINVAL + } + + var newVal linux.Itimerspec + if _, err := t.CopyIn(newValAddr, &newVal); err != nil { + return 0, nil, err + } + newS, err := ktime.SettingFromItimerspec(newVal, flags&linux.TFD_TIMER_ABSTIME != 0, tfd.Clock()) + if err != nil { + return 0, nil, err + } + tm, oldS := tfd.SetTime(newS) + if oldValAddr != 0 { + oldVal := ktime.ItimerspecFromSetting(tm, oldS) + if _, err := t.CopyOut(oldValAddr, &oldVal); err != nil { + return 0, nil, err + } + } + return 0, nil, nil +} + +// TimerfdGettime implements Linux syscall timerfd_gettime(2). +func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + curValAddr := args[1].Pointer() + + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + tfd, ok := file.Impl().(*vfs.TimerFileDescription) + if !ok { + return 0, nil, syserror.EINVAL + } + + tm, s := tfd.GetTime() + curVal := ktime.ItimerspecFromSetting(tm, s) + _, err := t.CopyOut(curValAddr, &curVal) + return 0, nil, err +} diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 9aeb83fb0..c62505fe2 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -25,6 +25,7 @@ go_library( "device.go", "epoll.go", "epoll_interest_list.go", + "eventfd.go", "file_description.go", "file_description_impl_util.go", "filesystem.go", @@ -44,6 +45,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fd", + "//pkg/fdnotifier", "//pkg/fspath", "//pkg/gohacks", "//pkg/log", @@ -68,6 +70,7 @@ go_test( name = "vfs_test", size = "small", srcs = [ + "eventfd_test.go", "file_description_impl_util_test.go", "mount_test.go", ], @@ -79,5 +82,6 @@ go_test( "//pkg/sync", "//pkg/syserror", "//pkg/usermem", + "//pkg/waiter", ], ) diff --git a/pkg/sentry/vfs/eventfd.go b/pkg/sentry/vfs/eventfd.go new file mode 100644 index 000000000..f39dacacf --- /dev/null +++ b/pkg/sentry/vfs/eventfd.go @@ -0,0 +1,282 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs + +import ( + "math" + "sync" + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +// EventFileDescription implements FileDescriptionImpl for file-based event +// notification (eventfd). 
Eventfds are usually internal to the Sentry but in +// certain situations they may be converted into a host-backed eventfd. +type EventFileDescription struct { + vfsfd FileDescription + FileDescriptionDefaultImpl + DentryMetadataFileDescriptionImpl + + // queue is used to notify interested parties when the event object + // becomes readable or writable. + queue waiter.Queue `state:"zerovalue"` + + // mu protects the fields below. + mu sync.Mutex `state:"nosave"` + + // val is the current value of the event counter. + val uint64 + + // semMode specifies whether the event is in "semaphore" mode. + semMode bool + + // hostfd indicates whether this eventfd is passed through to the host. + hostfd int +} + +var _ FileDescriptionImpl = (*EventFileDescription)(nil) + +// NewEventFD creates a new event fd. +func (vfs *VirtualFilesystem) NewEventFD(initVal uint64, semMode bool, flags uint32) (*FileDescription, error) { + vd := vfs.NewAnonVirtualDentry("[eventfd]") + defer vd.DecRef() + efd := &EventFileDescription{ + val: initVal, + semMode: semMode, + hostfd: -1, + } + if err := efd.vfsfd.Init(efd, flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{ + UseDentryMetadata: true, + DenyPRead: true, + DenyPWrite: true, + }); err != nil { + return nil, err + } + return &efd.vfsfd, nil +} + +// HostFD returns the host eventfd associated with this event. +func (efd *EventFileDescription) HostFD() (int, error) { + efd.mu.Lock() + defer efd.mu.Unlock() + if efd.hostfd >= 0 { + return efd.hostfd, nil + } + + flags := linux.EFD_NONBLOCK + if efd.semMode { + flags |= linux.EFD_SEMAPHORE + } + + fd, _, errno := syscall.Syscall(syscall.SYS_EVENTFD2, uintptr(efd.val), uintptr(flags), 0) + if errno != 0 { + return -1, errno + } + + if err := fdnotifier.AddFD(int32(fd), &efd.queue); err != nil { + if closeErr := syscall.Close(int(fd)); closeErr != nil { + log.Warningf("close(%d) eventfd failed: %v", fd, closeErr) + } + return -1, err + } + + efd.hostfd = int(fd) + return efd.hostfd, nil +} + +// Release implements FileDescriptionImpl.Release() +func (efd *EventFileDescription) Release() { + efd.mu.Lock() + defer efd.mu.Unlock() + if efd.hostfd >= 0 { + fdnotifier.RemoveFD(int32(efd.hostfd)) + if closeErr := syscall.Close(int(efd.hostfd)); closeErr != nil { + log.Warningf("close(%d) eventfd failed: %v", efd.hostfd, closeErr) + } + efd.hostfd = -1 + } +} + +// Read implements FileDescriptionImpl.Read. +func (efd *EventFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ ReadOptions) (int64, error) { + if dst.NumBytes() < 8 { + return 0, syscall.EINVAL + } + if err := efd.read(ctx, dst); err != nil { + return 0, err + } + return 8, nil +} + +// Write implements FileDescriptionImpl.Write. +func (efd *EventFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ WriteOptions) (int64, error) { + if src.NumBytes() < 8 { + return 0, syscall.EINVAL + } + if err := efd.write(ctx, src); err != nil { + return 0, err + } + return 8, nil +} + +// Preconditions: Must be called with efd.mu locked. 
+func (efd *EventFileDescription) hostReadLocked(ctx context.Context, dst usermem.IOSequence) error { + var buf [8]byte + if _, err := syscall.Read(efd.hostfd, buf[:]); err != nil { + if err == syscall.EWOULDBLOCK { + return syserror.ErrWouldBlock + } + return err + } + _, err := dst.CopyOut(ctx, buf[:]) + return err +} + +func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequence) error { + efd.mu.Lock() + if efd.hostfd >= 0 { + defer efd.mu.Unlock() + return efd.hostReadLocked(ctx, dst) + } + + // We can't complete the read if the value is currently zero. + if efd.val == 0 { + efd.mu.Unlock() + return syserror.ErrWouldBlock + } + + // Update the value based on the mode the event is operating in. + var val uint64 + if efd.semMode { + val = 1 + // Consistent with Linux, this is done even if writing to memory fails. + efd.val-- + } else { + val = efd.val + efd.val = 0 + } + + efd.mu.Unlock() + + // Notify writers. We do this even if we were already writable because + // it is possible that a writer is waiting to write the maximum value + // to the event. + efd.queue.Notify(waiter.EventOut) + + var buf [8]byte + usermem.ByteOrder.PutUint64(buf[:], val) + _, err := dst.CopyOut(ctx, buf[:]) + return err +} + +// Preconditions: Must be called with efd.mu locked. +func (efd *EventFileDescription) hostWriteLocked(val uint64) error { + var buf [8]byte + usermem.ByteOrder.PutUint64(buf[:], val) + _, err := syscall.Write(efd.hostfd, buf[:]) + if err == syscall.EWOULDBLOCK { + return syserror.ErrWouldBlock + } + return err +} + +func (efd *EventFileDescription) write(ctx context.Context, src usermem.IOSequence) error { + var buf [8]byte + if _, err := src.CopyIn(ctx, buf[:]); err != nil { + return err + } + val := usermem.ByteOrder.Uint64(buf[:]) + + return efd.Signal(val) +} + +// Signal is an internal function to signal the event fd. +func (efd *EventFileDescription) Signal(val uint64) error { + if val == math.MaxUint64 { + return syscall.EINVAL + } + + efd.mu.Lock() + + if efd.hostfd >= 0 { + defer efd.mu.Unlock() + return efd.hostWriteLocked(val) + } + + // We only allow writes that won't cause the value to go over the max + // uint64 minus 1. + if val > math.MaxUint64-1-efd.val { + efd.mu.Unlock() + return syserror.ErrWouldBlock + } + + efd.val += val + efd.mu.Unlock() + + // Always trigger a notification. + efd.queue.Notify(waiter.EventIn) + + return nil +} + +// Readiness implements waiter.Waitable.Readiness. +func (efd *EventFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { + efd.mu.Lock() + defer efd.mu.Unlock() + + if efd.hostfd >= 0 { + return fdnotifier.NonBlockingPoll(int32(efd.hostfd), mask) + } + + ready := waiter.EventMask(0) + if efd.val > 0 { + ready |= waiter.EventIn + } + + if efd.val < math.MaxUint64-1 { + ready |= waiter.EventOut + } + + return mask & ready +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (efd *EventFileDescription) EventRegister(entry *waiter.Entry, mask waiter.EventMask) { + efd.queue.EventRegister(entry, mask) + + efd.mu.Lock() + defer efd.mu.Unlock() + if efd.hostfd >= 0 { + fdnotifier.UpdateFD(int32(efd.hostfd)) + } +} + +// EventUnregister implements waiter.Waitable.EventUnregister. 
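// For context, the guest-visible contract this file implements (a sketch,
// not from this patch; it assumes the golang.org/x/sys/unix wrapper and a
// little-endian host for the byte encoding):
//
//	fd, err := unix.Eventfd(1 /* initial counter */, unix.EFD_CLOEXEC)
//	if err != nil {
//		return err
//	}
//	var buf [8]byte
//	binary.LittleEndian.PutUint64(buf[:], 3)
//	unix.Write(fd, buf[:]) // counter becomes 4
//	unix.Read(fd, buf[:])  // returns 4 and resets the counter to 0
//
// With EFD_SEMAPHORE the read would instead return 1 and leave 3 behind, and
// writing math.MaxUint64 is rejected with EINVAL, both of which the read and
// Signal paths above mirror. Because NewEventFD sets DenyPRead/DenyPWrite,
// pread(2)/pwrite(2) on the descriptor fail with ESPIPE, which the eventfd.cc
// tests added later in this change verify.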
+func (efd *EventFileDescription) EventUnregister(entry *waiter.Entry) { + efd.queue.EventUnregister(entry) + + efd.mu.Lock() + defer efd.mu.Unlock() + if efd.hostfd >= 0 { + fdnotifier.UpdateFD(int32(efd.hostfd)) + } +} diff --git a/pkg/sentry/vfs/eventfd_test.go b/pkg/sentry/vfs/eventfd_test.go new file mode 100644 index 000000000..2dff2d10b --- /dev/null +++ b/pkg/sentry/vfs/eventfd_test.go @@ -0,0 +1,96 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs + +import ( + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/contexttest" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +func TestEventFD(t *testing.T) { + initVals := []uint64{ + 0, + // Using a non-zero initial value verifies that writing to an + // eventfd signals when the eventfd's counter was already + // non-zero. + 343, + } + + for _, initVal := range initVals { + ctx := contexttest.Context(t) + vfsObj := &VirtualFilesystem{} + if err := vfsObj.Init(); err != nil { + t.Fatalf("VFS init: %v", err) + } + + // Make a new eventfd that is writable. + eventfd, err := vfsObj.NewEventFD(initVal, false, linux.O_RDWR) + if err != nil { + t.Fatalf("NewEventFD failed: %v", err) + } + defer eventfd.DecRef() + + // Register a callback for a write event. + w, ch := waiter.NewChannelEntry(nil) + eventfd.EventRegister(&w, waiter.EventIn) + defer eventfd.EventUnregister(&w) + + data := []byte("00000124") + // Create and submit a write request. + n, err := eventfd.Write(ctx, usermem.BytesIOSequence(data), WriteOptions{}) + if err != nil { + t.Fatal(err) + } + if n != 8 { + t.Errorf("eventfd.write wrote %d bytes, not full int64", n) + } + + // Check if the callback fired due to the write event. + select { + case <-ch: + default: + t.Errorf("Didn't get notified of EventIn after write") + } + } +} + +func TestEventFDStat(t *testing.T) { + ctx := contexttest.Context(t) + vfsObj := &VirtualFilesystem{} + if err := vfsObj.Init(); err != nil { + t.Fatalf("VFS init: %v", err) + } + + // Make a new eventfd that is writable. 
+ eventfd, err := vfsObj.NewEventFD(0, false, linux.O_RDWR) + if err != nil { + t.Fatalf("NewEventFD failed: %v", err) + } + defer eventfd.DecRef() + + statx, err := eventfd.Stat(ctx, StatOptions{ + Mask: linux.STATX_BASIC_STATS, + }) + if err != nil { + t.Fatalf("eventfd.Stat failed: %v", err) + } + if statx.Size != 0 { + t.Errorf("eventfd size should be 0") + } +} diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc index 927001eee..548b05a64 100644 --- a/test/syscalls/linux/eventfd.cc +++ b/test/syscalls/linux/eventfd.cc @@ -100,6 +100,22 @@ TEST(EventfdTest, SmallRead) { ASSERT_THAT(read(efd.get(), &l, 4), SyscallFailsWithErrno(EINVAL)); } +TEST(EventfdTest, PreadIllegalSeek) { + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + + uint64_t l = 0; + ASSERT_THAT(pread(efd.get(), &l, 4, 0), SyscallFailsWithErrno(ESPIPE)); +} + +TEST(EventfdTest, PwriteIllegalSeek) { + FileDescriptor efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + + uint64_t l = 0; + ASSERT_THAT(pwrite(efd.get(), &l, 4, 0), SyscallFailsWithErrno(ESPIPE)); +} + TEST(EventfdTest, BigWrite) { FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); -- cgit v1.2.3 From e5d9e7c3b2920ef6f7034aa5180f059b6ccecb54 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 5 May 2020 12:41:59 -0700 Subject: Internal change. PiperOrigin-RevId: 310001058 --- test/runtimes/proctor/BUILD | 1 + 1 file changed, 1 insertion(+) (limited to 'test') diff --git a/test/runtimes/proctor/BUILD b/test/runtimes/proctor/BUILD index 50a26d182..da1e331e1 100644 --- a/test/runtimes/proctor/BUILD +++ b/test/runtimes/proctor/BUILD @@ -21,6 +21,7 @@ go_test( size = "small", srcs = ["proctor_test.go"], library = ":proctor", + nocgo = 1, deps = [ "//pkg/test/testutil", ], -- cgit v1.2.3 From e590314fec7443527e2a3bad8d8c58a83474c86d Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Tue, 5 May 2020 14:28:28 -0700 Subject: Support TCP zero window probes. As per RFC 1122 4.2.2.17, when the remote advertizes zero receive window, the sender needs to probe for the window-size to become non-zero starting from the next retransmission interval. The TCP connection needs to be kept open as long as the remote is acknowledging the zero window probes. We reuse the retransmission timers to support this. Fixes #1644 PiperOrigin-RevId: 310021575 --- pkg/tcpip/transport/tcp/snd.go | 94 +++++++++++++++++- pkg/tcpip/transport/tcp/tcp_test.go | 17 +++- test/packetimpact/tests/BUILD | 32 +++++- .../tests/tcp_zero_window_probe_retransmit_test.go | 99 +++++++++++++++++++ .../tests/tcp_zero_window_probe_test.go | 107 +++++++++++++++++++++ .../tcp_zero_window_probe_usertimeout_test.go | 93 ++++++++++++++++++ 6 files changed, 433 insertions(+), 9 deletions(-) create mode 100644 test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go create mode 100644 test/packetimpact/tests/tcp_zero_window_probe_test.go create mode 100644 test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index d8cfe3115..a3018914b 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -41,6 +41,10 @@ const ( // nDupAckThreshold is the number of duplicate ACK's required // before fast-retransmit is entered. 
nDupAckThreshold = 3 + + // MaxRetries is the maximum number of probe retries sender does + // before timing out the connection, Linux default TCP_RETR2. + MaxRetries = 15 ) // ccState indicates the current congestion control state for this sender. @@ -138,6 +142,14 @@ type sender struct { // the first segment that was retransmitted due to RTO expiration. firstRetransmittedSegXmitTime time.Time `state:".(unixTime)"` + // zeroWindowProbing is set if the sender is currently probing + // for zero receive window. + zeroWindowProbing bool `state:"nosave"` + + // unackZeroWindowProbes is the number of unacknowledged zero + // window probes. + unackZeroWindowProbes uint32 `state:"nosave"` + closed bool writeNext *segment writeList segmentList @@ -479,10 +491,24 @@ func (s *sender) retransmitTimerExpired() bool { remaining = uto - elapsed } - if remaining <= 0 || s.rto >= MaxRTO { + // Always honor the user-timeout irrespective of whether the zero + // window probes were acknowledged. + // net/ipv4/tcp_timer.c::tcp_probe_timer() + if remaining <= 0 || s.unackZeroWindowProbes >= MaxRetries { return false } + if s.rto >= MaxRTO { + // RFC 1122 section: 4.2.2.17 + // A TCP MAY keep its offered receive window closed + // indefinitely. As long as the receiving TCP continues to + // send acknowledgments in response to the probe segments, the + // sending TCP MUST allow the connection to stay open. + if !(s.zeroWindowProbing && s.unackZeroWindowProbes == 0) { + return false + } + } + // Set new timeout. The timer will be restarted by the call to sendData // below. s.rto *= 2 @@ -533,6 +559,15 @@ func (s *sender) retransmitTimerExpired() bool { // information is usable after an RTO. s.ep.scoreboard.Reset() s.writeNext = s.writeList.Front() + + // RFC 1122 4.2.2.17: Start sending zero window probes when we still see a + // zero receive window after retransmission interval and we have data to + // send. + if s.zeroWindowProbing { + s.sendZeroWindowProbe() + return true + } + s.sendData() return true @@ -827,6 +862,34 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) return dataSent } +func (s *sender) sendZeroWindowProbe() { + ack, win := s.ep.rcv.getSendParams() + s.unackZeroWindowProbes++ + // Send a zero window probe with sequence number pointing to + // the last acknowledged byte. + s.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, s.sndUna-1, ack, win) + // Rearm the timer to continue probing. + s.resendTimer.enable(s.rto) +} + +func (s *sender) enableZeroWindowProbing() { + s.zeroWindowProbing = true + // We piggyback the probing on the retransmit timer with the + // current retranmission interval, as we may start probing while + // segment retransmissions. + if s.firstRetransmittedSegXmitTime.IsZero() { + s.firstRetransmittedSegXmitTime = time.Now() + } + s.resendTimer.enable(s.rto) +} + +func (s *sender) disableZeroWindowProbing() { + s.zeroWindowProbing = false + s.unackZeroWindowProbes = 0 + s.firstRetransmittedSegXmitTime = time.Time{} + s.resendTimer.disable() +} + // sendData sends new data segments. It is called when data becomes available or // when the send window opens up. func (s *sender) sendData() { @@ -875,6 +938,13 @@ func (s *sender) sendData() { s.ep.disableKeepaliveTimer() } + // If the sender has advertized zero receive window and we have + // data to be sent out, start zero window probing to query the + // the remote for it's receive window size. 
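// The concrete effect of arming probing here (informal illustration, not
// from this change): suppose the peer advertises a zero window while the
// current RTO is 1s. Probes carrying sequence number sndUna-1 and no payload
// then go out after roughly 1s, 2s, 4s, 8s, ... as the RTO doubles in
// retransmitTimerExpired, and the connection is kept open for as long as
// each probe is acknowledged, or torn down once MaxRetries unacknowledged
// probes accumulate or the user timeout expires.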
+ if s.writeNext != nil && s.sndWnd == 0 { + s.enableZeroWindowProbing() + } + // Enable the timer if we have pending data and it's not enabled yet. if !s.resendTimer.enabled() && s.sndUna != s.sndNxt { s.resendTimer.enable(s.rto) @@ -1122,8 +1192,26 @@ func (s *sender) handleRcvdSegment(seg *segment) { // Stash away the current window size. s.sndWnd = seg.window - // Ignore ack if it doesn't acknowledge any new data. ack := seg.ackNumber + + // Disable zero window probing if remote advertizes a non-zero receive + // window. This can be with an ACK to the zero window probe (where the + // acknumber refers to the already acknowledged byte) OR to any previously + // unacknowledged segment. + if s.zeroWindowProbing && seg.window > 0 && + (ack == s.sndUna || (ack-1).InRange(s.sndUna, s.sndNxt)) { + s.disableZeroWindowProbing() + } + + // On receiving the ACK for the zero window probe, account for it and + // skip trying to send any segment as we are still probing for + // receive window to become non-zero. + if s.zeroWindowProbing && s.unackZeroWindowProbes > 0 && ack == s.sndUna { + s.unackZeroWindowProbes-- + return + } + + // Ignore ack if it doesn't acknowledge any new data. if (ack - 1).InRange(s.sndUna, s.sndNxt) { s.dupAckCount = 0 @@ -1143,7 +1231,7 @@ func (s *sender) handleRcvdSegment(seg *segment) { } // When an ack is received we must rearm the timer. - // RFC 6298 5.2 + // RFC 6298 5.3 s.resendTimer.enable(s.rto) // Remove all acknowledged data from the write list. diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 33e2b9a09..49e4ba214 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -1900,7 +1900,7 @@ func TestZeroWindowSend(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() - c.CreateConnected(789, 0, -1 /* epRcvBuf */) + c.CreateConnected(789 /* iss */, 0 /* rcvWnd */, -1 /* epRcvBuf */) data := []byte{1, 2, 3} view := buffer.NewView(len(data)) @@ -1911,8 +1911,17 @@ func TestZeroWindowSend(t *testing.T) { t.Fatalf("Write failed: %v", err) } - // Since the window is currently zero, check that no packet is received. - c.CheckNoPacket("Packet received when window is zero") + // Check if we got a zero-window probe. + b := c.GetPacket() + checker.IPv4(t, b, + checker.PayloadLen(header.TCPMinimumSize), + checker.TCP( + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS)), + checker.AckNum(790), + checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)), + ), + ) // Open up the window. Data should be received now. c.SendPacket(nil, &context.Headers{ @@ -1925,7 +1934,7 @@ func TestZeroWindowSend(t *testing.T) { }) // Check that data is received. - b := c.GetPacket() + b = c.GetPacket() checker.IPv4(t, b, checker.PayloadLen(len(data)+header.TCPMinimumSize), checker.TCP( diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 6beccbfd0..e4ced444b 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -44,8 +44,36 @@ packetimpact_go_test( packetimpact_go_test( name = "tcp_window_shrink", srcs = ["tcp_window_shrink_test.go"], - # TODO(b/153202472): Fix netstack then remove the line below. 
- netstack = False, + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +packetimpact_go_test( + name = "tcp_zero_window_probe", + srcs = ["tcp_zero_window_probe_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +packetimpact_go_test( + name = "tcp_zero_window_probe_retransmit", + srcs = ["tcp_zero_window_probe_retransmit_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +packetimpact_go_test( + name = "tcp_zero_window_probe_usertimeout", + srcs = ["tcp_zero_window_probe_usertimeout_test.go"], deps = [ "//pkg/tcpip/header", "//test/packetimpact/testbench", diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go new file mode 100644 index 000000000..864e5a634 --- /dev/null +++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go @@ -0,0 +1,99 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_zero_window_probe_retransmit_test + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +// TestZeroWindowProbeRetransmit tests retransmits of zero window probes +// to be sent at exponentially inreasing time intervals. +func TestZeroWindowProbeRetransmit(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + acceptFd, _ := dut.Accept(listenFd) + defer dut.Close(acceptFd) + + dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + + sampleData := []byte("Sample Data") + samplePayload := &tb.Payload{Bytes: sampleData} + + // Send and receive sample data to the dut. + dut.Send(acceptFd, sampleData, 0) + if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + t.Fatalf("expected a packet with sequence number %s", err) + } + + // Check for the dut to keep the connection alive as long as the zero window + // probes are acknowledged. Check if the zero window probes are sent at + // exponentially increasing intervals. The timeout intervals are function + // of the recorded first zero probe transmission duration. + // + // Advertize zero receive window again. 
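// (Informal note, not from this test: a zero-window probe carries the
// sequence number of the last byte the receiver already acknowledged, one
// before the next expected byte, which is why probeSeq below is
// RemoteSeqNum()-1 while the ACK answering the probe acknowledges
// RemoteSeqNum() itself.)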
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1)) + ackProbe := tb.Uint32(uint32(*conn.RemoteSeqNum())) + + startProbeDuration := time.Second + current := startProbeDuration + first := time.Now() + // Ask the dut to send out data. + dut.Send(acceptFd, sampleData, 0) + // Expect the dut to keep the connection alive as long as the remote is + // acknowledging the zero-window probes. + for i := 0; i < 5; i++ { + start := time.Now() + // Expect zero-window probe with a timeout which is a function of the typical + // first retransmission time. The retransmission times is supposed to + // exponentially increase. + if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil { + t.Fatalf("expected a probe with sequence number %v: loop %d", probeSeq, i) + } + if i == 0 { + startProbeDuration = time.Now().Sub(first) + current = 2 * startProbeDuration + continue + } + // Check if the probes came at exponentially increasing intervals. + if p := time.Since(start); p < current-startProbeDuration { + t.Fatalf("zero probe came sooner interval %d probe %d\n", p, i) + } + // Acknowledge the zero-window probes from the dut. + conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + current *= 2 + } + // Advertize non-zero window. + conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck)}) + // Expect the dut to recover and transmit data. + if _, err := conn.ExpectData(&tb.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } +} diff --git a/test/packetimpact/tests/tcp_zero_window_probe_test.go b/test/packetimpact/tests/tcp_zero_window_probe_test.go new file mode 100644 index 000000000..4fa3d0cd4 --- /dev/null +++ b/test/packetimpact/tests/tcp_zero_window_probe_test.go @@ -0,0 +1,107 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_zero_window_probe_test + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +// TestZeroWindowProbe tests few cases of zero window probing over the +// same connection. +func TestZeroWindowProbe(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + acceptFd, _ := dut.Accept(listenFd) + defer dut.Close(acceptFd) + + dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + + sampleData := []byte("Sample Data") + samplePayload := &tb.Payload{Bytes: sampleData} + + start := time.Now() + // Send and receive sample data to the dut. 
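// (Informal note, not from this test: the duration measured from the start
// timestamp just above to the arrival of the payload below, recorded as
// sendTime, is used further down as a baseline. The first zero-window probe
// must take strictly longer than that, since it is only sent after a
// retransmission interval rather than immediately.)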
+ dut.Send(acceptFd, sampleData, 0) + if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + sendTime := time.Now().Sub(start) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + t.Fatalf("expected a packet with sequence number %s", err) + } + + // Test 1: Check for receive of a zero window probe, record the duration for + // probe to be sent. + // + // Advertize zero window to the dut. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + + // Expected sequence number of the zero window probe. + probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1)) + // Expected ack number of the ACK for the probe. + ackProbe := tb.Uint32(uint32(*conn.RemoteSeqNum())) + + // Expect there are no zero-window probes sent until there is data to be sent out + // from the dut. + if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil { + t.Fatalf("unexpected a packet with sequence number %v: %s", probeSeq, err) + } + + start = time.Now() + // Ask the dut to send out data. + dut.Send(acceptFd, sampleData, 0) + // Expect zero-window probe from the dut. + if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { + t.Fatalf("expected a packet with sequence number %v: %s", probeSeq, err) + } + // Expect the probe to be sent after some time. Compare against the previous + // time recorded when the dut immediately sends out data on receiving the + // send command. + if startProbeDuration := time.Now().Sub(start); startProbeDuration <= sendTime { + t.Fatalf("expected the first probe to be sent out after retransmission interval, got %v want > %v\n", startProbeDuration, sendTime) + } + + // Test 2: Check if the dut recovers on advertizing non-zero receive window. + // and sends out the sample payload after the send window opens. + // + // Advertize non-zero window to the dut and ack the zero window probe. + conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck)}) + // Expect the dut to recover and transmit data. + if _, err := conn.ExpectData(&tb.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + + // Test 3: Sanity check for dut's processing of a similar probe it sent. + // Check if the dut responds as we do for a similar probe sent to it. + // Basically with sequence number to one byte behind the unacknowledged + // sequence number. + p := tb.Uint32(uint32(*conn.LocalSeqNum())) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), SeqNum: tb.Uint32(uint32(*conn.LocalSeqNum() - 1))}) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil { + t.Fatalf("expected a packet with ack number: %d: %s", p, err) + } +} diff --git a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go new file mode 100644 index 000000000..7d81c276c --- /dev/null +++ b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go @@ -0,0 +1,93 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_zero_window_probe_usertimeout_test + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +// TestZeroWindowProbeUserTimeout sanity tests user timeout when we are +// retransmitting zero window probes. +func TestZeroWindowProbeUserTimeout(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + acceptFd, _ := dut.Accept(listenFd) + defer dut.Close(acceptFd) + + dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + + sampleData := []byte("Sample Data") + samplePayload := &tb.Payload{Bytes: sampleData} + + // Send and receive sample data to the dut. + dut.Send(acceptFd, sampleData, 0) + if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + t.Fatalf("expected a packet with sequence number %s", err) + } + + // Test 1: Check for receive of a zero window probe, record the duration for + // probe to be sent. + // + // Advertize zero window to the dut. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + + // Expected sequence number of the zero window probe. + probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1)) + start := time.Now() + // Ask the dut to send out data. + dut.Send(acceptFd, sampleData, 0) + // Expect zero-window probe from the dut. + if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { + t.Fatalf("expected a packet with sequence number %v: %s", probeSeq, err) + } + // Record the duration for first probe, the dut sends the zero window probe after + // a retransmission time interval. + startProbeDuration := time.Now().Sub(start) + + // Test 2: Check if the dut times out the connection by honoring usertimeout + // when the dut is sending zero-window probes. + // + // Reduce the retransmit timeout. + dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int32(startProbeDuration.Milliseconds())) + // Advertize zero window again. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + // Ask the dut to send out data that would trigger zero window probe retransmissions. + dut.Send(acceptFd, sampleData, 0) + + // Wait for the connection to timeout after multiple zero-window probe retransmissions. + time.Sleep(8 * startProbeDuration) + + // Expect the connection to have timed out and closed which would cause the dut + // to reply with a RST to the ACK we send. 
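// (Informal note, not from this test: once the user timeout fires, the DUT
// aborts the connection, and per RFC 793 a segment arriving for a connection
// that no longer exists is answered with a RST, which is what the ACK sent
// below is designed to elicit.)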
+ conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + t.Fatalf("expected a TCP RST") + } +} -- cgit v1.2.3 From 9509c0b3886f29b488a62d0cc8edde9ccdefa335 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 5 May 2020 15:59:27 -0700 Subject: gvisor/test: use RetryEINTR for connect() connect() returns EINTR after S/R and usually we use RetryEINTR to workaround this. PiperOrigin-RevId: 310038525 --- test/syscalls/linux/socket_inet_loopback.cc | 51 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 23 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 9400ffaeb..fa890ec98 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -162,7 +162,7 @@ TEST_P(DualStackSocketTest, AddressOperations) { ASSERT_NO_ERRNO(SetAddrPort( addr.family(), const_cast(&addr.addr), 1337)); - EXPECT_THAT(connect(fd.get(), addr_in, addr.addr_len), + EXPECT_THAT(RetryEINTR(connect)(fd.get(), addr_in, addr.addr_len), SyscallSucceeds()) << GetAddrStr(addr_in); bound = true; @@ -353,8 +353,9 @@ TEST_P(SocketInetLoopbackTest, TCPListenShutdownListen) { for (int i = 0; i < kBacklog; i++) { auto client = ASSERT_NO_ERRNO_AND_VALUE( Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); - ASSERT_THAT(connect(client.get(), reinterpret_cast(&conn_addr), - connector.addr_len), + ASSERT_THAT(RetryEINTR(connect)(client.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), SyscallSucceeds()); } for (int i = 0; i < kBacklog; i++) { @@ -397,8 +398,9 @@ TEST_P(SocketInetLoopbackTest, TCPListenShutdown) { for (int i = 0; i < kFDs; i++) { auto client = ASSERT_NO_ERRNO_AND_VALUE( Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); - ASSERT_THAT(connect(client.get(), reinterpret_cast(&conn_addr), - connector.addr_len), + ASSERT_THAT(RetryEINTR(connect)(client.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), SyscallSucceeds()); ASSERT_THAT(accept(listen_fd.get(), nullptr, nullptr), SyscallSucceeds()); } @@ -425,8 +427,9 @@ TEST_P(SocketInetLoopbackTest, TCPListenShutdown) { for (int i = 0; i < kFDs; i++) { auto client = ASSERT_NO_ERRNO_AND_VALUE( Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); - ASSERT_THAT(connect(client.get(), reinterpret_cast(&conn_addr), - connector.addr_len), + ASSERT_THAT(RetryEINTR(connect)(client.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), SyscallFailsWithErrno(ECONNREFUSED)); } } @@ -1824,10 +1827,10 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) { // Connect to bind an ephemeral port. const FileDescriptor connected_fd = ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); - ASSERT_THAT( - connect(connected_fd.get(), reinterpret_cast(&bound_addr), - bound_addr_len), - SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast(&bound_addr), + bound_addr_len), + SyscallSucceeds()); // Get the ephemeral port. 
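// (Informal note on the RetryEINTR(connect)(...) wrapper used throughout
// this change, not from the original patch: a save/restore cycle interrupts
// blocking syscalls inside the sandbox, so a plain connect() can fail with
// EINTR. RetryEINTR simply re-issues the wrapped call for as long as it
// returns -1 with errno == EINTR, which is why the bare connect() calls in
// these tests are being wrapped.)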
sockaddr_storage connected_addr = {}; @@ -1930,8 +1933,9 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReservedReuseAddr) { ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceeds()); - ASSERT_THAT(connect(connected_fd.get(), - reinterpret_cast(&bound_addr), bound_addr_len), + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast(&bound_addr), + bound_addr_len), SyscallSucceeds()); // Get the ephemeral port. @@ -1991,10 +1995,10 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) { // Connect to bind an ephemeral port. const FileDescriptor connected_fd = ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); - ASSERT_THAT( - connect(connected_fd.get(), reinterpret_cast(&bound_addr), - bound_addr_len), - SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast(&bound_addr), + bound_addr_len), + SyscallSucceeds()); // Get the ephemeral port. sockaddr_storage connected_addr = {}; @@ -2121,8 +2125,9 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, ASSERT_THAT(setsockopt(connected_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceeds()); - ASSERT_THAT(connect(connected_fd.get(), - reinterpret_cast(&bound_addr), bound_addr_len), + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast(&bound_addr), + bound_addr_len), SyscallSucceeds()); // Get the ephemeral port. @@ -2182,10 +2187,10 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { // Connect to bind an ephemeral port. const FileDescriptor connected_fd = ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); - ASSERT_THAT( - connect(connected_fd.get(), reinterpret_cast(&bound_addr), - bound_addr_len), - SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(connect)(connected_fd.get(), + reinterpret_cast(&bound_addr), + bound_addr_len), + SyscallSucceeds()); // Get the ephemeral port. sockaddr_storage connected_addr = {}; -- cgit v1.2.3 From 4631de620a2534f6f89f6252269f650e78d11b99 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Wed, 6 May 2020 13:12:06 -0700 Subject: Internal change. 
PiperOrigin-RevId: 310213705 --- test/packetimpact/dut/posix_server.cc | 119 ++++++++++++++--------------- test/packetimpact/proto/posix_server.proto | 87 +++++---------------- test/packetimpact/testbench/dut.go | 118 +++++++++++++--------------- 3 files changed, 133 insertions(+), 191 deletions(-) (limited to 'test') diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index 2ebd3b95b..dc3024f44 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -174,41 +174,41 @@ class PosixImpl final : public posix_server::Posix::Service { grpc_impl::ServerContext *context, const ::posix_server::GetSockOptRequest *request, ::posix_server::GetSockOptResponse *response) override { - socklen_t optlen = request->optlen(); - std::vector buf(optlen); - response->set_ret(::getsockopt(request->sockfd(), request->level(), - request->optname(), buf.data(), &optlen)); - response->set_errno_(errno); - if (optlen >= 0) { - response->set_optval(buf.data(), optlen); + switch (request->type()) { + case ::posix_server::GetSockOptRequest::BYTES: { + socklen_t optlen = request->optlen(); + std::vector buf(optlen); + response->set_ret(::getsockopt(request->sockfd(), request->level(), + request->optname(), buf.data(), + &optlen)); + if (optlen >= 0) { + response->mutable_optval()->set_bytesval(buf.data(), optlen); + } + break; + } + case ::posix_server::GetSockOptRequest::INT: { + int intval = 0; + socklen_t optlen = sizeof(intval); + response->set_ret(::getsockopt(request->sockfd(), request->level(), + request->optname(), &intval, &optlen)); + response->mutable_optval()->set_intval(intval); + break; + } + case ::posix_server::GetSockOptRequest::TIME: { + timeval tv; + socklen_t optlen = sizeof(tv); + response->set_ret(::getsockopt(request->sockfd(), request->level(), + request->optname(), &tv, &optlen)); + response->mutable_optval()->mutable_timeval()->set_seconds(tv.tv_sec); + response->mutable_optval()->mutable_timeval()->set_microseconds( + tv.tv_usec); + break; + } + default: + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "Unknown SockOpt Type"); } - return ::grpc::Status::OK; - } - - ::grpc::Status GetSockOptInt( - ::grpc::ServerContext *context, - const ::posix_server::GetSockOptIntRequest *request, - ::posix_server::GetSockOptIntResponse *response) override { - int opt = 0; - socklen_t optlen = sizeof(opt); - response->set_ret(::getsockopt(request->sockfd(), request->level(), - request->optname(), &opt, &optlen)); - response->set_errno_(errno); - response->set_intval(opt); - return ::grpc::Status::OK; - } - - ::grpc::Status GetSockOptTimeval( - ::grpc::ServerContext *context, - const ::posix_server::GetSockOptTimevalRequest *request, - ::posix_server::GetSockOptTimevalResponse *response) override { - timeval tv; - socklen_t optlen = sizeof(tv); - response->set_ret(::getsockopt(request->sockfd(), request->level(), - request->optname(), &tv, &optlen)); response->set_errno_(errno); - response->mutable_timeval()->set_seconds(tv.tv_sec); - response->mutable_timeval()->set_microseconds(tv.tv_usec); return ::grpc::Status::OK; } @@ -253,33 +253,32 @@ class PosixImpl final : public posix_server::Posix::Service { grpc_impl::ServerContext *context, const ::posix_server::SetSockOptRequest *request, ::posix_server::SetSockOptResponse *response) override { - response->set_ret(setsockopt(request->sockfd(), request->level(), - request->optname(), request->optval().c_str(), - request->optval().size())); - response->set_errno_(errno); 
- return ::grpc::Status::OK; - } - - ::grpc::Status SetSockOptInt( - ::grpc::ServerContext *context, - const ::posix_server::SetSockOptIntRequest *request, - ::posix_server::SetSockOptIntResponse *response) override { - int opt = request->intval(); - response->set_ret(::setsockopt(request->sockfd(), request->level(), - request->optname(), &opt, sizeof(opt))); - response->set_errno_(errno); - return ::grpc::Status::OK; - } - - ::grpc::Status SetSockOptTimeval( - ::grpc::ServerContext *context, - const ::posix_server::SetSockOptTimevalRequest *request, - ::posix_server::SetSockOptTimevalResponse *response) override { - timeval tv = {.tv_sec = static_cast<__time_t>(request->timeval().seconds()), - .tv_usec = static_cast<__suseconds_t>( - request->timeval().microseconds())}; - response->set_ret(setsockopt(request->sockfd(), request->level(), - request->optname(), &tv, sizeof(tv))); + switch (request->optval().val_case()) { + case ::posix_server::SockOptVal::kBytesval: + response->set_ret(setsockopt(request->sockfd(), request->level(), + request->optname(), + request->optval().bytesval().c_str(), + request->optval().bytesval().size())); + break; + case ::posix_server::SockOptVal::kIntval: { + int opt = request->optval().intval(); + response->set_ret(::setsockopt(request->sockfd(), request->level(), + request->optname(), &opt, sizeof(opt))); + break; + } + case ::posix_server::SockOptVal::kTimeval: { + timeval tv = {.tv_sec = static_cast<__time_t>( + request->optval().timeval().seconds()), + .tv_usec = static_cast<__suseconds_t>( + request->optval().timeval().microseconds())}; + response->set_ret(setsockopt(request->sockfd(), request->level(), + request->optname(), &tv, sizeof(tv))); + break; + } + default: + return ::grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, + "Unknown SockOpt Type"); + } response->set_errno_(errno); return ::grpc::Status::OK; } diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto index ab5ba1c85..9dca563f1 100644 --- a/test/packetimpact/proto/posix_server.proto +++ b/test/packetimpact/proto/posix_server.proto @@ -42,6 +42,14 @@ message Timeval { int64 microseconds = 2; } +message SockOptVal { + oneof val { + bytes bytesval = 1; + int32 intval = 2; + Timeval timeval = 3; + } +} + // Request and Response pairs for each Posix service RPC call, sorted. message AcceptRequest { @@ -98,36 +106,19 @@ message GetSockOptRequest { int32 level = 2; int32 optname = 3; int32 optlen = 4; + enum SockOptType { + UNSPECIFIED = 0; + BYTES = 1; + INT = 2; + TIME = 3; + } + SockOptType type = 5; } message GetSockOptResponse { int32 ret = 1; int32 errno_ = 2; // "errno" may fail to compile in c++. - bytes optval = 3; -} - -message GetSockOptIntRequest { - int32 sockfd = 1; - int32 level = 2; - int32 optname = 3; -} - -message GetSockOptIntResponse { - int32 ret = 1; - int32 errno_ = 2; // "errno" may fail to compile in c++. - int32 intval = 3; -} - -message GetSockOptTimevalRequest { - int32 sockfd = 1; - int32 level = 2; - int32 optname = 3; -} - -message GetSockOptTimevalResponse { - int32 ret = 1; - int32 errno_ = 2; // "errno" may fail to compile in c++. - Timeval timeval = 3; + SockOptVal optval = 3; } message ListenRequest { @@ -167,7 +158,7 @@ message SetSockOptRequest { int32 sockfd = 1; int32 level = 2; int32 optname = 3; - bytes optval = 4; + SockOptVal optval = 4; } message SetSockOptResponse { @@ -175,30 +166,6 @@ message SetSockOptResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. 
} -message SetSockOptIntRequest { - int32 sockfd = 1; - int32 level = 2; - int32 optname = 3; - int32 intval = 4; -} - -message SetSockOptIntResponse { - int32 ret = 1; - int32 errno_ = 2; -} - -message SetSockOptTimevalRequest { - int32 sockfd = 1; - int32 level = 2; - int32 optname = 3; - Timeval timeval = 4; -} - -message SetSockOptTimevalResponse { - int32 ret = 1; - int32 errno_ = 2; // "errno" may fail to compile in c++. -} - message SocketRequest { int32 domain = 1; int32 type = 2; @@ -233,32 +200,16 @@ service Posix { rpc Connect(ConnectRequest) returns (ConnectResponse); // Call getsockname() on the DUT. rpc GetSockName(GetSockNameRequest) returns (GetSockNameResponse); - // Call getsockopt() on the DUT. You should prefer one of the other - // GetSockOpt* functions with a more structured optval or else you may get the - // encoding wrong, such as making a bad assumption about the server's word - // sizes or endianness. + // Call getsockopt() on the DUT. rpc GetSockOpt(GetSockOptRequest) returns (GetSockOptResponse); - // Call getsockopt() on the DUT with an int optval. - rpc GetSockOptInt(GetSockOptIntRequest) returns (GetSockOptIntResponse); - // Call getsockopt() on the DUT with a Timeval optval. - rpc GetSockOptTimeval(GetSockOptTimevalRequest) - returns (GetSockOptTimevalResponse); // Call listen() on the DUT. rpc Listen(ListenRequest) returns (ListenResponse); // Call send() on the DUT. rpc Send(SendRequest) returns (SendResponse); // Call sendto() on the DUT. rpc SendTo(SendToRequest) returns (SendToResponse); - // Call setsockopt() on the DUT. You should prefer one of the other - // SetSockOpt* functions with a more structured optval or else you may get the - // encoding wrong, such as making a bad assumption about the server's word - // sizes or endianness. + // Call setsockopt() on the DUT. rpc SetSockOpt(SetSockOptRequest) returns (SetSockOptResponse); - // Call setsockopt() on the DUT with an int optval. - rpc SetSockOptInt(SetSockOptIntRequest) returns (SetSockOptIntResponse); - // Call setsockopt() on the DUT with a Timeval optval. - rpc SetSockOptTimeval(SetSockOptTimevalRequest) - returns (SetSockOptTimevalResponse); // Call socket() on the DUT. rpc Socket(SocketRequest) returns (SocketResponse); // Call recv() on the DUT. diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index 87eeeeb88..f68d9d62b 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -291,6 +291,26 @@ func (dut *DUT) GetSockNameWithErrno(ctx context.Context, sockfd int32) (int32, return resp.GetRet(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) } +func (dut *DUT) getSockOpt(ctx context.Context, sockfd, level, optname, optlen int32, typ pb.GetSockOptRequest_SockOptType) (int32, *pb.SockOptVal, error) { + dut.t.Helper() + req := pb.GetSockOptRequest{ + Sockfd: sockfd, + Level: level, + Optname: optname, + Optlen: optlen, + Type: typ, + } + resp, err := dut.posixServer.GetSockOpt(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call GetSockOpt: %s", err) + } + optval := resp.GetOptval() + if optval == nil { + dut.t.Fatalf("GetSockOpt response does not contain a value") + } + return resp.GetRet(), optval, syscall.Errno(resp.GetErrno_()) +} + // GetSockOpt calls getsockopt on the DUT and causes a fatal test failure if it // doesn't succeed. If more control over the timeout or error handling is // needed, use GetSockOptWithErrno. 
Because endianess and the width of values @@ -312,17 +332,12 @@ func (dut *DUT) GetSockOpt(sockfd, level, optname, optlen int32) []byte { // prefer to use a more specific GetSockOptXxxWithErrno function. func (dut *DUT) GetSockOptWithErrno(ctx context.Context, sockfd, level, optname, optlen int32) (int32, []byte, error) { dut.t.Helper() - req := pb.GetSockOptRequest{ - Sockfd: sockfd, - Level: level, - Optname: optname, - Optlen: optlen, - } - resp, err := dut.posixServer.GetSockOpt(ctx, &req) - if err != nil { - dut.t.Fatalf("failed to call GetSockOpt: %s", err) + ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, optlen, pb.GetSockOptRequest_BYTES) + bytesval, ok := optval.Val.(*pb.SockOptVal_Bytesval) + if !ok { + dut.t.Fatalf("GetSockOpt got value type: %T, want bytes", optval) } - return resp.GetRet(), resp.GetOptval(), syscall.Errno(resp.GetErrno_()) + return ret, bytesval.Bytesval, errno } // GetSockOptInt calls getsockopt on the DUT and causes a fatal test failure @@ -342,16 +357,12 @@ func (dut *DUT) GetSockOptInt(sockfd, level, optname int32) int32 { // GetSockOptIntWithErrno calls getsockopt with an integer optval. func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, sockfd, level, optname int32) (int32, int32, error) { dut.t.Helper() - req := pb.GetSockOptIntRequest{ - Sockfd: sockfd, - Level: level, - Optname: optname, - } - resp, err := dut.posixServer.GetSockOptInt(ctx, &req) - if err != nil { - dut.t.Fatalf("failed to call GetSockOptInt: %s", err) + ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, 0, pb.GetSockOptRequest_INT) + intval, ok := optval.Val.(*pb.SockOptVal_Intval) + if !ok { + dut.t.Fatalf("GetSockOpt got value type: %T, want int", optval) } - return resp.GetRet(), resp.GetIntval(), syscall.Errno(resp.GetErrno_()) + return ret, intval.Intval, errno } // GetSockOptTimeval calls getsockopt on the DUT and causes a fatal test failure @@ -371,20 +382,16 @@ func (dut *DUT) GetSockOptTimeval(sockfd, level, optname int32) unix.Timeval { // GetSockOptTimevalWithErrno calls getsockopt and returns a timeval. 
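// For example, a packetimpact test that wants the DUT's receive timeout
// might do (illustrative only, not from this change):
//
//	ret, tv, errno := dut.GetSockOptTimevalWithErrno(ctx, fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO)
//	if ret != 0 {
//		t.Fatalf("getsockopt(SO_RCVTIMEO) failed: %s", errno)
//	}
//	_ = tv
//
// Under the hood this is now a single GetSockOpt RPC with type TIME, and the
// timeval travels back inside the SockOptVal oneof instead of raw bytes.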
func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, optname int32) (int32, unix.Timeval, error) { dut.t.Helper() - req := pb.GetSockOptTimevalRequest{ - Sockfd: sockfd, - Level: level, - Optname: optname, - } - resp, err := dut.posixServer.GetSockOptTimeval(ctx, &req) - if err != nil { - dut.t.Fatalf("failed to call GetSockOptTimeval: %s", err) + ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, 0, pb.GetSockOptRequest_TIME) + tv, ok := optval.Val.(*pb.SockOptVal_Timeval) + if !ok { + dut.t.Fatalf("GetSockOpt got value type: %T, want timeval", optval) } timeval := unix.Timeval{ - Sec: resp.GetTimeval().Seconds, - Usec: resp.GetTimeval().Microseconds, + Sec: tv.Timeval.Seconds, + Usec: tv.Timeval.Microseconds, } - return resp.GetRet(), timeval, syscall.Errno(resp.GetErrno_()) + return ret, timeval, errno } // Listen calls listen on the DUT and causes a fatal test failure if it doesn't @@ -473,6 +480,21 @@ func (dut *DUT) SendToWithErrno(ctx context.Context, sockfd int32, buf []byte, f return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } +func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, optval *pb.SockOptVal) (int32, error) { + dut.t.Helper() + req := pb.SetSockOptRequest{ + Sockfd: sockfd, + Level: level, + Optname: optname, + Optval: optval, + } + resp, err := dut.posixServer.SetSockOpt(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call SetSockOpt: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + // SetSockOpt calls setsockopt on the DUT and causes a fatal test failure if it // doesn't succeed. If more control over the timeout or error handling is // needed, use SetSockOptWithErrno. Because endianess and the width of values @@ -493,17 +515,7 @@ func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) { // prefer to use a more specific SetSockOptXxxWithErrno function. func (dut *DUT) SetSockOptWithErrno(ctx context.Context, sockfd, level, optname int32, optval []byte) (int32, error) { dut.t.Helper() - req := pb.SetSockOptRequest{ - Sockfd: sockfd, - Level: level, - Optname: optname, - Optval: optval, - } - resp, err := dut.posixServer.SetSockOpt(ctx, &req) - if err != nil { - dut.t.Fatalf("failed to call SetSockOpt: %s", err) - } - return resp.GetRet(), syscall.Errno(resp.GetErrno_()) + return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Bytesval{optval}}) } // SetSockOptInt calls setsockopt on the DUT and causes a fatal test failure @@ -522,17 +534,7 @@ func (dut *DUT) SetSockOptInt(sockfd, level, optname, optval int32) { // SetSockOptIntWithErrno calls setsockopt with an integer optval. 
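// For example (illustrative only, not from this change), the zero-window
// probe user-timeout test in this series arms the DUT's user timeout along
// these lines:
//
//	dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, 5000 /* ms */)
//
// which is now carried as SockOptVal{intval: 5000} in a single SetSockOpt
// RPC, so the test no longer has to guess the width or endianness the DUT
// expects for the option value.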
func (dut *DUT) SetSockOptIntWithErrno(ctx context.Context, sockfd, level, optname, optval int32) (int32, error) { dut.t.Helper() - req := pb.SetSockOptIntRequest{ - Sockfd: sockfd, - Level: level, - Optname: optname, - Intval: optval, - } - resp, err := dut.posixServer.SetSockOptInt(ctx, &req) - if err != nil { - dut.t.Fatalf("failed to call SetSockOptInt: %s", err) - } - return resp.GetRet(), syscall.Errno(resp.GetErrno_()) + return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Intval{optval}}) } // SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure @@ -556,17 +558,7 @@ func (dut *DUT) SetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, o Seconds: int64(tv.Sec), Microseconds: int64(tv.Usec), } - req := pb.SetSockOptTimevalRequest{ - Sockfd: sockfd, - Level: level, - Optname: optname, - Timeval: &timeval, - } - resp, err := dut.posixServer.SetSockOptTimeval(ctx, &req) - if err != nil { - dut.t.Fatalf("failed to call SetSockOptTimeval: %s", err) - } - return resp.GetRet(), syscall.Errno(resp.GetErrno_()) + return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Timeval{&timeval}}) } // Socket calls socket on the DUT and returns the file descriptor. If socket -- cgit v1.2.3 From 92cab8e2c388900f687b5ee861e82739a2d5f2c2 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 7 May 2020 12:08:45 -0700 Subject: Internal change. PiperOrigin-RevId: 310409922 --- .../tests/udp_icmp_error_propagation_test.go | 38 ++++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index c47af9a3e..30dcb336e 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -18,7 +18,6 @@ import ( "context" "fmt" "net" - "sync" "syscall" "testing" "time" @@ -301,19 +300,20 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { t.Fatalf("did not receive message from DUT: %s", err) } - var wg sync.WaitGroup - wg.Add(2) - go func() { + c := make(chan error) + go func(c chan error) { if wantErrno != syscall.Errno(0) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0) if ret != -1 { - t.Fatalf("recv during ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) + c <- fmt.Errorf("recv during ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) + return } if err != wantErrno { - t.Fatalf("recv during ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) + c <- fmt.Errorf("recv during ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) + return } } @@ -321,20 +321,23 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { defer cancel() if ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0); ret == -1 { - t.Fatalf("recv after ICMP error failed with (%[1]d) %[1]", err) + c <- fmt.Errorf("recv after ICMP error failed with (%[1]d) %[1]", err) + return } - wg.Done() - }() + c <- nil + }(c) - go func() { + cleanChan := make(chan error) + go func(c chan error) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if ret, _, err := dut.RecvWithErrno(ctx, cleanFD, 100, 0); ret == -1 { - t.Fatalf("recv on clean socket failed with (%[1]d) %[1]", err) + c <- fmt.Errorf("recv 
on clean socket failed with (%[1]d) %[1]", err) + return } - wg.Done() - }() + c <- nil + }(cleanChan) // TODO(b/155684889) This sleep is to allow time for the DUT to // actually call recv since we want the ICMP error to arrive during the @@ -348,7 +351,14 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { conn.Send(tb.UDP{DstPort: &cleanPort}) conn.Send(tb.UDP{}) - wg.Wait() + + err, errClean := <-c, <-cleanChan + if errClean != nil { + t.Error(err) + } + if err != nil { + t.Fatal(err) + } }) } } -- cgit v1.2.3 From 5d7d5ed7d6ffd8d420f042a22b22e0527e78d441 Mon Sep 17 00:00:00 2001 From: Zeling Feng Date: Fri, 8 May 2020 11:21:40 -0700 Subject: Send ACK to OTW SEQs/unacc ACKs in CLOSE_WAIT This fixed the corresponding packetimpact test. PiperOrigin-RevId: 310593470 --- pkg/tcpip/header/tcp.go | 5 ++++- pkg/tcpip/transport/tcp/rcv.go | 36 ++++++++++++++++++++++++++---------- pkg/tcpip/transport/tcp/rcv_test.go | 4 ++-- test/packetimpact/tests/BUILD | 2 -- 4 files changed, 32 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 21581257b..29454c4b9 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -609,5 +609,8 @@ func Acceptable(segSeq seqnum.Value, segLen seqnum.Size, rcvNxt, rcvAcc seqnum.V } // Page 70 of RFC 793 allows packets that can be made "acceptable" by trimming // the payload, so we'll accept any payload that overlaps the receieve window. - return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThan(rcvAcc) + // segSeq < rcvAcc is more correct according to RFC, however, Linux does it + // differently, it uses segSeq <= rcvAcc, we'd want to keep the same behavior + // as Linux. + return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThanEq(rcvAcc) } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index 6fe97fefd..dd89a292a 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -70,7 +70,16 @@ func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale // acceptable checks if the segment sequence number range is acceptable // according to the table on page 26 of RFC 793. func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool { - return header.Acceptable(segSeq, segLen, r.rcvNxt, r.rcvAcc) + // r.rcvWnd could be much larger than the window size we advertised in our + // outgoing packets, we should use what we have advertised for acceptability + // test. + scaledWindowSize := r.rcvWnd >> r.rcvWndScale + if scaledWindowSize > 0xffff { + // This is what we actually put in the Window field. + scaledWindowSize = 0xffff + } + advertisedWindowSize := scaledWindowSize << r.rcvWndScale + return header.Acceptable(segSeq, segLen, r.rcvNxt, r.rcvNxt.Add(advertisedWindowSize)) } // getSendParams returns the parameters needed by the sender when building @@ -259,7 +268,14 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // If we are in one of the shutdown states then we need to do // additional checks before we try and process the segment. switch state { - case StateCloseWait, StateClosing, StateLastAck: + case StateCloseWait: + // If the ACK acks something not yet sent then we send an ACK. 
+ if r.ep.snd.sndNxt.LessThan(s.ackNumber) { + r.ep.snd.sendAck() + return true, nil + } + fallthrough + case StateClosing, StateLastAck: if !s.sequenceNumber.LessThanEq(r.rcvNxt) { // Just drop the segment as we have // already received a FIN and this @@ -276,7 +292,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo // SHUT_RD) then any data past the rcvNxt should // trigger a RST. endDataSeq := s.sequenceNumber.Add(seqnum.Size(s.data.Size())) - if rcvClosed && r.rcvNxt.LessThan(endDataSeq) { + if state != StateCloseWait && rcvClosed && r.rcvNxt.LessThan(endDataSeq) { return true, tcpip.ErrConnectionAborted } if state == StateFinWait1 { @@ -329,13 +345,6 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) { state := r.ep.EndpointState() closed := r.ep.closed - if state != StateEstablished { - drop, err := r.handleRcvdSegmentClosing(s, state, closed) - if drop || err != nil { - return drop, err - } - } - segLen := seqnum.Size(s.data.Size()) segSeq := s.sequenceNumber @@ -347,6 +356,13 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) { return true, nil } + if state != StateEstablished { + drop, err := r.handleRcvdSegmentClosing(s, state, closed) + if drop || err != nil { + return drop, err + } + } + // Store the time of the last ack. r.lastRcvdAckTime = time.Now() diff --git a/pkg/tcpip/transport/tcp/rcv_test.go b/pkg/tcpip/transport/tcp/rcv_test.go index c9eeff935..8a026ec46 100644 --- a/pkg/tcpip/transport/tcp/rcv_test.go +++ b/pkg/tcpip/transport/tcp/rcv_test.go @@ -30,7 +30,7 @@ func TestAcceptable(t *testing.T) { }{ // The segment is smaller than the window. {105, 2, 100, 104, false}, - {105, 2, 101, 105, false}, + {105, 2, 101, 105, true}, {105, 2, 102, 106, true}, {105, 2, 103, 107, true}, {105, 2, 104, 108, true}, @@ -39,7 +39,7 @@ func TestAcceptable(t *testing.T) { {105, 2, 107, 111, false}, // The segment is larger than the window. - {105, 4, 103, 105, false}, + {105, 4, 103, 105, true}, {105, 4, 104, 106, true}, {105, 4, 105, 107, true}, {105, 4, 106, 108, true}, diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index e4ced444b..563823d04 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -117,8 +117,6 @@ packetimpact_go_test( packetimpact_go_test( name = "tcp_close_wait_ack", srcs = ["tcp_close_wait_ack_test.go"], - # TODO(b/153574037): Fix netstack then remove the line below. - netstack = False, deps = [ "//pkg/tcpip/header", "//pkg/tcpip/seqnum", -- cgit v1.2.3 From e4d2d21f6b1b93146378ed5edc0c55d2ae4fb3af Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Fri, 8 May 2020 15:38:42 -0700 Subject: Add UDP send/recv packetimpact tests. 
Fixes #2654 PiperOrigin-RevId: 310642216 --- test/packetimpact/testbench/connections.go | 13 +++ test/packetimpact/testbench/layers.go | 5 +- test/packetimpact/testbench/rawsockets.go | 2 +- test/packetimpact/tests/BUILD | 9 ++ .../packetimpact/tests/udp_send_recv_dgram_test.go | 96 ++++++++++++++++++++++ 5 files changed, 120 insertions(+), 5 deletions(-) create mode 100644 test/packetimpact/tests/udp_send_recv_dgram_test.go (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 56ac3fa54..28c841612 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -841,6 +841,7 @@ func (conn *UDPIPv4) SendIP(additionalLayers ...Layer) { // Expect expects a frame with the UDP layer matching the provided UDP within // the timeout specified. If it doesn't arrive in time, an error is returned. func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) { + conn.t.Helper() layer, err := (*Connection)(conn).Expect(&udp, timeout) if layer == nil { return nil, err @@ -852,6 +853,18 @@ func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) { return gotUDP, err } +// ExpectData is a convenient method that expects a Layer and the Layer after +// it. If it doens't arrive in time, it returns nil. +func (conn *UDPIPv4) ExpectData(udp UDP, payload Payload, timeout time.Duration) (Layers, error) { + conn.t.Helper() + expected := make([]Layer, len(conn.layerStates)) + expected[len(expected)-1] = &udp + if payload.length() != 0 { + expected = append(expected, &payload) + } + return (*Connection)(conn).ExpectFrame(expected, timeout) +} + // Close frees associated resources held by the UDPIPv4 connection. func (conn *UDPIPv4) Close() { (*Connection)(conn).Close() diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 165f62d3b..49370377d 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -898,10 +898,7 @@ func (l *UDP) match(other Layer) bool { } func (l *UDP) length() int { - if l.Length == nil { - return header.UDPMinimumSize - } - return int(*l.Length) + return header.UDPMinimumSize } // merge implements Layer.merge. diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index ff722d4a6..a9ad72b63 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -169,7 +169,7 @@ func NewInjector(t *testing.T) (Injector, error) { // Send a raw frame. 
func (i *Injector) Send(b []byte) { if _, err := unix.Write(i.fd, b); err != nil { - i.t.Fatalf("can't write: %s", err) + i.t.Fatalf("can't write: %s of len %d", err, len(b)) } } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 563823d04..c25b3b8c1 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -148,6 +148,15 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "udp_send_recv_dgram", + srcs = ["udp_send_recv_dgram_test.go"], + deps = [ + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + sh_binary( name = "test_runner", srcs = ["test_runner.sh"], diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go new file mode 100644 index 000000000..1c682831c --- /dev/null +++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go @@ -0,0 +1,96 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package udp_send_recv_dgram_test + +import ( + "math/rand" + "net" + "testing" + "time" + + "golang.org/x/sys/unix" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func generateRandomPayload(t *testing.T, n int) string { + t.Helper() + buf := make([]byte, n) + if _, err := rand.Read(buf); err != nil { + t.Fatalf("rand.Read(buf) failed: %s", err) + } + return string(buf) +} + +func TestUDPRecv(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(boundFD) + conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + defer conn.Close() + + testCases := []struct { + name string + payload string + }{ + {"emptypayload", ""}, + {"small payload", "hello world"}, + {"1kPayload", generateRandomPayload(t, 1<<10)}, + // Even though UDP allows larger dgrams we don't test it here as + // they need to be fragmented and written out as individual + // frames. 
+ } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + frame := conn.CreateFrame(&tb.UDP{}, &tb.Payload{Bytes: []byte(tc.payload)}) + conn.SendFrame(frame) + if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), tc.payload; got != want { + t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want) + } + }) + } +} + +func TestUDPSend(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(boundFD) + conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + defer conn.Close() + + testCases := []struct { + name string + payload string + }{ + {"emptypayload", ""}, + {"small payload", "hello world"}, + {"1kPayload", generateRandomPayload(t, 1<<10)}, + // Even though UDP allows larger dgrams we don't test it here as + // they need to be fragmented and written out as individual + // frames. + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + conn.Drain() + if got, want := int(dut.SendTo(boundFD, []byte(tc.payload), 0, conn.LocalAddr())), len(tc.payload); got != want { + t.Fatalf("short write got: %d, want: %d", got, want) + } + if _, err := conn.ExpectData(tb.UDP{SrcPort: &remotePort}, tb.Payload{Bytes: []byte(tc.payload)}, 1*time.Second); err != nil { + t.Fatal(err) + } + }) + } +} -- cgit v1.2.3 From cfd30665c1d857f20dd05e67c6da6833770e2141 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Fri, 8 May 2020 15:39:04 -0700 Subject: iptables - filter packets using outgoing interface. Enables commands with -o (--out-interface) for iptables rules. $ iptables -A OUTPUT -o eth0 -j ACCEPT PiperOrigin-RevId: 310642286 --- pkg/abi/linux/netfilter.go | 2 +- pkg/sentry/socket/netfilter/netfilter.go | 40 ++++++-- pkg/tcpip/network/ipv4/ipv4.go | 8 +- pkg/tcpip/stack/iptables.go | 42 ++++++-- pkg/tcpip/stack/iptables_types.go | 13 +++ pkg/tcpip/stack/nic.go | 2 +- pkg/tcpip/stack/stack.go | 16 ++- test/iptables/filter_output.go | 170 +++++++++++++++++++++++++++++++ test/iptables/iptables_test.go | 24 +++++ test/iptables/iptables_util.go | 15 +++ 10 files changed, 309 insertions(+), 23 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index a8d4f9d69..46d8b0b42 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -146,7 +146,7 @@ type IPTIP struct { // OutputInterface is the output network interface. OutputInterface [IFNAMSIZ]byte - // InputInterfaceMask is the intput interface mask. + // InputInterfaceMask is the input interface mask. InputInterfaceMask [IFNAMSIZ]byte // OuputInterfaceMask is the output interface mask. 
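[Editor's note: a minimal, standalone Go sketch of the outgoing-interface match that this change wires into filterMatch further down in this patch. The function name matchOutputInterface and the interface names in main() are illustrative only; the real check lives in the filterMatch change to pkg/tcpip/stack/iptables.go below, which takes the rule's OutputInterface and OutputInterfaceInvert fields parsed from the IPTIP header above.]

package main

import (
	"fmt"
	"strings"
)

// matchOutputInterface reports whether a packet leaving via nicName matches a
// rule whose output-interface filter is ifName, honoring the trailing "+"
// wildcard and the invert flag. An empty ifName matches every interface.
func matchOutputInterface(ifName, nicName string, invert bool) bool {
	if ifName == "" {
		return true
	}
	var matches bool
	if strings.HasSuffix(ifName, "+") {
		// "eth+" matches eth0, eth1, ...
		matches = strings.HasPrefix(nicName, ifName[:len(ifName)-1])
	} else {
		matches = nicName == ifName
	}
	return invert != matches
}

func main() {
	fmt.Println(matchOutputInterface("eth+", "eth0", false)) // true
	fmt.Println(matchOutputInterface("lo", "eth0", false))   // false
	fmt.Println(matchOutputInterface("lo", "eth0", true))    // true, as with "! -o lo"
}
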
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 40736fb38..41a1ce031 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -17,6 +17,7 @@ package netfilter import ( + "bytes" "errors" "fmt" @@ -204,6 +205,16 @@ func convertNetstackToBinary(tablename string, table stack.Table) (linux.KernelI TargetOffset: linux.SizeOfIPTEntry, }, } + copy(entry.IPTEntry.IP.Dst[:], rule.Filter.Dst) + copy(entry.IPTEntry.IP.DstMask[:], rule.Filter.DstMask) + copy(entry.IPTEntry.IP.OutputInterface[:], rule.Filter.OutputInterface) + copy(entry.IPTEntry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask) + if rule.Filter.DstInvert { + entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_DSTIP + } + if rule.Filter.OutputInterfaceInvert { + entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_VIA_OUT + } for _, matcher := range rule.Matchers { // Serialize the matcher and add it to the @@ -719,11 +730,27 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) { if len(iptip.Dst) != header.IPv4AddressSize || len(iptip.DstMask) != header.IPv4AddressSize { return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask)) } + + n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0) + if n == -1 { + n = len(iptip.OutputInterface) + } + ifname := string(iptip.OutputInterface[:n]) + + n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0) + if n == -1 { + n = len(iptip.OutputInterfaceMask) + } + ifnameMask := string(iptip.OutputInterfaceMask[:n]) + return stack.IPHeaderFilter{ - Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), - Dst: tcpip.Address(iptip.Dst[:]), - DstMask: tcpip.Address(iptip.DstMask[:]), - DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0, + Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + Dst: tcpip.Address(iptip.Dst[:]), + DstMask: tcpip.Address(iptip.DstMask[:]), + DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0, + OutputInterface: ifname, + OutputInterfaceMask: ifnameMask, + OutputInterfaceInvert: iptip.InverseFlags&linux.IPT_INV_VIA_OUT != 0, }, nil } @@ -732,16 +759,15 @@ func containsUnsupportedFields(iptip linux.IPTIP) bool { // - Protocol // - Dst and DstMask // - The inverse destination IP check flag + // - OutputInterface, OutputInterfaceMask and its inverse. var emptyInetAddr = linux.InetAddr{} var emptyInterface = [linux.IFNAMSIZ]byte{} // Disable any supported inverse flags. - inverseMask := uint8(linux.IPT_INV_DSTIP) + inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_VIA_OUT) return iptip.Src != emptyInetAddr || iptip.SrcMask != emptyInetAddr || iptip.InputInterface != emptyInterface || - iptip.OutputInterface != emptyInterface || iptip.InputInterfaceMask != emptyInterface || - iptip.OutputInterfaceMask != emptyInterface || iptip.Flags != 0 || iptip.InverseFlags&^inverseMask != 0 } diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 9db42b2a4..64046cbbf 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -249,10 +249,11 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw ip := e.addIPHeader(r, &pkt.Header, pkt.Data.Size(), params) pkt.NetworkHeader = buffer.View(ip) + nicName := e.stack.FindNICNameFromID(e.NICID()) // iptables filtering. All packets that reach here are locally // generated. 
ipt := e.stack.IPTables() - if ok := ipt.Check(stack.Output, &pkt, gso, r, ""); !ok { + if ok := ipt.Check(stack.Output, &pkt, gso, r, "", nicName); !ok { // iptables is telling us to drop the packet. return nil } @@ -319,10 +320,11 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe pkt = pkt.Next() } + nicName := e.stack.FindNICNameFromID(e.NICID()) // iptables filtering. All packets that reach here are locally // generated. ipt := e.stack.IPTables() - dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r) + dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r, nicName) if len(dropped) == 0 && len(natPkts) == 0 { // Fast path: If no packets are to be dropped then we can just invoke the // faster WritePackets API directly. @@ -445,7 +447,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) { // iptables filtering. All packets that reach here are intended for // this machine and will not be forwarded. ipt := e.stack.IPTables() - if ok := ipt.Check(stack.Input, &pkt, nil, nil, ""); !ok { + if ok := ipt.Check(stack.Input, &pkt, nil, nil, "", ""); !ok { // iptables is telling us to drop the packet. return } diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go index 7c3c47d50..443423b3c 100644 --- a/pkg/tcpip/stack/iptables.go +++ b/pkg/tcpip/stack/iptables.go @@ -16,6 +16,7 @@ package stack import ( "fmt" + "strings" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -178,7 +179,7 @@ const ( // dropped. // // Precondition: pkt.NetworkHeader is set. -func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, address tcpip.Address) bool { +func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, address tcpip.Address, nicName string) bool { // Packets are manipulated only if connection and matching // NAT rule exists. it.connections.HandlePacket(pkt, hook, gso, r) @@ -187,7 +188,7 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr for _, tablename := range it.Priorities[hook] { table := it.Tables[tablename] ruleIdx := table.BuiltinChains[hook] - switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, address); verdict { + switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, address, nicName); verdict { // If the table returns Accept, move on to the next table. case chainAccept: continue @@ -228,10 +229,10 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr // // NOTE: unlike the Check API the returned map contains packets that should be // dropped. -func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *Route) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) { +func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *Route, nicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) { for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { if !pkt.NatDone { - if ok := it.Check(hook, pkt, gso, r, ""); !ok { + if ok := it.Check(hook, pkt, gso, r, "", nicName); !ok { if drop == nil { drop = make(map[*PacketBuffer]struct{}) } @@ -251,11 +252,11 @@ func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r * // Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize. // TODO(gvisor.dev/issue/170): pkt.NetworkHeader will always be set as a // precondition. 
-func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address) chainVerdict { +func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address, nicName string) chainVerdict { // Start from ruleIdx and walk the list of rules until a rule gives us // a verdict. for ruleIdx < len(table.Rules) { - switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, gso, r, address); verdict { + switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, gso, r, address, nicName); verdict { case RuleAccept: return chainAccept @@ -272,7 +273,7 @@ func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleId ruleIdx++ continue } - switch verdict := it.checkChain(hook, pkt, table, jumpTo, gso, r, address); verdict { + switch verdict := it.checkChain(hook, pkt, table, jumpTo, gso, r, address, nicName); verdict { case chainAccept: return chainAccept case chainDrop: @@ -298,7 +299,7 @@ func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleId // Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize. // TODO(gvisor.dev/issue/170): pkt.NetworkHeader will always be set as a // precondition. -func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address) (RuleVerdict, int) { +func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address, nicName string) (RuleVerdict, int) { rule := table.Rules[ruleIdx] // If pkt.NetworkHeader hasn't been set yet, it will be contained in @@ -313,7 +314,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx } // Check whether the packet matches the IP header filter. - if !filterMatch(rule.Filter, header.IPv4(pkt.NetworkHeader)) { + if !filterMatch(rule.Filter, header.IPv4(pkt.NetworkHeader), hook, nicName) { // Continue on to the next rule. return RuleJump, ruleIdx + 1 } @@ -335,7 +336,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx return rule.Target.Action(pkt, &it.connections, hook, gso, r, address) } -func filterMatch(filter IPHeaderFilter, hdr header.IPv4) bool { +func filterMatch(filter IPHeaderFilter, hdr header.IPv4, hook Hook, nicName string) bool { // TODO(gvisor.dev/issue/170): Support other fields of the filter. // Check the transport protocol. if filter.Protocol != 0 && filter.Protocol != hdr.TransportProtocol() { @@ -355,5 +356,26 @@ func filterMatch(filter IPHeaderFilter, hdr header.IPv4) bool { return false } + // Check the output interface. + // TODO(gvisor.dev/issue/170): Add the check for FORWARD and POSTROUTING + // hooks after supported. + if hook == Output { + n := len(filter.OutputInterface) + if n == 0 { + return true + } + + // If the interface name ends with '+', any interface which begins + // with the name should be matched. 
+ ifName := filter.OutputInterface + matches = true + if strings.HasSuffix(ifName, "+") { + matches = strings.HasPrefix(nicName, ifName[:n-1]) + } else { + matches = nicName == ifName + } + return filter.OutputInterfaceInvert != matches + } + return true } diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go index 1bb0ba1bd..fe06007ae 100644 --- a/pkg/tcpip/stack/iptables_types.go +++ b/pkg/tcpip/stack/iptables_types.go @@ -158,6 +158,19 @@ type IPHeaderFilter struct { // true the filter will match packets that fail the destination // comparison. DstInvert bool + + // OutputInterface matches the name of the outgoing interface for the + // packet. + OutputInterface string + + // OutputInterfaceMask masks the characters of the interface name when + // comparing with OutputInterface. + OutputInterfaceMask string + + // OutputInterfaceInvert inverts the meaning of outgoing interface check, + // i.e. when true the filter will match packets that fail the outgoing + // interface comparison. + OutputInterfaceInvert bool } // A Matcher is the interface for matching packets. diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 8f4c1fe42..54103fdb3 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -1233,7 +1233,7 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link // iptables filtering. ipt := n.stack.IPTables() address := n.primaryAddress(protocol) - if ok := ipt.Check(Prerouting, &pkt, nil, nil, address.Address); !ok { + if ok := ipt.Check(Prerouting, &pkt, nil, nil, address.Address, ""); !ok { // iptables is telling us to drop the packet. return } diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index e33fae4eb..b39ffa9fb 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -1898,9 +1898,23 @@ func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, addres nic.mu.RLock() defer nic.mu.RUnlock() - // An endpoint with this id exists, check if it can be used and return it. + // An endpoint with this id exists, check if it can be + // used and return it. return ref.ep, nil } } return nil, tcpip.ErrBadAddress } + +// FindNICNameFromID returns the name of the nic for the given NICID. +func (s *Stack) FindNICNameFromID(id tcpip.NICID) string { + s.mu.Lock() + defer s.mu.Unlock() + + nic, ok := s.nics[id] + if !ok { + return "" + } + + return nic.Name() +} diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go index b1382d353..c145bd1e9 100644 --- a/test/iptables/filter_output.go +++ b/test/iptables/filter_output.go @@ -29,6 +29,12 @@ func init() { RegisterTestCase(FilterOutputAcceptUDPOwner{}) RegisterTestCase(FilterOutputDropUDPOwner{}) RegisterTestCase(FilterOutputOwnerFail{}) + RegisterTestCase(FilterOutputInterfaceAccept{}) + RegisterTestCase(FilterOutputInterfaceDrop{}) + RegisterTestCase(FilterOutputInterface{}) + RegisterTestCase(FilterOutputInterfaceBeginsWith{}) + RegisterTestCase(FilterOutputInterfaceInvertDrop{}) + RegisterTestCase(FilterOutputInterfaceInvertAccept{}) } // FilterOutputDropTCPDestPort tests that connections are not accepted on @@ -286,3 +292,167 @@ func (FilterOutputInvertDestination) ContainerAction(ip net.IP) error { func (FilterOutputInvertDestination) LocalAction(ip net.IP) error { return listenUDP(acceptPort, sendloopDuration) } + +// FilterOutputInterfaceAccept tests that packets are sent via interface +// matching the iptables rule. 
+type FilterOutputInterfaceAccept struct{} + +// Name implements TestCase.Name. +func (FilterOutputInterfaceAccept) Name() string { + return "FilterOutputInterfaceAccept" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInterfaceAccept) ContainerAction(ip net.IP) error { + ifname, ok := getInterfaceName() + if !ok { + return fmt.Errorf("no interface is present, except loopback") + } + if err := filterTable("-A", "OUTPUT", "-p", "udp", "-o", ifname, "-j", "ACCEPT"); err != nil { + return err + } + + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputInterfaceAccept) LocalAction(ip net.IP) error { + return listenUDP(acceptPort, sendloopDuration) +} + +// FilterOutputInterfaceDrop tests that packets are not sent via interface +// matching the iptables rule. +type FilterOutputInterfaceDrop struct{} + +// Name implements TestCase.Name. +func (FilterOutputInterfaceDrop) Name() string { + return "FilterOutputInterfaceDrop" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInterfaceDrop) ContainerAction(ip net.IP) error { + ifname, ok := getInterfaceName() + if !ok { + return fmt.Errorf("no interface is present, except loopback") + } + if err := filterTable("-A", "OUTPUT", "-p", "udp", "-o", ifname, "-j", "DROP"); err != nil { + return err + } + + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputInterfaceDrop) LocalAction(ip net.IP) error { + if err := listenUDP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("packets should not be received on port %v, but are received", acceptPort) + } + + return nil +} + +// FilterOutputInterface tests that packets are sent via interface which is +// not matching the interface name in the iptables rule. +type FilterOutputInterface struct{} + +// Name implements TestCase.Name. +func (FilterOutputInterface) Name() string { + return "FilterOutputInterface" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInterface) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "udp", "-o", "lo", "-j", "DROP"); err != nil { + return err + } + + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputInterface) LocalAction(ip net.IP) error { + return listenUDP(acceptPort, sendloopDuration) +} + +// FilterOutputInterfaceBeginsWith tests that packets are not sent via an +// interface which begins with the given interface name. +type FilterOutputInterfaceBeginsWith struct{} + +// Name implements TestCase.Name. +func (FilterOutputInterfaceBeginsWith) Name() string { + return "FilterOutputInterfaceBeginsWith" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInterfaceBeginsWith) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "udp", "-o", "e+", "-j", "DROP"); err != nil { + return err + } + + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterOutputInterfaceBeginsWith) LocalAction(ip net.IP) error { + if err := listenUDP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("packets should not be received on port %v, but are received", acceptPort) + } + + return nil +} + +// FilterOutputInterfaceInvertDrop tests that we selectively do not send +// packets via interface not matching the interface name. +type FilterOutputInterfaceInvertDrop struct{} + +// Name implements TestCase.Name. +func (FilterOutputInterfaceInvertDrop) Name() string { + return "FilterOutputInterfaceInvertDrop" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInterfaceInvertDrop) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "!", "-o", "lo", "-j", "DROP"); err != nil { + return err + } + + // Listen for TCP packets on accept port. + if err := listenTCP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection on port %d should not be accepted, but got accepted", acceptPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputInterfaceInvertDrop) LocalAction(ip net.IP) error { + if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) + } + + return nil +} + +// FilterOutputInterfaceInvertAccept tests that we can selectively send packets +// not matching the specific outgoing interface. +type FilterOutputInterfaceInvertAccept struct{} + +// Name implements TestCase.Name. +func (FilterOutputInterfaceInvertAccept) Name() string { + return "FilterOutputInterfaceInvertAccept" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInterfaceInvertAccept) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "!", "-o", "lo", "-j", "ACCEPT"); err != nil { + return err + } + + // Listen for TCP packets on accept port. + return listenTCP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterOutputInterfaceInvertAccept) LocalAction(ip net.IP) error { + return connectTCP(ip, acceptPort, sendloopDuration) +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 63a862d35..84eb75a40 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -167,6 +167,30 @@ func TestFilterOutputOwnerFail(t *testing.T) { singleTest(t, FilterOutputOwnerFail{}) } +func TestFilterOutputInterfaceAccept(t *testing.T) { + singleTest(t, FilterOutputInterfaceAccept{}) +} + +func TestFilterOutputInterfaceDrop(t *testing.T) { + singleTest(t, FilterOutputInterfaceDrop{}) +} + +func TestFilterOutputInterface(t *testing.T) { + singleTest(t, FilterOutputInterface{}) +} + +func TestFilterOutputInterfaceBeginsWith(t *testing.T) { + singleTest(t, FilterOutputInterfaceBeginsWith{}) +} + +func TestFilterOutputInterfaceInvertDrop(t *testing.T) { + singleTest(t, FilterOutputInterfaceInvertDrop{}) +} + +func TestFilterOutputInterfaceInvertAccept(t *testing.T) { + singleTest(t, FilterOutputInterfaceInvertAccept{}) +} + func TestJumpSerialize(t *testing.T) { singleTest(t, FilterInputSerializeJump{}) } diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 2f988cd18..7146edbb9 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -169,3 +169,18 @@ func localAddrs() ([]string, error) { } return addrStrs, nil } + +// getInterfaceName returns the name of the interface other than loopback. +func getInterfaceName() (string, bool) { + var ifname string + if interfaces, err := net.Interfaces(); err == nil { + for _, intf := range interfaces { + if intf.Name != "lo" { + ifname = intf.Name + break + } + } + } + + return ifname, ifname != "" +} -- cgit v1.2.3 From 257a6bf8834634f8d24e61a3d3eb7641b0ff4d77 Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Mon, 11 May 2020 01:57:12 -0400 Subject: passed the syscall test case 'fpsig_fork' on Arm64 platform Some functions were added for Arm64 platform: a, get_fp/set_fp b, inline_tgkill Test step: bazel test //test/syscalls:fpsig_fork_test_runsc_ptrace Signed-off-by: Bin Lu --- test/syscalls/linux/BUILD | 5 +---- test/syscalls/linux/fpsig_fork.cc | 29 ++++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 837e56042..f7c2eab85 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -835,10 +835,7 @@ cc_binary( cc_binary( name = "fpsig_fork_test", testonly = 1, - srcs = select_arch( - amd64 = ["fpsig_fork.cc"], - arm64 = [], - ), + srcs = ["fpsig_fork.cc"], linkstatic = 1, deps = [ gtest, diff --git a/test/syscalls/linux/fpsig_fork.cc b/test/syscalls/linux/fpsig_fork.cc index a346f1f00..62f2f7d23 100644 --- a/test/syscalls/linux/fpsig_fork.cc +++ b/test/syscalls/linux/fpsig_fork.cc @@ -27,9 +27,20 @@ namespace testing { namespace { +#ifdef __x86_64__ #define GET_XMM(__var, __xmm) \ asm volatile("movq %%" #__xmm ", %0" : "=r"(__var)) #define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var)) +#define GET_FP0(__var) GET_XMM(__var, xmm0) +#define SET_FP0(__var) SET_XMM(__var, xmm0) +#elif __aarch64__ +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) +#define GET_FPREG( var, regname ) asm volatile("str "__stringify(regname)", %0" : "=m" (var) ) +#define SET_FPREG( var, regname ) asm volatile("ldr "__stringify(regname)", %0" : "=m" (var) ) +#define GET_FP0( var ) GET_FPREG( var, d0 ) +#define SET_FP0( var ) GET_FPREG( var, d0 ) +#endif int parent, child; @@ -40,7 +51,10 @@ void sigusr1(int s, siginfo_t* siginfo, void* _uc) { TEST_CHECK_MSG(child >= 0, "fork failed"); uint64_t val = SIGUSR1; - SET_XMM(val, xmm0); + SET_FP0(val); + uint64_t got; + GET_FP0(got); + TEST_CHECK_MSG(val == got, "Basic FP check failed in sigusr1()"); } TEST(FPSigTest, Fork) { @@ -67,8 +81,9 @@ TEST(FPSigTest, Fork) { // be the one clobbered. uint64_t expected = 0xdeadbeeffacefeed; - SET_XMM(expected, xmm0); + SET_FP0(expected); +#ifdef __x86_64__ asm volatile( "movl %[killnr], %%eax;" "movl %[parent], %%edi;" @@ -81,9 +96,17 @@ TEST(FPSigTest, Fork) { : "rax", "rdi", "rsi", "rdx", // Clobbered by syscall. "rcx", "r11"); +#elif __aarch64__ + asm volatile( + "mov x8, %0\n" + "mov x0, %1\n" + "mov x1, %2\n" + "mov x2, %3\n" + "svc #0\n"::"r"(__NR_tgkill), "r"(parent), "r"(parent_tid), "r"(SIGUSR1)); +#endif uint64_t got; - GET_XMM(got, xmm0); + GET_FP0(got); if (getpid() == parent) { // Parent. int status; -- cgit v1.2.3 From af2bc1c72adbf09209c41ef60fffc54e913eaba6 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 11 May 2020 10:29:47 -0700 Subject: Internal change. PiperOrigin-RevId: 310941717 --- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket_bind_to_device_sequence.cc | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 837e56042..adf259bba 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -2620,6 +2620,7 @@ cc_binary( ":socket_bind_to_device_util", ":socket_test_util", "//test/util:capability_util", + "@com_google_absl//absl/container:node_hash_map", gtest, "//test/util:test_main", "//test/util:test_util", diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc index 637d1151a..1967329ee 100644 --- a/test/syscalls/linux/socket_bind_to_device_sequence.cc +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -33,6 +33,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/container/node_hash_map.h" #include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_bind_to_device_util.h" #include "test/syscalls/linux/socket_test_util.h" @@ -192,8 +193,8 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { in_port_t port_ = 0; // sockets_to_close_ is a map from action index to the socket that was // created. - std::unordered_map> + absl::node_hash_map> sockets_to_close_; int next_socket_id_ = 0; }; -- cgit v1.2.3 From c5ab21b04857837d42d3b5ca4783528b4e084548 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 11 May 2020 11:02:51 -0700 Subject: Internal change. 
PiperOrigin-RevId: 310949277 --- .../tests/udp_icmp_error_propagation_test.go | 38 +++++++++------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index 30dcb336e..1462f360e 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "net" + "sync" "syscall" "testing" "time" @@ -300,19 +301,22 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { t.Fatalf("did not receive message from DUT: %s", err) } - c := make(chan error) - go func(c chan error) { + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + if wantErrno != syscall.Errno(0) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0) if ret != -1 { - c <- fmt.Errorf("recv during ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) + t.Errorf("recv during ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) return } if err != wantErrno { - c <- fmt.Errorf("recv during ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) + t.Errorf("recv during ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, wantErrno) return } } @@ -321,23 +325,20 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { defer cancel() if ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0); ret == -1 { - c <- fmt.Errorf("recv after ICMP error failed with (%[1]d) %[1]", err) - return + t.Errorf("recv after ICMP error failed with (%[1]d) %[1]", err) } - c <- nil - }(c) + }() + + go func() { + defer wg.Done() - cleanChan := make(chan error) - go func(c chan error) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if ret, _, err := dut.RecvWithErrno(ctx, cleanFD, 100, 0); ret == -1 { - c <- fmt.Errorf("recv on clean socket failed with (%[1]d) %[1]", err) - return + t.Errorf("recv on clean socket failed with (%[1]d) %[1]", err) } - c <- nil - }(cleanChan) + }() // TODO(b/155684889) This sleep is to allow time for the DUT to // actually call recv since we want the ICMP error to arrive during the @@ -351,14 +352,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { conn.Send(tb.UDP{DstPort: &cleanPort}) conn.Send(tb.UDP{}) - - err, errClean := <-c, <-cleanChan - if errClean != nil { - t.Error(err) - } - if err != nil { - t.Fatal(err) - } + wg.Wait() }) } } -- cgit v1.2.3 From 633e1b89bb403edb2cfe2b0c2e1c902f68743743 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 11 May 2020 15:52:42 -0700 Subject: Internal change. 
PiperOrigin-RevId: 311011004 --- test/packetimpact/testbench/BUILD | 1 + test/packetimpact/testbench/connections.go | 24 +++------ test/packetimpact/testbench/dut.go | 52 ++++++++---------- test/packetimpact/testbench/dut_client.go | 6 +-- test/packetimpact/testbench/rawsockets.go | 4 +- test/packetimpact/testbench/testbench.go | 63 ++++++++++++++++++++++ test/packetimpact/tests/fin_wait2_timeout_test.go | 5 ++ .../tests/icmpv6_param_problem_test.go | 5 ++ test/packetimpact/tests/tcp_close_wait_ack_test.go | 5 ++ .../tests/tcp_noaccept_close_rst_test.go | 5 ++ .../tests/tcp_outside_the_window_test.go | 5 ++ .../tests/tcp_should_piggyback_test.go | 5 ++ test/packetimpact/tests/tcp_user_timeout_test.go | 5 ++ test/packetimpact/tests/tcp_window_shrink_test.go | 5 ++ .../tests/tcp_zero_window_probe_retransmit_test.go | 5 ++ .../tests/tcp_zero_window_probe_test.go | 5 ++ .../tcp_zero_window_probe_usertimeout_test.go | 5 ++ .../tests/udp_icmp_error_propagation_test.go | 5 ++ test/packetimpact/tests/udp_recv_multicast_test.go | 5 ++ .../packetimpact/tests/udp_send_recv_dgram_test.go | 5 ++ 20 files changed, 167 insertions(+), 53 deletions(-) create mode 100644 test/packetimpact/testbench/testbench.go (limited to 'test') diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index 3ceceb9d7..fed51006f 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -13,6 +13,7 @@ go_library( "dut_client.go", "layers.go", "rawsockets.go", + "testbench.go", ], deps = [ "//pkg/tcpip", diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 28c841612..463fd0556 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -17,7 +17,6 @@ package testbench import ( - "flag" "fmt" "math/rand" "net" @@ -32,13 +31,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/seqnum" ) -var localIPv4 = flag.String("local_ipv4", "", "local IPv4 address for test packets") -var remoteIPv4 = flag.String("remote_ipv4", "", "remote IPv4 address for test packets") -var localIPv6 = flag.String("local_ipv6", "", "local IPv6 address for test packets") -var remoteIPv6 = flag.String("remote_ipv6", "", "remote IPv6 address for test packets") -var localMAC = flag.String("local_mac", "", "local mac address for test packets") -var remoteMAC = flag.String("remote_mac", "", "remote mac address for test packets") - func portFromSockaddr(sa unix.Sockaddr) (uint16, error) { switch sa := sa.(type) { case *unix.SockaddrInet4: @@ -64,11 +56,11 @@ func pickPort(domain, typ int) (fd int, sa unix.Sockaddr, err error) { switch domain { case unix.AF_INET: var sa4 unix.SockaddrInet4 - copy(sa4.Addr[:], net.ParseIP(*localIPv4).To4()) + copy(sa4.Addr[:], net.ParseIP(LocalIPv4).To4()) sa = &sa4 case unix.AF_INET6: var sa6 unix.SockaddrInet6 - copy(sa6.Addr[:], net.ParseIP(*localIPv6).To16()) + copy(sa6.Addr[:], net.ParseIP(LocalIPv6).To16()) sa = &sa6 default: return -1, nil, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) @@ -120,12 +112,12 @@ var _ layerState = (*etherState)(nil) // newEtherState creates a new etherState. 
func newEtherState(out, in Ether) (*etherState, error) { - lMAC, err := tcpip.ParseMACAddress(*localMAC) + lMAC, err := tcpip.ParseMACAddress(LocalMAC) if err != nil { return nil, err } - rMAC, err := tcpip.ParseMACAddress(*remoteMAC) + rMAC, err := tcpip.ParseMACAddress(RemoteMAC) if err != nil { return nil, err } @@ -172,8 +164,8 @@ var _ layerState = (*ipv4State)(nil) // newIPv4State creates a new ipv4State. func newIPv4State(out, in IPv4) (*ipv4State, error) { - lIP := tcpip.Address(net.ParseIP(*localIPv4).To4()) - rIP := tcpip.Address(net.ParseIP(*remoteIPv4).To4()) + lIP := tcpip.Address(net.ParseIP(LocalIPv4).To4()) + rIP := tcpip.Address(net.ParseIP(RemoteIPv4).To4()) s := ipv4State{ out: IPv4{SrcAddr: &lIP, DstAddr: &rIP}, in: IPv4{SrcAddr: &rIP, DstAddr: &lIP}, @@ -217,8 +209,8 @@ var _ layerState = (*ipv6State)(nil) // newIPv6State creates a new ipv6State. func newIPv6State(out, in IPv6) (*ipv6State, error) { - lIP := tcpip.Address(net.ParseIP(*localIPv6).To16()) - rIP := tcpip.Address(net.ParseIP(*remoteIPv6).To16()) + lIP := tcpip.Address(net.ParseIP(LocalIPv6).To16()) + rIP := tcpip.Address(net.ParseIP(RemoteIPv6).To16()) s := ipv6State{ out: IPv6{SrcAddr: &lIP, DstAddr: &rIP}, in: IPv6{SrcAddr: &rIP, DstAddr: &lIP}, diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index f68d9d62b..a78b7d7ee 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -16,12 +16,10 @@ package testbench import ( "context" - "flag" "net" "strconv" "syscall" "testing" - "time" pb "gvisor.dev/gvisor/test/packetimpact/proto/posix_server_go_proto" @@ -30,29 +28,21 @@ import ( "google.golang.org/grpc/keepalive" ) -var ( - posixServerIP = flag.String("posix_server_ip", "", "ip address to listen to for UDP commands") - posixServerPort = flag.Int("posix_server_port", 40000, "port to listen to for UDP commands") - rpcTimeout = flag.Duration("rpc_timeout", 100*time.Millisecond, "gRPC timeout") - rpcKeepalive = flag.Duration("rpc_keepalive", 10*time.Second, "gRPC keepalive") -) - // DUT communicates with the DUT to force it to make POSIX calls. type DUT struct { t *testing.T conn *grpc.ClientConn - posixServer PosixClient + posixServer POSIXClient } // NewDUT creates a new connection with the DUT over gRPC. func NewDUT(t *testing.T) DUT { - flag.Parse() - posixServerAddress := *posixServerIP + ":" + strconv.Itoa(*posixServerPort) - conn, err := grpc.Dial(posixServerAddress, grpc.WithInsecure(), grpc.WithKeepaliveParams(keepalive.ClientParameters{Timeout: *rpcKeepalive})) + posixServerAddress := POSIXServerIP + ":" + strconv.Itoa(POSIXServerPort) + conn, err := grpc.Dial(posixServerAddress, grpc.WithInsecure(), grpc.WithKeepaliveParams(keepalive.ClientParameters{Timeout: RPCKeepalive})) if err != nil { t.Fatalf("failed to grpc.Dial(%s): %s", posixServerAddress, err) } - posixServer := NewPosixClient(conn) + posixServer := NewPOSIXClient(conn) return DUT{ t: t, conn: conn, @@ -149,12 +139,12 @@ func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) // CreateListener makes a new TCP connection. If it fails, the test ends. 
func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { - fd, remotePort := dut.CreateBoundSocket(typ, proto, net.ParseIP(*remoteIPv4)) + fd, remotePort := dut.CreateBoundSocket(typ, proto, net.ParseIP(RemoteIPv4)) dut.Listen(fd, backlog) return fd, remotePort } -// All the functions that make gRPC calls to the Posix service are below, sorted +// All the functions that make gRPC calls to the POSIX service are below, sorted // alphabetically. // Accept calls accept on the DUT and causes a fatal test failure if it doesn't @@ -162,7 +152,7 @@ func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { // AcceptWithErrno. func (dut *DUT) Accept(sockfd int32) (int32, unix.Sockaddr) { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() fd, sa, err := dut.AcceptWithErrno(ctx, sockfd) if fd < 0 { @@ -189,7 +179,7 @@ func (dut *DUT) AcceptWithErrno(ctx context.Context, sockfd int32) (int32, unix. // needed, use BindWithErrno. func (dut *DUT) Bind(fd int32, sa unix.Sockaddr) { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.BindWithErrno(ctx, fd, sa) if ret != 0 { @@ -216,7 +206,7 @@ func (dut *DUT) BindWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) ( // CloseWithErrno. func (dut *DUT) Close(fd int32) { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.CloseWithErrno(ctx, fd) if ret != 0 { @@ -242,7 +232,7 @@ func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) { // needed, use ConnectWithErrno. func (dut *DUT) Connect(fd int32, sa unix.Sockaddr) { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.ConnectWithErrno(ctx, fd, sa) if ret != 0 { @@ -269,7 +259,7 @@ func (dut *DUT) ConnectWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr // needed, use GetSockNameWithErrno. func (dut *DUT) GetSockName(sockfd int32) unix.Sockaddr { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, sa, err := dut.GetSockNameWithErrno(ctx, sockfd) if ret != 0 { @@ -318,7 +308,7 @@ func (dut *DUT) getSockOpt(ctx context.Context, sockfd, level, optname, optlen i // more specific GetSockOptXxx function. func (dut *DUT) GetSockOpt(sockfd, level, optname, optlen int32) []byte { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, optval, err := dut.GetSockOptWithErrno(ctx, sockfd, level, optname, optlen) if ret != 0 { @@ -345,7 +335,7 @@ func (dut *DUT) GetSockOptWithErrno(ctx context.Context, sockfd, level, optname, // is needed, use GetSockOptIntWithErrno. 
func (dut *DUT) GetSockOptInt(sockfd, level, optname int32) int32 { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, intval, err := dut.GetSockOptIntWithErrno(ctx, sockfd, level, optname) if ret != 0 { @@ -370,7 +360,7 @@ func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, sockfd, level, optna // needed, use GetSockOptTimevalWithErrno. func (dut *DUT) GetSockOptTimeval(sockfd, level, optname int32) unix.Timeval { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, timeval, err := dut.GetSockOptTimevalWithErrno(ctx, sockfd, level, optname) if ret != 0 { @@ -399,7 +389,7 @@ func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, o // ListenWithErrno. func (dut *DUT) Listen(sockfd, backlog int32) { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.ListenWithErrno(ctx, sockfd, backlog) if ret != 0 { @@ -426,7 +416,7 @@ func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int // SendWithErrno. func (dut *DUT) Send(sockfd int32, buf []byte, flags int32) int32 { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.SendWithErrno(ctx, sockfd, buf, flags) if ret == -1 { @@ -455,7 +445,7 @@ func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, fla // SendToWithErrno. func (dut *DUT) SendTo(sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) int32 { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.SendToWithErrno(ctx, sockfd, buf, flags, destAddr) if ret == -1 { @@ -502,7 +492,7 @@ func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, op // more specific SetSockOptXxx function. func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.SetSockOptWithErrno(ctx, sockfd, level, optname, optval) if ret != 0 { @@ -523,7 +513,7 @@ func (dut *DUT) SetSockOptWithErrno(ctx context.Context, sockfd, level, optname // is needed, use SetSockOptIntWithErrno. func (dut *DUT) SetSockOptInt(sockfd, level, optname, optval int32) { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.SetSockOptIntWithErrno(ctx, sockfd, level, optname, optval) if ret != 0 { @@ -542,7 +532,7 @@ func (dut *DUT) SetSockOptIntWithErrno(ctx context.Context, sockfd, level, optna // needed, use SetSockOptTimevalWithErrno. 
func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval) { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.SetSockOptTimevalWithErrno(ctx, sockfd, level, optname, tv) if ret != 0 { @@ -593,7 +583,7 @@ func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) { // RecvWithErrno. func (dut *DUT) Recv(sockfd, len, flags int32) []byte { dut.t.Helper() - ctx, cancel := context.WithTimeout(context.Background(), *rpcTimeout) + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, buf, err := dut.RecvWithErrno(ctx, sockfd, len, flags) if ret == -1 { diff --git a/test/packetimpact/testbench/dut_client.go b/test/packetimpact/testbench/dut_client.go index b130a33a2..d0e68c5da 100644 --- a/test/packetimpact/testbench/dut_client.go +++ b/test/packetimpact/testbench/dut_client.go @@ -20,9 +20,9 @@ import ( ) // PosixClient is a gRPC client for the Posix service. -type PosixClient pb.PosixClient +type POSIXClient pb.PosixClient -// NewPosixClient makes a new gRPC client for the Posix service. -func NewPosixClient(c grpc.ClientConnInterface) PosixClient { +// NewPOSIXClient makes a new gRPC client for the POSIX service. +func NewPOSIXClient(c grpc.ClientConnInterface) POSIXClient { return pb.NewPosixClient(c) } diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index a9ad72b63..4665f60b2 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -27,8 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -var device = flag.String("device", "", "local device for test packets") - // Sniffer can sniff raw packets on the wire. type Sniffer struct { t *testing.T @@ -139,7 +137,7 @@ type Injector struct { // NewInjector creates a new injector on *device. func NewInjector(t *testing.T) (Injector, error) { flag.Parse() - ifInfo, err := net.InterfaceByName(*device) + ifInfo, err := net.InterfaceByName(Device) if err != nil { return Injector{}, err } diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go new file mode 100644 index 000000000..a1242b189 --- /dev/null +++ b/test/packetimpact/testbench/testbench.go @@ -0,0 +1,63 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testbench + +import ( + "flag" + "time" +) + +var ( + // Device is the local device on the test network. + Device = "" + // LocalIPv4 is the local IPv4 address on the test network. + LocalIPv4 = "" + // LocalIPv6 is the local IPv6 address on the test network. + LocalIPv6 = "" + // LocalMAC is the local MAC address on the test network. + LocalMAC = "" + // POSIXServerIP is the POSIX server's IP address on the control network. + POSIXServerIP = "" + // POSIXServerPort is the UDP port the POSIX server is bound to on the + // control network. 
+ POSIXServerPort = 40000 + // RemoteIPv4 is the DUT's IPv4 address on the test network. + RemoteIPv4 = "" + // RemoteIPv6 is the DUT's IPv6 address on the test network. + RemoteIPv6 = "" + // RemoteMAC is the DUT's MAC address on the test network. + RemoteMAC = "" + // RPCKeepalive is the gRPC keepalive. + RPCKeepalive = 10 * time.Second + // RPCTimeout is the gRPC timeout. + RPCTimeout = 100 * time.Millisecond +) + +// RegisterFlags defines flags and associates them with the package-level +// exported variables above. It should be called by tests in their init +// functions. +func RegisterFlags(fs *flag.FlagSet) { + fs.StringVar(&POSIXServerIP, "posix_server_ip", POSIXServerIP, "ip address to listen to for UDP commands") + fs.IntVar(&POSIXServerPort, "posix_server_port", POSIXServerPort, "port to listen to for UDP commands") + fs.DurationVar(&RPCTimeout, "rpc_timeout", RPCTimeout, "gRPC timeout") + fs.DurationVar(&RPCKeepalive, "rpc_keepalive", RPCKeepalive, "gRPC keepalive") + fs.StringVar(&LocalIPv4, "local_ipv4", LocalIPv4, "local IPv4 address for test packets") + fs.StringVar(&RemoteIPv4, "remote_ipv4", RemoteIPv4, "remote IPv4 address for test packets") + fs.StringVar(&LocalIPv6, "local_ipv6", LocalIPv6, "local IPv6 address for test packets") + fs.StringVar(&RemoteIPv6, "remote_ipv6", RemoteIPv6, "remote IPv6 address for test packets") + fs.StringVar(&LocalMAC, "local_mac", LocalMAC, "local mac address for test packets") + fs.StringVar(&RemoteMAC, "remote_mac", RemoteMAC, "remote mac address for test packets") + fs.StringVar(&Device, "device", Device, "local device for test packets") +} diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go index 99dc77f9a..c26ab78d9 100644 --- a/test/packetimpact/tests/fin_wait2_timeout_test.go +++ b/test/packetimpact/tests/fin_wait2_timeout_test.go @@ -15,6 +15,7 @@ package fin_wait2_timeout_test import ( + "flag" "testing" "time" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + func TestFinWait2Timeout(t *testing.T) { for _, tt := range []struct { description string diff --git a/test/packetimpact/tests/icmpv6_param_problem_test.go b/test/packetimpact/tests/icmpv6_param_problem_test.go index b48e55df4..bb1fc26fc 100644 --- a/test/packetimpact/tests/icmpv6_param_problem_test.go +++ b/test/packetimpact/tests/icmpv6_param_problem_test.go @@ -16,6 +16,7 @@ package icmpv6_param_problem_test import ( "encoding/binary" + "flag" "testing" "time" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + // TestICMPv6ParamProblemTest sends a packet with a bad next header. The DUT // should respond with an ICMPv6 Parameter Problem message. 
func TestICMPv6ParamProblemTest(t *testing.T) { diff --git a/test/packetimpact/tests/tcp_close_wait_ack_test.go b/test/packetimpact/tests/tcp_close_wait_ack_test.go index 153ce285b..70a22a2db 100644 --- a/test/packetimpact/tests/tcp_close_wait_ack_test.go +++ b/test/packetimpact/tests/tcp_close_wait_ack_test.go @@ -15,6 +15,7 @@ package tcp_close_wait_ack_test import ( + "flag" "fmt" "testing" "time" @@ -25,6 +26,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + func TestCloseWaitAck(t *testing.T) { for _, tt := range []struct { description string diff --git a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go index 7ebdd1950..2c1ec27d3 100644 --- a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go +++ b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go @@ -15,6 +15,7 @@ package tcp_noaccept_close_rst_test import ( + "flag" "testing" "time" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + func TestTcpNoAcceptCloseReset(t *testing.T) { dut := tb.NewDUT(t) defer dut.TearDown() diff --git a/test/packetimpact/tests/tcp_outside_the_window_test.go b/test/packetimpact/tests/tcp_outside_the_window_test.go index db3d3273b..351df193e 100644 --- a/test/packetimpact/tests/tcp_outside_the_window_test.go +++ b/test/packetimpact/tests/tcp_outside_the_window_test.go @@ -15,6 +15,7 @@ package tcp_outside_the_window_test import ( + "flag" "fmt" "testing" "time" @@ -25,6 +26,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + // TestTCPOutsideTheWindows tests the behavior of the DUT when packets arrive // that are inside or outside the TCP window. 
Packets that are outside the // window should force an extra ACK, as described in RFC793 page 69: diff --git a/test/packetimpact/tests/tcp_should_piggyback_test.go b/test/packetimpact/tests/tcp_should_piggyback_test.go index b0be6ba23..0240dc2f9 100644 --- a/test/packetimpact/tests/tcp_should_piggyback_test.go +++ b/test/packetimpact/tests/tcp_should_piggyback_test.go @@ -15,6 +15,7 @@ package tcp_should_piggyback_test import ( + "flag" "testing" "time" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + func TestPiggyback(t *testing.T) { dut := tb.NewDUT(t) defer dut.TearDown() diff --git a/test/packetimpact/tests/tcp_user_timeout_test.go b/test/packetimpact/tests/tcp_user_timeout_test.go index 3cf82badb..ce31917ee 100644 --- a/test/packetimpact/tests/tcp_user_timeout_test.go +++ b/test/packetimpact/tests/tcp_user_timeout_test.go @@ -15,6 +15,7 @@ package tcp_user_timeout_test import ( + "flag" "fmt" "testing" "time" @@ -24,6 +25,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + func sendPayload(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error { sampleData := make([]byte, 100) for i := range sampleData { diff --git a/test/packetimpact/tests/tcp_window_shrink_test.go b/test/packetimpact/tests/tcp_window_shrink_test.go index c9354074e..58ec1d740 100644 --- a/test/packetimpact/tests/tcp_window_shrink_test.go +++ b/test/packetimpact/tests/tcp_window_shrink_test.go @@ -15,6 +15,7 @@ package tcp_window_shrink_test import ( + "flag" "testing" "time" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + func TestWindowShrink(t *testing.T) { dut := tb.NewDUT(t) defer dut.TearDown() diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go index 864e5a634..dd43a24db 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go @@ -15,6 +15,7 @@ package tcp_zero_window_probe_retransmit_test import ( + "flag" "testing" "time" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + // TestZeroWindowProbeRetransmit tests retransmits of zero window probes // to be sent at exponentially inreasing time intervals. func TestZeroWindowProbeRetransmit(t *testing.T) { diff --git a/test/packetimpact/tests/tcp_zero_window_probe_test.go b/test/packetimpact/tests/tcp_zero_window_probe_test.go index 4fa3d0cd4..6c453505d 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_test.go @@ -15,6 +15,7 @@ package tcp_zero_window_probe_test import ( + "flag" "testing" "time" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + // TestZeroWindowProbe tests few cases of zero window probing over the // same connection. 
func TestZeroWindowProbe(t *testing.T) { diff --git a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go index 7d81c276c..193427fb9 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go @@ -15,6 +15,7 @@ package tcp_zero_window_probe_usertimeout_test import ( + "flag" "testing" "time" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + // TestZeroWindowProbeUserTimeout sanity tests user timeout when we are // retransmitting zero window probes. func TestZeroWindowProbeUserTimeout(t *testing.T) { diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index 1462f360e..ca4df2ab0 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -16,6 +16,7 @@ package udp_icmp_error_propagation_test import ( "context" + "flag" "fmt" "net" "sync" @@ -28,6 +29,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + type connectionMode bool func (c connectionMode) String() string { diff --git a/test/packetimpact/tests/udp_recv_multicast_test.go b/test/packetimpact/tests/udp_recv_multicast_test.go index 61fd17050..0bae18ba3 100644 --- a/test/packetimpact/tests/udp_recv_multicast_test.go +++ b/test/packetimpact/tests/udp_recv_multicast_test.go @@ -15,6 +15,7 @@ package udp_recv_multicast_test import ( + "flag" "net" "testing" @@ -23,6 +24,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + func TestUDPRecvMulticast(t *testing.T) { dut := tb.NewDUT(t) defer dut.TearDown() diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go index 1c682831c..350875a6f 100644 --- a/test/packetimpact/tests/udp_send_recv_dgram_test.go +++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go @@ -15,6 +15,7 @@ package udp_send_recv_dgram_test import ( + "flag" "math/rand" "net" "testing" @@ -24,6 +25,10 @@ import ( tb "gvisor.dev/gvisor/test/packetimpact/testbench" ) +func init() { + tb.RegisterFlags(flag.CommandLine) +} + func generateRandomPayload(t *testing.T, n int) string { t.Helper() buf := make([]byte, n) -- cgit v1.2.3 From 27b1f19cabe04effbb37fa6a680b65987b379313 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Tue, 12 May 2020 12:14:56 -0700 Subject: iptables: support gid match for owner matching. - Added support for matching gid owner and invert flag for uid and gid. $ iptables -A OUTPUT -p tcp -m owner --gid-owner root -j ACCEPT $ iptables -A OUTPUT -p tcp -m owner ! --uid-owner root -j ACCEPT $ iptables -A OUTPUT -p tcp -m owner ! --gid-owner root -j DROP - Added tests for uid, gid and invert flags. 
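The matching semantics in the diff below boil down to: for each enabled criterion (UID, GID), equality between the packet owner and the configured ID, XORed with the invert ("!") flag, must hold, otherwise the rule does not match. A minimal, hypothetical Go sketch of just that predicate follows; the types and names are illustrative only, not the sentry's OwnerMatcher API.

package main

import "fmt"

// ownerRule mirrors the shape of an "-m owner" rule: optional UID and GID
// criteria, each with an optional invert ("!") flag.
type ownerRule struct {
	uid, gid             uint32
	matchUID, matchGID   bool
	invertUID, invertGID bool
}

// matches reports whether a packet owned by (pktUID, pktGID) satisfies the rule.
func (r ownerRule) matches(pktUID, pktGID uint32) bool {
	// For each enabled criterion, equality XOR invert must be true; when
	// equality equals the invert flag, the criterion fails.
	if r.matchUID && (pktUID == r.uid) == r.invertUID {
		return false
	}
	if r.matchGID && (pktGID == r.gid) == r.invertGID {
		return false
	}
	return true
}

func main() {
	// Criterion from: iptables -A OUTPUT -p tcp -m owner ! --gid-owner root -j DROP
	rule := ownerRule{gid: 0, matchGID: true, invertGID: true}
	fmt.Println(rule.matches(1000, 0))    // false: packet GID is root, but the criterion is inverted
	fmt.Println(rule.matches(1000, 1000)) // true: packet GID is not root, so the inverted criterion holds
}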
--- pkg/sentry/socket/netfilter/owner_matcher.go | 81 ++++++++----- test/iptables/filter_output.go | 169 +++++++++++++++++++++++++-- test/iptables/iptables_test.go | 20 ++++ 3 files changed, 230 insertions(+), 40 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/owner_matcher.go b/pkg/sentry/socket/netfilter/owner_matcher.go index 5949a7c29..3863293c7 100644 --- a/pkg/sentry/socket/netfilter/owner_matcher.go +++ b/pkg/sentry/socket/netfilter/owner_matcher.go @@ -45,14 +45,18 @@ func (ownerMarshaler) marshal(mr stack.Matcher) []byte { GID: matcher.gid, } - // Support for UID match. - // TODO(gvisor.dev/issue/170): Need to support gid match. + // Support for UID and GID match. if matcher.matchUID { iptOwnerInfo.Match = linux.XT_OWNER_UID - } else if matcher.matchGID { - panic("GID match is not supported.") - } else { - panic("UID match is not set.") + if matcher.invertUID { + iptOwnerInfo.Invert = linux.XT_OWNER_UID + } + } + if matcher.matchGID { + iptOwnerInfo.Match |= linux.XT_OWNER_GID + if matcher.invertGID { + iptOwnerInfo.Invert |= linux.XT_OWNER_GID + } } buf := make([]byte, 0, linux.SizeOfIPTOwnerInfo) @@ -71,31 +75,34 @@ func (ownerMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack. binary.Unmarshal(buf[:linux.SizeOfIPTOwnerInfo], usermem.ByteOrder, &matchData) nflog("parseMatchers: parsed IPTOwnerInfo: %+v", matchData) - if matchData.Invert != 0 { - return nil, fmt.Errorf("invert flag is not supported for owner match") - } - - // Support for UID match. - // TODO(gvisor.dev/issue/170): Need to support gid match. - if matchData.Match&linux.XT_OWNER_UID != linux.XT_OWNER_UID { - return nil, fmt.Errorf("owner match is only supported for uid") - } - - // Check Flags. var owner OwnerMatcher owner.uid = matchData.UID owner.gid = matchData.GID - owner.matchUID = true + + // Check flags. + if matchData.Match&linux.XT_OWNER_UID != 0 { + owner.matchUID = true + if matchData.Invert&linux.XT_OWNER_UID != 0 { + owner.invertUID = true + } + } + if matchData.Match&linux.XT_OWNER_GID != 0 { + owner.matchGID = true + if matchData.Invert&linux.XT_OWNER_GID != 0 { + owner.invertGID = true + } + } return &owner, nil } type OwnerMatcher struct { - uid uint32 - gid uint32 - matchUID bool - matchGID bool - invert uint8 + uid uint32 + gid uint32 + matchUID bool + matchGID bool + invertUID bool + invertGID bool } // Name implements Matcher.Name. @@ -112,16 +119,30 @@ func (om *OwnerMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interface } // If the packet owner is not set, drop the packet. - // Support for uid match. - // TODO(gvisor.dev/issue/170): Need to support gid match. - if pkt.Owner == nil || !om.matchUID { + if pkt.Owner == nil { return false, true } - // TODO(gvisor.dev/issue/170): Need to add tests to verify - // drop rule when packet UID does not match owner matcher UID. - if pkt.Owner.UID() != om.uid { - return false, false + var matches bool + // Check for UID match. + if om.matchUID { + if pkt.Owner.UID() == om.uid { + matches = true + } + if matches == om.invertUID { + return false, false + } + } + + // Check for GID match. 
+ if om.matchGID { + matches = false + if pkt.Owner.GID() == om.gid { + matches = true + } + if matches == om.invertGID { + return false, false + } } return true, false diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go index c145bd1e9..ba0d6fc29 100644 --- a/test/iptables/filter_output.go +++ b/test/iptables/filter_output.go @@ -29,6 +29,11 @@ func init() { RegisterTestCase(FilterOutputAcceptUDPOwner{}) RegisterTestCase(FilterOutputDropUDPOwner{}) RegisterTestCase(FilterOutputOwnerFail{}) + RegisterTestCase(FilterOutputAcceptGIDOwner{}) + RegisterTestCase(FilterOutputDropGIDOwner{}) + RegisterTestCase(FilterOutputInvertGIDOwner{}) + RegisterTestCase(FilterOutputInvertUIDOwner{}) + RegisterTestCase(FilterOutputInvertUIDAndGIDOwner{}) RegisterTestCase(FilterOutputInterfaceAccept{}) RegisterTestCase(FilterOutputInterfaceDrop{}) RegisterTestCase(FilterOutputInterface{}) @@ -116,20 +121,12 @@ func (FilterOutputAcceptTCPOwner) ContainerAction(ip net.IP) error { } // Listen for TCP packets on accept port. - if err := listenTCP(acceptPort, sendloopDuration); err != nil { - return fmt.Errorf("connection on port %d should be accepted, but got dropped", acceptPort) - } - - return nil + return listenTCP(acceptPort, sendloopDuration) } // LocalAction implements TestCase.LocalAction. func (FilterOutputAcceptTCPOwner) LocalAction(ip net.IP) error { - if err := connectTCP(ip, acceptPort, sendloopDuration); err != nil { - return fmt.Errorf("connection destined to port %d should be accepted, but got dropped", acceptPort) - } - - return nil + return connectTCP(ip, acceptPort, sendloopDuration) } // FilterOutputDropTCPOwner tests that TCP connections from uid owner are dropped. @@ -239,6 +236,158 @@ func (FilterOutputOwnerFail) LocalAction(ip net.IP) error { return nil } +// FilterOutputAcceptGIDOwner tests that TCP connections from gid owner are accepted. +type FilterOutputAcceptGIDOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputAcceptGIDOwner) Name() string { + return "FilterOutputAcceptGIDOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputAcceptGIDOwner) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--gid-owner", "root", "-j", "ACCEPT"); err != nil { + return err + } + + // Listen for TCP packets on accept port. + return listenTCP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputAcceptGIDOwner) LocalAction(ip net.IP) error { + return connectTCP(ip, acceptPort, sendloopDuration) +} + +// FilterOutputDropGIDOwner tests that TCP connections from gid owner are dropped. +type FilterOutputDropGIDOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputDropGIDOwner) Name() string { + return "FilterOutputDropGIDOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputDropGIDOwner) ContainerAction(ip net.IP) error { + if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--gid-owner", "root", "-j", "DROP"); err != nil { + return err + } + + // Listen for TCP packets on accept port. + if err := listenTCP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection on port %d should not be accepted, but got accepted", acceptPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterOutputDropGIDOwner) LocalAction(ip net.IP) error { + if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) + } + + return nil +} + +// FilterOutputInvertGIDOwner tests that TCP connections from gid owner are dropped. +type FilterOutputInvertGIDOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputInvertGIDOwner) Name() string { + return "FilterOutputInvertGIDOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInvertGIDOwner) ContainerAction(ip net.IP) error { + rules := [][]string{ + {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--gid-owner", "root", "-j", "ACCEPT"}, + {"-A", "OUTPUT", "-p", "tcp", "-j", "DROP"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + // Listen for TCP packets on accept port. + if err := listenTCP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection on port %d should not be accepted, but got accepted", acceptPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputInvertGIDOwner) LocalAction(ip net.IP) error { + if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) + } + + return nil +} + +// FilterOutputInvertUIDOwner tests that TCP connections from gid owner are dropped. +type FilterOutputInvertUIDOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputInvertUIDOwner) Name() string { + return "FilterOutputInvertUIDOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInvertUIDOwner) ContainerAction(ip net.IP) error { + rules := [][]string{ + {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--uid-owner", "root", "-j", "DROP"}, + {"-A", "OUTPUT", "-p", "tcp", "-j", "ACCEPT"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + // Listen for TCP packets on accept port. + return listenTCP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterOutputInvertUIDOwner) LocalAction(ip net.IP) error { + return connectTCP(ip, acceptPort, sendloopDuration) +} + +// FilterOutputInvertUIDAndGIDOwner tests that TCP connections from uid and gid +// owner are dropped. +type FilterOutputInvertUIDAndGIDOwner struct{} + +// Name implements TestCase.Name. +func (FilterOutputInvertUIDAndGIDOwner) Name() string { + return "FilterOutputInvertUIDAndGIDOwner" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterOutputInvertUIDAndGIDOwner) ContainerAction(ip net.IP) error { + rules := [][]string{ + {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--uid-owner", "root", "!", "--gid-owner", "root", "-j", "ACCEPT"}, + {"-A", "OUTPUT", "-p", "tcp", "-j", "DROP"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + // Listen for TCP packets on accept port. + if err := listenTCP(acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection on port %d should not be accepted, but got accepted", acceptPort) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. 
+func (FilterOutputInvertUIDAndGIDOwner) LocalAction(ip net.IP) error { + if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { + return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) + } + + return nil +} + // FilterOutputDestination tests that we can selectively allow packets to // certain destinations. type FilterOutputDestination struct{} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 84eb75a40..4fd2cb46a 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -167,6 +167,26 @@ func TestFilterOutputOwnerFail(t *testing.T) { singleTest(t, FilterOutputOwnerFail{}) } +func TestFilterOutputAcceptGIDOwner(t *testing.T) { + singleTest(t, FilterOutputAcceptGIDOwner{}) +} + +func TestFilterOutputDropGIDOwner(t *testing.T) { + singleTest(t, FilterOutputDropGIDOwner{}) +} + +func TestFilterOutputInvertGIDOwner(t *testing.T) { + singleTest(t, FilterOutputInvertGIDOwner{}) +} + +func TestFilterOutputInvertUIDOwner(t *testing.T) { + singleTest(t, FilterOutputInvertUIDOwner{}) +} + +func TestFilterOutputInvertUIDAndGIDOwner(t *testing.T) { + singleTest(t, FilterOutputInvertUIDAndGIDOwner{}) +} + func TestFilterOutputInterfaceAccept(t *testing.T) { singleTest(t, FilterOutputInterfaceAccept{}) } -- cgit v1.2.3 From e4058c03551d0d75beba0fe1553edd3195d966d8 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Wed, 13 May 2020 01:21:16 -0700 Subject: Replace test_runner.sh bash script with Go. PiperOrigin-RevId: 311285868 --- pkg/test/dockerutil/dockerutil.go | 110 ++++++++- test/packetimpact/netdevs/BUILD | 15 ++ test/packetimpact/netdevs/netdevs.go | 104 +++++++++ test/packetimpact/runner/BUILD | 20 ++ test/packetimpact/runner/defs.bzl | 135 +++++++++++ test/packetimpact/runner/packetimpact_test.go | 315 +++++++++++++++++++++++++ test/packetimpact/testbench/BUILD | 1 + test/packetimpact/testbench/connections.go | 4 +- test/packetimpact/testbench/dut.go | 6 + test/packetimpact/testbench/rawsockets.go | 3 - test/packetimpact/testbench/testbench.go | 31 ++- test/packetimpact/tests/BUILD | 7 +- test/packetimpact/tests/defs.bzl | 137 ----------- test/packetimpact/tests/test_runner.sh | 325 -------------------------- 14 files changed, 737 insertions(+), 476 deletions(-) create mode 100644 test/packetimpact/netdevs/BUILD create mode 100644 test/packetimpact/netdevs/netdevs.go create mode 100644 test/packetimpact/runner/BUILD create mode 100644 test/packetimpact/runner/defs.bzl create mode 100644 test/packetimpact/runner/packetimpact_test.go delete mode 100644 test/packetimpact/tests/defs.bzl delete mode 100755 test/packetimpact/tests/test_runner.sh (limited to 'test') diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 5f2af9f3b..6bf0e84d0 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -148,6 +148,62 @@ func (m MountMode) String() string { panic(fmt.Sprintf("invalid mode: %d", m)) } +// DockerNetwork contains the name of a docker network. +type DockerNetwork struct { + logger testutil.Logger + Name string + Subnet *net.IPNet + containers []*Docker +} + +// NewDockerNetwork sets up the struct for a Docker network. Names of networks +// will be unique. +func NewDockerNetwork(logger testutil.Logger) *DockerNetwork { + return &DockerNetwork{ + logger: logger, + Name: testutil.RandomID(logger.Name()), + } +} + +// Create calls 'docker network create'. 
+func (n *DockerNetwork) Create(args ...string) error { + a := []string{"docker", "network", "create"} + if n.Subnet != nil { + a = append(a, fmt.Sprintf("--subnet=%s", n.Subnet)) + } + a = append(a, args...) + a = append(a, n.Name) + return testutil.Command(n.logger, a...).Run() +} + +// Connect calls 'docker network connect' with the arguments provided. +func (n *DockerNetwork) Connect(container *Docker, args ...string) error { + a := []string{"docker", "network", "connect"} + a = append(a, args...) + a = append(a, n.Name, container.Name) + if err := testutil.Command(n.logger, a...).Run(); err != nil { + return err + } + n.containers = append(n.containers, container) + return nil +} + +// Cleanup cleans up the docker network and all the containers attached to it. +func (n *DockerNetwork) Cleanup() error { + for _, c := range n.containers { + // Don't propagate the error, it might be that the container + // was already cleaned up. + if err := c.Kill(); err != nil { + n.logger.Logf("unable to kill container during cleanup: %s", err) + } + } + + if err := testutil.Command(n.logger, "docker", "network", "rm", n.Name).Run(); err != nil { + return err + } + return nil +} + // Docker contains the name and the runtime of a docker container. type Docker struct { logger testutil.Logger @@ -309,7 +365,9 @@ func (d *Docker) argsFor(r *RunOpts, command string, p []string) (rv []string) { rv = append(rv, d.Name) } else { rv = append(rv, d.mounts...) - rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) + if len(d.Runtime) > 0 { + rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) + } rv = append(rv, fmt.Sprintf("--name=%s", d.Name)) rv = append(rv, testutil.ImageByName(r.Image)) } @@ -477,6 +535,56 @@ func (d *Docker) FindIP() (net.IP, error) { return ip, nil } +// A NetworkInterface is container's network interface information. +type NetworkInterface struct { + IPv4 net.IP + MAC net.HardwareAddr +} + +// ListNetworks returns the network interfaces of the container, keyed by +// Docker network name. +func (d *Docker) ListNetworks() (map[string]NetworkInterface, error) { + const format = `{{json .NetworkSettings.Networks}}` + out, err := testutil.Command(d.logger, "docker", "inspect", "-f", format, d.Name).CombinedOutput() + if err != nil { + return nil, fmt.Errorf("error network interfaces: %q: %w", string(out), err) + } + + networks := map[string]map[string]string{} + if err := json.Unmarshal(out, &networks); err != nil { + return nil, fmt.Errorf("error decoding network interfaces: %w", err) + } + + interfaces := map[string]NetworkInterface{} + for name, iface := range networks { + var netface NetworkInterface + + rawIP := strings.TrimSpace(iface["IPAddress"]) + if rawIP != "" { + ip := net.ParseIP(rawIP) + if ip == nil { + return nil, fmt.Errorf("invalid IP: %q", rawIP) + } + // Docker's IPAddress field is IPv4. The IPv6 address + // is stored in the GlobalIPv6Address field. + netface.IPv4 = ip + } + + rawMAC := strings.TrimSpace(iface["MacAddress"]) + if rawMAC != "" { + mac, err := net.ParseMAC(rawMAC) + if err != nil { + return nil, fmt.Errorf("invalid MAC: %q: %w", rawMAC, err) + } + netface.MAC = mac + } + + interfaces[name] = netface + } + + return interfaces, nil +} + // SandboxPid returns the PID to the sandbox process. 
func (d *Docker) SandboxPid() (int, error) { out, err := testutil.Command(d.logger, "docker", "inspect", "-f={{.State.Pid}}", d.Name).CombinedOutput() diff --git a/test/packetimpact/netdevs/BUILD b/test/packetimpact/netdevs/BUILD new file mode 100644 index 000000000..422bb9b0c --- /dev/null +++ b/test/packetimpact/netdevs/BUILD @@ -0,0 +1,15 @@ +load("//tools:defs.bzl", "go_library") + +package( + licenses = ["notice"], +) + +go_library( + name = "netdevs", + srcs = ["netdevs.go"], + visibility = ["//test/packetimpact:__subpackages__"], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/header", + ], +) diff --git a/test/packetimpact/netdevs/netdevs.go b/test/packetimpact/netdevs/netdevs.go new file mode 100644 index 000000000..d2c9cfeaf --- /dev/null +++ b/test/packetimpact/netdevs/netdevs.go @@ -0,0 +1,104 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package netdevs contains utilities for working with network devices. +package netdevs + +import ( + "fmt" + "net" + "regexp" + "strings" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" +) + +// A DeviceInfo represents a network device. +type DeviceInfo struct { + MAC net.HardwareAddr + IPv4Addr net.IP + IPv4Net *net.IPNet + IPv6Addr net.IP + IPv6Net *net.IPNet +} + +var ( + deviceLine = regexp.MustCompile(`^\s*\d+: (\w+)`) + linkLine = regexp.MustCompile(`^\s*link/\w+ ([0-9a-fA-F:]+)`) + inetLine = regexp.MustCompile(`^\s*inet ([0-9./]+)`) + inet6Line = regexp.MustCompile(`^\s*inet6 ([0-9a-fA-Z:/]+)`) +) + +// ParseDevices parses the output from `ip addr show` into a map from device +// name to information about the device. 
+func ParseDevices(cmdOutput string) (map[string]DeviceInfo, error) { + var currentDevice string + var currentInfo DeviceInfo + deviceInfos := make(map[string]DeviceInfo) + for _, line := range strings.Split(cmdOutput, "\n") { + if m := deviceLine.FindStringSubmatch(line); m != nil { + if currentDevice != "" { + deviceInfos[currentDevice] = currentInfo + } + currentInfo = DeviceInfo{} + currentDevice = m[1] + } else if m := linkLine.FindStringSubmatch(line); m != nil { + mac, err := net.ParseMAC(m[1]) + if err != nil { + return nil, err + } + currentInfo.MAC = mac + } else if m := inetLine.FindStringSubmatch(line); m != nil { + ipv4Addr, ipv4Net, err := net.ParseCIDR(m[1]) + if err != nil { + return nil, err + } + currentInfo.IPv4Addr = ipv4Addr + currentInfo.IPv4Net = ipv4Net + } else if m := inet6Line.FindStringSubmatch(line); m != nil { + ipv6Addr, ipv6Net, err := net.ParseCIDR(m[1]) + if err != nil { + return nil, err + } + currentInfo.IPv6Addr = ipv6Addr + currentInfo.IPv6Net = ipv6Net + } + } + if currentDevice != "" { + deviceInfos[currentDevice] = currentInfo + } + return deviceInfos, nil +} + +// MACToIP converts the MAC address to an IPv6 link local address as described +// in RFC 4291 page 20: https://tools.ietf.org/html/rfc4291#page-20 +func MACToIP(mac net.HardwareAddr) net.IP { + addr := make([]byte, header.IPv6AddressSize) + addr[0] = 0xfe + addr[1] = 0x80 + header.EthernetAdddressToModifiedEUI64IntoBuf(tcpip.LinkAddress(mac), addr[8:]) + return net.IP(addr) +} + +// FindDeviceByIP finds a DeviceInfo and device name from an IP address in the +// output of ParseDevices. +func FindDeviceByIP(ip net.IP, devices map[string]DeviceInfo) (string, DeviceInfo, error) { + for dev, info := range devices { + if info.IPv4Addr.Equal(ip) { + return dev, info, nil + } + } + return "", DeviceInfo{}, fmt.Errorf("can't find %s on any interface", ip) +} diff --git a/test/packetimpact/runner/BUILD b/test/packetimpact/runner/BUILD new file mode 100644 index 000000000..0b68a760a --- /dev/null +++ b/test/packetimpact/runner/BUILD @@ -0,0 +1,20 @@ +load("//tools:defs.bzl", "go_test") + +package( + default_visibility = ["//test/packetimpact:__subpackages__"], + licenses = ["notice"], +) + +go_test( + name = "packetimpact_test", + srcs = ["packetimpact_test.go"], + tags = [ + # Not intended to be run directly. + "local", + "manual", + ], + deps = [ + "//pkg/test/dockerutil", + "//test/packetimpact/netdevs", + ], +) diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl new file mode 100644 index 000000000..ee2e7e974 --- /dev/null +++ b/test/packetimpact/runner/defs.bzl @@ -0,0 +1,135 @@ +"""Defines rules for packetimpact test targets.""" + +load("//tools:defs.bzl", "go_test") + +def _packetimpact_test_impl(ctx): + test_runner = ctx.executable._test_runner + bench = ctx.actions.declare_file("%s-bench" % ctx.label.name) + bench_content = "\n".join([ + "#!/bin/bash", + # This test will run part in a distinct user namespace. This can cause + # permission problems, because all runfiles may not be owned by the + # current user, and no other users will be mapped in that namespace. + # Make sure that everything is readable here. + "find . 
-type f -or -type d -exec chmod a+rx {} \\;", + "%s %s --testbench_binary %s $@\n" % ( + test_runner.short_path, + " ".join(ctx.attr.flags), + ctx.files.testbench_binary[0].short_path, + ), + ]) + ctx.actions.write(bench, bench_content, is_executable = True) + + transitive_files = depset() + if hasattr(ctx.attr._test_runner, "data_runfiles"): + transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + runfiles = ctx.runfiles( + files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, + transitive_files = transitive_files, + collect_default = True, + collect_data = True, + ) + return [DefaultInfo(executable = bench, runfiles = runfiles)] + +_packetimpact_test = rule( + attrs = { + "_test_runner": attr.label( + executable = True, + cfg = "target", + default = ":packetimpact_test", + ), + "_posix_server_binary": attr.label( + cfg = "target", + default = "//test/packetimpact/dut:posix_server", + ), + "testbench_binary": attr.label( + cfg = "target", + mandatory = True, + ), + "flags": attr.string_list( + mandatory = False, + default = [], + ), + }, + test = True, + implementation = _packetimpact_test_impl, +) + +PACKETIMPACT_TAGS = ["local", "manual"] + +def packetimpact_linux_test( + name, + testbench_binary, + expect_failure = False, + **kwargs): + """Add a packetimpact test on linux. + + Args: + name: name of the test + testbench_binary: the testbench binary + **kwargs: all the other args, forwarded to _packetimpact_test + """ + expect_failure_flag = ["--expect_failure"] if expect_failure else [] + _packetimpact_test( + name = name + "_linux_test", + testbench_binary = testbench_binary, + flags = ["--dut_platform", "linux"] + expect_failure_flag, + tags = PACKETIMPACT_TAGS + ["packetimpact"], + **kwargs + ) + +def packetimpact_netstack_test( + name, + testbench_binary, + expect_failure = False, + **kwargs): + """Add a packetimpact test on netstack. + + Args: + name: name of the test + testbench_binary: the testbench binary + expect_failure: the test must fail + **kwargs: all the other args, forwarded to _packetimpact_test + """ + expect_failure_flag = [] + if expect_failure: + expect_failure_flag = ["--expect_failure"] + _packetimpact_test( + name = name + "_netstack_test", + testbench_binary = testbench_binary, + # This is the default runtime unless + # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. + flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, + tags = PACKETIMPACT_TAGS + ["packetimpact"], + **kwargs + ) + +def packetimpact_go_test(name, size = "small", pure = True, linux = True, netstack = True, **kwargs): + """Add packetimpact tests written in go. 
+ + Args: + name: name of the test + size: size of the test + pure: make a static go binary + linux: generate a linux test + netstack: generate a netstack test + **kwargs: all the other args, forwarded to go_test + """ + testbench_binary = name + "_test" + go_test( + name = testbench_binary, + size = size, + pure = pure, + tags = PACKETIMPACT_TAGS, + **kwargs + ) + packetimpact_linux_test( + name = name, + expect_failure = not linux, + testbench_binary = testbench_binary, + ) + packetimpact_netstack_test( + name = name, + expect_failure = not netstack, + testbench_binary = testbench_binary, + ) diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go new file mode 100644 index 000000000..8996f23fa --- /dev/null +++ b/test/packetimpact/runner/packetimpact_test.go @@ -0,0 +1,315 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The runner starts docker containers and networking for a packetimpact test. +package packetimpact_test + +import ( + "flag" + "fmt" + "log" + "math/rand" + "net" + "os" + "os/exec" + "path" + "strings" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/packetimpact/netdevs" +) + +// stringList implements flag.Value. +type stringList []string + +// String implements flag.Value.String. +func (l *stringList) String() string { + return strings.Join(*l, ",") +} + +// Set implements flag.Value.Set. +func (l *stringList) Set(value string) error { + *l = append(*l, value) + return nil +} + +var ( + dutPlatform = flag.String("dut_platform", "", "either \"linux\" or \"netstack\"") + testbenchBinary = flag.String("testbench_binary", "", "path to the testbench binary") + tshark = flag.Bool("tshark", false, "use more verbose tshark in logs instead of tcpdump") + extraTestArgs = stringList{} + expectFailure = flag.Bool("expect_failure", false, "expect that the test will fail when run") + + dutAddr = net.IPv4(0, 0, 0, 10) + testbenchAddr = net.IPv4(0, 0, 0, 20) +) + +const ctrlPort = "40000" + +// logger implements testutil.Logger. +// +// Labels logs based on their source and formats multi-line logs. +type logger string + +// Name implements testutil.Logger.Name. +func (l logger) Name() string { + return string(l) +} + +// Logf implements testutil.Logger.Logf. 
+func (l logger) Logf(format string, args ...interface{}) { + lines := strings.Split(fmt.Sprintf(format, args...), "\n") + log.Printf("%s: %s", l, lines[0]) + for _, line := range lines[1:] { + log.Printf("%*s %s", len(l), "", line) + } +} + +func TestOne(t *testing.T) { + flag.Var(&extraTestArgs, "extra_test_arg", "extra arguments to pass to the testbench") + flag.Parse() + if *dutPlatform != "linux" && *dutPlatform != "netstack" { + t.Fatal("--dut_platform should be either linux or netstack") + } + if *testbenchBinary == "" { + t.Fatal("--testbench_binary is missing") + } + if *dutPlatform == "netstack" { + if _, err := dockerutil.RuntimePath(); err != nil { + t.Fatal("--runtime is missing or invalid with --dut_platform=netstack:", err) + } + } + dockerutil.EnsureSupportedDockerVersion() + + // Create the networks needed for the test. One control network is needed for + // the gRPC control packets and one test network on which to transmit the test + // packets. + ctrlNet := dockerutil.NewDockerNetwork(logger("ctrlNet")) + testNet := dockerutil.NewDockerNetwork(logger("testNet")) + for _, dn := range []*dockerutil.DockerNetwork{ctrlNet, testNet} { + for { + if err := createDockerNetwork(dn); err != nil { + t.Log("creating docker network:", err) + const wait = 100 * time.Millisecond + t.Logf("sleeping %s and will try creating docker network again", wait) + // This can fail if another docker network claimed the same IP so we'll + // just try again. + time.Sleep(wait) + continue + } + break + } + defer func(dn *dockerutil.DockerNetwork) { + if err := dn.Cleanup(); err != nil { + t.Errorf("unable to cleanup container %s: %s", dn.Name, err) + } + }(dn) + } + + runOpts := dockerutil.RunOpts{ + Image: "packetimpact", + CapAdd: []string{"NET_ADMIN"}, + Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm"}, + Foreground: true, + Pty: func(_ *exec.Cmd, _ *os.File) {}, + } + + // Create the Docker container for the DUT. + dut := dockerutil.MakeDocker(logger("dut")) + if *dutPlatform == "linux" { + dut.Runtime = "" + } + + const containerPosixServerBinary = "/packetimpact/posix_server" + dut.CopyFiles("/packetimpact", "/test/packetimpact/dut/posix_server") + + if err := dut.Create(runOpts, containerPosixServerBinary, "--ip=0.0.0.0", "--port="+ctrlPort); err != nil { + t.Fatalf("unable to create container %s: %s", dut.Name, err) + } + defer dut.CleanUp() + + // Add ctrlNet as eth1 and testNet as eth2. + const testNetDev = "eth2" + if err := addNetworks(dut, dutAddr, []*dockerutil.DockerNetwork{ctrlNet, testNet}); err != nil { + t.Fatal(err) + } + + if err := dut.Start(); err != nil { + t.Fatalf("unable to start container %s: %s", dut.Name, err) + } + + if _, err := dut.WaitForOutput("Server listening.*\n", 60*time.Second); err != nil { + t.Fatalf("%s on container %s never listened: %s", containerPosixServerBinary, dut.Name, err) + } + + dutTestDevice, dutDeviceInfo, err := deviceByIP(dut, addressInSubnet(dutAddr, *testNet.Subnet)) + if err != nil { + t.Fatal(err) + } + + remoteMAC := dutDeviceInfo.MAC + remoteIPv6 := dutDeviceInfo.IPv6Addr + // Netstack as DUT doesn't assign IPv6 addresses automatically so do it if + // needed. + if remoteIPv6 == nil { + if _, err := dut.Exec(dockerutil.RunOpts{}, "ip", "addr", "add", netdevs.MACToIP(remoteMAC).String(), "scope", "link", "dev", dutTestDevice); err != nil { + t.Fatalf("unable to ip addr add on container %s: %s", dut.Name, err) + } + // Now try again, to make sure that it worked. 
+ _, dutDeviceInfo, err = deviceByIP(dut, addressInSubnet(dutAddr, *testNet.Subnet)) + if err != nil { + t.Fatal(err) + } + remoteIPv6 = dutDeviceInfo.IPv6Addr + if remoteIPv6 == nil { + t.Fatal("unable to set IPv6 address on container", dut.Name) + } + } + + // Create the Docker container for the testbench. + testbench := dockerutil.MakeDocker(logger("testbench")) + testbench.Runtime = "" // The testbench always runs on Linux. + + tbb := path.Base(*testbenchBinary) + containerTestbenchBinary := "/packetimpact/" + tbb + testbench.CopyFiles("/packetimpact", "/test/packetimpact/tests/"+tbb) + + // Run tcpdump in the test bench unbuffered, without DNS resolution, just on + // the interface with the test packets. + snifferArgs := []string{ + "tcpdump", "-S", "-vvv", "-U", "-n", "-i", testNetDev, + } + snifferRegex := "tcpdump: listening.*\n" + if *tshark { + // Run tshark in the test bench unbuffered, without DNS resolution, just on + // the interface with the test packets. + snifferArgs = []string{ + "tshark", "-V", "-l", "-n", "-i", testNetDev, + "-o", "tcp.check_checksum:TRUE", + "-o", "udp.check_checksum:TRUE", + } + snifferRegex = "Capturing on.*\n" + } + + if err := testbench.Create(runOpts, snifferArgs...); err != nil { + t.Fatalf("unable to create container %s: %s", testbench.Name, err) + } + defer testbench.CleanUp() + + // Add ctrlNet as eth1 and testNet as eth2. + if err := addNetworks(testbench, testbenchAddr, []*dockerutil.DockerNetwork{ctrlNet, testNet}); err != nil { + t.Fatal(err) + } + + if err := testbench.Start(); err != nil { + t.Fatalf("unable to start container %s: %s", testbench.Name, err) + } + + // Kill so that it will flush output. + defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + + if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil { + t.Fatalf("sniffer on %s never listened: %s", dut.Name, err) + } + + // Because the Linux kernel receives the SYN-ACK but didn't send the SYN it + // will issue a RST. To prevent this IPtables can be used to filter out all + // incoming packets. The raw socket that packetimpact tests use will still see + // everything. + if _, err := testbench.Exec(dockerutil.RunOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil { + t.Fatalf("unable to Exec iptables on container %s: %s", testbench.Name, err) + } + + // FIXME(b/156449515): Some piece of the system has a race. The old + // bash script version had a sleep, so we have one too. The race should + // be fixed and this sleep removed. + time.Sleep(time.Second) + + // Start a packetimpact test on the test bench. The packetimpact test sends + // and receives packets and also sends POSIX socket commands to the + // posix_server to be executed on the DUT. + testArgs := []string{containerTestbenchBinary} + testArgs = append(testArgs, extraTestArgs...) + testArgs = append(testArgs, + "--posix_server_ip", addressInSubnet(dutAddr, *ctrlNet.Subnet).String(), + "--posix_server_port", ctrlPort, + "--remote_ipv4", addressInSubnet(dutAddr, *testNet.Subnet).String(), + "--local_ipv4", addressInSubnet(testbenchAddr, *testNet.Subnet).String(), + "--remote_ipv6", remoteIPv6.String(), + "--remote_mac", remoteMAC.String(), + "--device", testNetDev, + ) + _, err = testbench.Exec(dockerutil.RunOpts{}, testArgs...) 
+ if !*expectFailure && err != nil { + t.Fatal("test failed:", err) + } + if *expectFailure && err == nil { + t.Fatal("test failure expected but the test succeeded, enable the test and mark the corresponding bug as fixed") + } +} + +func addNetworks(d *dockerutil.Docker, addr net.IP, networks []*dockerutil.DockerNetwork) error { + for _, dn := range networks { + ip := addressInSubnet(addr, *dn.Subnet) + // Connect to the network with the specified IP address. + if err := dn.Connect(d, "--ip", ip.String()); err != nil { + return fmt.Errorf("unable to connect container %s to network %s: %w", d.Name, dn.Name, err) + } + } + return nil +} + +// addressInSubnet combines the subnet provided with the address and returns a +// new address. The return address bits come from the subnet where the mask is 1 +// and from the ip address where the mask is 0. +func addressInSubnet(addr net.IP, subnet net.IPNet) net.IP { + var octets []byte + for i := 0; i < 4; i++ { + octets = append(octets, (subnet.IP.To4()[i]&subnet.Mask[i])+(addr.To4()[i]&(^subnet.Mask[i]))) + } + return net.IP(octets) +} + +// makeDockerNetwork makes a randomly-named network that will start with the +// namePrefix. The network will be a random /24 subnet. +func createDockerNetwork(n *dockerutil.DockerNetwork) error { + randSource := rand.NewSource(time.Now().UnixNano()) + r1 := rand.New(randSource) + // Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. + ip := net.IPv4(byte(r1.Intn(224-192)+192), byte(r1.Intn(256)), byte(r1.Intn(256)), 0) + n.Subnet = &net.IPNet{ + IP: ip, + Mask: ip.DefaultMask(), + } + return n.Create() +} + +// deviceByIP finds a deviceInfo and device name from an IP address. +func deviceByIP(d *dockerutil.Docker, ip net.IP) (string, netdevs.DeviceInfo, error) { + out, err := d.Exec(dockerutil.RunOpts{}, "ip", "addr", "show") + if err != nil { + return "", netdevs.DeviceInfo{}, fmt.Errorf("listing devices on %s container: %w", d.Name, err) + } + devs, err := netdevs.ParseDevices(out) + if err != nil { + return "", netdevs.DeviceInfo{}, fmt.Errorf("parsing devices from %s container: %w", d.Name, err) + } + testDevice, deviceInfo, err := netdevs.FindDeviceByIP(ip, devs) + if err != nil { + return "", netdevs.DeviceInfo{}, fmt.Errorf("can't find deviceInfo for container %s: %w", d.Name, err) + } + return testDevice, deviceInfo, nil +} diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index fed51006f..d588d3289 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/tcpip/header", "//pkg/tcpip/seqnum", "//pkg/usermem", + "//test/packetimpact/netdevs", "//test/packetimpact/proto:posix_server_go_proto", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 463fd0556..bf104e5ca 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -114,12 +114,12 @@ var _ layerState = (*etherState)(nil) func newEtherState(out, in Ether) (*etherState, error) { lMAC, err := tcpip.ParseMACAddress(LocalMAC) if err != nil { - return nil, err + return nil, fmt.Errorf("parsing local MAC: %q: %w", LocalMAC, err) } rMAC, err := tcpip.ParseMACAddress(RemoteMAC) if err != nil { - return nil, err + return nil, fmt.Errorf("parsing remote MAC: %q: %w", RemoteMAC, err) } s := etherState{ out: Ether{SrcAddr: &lMAC, 
DstAddr: &rMAC}, diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index a78b7d7ee..b919a3c2e 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -16,6 +16,7 @@ package testbench import ( "context" + "flag" "net" "strconv" "syscall" @@ -37,6 +38,11 @@ type DUT struct { // NewDUT creates a new connection with the DUT over gRPC. func NewDUT(t *testing.T) DUT { + flag.Parse() + if err := genPseudoFlags(); err != nil { + t.Fatal("generating psuedo flags:", err) + } + posixServerAddress := POSIXServerIP + ":" + strconv.Itoa(POSIXServerPort) conn, err := grpc.Dial(posixServerAddress, grpc.WithInsecure(), grpc.WithKeepaliveParams(keepalive.ClientParameters{Timeout: RPCKeepalive})) if err != nil { diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index 4665f60b2..278229b7e 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -16,7 +16,6 @@ package testbench import ( "encoding/binary" - "flag" "fmt" "math" "net" @@ -41,7 +40,6 @@ func htons(x uint16) uint16 { // NewSniffer creates a Sniffer connected to *device. func NewSniffer(t *testing.T) (Sniffer, error) { - flag.Parse() snifferFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL))) if err != nil { return Sniffer{}, err @@ -136,7 +134,6 @@ type Injector struct { // NewInjector creates a new injector on *device. func NewInjector(t *testing.T) (Injector, error) { - flag.Parse() ifInfo, err := net.InterfaceByName(Device) if err != nil { return Injector{}, err diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go index a1242b189..4de2aa1d3 100644 --- a/test/packetimpact/testbench/testbench.go +++ b/test/packetimpact/testbench/testbench.go @@ -16,7 +16,12 @@ package testbench import ( "flag" + "fmt" + "net" + "os/exec" "time" + + "gvisor.dev/gvisor/test/packetimpact/netdevs" ) var ( @@ -55,9 +60,31 @@ func RegisterFlags(fs *flag.FlagSet) { fs.DurationVar(&RPCKeepalive, "rpc_keepalive", RPCKeepalive, "gRPC keepalive") fs.StringVar(&LocalIPv4, "local_ipv4", LocalIPv4, "local IPv4 address for test packets") fs.StringVar(&RemoteIPv4, "remote_ipv4", RemoteIPv4, "remote IPv4 address for test packets") - fs.StringVar(&LocalIPv6, "local_ipv6", LocalIPv6, "local IPv6 address for test packets") fs.StringVar(&RemoteIPv6, "remote_ipv6", RemoteIPv6, "remote IPv6 address for test packets") - fs.StringVar(&LocalMAC, "local_mac", LocalMAC, "local mac address for test packets") fs.StringVar(&RemoteMAC, "remote_mac", RemoteMAC, "remote mac address for test packets") fs.StringVar(&Device, "device", Device, "local device for test packets") } + +// genPseudoFlags populates flag-like global config based on real flags. +// +// genPseudoFlags must only be called after flag.Parse. 
+func genPseudoFlags() error { + out, err := exec.Command("ip", "addr", "show").CombinedOutput() + if err != nil { + return fmt.Errorf("listing devices: %q: %w", string(out), err) + } + devs, err := netdevs.ParseDevices(string(out)) + if err != nil { + return fmt.Errorf("parsing devices: %w", err) + } + + _, deviceInfo, err := netdevs.FindDeviceByIP(net.ParseIP(LocalIPv4), devs) + if err != nil { + return fmt.Errorf("can't find deviceInfo: %w", err) + } + + LocalMAC = deviceInfo.MAC.String() + LocalIPv6 = deviceInfo.IPv6Addr.String() + + return nil +} diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index c25b3b8c1..15e1c3f04 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -1,4 +1,4 @@ -load("defs.bzl", "packetimpact_go_test") +load("//test/packetimpact/runner:defs.bzl", "packetimpact_go_test") package( default_visibility = ["//test/packetimpact:__subpackages__"], @@ -156,8 +156,3 @@ packetimpact_go_test( "@org_golang_x_sys//unix:go_default_library", ], ) - -sh_binary( - name = "test_runner", - srcs = ["test_runner.sh"], -) diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl deleted file mode 100644 index 27c5de375..000000000 --- a/test/packetimpact/tests/defs.bzl +++ /dev/null @@ -1,137 +0,0 @@ -"""Defines rules for packetimpact test targets.""" - -load("//tools:defs.bzl", "go_test") - -def _packetimpact_test_impl(ctx): - test_runner = ctx.executable._test_runner - bench = ctx.actions.declare_file("%s-bench" % ctx.label.name) - bench_content = "\n".join([ - "#!/bin/bash", - # This test will run part in a distinct user namespace. This can cause - # permission problems, because all runfiles may not be owned by the - # current user, and no other users will be mapped in that namespace. - # Make sure that everything is readable here. - "find . -type f -exec chmod a+rx {} \\;", - "find . -type d -exec chmod a+rx {} \\;", - "%s %s --posix_server_binary %s --testbench_binary %s $@\n" % ( - test_runner.short_path, - " ".join(ctx.attr.flags), - ctx.files._posix_server_binary[0].short_path, - ctx.files.testbench_binary[0].short_path, - ), - ]) - ctx.actions.write(bench, bench_content, is_executable = True) - - transitive_files = depset() - if hasattr(ctx.attr._test_runner, "data_runfiles"): - transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) - runfiles = ctx.runfiles( - files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, - transitive_files = transitive_files, - collect_default = True, - collect_data = True, - ) - return [DefaultInfo(executable = bench, runfiles = runfiles)] - -_packetimpact_test = rule( - attrs = { - "_test_runner": attr.label( - executable = True, - cfg = "target", - default = ":test_runner", - ), - "_posix_server_binary": attr.label( - cfg = "target", - default = "//test/packetimpact/dut:posix_server", - ), - "testbench_binary": attr.label( - cfg = "target", - mandatory = True, - ), - "flags": attr.string_list( - mandatory = False, - default = [], - ), - }, - test = True, - implementation = _packetimpact_test_impl, -) - -PACKETIMPACT_TAGS = ["local", "manual"] - -def packetimpact_linux_test( - name, - testbench_binary, - expect_failure = False, - **kwargs): - """Add a packetimpact test on linux. 
- - Args: - name: name of the test - testbench_binary: the testbench binary - **kwargs: all the other args, forwarded to _packetimpact_test - """ - expect_failure_flag = ["--expect_failure"] if expect_failure else [] - _packetimpact_test( - name = name + "_linux_test", - testbench_binary = testbench_binary, - flags = ["--dut_platform", "linux"] + expect_failure_flag, - tags = PACKETIMPACT_TAGS + ["packetimpact"], - **kwargs - ) - -def packetimpact_netstack_test( - name, - testbench_binary, - expect_failure = False, - **kwargs): - """Add a packetimpact test on netstack. - - Args: - name: name of the test - testbench_binary: the testbench binary - expect_failure: the test must fail - **kwargs: all the other args, forwarded to _packetimpact_test - """ - expect_failure_flag = [] - if expect_failure: - expect_failure_flag = ["--expect_failure"] - _packetimpact_test( - name = name + "_netstack_test", - testbench_binary = testbench_binary, - # This is the default runtime unless - # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. - flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, - tags = PACKETIMPACT_TAGS + ["packetimpact"], - **kwargs - ) - -def packetimpact_go_test(name, size = "small", pure = True, linux = True, netstack = True, **kwargs): - """Add packetimpact tests written in go. - - Args: - name: name of the test - size: size of the test - pure: make a static go binary - linux: generate a linux test - netstack: generate a netstack test - **kwargs: all the other args, forwarded to go_test - """ - testbench_binary = name + "_test" - go_test( - name = testbench_binary, - size = size, - pure = pure, - tags = PACKETIMPACT_TAGS, - **kwargs - ) - packetimpact_linux_test( - name = name, - expect_failure = not linux, - testbench_binary = testbench_binary, - ) - packetimpact_netstack_test( - name = name, - expect_failure = not netstack, - testbench_binary = testbench_binary, - ) diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh deleted file mode 100755 index 706441cce..000000000 --- a/test/packetimpact/tests/test_runner.sh +++ /dev/null @@ -1,325 +0,0 @@ -#!/bin/bash - -# Copyright 2020 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Run a packetimpact test. Two docker containers are made, one for the -# Device-Under-Test (DUT) and one for the test bench. Each is attached with -# two networks, one for control packets that aid the test and one for test -# packets which are sent as part of the test and observed for correctness. - -set -euxo pipefail - -function failure() { - local lineno=$1 - local msg=$2 - local filename="$0" - echo "FAIL: $filename:$lineno: $msg" -} -trap 'failure ${LINENO} "$BASH_COMMAND"' ERR - -declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark,extra_test_arg:,expect_failure" - -# Don't use declare below so that the error from getopt will end the script. 
-PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") - -eval set -- "$PARSED" - -declare -a EXTRA_TEST_ARGS - -while true; do - case "$1" in - --dut_platform) - # Either "linux" or "netstack". - declare -r DUT_PLATFORM="$2" - shift 2 - ;; - --posix_server_binary) - declare -r POSIX_SERVER_BINARY="$2" - shift 2 - ;; - --testbench_binary) - declare -r TESTBENCH_BINARY="$2" - shift 2 - ;; - --runtime) - # Not readonly because there might be multiple --runtime arguments and we - # want to use just the last one. Only used if --dut_platform is - # "netstack". - declare RUNTIME="$2" - shift 2 - ;; - --tshark) - declare -r TSHARK="1" - shift 1 - ;; - --extra_test_arg) - EXTRA_TEST_ARGS+="$2" - shift 2 - ;; - --expect_failure) - declare -r EXPECT_FAILURE="1" - shift 1 - ;; - --) - shift - break - ;; - *) - echo "Programming error" - exit 3 - esac -done - -# All the other arguments are scripts. -declare -r scripts="$@" - -# Check that the required flags are defined in a way that is safe for "set -u". -if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then - if [[ -z "${RUNTIME-}" ]]; then - echo "FAIL: Missing --runtime argument: ${RUNTIME-}" - exit 2 - fi - declare -r RUNTIME_ARG="--runtime ${RUNTIME}" -elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then - declare -r RUNTIME_ARG="" -else - echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}" - exit 2 -fi -if [[ ! -f "${POSIX_SERVER_BINARY-}" ]]; then - echo "FAIL: Bad or missing --posix_server_binary: ${POSIX_SERVER-}" - exit 2 -fi -if [[ ! -f "${TESTBENCH_BINARY-}" ]]; then - echo "FAIL: Bad or missing --testbench_binary: ${TESTBENCH_BINARY-}" - exit 2 -fi - -function new_net_prefix() { - # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. - echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" -} - -# Variables specific to the control network and interface start with CTRL_. -# Variables specific to the test network and interface start with TEST_. -# Variables specific to the DUT start with DUT_. -# Variables specific to the test bench start with TESTBENCH_. -# Use random numbers so that test networks don't collide. -declare CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" -declare CTRL_NET_PREFIX=$(new_net_prefix) -declare TEST_NET="test_net-${RANDOM}${RANDOM}" -declare TEST_NET_PREFIX=$(new_net_prefix) -# On both DUT and test bench, testing packets are on the eth2 interface. -declare -r TEST_DEVICE="eth2" -# Number of bits in the *_NET_PREFIX variables. -declare -r NET_MASK="24" -# Last bits of the DUT's IP address. -declare -r DUT_NET_SUFFIX=".10" -# Control port. -declare -r CTRL_PORT="40000" -# Last bits of the test bench's IP address. -declare -r TESTBENCH_NET_SUFFIX=".20" -declare -r TIMEOUT="60" -declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetimpact" - -# Make sure that docker is installed. -docker --version - -function finish { - local cleanup_success=1 - - if [[ -z "${TSHARK-}" ]]; then - # Kill tcpdump so that it will flush output. - docker exec -t "${TESTBENCH}" \ - killall tcpdump || \ - cleanup_success=0 - else - # Kill tshark so that it will flush output. - docker exec -t "${TESTBENCH}" \ - killall tshark || \ - cleanup_success=0 - fi - - for net in "${CTRL_NET}" "${TEST_NET}"; do - # Kill all processes attached to ${net}. - for docker_command in "kill" "rm"; do - (docker network inspect "${net}" \ - --format '{{range $key, $value := .Containers}}{{$key}} {{end}}' \ - | xargs -r docker "${docker_command}") || \ - cleanup_success=0 - done - # Remove the network. 
- docker network rm "${net}" || \ - cleanup_success=0 - done - - if ((!$cleanup_success)); then - echo "FAIL: Cleanup command failed" - exit 4 - fi -} -trap finish EXIT - -# Subnet for control packets between test bench and DUT. -while ! docker network create \ - "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do - sleep 0.1 - CTRL_NET_PREFIX=$(new_net_prefix) - CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" -done - -# Subnet for the packets that are part of the test. -while ! docker network create \ - "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do - sleep 0.1 - TEST_NET_PREFIX=$(new_net_prefix) - TEST_NET="test_net-${RANDOM}${RANDOM}" -done - -docker pull "${IMAGE_TAG}" - -# Create the DUT container and connect to network. -DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \ - --cap-add NET_ADMIN \ - --sysctl net.ipv6.conf.all.disable_ipv6=0 \ - --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) -docker network connect "${CTRL_NET}" \ - --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ - || (docker kill ${DUT}; docker rm ${DUT}; false) -docker network connect "${TEST_NET}" \ - --ip "${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ - || (docker kill ${DUT}; docker rm ${DUT}; false) -docker start "${DUT}" - -# Create the test bench container and connect to network. -TESTBENCH=$(docker create --privileged --rm \ - --cap-add NET_ADMIN \ - --sysctl net.ipv6.conf.all.disable_ipv6=0 \ - --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) -docker network connect "${CTRL_NET}" \ - --ip "${CTRL_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ - || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false) -docker network connect "${TEST_NET}" \ - --ip "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ - || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false) -docker start "${TESTBENCH}" - -# Start the posix_server in the DUT. -declare -r DOCKER_POSIX_SERVER_BINARY="/$(basename ${POSIX_SERVER_BINARY})" -docker cp -L ${POSIX_SERVER_BINARY} "${DUT}:${DOCKER_POSIX_SERVER_BINARY}" - -docker exec -t "${DUT}" \ - /bin/bash -c "${DOCKER_POSIX_SERVER_BINARY} \ - --ip ${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ - --port ${CTRL_PORT}" & - -# Because the Linux kernel receives the SYN-ACK but didn't send the SYN it will -# issue a RST. To prevent this IPtables can be used to filter those out. -docker exec "${TESTBENCH}" \ - iptables -A INPUT -i ${TEST_DEVICE} -j DROP - -# Wait for the DUT server to come up. Attempt to connect to it from the test -# bench every 100 milliseconds until success. -while ! docker exec "${TESTBENCH}" \ - nc -zv "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${CTRL_PORT}"; do - sleep 0.1 -done - -declare -r REMOTE_MAC=$(docker exec -t "${DUT}" ip link show \ - "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) -declare -r LOCAL_MAC=$(docker exec -t "${TESTBENCH}" ip link show \ - "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) -declare REMOTE_IPV6=$(docker exec -t "${DUT}" ip addr show scope link \ - "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) -declare -r LOCAL_IPV6=$(docker exec -t "${TESTBENCH}" ip addr show scope link \ - "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) - -# Netstack as DUT doesn't assign IPv6 addresses automatically so do it if -# needed. Convert the MAC address to an IPv6 link local address as described in -# RFC 4291 page 20: https://tools.ietf.org/html/rfc4291#page-20 -if [[ -z "${REMOTE_IPV6}" ]]; then - # Split the octets of the MAC into an array of strings. 
- IFS=":" read -a REMOTE_OCTETS <<< "${REMOTE_MAC}" - # Flip the global bit. - REMOTE_OCTETS[0]=$(printf '%x' "$((0x${REMOTE_OCTETS[0]} ^ 2))") - # Add the IPv6 address. - docker exec "${DUT}" \ - ip addr add $(printf 'fe80::%02x%02x:%02xff:fe%02x:%02x%02x/64' \ - "0x${REMOTE_OCTETS[0]}" "0x${REMOTE_OCTETS[1]}" "0x${REMOTE_OCTETS[2]}" \ - "0x${REMOTE_OCTETS[3]}" "0x${REMOTE_OCTETS[4]}" "0x${REMOTE_OCTETS[5]}") \ - scope link \ - dev "${TEST_DEVICE}" - # Re-extract the IPv6 address. - # TODO(eyalsoha): Add "scope link" below when netstack supports correctly - # creating link-local IPv6 addresses. - REMOTE_IPV6=$(docker exec -t "${DUT}" ip addr show \ - "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) -fi - -declare -r DOCKER_TESTBENCH_BINARY="/$(basename ${TESTBENCH_BINARY})" -docker cp -L "${TESTBENCH_BINARY}" "${TESTBENCH}:${DOCKER_TESTBENCH_BINARY}" - -if [[ -z "${TSHARK-}" ]]; then - # Run tcpdump in the test bench unbuffered, without dns resolution, just on - # the interface with the test packets. - docker exec -t "${TESTBENCH}" \ - tcpdump -S -vvv -U -n -i "${TEST_DEVICE}" \ - net "${TEST_NET_PREFIX}/24" or \ - host "${REMOTE_IPV6}" or \ - host "${LOCAL_IPV6}" & -else - # Run tshark in the test bench unbuffered, without dns resolution, just on the - # interface with the test packets. - docker exec -t "${TESTBENCH}" \ - tshark -V -l -n -i "${TEST_DEVICE}" \ - -o tcp.check_checksum:TRUE \ - -o udp.check_checksum:TRUE \ - net "${TEST_NET_PREFIX}/24" or \ - host "${REMOTE_IPV6}" or \ - host "${LOCAL_IPV6}" & -fi - -# tcpdump and tshark take time to startup -sleep 3 - -# Start a packetimpact test on the test bench. The packetimpact test sends and -# receives packets and also sends POSIX socket commands to the posix_server to -# be executed on the DUT. -docker exec \ - -e XML_OUTPUT_FILE="/test.xml" \ - -e TEST_TARGET \ - -t "${TESTBENCH}" \ - /bin/bash -c "${DOCKER_TESTBENCH_BINARY} \ - ${EXTRA_TEST_ARGS[@]-} \ - --posix_server_ip=${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ - --posix_server_port=${CTRL_PORT} \ - --remote_ipv4=${TEST_NET_PREFIX}${DUT_NET_SUFFIX} \ - --local_ipv4=${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX} \ - --remote_ipv6=${REMOTE_IPV6} \ - --local_ipv6=${LOCAL_IPV6} \ - --remote_mac=${REMOTE_MAC} \ - --local_mac=${LOCAL_MAC} \ - --device=${TEST_DEVICE}" && true -declare -r TEST_RESULT="${?}" -if [[ -z "${EXPECT_FAILURE-}" && "${TEST_RESULT}" != 0 ]]; then - echo 'FAIL: This test was expected to pass.' - exit ${TEST_RESULT} -fi -if [[ ! -z "${EXPECT_FAILURE-}" && "${TEST_RESULT}" == 0 ]]; then - echo 'FAIL: This test was expected to fail but passed. Enable the test and' \ - 'mark the corresponding bug as fixed.' - exit 1 -fi -echo PASS: No errors. -- cgit v1.2.3 From d84607762889d999f93b89e64e09d2a4dda63bf7 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Wed, 13 May 2020 10:51:50 -0700 Subject: Enable overlayfs_stale_read by default for runsc. Linux 4.18 and later make reads and writes coherent between pre-copy-up and post-copy-up FDs representing the same file on an overlay filesystem. However, memory mappings remain incoherent: - Documentation/filesystems/overlayfs.rst, "Non-standard behavior": "If a file residing on a lower layer is opened for read-only and then memory mapped with MAP_SHARED, then subsequent changes to the file are not reflected in the memory mapping." - fs/overlay/file.c:ovl_mmap() passes through to the underlying FD without any management of coherence in the overlay. 
- Experimentally on Linux 5.2: ``` $ cat mmap_cat_page.c #include #include #include #include #include #include int main(int argc, char **argv) { if (argc < 2) { errx(1, "syntax: %s [FILE]", argv[0]); } const int fd = open(argv[1], O_RDONLY); if (fd < 0) { err(1, "open(%s)", argv[1]); } const size_t page_size = sysconf(_SC_PAGE_SIZE); void* page = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); if (page == MAP_FAILED) { err(1, "mmap"); } for (;;) { write(1, page, strnlen(page, page_size)); if (getc(stdin) == EOF) { break; } } return 0; } $ gcc -O2 -o mmap_cat_page mmap_cat_page.c $ mkdir lowerdir upperdir workdir overlaydir $ echo old > lowerdir/file $ sudo mount -t overlay -o "lowerdir=lowerdir,upperdir=upperdir,workdir=workdir" none overlaydir $ ./mmap_cat_page overlaydir/file old ^Z [1]+ Stopped ./mmap_cat_page overlaydir/file $ echo new > overlaydir/file $ cat overlaydir/file new $ fg ./mmap_cat_page overlaydir/file old ``` Therefore, while the VFS1 gofer client's behavior of reopening read FDs is only necessary pre-4.18, replacing existing memory mappings (in both sentry and application address spaces) with mappings of the new FD is required regardless of kernel version, and this latter behavior is common to both VFS1 and VFS2. Re-document accordingly, and change the runsc flag to enabled by default. New test: - Before this CL: https://source.cloud.google.com/results/invocations/5b222d2c-e918-4bae-afc4-407f5bac509b - After this CL: https://source.cloud.google.com/results/invocations/f28c747e-d89c-4d8c-a461-602b33e71aab PiperOrigin-RevId: 311361267 --- images/hostoverlaytest/Dockerfile | 7 +++ images/hostoverlaytest/test.c | 88 +++++++++++++++++++++++++++++++++++++ images/hostoverlaytest/testfile.txt | 1 + pkg/sentry/fs/gofer/fs.go | 3 +- pkg/sentry/fsimpl/gofer/gofer.go | 9 ++-- runsc/boot/config.go | 6 ++- runsc/container/container_test.go | 37 ---------------- runsc/main.go | 2 +- test/e2e/integration_test.go | 15 +++++++ 9 files changed, 123 insertions(+), 45 deletions(-) create mode 100644 images/hostoverlaytest/Dockerfile create mode 100644 images/hostoverlaytest/test.c create mode 100644 images/hostoverlaytest/testfile.txt (limited to 'test') diff --git a/images/hostoverlaytest/Dockerfile b/images/hostoverlaytest/Dockerfile new file mode 100644 index 000000000..d83439e9c --- /dev/null +++ b/images/hostoverlaytest/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:bionic + +WORKDIR /root +COPY . . + +RUN apt-get update && apt-get install -y gcc +RUN gcc -O2 -o test test.c diff --git a/images/hostoverlaytest/test.c b/images/hostoverlaytest/test.c new file mode 100644 index 000000000..088f90746 --- /dev/null +++ b/images/hostoverlaytest/test.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + const char kTestFilePath[] = "testfile.txt"; + const char kOldFileData[] = "old data\n"; + const char kNewFileData[] = "new data\n"; + const size_t kPageSize = sysconf(_SC_PAGE_SIZE); + + // Open a file that already exists in a host overlayfs lower layer. + const int fd_rdonly = open(kTestFilePath, O_RDONLY); + if (fd_rdonly < 0) { + err(1, "open(%s, O_RDONLY)", kTestFilePath); + } + + // Check that the file's initial contents are what we expect when read via + // syscall. 
+ char oldbuf[sizeof(kOldFileData)] = {}; + ssize_t n = pread(fd_rdonly, oldbuf, sizeof(oldbuf), 0); + if (n < 0) { + err(1, "initial pread"); + } + if (n != strlen(kOldFileData)) { + errx(1, "short initial pread (%ld/%lu bytes)", n, strlen(kOldFileData)); + } + if (strcmp(oldbuf, kOldFileData) != 0) { + errx(1, "initial pread returned wrong data: %s", oldbuf); + } + + // Check that the file's initial contents are what we expect when read via + // memory mapping. + void* page = mmap(NULL, kPageSize, PROT_READ, MAP_SHARED, fd_rdonly, 0); + if (page == MAP_FAILED) { + err(1, "mmap"); + } + if (strcmp(page, kOldFileData) != 0) { + errx(1, "mapping contains wrong initial data: %s", (const char*)page); + } + + // Open the same file writably, causing host overlayfs to copy it up, and + // replace its contents. + const int fd_rdwr = open(kTestFilePath, O_RDWR); + if (fd_rdwr < 0) { + err(1, "open(%s, O_RDWR)", kTestFilePath); + } + n = write(fd_rdwr, kNewFileData, strlen(kNewFileData)); + if (n < 0) { + err(1, "write"); + } + if (n != strlen(kNewFileData)) { + errx(1, "short write (%ld/%lu bytes)", n, strlen(kNewFileData)); + } + if (ftruncate(fd_rdwr, strlen(kNewFileData)) < 0) { + err(1, "truncate"); + } + + int failed = 0; + + // Check that syscalls on the old FD return updated contents. (Before Linux + // 4.18, this requires that runsc use a post-copy-up FD to service the read.) + char newbuf[sizeof(kNewFileData)] = {}; + n = pread(fd_rdonly, newbuf, sizeof(newbuf), 0); + if (n < 0) { + err(1, "final pread"); + } + if (n != strlen(kNewFileData)) { + warnx("short final pread (%ld/%lu bytes)", n, strlen(kNewFileData)); + failed = 1; + } else if (strcmp(newbuf, kNewFileData) != 0) { + warnx("final pread returned wrong data: %s", newbuf); + failed = 1; + } + + // Check that the memory mapping of the old FD has been updated. (Linux + // overlayfs does not do this, so regardless of kernel version this requires + // that runsc replace existing memory mappings with mappings of a + // post-copy-up FD.) + if (strcmp(page, kNewFileData) != 0) { + warnx("mapping contains wrong final data: %s", (const char*)page); + failed = 1; + } + + return failed; +} diff --git a/images/hostoverlaytest/testfile.txt b/images/hostoverlaytest/testfile.txt new file mode 100644 index 000000000..e4188c841 --- /dev/null +++ b/images/hostoverlaytest/testfile.txt @@ -0,0 +1 @@ +old data diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go index 9d41fcbdb..8ae2d78d7 100644 --- a/pkg/sentry/fs/gofer/fs.go +++ b/pkg/sentry/fs/gofer/fs.go @@ -60,8 +60,7 @@ const ( limitHostFDTranslationKey = "limit_host_fd_translation" // overlayfsStaleRead if present closes cached readonly file after the first - // write. This is done to workaround a limitation of overlayfs in kernels - // before 4.19 where open FDs are not updated after the file is copied up. + // write. This is done to workaround a limitation of Linux overlayfs. overlayfsStaleRead = "overlayfs_stale_read" ) diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 1da8d5d82..353e2cf5b 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -143,9 +143,12 @@ type filesystemOptions struct { // If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote // filesystem may not be coherent with writable host FDs opened later, so - // mappings of the former must be replaced by mappings of the latter. 
This - // is usually only the case when the remote filesystem is an overlayfs - // mount on Linux < 4.19. + // all uses of the former must be replaced by uses of the latter. This is + // usually only the case when the remote filesystem is a Linux overlayfs + // mount. (Prior to Linux 4.18, patch series centered on commit + // d1d04ef8572b "ovl: stack file ops", both I/O and memory mappings were + // incoherent between pre-copy-up and post-copy-up FDs; after that patch + // series, only memory mappings are incoherent.) overlayfsStaleRead bool // If regularFilesUseSpecialFileFD is true, application FDs representing diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 6d6a705f8..bcec7e4db 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -241,8 +241,10 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeakMode refs.LeakMode - // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for - // write to workaround overlayfs limitation on kernels before 4.19. + // OverlayfsStaleRead instructs the sandbox to assume that the root mount + // is on a Linux overlayfs mount, which does not necessarily preserve + // coherence between read-only and subsequent writable file descriptors + // representing the "same" file. OverlayfsStaleRead bool // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index acf988aa0..7ba301331 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -2126,43 +2126,6 @@ func TestNetRaw(t *testing.T) { } } -// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works. -func TestOverlayfsStaleRead(t *testing.T) { - conf := testutil.TestConfig(t) - conf.OverlayfsStaleRead = true - - in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer in.Close() - if _, err := in.WriteString("stale data"); err != nil { - t.Fatalf("in.Write() failed: %v", err) - } - - out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer out.Close() - - const want = "foobar" - cmd := fmt.Sprintf("cat %q >&2 && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name()) - spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd) - if err := run(spec, conf); err != nil { - t.Fatalf("Error running container: %v", err) - } - - gotBytes, err := ioutil.ReadAll(out) - if err != nil { - t.Fatalf("out.Read() failed: %v", err) - } - got := strings.TrimSpace(string(gotBytes)) - if want != got { - t.Errorf("Wrong content in out file, got: %q. want: %q", got, want) - } -} - // TestTTYField checks TTY field returned by container.Processes(). func TestTTYField(t *testing.T) { stop := testutil.StartReaper() diff --git a/runsc/main.go b/runsc/main.go index 0625a06e0..920ed84a5 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -76,7 +76,7 @@ var ( fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. 
All modifications are stored in memory inside the sandbox.") - overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.") + overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 404e37689..ff856883a 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -361,6 +361,21 @@ func TestTmpFile(t *testing.T) { } } +// TestHostOverlayfsCopyUp tests that the --overlayfs-stale-read option causes +// runsc to hide the incoherence of FDs opened before and after overlayfs +// copy-up on the host. +func TestHostOverlayfsCopyUp(t *testing.T) { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + if _, err := d.Run(dockerutil.RunOpts{ + Image: "hostoverlaytest", + WorkDir: "/root", + }, "./test"); err != nil { + t.Fatalf("docker run failed: %v", err) + } +} + func TestMain(m *testing.M) { dockerutil.EnsureSupportedDockerVersion() flag.Parse() -- cgit v1.2.3 From 8605c97136ab798e23d0ae4e85892270d8922b4d Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 13 May 2020 16:11:57 -0700 Subject: Automated rollback of changelist 311285868 PiperOrigin-RevId: 311424257 --- pkg/test/dockerutil/dockerutil.go | 110 +-------- test/packetimpact/netdevs/BUILD | 15 -- test/packetimpact/netdevs/netdevs.go | 104 --------- test/packetimpact/runner/BUILD | 20 -- test/packetimpact/runner/defs.bzl | 135 ----------- test/packetimpact/runner/packetimpact_test.go | 315 ------------------------- test/packetimpact/testbench/BUILD | 1 - test/packetimpact/testbench/connections.go | 4 +- test/packetimpact/testbench/dut.go | 6 - test/packetimpact/testbench/rawsockets.go | 3 + test/packetimpact/testbench/testbench.go | 31 +-- test/packetimpact/tests/BUILD | 7 +- test/packetimpact/tests/defs.bzl | 137 +++++++++++ test/packetimpact/tests/test_runner.sh | 325 ++++++++++++++++++++++++++ 14 files changed, 476 insertions(+), 737 deletions(-) delete mode 100644 test/packetimpact/netdevs/BUILD delete mode 100644 test/packetimpact/netdevs/netdevs.go delete mode 100644 test/packetimpact/runner/BUILD delete mode 100644 test/packetimpact/runner/defs.bzl delete mode 100644 test/packetimpact/runner/packetimpact_test.go create mode 100644 test/packetimpact/tests/defs.bzl create mode 100755 test/packetimpact/tests/test_runner.sh (limited to 'test') diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 6bf0e84d0..5f2af9f3b 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -148,62 +148,6 @@ func (m MountMode) String() string { panic(fmt.Sprintf("invalid mode: %d", m)) } -// DockerNetwork contains the name of a docker network. -type DockerNetwork struct { - logger testutil.Logger - Name string - Subnet *net.IPNet - containers []*Docker -} - -// NewDockerNetwork sets up the struct for a Docker network. 
Names of networks -// will be unique. -func NewDockerNetwork(logger testutil.Logger) *DockerNetwork { - return &DockerNetwork{ - logger: logger, - Name: testutil.RandomID(logger.Name()), - } -} - -// Create calls 'docker network create'. -func (n *DockerNetwork) Create(args ...string) error { - a := []string{"docker", "network", "create"} - if n.Subnet != nil { - a = append(a, fmt.Sprintf("--subnet=%s", n.Subnet)) - } - a = append(a, args...) - a = append(a, n.Name) - return testutil.Command(n.logger, a...).Run() -} - -// Connect calls 'docker network connect' with the arguments provided. -func (n *DockerNetwork) Connect(container *Docker, args ...string) error { - a := []string{"docker", "network", "connect"} - a = append(a, args...) - a = append(a, n.Name, container.Name) - if err := testutil.Command(n.logger, a...).Run(); err != nil { - return err - } - n.containers = append(n.containers, container) - return nil -} - -// Cleanup cleans up the docker network and all the containers attached to it. -func (n *DockerNetwork) Cleanup() error { - for _, c := range n.containers { - // Don't propagate the error, it might be that the container - // was already cleaned up. - if err := c.Kill(); err != nil { - n.logger.Logf("unable to kill container during cleanup: %s", err) - } - } - - if err := testutil.Command(n.logger, "docker", "network", "rm", n.Name).Run(); err != nil { - return err - } - return nil -} - // Docker contains the name and the runtime of a docker container. type Docker struct { logger testutil.Logger @@ -365,9 +309,7 @@ func (d *Docker) argsFor(r *RunOpts, command string, p []string) (rv []string) { rv = append(rv, d.Name) } else { rv = append(rv, d.mounts...) - if len(d.Runtime) > 0 { - rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) - } + rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) rv = append(rv, fmt.Sprintf("--name=%s", d.Name)) rv = append(rv, testutil.ImageByName(r.Image)) } @@ -535,56 +477,6 @@ func (d *Docker) FindIP() (net.IP, error) { return ip, nil } -// A NetworkInterface is container's network interface information. -type NetworkInterface struct { - IPv4 net.IP - MAC net.HardwareAddr -} - -// ListNetworks returns the network interfaces of the container, keyed by -// Docker network name. -func (d *Docker) ListNetworks() (map[string]NetworkInterface, error) { - const format = `{{json .NetworkSettings.Networks}}` - out, err := testutil.Command(d.logger, "docker", "inspect", "-f", format, d.Name).CombinedOutput() - if err != nil { - return nil, fmt.Errorf("error network interfaces: %q: %w", string(out), err) - } - - networks := map[string]map[string]string{} - if err := json.Unmarshal(out, &networks); err != nil { - return nil, fmt.Errorf("error decoding network interfaces: %w", err) - } - - interfaces := map[string]NetworkInterface{} - for name, iface := range networks { - var netface NetworkInterface - - rawIP := strings.TrimSpace(iface["IPAddress"]) - if rawIP != "" { - ip := net.ParseIP(rawIP) - if ip == nil { - return nil, fmt.Errorf("invalid IP: %q", rawIP) - } - // Docker's IPAddress field is IPv4. The IPv6 address - // is stored in the GlobalIPv6Address field. - netface.IPv4 = ip - } - - rawMAC := strings.TrimSpace(iface["MacAddress"]) - if rawMAC != "" { - mac, err := net.ParseMAC(rawMAC) - if err != nil { - return nil, fmt.Errorf("invalid MAC: %q: %w", rawMAC, err) - } - netface.MAC = mac - } - - interfaces[name] = netface - } - - return interfaces, nil -} - // SandboxPid returns the PID to the sandbox process. 
func (d *Docker) SandboxPid() (int, error) { out, err := testutil.Command(d.logger, "docker", "inspect", "-f={{.State.Pid}}", d.Name).CombinedOutput() diff --git a/test/packetimpact/netdevs/BUILD b/test/packetimpact/netdevs/BUILD deleted file mode 100644 index 422bb9b0c..000000000 --- a/test/packetimpact/netdevs/BUILD +++ /dev/null @@ -1,15 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package( - licenses = ["notice"], -) - -go_library( - name = "netdevs", - srcs = ["netdevs.go"], - visibility = ["//test/packetimpact:__subpackages__"], - deps = [ - "//pkg/tcpip", - "//pkg/tcpip/header", - ], -) diff --git a/test/packetimpact/netdevs/netdevs.go b/test/packetimpact/netdevs/netdevs.go deleted file mode 100644 index d2c9cfeaf..000000000 --- a/test/packetimpact/netdevs/netdevs.go +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package netdevs contains utilities for working with network devices. -package netdevs - -import ( - "fmt" - "net" - "regexp" - "strings" - - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/header" -) - -// A DeviceInfo represents a network device. -type DeviceInfo struct { - MAC net.HardwareAddr - IPv4Addr net.IP - IPv4Net *net.IPNet - IPv6Addr net.IP - IPv6Net *net.IPNet -} - -var ( - deviceLine = regexp.MustCompile(`^\s*\d+: (\w+)`) - linkLine = regexp.MustCompile(`^\s*link/\w+ ([0-9a-fA-F:]+)`) - inetLine = regexp.MustCompile(`^\s*inet ([0-9./]+)`) - inet6Line = regexp.MustCompile(`^\s*inet6 ([0-9a-fA-Z:/]+)`) -) - -// ParseDevices parses the output from `ip addr show` into a map from device -// name to information about the device. 
-func ParseDevices(cmdOutput string) (map[string]DeviceInfo, error) { - var currentDevice string - var currentInfo DeviceInfo - deviceInfos := make(map[string]DeviceInfo) - for _, line := range strings.Split(cmdOutput, "\n") { - if m := deviceLine.FindStringSubmatch(line); m != nil { - if currentDevice != "" { - deviceInfos[currentDevice] = currentInfo - } - currentInfo = DeviceInfo{} - currentDevice = m[1] - } else if m := linkLine.FindStringSubmatch(line); m != nil { - mac, err := net.ParseMAC(m[1]) - if err != nil { - return nil, err - } - currentInfo.MAC = mac - } else if m := inetLine.FindStringSubmatch(line); m != nil { - ipv4Addr, ipv4Net, err := net.ParseCIDR(m[1]) - if err != nil { - return nil, err - } - currentInfo.IPv4Addr = ipv4Addr - currentInfo.IPv4Net = ipv4Net - } else if m := inet6Line.FindStringSubmatch(line); m != nil { - ipv6Addr, ipv6Net, err := net.ParseCIDR(m[1]) - if err != nil { - return nil, err - } - currentInfo.IPv6Addr = ipv6Addr - currentInfo.IPv6Net = ipv6Net - } - } - if currentDevice != "" { - deviceInfos[currentDevice] = currentInfo - } - return deviceInfos, nil -} - -// MACToIP converts the MAC address to an IPv6 link local address as described -// in RFC 4291 page 20: https://tools.ietf.org/html/rfc4291#page-20 -func MACToIP(mac net.HardwareAddr) net.IP { - addr := make([]byte, header.IPv6AddressSize) - addr[0] = 0xfe - addr[1] = 0x80 - header.EthernetAdddressToModifiedEUI64IntoBuf(tcpip.LinkAddress(mac), addr[8:]) - return net.IP(addr) -} - -// FindDeviceByIP finds a DeviceInfo and device name from an IP address in the -// output of ParseDevices. -func FindDeviceByIP(ip net.IP, devices map[string]DeviceInfo) (string, DeviceInfo, error) { - for dev, info := range devices { - if info.IPv4Addr.Equal(ip) { - return dev, info, nil - } - } - return "", DeviceInfo{}, fmt.Errorf("can't find %s on any interface", ip) -} diff --git a/test/packetimpact/runner/BUILD b/test/packetimpact/runner/BUILD deleted file mode 100644 index 0b68a760a..000000000 --- a/test/packetimpact/runner/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_test") - -package( - default_visibility = ["//test/packetimpact:__subpackages__"], - licenses = ["notice"], -) - -go_test( - name = "packetimpact_test", - srcs = ["packetimpact_test.go"], - tags = [ - # Not intended to be run directly. - "local", - "manual", - ], - deps = [ - "//pkg/test/dockerutil", - "//test/packetimpact/netdevs", - ], -) diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl deleted file mode 100644 index ee2e7e974..000000000 --- a/test/packetimpact/runner/defs.bzl +++ /dev/null @@ -1,135 +0,0 @@ -"""Defines rules for packetimpact test targets.""" - -load("//tools:defs.bzl", "go_test") - -def _packetimpact_test_impl(ctx): - test_runner = ctx.executable._test_runner - bench = ctx.actions.declare_file("%s-bench" % ctx.label.name) - bench_content = "\n".join([ - "#!/bin/bash", - # This test will run part in a distinct user namespace. This can cause - # permission problems, because all runfiles may not be owned by the - # current user, and no other users will be mapped in that namespace. - # Make sure that everything is readable here. - "find . 
-type f -or -type d -exec chmod a+rx {} \\;", - "%s %s --testbench_binary %s $@\n" % ( - test_runner.short_path, - " ".join(ctx.attr.flags), - ctx.files.testbench_binary[0].short_path, - ), - ]) - ctx.actions.write(bench, bench_content, is_executable = True) - - transitive_files = depset() - if hasattr(ctx.attr._test_runner, "data_runfiles"): - transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) - runfiles = ctx.runfiles( - files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, - transitive_files = transitive_files, - collect_default = True, - collect_data = True, - ) - return [DefaultInfo(executable = bench, runfiles = runfiles)] - -_packetimpact_test = rule( - attrs = { - "_test_runner": attr.label( - executable = True, - cfg = "target", - default = ":packetimpact_test", - ), - "_posix_server_binary": attr.label( - cfg = "target", - default = "//test/packetimpact/dut:posix_server", - ), - "testbench_binary": attr.label( - cfg = "target", - mandatory = True, - ), - "flags": attr.string_list( - mandatory = False, - default = [], - ), - }, - test = True, - implementation = _packetimpact_test_impl, -) - -PACKETIMPACT_TAGS = ["local", "manual"] - -def packetimpact_linux_test( - name, - testbench_binary, - expect_failure = False, - **kwargs): - """Add a packetimpact test on linux. - - Args: - name: name of the test - testbench_binary: the testbench binary - **kwargs: all the other args, forwarded to _packetimpact_test - """ - expect_failure_flag = ["--expect_failure"] if expect_failure else [] - _packetimpact_test( - name = name + "_linux_test", - testbench_binary = testbench_binary, - flags = ["--dut_platform", "linux"] + expect_failure_flag, - tags = PACKETIMPACT_TAGS + ["packetimpact"], - **kwargs - ) - -def packetimpact_netstack_test( - name, - testbench_binary, - expect_failure = False, - **kwargs): - """Add a packetimpact test on netstack. - - Args: - name: name of the test - testbench_binary: the testbench binary - expect_failure: the test must fail - **kwargs: all the other args, forwarded to _packetimpact_test - """ - expect_failure_flag = [] - if expect_failure: - expect_failure_flag = ["--expect_failure"] - _packetimpact_test( - name = name + "_netstack_test", - testbench_binary = testbench_binary, - # This is the default runtime unless - # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. - flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, - tags = PACKETIMPACT_TAGS + ["packetimpact"], - **kwargs - ) - -def packetimpact_go_test(name, size = "small", pure = True, linux = True, netstack = True, **kwargs): - """Add packetimpact tests written in go. 
- - Args: - name: name of the test - size: size of the test - pure: make a static go binary - linux: generate a linux test - netstack: generate a netstack test - **kwargs: all the other args, forwarded to go_test - """ - testbench_binary = name + "_test" - go_test( - name = testbench_binary, - size = size, - pure = pure, - tags = PACKETIMPACT_TAGS, - **kwargs - ) - packetimpact_linux_test( - name = name, - expect_failure = not linux, - testbench_binary = testbench_binary, - ) - packetimpact_netstack_test( - name = name, - expect_failure = not netstack, - testbench_binary = testbench_binary, - ) diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go deleted file mode 100644 index 8996f23fa..000000000 --- a/test/packetimpact/runner/packetimpact_test.go +++ /dev/null @@ -1,315 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// The runner starts docker containers and networking for a packetimpact test. -package packetimpact_test - -import ( - "flag" - "fmt" - "log" - "math/rand" - "net" - "os" - "os/exec" - "path" - "strings" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/test/dockerutil" - "gvisor.dev/gvisor/test/packetimpact/netdevs" -) - -// stringList implements flag.Value. -type stringList []string - -// String implements flag.Value.String. -func (l *stringList) String() string { - return strings.Join(*l, ",") -} - -// Set implements flag.Value.Set. -func (l *stringList) Set(value string) error { - *l = append(*l, value) - return nil -} - -var ( - dutPlatform = flag.String("dut_platform", "", "either \"linux\" or \"netstack\"") - testbenchBinary = flag.String("testbench_binary", "", "path to the testbench binary") - tshark = flag.Bool("tshark", false, "use more verbose tshark in logs instead of tcpdump") - extraTestArgs = stringList{} - expectFailure = flag.Bool("expect_failure", false, "expect that the test will fail when run") - - dutAddr = net.IPv4(0, 0, 0, 10) - testbenchAddr = net.IPv4(0, 0, 0, 20) -) - -const ctrlPort = "40000" - -// logger implements testutil.Logger. -// -// Labels logs based on their source and formats multi-line logs. -type logger string - -// Name implements testutil.Logger.Name. -func (l logger) Name() string { - return string(l) -} - -// Logf implements testutil.Logger.Logf. 
-func (l logger) Logf(format string, args ...interface{}) { - lines := strings.Split(fmt.Sprintf(format, args...), "\n") - log.Printf("%s: %s", l, lines[0]) - for _, line := range lines[1:] { - log.Printf("%*s %s", len(l), "", line) - } -} - -func TestOne(t *testing.T) { - flag.Var(&extraTestArgs, "extra_test_arg", "extra arguments to pass to the testbench") - flag.Parse() - if *dutPlatform != "linux" && *dutPlatform != "netstack" { - t.Fatal("--dut_platform should be either linux or netstack") - } - if *testbenchBinary == "" { - t.Fatal("--testbench_binary is missing") - } - if *dutPlatform == "netstack" { - if _, err := dockerutil.RuntimePath(); err != nil { - t.Fatal("--runtime is missing or invalid with --dut_platform=netstack:", err) - } - } - dockerutil.EnsureSupportedDockerVersion() - - // Create the networks needed for the test. One control network is needed for - // the gRPC control packets and one test network on which to transmit the test - // packets. - ctrlNet := dockerutil.NewDockerNetwork(logger("ctrlNet")) - testNet := dockerutil.NewDockerNetwork(logger("testNet")) - for _, dn := range []*dockerutil.DockerNetwork{ctrlNet, testNet} { - for { - if err := createDockerNetwork(dn); err != nil { - t.Log("creating docker network:", err) - const wait = 100 * time.Millisecond - t.Logf("sleeping %s and will try creating docker network again", wait) - // This can fail if another docker network claimed the same IP so we'll - // just try again. - time.Sleep(wait) - continue - } - break - } - defer func(dn *dockerutil.DockerNetwork) { - if err := dn.Cleanup(); err != nil { - t.Errorf("unable to cleanup container %s: %s", dn.Name, err) - } - }(dn) - } - - runOpts := dockerutil.RunOpts{ - Image: "packetimpact", - CapAdd: []string{"NET_ADMIN"}, - Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm"}, - Foreground: true, - Pty: func(_ *exec.Cmd, _ *os.File) {}, - } - - // Create the Docker container for the DUT. - dut := dockerutil.MakeDocker(logger("dut")) - if *dutPlatform == "linux" { - dut.Runtime = "" - } - - const containerPosixServerBinary = "/packetimpact/posix_server" - dut.CopyFiles("/packetimpact", "/test/packetimpact/dut/posix_server") - - if err := dut.Create(runOpts, containerPosixServerBinary, "--ip=0.0.0.0", "--port="+ctrlPort); err != nil { - t.Fatalf("unable to create container %s: %s", dut.Name, err) - } - defer dut.CleanUp() - - // Add ctrlNet as eth1 and testNet as eth2. - const testNetDev = "eth2" - if err := addNetworks(dut, dutAddr, []*dockerutil.DockerNetwork{ctrlNet, testNet}); err != nil { - t.Fatal(err) - } - - if err := dut.Start(); err != nil { - t.Fatalf("unable to start container %s: %s", dut.Name, err) - } - - if _, err := dut.WaitForOutput("Server listening.*\n", 60*time.Second); err != nil { - t.Fatalf("%s on container %s never listened: %s", containerPosixServerBinary, dut.Name, err) - } - - dutTestDevice, dutDeviceInfo, err := deviceByIP(dut, addressInSubnet(dutAddr, *testNet.Subnet)) - if err != nil { - t.Fatal(err) - } - - remoteMAC := dutDeviceInfo.MAC - remoteIPv6 := dutDeviceInfo.IPv6Addr - // Netstack as DUT doesn't assign IPv6 addresses automatically so do it if - // needed. - if remoteIPv6 == nil { - if _, err := dut.Exec(dockerutil.RunOpts{}, "ip", "addr", "add", netdevs.MACToIP(remoteMAC).String(), "scope", "link", "dev", dutTestDevice); err != nil { - t.Fatalf("unable to ip addr add on container %s: %s", dut.Name, err) - } - // Now try again, to make sure that it worked. 
- _, dutDeviceInfo, err = deviceByIP(dut, addressInSubnet(dutAddr, *testNet.Subnet)) - if err != nil { - t.Fatal(err) - } - remoteIPv6 = dutDeviceInfo.IPv6Addr - if remoteIPv6 == nil { - t.Fatal("unable to set IPv6 address on container", dut.Name) - } - } - - // Create the Docker container for the testbench. - testbench := dockerutil.MakeDocker(logger("testbench")) - testbench.Runtime = "" // The testbench always runs on Linux. - - tbb := path.Base(*testbenchBinary) - containerTestbenchBinary := "/packetimpact/" + tbb - testbench.CopyFiles("/packetimpact", "/test/packetimpact/tests/"+tbb) - - // Run tcpdump in the test bench unbuffered, without DNS resolution, just on - // the interface with the test packets. - snifferArgs := []string{ - "tcpdump", "-S", "-vvv", "-U", "-n", "-i", testNetDev, - } - snifferRegex := "tcpdump: listening.*\n" - if *tshark { - // Run tshark in the test bench unbuffered, without DNS resolution, just on - // the interface with the test packets. - snifferArgs = []string{ - "tshark", "-V", "-l", "-n", "-i", testNetDev, - "-o", "tcp.check_checksum:TRUE", - "-o", "udp.check_checksum:TRUE", - } - snifferRegex = "Capturing on.*\n" - } - - if err := testbench.Create(runOpts, snifferArgs...); err != nil { - t.Fatalf("unable to create container %s: %s", testbench.Name, err) - } - defer testbench.CleanUp() - - // Add ctrlNet as eth1 and testNet as eth2. - if err := addNetworks(testbench, testbenchAddr, []*dockerutil.DockerNetwork{ctrlNet, testNet}); err != nil { - t.Fatal(err) - } - - if err := testbench.Start(); err != nil { - t.Fatalf("unable to start container %s: %s", testbench.Name, err) - } - - // Kill so that it will flush output. - defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) - - if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil { - t.Fatalf("sniffer on %s never listened: %s", dut.Name, err) - } - - // Because the Linux kernel receives the SYN-ACK but didn't send the SYN it - // will issue a RST. To prevent this IPtables can be used to filter out all - // incoming packets. The raw socket that packetimpact tests use will still see - // everything. - if _, err := testbench.Exec(dockerutil.RunOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil { - t.Fatalf("unable to Exec iptables on container %s: %s", testbench.Name, err) - } - - // FIXME(b/156449515): Some piece of the system has a race. The old - // bash script version had a sleep, so we have one too. The race should - // be fixed and this sleep removed. - time.Sleep(time.Second) - - // Start a packetimpact test on the test bench. The packetimpact test sends - // and receives packets and also sends POSIX socket commands to the - // posix_server to be executed on the DUT. - testArgs := []string{containerTestbenchBinary} - testArgs = append(testArgs, extraTestArgs...) - testArgs = append(testArgs, - "--posix_server_ip", addressInSubnet(dutAddr, *ctrlNet.Subnet).String(), - "--posix_server_port", ctrlPort, - "--remote_ipv4", addressInSubnet(dutAddr, *testNet.Subnet).String(), - "--local_ipv4", addressInSubnet(testbenchAddr, *testNet.Subnet).String(), - "--remote_ipv6", remoteIPv6.String(), - "--remote_mac", remoteMAC.String(), - "--device", testNetDev, - ) - _, err = testbench.Exec(dockerutil.RunOpts{}, testArgs...) 
- if !*expectFailure && err != nil { - t.Fatal("test failed:", err) - } - if *expectFailure && err == nil { - t.Fatal("test failure expected but the test succeeded, enable the test and mark the corresponding bug as fixed") - } -} - -func addNetworks(d *dockerutil.Docker, addr net.IP, networks []*dockerutil.DockerNetwork) error { - for _, dn := range networks { - ip := addressInSubnet(addr, *dn.Subnet) - // Connect to the network with the specified IP address. - if err := dn.Connect(d, "--ip", ip.String()); err != nil { - return fmt.Errorf("unable to connect container %s to network %s: %w", d.Name, dn.Name, err) - } - } - return nil -} - -// addressInSubnet combines the subnet provided with the address and returns a -// new address. The return address bits come from the subnet where the mask is 1 -// and from the ip address where the mask is 0. -func addressInSubnet(addr net.IP, subnet net.IPNet) net.IP { - var octets []byte - for i := 0; i < 4; i++ { - octets = append(octets, (subnet.IP.To4()[i]&subnet.Mask[i])+(addr.To4()[i]&(^subnet.Mask[i]))) - } - return net.IP(octets) -} - -// makeDockerNetwork makes a randomly-named network that will start with the -// namePrefix. The network will be a random /24 subnet. -func createDockerNetwork(n *dockerutil.DockerNetwork) error { - randSource := rand.NewSource(time.Now().UnixNano()) - r1 := rand.New(randSource) - // Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. - ip := net.IPv4(byte(r1.Intn(224-192)+192), byte(r1.Intn(256)), byte(r1.Intn(256)), 0) - n.Subnet = &net.IPNet{ - IP: ip, - Mask: ip.DefaultMask(), - } - return n.Create() -} - -// deviceByIP finds a deviceInfo and device name from an IP address. -func deviceByIP(d *dockerutil.Docker, ip net.IP) (string, netdevs.DeviceInfo, error) { - out, err := d.Exec(dockerutil.RunOpts{}, "ip", "addr", "show") - if err != nil { - return "", netdevs.DeviceInfo{}, fmt.Errorf("listing devices on %s container: %w", d.Name, err) - } - devs, err := netdevs.ParseDevices(out) - if err != nil { - return "", netdevs.DeviceInfo{}, fmt.Errorf("parsing devices from %s container: %w", d.Name, err) - } - testDevice, deviceInfo, err := netdevs.FindDeviceByIP(ip, devs) - if err != nil { - return "", netdevs.DeviceInfo{}, fmt.Errorf("can't find deviceInfo for container %s: %w", d.Name, err) - } - return testDevice, deviceInfo, nil -} diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index d588d3289..fed51006f 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -21,7 +21,6 @@ go_library( "//pkg/tcpip/header", "//pkg/tcpip/seqnum", "//pkg/usermem", - "//test/packetimpact/netdevs", "//test/packetimpact/proto:posix_server_go_proto", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index bf104e5ca..463fd0556 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -114,12 +114,12 @@ var _ layerState = (*etherState)(nil) func newEtherState(out, in Ether) (*etherState, error) { lMAC, err := tcpip.ParseMACAddress(LocalMAC) if err != nil { - return nil, fmt.Errorf("parsing local MAC: %q: %w", LocalMAC, err) + return nil, err } rMAC, err := tcpip.ParseMACAddress(RemoteMAC) if err != nil { - return nil, fmt.Errorf("parsing remote MAC: %q: %w", RemoteMAC, err) + return nil, err } s := etherState{ out: Ether{SrcAddr: &lMAC, 
DstAddr: &rMAC}, diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index b919a3c2e..a78b7d7ee 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -16,7 +16,6 @@ package testbench import ( "context" - "flag" "net" "strconv" "syscall" @@ -38,11 +37,6 @@ type DUT struct { // NewDUT creates a new connection with the DUT over gRPC. func NewDUT(t *testing.T) DUT { - flag.Parse() - if err := genPseudoFlags(); err != nil { - t.Fatal("generating psuedo flags:", err) - } - posixServerAddress := POSIXServerIP + ":" + strconv.Itoa(POSIXServerPort) conn, err := grpc.Dial(posixServerAddress, grpc.WithInsecure(), grpc.WithKeepaliveParams(keepalive.ClientParameters{Timeout: RPCKeepalive})) if err != nil { diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index 278229b7e..4665f60b2 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -16,6 +16,7 @@ package testbench import ( "encoding/binary" + "flag" "fmt" "math" "net" @@ -40,6 +41,7 @@ func htons(x uint16) uint16 { // NewSniffer creates a Sniffer connected to *device. func NewSniffer(t *testing.T) (Sniffer, error) { + flag.Parse() snifferFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL))) if err != nil { return Sniffer{}, err @@ -134,6 +136,7 @@ type Injector struct { // NewInjector creates a new injector on *device. func NewInjector(t *testing.T) (Injector, error) { + flag.Parse() ifInfo, err := net.InterfaceByName(Device) if err != nil { return Injector{}, err diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go index 4de2aa1d3..a1242b189 100644 --- a/test/packetimpact/testbench/testbench.go +++ b/test/packetimpact/testbench/testbench.go @@ -16,12 +16,7 @@ package testbench import ( "flag" - "fmt" - "net" - "os/exec" "time" - - "gvisor.dev/gvisor/test/packetimpact/netdevs" ) var ( @@ -60,31 +55,9 @@ func RegisterFlags(fs *flag.FlagSet) { fs.DurationVar(&RPCKeepalive, "rpc_keepalive", RPCKeepalive, "gRPC keepalive") fs.StringVar(&LocalIPv4, "local_ipv4", LocalIPv4, "local IPv4 address for test packets") fs.StringVar(&RemoteIPv4, "remote_ipv4", RemoteIPv4, "remote IPv4 address for test packets") + fs.StringVar(&LocalIPv6, "local_ipv6", LocalIPv6, "local IPv6 address for test packets") fs.StringVar(&RemoteIPv6, "remote_ipv6", RemoteIPv6, "remote IPv6 address for test packets") + fs.StringVar(&LocalMAC, "local_mac", LocalMAC, "local mac address for test packets") fs.StringVar(&RemoteMAC, "remote_mac", RemoteMAC, "remote mac address for test packets") fs.StringVar(&Device, "device", Device, "local device for test packets") } - -// genPseudoFlags populates flag-like global config based on real flags. -// -// genPseudoFlags must only be called after flag.Parse. 
-func genPseudoFlags() error { - out, err := exec.Command("ip", "addr", "show").CombinedOutput() - if err != nil { - return fmt.Errorf("listing devices: %q: %w", string(out), err) - } - devs, err := netdevs.ParseDevices(string(out)) - if err != nil { - return fmt.Errorf("parsing devices: %w", err) - } - - _, deviceInfo, err := netdevs.FindDeviceByIP(net.ParseIP(LocalIPv4), devs) - if err != nil { - return fmt.Errorf("can't find deviceInfo: %w", err) - } - - LocalMAC = deviceInfo.MAC.String() - LocalIPv6 = deviceInfo.IPv6Addr.String() - - return nil -} diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 15e1c3f04..c25b3b8c1 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -1,4 +1,4 @@ -load("//test/packetimpact/runner:defs.bzl", "packetimpact_go_test") +load("defs.bzl", "packetimpact_go_test") package( default_visibility = ["//test/packetimpact:__subpackages__"], @@ -156,3 +156,8 @@ packetimpact_go_test( "@org_golang_x_sys//unix:go_default_library", ], ) + +sh_binary( + name = "test_runner", + srcs = ["test_runner.sh"], +) diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl new file mode 100644 index 000000000..27c5de375 --- /dev/null +++ b/test/packetimpact/tests/defs.bzl @@ -0,0 +1,137 @@ +"""Defines rules for packetimpact test targets.""" + +load("//tools:defs.bzl", "go_test") + +def _packetimpact_test_impl(ctx): + test_runner = ctx.executable._test_runner + bench = ctx.actions.declare_file("%s-bench" % ctx.label.name) + bench_content = "\n".join([ + "#!/bin/bash", + # This test will run part in a distinct user namespace. This can cause + # permission problems, because all runfiles may not be owned by the + # current user, and no other users will be mapped in that namespace. + # Make sure that everything is readable here. + "find . -type f -exec chmod a+rx {} \\;", + "find . -type d -exec chmod a+rx {} \\;", + "%s %s --posix_server_binary %s --testbench_binary %s $@\n" % ( + test_runner.short_path, + " ".join(ctx.attr.flags), + ctx.files._posix_server_binary[0].short_path, + ctx.files.testbench_binary[0].short_path, + ), + ]) + ctx.actions.write(bench, bench_content, is_executable = True) + + transitive_files = depset() + if hasattr(ctx.attr._test_runner, "data_runfiles"): + transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + runfiles = ctx.runfiles( + files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, + transitive_files = transitive_files, + collect_default = True, + collect_data = True, + ) + return [DefaultInfo(executable = bench, runfiles = runfiles)] + +_packetimpact_test = rule( + attrs = { + "_test_runner": attr.label( + executable = True, + cfg = "target", + default = ":test_runner", + ), + "_posix_server_binary": attr.label( + cfg = "target", + default = "//test/packetimpact/dut:posix_server", + ), + "testbench_binary": attr.label( + cfg = "target", + mandatory = True, + ), + "flags": attr.string_list( + mandatory = False, + default = [], + ), + }, + test = True, + implementation = _packetimpact_test_impl, +) + +PACKETIMPACT_TAGS = ["local", "manual"] + +def packetimpact_linux_test( + name, + testbench_binary, + expect_failure = False, + **kwargs): + """Add a packetimpact test on linux. 
+ + Args: + name: name of the test + testbench_binary: the testbench binary + **kwargs: all the other args, forwarded to _packetimpact_test + """ + expect_failure_flag = ["--expect_failure"] if expect_failure else [] + _packetimpact_test( + name = name + "_linux_test", + testbench_binary = testbench_binary, + flags = ["--dut_platform", "linux"] + expect_failure_flag, + tags = PACKETIMPACT_TAGS + ["packetimpact"], + **kwargs + ) + +def packetimpact_netstack_test( + name, + testbench_binary, + expect_failure = False, + **kwargs): + """Add a packetimpact test on netstack. + + Args: + name: name of the test + testbench_binary: the testbench binary + expect_failure: the test must fail + **kwargs: all the other args, forwarded to _packetimpact_test + """ + expect_failure_flag = [] + if expect_failure: + expect_failure_flag = ["--expect_failure"] + _packetimpact_test( + name = name + "_netstack_test", + testbench_binary = testbench_binary, + # This is the default runtime unless + # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. + flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, + tags = PACKETIMPACT_TAGS + ["packetimpact"], + **kwargs + ) + +def packetimpact_go_test(name, size = "small", pure = True, linux = True, netstack = True, **kwargs): + """Add packetimpact tests written in go. + + Args: + name: name of the test + size: size of the test + pure: make a static go binary + linux: generate a linux test + netstack: generate a netstack test + **kwargs: all the other args, forwarded to go_test + """ + testbench_binary = name + "_test" + go_test( + name = testbench_binary, + size = size, + pure = pure, + tags = PACKETIMPACT_TAGS, + **kwargs + ) + packetimpact_linux_test( + name = name, + expect_failure = not linux, + testbench_binary = testbench_binary, + ) + packetimpact_netstack_test( + name = name, + expect_failure = not netstack, + testbench_binary = testbench_binary, + ) diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh new file mode 100755 index 000000000..706441cce --- /dev/null +++ b/test/packetimpact/tests/test_runner.sh @@ -0,0 +1,325 @@ +#!/bin/bash + +# Copyright 2020 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Run a packetimpact test. Two docker containers are made, one for the +# Device-Under-Test (DUT) and one for the test bench. Each is attached with +# two networks, one for control packets that aid the test and one for test +# packets which are sent as part of the test and observed for correctness. + +set -euxo pipefail + +function failure() { + local lineno=$1 + local msg=$2 + local filename="$0" + echo "FAIL: $filename:$lineno: $msg" +} +trap 'failure ${LINENO} "$BASH_COMMAND"' ERR + +declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark,extra_test_arg:,expect_failure" + +# Don't use declare below so that the error from getopt will end the script. 
+PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") + +eval set -- "$PARSED" + +declare -a EXTRA_TEST_ARGS + +while true; do + case "$1" in + --dut_platform) + # Either "linux" or "netstack". + declare -r DUT_PLATFORM="$2" + shift 2 + ;; + --posix_server_binary) + declare -r POSIX_SERVER_BINARY="$2" + shift 2 + ;; + --testbench_binary) + declare -r TESTBENCH_BINARY="$2" + shift 2 + ;; + --runtime) + # Not readonly because there might be multiple --runtime arguments and we + # want to use just the last one. Only used if --dut_platform is + # "netstack". + declare RUNTIME="$2" + shift 2 + ;; + --tshark) + declare -r TSHARK="1" + shift 1 + ;; + --extra_test_arg) + EXTRA_TEST_ARGS+="$2" + shift 2 + ;; + --expect_failure) + declare -r EXPECT_FAILURE="1" + shift 1 + ;; + --) + shift + break + ;; + *) + echo "Programming error" + exit 3 + esac +done + +# All the other arguments are scripts. +declare -r scripts="$@" + +# Check that the required flags are defined in a way that is safe for "set -u". +if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then + if [[ -z "${RUNTIME-}" ]]; then + echo "FAIL: Missing --runtime argument: ${RUNTIME-}" + exit 2 + fi + declare -r RUNTIME_ARG="--runtime ${RUNTIME}" +elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then + declare -r RUNTIME_ARG="" +else + echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}" + exit 2 +fi +if [[ ! -f "${POSIX_SERVER_BINARY-}" ]]; then + echo "FAIL: Bad or missing --posix_server_binary: ${POSIX_SERVER-}" + exit 2 +fi +if [[ ! -f "${TESTBENCH_BINARY-}" ]]; then + echo "FAIL: Bad or missing --testbench_binary: ${TESTBENCH_BINARY-}" + exit 2 +fi + +function new_net_prefix() { + # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. + echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" +} + +# Variables specific to the control network and interface start with CTRL_. +# Variables specific to the test network and interface start with TEST_. +# Variables specific to the DUT start with DUT_. +# Variables specific to the test bench start with TESTBENCH_. +# Use random numbers so that test networks don't collide. +declare CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" +declare CTRL_NET_PREFIX=$(new_net_prefix) +declare TEST_NET="test_net-${RANDOM}${RANDOM}" +declare TEST_NET_PREFIX=$(new_net_prefix) +# On both DUT and test bench, testing packets are on the eth2 interface. +declare -r TEST_DEVICE="eth2" +# Number of bits in the *_NET_PREFIX variables. +declare -r NET_MASK="24" +# Last bits of the DUT's IP address. +declare -r DUT_NET_SUFFIX=".10" +# Control port. +declare -r CTRL_PORT="40000" +# Last bits of the test bench's IP address. +declare -r TESTBENCH_NET_SUFFIX=".20" +declare -r TIMEOUT="60" +declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetimpact" + +# Make sure that docker is installed. +docker --version + +function finish { + local cleanup_success=1 + + if [[ -z "${TSHARK-}" ]]; then + # Kill tcpdump so that it will flush output. + docker exec -t "${TESTBENCH}" \ + killall tcpdump || \ + cleanup_success=0 + else + # Kill tshark so that it will flush output. + docker exec -t "${TESTBENCH}" \ + killall tshark || \ + cleanup_success=0 + fi + + for net in "${CTRL_NET}" "${TEST_NET}"; do + # Kill all processes attached to ${net}. + for docker_command in "kill" "rm"; do + (docker network inspect "${net}" \ + --format '{{range $key, $value := .Containers}}{{$key}} {{end}}' \ + | xargs -r docker "${docker_command}") || \ + cleanup_success=0 + done + # Remove the network. 
+ docker network rm "${net}" || \ + cleanup_success=0 + done + + if ((!$cleanup_success)); then + echo "FAIL: Cleanup command failed" + exit 4 + fi +} +trap finish EXIT + +# Subnet for control packets between test bench and DUT. +while ! docker network create \ + "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do + sleep 0.1 + CTRL_NET_PREFIX=$(new_net_prefix) + CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" +done + +# Subnet for the packets that are part of the test. +while ! docker network create \ + "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do + sleep 0.1 + TEST_NET_PREFIX=$(new_net_prefix) + TEST_NET="test_net-${RANDOM}${RANDOM}" +done + +docker pull "${IMAGE_TAG}" + +# Create the DUT container and connect to network. +DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \ + --cap-add NET_ADMIN \ + --sysctl net.ipv6.conf.all.disable_ipv6=0 \ + --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) +docker network connect "${CTRL_NET}" \ + --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ + || (docker kill ${DUT}; docker rm ${DUT}; false) +docker network connect "${TEST_NET}" \ + --ip "${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ + || (docker kill ${DUT}; docker rm ${DUT}; false) +docker start "${DUT}" + +# Create the test bench container and connect to network. +TESTBENCH=$(docker create --privileged --rm \ + --cap-add NET_ADMIN \ + --sysctl net.ipv6.conf.all.disable_ipv6=0 \ + --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) +docker network connect "${CTRL_NET}" \ + --ip "${CTRL_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ + || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false) +docker network connect "${TEST_NET}" \ + --ip "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ + || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false) +docker start "${TESTBENCH}" + +# Start the posix_server in the DUT. +declare -r DOCKER_POSIX_SERVER_BINARY="/$(basename ${POSIX_SERVER_BINARY})" +docker cp -L ${POSIX_SERVER_BINARY} "${DUT}:${DOCKER_POSIX_SERVER_BINARY}" + +docker exec -t "${DUT}" \ + /bin/bash -c "${DOCKER_POSIX_SERVER_BINARY} \ + --ip ${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ + --port ${CTRL_PORT}" & + +# Because the Linux kernel receives the SYN-ACK but didn't send the SYN it will +# issue a RST. To prevent this IPtables can be used to filter those out. +docker exec "${TESTBENCH}" \ + iptables -A INPUT -i ${TEST_DEVICE} -j DROP + +# Wait for the DUT server to come up. Attempt to connect to it from the test +# bench every 100 milliseconds until success. +while ! docker exec "${TESTBENCH}" \ + nc -zv "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${CTRL_PORT}"; do + sleep 0.1 +done + +declare -r REMOTE_MAC=$(docker exec -t "${DUT}" ip link show \ + "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) +declare -r LOCAL_MAC=$(docker exec -t "${TESTBENCH}" ip link show \ + "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) +declare REMOTE_IPV6=$(docker exec -t "${DUT}" ip addr show scope link \ + "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) +declare -r LOCAL_IPV6=$(docker exec -t "${TESTBENCH}" ip addr show scope link \ + "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) + +# Netstack as DUT doesn't assign IPv6 addresses automatically so do it if +# needed. Convert the MAC address to an IPv6 link local address as described in +# RFC 4291 page 20: https://tools.ietf.org/html/rfc4291#page-20 +if [[ -z "${REMOTE_IPV6}" ]]; then + # Split the octets of the MAC into an array of strings. 
+ IFS=":" read -a REMOTE_OCTETS <<< "${REMOTE_MAC}" + # Flip the global bit. + REMOTE_OCTETS[0]=$(printf '%x' "$((0x${REMOTE_OCTETS[0]} ^ 2))") + # Add the IPv6 address. + docker exec "${DUT}" \ + ip addr add $(printf 'fe80::%02x%02x:%02xff:fe%02x:%02x%02x/64' \ + "0x${REMOTE_OCTETS[0]}" "0x${REMOTE_OCTETS[1]}" "0x${REMOTE_OCTETS[2]}" \ + "0x${REMOTE_OCTETS[3]}" "0x${REMOTE_OCTETS[4]}" "0x${REMOTE_OCTETS[5]}") \ + scope link \ + dev "${TEST_DEVICE}" + # Re-extract the IPv6 address. + # TODO(eyalsoha): Add "scope link" below when netstack supports correctly + # creating link-local IPv6 addresses. + REMOTE_IPV6=$(docker exec -t "${DUT}" ip addr show \ + "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) +fi + +declare -r DOCKER_TESTBENCH_BINARY="/$(basename ${TESTBENCH_BINARY})" +docker cp -L "${TESTBENCH_BINARY}" "${TESTBENCH}:${DOCKER_TESTBENCH_BINARY}" + +if [[ -z "${TSHARK-}" ]]; then + # Run tcpdump in the test bench unbuffered, without dns resolution, just on + # the interface with the test packets. + docker exec -t "${TESTBENCH}" \ + tcpdump -S -vvv -U -n -i "${TEST_DEVICE}" \ + net "${TEST_NET_PREFIX}/24" or \ + host "${REMOTE_IPV6}" or \ + host "${LOCAL_IPV6}" & +else + # Run tshark in the test bench unbuffered, without dns resolution, just on the + # interface with the test packets. + docker exec -t "${TESTBENCH}" \ + tshark -V -l -n -i "${TEST_DEVICE}" \ + -o tcp.check_checksum:TRUE \ + -o udp.check_checksum:TRUE \ + net "${TEST_NET_PREFIX}/24" or \ + host "${REMOTE_IPV6}" or \ + host "${LOCAL_IPV6}" & +fi + +# tcpdump and tshark take time to startup +sleep 3 + +# Start a packetimpact test on the test bench. The packetimpact test sends and +# receives packets and also sends POSIX socket commands to the posix_server to +# be executed on the DUT. +docker exec \ + -e XML_OUTPUT_FILE="/test.xml" \ + -e TEST_TARGET \ + -t "${TESTBENCH}" \ + /bin/bash -c "${DOCKER_TESTBENCH_BINARY} \ + ${EXTRA_TEST_ARGS[@]-} \ + --posix_server_ip=${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ + --posix_server_port=${CTRL_PORT} \ + --remote_ipv4=${TEST_NET_PREFIX}${DUT_NET_SUFFIX} \ + --local_ipv4=${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX} \ + --remote_ipv6=${REMOTE_IPV6} \ + --local_ipv6=${LOCAL_IPV6} \ + --remote_mac=${REMOTE_MAC} \ + --local_mac=${LOCAL_MAC} \ + --device=${TEST_DEVICE}" && true +declare -r TEST_RESULT="${?}" +if [[ -z "${EXPECT_FAILURE-}" && "${TEST_RESULT}" != 0 ]]; then + echo 'FAIL: This test was expected to pass.' + exit ${TEST_RESULT} +fi +if [[ ! -z "${EXPECT_FAILURE-}" && "${TEST_RESULT}" == 0 ]]; then + echo 'FAIL: This test was expected to fail but passed. Enable the test and' \ + 'mark the corresponding bug as fixed.' + exit 1 +fi +echo PASS: No errors. -- cgit v1.2.3 From db655f020ea556524f0e341e538e81c16d4f95e7 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 13 May 2020 17:35:04 -0700 Subject: Resolve remaining TODOs for tmpfs. 
Closes #1197 PiperOrigin-RevId: 311438223 --- pkg/sentry/fsimpl/tmpfs/BUILD | 1 + pkg/sentry/fsimpl/tmpfs/filesystem.go | 25 +++-- pkg/sentry/fsimpl/tmpfs/regular_file_test.go | 136 ----------------------- pkg/sentry/fsimpl/tmpfs/stat_test.go | 2 - pkg/sentry/fsimpl/tmpfs/tmpfs_test.go | 156 +++++++++++++++++++++++++++ test/syscalls/linux/BUILD | 1 + test/syscalls/linux/socket.cc | 9 +- test/syscalls/linux/symlink.cc | 25 +++++ 8 files changed, 208 insertions(+), 147 deletions(-) create mode 100644 pkg/sentry/fsimpl/tmpfs/tmpfs_test.go (limited to 'test') diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index a2d9649e7..9a076ad71 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -96,6 +96,7 @@ go_test( "pipe_test.go", "regular_file_test.go", "stat_test.go", + "tmpfs_test.go", ], library = ":tmpfs", deps = [ diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 36ffcb592..e0ad82769 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -16,6 +16,7 @@ package tmpfs import ( "fmt" + "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -24,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Sync implements vfs.FilesystemImpl.Sync. @@ -76,8 +78,8 @@ afterSymlink: return nil, err } if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO(gvisor.dev/issue/1197): Symlink traversals updates - // access time. + // Symlink traversal updates access time. + atomic.StoreInt64(&d.inode.atime, d.inode.fs.clock.Now().Nanoseconds()) if err := rp.HandleSymlink(symlink.target); err != nil { return nil, err } @@ -361,8 +363,8 @@ afterTrailingSymlink: } // Do we need to resolve a trailing symlink? if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO(gvisor.dev/issue/1197): Symlink traversals updates - // access time. + // Symlink traversal updates access time. + atomic.StoreInt64(&child.inode.atime, child.inode.fs.clock.Now().Nanoseconds()) if err := rp.HandleSymlink(symlink.target); err != nil { return nil, err } @@ -636,12 +638,19 @@ func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { + if _, err := resolveLocked(rp); err != nil { return linux.Statfs{}, err } - // TODO(gvisor.dev/issue/1197): Actually implement statfs. - return linux.Statfs{}, syserror.ENOSYS + statfs := linux.Statfs{ + Type: linux.TMPFS_MAGIC, + BlockSize: usermem.PageSize, + FragmentSize: usermem.PageSize, + NameLength: linux.NAME_MAX, + // TODO(b/29637826): Allow configuring a tmpfs size and enforce it. + Blocks: 0, + BlocksFree: 0, + } + return statfs, nil } // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go index 0399725cf..f2bc96d51 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go @@ -18,152 +18,16 @@ import ( "bytes" "fmt" "io" - "sync/atomic" "testing" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fs/lock" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) -// nextFileID is used to generate unique file names. -var nextFileID int64 - -// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error -// is not nil, then cleanup should be called when the root is no longer needed. -func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) { - creds := auth.CredentialsFromContext(ctx) - - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { - return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err) - } - - vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) - if err != nil { - return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err) - } - root := mntns.Root() - return vfsObj, root, func() { - root.DecRef() - mntns.DecRef() - }, nil -} - -// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If -// the returned err is not nil, then cleanup should be called when the FD is no -// longer needed. -func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1)) - - // Create the file that will be write/read. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filename), - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: linux.ModeRegular | mode, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err) - } - - return fd, cleanup, nil -} - -// newDirFD is like newFileFD, but for directories. -func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1)) - - // Create the dir. - if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(dirname), - }, &vfs.MkdirOptions{ - Mode: linux.ModeDirectory | mode, - }); err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err) - } - - // Open the dir and return it. 
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(dirname), - }, &vfs.OpenOptions{ - Flags: linux.O_RDONLY | linux.O_DIRECTORY, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err) - } - - return fd, cleanup, nil -} - -// newPipeFD is like newFileFD, but for pipes. -func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - pipename := fmt.Sprintf("tmpfs-test-pipe-%d", atomic.AddInt64(&nextFileID, 1)) - - // Create the pipe. - if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(pipename), - }, &vfs.MknodOptions{ - Mode: linux.ModeNamedPipe | mode, - }); err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create pipe %q: %v", pipename, err) - } - - // Open the pipe and return it. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(pipename), - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to open pipe %q: %v", pipename, err) - } - - return fd, cleanup, nil -} - // Test that we can write some data to a file and read it back.` func TestSimpleWriteRead(t *testing.T) { ctx := contexttest.Context(t) diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go index 60c2c980e..f52755092 100644 --- a/pkg/sentry/fsimpl/tmpfs/stat_test.go +++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go @@ -29,7 +29,6 @@ func TestStatAfterCreate(t *testing.T) { mode := linux.FileMode(0644) // Run with different file types. - // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets. for _, typ := range []string{"file", "dir", "pipe"} { t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { var ( @@ -175,7 +174,6 @@ func TestSetStat(t *testing.T) { mode := linux.FileMode(0644) // Run with different file types. - // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets. for _, typ := range []string{"file", "dir", "pipe"} { t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { var ( diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go new file mode 100644 index 000000000..a240fb276 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go @@ -0,0 +1,156 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// nextFileID is used to generate unique file names. +var nextFileID int64 + +// newTmpfsRoot creates a new tmpfs mount, and returns the root. 
If the error +// is not nil, then cleanup should be called when the root is no longer needed. +func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) { + creds := auth.CredentialsFromContext(ctx) + + vfsObj := &vfs.VirtualFilesystem{} + if err := vfsObj.Init(); err != nil { + return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err) + } + + vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + }) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) + if err != nil { + return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err) + } + root := mntns.Root() + return vfsObj, root, func() { + root.DecRef() + mntns.DecRef() + }, nil +} + +// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If +// the returned err is not nil, then cleanup should be called when the FD is no +// longer needed. +func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { + creds := auth.CredentialsFromContext(ctx) + vfsObj, root, cleanup, err := newTmpfsRoot(ctx) + if err != nil { + return nil, nil, err + } + + filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1)) + + // Create the file that will be write/read. + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(filename), + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, + Mode: linux.ModeRegular | mode, + }) + if err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err) + } + + return fd, cleanup, nil +} + +// newDirFD is like newFileFD, but for directories. +func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { + creds := auth.CredentialsFromContext(ctx) + vfsObj, root, cleanup, err := newTmpfsRoot(ctx) + if err != nil { + return nil, nil, err + } + + dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1)) + + // Create the dir. + if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(dirname), + }, &vfs.MkdirOptions{ + Mode: linux.ModeDirectory | mode, + }); err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err) + } + + // Open the dir and return it. + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(dirname), + }, &vfs.OpenOptions{ + Flags: linux.O_RDONLY | linux.O_DIRECTORY, + }) + if err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err) + } + + return fd, cleanup, nil +} + +// newPipeFD is like newFileFD, but for pipes. 
+func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { + creds := auth.CredentialsFromContext(ctx) + vfsObj, root, cleanup, err := newTmpfsRoot(ctx) + if err != nil { + return nil, nil, err + } + + name := fmt.Sprintf("tmpfs-test-%d", atomic.AddInt64(&nextFileID, 1)) + + if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(name), + }, &vfs.MknodOptions{ + Mode: linux.ModeNamedPipe | mode, + }); err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to create pipe %q: %v", name, err) + } + + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(name), + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR, + }) + if err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to open pipe %q: %v", name, err) + } + + return fd, cleanup, nil +} diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index adf259bba..e6bc63474 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3288,6 +3288,7 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/time", gtest, "//test/util:temp_path", "//test/util:test_main", diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc index 703d594a2..afa59c1da 100644 --- a/test/syscalls/linux/socket.cc +++ b/test/syscalls/linux/socket.cc @@ -61,7 +61,7 @@ TEST(SocketTest, ProtocolInet) { } } -TEST(SocketTest, UnixSocketFileMode) { +TEST(SocketTest, UnixSocketStat) { // TODO(gvisor.dev/issue/1624): Re-enable this test once VFS1 is deleted. It // should pass in VFS2. SKIP_IF(IsRunningOnGvisor()); @@ -83,7 +83,14 @@ TEST(SocketTest, UnixSocketFileMode) { struct stat statbuf = {}; ASSERT_THAT(stat(addr.sun_path, &statbuf), SyscallSucceeds()); + + // Mode should be S_IFSOCK. EXPECT_EQ(statbuf.st_mode, S_IFSOCK | sock_perm & ~mask); + + // Timestamps should be equal and non-zero. + EXPECT_NE(statbuf.st_atime, 0); + EXPECT_EQ(statbuf.st_atime, statbuf.st_mtime); + EXPECT_EQ(statbuf.st_atime, statbuf.st_ctime); } TEST(SocketTest, UnixConnectNeedsWritePerm) { diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc index 03ee1250d..a17ff62e9 100644 --- a/test/syscalls/linux/symlink.cc +++ b/test/syscalls/linux/symlink.cc @@ -20,6 +20,7 @@ #include #include "gtest/gtest.h" +#include "absl/time/clock.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" @@ -272,6 +273,30 @@ TEST(SymlinkTest, ChmodSymlink) { EXPECT_EQ(FilePermission(newpath), 0777); } +// Test that following a symlink updates the atime on the symlink. +TEST(SymlinkTest, FollowUpdatesATime) { + const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string link = NewTempAbsPath(); + EXPECT_THAT(symlink(file.path().c_str(), link.c_str()), SyscallSucceeds()); + + // Lstat the symlink. + struct stat st_before_follow; + ASSERT_THAT(lstat(link.c_str(), &st_before_follow), SyscallSucceeds()); + + // Let the clock advance. + absl::SleepFor(absl::Seconds(1)); + + // Open the file via the symlink. + int fd; + ASSERT_THAT(fd = open(link.c_str(), O_RDWR, 0666), SyscallSucceeds()); + FileDescriptor fd_closer(fd); + + // Lstat the symlink again, and check that atime is updated. 
+ struct stat st_after_follow; + ASSERT_THAT(lstat(link.c_str(), &st_after_follow), SyscallSucceeds()); + EXPECT_LT(st_before_follow.st_atime, st_after_follow.st_atime); +} + class ParamSymlinkTest : public ::testing::TestWithParam {}; // Test that creating an existing symlink with creat will create the target. -- cgit v1.2.3 From 8b8774d7152eb61fc6273bbae679e80c34188517 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 13 May 2020 19:47:42 -0700 Subject: Stub support for TCP_SYNCNT and TCP_WINDOW_CLAMP. This change adds support for TCP_SYNCNT and TCP_WINDOW_CLAMP options in GetSockOpt/SetSockOpt. This change does not really change any behaviour in Netstack and only stores/returns the stored value. Actual honoring of these options will be added as required. Fixes #2626, #2625 PiperOrigin-RevId: 311453777 --- pkg/sentry/socket/netstack/netstack.go | 39 ++++++ pkg/tcpip/tcpip.go | 17 +++ pkg/tcpip/transport/tcp/endpoint.go | 64 ++++++++- pkg/tcpip/transport/tcp/protocol.go | 21 +++ test/syscalls/linux/socket_ip_tcp_generic.cc | 45 +++++++ test/syscalls/linux/tcp_socket.cc | 185 ++++++++++++++++++++++++++- 6 files changed, 367 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 9d032f052..9dea2b5ff 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1321,6 +1321,29 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa return int32(time.Duration(v) / time.Second), nil + case linux.TCP_SYNCNT: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptInt(tcpip.TCPSynCountOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(v), nil + + case linux.TCP_WINDOW_CLAMP: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptInt(tcpip.TCPWindowClampOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(v), nil default: emitUnimplementedEventTCP(t, name) } @@ -1790,6 +1813,22 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v)))) + case linux.TCP_SYNCNT: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + v := usermem.ByteOrder.Uint32(optVal) + + return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v))) + + case linux.TCP_WINDOW_CLAMP: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + v := usermem.ByteOrder.Uint32(optVal) + + return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v))) + case linux.TCP_REPAIR_OPTIONS: t.Kernel().EmitUnimplementedEvent(t) diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 1ca4088c9..6270146fa 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -622,6 +622,19 @@ const ( // // A zero value indicates the default. TTLOption + + // TCPSynCountOption is used by SetSockOpt/GetSockOpt to specify the number of + // SYN retransmits that TCP should send before aborting the attempt to + // connect. It cannot exceed 255. + // + // NOTE: This option is currently only stubbed out and is no-op. + TCPSynCountOption + + // TCPWindowClampOption is used by SetSockOpt/GetSockOpt to bound the size + // of the advertised window to this value. 
+ // + // NOTE: This option is currently only stubed out and is a no-op + TCPWindowClampOption ) // ErrorOption is used in GetSockOpt to specify that the last error reported by @@ -690,6 +703,10 @@ type TCPMinRTOOption time.Duration // switches to using SYN cookies. type TCPSynRcvdCountThresholdOption uint64 +// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide +// default for number of times SYN is retransmitted before aborting a connect. +type TCPSynRetriesOption uint8 + // MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a // default interface for multicast. type MulticastInterfaceOption struct { diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 07d3e64c8..71735029e 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -470,6 +470,17 @@ type endpoint struct { // for this endpoint using the TCP_MAXSEG setsockopt. userMSS uint16 + // maxSynRetries is the maximum number of SYN retransmits that TCP should + // send before aborting the attempt to connect. It cannot exceed 255. + // + // NOTE: This is currently a no-op and does not change the SYN + // retransmissions. + maxSynRetries uint8 + + // windowClamp is used to bound the size of the advertised window to + // this value. + windowClamp uint32 + // The following fields are used to manage the send buffer. When // segments are ready to be sent, they are added to sndQueue and the // protocol goroutine is signaled via sndWaker. @@ -795,8 +806,10 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue interval: 75 * time.Second, count: 9, }, - uniqueID: s.UniqueID(), - txHash: s.Rand().Uint32(), + uniqueID: s.UniqueID(), + txHash: s.Rand().Uint32(), + windowClamp: DefaultReceiveBufferSize, + maxSynRetries: DefaultSynRetries, } var ss SendBufferSizeOption @@ -829,6 +842,11 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue e.tcpLingerTimeout = time.Duration(tcpLT) } + var synRetries tcpip.TCPSynRetriesOption + if err := s.TransportProtocolOption(ProtocolNumber, &synRetries); err == nil { + e.maxSynRetries = uint8(synRetries) + } + if p := s.GetTCPProbe(); p != nil { e.probe = p } @@ -1603,6 +1621,36 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { e.ttl = uint8(v) e.UnlockUser() + case tcpip.TCPSynCountOption: + if v < 1 || v > 255 { + return tcpip.ErrInvalidOptionValue + } + e.LockUser() + e.maxSynRetries = uint8(v) + e.UnlockUser() + + case tcpip.TCPWindowClampOption: + if v == 0 { + e.LockUser() + switch e.EndpointState() { + case StateClose, StateInitial: + e.windowClamp = 0 + e.UnlockUser() + return nil + default: + e.UnlockUser() + return tcpip.ErrInvalidOptionValue + } + } + var rs ReceiveBufferSizeOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil { + if v < rs.Min/2 { + v = rs.Min / 2 + } + } + e.LockUser() + e.windowClamp = uint32(v) + e.UnlockUser() } return nil } @@ -1826,6 +1874,18 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { e.UnlockUser() return v, nil + case tcpip.TCPSynCountOption: + e.LockUser() + v := int(e.maxSynRetries) + e.UnlockUser() + return v, nil + + case tcpip.TCPWindowClampOption: + e.LockUser() + v := int(e.windowClamp) + e.UnlockUser() + return v, nil + default: return -1, tcpip.ErrUnknownProtocolOption } diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index cfd9a4e8e..1e64283b5 100644 --- 
a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -64,6 +64,10 @@ const ( // DefaultTCPTimeWaitTimeout is the amount of time that sockets linger // in TIME_WAIT state before being marked closed. DefaultTCPTimeWaitTimeout = 60 * time.Second + + // DefaultSynRetries is the default value for the number of SYN retransmits + // before a connect is aborted. + DefaultSynRetries = 6 ) // SACKEnabled option can be used to enable SACK support in the TCP @@ -164,6 +168,7 @@ type protocol struct { tcpTimeWaitTimeout time.Duration minRTO time.Duration synRcvdCount synRcvdCounter + synRetries uint8 dispatcher *dispatcher } @@ -346,6 +351,15 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { p.mu.Unlock() return nil + case tcpip.TCPSynRetriesOption: + if v < 1 || v > 255 { + return tcpip.ErrInvalidOptionValue + } + p.mu.Lock() + p.synRetries = uint8(v) + p.mu.Unlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -420,6 +434,12 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { p.mu.RUnlock() return nil + case *tcpip.TCPSynRetriesOption: + p.mu.RLock() + *v = tcpip.TCPSynRetriesOption(p.synRetries) + p.mu.RUnlock() + return nil + default: return tcpip.ErrUnknownProtocolOption } @@ -452,6 +472,7 @@ func NewProtocol() stack.TransportProtocol { tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout, synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold}, dispatcher: newDispatcher(runtime.GOMAXPROCS(0)), + synRetries: DefaultSynRetries, minRTO: MinRTO, } } diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index 27779e47c..fa81845fd 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -876,6 +876,51 @@ TEST_P(TCPSocketPairTest, SetTCPUserTimeoutAboveZero) { EXPECT_EQ(get, kAbove); } +TEST_P(TCPSocketPairTest, SetTCPWindowClampBelowMinRcvBufConnectedSocket) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + // Discover minimum receive buf by setting a really low value + // for the receive buffer. + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &kZero, + sizeof(kZero)), + SyscallSucceeds()); + + // Now retrieve the minimum value for SO_RCVBUF as the set above should + // have caused SO_RCVBUF for the socket to be set to the minimum. + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int min_so_rcvbuf = get; + + { + // Setting TCP_WINDOW_CLAMP to zero for a connected socket is not permitted. + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &kZero, sizeof(kZero)), + SyscallFailsWithErrno(EINVAL)); + + // Non-zero clamp values below MIN_SO_RCVBUF/2 should result in the clamp + // being set to MIN_SO_RCVBUF/2. 
+ int below_half_min_so_rcvbuf = min_so_rcvbuf / 2 - 1; + EXPECT_THAT( + setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &below_half_min_so_rcvbuf, sizeof(below_half_min_so_rcvbuf)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(min_so_rcvbuf / 2, get); + } +} + TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) { DisableSave ds; // Too many syscalls. constexpr int kThreadCount = 1000; diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index d9c1ac0e1..a4d2953e1 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -1313,7 +1313,7 @@ TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) { int get = -1; socklen_t get_len = sizeof(get); ASSERT_THAT( - getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), + getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); EXPECT_EQ(get, 0); @@ -1326,7 +1326,7 @@ TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) { int get = -1; socklen_t get_len = sizeof(get); ASSERT_THAT( - getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), + getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); EXPECT_EQ(get, 0); @@ -1378,6 +1378,187 @@ TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) { SyscallSucceedsWithValue(0)); } +TEST_P(SimpleTcpSocketTest, SetTCPSynCntLessThanOne) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int default_syn_cnt = get; + + { + // TCP_SYNCNT less than 1 should be rejected with an EINVAL. + constexpr int kZero = 0; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kZero, sizeof(kZero)), + SyscallFailsWithErrno(EINVAL)); + + // TCP_SYNCNT less than 1 should be rejected with an EINVAL. 
+ constexpr int kNeg = -1; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kNeg, sizeof(kNeg)), + SyscallFailsWithErrno(EINVAL)); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(default_syn_cnt, get); + } +} + +TEST_P(SimpleTcpSocketTest, GetTCPSynCntDefault) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + int get = -1; + socklen_t get_len = sizeof(get); + constexpr int kDefaultSynCnt = 6; + + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kDefaultSynCnt); +} + +TEST_P(SimpleTcpSocketTest, SetTCPSynCntGreaterThanOne) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + constexpr int kTCPSynCnt = 20; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt, + sizeof(kTCPSynCnt)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kTCPSynCnt); +} + +TEST_P(SimpleTcpSocketTest, SetTCPSynCntAboveMax) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int default_syn_cnt = get; + { + constexpr int kTCPSynCnt = 256; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt, + sizeof(kTCPSynCnt)), + SyscallFailsWithErrno(EINVAL)); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, default_syn_cnt); + } +} + +TEST_P(SimpleTcpSocketTest, SetTCPWindowClampBelowMinRcvBuf) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Discover minimum receive buf by setting a really low value + // for the receive buffer. + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)), + SyscallSucceeds()); + + // Now retrieve the minimum value for SO_RCVBUF as the set above should + // have caused SO_RCVBUF for the socket to be set to the minimum. + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int min_so_rcvbuf = get; + + { + // TCP_WINDOW_CLAMP less than min_so_rcvbuf/2 should be set to + // min_so_rcvbuf/2. 
+ int below_half_min_rcvbuf = min_so_rcvbuf / 2 - 1; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &below_half_min_rcvbuf, sizeof(below_half_min_rcvbuf)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(min_so_rcvbuf / 2, get); + } +} + +TEST_P(SimpleTcpSocketTest, SetTCPWindowClampZeroClosedSocket) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + constexpr int kZero = 0; + ASSERT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &kZero, sizeof(kZero)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), + SyscallSucceeds()); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, kZero); +} + +TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + // Discover minimum receive buf by setting a really low value + // for the receive buffer. + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)), + SyscallSucceeds()); + + // Now retrieve the minimum value for SO_RCVBUF as the set above should + // have caused SO_RCVBUF for the socket to be set to the minimum. + int get = -1; + socklen_t get_len = sizeof(get); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + int min_so_rcvbuf = get; + + { + int above_half_min_rcv_buf = min_so_rcvbuf / 2 + 1; + EXPECT_THAT( + setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, + &above_half_min_rcv_buf, sizeof(above_half_min_rcv_buf)), + SyscallSucceeds()); + + int get = -1; + socklen_t get_len = sizeof(get); + + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(above_half_min_rcv_buf, get); + } +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); -- cgit v1.2.3 From f1ad2d54ab28dcdaaa5d7aa95d8f1b370b6fd36d Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Wed, 13 May 2020 21:25:28 -0700 Subject: Fix TCP segment retransmit timeout handling. As per RFC 1122 and Linux retransmit timeout handling: - The segment retransmit timeout needs to exponentially increase and cap at a predefined value. - TCP connection needs to timeout after a predefined number of segment retransmissions. - TCP connection should not timeout when the retranmission timeout exceeds MaxRTO, predefined upper bound. 
Fixes #2673 PiperOrigin-RevId: 311463961 --- pkg/tcpip/tcpip.go | 8 ++ pkg/tcpip/transport/tcp/protocol.go | 31 +++++++ pkg/tcpip/transport/tcp/snd.go | 59 ++++++++---- pkg/tcpip/transport/tcp/tcp_test.go | 114 +++++++++++++++++++++++- test/packetimpact/tests/BUILD | 10 +++ test/packetimpact/tests/tcp_retransmits_test.go | 84 +++++++++++++++++ 6 files changed, 284 insertions(+), 22 deletions(-) create mode 100644 test/packetimpact/tests/tcp_retransmits_test.go (limited to 'test') diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 6270146fa..45e930ad8 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -698,6 +698,14 @@ type TCPDeferAcceptOption time.Duration // default MinRTO used by the Stack. type TCPMinRTOOption time.Duration +// TCPMaxRTOOption is use by SetSockOpt/GetSockOpt to allow overriding +// default MaxRTO used by the Stack. +type TCPMaxRTOOption time.Duration + +// TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the +// maximum number of retransmits after which we time out the connection. +type TCPMaxRetriesOption uint64 + // TCPSynRcvdCountThresholdOption is used by SetSockOpt/GetSockOpt to specify // the number of endpoints that can be in SYN-RCVD state before the stack // switches to using SYN cookies. diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 1e64283b5..2a2a7ddeb 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -167,6 +167,8 @@ type protocol struct { tcpLingerTimeout time.Duration tcpTimeWaitTimeout time.Duration minRTO time.Duration + maxRTO time.Duration + maxRetries uint32 synRcvdCount synRcvdCounter synRetries uint8 dispatcher *dispatcher @@ -345,6 +347,21 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { p.mu.Unlock() return nil + case tcpip.TCPMaxRTOOption: + if v < 0 { + v = tcpip.TCPMaxRTOOption(MaxRTO) + } + p.mu.Lock() + p.maxRTO = time.Duration(v) + p.mu.Unlock() + return nil + + case tcpip.TCPMaxRetriesOption: + p.mu.Lock() + p.maxRetries = uint32(v) + p.mu.Unlock() + return nil + case tcpip.TCPSynRcvdCountThresholdOption: p.mu.Lock() p.synRcvdCount.SetThreshold(uint64(v)) @@ -428,6 +445,18 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { p.mu.RUnlock() return nil + case *tcpip.TCPMaxRTOOption: + p.mu.RLock() + *v = tcpip.TCPMaxRTOOption(p.maxRTO) + p.mu.RUnlock() + return nil + + case *tcpip.TCPMaxRetriesOption: + p.mu.RLock() + *v = tcpip.TCPMaxRetriesOption(p.maxRetries) + p.mu.RUnlock() + return nil + case *tcpip.TCPSynRcvdCountThresholdOption: p.mu.RLock() *v = tcpip.TCPSynRcvdCountThresholdOption(p.synRcvdCount.Threshold()) @@ -474,5 +503,7 @@ func NewProtocol() stack.TransportProtocol { dispatcher: newDispatcher(runtime.GOMAXPROCS(0)), synRetries: DefaultSynRetries, minRTO: MinRTO, + maxRTO: MaxRTO, + maxRetries: MaxRetries, } } diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 9e547a221..06dc9b7d7 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -43,7 +43,8 @@ const ( nDupAckThreshold = 3 // MaxRetries is the maximum number of probe retries sender does - // before timing out the connection, Linux default TCP_RETR2. + // before timing out the connection. + // Linux default TCP_RETR2, net.ipv4.tcp_retries2. MaxRetries = 15 ) @@ -165,6 +166,12 @@ type sender struct { // minRTO is the minimum permitted value for sender.rto. minRTO time.Duration + // maxRTO is the maximum permitted value for sender.rto. 
+ maxRTO time.Duration + + // maxRetries is the maximum permitted retransmissions. + maxRetries uint32 + // maxPayloadSize is the maximum size of the payload of a given segment. // It is initialized on demand. maxPayloadSize int @@ -276,12 +283,24 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint // etc. s.ep.scoreboard = NewSACKScoreboard(uint16(s.maxPayloadSize), iss) - // Get Stack wide minRTO. - var v tcpip.TCPMinRTOOption - if err := ep.stack.TransportProtocolOption(ProtocolNumber, &v); err != nil { + // Get Stack wide config. + var minRTO tcpip.TCPMinRTOOption + if err := ep.stack.TransportProtocolOption(ProtocolNumber, &minRTO); err != nil { panic(fmt.Sprintf("unable to get minRTO from stack: %s", err)) } - s.minRTO = time.Duration(v) + s.minRTO = time.Duration(minRTO) + + var maxRTO tcpip.TCPMaxRTOOption + if err := ep.stack.TransportProtocolOption(ProtocolNumber, &maxRTO); err != nil { + panic(fmt.Sprintf("unable to get maxRTO from stack: %s", err)) + } + s.maxRTO = time.Duration(maxRTO) + + var maxRetries tcpip.TCPMaxRetriesOption + if err := ep.stack.TransportProtocolOption(ProtocolNumber, &maxRetries); err != nil { + panic(fmt.Sprintf("unable to get maxRetries from stack: %s", err)) + } + s.maxRetries = uint32(maxRetries) return s } @@ -485,7 +504,7 @@ func (s *sender) retransmitTimerExpired() bool { } elapsed := time.Since(s.firstRetransmittedSegXmitTime) - remaining := MaxRTO + remaining := s.maxRTO if uto != 0 { // Cap to the user specified timeout if one is specified. remaining = uto - elapsed @@ -494,24 +513,17 @@ func (s *sender) retransmitTimerExpired() bool { // Always honor the user-timeout irrespective of whether the zero // window probes were acknowledged. // net/ipv4/tcp_timer.c::tcp_probe_timer() - if remaining <= 0 || s.unackZeroWindowProbes >= MaxRetries { + if remaining <= 0 || s.unackZeroWindowProbes >= s.maxRetries { return false } - if s.rto >= MaxRTO { - // RFC 1122 section: 4.2.2.17 - // A TCP MAY keep its offered receive window closed - // indefinitely. As long as the receiving TCP continues to - // send acknowledgments in response to the probe segments, the - // sending TCP MUST allow the connection to stay open. - if !(s.zeroWindowProbing && s.unackZeroWindowProbes == 0) { - return false - } - } - // Set new timeout. The timer will be restarted by the call to sendData // below. s.rto *= 2 + // Cap the RTO as per RFC 1122 4.2.3.1, RFC 6298 5.5 + if s.rto > s.maxRTO { + s.rto = s.maxRTO + } // Cap RTO to remaining time. if s.rto > remaining { @@ -565,9 +577,20 @@ func (s *sender) retransmitTimerExpired() bool { // send. if s.zeroWindowProbing { s.sendZeroWindowProbe() + // RFC 1122 4.2.2.17: A TCP MAY keep its offered receive window closed + // indefinitely. As long as the receiving TCP continues to send + // acknowledgments in response to the probe segments, the sending TCP + // MUST allow the connection to stay open. return true } + seg := s.writeNext + // RFC 1122 4.2.3.5: Close the connection when the number of + // retransmissions for this segment is beyond a limit. 
+ if seg != nil && seg.xmitCount > s.maxRetries { + return false + } + s.sendData() return true diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index d2c90ebd5..0b4512c65 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -2994,6 +2994,101 @@ func TestSendOnResetConnection(t *testing.T) { } } +// TestMaxRetransmitsTimeout tests if the connection is timed out after +// a segment has been retransmitted MaxRetries times. +func TestMaxRetransmitsTimeout(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + const numRetries = 2 + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPMaxRetriesOption(numRetries)); err != nil { + t.Fatalf("could not set protocol option MaxRetries.\n") + } + + c.CreateConnected(789 /* iss */, 30000 /* rcvWnd */, -1 /* epRcvBuf */) + + waitEntry, notifyCh := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&waitEntry, waiter.EventHUp) + defer c.WQ.EventUnregister(&waitEntry) + + _, _, err := c.EP.Write(tcpip.SlicePayload(buffer.NewView(1)), tcpip.WriteOptions{}) + if err != nil { + t.Fatalf("Write failed: %v", err) + } + + // Expect first transmit and MaxRetries retransmits. + for i := 0; i < numRetries+1; i++ { + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck|header.TCPFlagPsh), + ), + ) + } + // Wait for the connection to timeout after MaxRetries retransmits. + initRTO := 1 * time.Second + select { + case <-notifyCh: + case <-time.After((2 << numRetries) * initRTO): + t.Fatalf("connection still alive after maximum retransmits.\n") + } + + // Send an ACK and expect a RST as the connection would have been closed. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagAck, + }) + + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagRst), + ), + ) + + if got := c.Stack().Stats().TCP.EstablishedTimedout.Value(); got != 1 { + t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout.Value() = %v, want = 1", got) + } +} + +// TestMaxRTO tests if the retransmit interval caps to MaxRTO. 
+func TestMaxRTO(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + rto := 1 * time.Second + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPMaxRTOOption(rto)); err != nil { + t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPMaxRTO(%d) failed: %s", rto, err) + } + + c.CreateConnected(789 /* iss */, 30000 /* rcvWnd */, -1 /* epRcvBuf */) + + _, _, err := c.EP.Write(tcpip.SlicePayload(buffer.NewView(1)), tcpip.WriteOptions{}) + if err != nil { + t.Fatalf("Write failed: %v", err) + } + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)), + ), + ) + const numRetransmits = 2 + for i := 0; i < numRetransmits; i++ { + start := time.Now() + checker.IPv4(t, c.GetPacket(), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)), + ), + ) + if time.Since(start).Round(time.Second).Seconds() != rto.Seconds() { + t.Errorf("Retransmit interval not capped to MaxRTO.\n") + } + } +} + func TestFinImmediately(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() @@ -6605,9 +6700,16 @@ func TestTCPUserTimeout(t *testing.T) { c.CreateConnected(789, 30000, -1 /* epRcvBuf */) + waitEntry, notifyCh := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&waitEntry, waiter.EventHUp) + defer c.WQ.EventUnregister(&waitEntry) + origEstablishedTimedout := c.Stack().Stats().TCP.EstablishedTimedout.Value() - userTimeout := 50 * time.Millisecond + // Ensure that on the next retransmit timer fire, the user timeout has + // expired. + initRTO := 1 * time.Second + userTimeout := initRTO / 2 c.EP.SetSockOpt(tcpip.TCPUserTimeoutOption(userTimeout)) // Send some data and wait before ACKing it. @@ -6627,9 +6729,13 @@ func TestTCPUserTimeout(t *testing.T) { ), ) - // Wait for a little over the minimum retransmit timeout of 200ms for - // the retransmitTimer to fire and close the connection. - time.Sleep(tcp.MinRTO + 10*time.Millisecond) + // Wait for the retransmit timer to be fired and the user timeout to cause + // close of the connection. + select { + case <-notifyCh: + case <-time.After(2 * initRTO): + t.Fatalf("connection still alive after %s, should have been closed after :%s", 2*initRTO, userTimeout) + } // No packet should be received as the connection should be silently // closed due to timeout. diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index c25b3b8c1..fff7787c8 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -81,6 +81,16 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_retransmits", + srcs = ["tcp_retransmits_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "tcp_outside_the_window", srcs = ["tcp_outside_the_window_test.go"], diff --git a/test/packetimpact/tests/tcp_retransmits_test.go b/test/packetimpact/tests/tcp_retransmits_test.go new file mode 100644 index 000000000..c043ad881 --- /dev/null +++ b/test/packetimpact/tests/tcp_retransmits_test.go @@ -0,0 +1,84 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_retransmits_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +// TestRetransmits tests retransmits occur at exponentially increasing +// time intervals. +func TestRetransmits(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + acceptFd, _ := dut.Accept(listenFd) + defer dut.Close(acceptFd) + + dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + + sampleData := []byte("Sample Data") + samplePayload := &tb.Payload{Bytes: sampleData} + + dut.Send(acceptFd, sampleData, 0) + if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + // Give a chance for the dut to estimate RTO with RTT from the DATA-ACK. + // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which + // we can skip sending this ACK. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + + startRTO := time.Second + current := startRTO + first := time.Now() + dut.Send(acceptFd, sampleData, 0) + seq := tb.Uint32(uint32(*conn.RemoteSeqNum())) + if _, err := conn.ExpectData(&tb.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil { + t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + } + // Expect retransmits of the same segment. + for i := 0; i < 5; i++ { + start := time.Now() + if _, err := conn.ExpectData(&tb.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil { + t.Fatalf("expected a packet with payload %v: %s loop %d", samplePayload, err, i) + } + if i == 0 { + startRTO = time.Now().Sub(first) + current = 2 * startRTO + continue + } + // Check if the probes came at exponentially increasing intervals. + if p := time.Since(start); p < current-startRTO { + t.Fatalf("retransmit came sooner interval %d probe %d\n", p, i) + } + current *= 2 + } +} -- cgit v1.2.3 From 326abf5e3643f954a295a8bbfaa6f6a47db59767 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 14 May 2020 18:16:10 -0700 Subject: Internal change. PiperOrigin-RevId: 311645222 --- test/packetimpact/tests/BUILD | 8 ++++---- test/packetimpact/tests/defs.bzl | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index fff7787c8..d4fcf31fa 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -19,7 +19,7 @@ packetimpact_go_test( name = "udp_recv_multicast", srcs = ["udp_recv_multicast_test.go"], # TODO(b/152813495): Fix netstack then remove the line below. 
- netstack = False, + expect_netstack_failure = True, deps = [ "//pkg/tcpip", "//pkg/tcpip/header", @@ -32,7 +32,7 @@ packetimpact_go_test( name = "udp_icmp_error_propagation", srcs = ["udp_icmp_error_propagation_test.go"], # TODO(b/153926291): Fix netstack then remove the line below. - netstack = False, + expect_netstack_failure = True, deps = [ "//pkg/tcpip", "//pkg/tcpip/header", @@ -116,7 +116,7 @@ packetimpact_go_test( name = "tcp_should_piggyback", srcs = ["tcp_should_piggyback_test.go"], # TODO(b/153680566): Fix netstack then remove the line below. - netstack = False, + expect_netstack_failure = True, deps = [ "//pkg/tcpip/header", "//test/packetimpact/testbench", @@ -149,7 +149,7 @@ packetimpact_go_test( name = "icmpv6_param_problem", srcs = ["icmpv6_param_problem_test.go"], # TODO(b/153485026): Fix netstack then remove the line below. - netstack = False, + expect_netstack_failure = True, deps = [ "//pkg/tcpip", "//pkg/tcpip/header", diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl index 27c5de375..45dce64ab 100644 --- a/test/packetimpact/tests/defs.bzl +++ b/test/packetimpact/tests/defs.bzl @@ -106,15 +106,15 @@ def packetimpact_netstack_test( **kwargs ) -def packetimpact_go_test(name, size = "small", pure = True, linux = True, netstack = True, **kwargs): +def packetimpact_go_test(name, size = "small", pure = True, expect_linux_failure = False, expect_netstack_failure = False, **kwargs): """Add packetimpact tests written in go. Args: name: name of the test size: size of the test pure: make a static go binary - linux: generate a linux test - netstack: generate a netstack test + expect_linux_failure: expect the test to fail for Linux + expect_netstack_failure: expect the test to fail for Netstack **kwargs: all the other args, forwarded to go_test """ testbench_binary = name + "_test" @@ -127,11 +127,11 @@ def packetimpact_go_test(name, size = "small", pure = True, linux = True, netsta ) packetimpact_linux_test( name = name, - expect_failure = not linux, + expect_failure = expect_linux_failure, testbench_binary = testbench_binary, ) packetimpact_netstack_test( name = name, - expect_failure = not netstack, + expect_failure = expect_netstack_failure, testbench_binary = testbench_binary, ) -- cgit v1.2.3 From fb7e5f1676ad9e6e7e83312e09fe55f91ade41bc Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Thu, 14 May 2020 20:08:25 -0700 Subject: Make utimes_test pass on VFS2. 
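
For readers unfamiliar with the timestamp syscalls involved, here is a minimal
userspace sketch of the utimensat(2) semantics that the updated code paths and
tests exercise. It is illustrative only (not part of this change); it uses
golang.org/x/sys/unix, and the path below is a made-up example:

    package main

    import (
        "fmt"

        "golang.org/x/sys/unix"
    )

    func main() {
        // Set atime to "now" and leave mtime untouched.
        ts := []unix.Timespec{
            {Nsec: unix.UTIME_NOW},  // atime
            {Nsec: unix.UTIME_OMIT}, // mtime
        }
        if err := unix.UtimesNanoAt(unix.AT_FDCWD, "/tmp/example", ts, 0); err != nil {
            fmt.Println("utimensat:", err)
        }

        // A nanosecond value outside [0, 999999999] that is not UTIME_NOW or
        // UTIME_OMIT is rejected with EINVAL; Linux performs this check before
        // the path lookup, which the updated Utimensat ordering also reflects.
        bad := []unix.Timespec{{Sec: 1, Nsec: 1000000000}, {Sec: 2, Nsec: 0}}
        if err := unix.UtimesNanoAt(unix.AT_FDCWD, "/tmp/example", bad, 0); err != unix.EINVAL {
            fmt.Println("expected EINVAL, got:", err)
        }
    }
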
PiperOrigin-RevId: 311657502 --- pkg/sentry/fsimpl/gofer/gofer.go | 4 +- pkg/sentry/syscalls/linux/vfs2/setstat.go | 123 +++++++++++++++++++----------- pkg/sentry/syscalls/linux/vfs2/vfs2.go | 2 +- test/syscalls/linux/utimes.cc | 33 ++++---- 4 files changed, 101 insertions(+), 61 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 353e2cf5b..ebf063a58 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -869,8 +869,8 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin Size: stat.Mask&linux.STATX_SIZE != 0, ATime: stat.Mask&linux.STATX_ATIME != 0, MTime: stat.Mask&linux.STATX_MTIME != 0, - ATimeNotSystemTime: stat.Atime.Nsec != linux.UTIME_NOW, - MTimeNotSystemTime: stat.Mtime.Nsec != linux.UTIME_NOW, + ATimeNotSystemTime: stat.Mask&linux.STATX_ATIME != 0 && stat.Atime.Nsec != linux.UTIME_NOW, + MTimeNotSystemTime: stat.Mask&linux.STATX_MTIME != 0 && stat.Mtime.Nsec != linux.UTIME_NOW, }, p9.SetAttr{ Permissions: p9.FileMode(stat.Mode), UID: p9.UID(stat.UID), diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go index 4e61f1452..09ecfed26 100644 --- a/pkg/sentry/syscalls/linux/vfs2/setstat.go +++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go @@ -246,73 +246,104 @@ func Utimes(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } - opts := vfs.SetStatOptions{ - Stat: linux.Statx{ - Mask: linux.STATX_ATIME | linux.STATX_MTIME, - }, - } - if timesAddr == 0 { - opts.Stat.Atime.Nsec = linux.UTIME_NOW - opts.Stat.Mtime.Nsec = linux.UTIME_NOW - } else { - var times [2]linux.Timeval - if _, err := t.CopyIn(timesAddr, ×); err != nil { - return 0, nil, err - } - opts.Stat.Atime = linux.StatxTimestamp{ - Sec: times[0].Sec, - Nsec: uint32(times[0].Usec * 1000), - } - opts.Stat.Mtime = linux.StatxTimestamp{ - Sec: times[1].Sec, - Nsec: uint32(times[1].Usec * 1000), - } + var opts vfs.SetStatOptions + if err := populateSetStatOptionsForUtimes(t, timesAddr, &opts); err != nil { + return 0, nil, err } return 0, nil, setstatat(t, linux.AT_FDCWD, path, disallowEmptyPath, followFinalSymlink, &opts) } -// Utimensat implements Linux syscall utimensat(2). -func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { +// Futimesat implements Linux syscall futimesat(2). +func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { dirfd := args[0].Int() pathAddr := args[1].Pointer() timesAddr := args[2].Pointer() - flags := args[3].Int() - if flags&^linux.AT_SYMLINK_NOFOLLOW != 0 { - return 0, nil, syserror.EINVAL - } - - path, err := copyInPath(t, pathAddr) - if err != nil { - return 0, nil, err + // "If filename is NULL and dfd refers to an open file, then operate on the + // file. Otherwise look up filename, possibly using dfd as a starting + // point." 
- fs/utimes.c + var path fspath.Path + shouldAllowEmptyPath := allowEmptyPath + if dirfd == linux.AT_FDCWD || pathAddr != 0 { + var err error + path, err = copyInPath(t, pathAddr) + if err != nil { + return 0, nil, err + } + shouldAllowEmptyPath = disallowEmptyPath } var opts vfs.SetStatOptions - if err := populateSetStatOptionsForUtimens(t, timesAddr, &opts); err != nil { + if err := populateSetStatOptionsForUtimes(t, timesAddr, &opts); err != nil { return 0, nil, err } - return 0, nil, setstatat(t, dirfd, path, disallowEmptyPath, followFinalSymlink, &opts) + return 0, nil, setstatat(t, dirfd, path, shouldAllowEmptyPath, followFinalSymlink, &opts) } -// Futimens implements Linux syscall futimens(2). -func Futimens(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := args[0].Int() - timesAddr := args[1].Pointer() - - file := t.GetFileVFS2(fd) - if file == nil { - return 0, nil, syserror.EBADF +func populateSetStatOptionsForUtimes(t *kernel.Task, timesAddr usermem.Addr, opts *vfs.SetStatOptions) error { + if timesAddr == 0 { + opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME + opts.Stat.Atime.Nsec = linux.UTIME_NOW + opts.Stat.Mtime.Nsec = linux.UTIME_NOW + return nil } - defer file.DecRef() + var times [2]linux.Timeval + if _, err := t.CopyIn(timesAddr, ×); err != nil { + return err + } + if times[0].Usec < 0 || times[0].Usec > 999999 || times[1].Usec < 0 || times[1].Usec > 999999 { + return syserror.EINVAL + } + opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME + opts.Stat.Atime = linux.StatxTimestamp{ + Sec: times[0].Sec, + Nsec: uint32(times[0].Usec * 1000), + } + opts.Stat.Mtime = linux.StatxTimestamp{ + Sec: times[1].Sec, + Nsec: uint32(times[1].Usec * 1000), + } + return nil +} +// Utimensat implements Linux syscall utimensat(2). +func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + dirfd := args[0].Int() + pathAddr := args[1].Pointer() + timesAddr := args[2].Pointer() + flags := args[3].Int() + + // Linux requires that the UTIME_OMIT check occur before checking path or + // flags. var opts vfs.SetStatOptions if err := populateSetStatOptionsForUtimens(t, timesAddr, &opts); err != nil { return 0, nil, err } + if opts.Stat.Mask == 0 { + return 0, nil, nil + } - return 0, nil, file.SetStat(t, opts) + if flags&^linux.AT_SYMLINK_NOFOLLOW != 0 { + return 0, nil, syserror.EINVAL + } + + // "If filename is NULL and dfd refers to an open file, then operate on the + // file. Otherwise look up filename, possibly using dfd as a starting + // point." 
- fs/utimes.c + var path fspath.Path + shouldAllowEmptyPath := allowEmptyPath + if dirfd == linux.AT_FDCWD || pathAddr != 0 { + var err error + path, err = copyInPath(t, pathAddr) + if err != nil { + return 0, nil, err + } + shouldAllowEmptyPath = disallowEmptyPath + } + + return 0, nil, setstatat(t, dirfd, path, shouldAllowEmptyPath, shouldFollowFinalSymlink(flags&linux.AT_SYMLINK_NOFOLLOW == 0), &opts) } func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, opts *vfs.SetStatOptions) error { @@ -327,6 +358,9 @@ func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, op return err } if times[0].Nsec != linux.UTIME_OMIT { + if times[0].Nsec != linux.UTIME_NOW && (times[0].Nsec < 0 || times[0].Nsec > 999999999) { + return syserror.EINVAL + } opts.Stat.Mask |= linux.STATX_ATIME opts.Stat.Atime = linux.StatxTimestamp{ Sec: times[0].Sec, @@ -334,6 +368,9 @@ func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, op } } if times[1].Nsec != linux.UTIME_OMIT { + if times[1].Nsec != linux.UTIME_NOW && (times[1].Nsec < 0 || times[1].Nsec > 999999999) { + return syserror.EINVAL + } opts.Stat.Mask |= linux.STATX_MTIME opts.Stat.Mtime = linux.StatxTimestamp{ Sec: times[1].Sec, diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index ec8da7f06..a332d01bd 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -123,7 +123,7 @@ func Override() { s.Table[258] = syscalls.Supported("mkdirat", Mkdirat) s.Table[259] = syscalls.Supported("mknodat", Mknodat) s.Table[260] = syscalls.Supported("fchownat", Fchownat) - s.Table[261] = syscalls.Supported("futimens", Futimens) + s.Table[261] = syscalls.Supported("futimesat", Futimesat) s.Table[262] = syscalls.Supported("newfstatat", Newfstatat) s.Table[263] = syscalls.Supported("unlinkat", Unlinkat) s.Table[264] = syscalls.Supported("renameat", Renameat) diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc index 22e6d1a85..e647d2896 100644 --- a/test/syscalls/linux/utimes.cc +++ b/test/syscalls/linux/utimes.cc @@ -48,12 +48,15 @@ void TimeBoxed(absl::Time* before, absl::Time* after, // filesystems set it to 1, so we don't do any truncation. struct timespec ts; EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds()); - *before = absl::TimeFromTimespec(ts); + // FIXME(b/132819225): gVisor filesystem timestamps inconsistently use the + // internal or host clock, which may diverge slightly. Allow some slack on + // times to account for the difference. + *before = absl::TimeFromTimespec(ts) - absl::Seconds(1); fn(); EXPECT_THAT(clock_gettime(CLOCK_REALTIME_COARSE, &ts), SyscallSucceeds()); - *after = absl::TimeFromTimespec(ts); + *after = absl::TimeFromTimespec(ts) + absl::Seconds(1); if (*after < *before) { // Clock jumped backwards; retry. 
@@ -68,11 +71,11 @@ void TimeBoxed(absl::Time* before, absl::Time* after, void TestUtimesOnPath(std::string const& path) { struct stat statbuf; - struct timeval times[2] = {{1, 0}, {2, 0}}; + struct timeval times[2] = {{10, 0}, {20, 0}}; EXPECT_THAT(utimes(path.c_str(), times), SyscallSucceeds()); EXPECT_THAT(stat(path.c_str(), &statbuf), SyscallSucceeds()); - EXPECT_EQ(1, statbuf.st_atime); - EXPECT_EQ(2, statbuf.st_mtime); + EXPECT_EQ(10, statbuf.st_atime); + EXPECT_EQ(20, statbuf.st_mtime); absl::Time before; absl::Time after; @@ -103,18 +106,18 @@ TEST(UtimesTest, OnDir) { TEST(UtimesTest, MissingPath) { auto path = NewTempAbsPath(); - struct timeval times[2] = {{1, 0}, {2, 0}}; + struct timeval times[2] = {{10, 0}, {20, 0}}; EXPECT_THAT(utimes(path.c_str(), times), SyscallFailsWithErrno(ENOENT)); } void TestFutimesat(int dirFd, std::string const& path) { struct stat statbuf; - struct timeval times[2] = {{1, 0}, {2, 0}}; + struct timeval times[2] = {{10, 0}, {20, 0}}; EXPECT_THAT(futimesat(dirFd, path.c_str(), times), SyscallSucceeds()); EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds()); - EXPECT_EQ(1, statbuf.st_atime); - EXPECT_EQ(2, statbuf.st_mtime); + EXPECT_EQ(10, statbuf.st_atime); + EXPECT_EQ(20, statbuf.st_mtime); absl::Time before; absl::Time after; @@ -175,11 +178,11 @@ TEST(FutimesatTest, InvalidNsec) { void TestUtimensat(int dirFd, std::string const& path) { struct stat statbuf; - const struct timespec times[2] = {{1, 0}, {2, 0}}; + const struct timespec times[2] = {{10, 0}, {20, 0}}; EXPECT_THAT(utimensat(dirFd, path.c_str(), times, 0), SyscallSucceeds()); EXPECT_THAT(fstatat(dirFd, path.c_str(), &statbuf, 0), SyscallSucceeds()); - EXPECT_EQ(1, statbuf.st_atime); - EXPECT_EQ(2, statbuf.st_mtime); + EXPECT_EQ(10, statbuf.st_atime); + EXPECT_EQ(20, statbuf.st_mtime); // Test setting with UTIME_NOW and UTIME_OMIT. struct stat statbuf2; @@ -301,13 +304,13 @@ TEST(Utimensat, NullPath) { auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDWR)); struct stat statbuf; - const struct timespec times[2] = {{1, 0}, {2, 0}}; + const struct timespec times[2] = {{10, 0}, {20, 0}}; // Call syscall directly. 
EXPECT_THAT(syscall(SYS_utimensat, fd.get(), NULL, times, 0), SyscallSucceeds()); EXPECT_THAT(fstatat(0, f.path().c_str(), &statbuf, 0), SyscallSucceeds()); - EXPECT_EQ(1, statbuf.st_atime); - EXPECT_EQ(2, statbuf.st_mtime); + EXPECT_EQ(10, statbuf.st_atime); + EXPECT_EQ(20, statbuf.st_mtime); } } // namespace -- cgit v1.2.3 From 0b26f9aa0f669dcbd46f49c115e210172b761489 Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Fri, 15 May 2020 02:52:12 -0400 Subject: passed the syscall test case 'fpsig_nested' on Arm64 platform Some functions were added for Arm64 platform: a, get_fp/set_fp b, inline_tgkill Test step: bazel test //test/syscalls:fpsig_nested_test_runsc_ptrace Signed-off-by: Bin Lu --- test/syscalls/linux/BUILD | 5 +--- test/syscalls/linux/fpsig_fork.cc | 6 ++--- test/syscalls/linux/fpsig_nested.cc | 49 +++++++++++++++++++++++++++++++------ 3 files changed, 45 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 5acdb8438..3f12a04c0 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -849,10 +849,7 @@ cc_binary( cc_binary( name = "fpsig_nested_test", testonly = 1, - srcs = select_arch( - amd64 = ["fpsig_nested.cc"], - arm64 = [], - ), + srcs = ["fpsig_nested.cc"], linkstatic = 1, deps = [ gtest, diff --git a/test/syscalls/linux/fpsig_fork.cc b/test/syscalls/linux/fpsig_fork.cc index d08111cd3..c47567b4e 100644 --- a/test/syscalls/linux/fpsig_fork.cc +++ b/test/syscalls/linux/fpsig_fork.cc @@ -37,11 +37,11 @@ namespace { #define __stringify_1(x...) #x #define __stringify(x...) __stringify_1(x) #define GET_FPREG(var, regname) \ - asm volatile("str "__stringify(regname) ", %0" : "=m"(var)) + asm volatile("str " __stringify(regname) ", %0" : "=m"(var)) #define SET_FPREG(var, regname) \ - asm volatile("ldr "__stringify(regname) ", %0" : "=m"(var)) + asm volatile("ldr " __stringify(regname) ", %0" : "=m"(var)) #define GET_FP0(var) GET_FPREG(var, d0) -#define SET_FP0(var) GET_FPREG(var, d0) +#define SET_FP0(var) SET_FPREG(var, d0) #endif int parent, child; diff --git a/test/syscalls/linux/fpsig_nested.cc b/test/syscalls/linux/fpsig_nested.cc index c476a8e7a..302d928d1 100644 --- a/test/syscalls/linux/fpsig_nested.cc +++ b/test/syscalls/linux/fpsig_nested.cc @@ -26,9 +26,22 @@ namespace testing { namespace { +#ifdef __x86_64__ #define GET_XMM(__var, __xmm) \ asm volatile("movq %%" #__xmm ", %0" : "=r"(__var)) #define SET_XMM(__var, __xmm) asm volatile("movq %0, %%" #__xmm : : "r"(__var)) +#define GET_FP0(__var) GET_XMM(__var, xmm0) +#define SET_FP0(__var) SET_XMM(__var, xmm0) +#elif __aarch64__ +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) +#define GET_FPREG(var, regname) \ + asm volatile("str " __stringify(regname) ", %0" : "=m"(var)) +#define SET_FPREG(var, regname) \ + asm volatile("ldr " __stringify(regname) ", %0" : "=m"(var)) +#define GET_FP0(var) GET_FPREG(var, d0) +#define SET_FP0(var) SET_FPREG(var, d0) +#endif int pid; int tid; @@ -40,20 +53,21 @@ void sigusr2(int s, siginfo_t* siginfo, void* _uc) { uint64_t val = SIGUSR2; // Record the value of %xmm0 on entry and then clobber it. - GET_XMM(entryxmm[1], xmm0); - SET_XMM(val, xmm0); - GET_XMM(exitxmm[1], xmm0); + GET_FP0(entryxmm[1]); + SET_FP0(val); + GET_FP0(exitxmm[1]); } void sigusr1(int s, siginfo_t* siginfo, void* _uc) { uint64_t val = SIGUSR1; // Record the value of %xmm0 on entry and then clobber it. 
- GET_XMM(entryxmm[0], xmm0); - SET_XMM(val, xmm0); + GET_FP0(entryxmm[0]); + SET_FP0(val); // Send a SIGUSR2 to ourself. The signal mask is configured such that // the SIGUSR2 handler will run before this handler returns. +#ifdef __x86_64__ asm volatile( "movl %[killnr], %%eax;" "movl %[pid], %%edi;" @@ -66,10 +80,19 @@ void sigusr1(int s, siginfo_t* siginfo, void* _uc) { : "rax", "rdi", "rsi", "rdx", // Clobbered by syscall. "rcx", "r11"); +#elif __aarch64__ + asm volatile( + "mov x8, %0\n" + "mov x0, %1\n" + "mov x1, %2\n" + "mov x2, %3\n" + "svc #0\n" ::"r"(__NR_tgkill), + "r"(pid), "r"(tid), "r"(SIGUSR2)); +#endif // Record value of %xmm0 again to verify that the nested signal handler // does not clobber it. - GET_XMM(exitxmm[0], xmm0); + GET_FP0(exitxmm[0]); } TEST(FPSigTest, NestedSignals) { @@ -98,8 +121,9 @@ TEST(FPSigTest, NestedSignals) { // to signal the current thread ensures that this is the clobbered thread. uint64_t expected = 0xdeadbeeffacefeed; - SET_XMM(expected, xmm0); + SET_FP0(expected); +#ifdef __x86_64__ asm volatile( "movl %[killnr], %%eax;" "movl %[pid], %%edi;" @@ -112,9 +136,18 @@ TEST(FPSigTest, NestedSignals) { : "rax", "rdi", "rsi", "rdx", // Clobbered by syscall. "rcx", "r11"); +#elif __aarch64__ + asm volatile( + "mov x8, %0\n" + "mov x0, %1\n" + "mov x1, %2\n" + "mov x2, %3\n" + "svc #0\n" ::"r"(__NR_tgkill), + "r"(pid), "r"(tid), "r"(SIGUSR1)); +#endif uint64_t got; - GET_XMM(got, xmm0); + GET_FP0(got); // // The checks below verifies the following: -- cgit v1.2.3 From 99a18ec8b4cc98694b8bc61a35b360d5fca1a075 Mon Sep 17 00:00:00 2001 From: Zeling Feng Date: Mon, 18 May 2020 11:30:04 -0700 Subject: Support TCP options for packetimpact PiperOrigin-RevId: 312119730 --- test/packetimpact/testbench/BUILD | 1 + test/packetimpact/testbench/layers.go | 13 +++- test/packetimpact/testbench/layers_test.go | 112 +++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index fed51006f..682933067 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -39,6 +39,7 @@ go_test( library = ":testbench", deps = [ "//pkg/tcpip", + "//pkg/tcpip/header", "@com_github_mohae_deepcopy//:go_default_library", ], ) diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 49370377d..1b0e5b8fc 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -689,6 +689,7 @@ type TCP struct { WindowSize *uint16 Checksum *uint16 UrgentPointer *uint16 + Options []byte } func (l *TCP) String() string { @@ -697,7 +698,7 @@ func (l *TCP) String() string { // ToBytes implements Layer.ToBytes. 
func (l *TCP) ToBytes() ([]byte, error) { - b := make([]byte, header.TCPMinimumSize) + b := make([]byte, l.length()) h := header.TCP(b) if l.SrcPort != nil { h.SetSourcePort(*l.SrcPort) @@ -727,6 +728,8 @@ func (l *TCP) ToBytes() ([]byte, error) { if l.UrgentPointer != nil { h.SetUrgentPoiner(*l.UrgentPointer) } + copy(b[header.TCPMinimumSize:], l.Options) + header.AddTCPOptionPadding(b[header.TCPMinimumSize:], len(l.Options)) if l.Checksum != nil { h.SetChecksum(*l.Checksum) return h, nil @@ -811,6 +814,7 @@ func parseTCP(b []byte) (Layer, layerParser) { WindowSize: Uint16(h.WindowSize()), Checksum: Uint16(h.Checksum()), UrgentPointer: Uint16(h.UrgentPointer()), + Options: b[header.TCPMinimumSize:h.DataOffset()], } return &tcp, parsePayload } @@ -821,7 +825,12 @@ func (l *TCP) match(other Layer) bool { func (l *TCP) length() int { if l.DataOffset == nil { - return header.TCPMinimumSize + // TCP header including the options must end on a 32-bit + // boundary; the user could potentially give us a slice + // whose length is not a multiple of 4 bytes, so we have + // to do the alignment here. + optlen := (len(l.Options) + 3) & ^3 + return header.TCPMinimumSize + optlen } return int(*l.DataOffset) } diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index 96f72de5b..c7f00e70d 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -15,10 +15,13 @@ package testbench import ( + "bytes" + "net" "testing" "github.com/mohae/deepcopy" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" ) func TestLayerMatch(t *testing.T) { @@ -393,3 +396,112 @@ func TestLayersDiff(t *testing.T) { } } } + +func TestTCPOptions(t *testing.T) { + for _, tt := range []struct { + description string + wantBytes []byte + wantLayers Layers + }{ + { + description: "without payload", + wantBytes: []byte{ + // IPv4 Header + 0x45, 0x00, 0x00, 0x2c, 0x00, 0x01, 0x00, 0x00, 0x40, 0x06, + 0xf9, 0x77, 0xc0, 0xa8, 0x00, 0x02, 0xc0, 0xa8, 0x00, 0x01, + // TCP Header + 0x30, 0x39, 0xd4, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x60, 0x02, 0x20, 0x00, 0xf5, 0x1c, 0x00, 0x00, + // WindowScale Option + 0x03, 0x03, 0x02, + // NOP Option + 0x00, + }, + wantLayers: []Layer{ + &IPv4{ + IHL: Uint8(20), + TOS: Uint8(0), + TotalLength: Uint16(44), + ID: Uint16(1), + Flags: Uint8(0), + FragmentOffset: Uint16(0), + TTL: Uint8(64), + Protocol: Uint8(uint8(header.TCPProtocolNumber)), + Checksum: Uint16(0xf977), + SrcAddr: Address(tcpip.Address(net.ParseIP("192.168.0.2").To4())), + DstAddr: Address(tcpip.Address(net.ParseIP("192.168.0.1").To4())), + }, + &TCP{ + SrcPort: Uint16(12345), + DstPort: Uint16(54321), + SeqNum: Uint32(0), + AckNum: Uint32(0), + Flags: Uint8(header.TCPFlagSyn), + WindowSize: Uint16(8192), + Checksum: Uint16(0xf51c), + UrgentPointer: Uint16(0), + Options: []byte{3, 3, 2, 0}, + }, + &Payload{Bytes: nil}, + }, + }, + { + description: "with payload", + wantBytes: []byte{ + // IPv4 header + 0x45, 0x00, 0x00, 0x37, 0x00, 0x01, 0x00, 0x00, 0x40, 0x06, + 0xf9, 0x6c, 0xc0, 0xa8, 0x00, 0x02, 0xc0, 0xa8, 0x00, 0x01, + // TCP header + 0x30, 0x39, 0xd4, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x60, 0x02, 0x20, 0x00, 0xe5, 0x21, 0x00, 0x00, + // WindowScale Option + 0x03, 0x03, 0x02, + // NOP Option + 0x00, + // Payload: "Sample Data" + 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x20, 0x44, 0x61, 0x74, 0x61, + }, + wantLayers: []Layer{ + &IPv4{ + IHL: Uint8(20), + TOS: Uint8(0), + TotalLength: 
Uint16(55), + ID: Uint16(1), + Flags: Uint8(0), + FragmentOffset: Uint16(0), + TTL: Uint8(64), + Protocol: Uint8(uint8(header.TCPProtocolNumber)), + Checksum: Uint16(0xf96c), + SrcAddr: Address(tcpip.Address(net.ParseIP("192.168.0.2").To4())), + DstAddr: Address(tcpip.Address(net.ParseIP("192.168.0.1").To4())), + }, + &TCP{ + SrcPort: Uint16(12345), + DstPort: Uint16(54321), + SeqNum: Uint32(0), + AckNum: Uint32(0), + Flags: Uint8(header.TCPFlagSyn), + WindowSize: Uint16(8192), + Checksum: Uint16(0xe521), + UrgentPointer: Uint16(0), + Options: []byte{3, 3, 2, 0}, + }, + &Payload{Bytes: []byte("Sample Data")}, + }, + }, + } { + t.Run(tt.description, func(t *testing.T) { + layers := parse(parseIPv4, tt.wantBytes) + if !layers.match(tt.wantLayers) { + t.Fatalf("match failed with diff: %s", layers.diff(tt.wantLayers)) + } + gotBytes, err := layers.ToBytes() + if err != nil { + t.Fatalf("ToBytes() failed on %s: %s", &layers, err) + } + if !bytes.Equal(tt.wantBytes, gotBytes) { + t.Fatalf("mismatching bytes, gotBytes: %x, wantBytes: %x", gotBytes, tt.wantBytes) + } + }) + } +} -- cgit v1.2.3 From d06de1bede9c40078fe674a6df4849fe3322a861 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 19 May 2020 11:40:15 -0700 Subject: Fix flaky udp tests by polling before reading. On native Linux, calling recv/read right after send/write sometimes returns EWOULDBLOCK, if the data has not made it to the receiving socket (even though the endpoints are on the same host). Poll before reading to avoid this. Making this change also uncovered a hostinet bug (gvisor.dev/issue/2726), which is noted in this CL. PiperOrigin-RevId: 312320587 --- pkg/sentry/socket/hostinet/socket.go | 2 +- test/syscalls/linux/udp_socket_test_cases.cc | 48 +++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index b49433326..c11e82c10 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -555,7 +555,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b if uint64(src.NumBytes()) != srcs.NumBytes() { return 0, nil } - if srcs.IsEmpty() { + if srcs.IsEmpty() && len(controlBuf) == 0 { return 0, nil } diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 740c7986d..4a3ae5f3f 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -673,6 +674,11 @@ TEST_P(UdpSocketTest, ZerolengthWriteAllowed) { char buf[3]; // Send zero length packet from s_ to t_. ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); + + struct pollfd pfd = {t_, POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + // Receive the packet. char received[3]; EXPECT_THAT(read(t_, received, sizeof(received)), @@ -698,6 +704,11 @@ TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) { char buf[3]; // Send zero length packet from s_ to t_. ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); + + struct pollfd pfd = {t_, POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + // Receive the packet. 
char received[3]; EXPECT_THAT(read(t_, received, sizeof(received)), @@ -859,6 +870,10 @@ TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) { EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + struct pollfd pfd = {s_, POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + // We should get the data even though read has been shutdown. EXPECT_THAT(recv(s_, received, 2, 0), SyscallSucceedsWithValue(2)); @@ -1112,6 +1127,10 @@ TEST_P(UdpSocketTest, FIONREADWriteShutdown) { ASSERT_THAT(send(s_, str, sizeof(str), 0), SyscallSucceedsWithValue(sizeof(str))); + struct pollfd pfd = {s_, POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + n = -1; EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, sizeof(str)); @@ -1123,7 +1142,7 @@ TEST_P(UdpSocketTest, FIONREADWriteShutdown) { EXPECT_EQ(n, sizeof(str)); } -TEST_P(UdpSocketTest, Fionread) { +TEST_P(UdpSocketTest, FIONREAD) { // Bind s_ to loopback:TestPort. ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); @@ -1138,10 +1157,14 @@ TEST_P(UdpSocketTest, Fionread) { char buf[3 * psize]; RandomizeBuffer(buf, sizeof(buf)); + struct pollfd pfd = {s_, POLLIN, 0}; for (int i = 0; i < 3; ++i) { ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), SyscallSucceedsWithValue(psize)); + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + // Check that regardless of how many packets are in the queue, the size // reported is that of a single packet. n = -1; @@ -1165,10 +1188,18 @@ TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) { char buf[3 * psize]; RandomizeBuffer(buf, sizeof(buf)); + struct pollfd pfd = {s_, POLLIN, 0}; for (int i = 0; i < 3; ++i) { ASSERT_THAT(sendto(t_, buf + i * psize, 0, 0, addr_[0], addrlen_), SyscallSucceedsWithValue(0)); + // TODO(gvisor.dev/issue/2726): sending a zero-length message to a hostinet + // socket does not cause a poll event to be triggered. + if (!IsRunningWithHostinet()) { + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + } + // Check that regardless of how many packets are in the queue, the size // reported is that of a single packet. n = -1; @@ -1235,6 +1266,10 @@ TEST_P(UdpSocketTest, SoTimestamp) { // Send zero length packet from t_ to s_. ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + struct pollfd pfd = {s_, POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; msghdr msg; memset(&msg, 0, sizeof(msg)); @@ -1278,6 +1313,10 @@ TEST_P(UdpSocketTest, TimestampIoctl) { ASSERT_THAT(RetryEINTR(write)(t_, buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); + struct pollfd pfd = {s_, POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + // There should be no control messages. char recv_buf[sizeof(buf)]; ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, sizeof(recv_buf))); @@ -1315,6 +1354,10 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { SyscallSucceedsWithValue(sizeof(buf))); ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + struct pollfd pfd = {s_, POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + // There should be no control messages. 
char recv_buf[sizeof(buf)]; ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, sizeof(recv_buf))); @@ -1330,6 +1373,9 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { SyscallSucceeds()); ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + SyscallSucceedsWithValue(1)); + // There should be a message for SO_TIMESTAMP. char cmsgbuf[CMSG_SPACE(sizeof(struct timeval))]; msghdr msg = {}; -- cgit v1.2.3 From 064347afdf6206df73571684a3a75dde63584ad5 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Tue, 19 May 2020 11:51:28 -0700 Subject: Skip itimer "fairness" tests on ptrace. With additional logging, the issue described by the new comment looks like: D0518 21:28:08.416810 6777 task_signals.go:459] [ 8] Notified of signal 27 D0518 21:28:08.416852 6777 task_block.go:223] [ 8] Interrupt queued D0518 21:28:08.417013 6777 task_run.go:250] [ 8] Switching to sentry D0518 21:28:08.417033 6777 task_signals.go:220] [ 8] Signal 27: delivering to handler D0518 21:28:08.417127 6777 task_run.go:248] [ 8] Switching to app D0518 21:28:08.443765 6777 task_signals.go:519] [ 8] Refusing masked signal 27 // ED: note the ~26ms elapsed since TID 8 "switched to app" D0518 21:28:08.443814 6777 task_signals.go:465] [ 6] Notified of group signal 27 D0518 21:28:08.443832 6777 task_block.go:223] [ 6] Interrupt queued D0518 21:28:08.443914 6777 task_block.go:223] [ 6] Interrupt queued D0518 21:28:08.443859 6777 task_run.go:250] [ 8] Switching to sentry I0518 21:28:08.443936 6777 strace.go:576] [ 8] exe E rt_sigreturn() Slow context switches on ptrace are probably due to kernel scheduling delays. Slow context switches on KVM are less clear, so leave that bug and TODO open. PiperOrigin-RevId: 312322782 --- test/syscalls/linux/itimer.cc | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/itimer.cc b/test/syscalls/linux/itimer.cc index dd981a278..e397d5f57 100644 --- a/test/syscalls/linux/itimer.cc +++ b/test/syscalls/linux/itimer.cc @@ -267,8 +267,19 @@ int TestSIGPROFFairness(absl::Duration sleep) { // Random save/restore is disabled as it introduces additional latency and // unpredictable distribution patterns. TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyActive_NoRandomSave) { - // TODO(b/143247272): CPU time accounting is inaccurate for the KVM platform. - SKIP_IF(GvisorPlatform() == Platform::kKVM); + // On the KVM and ptrace platforms, switches between sentry and application + // context are sometimes extremely slow, causing the itimer to send SIGPROF to + // a thread that either already has one pending or has had SIGPROF delivered, + // but hasn't handled it yet (and thus therefore still has SIGPROF masked). In + // either case, since itimer signals are group-directed, signal sending falls + // back to notifying the thread group leader. ItimerSignalTest() fails if "too + // many" signals are delivered to the thread group leader, so these tests are + // flaky on these platforms. + // + // TODO(b/143247272): Clarify why context switches are so slow on KVM. + const auto gvisor_platform = GvisorPlatform(); + SKIP_IF(gvisor_platform == Platform::kKVM || + gvisor_platform == Platform::kPtrace); pid_t child; int execve_errno; @@ -291,8 +302,10 @@ TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyActive_NoRandomSave) { // Random save/restore is disabled as it introduces additional latency and // unpredictable distribution patterns. 
TEST(ItimerTest, DeliversSIGPROFToThreadsRoughlyFairlyIdle_NoRandomSave) { - // TODO(b/143247272): CPU time accounting is inaccurate for the KVM platform. - SKIP_IF(GvisorPlatform() == Platform::kKVM); + // See comment in DeliversSIGPROFToThreadsRoughlyFairlyActive. + const auto gvisor_platform = GvisorPlatform(); + SKIP_IF(gvisor_platform == Platform::kKVM || + gvisor_platform == Platform::kPtrace); pid_t child; int execve_errno; -- cgit v1.2.3 From 7f8172edf583e0d26bee5e06578a442c7507ba6f Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 5 May 2020 18:06:46 -0700 Subject: Restructure shim packages for merging. --- .gitignore | 1 - .travis.yml | 29 - CODEOWNERS | 1 - CONTRIBUTING.md | 28 - LICENSE | 202 ------- Makefile | 42 -- README.md | 26 - cmd/containerd-shim-runsc-v1/main.go | 24 - cmd/gvisor-containerd-shim/config.go | 42 -- cmd/gvisor-containerd-shim/main.go | 320 ---------- docs/README.md | 9 - docs/configure-containerd-shim-runsc-v1.md | 72 --- docs/configure-gvisor-containerd-shim.md | 42 -- docs/runtime-handler-quickstart.md | 251 -------- docs/runtime-handler-shim-v2-quickstart.md | 232 -------- docs/untrusted-workload-quickstart.md | 212 ------- go.mod | 35 -- go.sum | 67 --- pkg/go-runsc/runsc.go | 513 ---------------- pkg/go-runsc/utils.go | 46 -- pkg/shim/runsc/runsc.go | 511 ++++++++++++++++ pkg/shim/runsc/utils.go | 44 ++ pkg/shim/v1/proc/deleted_state.go | 49 ++ pkg/shim/v1/proc/exec.go | 282 +++++++++ pkg/shim/v1/proc/exec_state.go | 154 +++++ pkg/shim/v1/proc/init.go | 462 ++++++++++++++ pkg/shim/v1/proc/init_state.go | 182 ++++++ pkg/shim/v1/proc/io.go | 167 ++++++ pkg/shim/v1/proc/process.go | 37 ++ pkg/shim/v1/proc/types.go | 70 +++ pkg/shim/v1/proc/utils.go | 92 +++ pkg/shim/v1/shim/platform.go | 110 ++++ pkg/shim/v1/shim/service.go | 579 ++++++++++++++++++ pkg/shim/v1/utils/utils.go | 57 ++ pkg/shim/v1/utils/volumes.go | 156 +++++ pkg/shim/v1/utils/volumes_test.go | 309 ++++++++++ pkg/shim/v2/epoll.go | 128 ++++ pkg/shim/v2/options/options.go | 33 + pkg/shim/v2/service.go | 821 +++++++++++++++++++++++++ pkg/shim/v2/service_linux.go | 112 ++++ pkg/v1/proc/deleted_state.go | 54 -- pkg/v1/proc/exec.go | 284 --------- pkg/v1/proc/exec_state.go | 156 ----- pkg/v1/proc/init.go | 464 --------------- pkg/v1/proc/init_state.go | 184 ------ pkg/v1/proc/io.go | 169 ------ pkg/v1/proc/process.go | 39 -- pkg/v1/proc/types.go | 72 --- pkg/v1/proc/utils.go | 94 --- pkg/v1/shim/platform.go | 112 ---- pkg/v1/shim/service.go | 581 ------------------ pkg/v1/utils/utils.go | 59 -- pkg/v1/utils/volumes.go | 158 ----- pkg/v1/utils/volumes_test.go | 311 ---------- pkg/v2/epoll.go | 129 ---- pkg/v2/options/options.go | 34 -- pkg/v2/service.go | 826 -------------------------- pkg/v2/service_linux.go | 111 ---- shim/README.md | 16 + shim/configure-containerd-shim-runsc-v1.md | 72 +++ shim/configure-gvisor-containerd-shim.md | 42 ++ shim/runtime-handler-quickstart.md | 251 ++++++++ shim/runtime-handler-shim-v2-quickstart.md | 232 ++++++++ shim/untrusted-workload-quickstart.md | 212 +++++++ shim/v1/main.go | 26 + shim/v2/config.go | 40 ++ shim/v2/main.go | 318 ++++++++++ test/e2e/containerd-install.sh | 44 -- test/e2e/crictl-install.sh | 17 - test/e2e/run-container.sh | 30 - test/e2e/runsc-install.sh | 8 - test/e2e/runtime-handler-shim-v2/install.sh | 21 - test/e2e/runtime-handler-shim-v2/test.sh | 34 -- test/e2e/runtime-handler-shim-v2/validate.sh | 7 - test/e2e/runtime-handler/install.sh | 24 - test/e2e/runtime-handler/test.sh | 33 - test/e2e/runtime-handler/usage.sh | 30 - 
test/e2e/runtimeclass-install.sh | 33 - test/e2e/shim-install.sh | 28 - test/e2e/untrusted-workload/install.sh | 27 - test/e2e/untrusted-workload/test.sh | 33 - test/e2e/untrusted-workload/usage.sh | 33 - test/e2e/validate.sh | 17 - test/shim/containerd-install.sh | 44 ++ test/shim/crictl-install.sh | 17 + test/shim/run-container.sh | 30 + test/shim/runsc-install.sh | 8 + test/shim/runtime-handler-shim-v2/install.sh | 21 + test/shim/runtime-handler-shim-v2/test.sh | 34 ++ test/shim/runtime-handler-shim-v2/validate.sh | 7 + test/shim/runtime-handler/install.sh | 24 + test/shim/runtime-handler/test.sh | 33 + test/shim/runtime-handler/usage.sh | 30 + test/shim/runtimeclass-install.sh | 33 + test/shim/shim-install.sh | 28 + test/shim/untrusted-workload/install.sh | 27 + test/shim/untrusted-workload/test.sh | 33 + test/shim/untrusted-workload/usage.sh | 33 + test/shim/validate.sh | 17 + 99 files changed, 5983 insertions(+), 6450 deletions(-) delete mode 100644 .gitignore delete mode 100644 .travis.yml delete mode 100644 CODEOWNERS delete mode 100644 CONTRIBUTING.md delete mode 100644 LICENSE delete mode 100644 Makefile delete mode 100644 README.md delete mode 100644 cmd/containerd-shim-runsc-v1/main.go delete mode 100644 cmd/gvisor-containerd-shim/config.go delete mode 100644 cmd/gvisor-containerd-shim/main.go delete mode 100644 docs/README.md delete mode 100644 docs/configure-containerd-shim-runsc-v1.md delete mode 100644 docs/configure-gvisor-containerd-shim.md delete mode 100644 docs/runtime-handler-quickstart.md delete mode 100644 docs/runtime-handler-shim-v2-quickstart.md delete mode 100644 docs/untrusted-workload-quickstart.md delete mode 100644 go.mod delete mode 100644 go.sum delete mode 100644 pkg/go-runsc/runsc.go delete mode 100644 pkg/go-runsc/utils.go create mode 100644 pkg/shim/runsc/runsc.go create mode 100644 pkg/shim/runsc/utils.go create mode 100644 pkg/shim/v1/proc/deleted_state.go create mode 100644 pkg/shim/v1/proc/exec.go create mode 100644 pkg/shim/v1/proc/exec_state.go create mode 100644 pkg/shim/v1/proc/init.go create mode 100644 pkg/shim/v1/proc/init_state.go create mode 100644 pkg/shim/v1/proc/io.go create mode 100644 pkg/shim/v1/proc/process.go create mode 100644 pkg/shim/v1/proc/types.go create mode 100644 pkg/shim/v1/proc/utils.go create mode 100644 pkg/shim/v1/shim/platform.go create mode 100644 pkg/shim/v1/shim/service.go create mode 100644 pkg/shim/v1/utils/utils.go create mode 100644 pkg/shim/v1/utils/volumes.go create mode 100644 pkg/shim/v1/utils/volumes_test.go create mode 100644 pkg/shim/v2/epoll.go create mode 100644 pkg/shim/v2/options/options.go create mode 100644 pkg/shim/v2/service.go create mode 100644 pkg/shim/v2/service_linux.go delete mode 100644 pkg/v1/proc/deleted_state.go delete mode 100644 pkg/v1/proc/exec.go delete mode 100644 pkg/v1/proc/exec_state.go delete mode 100644 pkg/v1/proc/init.go delete mode 100644 pkg/v1/proc/init_state.go delete mode 100644 pkg/v1/proc/io.go delete mode 100644 pkg/v1/proc/process.go delete mode 100644 pkg/v1/proc/types.go delete mode 100644 pkg/v1/proc/utils.go delete mode 100644 pkg/v1/shim/platform.go delete mode 100644 pkg/v1/shim/service.go delete mode 100644 pkg/v1/utils/utils.go delete mode 100644 pkg/v1/utils/volumes.go delete mode 100644 pkg/v1/utils/volumes_test.go delete mode 100644 pkg/v2/epoll.go delete mode 100644 pkg/v2/options/options.go delete mode 100644 pkg/v2/service.go delete mode 100644 pkg/v2/service_linux.go create mode 100644 shim/README.md create mode 100644 
shim/configure-containerd-shim-runsc-v1.md create mode 100644 shim/configure-gvisor-containerd-shim.md create mode 100644 shim/runtime-handler-quickstart.md create mode 100644 shim/runtime-handler-shim-v2-quickstart.md create mode 100644 shim/untrusted-workload-quickstart.md create mode 100644 shim/v1/main.go create mode 100644 shim/v2/config.go create mode 100644 shim/v2/main.go delete mode 100755 test/e2e/containerd-install.sh delete mode 100755 test/e2e/crictl-install.sh delete mode 100755 test/e2e/run-container.sh delete mode 100755 test/e2e/runsc-install.sh delete mode 100755 test/e2e/runtime-handler-shim-v2/install.sh delete mode 100755 test/e2e/runtime-handler-shim-v2/test.sh delete mode 100755 test/e2e/runtime-handler-shim-v2/validate.sh delete mode 100755 test/e2e/runtime-handler/install.sh delete mode 100755 test/e2e/runtime-handler/test.sh delete mode 100755 test/e2e/runtime-handler/usage.sh delete mode 100755 test/e2e/runtimeclass-install.sh delete mode 100755 test/e2e/shim-install.sh delete mode 100755 test/e2e/untrusted-workload/install.sh delete mode 100755 test/e2e/untrusted-workload/test.sh delete mode 100755 test/e2e/untrusted-workload/usage.sh delete mode 100755 test/e2e/validate.sh create mode 100755 test/shim/containerd-install.sh create mode 100755 test/shim/crictl-install.sh create mode 100755 test/shim/run-container.sh create mode 100755 test/shim/runsc-install.sh create mode 100755 test/shim/runtime-handler-shim-v2/install.sh create mode 100755 test/shim/runtime-handler-shim-v2/test.sh create mode 100755 test/shim/runtime-handler-shim-v2/validate.sh create mode 100755 test/shim/runtime-handler/install.sh create mode 100755 test/shim/runtime-handler/test.sh create mode 100755 test/shim/runtime-handler/usage.sh create mode 100755 test/shim/runtimeclass-install.sh create mode 100755 test/shim/shim-install.sh create mode 100755 test/shim/untrusted-workload/install.sh create mode 100755 test/shim/untrusted-workload/test.sh create mode 100755 test/shim/untrusted-workload/usage.sh create mode 100755 test/shim/validate.sh (limited to 'test') diff --git a/.gitignore b/.gitignore deleted file mode 100644 index ae3c17260..000000000 --- a/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/bin/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d9ef5e33f..000000000 --- a/.travis.yml +++ /dev/null @@ -1,29 +0,0 @@ -dist: bionic -sudo: required -language: go -go: "1.14.x" - -env: - - CONTAINERD_VERSION=1.1.8 RUNSC_VERSION=release/20200219.0 TEST=untrusted-workload - - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=untrusted-workload - - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler - - CONTAINERD_VERSION=1.2.13 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler-shim-v2 - - CONTAINERD_VERSION=1.3.3 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler - - CONTAINERD_VERSION=1.3.3 RUNSC_VERSION=release/20200219.0 TEST=runtime-handler-shim-v2 - -go_import_path: github.com/google/gvisor-containerd-shim - -addons: - apt: - packages: - - socat - - conntrack - - ipset - - libseccomp-dev - -before_install: - - uname -r - -script: - - make test - - ./test/e2e/${TEST}/test.sh diff --git a/CODEOWNERS b/CODEOWNERS deleted file mode 100644 index 7bf84e6dd..000000000 --- a/CODEOWNERS +++ /dev/null @@ -1 +0,0 @@ -* @google/gvisor diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 939e5341e..000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,28 +0,0 @@ -# How to Contribute - -We'd love to accept your 
patches and contributions to this project. There are -just a few small guidelines you need to follow. - -## Contributor License Agreement - -Contributions to this project must be accompanied by a Contributor License -Agreement. You (or your employer) retain the copyright to your contribution; -this simply gives us permission to use and redistribute your contributions as -part of the project. Head over to to see -your current agreements on file or to sign a new one. - -You generally only need to submit a CLA once, so if you've already submitted one -(even if it was for a different project), you probably don't need to do it -again. - -## Code reviews - -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more -information on using pull requests. - -## Community Guidelines - -This project follows [Google's Open Source Community -Guidelines](https://opensource.google.com/conduct/). diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d64569567..000000000 --- a/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/Makefile b/Makefile deleted file mode 100644 index abfadc1b5..000000000 --- a/Makefile +++ /dev/null @@ -1,42 +0,0 @@ -# Base path used to install. 
-DESTDIR=/usr/local -GC=go -GO_BUILD_FLAGS= -GO_TAGS= -GO_LDFLAGS=-ldflags '-s -w -extldflags "-static"' -SOURCES=$(shell find cmd/ pkg/ -name '*.go') -DEPLOY_PATH=cri-containerd-staging/gvisor-containerd-shim -VERSION=$(shell git rev-parse HEAD) -GO_MODULE=on - -all: binaries - -binaries: bin/gvisor-containerd-shim bin/containerd-shim-runsc-v1 - -bin/gvisor-containerd-shim: $(SOURCES) - GO111MODULE=${GO_MODULE} CGO_ENABLED=0 ${GC} build ${GO_BUILD_FLAGS} -o bin/gvisor-containerd-shim ${SHIM_GO_LDFLAGS} ${GO_TAGS} ./cmd/gvisor-containerd-shim - -bin/containerd-shim-runsc-v1: $(SOURCES) - GO111MODULE=${GO_MODULE} CGO_ENABLED=0 ${GC} build ${GO_BUILD_FLAGS} -o bin/containerd-shim-runsc-v1 ${SHIM_GO_LDFLAGS} ${GO_TAGS} ./cmd/containerd-shim-runsc-v1 - -install: bin/gvisor-containerd-shim - mkdir -p $(DESTDIR)/bin - install bin/gvisor-containerd-shim $(DESTDIR)/bin - install bin/containerd-shim-runsc-v1 $(DESTDIR)/bin - -uninstall: - rm -f $(DESTDIR)/bin/gvisor-containerd-shim - rm -f $(DESTDIR)/bin/containerd-shim-runsc-v1 - -clean: - rm -rf bin/* - -push: binaries - gsutil cp ./bin/gvisor-containerd-shim gs://$(DEPLOY_PATH)/gvisor-containerd-shim-$(VERSION) - gsutil cp ./bin/containerd-shim-runsc-v1 gs://$(DEPLOY_PATH)/containerd-shim-runsc-v1-$(VERSION) - echo "$(VERSION)" | gsutil cp - "gs://$(DEPLOY_PATH)/latest" - -.PHONY: test - -test: - GO111MODULE=${GO_MODULE} CGO_ENABLED=0 ${GC} test -v ./pkg/... ${SHIM_GO_LDFLAGS} ${GO_TAGS} diff --git a/README.md b/README.md deleted file mode 100644 index 40a474c4f..000000000 --- a/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# gvisor-containerd-shim - -[![Build Status](https://travis-ci.org/google/gvisor-containerd-shim.svg?branch=master)](https://travis-ci.org/google/gvisor-containerd-shim) -[![Go Report Card](https://goreportcard.com/badge/github.com/google/gvisor-containerd-shim)](https://goreportcard.com/report/github.com/google/gvisor-containerd-shim) - -gvisor-containerd-shim is a containerd shim for [gVisor](https://github.com/google/gvisor/). It implements the containerd v1 shim API. It can be used as a drop-in replacement for [containerd-shim](https://github.com/containerd/containerd/tree/master/cmd/containerd-shim) (though containerd-shim must still be installed). It allows the use of both gVisor (runsc) and normal containers in the same containerd installation by deferring to the runc shim if the desired runtime engine is not runsc. - -## Requirements - -- gvisor (runsc) >= 2018-12-07 -- containerd, containerd-shim >= 1.1 - -## Installation - -- [Untrusted Workload Quick Start (containerd >=1.1)](docs/untrusted-workload-quickstart.md) -- [Runtime Handler/RuntimeClass Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md) -- [Runtime Handler/RuntimeClass Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md) - -## Configuration - -- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](docs/configure-containerd-shim-runsc-v1.md) -- [Configure gvisor-containerd-shim (shim v1) (containerd <= 1.2)](docs/configure-gvisor-containerd-shim.md) - -# Contributing - -See [CONTRIBUTING.md](CONTRIBUTING.md). diff --git a/cmd/containerd-shim-runsc-v1/main.go b/cmd/containerd-shim-runsc-v1/main.go deleted file mode 100644 index ec73edbd2..000000000 --- a/cmd/containerd-shim-runsc-v1/main.go +++ /dev/null @@ -1,24 +0,0 @@ -/* - Copyright The containerd Authors. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package main - -import ( - "github.com/containerd/containerd/runtime/v2/shim" - - runsc "github.com/google/gvisor-containerd-shim/pkg/v2" -) - -func main() { - shim.Run("io.containerd.runsc.v1", runsc.New) -} diff --git a/cmd/gvisor-containerd-shim/config.go b/cmd/gvisor-containerd-shim/config.go deleted file mode 100644 index f03690304..000000000 --- a/cmd/gvisor-containerd-shim/config.go +++ /dev/null @@ -1,42 +0,0 @@ -/* -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package main - -import "github.com/BurntSushi/toml" - -// config is the configuration for gvisor containerd shim. -type config struct { - // RuncShim is the shim binary path for standard containerd-shim for runc. - // When the runtime is `runc`, gvisor containerd shim will exec current - // process to standard containerd-shim. This is a work around for containerd - // 1.1. In containerd 1.2, containerd will choose different containerd-shims - // based on runtime. - RuncShim string `toml:"runc_shim"` - // RunscConfig is configuration for runsc. The key value will be converted - // to runsc flags --key=value directly. - RunscConfig map[string]string `toml:"runsc_config"` -} - -// loadConfig load gvisor containerd shim config from config file. -func loadConfig(path string) (*config, error) { - var c config - _, err := toml.DecodeFile(path, &c) - if err != nil { - return &c, err - } - return &c, nil -} diff --git a/cmd/gvisor-containerd-shim/main.go b/cmd/gvisor-containerd-shim/main.go deleted file mode 100644 index ea26aa105..000000000 --- a/cmd/gvisor-containerd-shim/main.go +++ /dev/null @@ -1,320 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
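As an aside to the `config` struct in `cmd/gvisor-containerd-shim/config.go` above: `loadConfig` decodes a small TOML file (by default `/etc/containerd/gvisor-containerd-shim.toml`). A minimal, self-contained sketch of that decoding is shown below; the file contents are illustrative assumptions rather than values taken from this patch, and each key under `[runsc_config]` is the kind of entry that ends up passed to runsc as `--key=value`.

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

// Mirrors the config struct defined in cmd/gvisor-containerd-shim/config.go.
type config struct {
	RuncShim    string            `toml:"runc_shim"`
	RunscConfig map[string]string `toml:"runsc_config"`
}

func main() {
	// Illustrative file contents; the keys come from the struct tags above,
	// the values are assumptions for the sake of the example.
	const sample = `
runc_shim = "/usr/local/bin/containerd-shim"

[runsc_config]
  debug = "true"
  debug-log = "/var/log/runsc/%ID%.log"
`
	var c config
	if _, err := toml.Decode(sample, &c); err != nil {
		panic(err)
	}
	fmt.Printf("runc shim: %q\nrunsc flags: %v\n", c.RuncShim, c.RunscConfig)
}
```

The `%ID%` placeholder in `debug-log` is the token that `FormatLogPath`, later in this patch, replaces with the container ID.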
-*/ - -package main - -import ( - "bytes" - "context" - "flag" - "fmt" - "net" - "os" - "os/exec" - "os/signal" - "path/filepath" - "runtime" - "runtime/debug" - "strings" - "sync" - "syscall" - "time" - - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/runtime/v1/linux/proc" - containerdshim "github.com/containerd/containerd/runtime/v1/shim" - shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" - "github.com/containerd/ttrpc" - "github.com/containerd/typeurl" - ptypes "github.com/gogo/protobuf/types" - "github.com/opencontainers/runc/libcontainer/system" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" - "github.com/google/gvisor-containerd-shim/pkg/v1/shim" -) - -var ( - debugFlag bool - namespaceFlag string - socketFlag string - addressFlag string - workdirFlag string - runtimeRootFlag string - containerdBinaryFlag string - shimConfigFlag string -) - -// ShimConfigPath is the default shim config file path. -const ShimConfigPath = "/etc/containerd/gvisor-containerd-shim.toml" - -func init() { - flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs") - flag.StringVar(&namespaceFlag, "namespace", "", "namespace that owns the shim") - flag.StringVar(&socketFlag, "socket", "", "abstract socket path to serve") - flag.StringVar(&addressFlag, "address", "", "grpc address back to main containerd") - flag.StringVar(&workdirFlag, "workdir", "", "path used to storge large temporary data") - // Containerd default to runc, unless another runtime is explicitly specified. - // We keep the same default to make the default behavior consistent. - flag.StringVar(&runtimeRootFlag, "runtime-root", proc.RuncRoot, "root directory for the runtime") - // currently, the `containerd publish` utility is embedded in the daemon binary. - // The daemon invokes `containerd-shim -containerd-binary ...` with its own os.Executable() path. - flag.StringVar(&containerdBinaryFlag, "containerd-binary", "containerd", "path to containerd binary (used for `containerd publish`)") - flag.StringVar(&shimConfigFlag, "config", ShimConfigPath, "path to the shim configuration file") - flag.Parse() -} - -func main() { - // This is a hack. Exec current process to run standard containerd-shim - // if runtime root is not `runsc`. We don't need this for shim v2 api. - if filepath.Base(runtimeRootFlag) != "runsc" { - if err := executeRuncShim(); err != nil { - fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) - os.Exit(1) - } - } - - debug.SetGCPercent(40) - go func() { - for range time.Tick(30 * time.Second) { - debug.FreeOSMemory() - } - }() - - if debugFlag { - logrus.SetLevel(logrus.DebugLevel) - } - - if os.Getenv("GOMAXPROCS") == "" { - // If GOMAXPROCS hasn't been set, we default to a value of 2 to reduce - // the number of Go stacks present in the shim. - runtime.GOMAXPROCS(2) - } - - // Run regular shim if needed. - if err := executeShim(); err != nil { - fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) - os.Exit(1) - } -} - -// executeRuncShim execs current process to a containerd-shim process and -// retains all flags and envs. 
-func executeRuncShim() error { - c, err := loadConfig(shimConfigFlag) - if err != nil && !os.IsNotExist(err) { - return errors.Wrap(err, "failed to load shim config") - } - shimPath := c.RuncShim - if shimPath == "" { - shimPath, err = exec.LookPath("containerd-shim") - if err != nil { - return errors.Wrapf(err, "lookup containerd-shim") - } - } - - args := append([]string{shimPath}, os.Args[1:]...) - if err := syscall.Exec(shimPath, args, os.Environ()); err != nil { - return errors.Wrapf(err, "exec containerd-shim %q", shimPath) - } - return nil -} - -func executeShim() error { - // start handling signals as soon as possible so that things are properly reaped - // or if runtime exits before we hit the handler - signals, err := setupSignals() - if err != nil { - return err - } - dump := make(chan os.Signal, 32) - signal.Notify(dump, syscall.SIGUSR1) - - path, err := os.Getwd() - if err != nil { - return err - } - server, err := ttrpc.NewServer(ttrpc.WithServerHandshaker(ttrpc.UnixSocketRequireSameUser())) - if err != nil { - return errors.Wrap(err, "failed creating server") - } - c, err := loadConfig(shimConfigFlag) - if err != nil && !os.IsNotExist(err) { - return errors.Wrap(err, "failed to load shim config") - } - sv, err := shim.NewService( - shim.Config{ - Path: path, - Namespace: namespaceFlag, - WorkDir: workdirFlag, - RuntimeRoot: runtimeRootFlag, - RunscConfig: c.RunscConfig, - }, - &remoteEventsPublisher{address: addressFlag}, - ) - if err != nil { - return err - } - logrus.Debug("registering ttrpc server") - shimapi.RegisterShimService(server, sv) - - socket := socketFlag - if err := serve(server, socket); err != nil { - return err - } - logger := logrus.WithFields(logrus.Fields{ - "pid": os.Getpid(), - "path": path, - "namespace": namespaceFlag, - }) - go func() { - for range dump { - dumpStacks(logger) - } - }() - return handleSignals(logger, signals, server, sv) -} - -// serve serves the ttrpc API over a unix socket at the provided path -// this function does not block -func serve(server *ttrpc.Server, path string) error { - var ( - l net.Listener - err error - ) - if path == "" { - l, err = net.FileListener(os.NewFile(3, "socket")) - path = "[inherited from parent]" - } else { - if len(path) > 106 { - return errors.Errorf("%q: unix socket path too long (> 106)", path) - } - l, err = net.Listen("unix", "\x00"+path) - } - if err != nil { - return err - } - logrus.WithField("socket", path).Debug("serving api on unix socket") - go func() { - defer l.Close() - if err := server.Serve(context.Background(), l); err != nil && - !strings.Contains(err.Error(), "use of closed network connection") { - logrus.WithError(err).Fatal("gvisor-containerd-shim: ttrpc server failure") - } - }() - return nil -} - -// setupSignals creates a new signal handler for all signals and sets the shim as a -// sub-reaper so that the container processes are reparented -func setupSignals() (chan os.Signal, error) { - signals := make(chan os.Signal, 32) - signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) - // make sure runc is setup to use the monitor - // for waiting on processes - // TODO(random-liu): Move shim/reaper.go to a separate package. 
- runsc.Monitor = containerdshim.Default - // set the shim as the subreaper for all orphaned processes created by the container - if err := system.SetSubreaper(1); err != nil { - return nil, err - } - return signals, nil -} - -func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error { - var ( - termOnce sync.Once - done = make(chan struct{}) - ) - - for { - select { - case <-done: - return nil - case s := <-signals: - switch s { - case unix.SIGCHLD: - if err := containerdshim.Reap(); err != nil { - logger.WithError(err).Error("reap exit status") - } - case unix.SIGTERM, unix.SIGINT: - go termOnce.Do(func() { - ctx := context.TODO() - if err := server.Shutdown(ctx); err != nil { - logger.WithError(err).Error("failed to shutdown server") - } - // Ensure our child is dead if any - sv.Kill(ctx, &shimapi.KillRequest{ - Signal: uint32(syscall.SIGKILL), - All: true, - }) - sv.Delete(context.Background(), &ptypes.Empty{}) - close(done) - }) - case unix.SIGPIPE: - } - } - } -} - -func dumpStacks(logger *logrus.Entry) { - var ( - buf []byte - stackSize int - ) - bufferLen := 16384 - for stackSize == len(buf) { - buf = make([]byte, bufferLen) - stackSize = runtime.Stack(buf, true) - bufferLen *= 2 - } - buf = buf[:stackSize] - logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) -} - -type remoteEventsPublisher struct { - address string -} - -func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { - ns, _ := namespaces.Namespace(ctx) - encoded, err := typeurl.MarshalAny(event) - if err != nil { - return err - } - data, err := encoded.Marshal() - if err != nil { - return err - } - cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) - cmd.Stdin = bytes.NewReader(data) - c, err := containerdshim.Default.Start(cmd) - if err != nil { - return err - } - status, err := containerdshim.Default.Wait(cmd, c) - if err != nil { - return err - } - if status != 0 { - return errors.New("failed to publish event") - } - return nil -} diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index f1091f934..000000000 --- a/docs/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# gvisor-containerd-shim docs - -Everything you need to know about gvisor-containerd-shim - -- [Untrusted Workload Quick Start (containerd >=1.1)](untrusted-workload-quickstart.md) -- [Runtime Handler Quick Start (containerd >=1.2)](runtime-handler-quickstart.md) -- [Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md) -- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](configure-containerd-shim-runsc-v1.md) -- [Configure gvisor-containerd-shim (shim v1) (containerd <= 1.2)](configure-gvisor-containerd-shim.md) diff --git a/docs/configure-containerd-shim-runsc-v1.md b/docs/configure-containerd-shim-runsc-v1.md deleted file mode 100644 index 977ceacbd..000000000 --- a/docs/configure-containerd-shim-runsc-v1.md +++ /dev/null @@ -1,72 +0,0 @@ -# Configure containerd-shim-runsc-v1 (Shim V2) - -This document describes how to configure runtime options for -`containerd-shim-runsc-v1`. This is follows on to the instructions of -[Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md) -and requires containerd 1.3 or later. - -### Update `/etc/containerd/config.toml` to point to a configuration file for `containerd-shim-runsc-v1`. 
- -`containerd-shim-runsc-v1` supports a few different configuration options based -on the version of containerd that is used. For versions >= 1.3, it supports a -configurable config path in the containerd runtime configuration. - -```shell -{ # Step 1: Update runtime options for runsc in containerd config.toml -cat < -[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(dev\)/ /^}/) -```shell -{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1 - make - sudo make install -} -``` - -### Configure containerd - -1. Update `/etc/containerd/config.toml`. Make sure `containerd-shim-runsc-v1` is - in `${PATH}`. - -[embedmd]:# (../test/e2e/runtime-handler-shim-v2/install.sh shell /{ # Step 1/ /^}/) -```shell -{ # Step 1: Create containerd config.toml -cat <: -// -func (r *Runsc) runOrError(cmd *exec.Cmd) error { - if cmd.Stdout != nil || cmd.Stderr != nil { - ec, err := Monitor.Start(cmd) - if err != nil { - return err - } - status, err := Monitor.Wait(cmd, ec) - if err == nil && status != 0 { - err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) - } - return err - } - data, err := cmdOutput(cmd, true) - if err != nil { - return fmt.Errorf("%s: %s", err, data) - } - return nil -} - -func (r *Runsc) command(context context.Context, args ...string) *exec.Cmd { - command := r.Command - if command == "" { - command = DefaultCommand - } - cmd := exec.CommandContext(context, command, append(r.args(), args...)...) - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: r.Setpgid, - } - if r.PdeathSignal != 0 { - cmd.SysProcAttr.Pdeathsig = r.PdeathSignal - } - - return cmd -} - -func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, error) { - b := getBuf() - defer putBuf(b) - - cmd.Stdout = b - if combined { - cmd.Stderr = b - } - ec, err := Monitor.Start(cmd) - if err != nil { - return nil, err - } - - status, err := Monitor.Wait(cmd, ec) - if err == nil && status != 0 { - err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) - } - - return b.Bytes(), err -} diff --git a/pkg/go-runsc/utils.go b/pkg/go-runsc/utils.go deleted file mode 100644 index e4c3c937a..000000000 --- a/pkg/go-runsc/utils.go +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package runsc - -import ( - "bytes" - "strings" - "sync" -) - -var bytesBufferPool = sync.Pool{ - New: func() interface{} { - return bytes.NewBuffer(nil) - }, -} - -func getBuf() *bytes.Buffer { - return bytesBufferPool.Get().(*bytes.Buffer) -} - -func putBuf(b *bytes.Buffer) { - b.Reset() - bytesBufferPool.Put(b) -} - -// FormatLogPath parses runsc config, and fill in %ID% in the log path. 
-func FormatLogPath(id string, config map[string]string) { - if path, ok := config["debug-log"]; ok { - config["debug-log"] = strings.Replace(path, "%ID%", id, -1) - } -} diff --git a/pkg/shim/runsc/runsc.go b/pkg/shim/runsc/runsc.go new file mode 100644 index 000000000..41ba9c3af --- /dev/null +++ b/pkg/shim/runsc/runsc.go @@ -0,0 +1,511 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runsc + +import ( + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strconv" + "syscall" + "time" + + runc "github.com/containerd/go-runc" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +var Monitor runc.ProcessMonitor = runc.Monitor + +// DefaultCommand is the default command for Runsc +const DefaultCommand = "runsc" + +// Runsc is the client to the runsc cli +type Runsc struct { + Command string + PdeathSignal syscall.Signal + Setpgid bool + Root string + Log string + LogFormat runc.Format + Config map[string]string +} + +// List returns all containers created inside the provided runsc root directory +func (r *Runsc) List(context context.Context) ([]*runc.Container, error) { + data, err := cmdOutput(r.command(context, "list", "--format=json"), false) + if err != nil { + return nil, err + } + var out []*runc.Container + if err := json.Unmarshal(data, &out); err != nil { + return nil, err + } + return out, nil +} + +// State returns the state for the container provided by id +func (r *Runsc) State(context context.Context, id string) (*runc.Container, error) { + data, err := cmdOutput(r.command(context, "state", id), true) + if err != nil { + return nil, fmt.Errorf("%s: %s", err, data) + } + var c runc.Container + if err := json.Unmarshal(data, &c); err != nil { + return nil, err + } + return &c, nil +} + +type CreateOpts struct { + runc.IO + // PidFile is a path to where a pid file should be created + PidFile string + ConsoleSocket runc.ConsoleSocket + // UserLog is a path to where runsc user log should be generated. + UserLog string +} + +func (o *CreateOpts) args() (out []string, err error) { + if o.PidFile != "" { + abs, err := filepath.Abs(o.PidFile) + if err != nil { + return nil, err + } + out = append(out, "--pid-file", abs) + } + if o.ConsoleSocket != nil { + out = append(out, "--console-socket", o.ConsoleSocket.Path()) + } + if o.UserLog != "" { + out = append(out, "--user-log", o.UserLog) + } + return out, nil +} + +// Create creates a new container and returns its pid if it was created successfully +func (r *Runsc) Create(context context.Context, id, bundle string, opts *CreateOpts) error { + args := []string{"create", "--bundle", bundle} + if opts != nil { + oargs, err := opts.args() + if err != nil { + return err + } + args = append(args, oargs...) + } + cmd := r.command(context, append(args, id)...) 
+ if opts != nil && opts.IO != nil { + opts.Set(cmd) + } + + if cmd.Stdout == nil && cmd.Stderr == nil { + data, err := cmdOutput(cmd, true) + if err != nil { + return fmt.Errorf("%s: %s", err, data) + } + return nil + } + ec, err := Monitor.Start(cmd) + if err != nil { + return err + } + if opts != nil && opts.IO != nil { + if c, ok := opts.IO.(runc.StartCloser); ok { + if err := c.CloseAfterStart(); err != nil { + return err + } + } + } + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + + return err +} + +// Start will start an already created container +func (r *Runsc) Start(context context.Context, id string, cio runc.IO) error { + cmd := r.command(context, "start", id) + if cio != nil { + cio.Set(cmd) + } + + if cmd.Stdout == nil && cmd.Stderr == nil { + data, err := cmdOutput(cmd, true) + if err != nil { + return fmt.Errorf("%s: %s", err, data) + } + return nil + } + + ec, err := Monitor.Start(cmd) + if err != nil { + return err + } + if cio != nil { + if c, ok := cio.(runc.StartCloser); ok { + if err := c.CloseAfterStart(); err != nil { + return err + } + } + } + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + + return err +} + +type waitResult struct { + ID string `json:"id"` + ExitStatus int `json:"exitStatus"` +} + +// Wait will wait for a running container, and return its exit status. +// TODO(random-liu): Add exec process support. +func (r *Runsc) Wait(context context.Context, id string) (int, error) { + data, err := cmdOutput(r.command(context, "wait", id), true) + if err != nil { + return 0, fmt.Errorf("%s: %s", err, data) + } + var res waitResult + if err := json.Unmarshal(data, &res); err != nil { + return 0, err + } + return res.ExitStatus, nil +} + +type ExecOpts struct { + runc.IO + PidFile string + InternalPidFile string + ConsoleSocket runc.ConsoleSocket + Detach bool +} + +func (o *ExecOpts) args() (out []string, err error) { + if o.ConsoleSocket != nil { + out = append(out, "--console-socket", o.ConsoleSocket.Path()) + } + if o.Detach { + out = append(out, "--detach") + } + if o.PidFile != "" { + abs, err := filepath.Abs(o.PidFile) + if err != nil { + return nil, err + } + out = append(out, "--pid-file", abs) + } + if o.InternalPidFile != "" { + abs, err := filepath.Abs(o.InternalPidFile) + if err != nil { + return nil, err + } + out = append(out, "--internal-pid-file", abs) + } + return out, nil +} + +// Exec executres and additional process inside the container based on a full +// OCI Process specification +func (r *Runsc) Exec(context context.Context, id string, spec specs.Process, opts *ExecOpts) error { + f, err := ioutil.TempFile(os.Getenv("XDG_RUNTIME_DIR"), "runsc-process") + if err != nil { + return err + } + defer os.Remove(f.Name()) + err = json.NewEncoder(f).Encode(spec) + f.Close() + if err != nil { + return err + } + args := []string{"exec", "--process", f.Name()} + if opts != nil { + oargs, err := opts.args() + if err != nil { + return err + } + args = append(args, oargs...) + } + cmd := r.command(context, append(args, id)...) 
+ if opts != nil && opts.IO != nil { + opts.Set(cmd) + } + if cmd.Stdout == nil && cmd.Stderr == nil { + data, err := cmdOutput(cmd, true) + if err != nil { + return fmt.Errorf("%s: %s", err, data) + } + return nil + } + ec, err := Monitor.Start(cmd) + if err != nil { + return err + } + if opts != nil && opts.IO != nil { + if c, ok := opts.IO.(runc.StartCloser); ok { + if err := c.CloseAfterStart(); err != nil { + return err + } + } + } + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + return err +} + +// Run runs the create, start, delete lifecycle of the container +// and returns its exit status after it has exited +func (r *Runsc) Run(context context.Context, id, bundle string, opts *CreateOpts) (int, error) { + args := []string{"run", "--bundle", bundle} + if opts != nil { + oargs, err := opts.args() + if err != nil { + return -1, err + } + args = append(args, oargs...) + } + cmd := r.command(context, append(args, id)...) + if opts != nil && opts.IO != nil { + opts.Set(cmd) + } + ec, err := Monitor.Start(cmd) + if err != nil { + return -1, err + } + return Monitor.Wait(cmd, ec) +} + +type DeleteOpts struct { + Force bool +} + +func (o *DeleteOpts) args() (out []string) { + if o.Force { + out = append(out, "--force") + } + return out +} + +// Delete deletes the container +func (r *Runsc) Delete(context context.Context, id string, opts *DeleteOpts) error { + args := []string{"delete"} + if opts != nil { + args = append(args, opts.args()...) + } + return r.runOrError(r.command(context, append(args, id)...)) +} + +// KillOpts specifies options for killing a container and its processes +type KillOpts struct { + All bool + Pid int +} + +func (o *KillOpts) args() (out []string) { + if o.All { + out = append(out, "--all") + } + if o.Pid != 0 { + out = append(out, "--pid", strconv.Itoa(o.Pid)) + } + return out +} + +// Kill sends the specified signal to the container +func (r *Runsc) Kill(context context.Context, id string, sig int, opts *KillOpts) error { + args := []string{ + "kill", + } + if opts != nil { + args = append(args, opts.args()...) 
+ } + return r.runOrError(r.command(context, append(args, id, strconv.Itoa(sig))...)) +} + +// Stats return the stats for a container like cpu, memory, and io +func (r *Runsc) Stats(context context.Context, id string) (*runc.Stats, error) { + cmd := r.command(context, "events", "--stats", id) + rd, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + ec, err := Monitor.Start(cmd) + if err != nil { + return nil, err + } + defer func() { + rd.Close() + Monitor.Wait(cmd, ec) + }() + var e runc.Event + if err := json.NewDecoder(rd).Decode(&e); err != nil { + return nil, err + } + return e.Stats, nil +} + +// Events returns an event stream from runsc for a container with stats and OOM notifications +func (r *Runsc) Events(context context.Context, id string, interval time.Duration) (chan *runc.Event, error) { + cmd := r.command(context, "events", fmt.Sprintf("--interval=%ds", int(interval.Seconds())), id) + rd, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + ec, err := Monitor.Start(cmd) + if err != nil { + rd.Close() + return nil, err + } + var ( + dec = json.NewDecoder(rd) + c = make(chan *runc.Event, 128) + ) + go func() { + defer func() { + close(c) + rd.Close() + Monitor.Wait(cmd, ec) + }() + for { + var e runc.Event + if err := dec.Decode(&e); err != nil { + if err == io.EOF { + return + } + e = runc.Event{ + Type: "error", + Err: err, + } + } + c <- &e + } + }() + return c, nil +} + +// Ps lists all the processes inside the container returning their pids +func (r *Runsc) Ps(context context.Context, id string) ([]int, error) { + data, err := cmdOutput(r.command(context, "ps", "--format", "json", id), true) + if err != nil { + return nil, fmt.Errorf("%s: %s", err, data) + } + var pids []int + if err := json.Unmarshal(data, &pids); err != nil { + return nil, err + } + return pids, nil +} + +// Top lists all the processes inside the container returning the full ps data +func (r *Runsc) Top(context context.Context, id string) (*runc.TopResults, error) { + data, err := cmdOutput(r.command(context, "ps", "--format", "table", id), true) + if err != nil { + return nil, fmt.Errorf("%s: %s", err, data) + } + + topResults, err := runc.ParsePSOutput(data) + if err != nil { + return nil, fmt.Errorf("%s: ", err) + } + return topResults, nil +} + +func (r *Runsc) args() []string { + var args []string + if r.Root != "" { + args = append(args, fmt.Sprintf("--root=%s", r.Root)) + } + if r.Log != "" { + args = append(args, fmt.Sprintf("--log=%s", r.Log)) + } + if r.LogFormat != "" { + args = append(args, fmt.Sprintf("--log-format=%s", r.LogFormat)) + } + for k, v := range r.Config { + args = append(args, fmt.Sprintf("--%s=%s", k, v)) + } + return args +} + +// runOrError will run the provided command. 
If an error is +// encountered and neither Stdout or Stderr was set the error and the +// stderr of the command will be returned in the format of : +// +func (r *Runsc) runOrError(cmd *exec.Cmd) error { + if cmd.Stdout != nil || cmd.Stderr != nil { + ec, err := Monitor.Start(cmd) + if err != nil { + return err + } + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + return err + } + data, err := cmdOutput(cmd, true) + if err != nil { + return fmt.Errorf("%s: %s", err, data) + } + return nil +} + +func (r *Runsc) command(context context.Context, args ...string) *exec.Cmd { + command := r.Command + if command == "" { + command = DefaultCommand + } + cmd := exec.CommandContext(context, command, append(r.args(), args...)...) + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: r.Setpgid, + } + if r.PdeathSignal != 0 { + cmd.SysProcAttr.Pdeathsig = r.PdeathSignal + } + + return cmd +} + +func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, error) { + b := getBuf() + defer putBuf(b) + + cmd.Stdout = b + if combined { + cmd.Stderr = b + } + ec, err := Monitor.Start(cmd) + if err != nil { + return nil, err + } + + status, err := Monitor.Wait(cmd, ec) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + } + + return b.Bytes(), err +} diff --git a/pkg/shim/runsc/utils.go b/pkg/shim/runsc/utils.go new file mode 100644 index 000000000..2732d5e98 --- /dev/null +++ b/pkg/shim/runsc/utils.go @@ -0,0 +1,44 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runsc + +import ( + "bytes" + "strings" + "sync" +) + +var bytesBufferPool = sync.Pool{ + New: func() interface{} { + return bytes.NewBuffer(nil) + }, +} + +func getBuf() *bytes.Buffer { + return bytesBufferPool.Get().(*bytes.Buffer) +} + +func putBuf(b *bytes.Buffer) { + b.Reset() + bytesBufferPool.Put(b) +} + +// FormatLogPath parses runsc config, and fill in %ID% in the log path. +func FormatLogPath(id string, config map[string]string) { + if path, ok := config["debug-log"]; ok { + config["debug-log"] = strings.Replace(path, "%ID%", id, -1) + } +} diff --git a/pkg/shim/v1/proc/deleted_state.go b/pkg/shim/v1/proc/deleted_state.go new file mode 100644 index 000000000..0196c96dd --- /dev/null +++ b/pkg/shim/v1/proc/deleted_state.go @@ -0,0 +1,49 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
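The `pkg/shim/runsc` package added above is a thin client around the `runsc` command line, closely mirroring `containerd/go-runc`. A usage sketch, not code from this patch, is shown below; the import path, root directory, and log location are assumptions.

```go
package main

import (
	"context"
	"fmt"
	"syscall"

	runc "github.com/containerd/go-runc"

	// Assumed import path for the package added above.
	runsc "github.com/google/gvisor-containerd-shim/pkg/shim/runsc"
)

func main() {
	r := &runsc.Runsc{
		Command:      "runsc",                           // same as DefaultCommand
		Root:         "/run/containerd/runsc/default",   // assumed root directory
		Log:          "/var/log/runsc/shim-client.json", // assumed log path
		LogFormat:    runc.JSON,
		PdeathSignal: syscall.SIGKILL,
		Config:       map[string]string{"debug": "true"}, // forwarded to runsc as --debug=true
	}

	// List shells out to `runsc list --format=json` and decodes the result.
	containers, err := r.List(context.Background())
	if err != nil {
		fmt.Println("runsc list failed:", err)
		return
	}
	for _, c := range containers {
		// runc.Container comes from containerd/go-runc.
		fmt.Printf("%s\t%s\t%d\n", c.ID, c.Status, c.Pid)
	}
}
```

State, Wait, Stats, and Events follow the same shape: each shells out to the corresponding runsc subcommand and decodes its JSON output.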
+// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/runtime/proc" + "github.com/pkg/errors" +) + +type deletedState struct{} + +func (*deletedState) Resize(ws console.WinSize) error { + return errors.Errorf("cannot resize a deleted process") +} + +func (*deletedState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a deleted process") +} + +func (*deletedState) Delete(ctx context.Context) error { + return errors.Wrap(errdefs.ErrNotFound, "cannot delete a deleted process") +} + +func (*deletedState) Kill(ctx context.Context, sig uint32, all bool) error { + return errors.Wrap(errdefs.ErrNotFound, "cannot kill a deleted process") +} + +func (*deletedState) SetExited(status int) {} + +func (*deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + return nil, errors.Errorf("cannot exec in a deleted state") +} diff --git a/pkg/shim/v1/proc/exec.go b/pkg/shim/v1/proc/exec.go new file mode 100644 index 000000000..17199960e --- /dev/null +++ b/pkg/shim/v1/proc/exec.go @@ -0,0 +1,282 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
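The body of `pkg/shim/v1/proc/exec.go` below drives `runsc exec` through `ExecOpts`: a detached exec plus two pid files, so the shim can track both the host pid and the pid inside the sandbox. Calling that API directly might look like the following sketch; the container ID, pid-file paths, and import path are hypothetical.

```go
package main

import (
	"context"
	"fmt"

	runc "github.com/containerd/go-runc"
	specs "github.com/opencontainers/runtime-spec/specs-go"

	// Assumed import path for the runsc client package in this patch.
	runsc "github.com/google/gvisor-containerd-shim/pkg/shim/runsc"
)

func main() {
	r := &runsc.Runsc{Root: "/run/containerd/runsc/default"} // assumed root directory

	// No stdio for this example; the shim normally wires pipes or a console here.
	nullIO, err := runc.NewNullIO()
	if err != nil {
		panic(err)
	}

	spec := specs.Process{Args: []string{"/bin/true"}, Cwd: "/"}
	opts := &runsc.ExecOpts{
		IO:              nullIO,
		Detach:          true,
		PidFile:         "/tmp/exec.pid",          // hypothetical host pid file
		InternalPidFile: "/tmp/exec-internal.pid", // hypothetical in-sandbox pid file
	}
	if err := r.Exec(context.Background(), "mycontainer", spec, opts); err != nil {
		fmt.Println("runsc exec failed:", err)
	}
}
```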
+ +package proc + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "sync" + "syscall" + "time" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/runtime/proc" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "golang.org/x/sys/unix" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" +) + +type execProcess struct { + wg sync.WaitGroup + + execState execState + + mu sync.Mutex + id string + console console.Console + io runc.IO + status int + exited time.Time + pid int + internalPid int + closers []io.Closer + stdin io.Closer + stdio proc.Stdio + path string + spec specs.Process + + parent *Init + waitBlock chan struct{} +} + +func (e *execProcess) Wait() { + <-e.waitBlock +} + +func (e *execProcess) ID() string { + return e.id +} + +func (e *execProcess) Pid() int { + e.mu.Lock() + defer e.mu.Unlock() + return e.pid +} + +func (e *execProcess) ExitStatus() int { + e.mu.Lock() + defer e.mu.Unlock() + return e.status +} + +func (e *execProcess) ExitedAt() time.Time { + e.mu.Lock() + defer e.mu.Unlock() + return e.exited +} + +func (e *execProcess) SetExited(status int) { + e.mu.Lock() + defer e.mu.Unlock() + + e.execState.SetExited(status) +} + +func (e *execProcess) setExited(status int) { + e.status = status + e.exited = time.Now() + e.parent.Platform.ShutdownConsole(context.Background(), e.console) + close(e.waitBlock) +} + +func (e *execProcess) Delete(ctx context.Context) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Delete(ctx) +} + +func (e *execProcess) delete(ctx context.Context) error { + e.wg.Wait() + if e.io != nil { + for _, c := range e.closers { + c.Close() + } + e.io.Close() + } + pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) + // silently ignore error + os.Remove(pidfile) + internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) + // silently ignore error + os.Remove(internalPidfile) + return nil +} + +func (e *execProcess) Resize(ws console.WinSize) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Resize(ws) +} + +func (e *execProcess) resize(ws console.WinSize) error { + if e.console == nil { + return nil + } + return e.console.Resize(ws) +} + +func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Kill(ctx, sig, false) +} + +func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { + internalPid := e.internalPid + if internalPid != 0 { + if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{ + Pid: internalPid, + }); err != nil { + // If this returns error, consider the process has already stopped. + // TODO: Fix after signal handling is fixed. 
+ return errors.Wrapf(errdefs.ErrNotFound, err.Error()) + } + } + return nil +} + +func (e *execProcess) Stdin() io.Closer { + return e.stdin +} + +func (e *execProcess) Stdio() proc.Stdio { + return e.stdio +} + +func (e *execProcess) Start(ctx context.Context) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Start(ctx) +} + +func (e *execProcess) start(ctx context.Context) (err error) { + var ( + socket *runc.Socket + pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) + internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) + ) + if e.stdio.Terminal { + if socket, err = runc.NewTempConsoleSocket(); err != nil { + return errors.Wrap(err, "failed to create runc console socket") + } + defer socket.Close() + } else if e.stdio.IsNull() { + if e.io, err = runc.NewNullIO(); err != nil { + return errors.Wrap(err, "creating new NULL IO") + } + } else { + if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { + return errors.Wrap(err, "failed to create runc io pipes") + } + } + opts := &runsc.ExecOpts{ + PidFile: pidfile, + InternalPidFile: internalPidfile, + IO: e.io, + Detach: true, + } + if socket != nil { + opts.ConsoleSocket = socket + } + eventCh := e.parent.Monitor.Subscribe() + defer func() { + // Unsubscribe if an error is returned. + if err != nil { + e.parent.Monitor.Unsubscribe(eventCh) + } + }() + if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil { + close(e.waitBlock) + return e.parent.runtimeError(err, "OCI runtime exec failed") + } + if e.stdio.Stdin != "" { + sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return errors.Wrapf(err, "failed to open stdin fifo %s", e.stdio.Stdin) + } + e.closers = append(e.closers, sc) + e.stdin = sc + } + var copyWaitGroup sync.WaitGroup + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + if socket != nil { + console, err := socket.ReceiveMaster() + if err != nil { + return errors.Wrap(err, "failed to retrieve console master") + } + if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { + return errors.Wrap(err, "failed to start console copy") + } + } else if !e.stdio.IsNull() { + if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { + return errors.Wrap(err, "failed to start io pipe copy") + } + } + copyWaitGroup.Wait() + pid, err := runc.ReadPidFile(opts.PidFile) + if err != nil { + return errors.Wrap(err, "failed to retrieve OCI runtime exec pid") + } + e.pid = pid + internalPid, err := runc.ReadPidFile(opts.InternalPidFile) + if err != nil { + return errors.Wrap(err, "failed to retrieve OCI runtime exec internal pid") + } + e.internalPid = internalPid + go func() { + defer e.parent.Monitor.Unsubscribe(eventCh) + for event := range eventCh { + if event.Pid == e.pid { + ExitCh <- Exit{ + Timestamp: event.Timestamp, + ID: e.id, + Status: event.Status, + } + break + } + } + }() + return nil +} + +func (e *execProcess) Status(ctx context.Context) (string, error) { + e.mu.Lock() + defer e.mu.Unlock() + // if we don't have a pid then the exec process has just been created + if e.pid == 0 { + return "created", nil + } + // if we have a pid and it can be signaled, the process is running + // TODO(random-liu): Use `runsc kill --pid`. 
+ if err := unix.Kill(e.pid, 0); err == nil { + return "running", nil + } + // else if we have a pid but it can nolonger be signaled, it has stopped + return "stopped", nil +} diff --git a/pkg/shim/v1/proc/exec_state.go b/pkg/shim/v1/proc/exec_state.go new file mode 100644 index 000000000..5416cb601 --- /dev/null +++ b/pkg/shim/v1/proc/exec_state.go @@ -0,0 +1,154 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + + "github.com/containerd/console" + "github.com/pkg/errors" +) + +type execState interface { + Resize(console.WinSize) error + Start(context.Context) error + Delete(context.Context) error + Kill(context.Context, uint32, bool) error + SetExited(int) +} + +type execCreatedState struct { + p *execProcess +} + +func (s *execCreatedState) transition(name string) error { + switch name { + case "running": + s.p.execState = &execRunningState{p: s.p} + case "stopped": + s.p.execState = &execStoppedState{p: s.p} + case "deleted": + s.p.execState = &deletedState{} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execCreatedState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *execCreatedState) Start(ctx context.Context) error { + if err := s.p.start(ctx); err != nil { + return err + } + return s.transition("running") +} + +func (s *execCreatedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execCreatedState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +type execRunningState struct { + p *execProcess +} + +func (s *execRunningState) transition(name string) error { + switch name { + case "stopped": + s.p.execState = &execStoppedState{p: s.p} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execRunningState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *execRunningState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a running process") +} + +func (s *execRunningState) Delete(ctx context.Context) error { + return errors.Errorf("cannot delete a running process") +} + +func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execRunningState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +type execStoppedState struct { + p *execProcess +} + +func (s *execStoppedState) transition(name string) error { + switch name { + case "deleted": + s.p.execState = &deletedState{} + default: + return 
errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execStoppedState) Resize(ws console.WinSize) error { + return errors.Errorf("cannot resize a stopped container") +} + +func (s *execStoppedState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a stopped process") +} + +func (s *execStoppedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execStoppedState) SetExited(status int) { + // no op +} diff --git a/pkg/shim/v1/proc/init.go b/pkg/shim/v1/proc/init.go new file mode 100644 index 000000000..fadfc9146 --- /dev/null +++ b/pkg/shim/v1/proc/init.go @@ -0,0 +1,462 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "encoding/json" + "io" + "path/filepath" + "strings" + "sync" + "syscall" + "time" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/runtime/proc" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" +) + +// InitPidFile name of the file that contains the init pid +const InitPidFile = "init.pid" + +// Init represents an initial process for a container +type Init struct { + wg sync.WaitGroup + initState initState + + // mu is used to ensure that `Start()` and `Exited()` calls return in + // the right order when invoked in separate go routines. + // This is the case within the shim implementation as it makes use of + // the reaper interface. 
+ mu sync.Mutex + + waitBlock chan struct{} + + WorkDir string + + id string + Bundle string + console console.Console + Platform proc.Platform + io runc.IO + runtime *runsc.Runsc + status int + exited time.Time + pid int + closers []io.Closer + stdin io.Closer + stdio proc.Stdio + Rootfs string + IoUID int + IoGID int + Sandbox bool + UserLog string + Monitor ProcessMonitor +} + +// NewRunsc returns a new runsc instance for a process +func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc { + if root == "" { + root = RunscRoot + } + return &runsc.Runsc{ + Command: runtime, + PdeathSignal: syscall.SIGKILL, + Log: filepath.Join(path, "log.json"), + LogFormat: runc.JSON, + Root: filepath.Join(root, namespace), + Config: config, + } +} + +// New returns a new init process +func New(id string, runtime *runsc.Runsc, stdio proc.Stdio) *Init { + p := &Init{ + id: id, + runtime: runtime, + stdio: stdio, + status: 0, + waitBlock: make(chan struct{}), + } + p.initState = &createdState{p: p} + return p +} + +// Create the process with the provided config +func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { + var socket *runc.Socket + if r.Terminal { + if socket, err = runc.NewTempConsoleSocket(); err != nil { + return errors.Wrap(err, "failed to create OCI runtime console socket") + } + defer socket.Close() + } else if hasNoIO(r) { + if p.io, err = runc.NewNullIO(); err != nil { + return errors.Wrap(err, "creating new NULL IO") + } + } else { + if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { + return errors.Wrap(err, "failed to create OCI runtime io pipes") + } + } + pidFile := filepath.Join(p.Bundle, InitPidFile) + opts := &runsc.CreateOpts{ + PidFile: pidFile, + } + if socket != nil { + opts.ConsoleSocket = socket + } + if p.Sandbox { + opts.IO = p.io + // UserLog is only useful for sandbox. 
+ opts.UserLog = p.UserLog + } + if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil { + return p.runtimeError(err, "OCI runtime create failed") + } + if r.Stdin != "" { + sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return errors.Wrapf(err, "failed to open stdin fifo %s", r.Stdin) + } + p.stdin = sc + p.closers = append(p.closers, sc) + } + var copyWaitGroup sync.WaitGroup + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + if socket != nil { + console, err := socket.ReceiveMaster() + if err != nil { + return errors.Wrap(err, "failed to retrieve console master") + } + console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup) + if err != nil { + return errors.Wrap(err, "failed to start console copy") + } + p.console = console + } else if !hasNoIO(r) { + if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup); err != nil { + return errors.Wrap(err, "failed to start io pipe copy") + } + } + + copyWaitGroup.Wait() + pid, err := runc.ReadPidFile(pidFile) + if err != nil { + return errors.Wrap(err, "failed to retrieve OCI runtime container pid") + } + p.pid = pid + return nil +} + +// Wait for the process to exit +func (p *Init) Wait() { + <-p.waitBlock +} + +// ID of the process +func (p *Init) ID() string { + return p.id +} + +// Pid of the process +func (p *Init) Pid() int { + return p.pid +} + +// ExitStatus of the process +func (p *Init) ExitStatus() int { + p.mu.Lock() + defer p.mu.Unlock() + return p.status +} + +// ExitedAt at time when the process exited +func (p *Init) ExitedAt() time.Time { + p.mu.Lock() + defer p.mu.Unlock() + return p.exited +} + +// Status of the process +func (p *Init) Status(ctx context.Context) (string, error) { + p.mu.Lock() + defer p.mu.Unlock() + c, err := p.runtime.State(ctx, p.id) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return "stopped", nil + } + return "", p.runtimeError(err, "OCI runtime state failed") + } + return p.convertStatus(c.Status), nil +} + +// Start the init process +func (p *Init) Start(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Start(ctx) +} + +func (p *Init) start(ctx context.Context) error { + var cio runc.IO + if !p.Sandbox { + cio = p.io + } + if err := p.runtime.Start(ctx, p.id, cio); err != nil { + return p.runtimeError(err, "OCI runtime start failed") + } + go func() { + status, err := p.runtime.Wait(context.Background(), p.id) + if err != nil { + log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) + // TODO(random-liu): Handle runsc kill error. 
+ if err := p.killAll(ctx); err != nil { + log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id) + } + status = internalErrorCode + } + ExitCh <- Exit{ + Timestamp: time.Now(), + ID: p.id, + Status: status, + } + }() + return nil +} + +// SetExited of the init process with the next status +func (p *Init) SetExited(status int) { + p.mu.Lock() + defer p.mu.Unlock() + + p.initState.SetExited(status) +} + +func (p *Init) setExited(status int) { + p.exited = time.Now() + p.status = status + p.Platform.ShutdownConsole(context.Background(), p.console) + close(p.waitBlock) +} + +// Delete the init process +func (p *Init) Delete(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Delete(ctx) +} + +func (p *Init) delete(ctx context.Context) error { + p.killAll(ctx) + p.wg.Wait() + err := p.runtime.Delete(ctx, p.id, nil) + // ignore errors if a runtime has already deleted the process + // but we still hold metadata and pipes + // + // this is common during a checkpoint, runc will delete the container state + // after a checkpoint and the container will no longer exist within runc + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + err = nil + } else { + err = p.runtimeError(err, "failed to delete task") + } + } + if p.io != nil { + for _, c := range p.closers { + c.Close() + } + p.io.Close() + } + if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { + log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") + if err == nil { + err = errors.Wrap(err2, "failed rootfs umount") + } + } + return err +} + +// Resize the init processes console +func (p *Init) Resize(ws console.WinSize) error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.console == nil { + return nil + } + return p.console.Resize(ws) +} + +func (p *Init) resize(ws console.WinSize) error { + if p.console == nil { + return nil + } + return p.console.Resize(ws) +} + +// Kill the init process +func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Kill(ctx, signal, all) +} + +func (p *Init) kill(context context.Context, signal uint32, all bool) error { + var ( + killErr error + backoff = 100 * time.Millisecond + ) + timeout := 1 * time.Second + for start := time.Now(); time.Now().Sub(start) < timeout; { + c, err := p.runtime.State(context, p.id) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return errors.Wrapf(errdefs.ErrNotFound, "no such process") + } + return p.runtimeError(err, "OCI runtime state failed") + } + // For runsc, signal only works when container is running state. + // If the container is not in running state, directly return + // "no such process" + if p.convertStatus(c.Status) == "stopped" { + return errors.Wrapf(errdefs.ErrNotFound, "no such process") + } + killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{ + All: all, + }) + if killErr == nil { + return nil + } + time.Sleep(backoff) + backoff *= 2 + } + return p.runtimeError(killErr, "kill timeout") +} + +// KillAll processes belonging to the init process +func (p *Init) KillAll(context context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + return p.killAll(context) +} + +func (p *Init) killAll(context context.Context) error { + p.runtime.Kill(context, p.id, int(syscall.SIGKILL), &runsc.KillOpts{ + All: true, + }) + // Ignore error handling for `runsc kill --all` for now. 
+ // * If it doesn't return error, it is good; + // * If it returns error, consider the container has already stopped. + // TODO: Fix `runsc kill --all` error handling. + return nil +} + +// Stdin of the process +func (p *Init) Stdin() io.Closer { + return p.stdin +} + +// Runtime returns the OCI runtime configured for the init process +func (p *Init) Runtime() *runsc.Runsc { + return p.runtime +} + +// Exec returns a new child process +func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Exec(ctx, path, r) +} + +// exec returns a new exec'd process +func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + // process exec request + var spec specs.Process + if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { + return nil, err + } + spec.Terminal = r.Terminal + + e := &execProcess{ + id: r.ID, + path: path, + parent: p, + spec: spec, + stdio: proc.Stdio{ + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Terminal: r.Terminal, + }, + waitBlock: make(chan struct{}), + } + e.execState = &execCreatedState{p: e} + return e, nil +} + +// Stdio of the process +func (p *Init) Stdio() proc.Stdio { + return p.stdio +} + +func (p *Init) runtimeError(rErr error, msg string) error { + if rErr == nil { + return nil + } + + rMsg, err := getLastRuntimeError(p.runtime) + switch { + case err != nil: + return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error()) + case rMsg == "": + return errors.Wrap(rErr, msg) + default: + return errors.Errorf("%s: %s", msg, rMsg) + } +} + +func (p *Init) convertStatus(status string) string { + if status == "created" && !p.Sandbox && p.status == internalErrorCode { + // Treat start failure state for non-root container as stopped. + return "stopped" + } + return status +} + +func withConditionalIO(c proc.Stdio) runc.IOOpt { + return func(o *runc.IOOption) { + o.OpenStdin = c.Stdin != "" + o.OpenStdout = c.Stdout != "" + o.OpenStderr = c.Stderr != "" + } +} diff --git a/pkg/shim/v1/proc/init_state.go b/pkg/shim/v1/proc/init_state.go new file mode 100644 index 000000000..868646b6c --- /dev/null +++ b/pkg/shim/v1/proc/init_state.go @@ -0,0 +1,182 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
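The body of `init_state.go` below uses the same state pattern as `exec_state.go` above: one type per state, invalid operations rejected, and explicit transitions between states. The legal transitions both files enforce can be condensed into a few lines; this is an illustrative summary, not the shim's API.

```go
package main

import "fmt"

// Condensed view of the transitions the proc state types enforce;
// the real implementation uses one struct per state.
var validTransitions = map[string][]string{
	"created": {"running", "stopped", "deleted"},
	"running": {"stopped"},
	"stopped": {"deleted"},
}

func canTransition(from, to string) bool {
	for _, t := range validTransitions[from] {
		if t == to {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(canTransition("created", "running")) // true
	fmt.Println(canTransition("stopped", "running")) // false
}
```

The one asymmetry in the real code is that a non-root container whose start fails is treated as stopped rather than left in the created state.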
+ +package proc + +import ( + "context" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/runtime/proc" + "github.com/pkg/errors" +) + +type initState interface { + Resize(console.WinSize) error + Start(context.Context) error + Delete(context.Context) error + Exec(context.Context, string, *ExecConfig) (proc.Process, error) + Kill(context.Context, uint32, bool) error + SetExited(int) +} + +type createdState struct { + p *Init +} + +func (s *createdState) transition(name string) error { + switch name { + case "running": + s.p.initState = &runningState{p: s.p} + case "stopped": + s.p.initState = &stoppedState{p: s.p} + case "deleted": + s.p.initState = &deletedState{} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *createdState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *createdState) Start(ctx context.Context) error { + if err := s.p.start(ctx); err != nil { + // Containerd doesn't allow deleting container in created state. + // However, for gvisor, a non-root container in created state can + // only go to running state. If the container can't be started, + // it can only stay in created state, and never be deleted. + // To work around that, we treat non-root container in start failure + // state as stopped. + if !s.p.Sandbox { + s.p.io.Close() + s.p.setExited(internalErrorCode) + if err := s.transition("stopped"); err != nil { + panic(err) + } + } + return err + } + return s.transition("running") +} + +func (s *createdState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *createdState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + return s.p.exec(ctx, path, r) +} + +type runningState struct { + p *Init +} + +func (s *runningState) transition(name string) error { + switch name { + case "stopped": + s.p.initState = &stoppedState{p: s.p} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *runningState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *runningState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a running process") +} + +func (s *runningState) Delete(ctx context.Context) error { + return errors.Errorf("cannot delete a running process") +} + +func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *runningState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + return s.p.exec(ctx, path, r) +} + +type stoppedState struct { + p *Init +} + +func (s *stoppedState) transition(name string) error { + switch name { + case "deleted": + s.p.initState = &deletedState{} + default: + return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *stoppedState) Resize(ws console.WinSize) error { + return errors.Errorf("cannot resize 
a stopped container") +} + +func (s *stoppedState) Start(ctx context.Context) error { + return errors.Errorf("cannot start a stopped process") +} + +func (s *stoppedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error { + return errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s not found", s.p.id) +} + +func (s *stoppedState) SetExited(status int) { + // no op +} + +func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { + return nil, errors.Errorf("cannot exec in a stopped state") +} diff --git a/pkg/shim/v1/proc/io.go b/pkg/shim/v1/proc/io.go new file mode 100644 index 000000000..2677b4e54 --- /dev/null +++ b/pkg/shim/v1/proc/io.go @@ -0,0 +1,167 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "fmt" + "io" + "os" + "sync" + "sync/atomic" + "syscall" + + "github.com/containerd/containerd/log" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" +) + +// TODO(random-liu): This file can be a util. 
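+
+// The helpers below copy container stdio between the runc IO pipes and the
+// FIFOs (or regular files) requested by the caller, reusing 32KB buffers from
+// bufPool. countingWriteCloser lets stdout and stderr share a single file and
+// closes it only after both copiers have finished with it.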
+ +var bufPool = sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32<<10) + return &buffer + }, +} + +func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error { + var sameFile *countingWriteCloser + for _, i := range []struct { + name string + dest func(wc io.WriteCloser, rc io.Closer) + }{ + { + name: stdout, + dest: func(wc io.WriteCloser, rc io.Closer) { + wg.Add(1) + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil { + log.G(ctx).Warn("error copying stdout") + } + wg.Done() + wc.Close() + if rc != nil { + rc.Close() + } + }() + }, + }, { + name: stderr, + dest: func(wc io.WriteCloser, rc io.Closer) { + wg.Add(1) + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil { + log.G(ctx).Warn("error copying stderr") + } + wg.Done() + wc.Close() + if rc != nil { + rc.Close() + } + }() + }, + }, + } { + ok, err := isFifo(i.name) + if err != nil { + return err + } + var ( + fw io.WriteCloser + fr io.Closer + ) + if ok { + if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + } else { + if sameFile != nil { + sameFile.count++ + i.dest(sameFile, nil) + continue + } + if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + if stdout == stderr { + sameFile = &countingWriteCloser{ + WriteCloser: fw, + count: 1, + } + } + } + i.dest(fw, fr) + } + if stdin == "" { + return nil + } + f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", stdin, err) + } + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + + io.CopyBuffer(rio.Stdin(), f, *p) + rio.Stdin().Close() + f.Close() + }() + return nil +} + +// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times. +type countingWriteCloser struct { + io.WriteCloser + count int64 +} + +func (c *countingWriteCloser) Close() error { + if atomic.AddInt64(&c.count, -1) > 0 { + return nil + } + return c.WriteCloser.Close() +} + +// isFifo checks if a file is a fifo +// if the file does not exist then it returns false +func isFifo(path string) (bool, error) { + stat, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + if stat.Mode()&os.ModeNamedPipe == os.ModeNamedPipe { + return true, nil + } + return false, nil +} diff --git a/pkg/shim/v1/proc/process.go b/pkg/shim/v1/proc/process.go new file mode 100644 index 000000000..1bfa99f4c --- /dev/null +++ b/pkg/shim/v1/proc/process.go @@ -0,0 +1,37 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "github.com/pkg/errors" +) + +// RunscRoot is the path to the root runsc state directory +const RunscRoot = "/run/containerd/runsc" + +func stateName(v interface{}) string { + switch v.(type) { + case *runningState, *execRunningState: + return "running" + case *createdState, *execCreatedState: + return "created" + case *deletedState: + return "deleted" + case *stoppedState: + return "stopped" + } + panic(errors.Errorf("invalid state %v", v)) +} diff --git a/pkg/shim/v1/proc/types.go b/pkg/shim/v1/proc/types.go new file mode 100644 index 000000000..dcd43bcca --- /dev/null +++ b/pkg/shim/v1/proc/types.go @@ -0,0 +1,70 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "time" + + google_protobuf "github.com/gogo/protobuf/types" + + runc "github.com/containerd/go-runc" +) + +// Mount holds filesystem mount configuration +type Mount struct { + Type string + Source string + Target string + Options []string +} + +// CreateConfig hold task creation configuration +type CreateConfig struct { + ID string + Bundle string + Runtime string + Rootfs []Mount + Terminal bool + Stdin string + Stdout string + Stderr string + Options *google_protobuf.Any +} + +// ExecConfig holds exec creation configuration +type ExecConfig struct { + ID string + Terminal bool + Stdin string + Stdout string + Stderr string + Spec *google_protobuf.Any +} + +// Exit is the type of exit events +type Exit struct { + Timestamp time.Time + ID string + Status int +} + +// ProcessMonitor monitors process exit changes +type ProcessMonitor interface { + // Subscribe to process exit changes + Subscribe() chan runc.Exit + // Unsubscribe to process exit changes + Unsubscribe(c chan runc.Exit) +} diff --git a/pkg/shim/v1/proc/utils.go b/pkg/shim/v1/proc/utils.go new file mode 100644 index 000000000..84bd702ab --- /dev/null +++ b/pkg/shim/v1/proc/utils.go @@ -0,0 +1,92 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "encoding/json" + "io" + "os" + "strings" + "time" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" +) + +const ( + internalErrorCode = 128 + bufferSize = 32 +) + +// ExitCh is the exit events channel for containers and exec processes +// inside the sandbox. +var ExitCh = make(chan Exit, bufferSize) + +// TODO(random-liu): This can be a utility. + +// TODO(mlaventure): move to runc package? +func getLastRuntimeError(r *runsc.Runsc) (string, error) { + if r.Log == "" { + return "", nil + } + + f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400) + if err != nil { + return "", err + } + + var ( + errMsg string + log struct { + Level string + Msg string + Time time.Time + } + ) + + dec := json.NewDecoder(f) + for err = nil; err == nil; { + if err = dec.Decode(&log); err != nil && err != io.EOF { + return "", err + } + if log.Level == "error" { + errMsg = strings.TrimSpace(log.Msg) + } + } + + return errMsg, nil +} + +func copyFile(to, from string) error { + ff, err := os.Open(from) + if err != nil { + return err + } + defer ff.Close() + tt, err := os.Create(to) + if err != nil { + return err + } + defer tt.Close() + + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + _, err = io.CopyBuffer(tt, ff, *p) + return err +} + +func hasNoIO(r *CreateConfig) bool { + return r.Stdin == "" && r.Stdout == "" && r.Stderr == "" +} diff --git a/pkg/shim/v1/shim/platform.go b/pkg/shim/v1/shim/platform.go new file mode 100644 index 000000000..86252c3f5 --- /dev/null +++ b/pkg/shim/v1/shim/platform.go @@ -0,0 +1,110 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
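+
+// platform.go provides the Linux console platform for the shim: a single
+// console.Epoller services every container console, and CopyConsole wires an
+// epoll-backed console to the stdin/stdout FIFOs provided by containerd.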
+ +package shim + +import ( + "context" + "io" + "sync" + "syscall" + + "github.com/containerd/console" + "github.com/containerd/fifo" + "github.com/pkg/errors" +) + +type linuxPlatform struct { + epoller *console.Epoller +} + +func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { + if p.epoller == nil { + return nil, errors.New("uninitialized epoller") + } + + epollConsole, err := p.epoller.Add(console) + if err != nil { + return nil, err + } + + if stdin != "" { + in, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY, 0) + if err != nil { + return nil, err + } + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(epollConsole, in, *p) + }() + } + + outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) + if err != nil { + return nil, err + } + outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) + if err != nil { + return nil, err + } + wg.Add(1) + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(outw, epollConsole, *p) + epollConsole.Close() + outr.Close() + outw.Close() + wg.Done() + }() + return epollConsole, nil +} + +func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { + if p.epoller == nil { + return errors.New("uninitialized epoller") + } + epollConsole, ok := cons.(*console.EpollConsole) + if !ok { + return errors.Errorf("expected EpollConsole, got %#v", cons) + } + return epollConsole.Shutdown(p.epoller.CloseConsole) +} + +func (p *linuxPlatform) Close() error { + return p.epoller.Close() +} + +// initialize a single epoll fd to manage our consoles. `initPlatform` should +// only be called once. +func (s *Service) initPlatform() error { + if s.platform != nil { + return nil + } + epoller, err := console.NewEpoller() + if err != nil { + return errors.Wrap(err, "failed to initialize epoller") + } + s.platform = &linuxPlatform{ + epoller: epoller, + } + go epoller.Wait() + return nil +} diff --git a/pkg/shim/v1/shim/service.go b/pkg/shim/v1/shim/service.go new file mode 100644 index 000000000..27e915c14 --- /dev/null +++ b/pkg/shim/v1/shim/service.go @@ -0,0 +1,579 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
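+
+// service.go implements the containerd runtime v1 shim service over GRPC on
+// top of runsc: Create mounts the rootfs and builds the Init process, and the
+// remaining task APIs are dispatched to that process or to its execs by ID.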
+ +package shim + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/containerd/console" + eventstypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/api/types/task" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/runtime" + "github.com/containerd/containerd/runtime/linux/runctypes" + rproc "github.com/containerd/containerd/runtime/proc" + "github.com/containerd/containerd/runtime/v1/shim" + shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" + "github.com/containerd/typeurl" + ptypes "github.com/gogo/protobuf/types" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" + "github.com/google/gvisor-containerd-shim/pkg/v1/proc" + "github.com/google/gvisor-containerd-shim/pkg/v1/utils" +) + +var ( + empty = &ptypes.Empty{} + bufPool = sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32<<10) + return &buffer + }, + } +) + +// Config contains shim specific configuration +type Config struct { + Path string + Namespace string + WorkDir string + RuntimeRoot string + RunscConfig map[string]string +} + +// NewService returns a new shim service that can be used via GRPC +func NewService(config Config, publisher events.Publisher) (*Service, error) { + if config.Namespace == "" { + return nil, fmt.Errorf("shim namespace cannot be empty") + } + ctx := namespaces.WithNamespace(context.Background(), config.Namespace) + ctx = log.WithLogger(ctx, logrus.WithFields(logrus.Fields{ + "namespace": config.Namespace, + "path": config.Path, + "pid": os.Getpid(), + })) + s := &Service{ + config: config, + context: ctx, + processes: make(map[string]rproc.Process), + events: make(chan interface{}, 128), + ec: proc.ExitCh, + } + go s.processExits() + if err := s.initPlatform(); err != nil { + return nil, errors.Wrap(err, "failed to initialized platform behavior") + } + go s.forward(publisher) + return s, nil +} + +// Service is the shim implementation of a remote shim over GRPC +type Service struct { + mu sync.Mutex + + config Config + context context.Context + processes map[string]rproc.Process + events chan interface{} + platform rproc.Platform + ec chan proc.Exit + + // Filled by Create() + id string + bundle string +} + +// Create a new initial process and container with the underlying OCI runtime +func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ *shimapi.CreateTaskResponse, err error) { + s.mu.Lock() + defer s.mu.Unlock() + + var mounts []proc.Mount + for _, m := range r.Rootfs { + mounts = append(mounts, proc.Mount{ + Type: m.Type, + Source: m.Source, + Target: m.Target, + Options: m.Options, + }) + } + + rootfs := filepath.Join(r.Bundle, "rootfs") + if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { + return nil, err + } + + config := &proc.CreateConfig{ + ID: r.ID, + Bundle: r.Bundle, + Runtime: r.Runtime, + Rootfs: mounts, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Options: r.Options, + } + defer func() { + if err != nil { + if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { + log.G(ctx).WithError(err2).Warn("Failed to cleanup rootfs mount") + } + } + }() + for _, rm := range mounts { + m := 
&mount.Mount{ + Type: rm.Type, + Source: rm.Source, + Options: rm.Options, + } + if err := m.Mount(rootfs); err != nil { + return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) + } + } + process, err := newInit( + ctx, + s.config.Path, + s.config.WorkDir, + s.config.RuntimeRoot, + s.config.Namespace, + s.config.RunscConfig, + s.platform, + config, + ) + if err := process.Create(ctx, config); err != nil { + return nil, errdefs.ToGRPC(err) + } + // save the main task id and bundle to the shim for additional requests + s.id = r.ID + s.bundle = r.Bundle + pid := process.Pid() + s.processes[r.ID] = process + return &shimapi.CreateTaskResponse{ + Pid: uint32(pid), + }, nil +} + +// Start a process +func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.StartResponse, error) { + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if err := p.Start(ctx); err != nil { + return nil, err + } + return &shimapi.StartResponse{ + ID: p.ID(), + Pid: uint32(p.Pid()), + }, nil +} + +// Delete the initial process and container +func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteResponse, error) { + p, err := s.getInitProcess() + if err != nil { + return nil, err + } + if err := p.Delete(ctx); err != nil { + return nil, err + } + s.mu.Lock() + delete(s.processes, s.id) + s.mu.Unlock() + s.platform.Close() + return &shimapi.DeleteResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + Pid: uint32(p.Pid()), + }, nil +} + +// DeleteProcess deletes an exec'd process +func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) { + if r.ID == s.id { + return nil, status.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess") + } + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if err := p.Delete(ctx); err != nil { + return nil, err + } + s.mu.Lock() + delete(s.processes, r.ID) + s.mu.Unlock() + return &shimapi.DeleteResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + Pid: uint32(p.Pid()), + }, nil +} + +// Exec an additional process inside the container +func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*ptypes.Empty, error) { + s.mu.Lock() + + if p := s.processes[r.ID]; p != nil { + s.mu.Unlock() + return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ID) + } + + p := s.processes[s.id] + s.mu.Unlock() + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + + process, err := p.(*proc.Init).Exec(ctx, s.config.Path, &proc.ExecConfig{ + ID: r.ID, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Spec: r.Spec, + }) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + s.mu.Lock() + s.processes[r.ID] = process + s.mu.Unlock() + return empty, nil +} + +// ResizePty of a process +func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*ptypes.Empty, error) { + if r.ID == "" { + return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided") + } + ws := console.WinSize{ + Width: uint16(r.Width), + Height: uint16(r.Height), + } + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if err := p.Resize(ws); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// State returns runtime state information for a process +func (s *Service) State(ctx context.Context, r 
*shimapi.StateRequest) (*shimapi.StateResponse, error) { + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + st, err := p.Status(ctx) + if err != nil { + return nil, err + } + status := task.StatusUnknown + switch st { + case "created": + status = task.StatusCreated + case "running": + status = task.StatusRunning + case "stopped": + status = task.StatusStopped + } + sio := p.Stdio() + return &shimapi.StateResponse{ + ID: p.ID(), + Bundle: s.bundle, + Pid: uint32(p.Pid()), + Status: status, + Stdin: sio.Stdin, + Stdout: sio.Stdout, + Stderr: sio.Stderr, + Terminal: sio.Terminal, + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +// Pause the container +func (s *Service) Pause(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Resume the container +func (s *Service) Resume(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Kill a process with the provided signal +func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Empty, error) { + if r.ID == "" { + p, err := s.getInitProcess() + if err != nil { + return nil, err + } + if err := p.Kill(ctx, r.Signal, r.All); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil + } + + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if err := p.Kill(ctx, r.Signal, r.All); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// ListPids returns all pids inside the container +func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*shimapi.ListPidsResponse, error) { + pids, err := s.getContainerPids(ctx, r.ID) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + var processes []*task.ProcessInfo + for _, pid := range pids { + pInfo := task.ProcessInfo{ + Pid: pid, + } + for _, p := range s.processes { + if p.Pid() == int(pid) { + d := &runctypes.ProcessDetails{ + ExecID: p.ID(), + } + a, err := typeurl.MarshalAny(d) + if err != nil { + return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) + } + pInfo.Info = a + break + } + } + processes = append(processes, &pInfo) + } + return &shimapi.ListPidsResponse{ + Processes: processes, + }, nil +} + +// CloseIO of a process +func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*ptypes.Empty, error) { + p, err := s.getExecProcess(r.ID) + if err != nil { + return nil, err + } + if stdin := p.Stdin(); stdin != nil { + if err := stdin.Close(); err != nil { + return nil, errors.Wrap(err, "close stdin") + } + } + return empty, nil +} + +// Checkpoint the container +func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointTaskRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// ShimInfo returns shim information such as the shim's pid +func (s *Service) ShimInfo(ctx context.Context, r *ptypes.Empty) (*shimapi.ShimInfoResponse, error) { + return &shimapi.ShimInfoResponse{ + ShimPid: uint32(os.Getpid()), + }, nil +} + +// Update a running container +func (s *Service) Update(ctx context.Context, r *shimapi.UpdateTaskRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Wait for a process to exit +func (s *Service) Wait(ctx context.Context, r *shimapi.WaitRequest) (*shimapi.WaitResponse, error) { + p, err := s.getExecProcess(r.ID) + if err != nil { + return 
nil, err + } + p.Wait() + + return &shimapi.WaitResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +func (s *Service) processExits() { + for e := range s.ec { + s.checkProcesses(e) + } +} + +func (s *Service) allProcesses() []rproc.Process { + s.mu.Lock() + defer s.mu.Unlock() + + res := make([]rproc.Process, 0, len(s.processes)) + for _, p := range s.processes { + res = append(res, p) + } + return res +} + +func (s *Service) checkProcesses(e proc.Exit) { + for _, p := range s.allProcesses() { + if p.ID() == e.ID { + if ip, ok := p.(*proc.Init); ok { + // Ensure all children are killed + if err := ip.KillAll(s.context); err != nil { + log.G(s.context).WithError(err).WithField("id", ip.ID()). + Error("failed to kill init's children") + } + } + p.SetExited(e.Status) + s.events <- &eventstypes.TaskExit{ + ContainerID: s.id, + ID: p.ID(), + Pid: uint32(p.Pid()), + ExitStatus: uint32(e.Status), + ExitedAt: p.ExitedAt(), + } + return + } + } +} + +func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { + p, err := s.getInitProcess() + if err != nil { + return nil, err + } + + ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) + if err != nil { + return nil, err + } + pids := make([]uint32, 0, len(ps)) + for _, pid := range ps { + pids = append(pids, uint32(pid)) + } + return pids, nil +} + +func (s *Service) forward(publisher events.Publisher) { + for e := range s.events { + if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil { + log.G(s.context).WithError(err).Error("post event") + } + } +} + +// getInitProcess returns initial process +func (s *Service) getInitProcess() (rproc.Process, error) { + s.mu.Lock() + defer s.mu.Unlock() + p := s.processes[s.id] + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + return p, nil +} + +// getExecProcess returns exec process +func (s *Service) getExecProcess(id string) (rproc.Process, error) { + s.mu.Lock() + defer s.mu.Unlock() + p := s.processes[id] + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s does not exist", id) + } + return p, nil +} + +func getTopic(ctx context.Context, e interface{}) string { + switch e.(type) { + case *eventstypes.TaskCreate: + return runtime.TaskCreateEventTopic + case *eventstypes.TaskStart: + return runtime.TaskStartEventTopic + case *eventstypes.TaskOOM: + return runtime.TaskOOMEventTopic + case *eventstypes.TaskExit: + return runtime.TaskExitEventTopic + case *eventstypes.TaskDelete: + return runtime.TaskDeleteEventTopic + case *eventstypes.TaskExecAdded: + return runtime.TaskExecAddedEventTopic + case *eventstypes.TaskExecStarted: + return runtime.TaskExecStartedEventTopic + default: + logrus.Warnf("no topic for type %#v", e) + } + return runtime.TaskUnknownTopic +} + +func newInit(ctx context.Context, path, workDir, runtimeRoot, namespace string, config map[string]string, platform rproc.Platform, r *proc.CreateConfig) (*proc.Init, error) { + var options runctypes.CreateOptions + if r.Options != nil { + v, err := typeurl.UnmarshalAny(r.Options) + if err != nil { + return nil, err + } + options = *v.(*runctypes.CreateOptions) + } + + spec, err := utils.ReadSpec(r.Bundle) + if err != nil { + return nil, errors.Wrap(err, "read oci spec") + } + if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { + return nil, errors.Wrap(err, "update volume annotations") + } + + runsc.FormatLogPath(r.ID, config) + rootfs := filepath.Join(path, 
"rootfs") + runtime := proc.NewRunsc(runtimeRoot, path, namespace, r.Runtime, config) + p := proc.New(r.ID, runtime, rproc.Stdio{ + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Terminal: r.Terminal, + }) + p.Bundle = r.Bundle + p.Platform = platform + p.Rootfs = rootfs + p.WorkDir = workDir + p.IoUID = int(options.IoUid) + p.IoGID = int(options.IoGid) + p.Sandbox = utils.IsSandbox(spec) + p.UserLog = utils.UserLogPath(spec) + p.Monitor = shim.Default + return p, nil +} diff --git a/pkg/shim/v1/utils/utils.go b/pkg/shim/v1/utils/utils.go new file mode 100644 index 000000000..7a400af1c --- /dev/null +++ b/pkg/shim/v1/utils/utils.go @@ -0,0 +1,57 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + + "github.com/containerd/cri/pkg/annotations" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// ReadSpec reads OCI spec from the bundle directory. +func ReadSpec(bundle string) (*specs.Spec, error) { + f, err := os.Open(filepath.Join(bundle, "config.json")) + if err != nil { + return nil, err + } + b, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + var spec specs.Spec + if err := json.Unmarshal(b, &spec); err != nil { + return nil, err + } + return &spec, nil +} + +// IsSandbox checks whether a container is a sandbox container. +func IsSandbox(spec *specs.Spec) bool { + t, ok := spec.Annotations[annotations.ContainerType] + return !ok || t == annotations.ContainerTypeSandbox +} + +// UserLogPath gets user log path from OCI annotation. +func UserLogPath(spec *specs.Spec) string { + sandboxLogDir := spec.Annotations[annotations.SandboxLogDir] + if sandboxLogDir == "" { + return "" + } + return filepath.Join(sandboxLogDir, "gvisor.log") +} diff --git a/pkg/shim/v1/utils/volumes.go b/pkg/shim/v1/utils/volumes.go new file mode 100644 index 000000000..7323e7245 --- /dev/null +++ b/pkg/shim/v1/utils/volumes.go @@ -0,0 +1,156 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "path/filepath" + "strings" + + "github.com/containerd/cri/pkg/annotations" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +const volumeKeyPrefix = "dev.gvisor.spec.mount." 
+ +var kubeletPodsDir = "/var/lib/kubelet/pods" + +// volumeName gets volume name from volume annotation key, example: +// dev.gvisor.spec.mount.NAME.share +func volumeName(k string) string { + return strings.SplitN(strings.TrimPrefix(k, volumeKeyPrefix), ".", 2)[0] +} + +// volumeFieldName gets volume field name from volume annotation key, example: +// `type` is the field of dev.gvisor.spec.mount.NAME.type +func volumeFieldName(k string) string { + parts := strings.Split(strings.TrimPrefix(k, volumeKeyPrefix), ".") + return parts[len(parts)-1] +} + +// podUID gets pod UID from the pod log path. +func podUID(s *specs.Spec) (string, error) { + sandboxLogDir := s.Annotations[annotations.SandboxLogDir] + if sandboxLogDir == "" { + return "", errors.New("no sandbox log path annotation") + } + fields := strings.Split(filepath.Base(sandboxLogDir), "_") + switch len(fields) { + case 1: // This is the old CRI logging path + return fields[0], nil + case 3: // This is the new CRI logging path + return fields[2], nil + } + return "", errors.Errorf("unexpected sandbox log path %q", sandboxLogDir) +} + +// isVolumeKey checks whether an annotation key is for volume. +func isVolumeKey(k string) bool { + return strings.HasPrefix(k, volumeKeyPrefix) +} + +// volumeSourceKey constructs the annotation key for volume source. +func volumeSourceKey(volume string) string { + return volumeKeyPrefix + volume + ".source" +} + +// volumePath searches the volume path in the kubelet pod directory. +func volumePath(volume, uid string) (string, error) { + // TODO: Support subpath when gvisor supports pod volume bind mount. + volumeSearchPath := fmt.Sprintf("%s/%s/volumes/*/%s", kubeletPodsDir, uid, volume) + dirs, err := filepath.Glob(volumeSearchPath) + if err != nil { + return "", err + } + if len(dirs) != 1 { + return "", errors.Errorf("unexpected matched volume list %v", dirs) + } + return dirs[0], nil +} + +// isVolumePath checks whether a string is the volume path. +func isVolumePath(volume, path string) (bool, error) { + // TODO: Support subpath when gvisor supports pod volume bind mount. + volumeSearchPath := fmt.Sprintf("%s/*/volumes/*/%s", kubeletPodsDir, volume) + return filepath.Match(volumeSearchPath, path) +} + +// UpdateVolumeAnnotations add necessary OCI annotations for gvisor +// volume optimization. +func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { + var ( + uid string + err error + ) + if IsSandbox(s) { + uid, err = podUID(s) + if err != nil { + // Skip if we can't get pod UID, because this doesn't work + // for containerd 1.1. + logrus.WithError(err).Error("Can't get pod uid") + return nil + } + } + var updated bool + for k, v := range s.Annotations { + if !isVolumeKey(k) { + continue + } + if volumeFieldName(k) != "type" { + continue + } + volume := volumeName(k) + if uid != "" { + // This is a sandbox + path, err := volumePath(volume, uid) + if err != nil { + return errors.Wrapf(err, "get volume path for %q", volume) + } + s.Annotations[volumeSourceKey(volume)] = path + updated = true + } else { + // This is a container + for i := range s.Mounts { + // An error is returned for sandbox if source annotation + // is not successfully applied, so it is guaranteed that + // the source annotation for sandbox has already been + // successfully applied at this point. + // The volume name is unique inside a pod, so matching without + // podUID is fine here. + // TODO: Pass podUID down to shim for containers to do + // more accurate matching. 
+ if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { + // gVisor requires the container mount type to match + // sandbox mount type. + s.Mounts[i].Type = v + updated = true + } + } + } + } + if !updated { + return nil + } + // Update bundle + b, err := json.Marshal(s) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) +} diff --git a/pkg/shim/v1/utils/volumes_test.go b/pkg/shim/v1/utils/volumes_test.go new file mode 100644 index 000000000..4b2639545 --- /dev/null +++ b/pkg/shim/v1/utils/volumes_test.go @@ -0,0 +1,309 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/containerd/cri/pkg/annotations" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestUpdateVolumeAnnotations(t *testing.T) { + dir, err := ioutil.TempDir("", "test-update-volume-annotations") + if err != nil { + t.Fatalf("create tempdir: %v", err) + } + defer os.RemoveAll(dir) + kubeletPodsDir = dir + + const ( + testPodUID = "testuid" + testVolumeName = "testvolume" + testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID + testLegacyLogDirPath = "/var/log/pods/" + testPodUID + ) + testVolumePath := fmt.Sprintf("%s/%s/volumes/kubernetes.io~empty-dir/%s", dir, testPodUID, testVolumeName) + + if err := os.MkdirAll(testVolumePath, 0755); err != nil { + t.Fatalf("Create test volume: %v", err) + } + + for _, test := range []struct { + desc string + spec *specs.Spec + expected *specs.Spec + expectErr bool + expectUpdate bool + }{ + { + desc: "volume annotations for sandbox", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + }, + }, + expectUpdate: true, + }, + { + desc: "volume annotations for sandbox with legacy log path", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLegacyLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." 
+ testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLegacyLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + }, + }, + expectUpdate: true, + }, + { + desc: "tmpfs: volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "tmpfs", + Source: testVolumePath, + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expectUpdate: true, + }, + { + desc: "bind: volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expectUpdate: true, + }, + { + desc: "should not return error without pod log directory", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." 
+ testVolumeName + ".options": "ro", + }, + }, + }, + { + desc: "should return error if volume path does not exist", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + "dev.gvisor.spec.mount.notexist.share": "pod", + "dev.gvisor.spec.mount.notexist.type": "tmpfs", + "dev.gvisor.spec.mount.notexist.options": "ro", + }, + }, + expectErr: true, + }, + { + desc: "no volume annotations for sandbox", + spec: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + annotations.SandboxLogDir: testLogDirPath, + annotations.ContainerType: annotations.ContainerTypeSandbox, + }, + }, + }, + { + desc: "no volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: "/test", + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: "/test", + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + annotations.ContainerType: annotations.ContainerTypeContainer, + }, + }, + }, + } { + t.Run(test.desc, func(t *testing.T) { + bundle, err := ioutil.TempDir(dir, "test-bundle") + if err != nil { + t.Fatalf("Create test bundle: %v", err) + } + err = UpdateVolumeAnnotations(bundle, test.spec) + if test.expectErr { + if err == nil { + t.Fatal("Expected error, but got nil") + } + return + } + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if !reflect.DeepEqual(test.expected, test.spec) { + t.Fatalf("Expected %+v, got %+v", test.expected, test.spec) + } + if test.expectUpdate { + b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json")) + if err != nil { + t.Fatalf("Read spec from bundle: %v", err) + } + var spec specs.Spec + if err := json.Unmarshal(b, &spec); err != nil { + t.Fatalf("Unmarshal spec: %v", err) + } + if !reflect.DeepEqual(test.expected, &spec) { + t.Fatalf("Expected %+v, got %+v", test.expected, &spec) + } + } + }) + } +} diff --git a/pkg/shim/v2/epoll.go b/pkg/shim/v2/epoll.go new file mode 100644 index 000000000..57a2c5452 --- /dev/null +++ b/pkg/shim/v2/epoll.go @@ -0,0 +1,128 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
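+
+// epoll.go watches each sandbox cgroup's OOM event FD with a single epoll
+// instance and publishes a TaskOOM event for the corresponding container
+// when the kernel reports an out-of-memory condition.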
+ +// +build linux + +package v2 + +import ( + "context" + "sync" + + "github.com/containerd/cgroups" + eventstypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/runtime" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +func newOOMEpoller(publisher events.Publisher) (*epoller, error) { + fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) + if err != nil { + return nil, err + } + return &epoller{ + fd: fd, + publisher: publisher, + set: make(map[uintptr]*item), + }, nil +} + +type epoller struct { + mu sync.Mutex + + fd int + publisher events.Publisher + set map[uintptr]*item +} + +type item struct { + id string + cg cgroups.Cgroup +} + +func (e *epoller) Close() error { + return unix.Close(e.fd) +} + +func (e *epoller) run(ctx context.Context) { + var events [128]unix.EpollEvent + for { + select { + case <-ctx.Done(): + e.Close() + return + default: + n, err := unix.EpollWait(e.fd, events[:], -1) + if err != nil { + if err == unix.EINTR { + continue + } + logrus.WithError(err).Error("cgroups: epoll wait") + } + for i := 0; i < n; i++ { + e.process(ctx, uintptr(events[i].Fd)) + } + } + } +} + +func (e *epoller) add(id string, cg cgroups.Cgroup) error { + e.mu.Lock() + defer e.mu.Unlock() + fd, err := cg.OOMEventFD() + if err != nil { + return err + } + e.set[fd] = &item{ + id: id, + cg: cg, + } + event := unix.EpollEvent{ + Fd: int32(fd), + Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR, + } + return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event) +} + +func (e *epoller) process(ctx context.Context, fd uintptr) { + flush(fd) + e.mu.Lock() + i, ok := e.set[fd] + if !ok { + e.mu.Unlock() + return + } + e.mu.Unlock() + if i.cg.State() == cgroups.Deleted { + e.mu.Lock() + delete(e.set, fd) + e.mu.Unlock() + unix.Close(int(fd)) + return + } + if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{ + ContainerID: i.id, + }); err != nil { + logrus.WithError(err).Error("publish OOM event") + } +} + +func flush(fd uintptr) error { + var buf [8]byte + _, err := unix.Read(int(fd), buf[:]) + return err +} diff --git a/pkg/shim/v2/options/options.go b/pkg/shim/v2/options/options.go new file mode 100644 index 000000000..de09f2f79 --- /dev/null +++ b/pkg/shim/v2/options/options.go @@ -0,0 +1,33 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package options + +const OptionType = "io.containerd.runsc.v1.options" + +// Options is runtime options for io.containerd.runsc.v1. +type Options struct { + // ShimCgroup is the cgroup the shim should be in. + ShimCgroup string `toml:"shim_cgroup"` + // IoUid is the I/O's pipes uid. + IoUid uint32 `toml:"io_uid"` + // IoUid is the I/O's pipes gid. + IoGid uint32 `toml:"io_gid"` + // BinaryName is the binary name of the runsc binary. + BinaryName string `toml:"binary_name"` + // Root is the runsc root directory. 
+ Root string `toml:"root"` + // RunscConfig is a key/value map of all runsc flags. + RunscConfig map[string]string `toml:"runsc_config"` +} diff --git a/pkg/shim/v2/service.go b/pkg/shim/v2/service.go new file mode 100644 index 000000000..3d226e29c --- /dev/null +++ b/pkg/shim/v2/service.go @@ -0,0 +1,821 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v2 + +import ( + "context" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "sync" + "syscall" + "time" + + "github.com/BurntSushi/toml" + "github.com/containerd/cgroups" + "github.com/containerd/console" + eventstypes "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/api/types/task" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/runtime" + "github.com/containerd/containerd/runtime/linux/runctypes" + rproc "github.com/containerd/containerd/runtime/proc" + "github.com/containerd/containerd/runtime/v2/shim" + taskAPI "github.com/containerd/containerd/runtime/v2/task" + runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1" + "github.com/containerd/typeurl" + ptypes "github.com/gogo/protobuf/types" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" + "github.com/google/gvisor-containerd-shim/pkg/v1/proc" + "github.com/google/gvisor-containerd-shim/pkg/v1/utils" + "github.com/google/gvisor-containerd-shim/pkg/v2/options" +) + +var ( + empty = &ptypes.Empty{} + bufPool = sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32<<10) + return &buffer + }, + } +) + +var _ = (taskAPI.TaskService)(&service{}) + +// configFile is the default config file name. For containerd 1.2, +// we assume that a config.toml should exist in the runtime root. 
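+// When runc-style options are supplied, Create looks for this file under the
+// runtime root and, if present, decodes it into options.Options.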
+const configFile = "config.toml" + +// New returns a new shim service that can be used via GRPC +func New(ctx context.Context, id string, publisher events.Publisher) (shim.Shim, error) { + ep, err := newOOMEpoller(publisher) + if err != nil { + return nil, err + } + ctx, cancel := context.WithCancel(ctx) + go ep.run(ctx) + s := &service{ + id: id, + context: ctx, + processes: make(map[string]rproc.Process), + events: make(chan interface{}, 128), + ec: proc.ExitCh, + oomPoller: ep, + cancel: cancel, + } + go s.processExits() + runsc.Monitor = shim.Default + if err := s.initPlatform(); err != nil { + cancel() + return nil, errors.Wrap(err, "failed to initialized platform behavior") + } + go s.forward(publisher) + return s, nil +} + +// service is the shim implementation of a remote shim over GRPC +type service struct { + mu sync.Mutex + + context context.Context + task rproc.Process + processes map[string]rproc.Process + events chan interface{} + platform rproc.Platform + opts options.Options + ec chan proc.Exit + oomPoller *epoller + + id string + bundle string + cancel func() +} + +func newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + self, err := os.Executable() + if err != nil { + return nil, err + } + cwd, err := os.Getwd() + if err != nil { + return nil, err + } + args := []string{ + "-namespace", ns, + "-address", containerdAddress, + "-publish-binary", containerdBinary, + } + cmd := exec.Command(self, args...) + cmd.Dir = cwd + cmd.Env = append(os.Environ(), "GOMAXPROCS=2") + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + return cmd, nil +} + +func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress string) (string, error) { + cmd, err := newCommand(ctx, containerdBinary, containerdAddress) + if err != nil { + return "", err + } + address, err := shim.SocketAddress(ctx, id) + if err != nil { + return "", err + } + socket, err := shim.NewSocket(address) + if err != nil { + return "", err + } + defer socket.Close() + f, err := socket.File() + if err != nil { + return "", err + } + defer f.Close() + + cmd.ExtraFiles = append(cmd.ExtraFiles, f) + + if err := cmd.Start(); err != nil { + return "", err + } + defer func() { + if err != nil { + cmd.Process.Kill() + } + }() + // make sure to wait after start + go cmd.Wait() + if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { + return "", err + } + if err := shim.WriteAddress("address", address); err != nil { + return "", err + } + if err := shim.SetScore(cmd.Process.Pid); err != nil { + return "", errors.Wrap(err, "failed to set OOM Score on shim") + } + return address, nil +} + +func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) { + path, err := os.Getwd() + if err != nil { + return nil, err + } + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + runtime, err := s.readRuntime(path) + if err != nil { + return nil, err + } + r := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil) + if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{ + Force: true, + }); err != nil { + logrus.WithError(err).Warn("failed to remove runc container") + } + if err := mount.UnmountAll(filepath.Join(path, "rootfs"), 0); err != nil { + logrus.WithError(err).Warn("failed to cleanup rootfs mount") + } + return &taskAPI.DeleteResponse{ + ExitedAt: time.Now(), + ExitStatus: 128 + uint32(unix.SIGKILL), + }, 
nil +} + +func (s *service) readRuntime(path string) (string, error) { + data, err := ioutil.ReadFile(filepath.Join(path, "runtime")) + if err != nil { + return "", err + } + return string(data), nil +} + +func (s *service) writeRuntime(path, runtime string) error { + return ioutil.WriteFile(filepath.Join(path, "runtime"), []byte(runtime), 0600) +} + +// Create a new initial process and container with the underlying OCI runtime +func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { + s.mu.Lock() + defer s.mu.Unlock() + + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, errors.Wrap(err, "create namespace") + } + + // Read from root for now. + var opts options.Options + if r.Options != nil { + v, err := typeurl.UnmarshalAny(r.Options) + if err != nil { + return nil, err + } + var path string + switch o := v.(type) { + case *runctypes.CreateOptions: // containerd 1.2.x + opts.IoUid = o.IoUid + opts.IoGid = o.IoGid + opts.ShimCgroup = o.ShimCgroup + case *runctypes.RuncOptions: // containerd 1.2.x + root := proc.RunscRoot + if o.RuntimeRoot != "" { + root = o.RuntimeRoot + } + + opts.BinaryName = o.Runtime + + path = filepath.Join(root, configFile) + if _, err := os.Stat(path); err != nil { + if !os.IsNotExist(err) { + return nil, errors.Wrapf(err, "stat config file %q", path) + } + // A config file in runtime root is not required. + path = "" + } + case *runtimeoptions.Options: // containerd 1.3.x+ + if o.ConfigPath == "" { + break + } + if o.TypeUrl != options.OptionType { + return nil, errors.Errorf("unsupported runtimeoptions %q", o.TypeUrl) + } + path = o.ConfigPath + default: + return nil, errors.Errorf("unsupported option type %q", r.Options.TypeUrl) + } + if path != "" { + if _, err = toml.DecodeFile(path, &opts); err != nil { + return nil, errors.Wrapf(err, "decode config file %q", path) + } + } + } + + var mounts []proc.Mount + for _, m := range r.Rootfs { + mounts = append(mounts, proc.Mount{ + Type: m.Type, + Source: m.Source, + Target: m.Target, + Options: m.Options, + }) + } + + rootfs := filepath.Join(r.Bundle, "rootfs") + if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { + return nil, err + } + + config := &proc.CreateConfig{ + ID: r.ID, + Bundle: r.Bundle, + Runtime: opts.BinaryName, + Rootfs: mounts, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Options: r.Options, + } + if err := s.writeRuntime(r.Bundle, opts.BinaryName); err != nil { + return nil, err + } + defer func() { + if err != nil { + if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { + logrus.WithError(err2).Warn("failed to cleanup rootfs mount") + } + } + }() + for _, rm := range mounts { + m := &mount.Mount{ + Type: rm.Type, + Source: rm.Source, + Options: rm.Options, + } + if err := m.Mount(rootfs); err != nil { + return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) + } + } + process, err := newInit( + ctx, + r.Bundle, + filepath.Join(r.Bundle, "work"), + ns, + s.platform, + config, + &opts, + rootfs, + ) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + if err := process.Create(ctx, config); err != nil { + return nil, errdefs.ToGRPC(err) + } + // save the main task id and bundle to the shim for additional requests + s.id = r.ID + s.bundle = r.Bundle + + // Set up OOM notification on the sandbox's cgroup. This is done on sandbox + // create since the sandbox process will be created here. 
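+ // The sandbox PID reported by runsc is used to look up the sandbox's
+ // cgroup (v1), which is then registered with the shim's OOM poller so
+ // that OOM events can be forwarded to containerd.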
+ pid := process.Pid() + if pid > 0 { + cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid)) + if err != nil { + return nil, errors.Wrapf(err, "loading cgroup for %d", pid) + } + if err := s.oomPoller.add(s.id, cg); err != nil { + return nil, errors.Wrapf(err, "add cg to OOM monitor") + } + } + s.task = process + s.opts = opts + return &taskAPI.CreateTaskResponse{ + Pid: uint32(process.Pid()), + }, nil + +} + +// Start a process +func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if err := p.Start(ctx); err != nil { + return nil, err + } + // TODO: Set the cgroup and oom notifications on restore. + // https://github.com/google/gvisor-containerd-shim/issues/58 + return &taskAPI.StartResponse{ + Pid: uint32(p.Pid()), + }, nil +} + +// Delete the initial process and container +func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + if err := p.Delete(ctx); err != nil { + return nil, err + } + isTask := r.ExecID == "" + if !isTask { + s.mu.Lock() + delete(s.processes, r.ExecID) + s.mu.Unlock() + } + if isTask && s.platform != nil { + s.platform.Close() + } + return &taskAPI.DeleteResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + Pid: uint32(p.Pid()), + }, nil +} + +// Exec an additional process inside the container +func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*ptypes.Empty, error) { + s.mu.Lock() + p := s.processes[r.ExecID] + s.mu.Unlock() + if p != nil { + return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) + } + p = s.task + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + process, err := p.(*proc.Init).Exec(ctx, s.bundle, &proc.ExecConfig{ + ID: r.ExecID, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Spec: r.Spec, + }) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + s.mu.Lock() + s.processes[r.ExecID] = process + s.mu.Unlock() + return empty, nil +} + +// ResizePty of a process +func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*ptypes.Empty, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + ws := console.WinSize{ + Width: uint16(r.Width), + Height: uint16(r.Height), + } + if err := p.Resize(ws); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// State returns runtime state information for a process +func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + st, err := p.Status(ctx) + if err != nil { + return nil, err + } + status := task.StatusUnknown + switch st { + case "created": + status = task.StatusCreated + case "running": + status = task.StatusRunning + case "stopped": + status = task.StatusStopped + } + sio := p.Stdio() + return &taskAPI.StateResponse{ + ID: p.ID(), + Bundle: s.bundle, + Pid: uint32(p.Pid()), + Status: status, + Stdin: sio.Stdin, + Stdout: sio.Stdout, + Stderr: sio.Stderr, + Terminal: sio.Terminal, + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +// Pause the container 
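+// Pause is not implemented by this shim and simply returns ErrNotImplemented
+// (Resume below behaves the same way).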
+func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Resume the container +func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Kill a process with the provided signal +func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*ptypes.Empty, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + if err := p.Kill(ctx, r.Signal, r.All); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// Pids returns all pids inside the container +func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) { + pids, err := s.getContainerPids(ctx, r.ID) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + var processes []*task.ProcessInfo + for _, pid := range pids { + pInfo := task.ProcessInfo{ + Pid: pid, + } + for _, p := range s.processes { + if p.Pid() == int(pid) { + d := &runctypes.ProcessDetails{ + ExecID: p.ID(), + } + a, err := typeurl.MarshalAny(d) + if err != nil { + return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) + } + pInfo.Info = a + break + } + } + processes = append(processes, &pInfo) + } + return &taskAPI.PidsResponse{ + Processes: processes, + }, nil +} + +// CloseIO of a process +func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*ptypes.Empty, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if stdin := p.Stdin(); stdin != nil { + if err := stdin.Close(); err != nil { + return nil, errors.Wrap(err, "close stdin") + } + } + return empty, nil +} + +// Checkpoint the container +func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Connect returns shim information such as the shim's pid +func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) { + var pid int + if s.task != nil { + pid = s.task.Pid() + } + return &taskAPI.ConnectResponse{ + ShimPid: uint32(os.Getpid()), + TaskPid: uint32(pid), + }, nil +} + +func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*ptypes.Empty, error) { + s.cancel() + os.Exit(0) + return empty, nil +} + +func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) { + path, err := os.Getwd() + if err != nil { + return nil, err + } + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + runtime, err := s.readRuntime(path) + if err != nil { + return nil, err + } + rs := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil) + stats, err := rs.Stats(ctx, s.id) + if err != nil { + return nil, err + } + + // gvisor currently (as of 2020-03-03) only returns the total memory + // usage and current PID value[0]. However, we copy the common fields here + // so that future updates will propagate correct information. We're + // using the cgroups.Metrics structure so we're returning the same type + // as runc. 
+ // + // [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81 + data, err := typeurl.MarshalAny(&cgroups.Metrics{ + CPU: &cgroups.CPUStat{ + Usage: &cgroups.CPUUsage{ + Total: stats.Cpu.Usage.Total, + Kernel: stats.Cpu.Usage.Kernel, + User: stats.Cpu.Usage.User, + PerCPU: stats.Cpu.Usage.Percpu, + }, + Throttling: &cgroups.Throttle{ + Periods: stats.Cpu.Throttling.Periods, + ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods, + ThrottledTime: stats.Cpu.Throttling.ThrottledTime, + }, + }, + Memory: &cgroups.MemoryStat{ + Cache: stats.Memory.Cache, + Usage: &cgroups.MemoryEntry{ + Limit: stats.Memory.Usage.Limit, + Usage: stats.Memory.Usage.Usage, + Max: stats.Memory.Usage.Max, + Failcnt: stats.Memory.Usage.Failcnt, + }, + Swap: &cgroups.MemoryEntry{ + Limit: stats.Memory.Swap.Limit, + Usage: stats.Memory.Swap.Usage, + Max: stats.Memory.Swap.Max, + Failcnt: stats.Memory.Swap.Failcnt, + }, + Kernel: &cgroups.MemoryEntry{ + Limit: stats.Memory.Kernel.Limit, + Usage: stats.Memory.Kernel.Usage, + Max: stats.Memory.Kernel.Max, + Failcnt: stats.Memory.Kernel.Failcnt, + }, + KernelTCP: &cgroups.MemoryEntry{ + Limit: stats.Memory.KernelTCP.Limit, + Usage: stats.Memory.KernelTCP.Usage, + Max: stats.Memory.KernelTCP.Max, + Failcnt: stats.Memory.KernelTCP.Failcnt, + }, + }, + Pids: &cgroups.PidsStat{ + Current: stats.Pids.Current, + Limit: stats.Pids.Limit, + }, + }) + if err != nil { + return nil, err + } + return &taskAPI.StatsResponse{ + Stats: data, + }, nil +} + +// Update a running container +func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*ptypes.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Wait for a process to exit +func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) { + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + p.Wait() + + return &taskAPI.WaitResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +func (s *service) processExits() { + for e := range s.ec { + s.checkProcesses(e) + } +} + +func (s *service) checkProcesses(e proc.Exit) { + // TODO(random-liu): Add `shouldKillAll` logic if container pid + // namespace is supported. + for _, p := range s.allProcesses() { + if p.ID() == e.ID { + if ip, ok := p.(*proc.Init); ok { + // Ensure all children are killed + if err := ip.KillAll(s.context); err != nil { + log.G(s.context).WithError(err).WithField("id", ip.ID()). 
+ Error("failed to kill init's children") + } + } + p.SetExited(e.Status) + s.events <- &eventstypes.TaskExit{ + ContainerID: s.id, + ID: p.ID(), + Pid: uint32(p.Pid()), + ExitStatus: uint32(e.Status), + ExitedAt: p.ExitedAt(), + } + return + } + } +} + +func (s *service) allProcesses() (o []rproc.Process) { + s.mu.Lock() + defer s.mu.Unlock() + for _, p := range s.processes { + o = append(o, p) + } + if s.task != nil { + o = append(o, s.task) + } + return o +} + +func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { + s.mu.Lock() + p := s.task + s.mu.Unlock() + if p == nil { + return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "container must be created") + } + ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) + if err != nil { + return nil, err + } + pids := make([]uint32, 0, len(ps)) + for _, pid := range ps { + pids = append(pids, uint32(pid)) + } + return pids, nil +} + +func (s *service) forward(publisher events.Publisher) { + for e := range s.events { + ctx, cancel := context.WithTimeout(s.context, 5*time.Second) + err := publisher.Publish(ctx, getTopic(e), e) + cancel() + if err != nil { + logrus.WithError(err).Error("post event") + } + } +} + +func (s *service) getProcess(execID string) (rproc.Process, error) { + s.mu.Lock() + defer s.mu.Unlock() + if execID == "" { + return s.task, nil + } + p := s.processes[execID] + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) + } + return p, nil +} + +func getTopic(e interface{}) string { + switch e.(type) { + case *eventstypes.TaskCreate: + return runtime.TaskCreateEventTopic + case *eventstypes.TaskStart: + return runtime.TaskStartEventTopic + case *eventstypes.TaskOOM: + return runtime.TaskOOMEventTopic + case *eventstypes.TaskExit: + return runtime.TaskExitEventTopic + case *eventstypes.TaskDelete: + return runtime.TaskDeleteEventTopic + case *eventstypes.TaskExecAdded: + return runtime.TaskExecAddedEventTopic + case *eventstypes.TaskExecStarted: + return runtime.TaskExecStartedEventTopic + default: + logrus.Warnf("no topic for type %#v", e) + } + return runtime.TaskUnknownTopic +} + +func newInit(ctx context.Context, path, workDir, namespace string, platform rproc.Platform, r *proc.CreateConfig, options *options.Options, rootfs string) (*proc.Init, error) { + spec, err := utils.ReadSpec(r.Bundle) + if err != nil { + return nil, errors.Wrap(err, "read oci spec") + } + if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { + return nil, errors.Wrap(err, "update volume annotations") + } + runsc.FormatLogPath(r.ID, options.RunscConfig) + runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig) + p := proc.New(r.ID, runtime, rproc.Stdio{ + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Terminal: r.Terminal, + }) + p.Bundle = r.Bundle + p.Platform = platform + p.Rootfs = rootfs + p.WorkDir = workDir + p.IoUID = int(options.IoUid) + p.IoGID = int(options.IoGid) + p.Sandbox = utils.IsSandbox(spec) + p.UserLog = utils.UserLogPath(spec) + p.Monitor = shim.Default + return p, nil +} diff --git a/pkg/shim/v2/service_linux.go b/pkg/shim/v2/service_linux.go new file mode 100644 index 000000000..cd259cd44 --- /dev/null +++ b/pkg/shim/v2/service_linux.go @@ -0,0 +1,112 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package v2 + +import ( + "context" + "io" + "sync" + "syscall" + + "github.com/containerd/console" + "github.com/containerd/fifo" + "github.com/pkg/errors" +) + +type linuxPlatform struct { + epoller *console.Epoller +} + +func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { + if p.epoller == nil { + return nil, errors.New("uninitialized epoller") + } + + epollConsole, err := p.epoller.Add(console) + if err != nil { + return nil, err + } + + if stdin != "" { + in, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return nil, err + } + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(epollConsole, in, *p) + }() + } + + outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) + if err != nil { + return nil, err + } + outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) + if err != nil { + return nil, err + } + wg.Add(1) + cwg.Add(1) + go func() { + cwg.Done() + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(outw, epollConsole, *p) + epollConsole.Close() + outr.Close() + outw.Close() + wg.Done() + }() + return epollConsole, nil +} + +func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { + if p.epoller == nil { + return errors.New("uninitialized epoller") + } + epollConsole, ok := cons.(*console.EpollConsole) + if !ok { + return errors.Errorf("expected EpollConsole, got %#v", cons) + } + return epollConsole.Shutdown(p.epoller.CloseConsole) +} + +func (p *linuxPlatform) Close() error { + return p.epoller.Close() +} + +// initialize a single epoll fd to manage our consoles. `initPlatform` should +// only be called once. +func (s *service) initPlatform() error { + if s.platform != nil { + return nil + } + epoller, err := console.NewEpoller() + if err != nil { + return errors.Wrap(err, "failed to initialize epoller") + } + s.platform = &linuxPlatform{ + epoller: epoller, + } + go epoller.Wait() + return nil +} diff --git a/pkg/v1/proc/deleted_state.go b/pkg/v1/proc/deleted_state.go deleted file mode 100644 index b0233841d..000000000 --- a/pkg/v1/proc/deleted_state.go +++ /dev/null @@ -1,54 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package proc - -import ( - "context" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/runtime/proc" - "github.com/pkg/errors" -) - -type deletedState struct { -} - -func (s *deletedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot resize a deleted process") -} - -func (s *deletedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a deleted process") -} - -func (s *deletedState) Delete(ctx context.Context) error { - return errors.Wrap(errdefs.ErrNotFound, "cannot delete a deleted process") -} - -func (s *deletedState) Kill(ctx context.Context, sig uint32, all bool) error { - return errors.Wrap(errdefs.ErrNotFound, "cannot kill a deleted process") -} - -func (s *deletedState) SetExited(status int) { - // no op -} - -func (s *deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return nil, errors.Errorf("cannot exec in a deleted state") -} diff --git a/pkg/v1/proc/exec.go b/pkg/v1/proc/exec.go deleted file mode 100644 index f02b73bb2..000000000 --- a/pkg/v1/proc/exec.go +++ /dev/null @@ -1,284 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package proc - -import ( - "context" - "fmt" - "io" - "os" - "path/filepath" - "sync" - "syscall" - "time" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/runtime/proc" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - "golang.org/x/sys/unix" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" -) - -type execProcess struct { - wg sync.WaitGroup - - execState execState - - mu sync.Mutex - id string - console console.Console - io runc.IO - status int - exited time.Time - pid int - internalPid int - closers []io.Closer - stdin io.Closer - stdio proc.Stdio - path string - spec specs.Process - - parent *Init - waitBlock chan struct{} -} - -func (e *execProcess) Wait() { - <-e.waitBlock -} - -func (e *execProcess) ID() string { - return e.id -} - -func (e *execProcess) Pid() int { - e.mu.Lock() - defer e.mu.Unlock() - return e.pid -} - -func (e *execProcess) ExitStatus() int { - e.mu.Lock() - defer e.mu.Unlock() - return e.status -} - -func (e *execProcess) ExitedAt() time.Time { - e.mu.Lock() - defer e.mu.Unlock() - return e.exited -} - -func (e *execProcess) SetExited(status int) { - e.mu.Lock() - defer e.mu.Unlock() - - e.execState.SetExited(status) -} - -func (e *execProcess) setExited(status int) { - e.status = status - e.exited = time.Now() - e.parent.Platform.ShutdownConsole(context.Background(), e.console) - close(e.waitBlock) -} - -func (e *execProcess) Delete(ctx context.Context) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Delete(ctx) -} - -func (e *execProcess) delete(ctx context.Context) error { - e.wg.Wait() - if e.io != nil { - for _, c := range e.closers { - c.Close() - } - e.io.Close() - } - pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - // silently ignore error - os.Remove(pidfile) - internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - // silently ignore error - os.Remove(internalPidfile) - return nil -} - -func (e *execProcess) Resize(ws console.WinSize) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Resize(ws) -} - -func (e *execProcess) resize(ws console.WinSize) error { - if e.console == nil { - return nil - } - return e.console.Resize(ws) -} - -func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Kill(ctx, sig, false) -} - -func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { - internalPid := e.internalPid - if internalPid != 0 { - if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{ - Pid: internalPid, - }); err != nil { - // If this returns error, consider the process has already stopped. - // TODO: Fix after signal handling is fixed. 
- return errors.Wrapf(errdefs.ErrNotFound, err.Error()) - } - } - return nil -} - -func (e *execProcess) Stdin() io.Closer { - return e.stdin -} - -func (e *execProcess) Stdio() proc.Stdio { - return e.stdio -} - -func (e *execProcess) Start(ctx context.Context) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Start(ctx) -} - -func (e *execProcess) start(ctx context.Context) (err error) { - var ( - socket *runc.Socket - pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - ) - if e.stdio.Terminal { - if socket, err = runc.NewTempConsoleSocket(); err != nil { - return errors.Wrap(err, "failed to create runc console socket") - } - defer socket.Close() - } else if e.stdio.IsNull() { - if e.io, err = runc.NewNullIO(); err != nil { - return errors.Wrap(err, "creating new NULL IO") - } - } else { - if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { - return errors.Wrap(err, "failed to create runc io pipes") - } - } - opts := &runsc.ExecOpts{ - PidFile: pidfile, - InternalPidFile: internalPidfile, - IO: e.io, - Detach: true, - } - if socket != nil { - opts.ConsoleSocket = socket - } - eventCh := e.parent.Monitor.Subscribe() - defer func() { - // Unsubscribe if an error is returned. - if err != nil { - e.parent.Monitor.Unsubscribe(eventCh) - } - }() - if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil { - close(e.waitBlock) - return e.parent.runtimeError(err, "OCI runtime exec failed") - } - if e.stdio.Stdin != "" { - sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return errors.Wrapf(err, "failed to open stdin fifo %s", e.stdio.Stdin) - } - e.closers = append(e.closers, sc) - e.stdin = sc - } - var copyWaitGroup sync.WaitGroup - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - if socket != nil { - console, err := socket.ReceiveMaster() - if err != nil { - return errors.Wrap(err, "failed to retrieve console master") - } - if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start console copy") - } - } else if !e.stdio.IsNull() { - if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start io pipe copy") - } - } - copyWaitGroup.Wait() - pid, err := runc.ReadPidFile(opts.PidFile) - if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime exec pid") - } - e.pid = pid - internalPid, err := runc.ReadPidFile(opts.InternalPidFile) - if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime exec internal pid") - } - e.internalPid = internalPid - go func() { - defer e.parent.Monitor.Unsubscribe(eventCh) - for event := range eventCh { - if event.Pid == e.pid { - ExitCh <- Exit{ - Timestamp: event.Timestamp, - ID: e.id, - Status: event.Status, - } - break - } - } - }() - return nil -} - -func (e *execProcess) Status(ctx context.Context) (string, error) { - e.mu.Lock() - defer e.mu.Unlock() - // if we don't have a pid then the exec process has just been created - if e.pid == 0 { - return "created", nil - } - // if we have a pid and it can be signaled, the process is running - // TODO(random-liu): Use `runsc kill --pid`. 
- if err := unix.Kill(e.pid, 0); err == nil { - return "running", nil - } - // else if we have a pid but it can nolonger be signaled, it has stopped - return "stopped", nil -} diff --git a/pkg/v1/proc/exec_state.go b/pkg/v1/proc/exec_state.go deleted file mode 100644 index e10954670..000000000 --- a/pkg/v1/proc/exec_state.go +++ /dev/null @@ -1,156 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - - "github.com/containerd/console" - "github.com/pkg/errors" -) - -type execState interface { - Resize(console.WinSize) error - Start(context.Context) error - Delete(context.Context) error - Kill(context.Context, uint32, bool) error - SetExited(int) -} - -type execCreatedState struct { - p *execProcess -} - -func (s *execCreatedState) transition(name string) error { - switch name { - case "running": - s.p.execState = &execRunningState{p: s.p} - case "stopped": - s.p.execState = &execStoppedState{p: s.p} - case "deleted": - s.p.execState = &deletedState{} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *execCreatedState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *execCreatedState) Start(ctx context.Context) error { - if err := s.p.start(ctx); err != nil { - return err - } - return s.transition("running") -} - -func (s *execCreatedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execCreatedState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -type execRunningState struct { - p *execProcess -} - -func (s *execRunningState) transition(name string) error { - switch name { - case "stopped": - s.p.execState = &execStoppedState{p: s.p} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *execRunningState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *execRunningState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a running process") -} - -func (s *execRunningState) Delete(ctx context.Context) error { - return errors.Errorf("cannot delete a running process") -} - -func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execRunningState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -type execStoppedState struct { - p *execProcess -} - -func (s *execStoppedState) transition(name string) error { - switch name { - case "deleted": - s.p.execState = &deletedState{} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), 
name) - } - return nil -} - -func (s *execStoppedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot resize a stopped container") -} - -func (s *execStoppedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a stopped process") -} - -func (s *execStoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execStoppedState) SetExited(status int) { - // no op -} diff --git a/pkg/v1/proc/init.go b/pkg/v1/proc/init.go deleted file mode 100644 index 5dbb1daab..000000000 --- a/pkg/v1/proc/init.go +++ /dev/null @@ -1,464 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - "encoding/json" - "io" - "path/filepath" - "strings" - "sync" - "syscall" - "time" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/runtime/proc" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" -) - -// InitPidFile name of the file that contains the init pid -const InitPidFile = "init.pid" - -// Init represents an initial process for a container -type Init struct { - wg sync.WaitGroup - initState initState - - // mu is used to ensure that `Start()` and `Exited()` calls return in - // the right order when invoked in separate go routines. - // This is the case within the shim implementation as it makes use of - // the reaper interface. 
- mu sync.Mutex - - waitBlock chan struct{} - - WorkDir string - - id string - Bundle string - console console.Console - Platform proc.Platform - io runc.IO - runtime *runsc.Runsc - status int - exited time.Time - pid int - closers []io.Closer - stdin io.Closer - stdio proc.Stdio - Rootfs string - IoUID int - IoGID int - Sandbox bool - UserLog string - Monitor ProcessMonitor -} - -// NewRunsc returns a new runsc instance for a process -func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc { - if root == "" { - root = RunscRoot - } - return &runsc.Runsc{ - Command: runtime, - PdeathSignal: syscall.SIGKILL, - Log: filepath.Join(path, "log.json"), - LogFormat: runc.JSON, - Root: filepath.Join(root, namespace), - Config: config, - } -} - -// New returns a new init process -func New(id string, runtime *runsc.Runsc, stdio proc.Stdio) *Init { - p := &Init{ - id: id, - runtime: runtime, - stdio: stdio, - status: 0, - waitBlock: make(chan struct{}), - } - p.initState = &createdState{p: p} - return p -} - -// Create the process with the provided config -func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { - var socket *runc.Socket - if r.Terminal { - if socket, err = runc.NewTempConsoleSocket(); err != nil { - return errors.Wrap(err, "failed to create OCI runtime console socket") - } - defer socket.Close() - } else if hasNoIO(r) { - if p.io, err = runc.NewNullIO(); err != nil { - return errors.Wrap(err, "creating new NULL IO") - } - } else { - if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { - return errors.Wrap(err, "failed to create OCI runtime io pipes") - } - } - pidFile := filepath.Join(p.Bundle, InitPidFile) - opts := &runsc.CreateOpts{ - PidFile: pidFile, - } - if socket != nil { - opts.ConsoleSocket = socket - } - if p.Sandbox { - opts.IO = p.io - // UserLog is only useful for sandbox. 
- opts.UserLog = p.UserLog - } - if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil { - return p.runtimeError(err, "OCI runtime create failed") - } - if r.Stdin != "" { - sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return errors.Wrapf(err, "failed to open stdin fifo %s", r.Stdin) - } - p.stdin = sc - p.closers = append(p.closers, sc) - } - var copyWaitGroup sync.WaitGroup - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - if socket != nil { - console, err := socket.ReceiveMaster() - if err != nil { - return errors.Wrap(err, "failed to retrieve console master") - } - console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup) - if err != nil { - return errors.Wrap(err, "failed to start console copy") - } - p.console = console - } else if !hasNoIO(r) { - if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start io pipe copy") - } - } - - copyWaitGroup.Wait() - pid, err := runc.ReadPidFile(pidFile) - if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime container pid") - } - p.pid = pid - return nil -} - -// Wait for the process to exit -func (p *Init) Wait() { - <-p.waitBlock -} - -// ID of the process -func (p *Init) ID() string { - return p.id -} - -// Pid of the process -func (p *Init) Pid() int { - return p.pid -} - -// ExitStatus of the process -func (p *Init) ExitStatus() int { - p.mu.Lock() - defer p.mu.Unlock() - return p.status -} - -// ExitedAt at time when the process exited -func (p *Init) ExitedAt() time.Time { - p.mu.Lock() - defer p.mu.Unlock() - return p.exited -} - -// Status of the process -func (p *Init) Status(ctx context.Context) (string, error) { - p.mu.Lock() - defer p.mu.Unlock() - c, err := p.runtime.State(ctx, p.id) - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - return "stopped", nil - } - return "", p.runtimeError(err, "OCI runtime state failed") - } - return p.convertStatus(c.Status), nil -} - -// Start the init process -func (p *Init) Start(ctx context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Start(ctx) -} - -func (p *Init) start(ctx context.Context) error { - var cio runc.IO - if !p.Sandbox { - cio = p.io - } - if err := p.runtime.Start(ctx, p.id, cio); err != nil { - return p.runtimeError(err, "OCI runtime start failed") - } - go func() { - status, err := p.runtime.Wait(context.Background(), p.id) - if err != nil { - log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) - // TODO(random-liu): Handle runsc kill error. 
- if err := p.killAll(ctx); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id) - } - status = internalErrorCode - } - ExitCh <- Exit{ - Timestamp: time.Now(), - ID: p.id, - Status: status, - } - }() - return nil -} - -// SetExited of the init process with the next status -func (p *Init) SetExited(status int) { - p.mu.Lock() - defer p.mu.Unlock() - - p.initState.SetExited(status) -} - -func (p *Init) setExited(status int) { - p.exited = time.Now() - p.status = status - p.Platform.ShutdownConsole(context.Background(), p.console) - close(p.waitBlock) -} - -// Delete the init process -func (p *Init) Delete(ctx context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Delete(ctx) -} - -func (p *Init) delete(ctx context.Context) error { - p.killAll(ctx) - p.wg.Wait() - err := p.runtime.Delete(ctx, p.id, nil) - // ignore errors if a runtime has already deleted the process - // but we still hold metadata and pipes - // - // this is common during a checkpoint, runc will delete the container state - // after a checkpoint and the container will no longer exist within runc - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - err = nil - } else { - err = p.runtimeError(err, "failed to delete task") - } - } - if p.io != nil { - for _, c := range p.closers { - c.Close() - } - p.io.Close() - } - if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { - log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") - if err == nil { - err = errors.Wrap(err2, "failed rootfs umount") - } - } - return err -} - -// Resize the init processes console -func (p *Init) Resize(ws console.WinSize) error { - p.mu.Lock() - defer p.mu.Unlock() - - if p.console == nil { - return nil - } - return p.console.Resize(ws) -} - -func (p *Init) resize(ws console.WinSize) error { - if p.console == nil { - return nil - } - return p.console.Resize(ws) -} - -// Kill the init process -func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Kill(ctx, signal, all) -} - -func (p *Init) kill(context context.Context, signal uint32, all bool) error { - var ( - killErr error - backoff = 100 * time.Millisecond - ) - timeout := 1 * time.Second - for start := time.Now(); time.Now().Sub(start) < timeout; { - c, err := p.runtime.State(context, p.id) - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - return errors.Wrapf(errdefs.ErrNotFound, "no such process") - } - return p.runtimeError(err, "OCI runtime state failed") - } - // For runsc, signal only works when container is running state. - // If the container is not in running state, directly return - // "no such process" - if p.convertStatus(c.Status) == "stopped" { - return errors.Wrapf(errdefs.ErrNotFound, "no such process") - } - killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{ - All: all, - }) - if killErr == nil { - return nil - } - time.Sleep(backoff) - backoff *= 2 - } - return p.runtimeError(killErr, "kill timeout") -} - -// KillAll processes belonging to the init process -func (p *Init) KillAll(context context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - return p.killAll(context) -} - -func (p *Init) killAll(context context.Context) error { - p.runtime.Kill(context, p.id, int(syscall.SIGKILL), &runsc.KillOpts{ - All: true, - }) - // Ignore error handling for `runsc kill --all` for now. 
- // * If it doesn't return error, it is good; - // * If it returns error, consider the container has already stopped. - // TODO: Fix `runsc kill --all` error handling. - return nil -} - -// Stdin of the process -func (p *Init) Stdin() io.Closer { - return p.stdin -} - -// Runtime returns the OCI runtime configured for the init process -func (p *Init) Runtime() *runsc.Runsc { - return p.runtime -} - -// Exec returns a new child process -func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Exec(ctx, path, r) -} - -// exec returns a new exec'd process -func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - // process exec request - var spec specs.Process - if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { - return nil, err - } - spec.Terminal = r.Terminal - - e := &execProcess{ - id: r.ID, - path: path, - parent: p, - spec: spec, - stdio: proc.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }, - waitBlock: make(chan struct{}), - } - e.execState = &execCreatedState{p: e} - return e, nil -} - -// Stdio of the process -func (p *Init) Stdio() proc.Stdio { - return p.stdio -} - -func (p *Init) runtimeError(rErr error, msg string) error { - if rErr == nil { - return nil - } - - rMsg, err := getLastRuntimeError(p.runtime) - switch { - case err != nil: - return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error()) - case rMsg == "": - return errors.Wrap(rErr, msg) - default: - return errors.Errorf("%s: %s", msg, rMsg) - } -} - -func (p *Init) convertStatus(status string) string { - if status == "created" && !p.Sandbox && p.status == internalErrorCode { - // Treat start failure state for non-root container as stopped. - return "stopped" - } - return status -} - -func withConditionalIO(c proc.Stdio) runc.IOOpt { - return func(o *runc.IOOption) { - o.OpenStdin = c.Stdin != "" - o.OpenStdout = c.Stdout != "" - o.OpenStderr = c.Stderr != "" - } -} diff --git a/pkg/v1/proc/init_state.go b/pkg/v1/proc/init_state.go deleted file mode 100644 index f56f6fe28..000000000 --- a/pkg/v1/proc/init_state.go +++ /dev/null @@ -1,184 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package proc - -import ( - "context" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/runtime/proc" - "github.com/pkg/errors" -) - -type initState interface { - Resize(console.WinSize) error - Start(context.Context) error - Delete(context.Context) error - Exec(context.Context, string, *ExecConfig) (proc.Process, error) - Kill(context.Context, uint32, bool) error - SetExited(int) -} - -type createdState struct { - p *Init -} - -func (s *createdState) transition(name string) error { - switch name { - case "running": - s.p.initState = &runningState{p: s.p} - case "stopped": - s.p.initState = &stoppedState{p: s.p} - case "deleted": - s.p.initState = &deletedState{} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *createdState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *createdState) Start(ctx context.Context) error { - if err := s.p.start(ctx); err != nil { - // Containerd doesn't allow deleting container in created state. - // However, for gvisor, a non-root container in created state can - // only go to running state. If the container can't be started, - // it can only stay in created state, and never be deleted. - // To work around that, we treat non-root container in start failure - // state as stopped. - if !s.p.Sandbox { - s.p.io.Close() - s.p.setExited(internalErrorCode) - if err := s.transition("stopped"); err != nil { - panic(err) - } - } - return err - } - return s.transition("running") -} - -func (s *createdState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *createdState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return s.p.exec(ctx, path, r) -} - -type runningState struct { - p *Init -} - -func (s *runningState) transition(name string) error { - switch name { - case "stopped": - s.p.initState = &stoppedState{p: s.p} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *runningState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *runningState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a running process") -} - -func (s *runningState) Delete(ctx context.Context) error { - return errors.Errorf("cannot delete a running process") -} - -func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *runningState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return s.p.exec(ctx, path, r) -} - -type stoppedState struct { - p *Init -} - -func (s *stoppedState) transition(name string) error { - switch name { - case "deleted": - s.p.initState = &deletedState{} - default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *stoppedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot 
resize a stopped container") -} - -func (s *stoppedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a stopped process") -} - -func (s *stoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s not found", s.p.id) -} - -func (s *stoppedState) SetExited(status int) { - // no op -} - -func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return nil, errors.Errorf("cannot exec in a stopped state") -} diff --git a/pkg/v1/proc/io.go b/pkg/v1/proc/io.go deleted file mode 100644 index 4afa94cf2..000000000 --- a/pkg/v1/proc/io.go +++ /dev/null @@ -1,169 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "context" - "fmt" - "io" - "os" - "sync" - "sync/atomic" - "syscall" - - "github.com/containerd/containerd/log" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" -) - -// TODO(random-liu): This file can be a util. - -var bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, -} - -func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error { - var sameFile *countingWriteCloser - for _, i := range []struct { - name string - dest func(wc io.WriteCloser, rc io.Closer) - }{ - { - name: stdout, - dest: func(wc io.WriteCloser, rc io.Closer) { - wg.Add(1) - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil { - log.G(ctx).Warn("error copying stdout") - } - wg.Done() - wc.Close() - if rc != nil { - rc.Close() - } - }() - }, - }, { - name: stderr, - dest: func(wc io.WriteCloser, rc io.Closer) { - wg.Add(1) - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil { - log.G(ctx).Warn("error copying stderr") - } - wg.Done() - wc.Close() - if rc != nil { - rc.Close() - } - }() - }, - }, - } { - ok, err := isFifo(i.name) - if err != nil { - return err - } - var ( - fw io.WriteCloser - fr io.Closer - ) - if ok { - if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - } else { - if sameFile != nil { - sameFile.count++ - i.dest(sameFile, nil) - continue - } - if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - if stdout == stderr { - 
sameFile = &countingWriteCloser{ - WriteCloser: fw, - count: 1, - } - } - } - i.dest(fw, fr) - } - if stdin == "" { - return nil - } - f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", stdin, err) - } - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - - io.CopyBuffer(rio.Stdin(), f, *p) - rio.Stdin().Close() - f.Close() - }() - return nil -} - -// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times. -type countingWriteCloser struct { - io.WriteCloser - count int64 -} - -func (c *countingWriteCloser) Close() error { - if atomic.AddInt64(&c.count, -1) > 0 { - return nil - } - return c.WriteCloser.Close() -} - -// isFifo checks if a file is a fifo -// if the file does not exist then it returns false -func isFifo(path string) (bool, error) { - stat, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return false, nil - } - return false, err - } - if stat.Mode()&os.ModeNamedPipe == os.ModeNamedPipe { - return true, nil - } - return false, nil -} diff --git a/pkg/v1/proc/process.go b/pkg/v1/proc/process.go deleted file mode 100644 index 7dbcd823d..000000000 --- a/pkg/v1/proc/process.go +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "github.com/pkg/errors" -) - -// RunscRoot is the path to the root runsc state directory -const RunscRoot = "/run/containerd/runsc" - -func stateName(v interface{}) string { - switch v.(type) { - case *runningState, *execRunningState: - return "running" - case *createdState, *execCreatedState: - return "created" - case *deletedState: - return "deleted" - case *stoppedState: - return "stopped" - } - panic(errors.Errorf("invalid state %v", v)) -} diff --git a/pkg/v1/proc/types.go b/pkg/v1/proc/types.go deleted file mode 100644 index 1d7c8ade3..000000000 --- a/pkg/v1/proc/types.go +++ /dev/null @@ -1,72 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package proc - -import ( - "time" - - google_protobuf "github.com/gogo/protobuf/types" - - runc "github.com/containerd/go-runc" -) - -// Mount holds filesystem mount configuration -type Mount struct { - Type string - Source string - Target string - Options []string -} - -// CreateConfig hold task creation configuration -type CreateConfig struct { - ID string - Bundle string - Runtime string - Rootfs []Mount - Terminal bool - Stdin string - Stdout string - Stderr string - Options *google_protobuf.Any -} - -// ExecConfig holds exec creation configuration -type ExecConfig struct { - ID string - Terminal bool - Stdin string - Stdout string - Stderr string - Spec *google_protobuf.Any -} - -// Exit is the type of exit events -type Exit struct { - Timestamp time.Time - ID string - Status int -} - -// ProcessMonitor monitors process exit changes -type ProcessMonitor interface { - // Subscribe to process exit changes - Subscribe() chan runc.Exit - // Unsubscribe to process exit changes - Unsubscribe(c chan runc.Exit) -} diff --git a/pkg/v1/proc/utils.go b/pkg/v1/proc/utils.go deleted file mode 100644 index e770a6810..000000000 --- a/pkg/v1/proc/utils.go +++ /dev/null @@ -1,94 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package proc - -import ( - "encoding/json" - "io" - "os" - "strings" - "time" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" -) - -const ( - internalErrorCode = 128 - bufferSize = 32 -) - -// ExitCh is the exit events channel for containers and exec processes -// inside the sandbox. -var ExitCh = make(chan Exit, bufferSize) - -// TODO(random-liu): This can be a utility. - -// TODO(mlaventure): move to runc package? -func getLastRuntimeError(r *runsc.Runsc) (string, error) { - if r.Log == "" { - return "", nil - } - - f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400) - if err != nil { - return "", err - } - - var ( - errMsg string - log struct { - Level string - Msg string - Time time.Time - } - ) - - dec := json.NewDecoder(f) - for err = nil; err == nil; { - if err = dec.Decode(&log); err != nil && err != io.EOF { - return "", err - } - if log.Level == "error" { - errMsg = strings.TrimSpace(log.Msg) - } - } - - return errMsg, nil -} - -func copyFile(to, from string) error { - ff, err := os.Open(from) - if err != nil { - return err - } - defer ff.Close() - tt, err := os.Create(to) - if err != nil { - return err - } - defer tt.Close() - - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - _, err = io.CopyBuffer(tt, ff, *p) - return err -} - -func hasNoIO(r *CreateConfig) bool { - return r.Stdin == "" && r.Stdout == "" && r.Stderr == "" -} diff --git a/pkg/v1/shim/platform.go b/pkg/v1/shim/platform.go deleted file mode 100644 index 10c54958f..000000000 --- a/pkg/v1/shim/platform.go +++ /dev/null @@ -1,112 +0,0 @@ -/* -Copyright The containerd Authors. 
-Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package shim - -import ( - "context" - "io" - "sync" - "syscall" - - "github.com/containerd/console" - "github.com/containerd/fifo" - "github.com/pkg/errors" -) - -type linuxPlatform struct { - epoller *console.Epoller -} - -func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { - if p.epoller == nil { - return nil, errors.New("uninitialized epoller") - } - - epollConsole, err := p.epoller.Add(console) - if err != nil { - return nil, err - } - - if stdin != "" { - in, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(epollConsole, in, *p) - }() - } - - outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) - if err != nil { - return nil, err - } - outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - wg.Add(1) - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(outw, epollConsole, *p) - epollConsole.Close() - outr.Close() - outw.Close() - wg.Done() - }() - return epollConsole, nil -} - -func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { - if p.epoller == nil { - return errors.New("uninitialized epoller") - } - epollConsole, ok := cons.(*console.EpollConsole) - if !ok { - return errors.Errorf("expected EpollConsole, got %#v", cons) - } - return epollConsole.Shutdown(p.epoller.CloseConsole) -} - -func (p *linuxPlatform) Close() error { - return p.epoller.Close() -} - -// initialize a single epoll fd to manage our consoles. `initPlatform` should -// only be called once. -func (s *Service) initPlatform() error { - if s.platform != nil { - return nil - } - epoller, err := console.NewEpoller() - if err != nil { - return errors.Wrap(err, "failed to initialize epoller") - } - s.platform = &linuxPlatform{ - epoller: epoller, - } - go epoller.Wait() - return nil -} diff --git a/pkg/v1/shim/service.go b/pkg/v1/shim/service.go deleted file mode 100644 index e06a5562c..000000000 --- a/pkg/v1/shim/service.go +++ /dev/null @@ -1,581 +0,0 @@ -/* -Copyright The containerd Authors. -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package shim - -import ( - "context" - "fmt" - "os" - "path/filepath" - "sync" - - "github.com/containerd/console" - eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/api/types/task" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/runtime" - "github.com/containerd/containerd/runtime/linux/runctypes" - rproc "github.com/containerd/containerd/runtime/proc" - "github.com/containerd/containerd/runtime/v1/shim" - shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" - "github.com/containerd/typeurl" - ptypes "github.com/gogo/protobuf/types" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" - "github.com/google/gvisor-containerd-shim/pkg/v1/proc" - "github.com/google/gvisor-containerd-shim/pkg/v1/utils" -) - -var ( - empty = &ptypes.Empty{} - bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, - } -) - -// Config contains shim specific configuration -type Config struct { - Path string - Namespace string - WorkDir string - RuntimeRoot string - RunscConfig map[string]string -} - -// NewService returns a new shim service that can be used via GRPC -func NewService(config Config, publisher events.Publisher) (*Service, error) { - if config.Namespace == "" { - return nil, fmt.Errorf("shim namespace cannot be empty") - } - ctx := namespaces.WithNamespace(context.Background(), config.Namespace) - ctx = log.WithLogger(ctx, logrus.WithFields(logrus.Fields{ - "namespace": config.Namespace, - "path": config.Path, - "pid": os.Getpid(), - })) - s := &Service{ - config: config, - context: ctx, - processes: make(map[string]rproc.Process), - events: make(chan interface{}, 128), - ec: proc.ExitCh, - } - go s.processExits() - if err := s.initPlatform(); err != nil { - return nil, errors.Wrap(err, "failed to initialized platform behavior") - } - go s.forward(publisher) - return s, nil -} - -// Service is the shim implementation of a remote shim over GRPC -type Service struct { - mu sync.Mutex - - config Config - context context.Context - processes map[string]rproc.Process - events chan interface{} - platform rproc.Platform - ec chan proc.Exit - - // Filled by Create() - id string - bundle string -} - -// Create a new initial process and container with the underlying OCI runtime -func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ *shimapi.CreateTaskResponse, err error) { - s.mu.Lock() - defer s.mu.Unlock() - - var mounts []proc.Mount - for _, m := range r.Rootfs { - mounts = append(mounts, proc.Mount{ - Type: m.Type, - Source: m.Source, - Target: m.Target, - Options: m.Options, - }) - } - - rootfs := filepath.Join(r.Bundle, "rootfs") - if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { - return nil, err - } - - config := &proc.CreateConfig{ - ID: r.ID, - Bundle: r.Bundle, - Runtime: r.Runtime, - Rootfs: mounts, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Options: r.Options, - } - defer func() { - if err != nil { - if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { - log.G(ctx).WithError(err2).Warn("Failed to cleanup rootfs mount") - } - } - }() - for _, rm := range mounts { - m := 
&mount.Mount{ - Type: rm.Type, - Source: rm.Source, - Options: rm.Options, - } - if err := m.Mount(rootfs); err != nil { - return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) - } - } - process, err := newInit( - ctx, - s.config.Path, - s.config.WorkDir, - s.config.RuntimeRoot, - s.config.Namespace, - s.config.RunscConfig, - s.platform, - config, - ) - if err := process.Create(ctx, config); err != nil { - return nil, errdefs.ToGRPC(err) - } - // save the main task id and bundle to the shim for additional requests - s.id = r.ID - s.bundle = r.Bundle - pid := process.Pid() - s.processes[r.ID] = process - return &shimapi.CreateTaskResponse{ - Pid: uint32(pid), - }, nil -} - -// Start a process -func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.StartResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Start(ctx); err != nil { - return nil, err - } - return &shimapi.StartResponse{ - ID: p.ID(), - Pid: uint32(p.Pid()), - }, nil -} - -// Delete the initial process and container -func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteResponse, error) { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - s.mu.Lock() - delete(s.processes, s.id) - s.mu.Unlock() - s.platform.Close() - return &shimapi.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// DeleteProcess deletes an exec'd process -func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) { - if r.ID == s.id { - return nil, status.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess") - } - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - s.mu.Lock() - delete(s.processes, r.ID) - s.mu.Unlock() - return &shimapi.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// Exec an additional process inside the container -func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*ptypes.Empty, error) { - s.mu.Lock() - - if p := s.processes[r.ID]; p != nil { - s.mu.Unlock() - return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ID) - } - - p := s.processes[s.id] - s.mu.Unlock() - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - - process, err := p.(*proc.Init).Exec(ctx, s.config.Path, &proc.ExecConfig{ - ID: r.ID, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Spec: r.Spec, - }) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - s.mu.Lock() - s.processes[r.ID] = process - s.mu.Unlock() - return empty, nil -} - -// ResizePty of a process -func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*ptypes.Empty, error) { - if r.ID == "" { - return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided") - } - ws := console.WinSize{ - Width: uint16(r.Width), - Height: uint16(r.Height), - } - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Resize(ws); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// State returns runtime state information for a process -func (s *Service) State(ctx context.Context, r 
*shimapi.StateRequest) (*shimapi.StateResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - st, err := p.Status(ctx) - if err != nil { - return nil, err - } - status := task.StatusUnknown - switch st { - case "created": - status = task.StatusCreated - case "running": - status = task.StatusRunning - case "stopped": - status = task.StatusStopped - } - sio := p.Stdio() - return &shimapi.StateResponse{ - ID: p.ID(), - Bundle: s.bundle, - Pid: uint32(p.Pid()), - Status: status, - Stdin: sio.Stdin, - Stdout: sio.Stdout, - Stderr: sio.Stderr, - Terminal: sio.Terminal, - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -// Pause the container -func (s *Service) Pause(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Resume the container -func (s *Service) Resume(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Kill a process with the provided signal -func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Empty, error) { - if r.ID == "" { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil - } - - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// ListPids returns all pids inside the container -func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*shimapi.ListPidsResponse, error) { - pids, err := s.getContainerPids(ctx, r.ID) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - var processes []*task.ProcessInfo - for _, pid := range pids { - pInfo := task.ProcessInfo{ - Pid: pid, - } - for _, p := range s.processes { - if p.Pid() == int(pid) { - d := &runctypes.ProcessDetails{ - ExecID: p.ID(), - } - a, err := typeurl.MarshalAny(d) - if err != nil { - return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) - } - pInfo.Info = a - break - } - } - processes = append(processes, &pInfo) - } - return &shimapi.ListPidsResponse{ - Processes: processes, - }, nil -} - -// CloseIO of a process -func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*ptypes.Empty, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if stdin := p.Stdin(); stdin != nil { - if err := stdin.Close(); err != nil { - return nil, errors.Wrap(err, "close stdin") - } - } - return empty, nil -} - -// Checkpoint the container -func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointTaskRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// ShimInfo returns shim information such as the shim's pid -func (s *Service) ShimInfo(ctx context.Context, r *ptypes.Empty) (*shimapi.ShimInfoResponse, error) { - return &shimapi.ShimInfoResponse{ - ShimPid: uint32(os.Getpid()), - }, nil -} - -// Update a running container -func (s *Service) Update(ctx context.Context, r *shimapi.UpdateTaskRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Wait for a process to exit -func (s *Service) Wait(ctx context.Context, r *shimapi.WaitRequest) (*shimapi.WaitResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return 
nil, err - } - p.Wait() - - return &shimapi.WaitResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -func (s *Service) processExits() { - for e := range s.ec { - s.checkProcesses(e) - } -} - -func (s *Service) allProcesses() []rproc.Process { - s.mu.Lock() - defer s.mu.Unlock() - - res := make([]rproc.Process, 0, len(s.processes)) - for _, p := range s.processes { - res = append(res, p) - } - return res -} - -func (s *Service) checkProcesses(e proc.Exit) { - for _, p := range s.allProcesses() { - if p.ID() == e.ID { - if ip, ok := p.(*proc.Init); ok { - // Ensure all children are killed - if err := ip.KillAll(s.context); err != nil { - log.G(s.context).WithError(err).WithField("id", ip.ID()). - Error("failed to kill init's children") - } - } - p.SetExited(e.Status) - s.events <- &eventstypes.TaskExit{ - ContainerID: s.id, - ID: p.ID(), - Pid: uint32(p.Pid()), - ExitStatus: uint32(e.Status), - ExitedAt: p.ExitedAt(), - } - return - } - } -} - -func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - - ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) - if err != nil { - return nil, err - } - pids := make([]uint32, 0, len(ps)) - for _, pid := range ps { - pids = append(pids, uint32(pid)) - } - return pids, nil -} - -func (s *Service) forward(publisher events.Publisher) { - for e := range s.events { - if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil { - log.G(s.context).WithError(err).Error("post event") - } - } -} - -// getInitProcess returns initial process -func (s *Service) getInitProcess() (rproc.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - p := s.processes[s.id] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - return p, nil -} - -// getExecProcess returns exec process -func (s *Service) getExecProcess(id string) (rproc.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - p := s.processes[id] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s does not exist", id) - } - return p, nil -} - -func getTopic(ctx context.Context, e interface{}) string { - switch e.(type) { - case *eventstypes.TaskCreate: - return runtime.TaskCreateEventTopic - case *eventstypes.TaskStart: - return runtime.TaskStartEventTopic - case *eventstypes.TaskOOM: - return runtime.TaskOOMEventTopic - case *eventstypes.TaskExit: - return runtime.TaskExitEventTopic - case *eventstypes.TaskDelete: - return runtime.TaskDeleteEventTopic - case *eventstypes.TaskExecAdded: - return runtime.TaskExecAddedEventTopic - case *eventstypes.TaskExecStarted: - return runtime.TaskExecStartedEventTopic - default: - logrus.Warnf("no topic for type %#v", e) - } - return runtime.TaskUnknownTopic -} - -func newInit(ctx context.Context, path, workDir, runtimeRoot, namespace string, config map[string]string, platform rproc.Platform, r *proc.CreateConfig) (*proc.Init, error) { - var options runctypes.CreateOptions - if r.Options != nil { - v, err := typeurl.UnmarshalAny(r.Options) - if err != nil { - return nil, err - } - options = *v.(*runctypes.CreateOptions) - } - - spec, err := utils.ReadSpec(r.Bundle) - if err != nil { - return nil, errors.Wrap(err, "read oci spec") - } - if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { - return nil, errors.Wrap(err, "update volume annotations") - } - - runsc.FormatLogPath(r.ID, config) - rootfs := filepath.Join(path, 
"rootfs") - runtime := proc.NewRunsc(runtimeRoot, path, namespace, r.Runtime, config) - p := proc.New(r.ID, runtime, rproc.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }) - p.Bundle = r.Bundle - p.Platform = platform - p.Rootfs = rootfs - p.WorkDir = workDir - p.IoUID = int(options.IoUid) - p.IoGID = int(options.IoGid) - p.Sandbox = utils.IsSandbox(spec) - p.UserLog = utils.UserLogPath(spec) - p.Monitor = shim.Default - return p, nil -} diff --git a/pkg/v1/utils/utils.go b/pkg/v1/utils/utils.go deleted file mode 100644 index 83840c047..000000000 --- a/pkg/v1/utils/utils.go +++ /dev/null @@ -1,59 +0,0 @@ -/* -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package utils - -import ( - "encoding/json" - "io/ioutil" - "os" - "path/filepath" - - "github.com/containerd/cri/pkg/annotations" - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -// ReadSpec reads OCI spec from the bundle directory. -func ReadSpec(bundle string) (*specs.Spec, error) { - f, err := os.Open(filepath.Join(bundle, "config.json")) - if err != nil { - return nil, err - } - b, err := ioutil.ReadAll(f) - if err != nil { - return nil, err - } - var spec specs.Spec - if err := json.Unmarshal(b, &spec); err != nil { - return nil, err - } - return &spec, nil -} - -// IsSandbox checks whether a container is a sandbox container. -func IsSandbox(spec *specs.Spec) bool { - t, ok := spec.Annotations[annotations.ContainerType] - return !ok || t == annotations.ContainerTypeSandbox -} - -// UserLogPath gets user log path from OCI annotation. -func UserLogPath(spec *specs.Spec) string { - sandboxLogDir := spec.Annotations[annotations.SandboxLogDir] - if sandboxLogDir == "" { - return "" - } - return filepath.Join(sandboxLogDir, "gvisor.log") -} diff --git a/pkg/v1/utils/volumes.go b/pkg/v1/utils/volumes.go deleted file mode 100644 index 61adeaa54..000000000 --- a/pkg/v1/utils/volumes.go +++ /dev/null @@ -1,158 +0,0 @@ -/* -Copyright 2019 Google LLC. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package utils - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "path/filepath" - "strings" - - "github.com/containerd/cri/pkg/annotations" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -const volumeKeyPrefix = "dev.gvisor.spec.mount." 
- -var kubeletPodsDir = "/var/lib/kubelet/pods" - -// volumeName gets volume name from volume annotation key, example: -// dev.gvisor.spec.mount.NAME.share -func volumeName(k string) string { - return strings.SplitN(strings.TrimPrefix(k, volumeKeyPrefix), ".", 2)[0] -} - -// volumeFieldName gets volume field name from volume annotation key, example: -// `type` is the field of dev.gvisor.spec.mount.NAME.type -func volumeFieldName(k string) string { - parts := strings.Split(strings.TrimPrefix(k, volumeKeyPrefix), ".") - return parts[len(parts)-1] -} - -// podUID gets pod UID from the pod log path. -func podUID(s *specs.Spec) (string, error) { - sandboxLogDir := s.Annotations[annotations.SandboxLogDir] - if sandboxLogDir == "" { - return "", errors.New("no sandbox log path annotation") - } - fields := strings.Split(filepath.Base(sandboxLogDir), "_") - switch len(fields) { - case 1: // This is the old CRI logging path - return fields[0], nil - case 3: // This is the new CRI logging path - return fields[2], nil - } - return "", errors.Errorf("unexpected sandbox log path %q", sandboxLogDir) -} - -// isVolumeKey checks whether an annotation key is for volume. -func isVolumeKey(k string) bool { - return strings.HasPrefix(k, volumeKeyPrefix) -} - -// volumeSourceKey constructs the annotation key for volume source. -func volumeSourceKey(volume string) string { - return volumeKeyPrefix + volume + ".source" -} - -// volumePath searches the volume path in the kubelet pod directory. -func volumePath(volume, uid string) (string, error) { - // TODO: Support subpath when gvisor supports pod volume bind mount. - volumeSearchPath := fmt.Sprintf("%s/%s/volumes/*/%s", kubeletPodsDir, uid, volume) - dirs, err := filepath.Glob(volumeSearchPath) - if err != nil { - return "", err - } - if len(dirs) != 1 { - return "", errors.Errorf("unexpected matched volume list %v", dirs) - } - return dirs[0], nil -} - -// isVolumePath checks whether a string is the volume path. -func isVolumePath(volume, path string) (bool, error) { - // TODO: Support subpath when gvisor supports pod volume bind mount. - volumeSearchPath := fmt.Sprintf("%s/*/volumes/*/%s", kubeletPodsDir, volume) - return filepath.Match(volumeSearchPath, path) -} - -// UpdateVolumeAnnotations add necessary OCI annotations for gvisor -// volume optimization. -func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { - var ( - uid string - err error - ) - if IsSandbox(s) { - uid, err = podUID(s) - if err != nil { - // Skip if we can't get pod UID, because this doesn't work - // for containerd 1.1. - logrus.WithError(err).Error("Can't get pod uid") - return nil - } - } - var updated bool - for k, v := range s.Annotations { - if !isVolumeKey(k) { - continue - } - if volumeFieldName(k) != "type" { - continue - } - volume := volumeName(k) - if uid != "" { - // This is a sandbox - path, err := volumePath(volume, uid) - if err != nil { - return errors.Wrapf(err, "get volume path for %q", volume) - } - s.Annotations[volumeSourceKey(volume)] = path - updated = true - } else { - // This is a container - for i := range s.Mounts { - // An error is returned for sandbox if source annotation - // is not successfully applied, so it is guaranteed that - // the source annotation for sandbox has already been - // successfully applied at this point. - // The volume name is unique inside a pod, so matching without - // podUID is fine here. - // TODO: Pass podUID down to shim for containers to do - // more accurate matching. 
- if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { - // gVisor requires the container mount type to match - // sandbox mount type. - s.Mounts[i].Type = v - updated = true - } - } - } - } - if !updated { - return nil - } - // Update bundle - b, err := json.Marshal(s) - if err != nil { - return err - } - return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) -} diff --git a/pkg/v1/utils/volumes_test.go b/pkg/v1/utils/volumes_test.go deleted file mode 100644 index 472cd6e86..000000000 --- a/pkg/v1/utils/volumes_test.go +++ /dev/null @@ -1,311 +0,0 @@ -/* -Copyright 2019 Google LLC. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package utils - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "reflect" - "testing" - - "github.com/containerd/cri/pkg/annotations" - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -func TestUpdateVolumeAnnotations(t *testing.T) { - dir, err := ioutil.TempDir("", "test-update-volume-annotations") - if err != nil { - t.Fatalf("create tempdir: %v", err) - } - defer os.RemoveAll(dir) - kubeletPodsDir = dir - - const ( - testPodUID = "testuid" - testVolumeName = "testvolume" - testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID - testLegacyLogDirPath = "/var/log/pods/" + testPodUID - ) - testVolumePath := fmt.Sprintf("%s/%s/volumes/kubernetes.io~empty-dir/%s", dir, testPodUID, testVolumeName) - - if err := os.MkdirAll(testVolumePath, 0755); err != nil { - t.Fatalf("Create test volume: %v", err) - } - - for _, test := range []struct { - desc string - spec *specs.Spec - expected *specs.Spec - expectErr bool - expectUpdate bool - }{ - { - desc: "volume annotations for sandbox", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, - }, - }, - expectUpdate: true, - }, - { - desc: "volume annotations for sandbox with legacy log path", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLegacyLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." 
+ testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLegacyLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, - }, - }, - expectUpdate: true, - }, - { - desc: "tmpfs: volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "tmpfs", - Source: testVolumePath, - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expectUpdate: true, - }, - { - desc: "bind: volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expectUpdate: true, - }, - { - desc: "should not return error without pod log directory", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." 
+ testVolumeName + ".options": "ro", - }, - }, - }, - { - desc: "should return error if volume path does not exist", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - "dev.gvisor.spec.mount.notexist.share": "pod", - "dev.gvisor.spec.mount.notexist.type": "tmpfs", - "dev.gvisor.spec.mount.notexist.options": "ro", - }, - }, - expectErr: true, - }, - { - desc: "no volume annotations for sandbox", - spec: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - annotations.SandboxLogDir: testLogDirPath, - annotations.ContainerType: annotations.ContainerTypeSandbox, - }, - }, - }, - { - desc: "no volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: "/test", - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: "/test", - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - annotations.ContainerType: annotations.ContainerTypeContainer, - }, - }, - }, - } { - t.Run(test.desc, func(t *testing.T) { - bundle, err := ioutil.TempDir(dir, "test-bundle") - if err != nil { - t.Fatalf("Create test bundle: %v", err) - } - err = UpdateVolumeAnnotations(bundle, test.spec) - if test.expectErr { - if err == nil { - t.Fatal("Expected error, but got nil") - } - return - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(test.expected, test.spec) { - t.Fatalf("Expected %+v, got %+v", test.expected, test.spec) - } - if test.expectUpdate { - b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json")) - if err != nil { - t.Fatalf("Read spec from bundle: %v", err) - } - var spec specs.Spec - if err := json.Unmarshal(b, &spec); err != nil { - t.Fatalf("Unmarshal spec: %v", err) - } - if !reflect.DeepEqual(test.expected, &spec) { - t.Fatalf("Expected %+v, got %+v", test.expected, &spec) - } - } - }) - } -} diff --git a/pkg/v2/epoll.go b/pkg/v2/epoll.go deleted file mode 100644 index 76c7b54d6..000000000 --- a/pkg/v2/epoll.go +++ /dev/null @@ -1,129 +0,0 @@ -// +build linux - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package v2 - -import ( - "context" - "sync" - - "github.com/containerd/cgroups" - eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/runtime" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -func newOOMEpoller(publisher events.Publisher) (*epoller, error) { - fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) - if err != nil { - return nil, err - } - return &epoller{ - fd: fd, - publisher: publisher, - set: make(map[uintptr]*item), - }, nil -} - -type epoller struct { - mu sync.Mutex - - fd int - publisher events.Publisher - set map[uintptr]*item -} - -type item struct { - id string - cg cgroups.Cgroup -} - -func (e *epoller) Close() error { - return unix.Close(e.fd) -} - -func (e *epoller) run(ctx context.Context) { - var events [128]unix.EpollEvent - for { - select { - case <-ctx.Done(): - e.Close() - return - default: - n, err := unix.EpollWait(e.fd, events[:], -1) - if err != nil { - if err == unix.EINTR { - continue - } - logrus.WithError(err).Error("cgroups: epoll wait") - } - for i := 0; i < n; i++ { - e.process(ctx, uintptr(events[i].Fd)) - } - } - } -} - -func (e *epoller) add(id string, cg cgroups.Cgroup) error { - e.mu.Lock() - defer e.mu.Unlock() - fd, err := cg.OOMEventFD() - if err != nil { - return err - } - e.set[fd] = &item{ - id: id, - cg: cg, - } - event := unix.EpollEvent{ - Fd: int32(fd), - Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR, - } - return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event) -} - -func (e *epoller) process(ctx context.Context, fd uintptr) { - flush(fd) - e.mu.Lock() - i, ok := e.set[fd] - if !ok { - e.mu.Unlock() - return - } - e.mu.Unlock() - if i.cg.State() == cgroups.Deleted { - e.mu.Lock() - delete(e.set, fd) - e.mu.Unlock() - unix.Close(int(fd)) - return - } - if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{ - ContainerID: i.id, - }); err != nil { - logrus.WithError(err).Error("publish OOM event") - } -} - -func flush(fd uintptr) error { - var buf [8]byte - _, err := unix.Read(int(fd), buf[:]) - return err -} diff --git a/pkg/v2/options/options.go b/pkg/v2/options/options.go deleted file mode 100644 index 34b002ac4..000000000 --- a/pkg/v2/options/options.go +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright 2018 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -package options - -const OptionType = "io.containerd.runsc.v1.options" - -// Options is runtime options for io.containerd.runsc.v1. -type Options struct { - // ShimCgroup is the cgroup the shim should be in. - ShimCgroup string `toml:"shim_cgroup"` - // IoUid is the I/O's pipes uid. - IoUid uint32 `toml:"io_uid"` - // IoUid is the I/O's pipes gid. - IoGid uint32 `toml:"io_gid"` - // BinaryName is the binary name of the runsc binary. - BinaryName string `toml:"binary_name"` - // Root is the runsc root directory. - Root string `toml:"root"` - // RunscConfig is a key/value map of all runsc flags. 
- RunscConfig map[string]string `toml:"runsc_config"` -} diff --git a/pkg/v2/service.go b/pkg/v2/service.go deleted file mode 100644 index f99456f63..000000000 --- a/pkg/v2/service.go +++ /dev/null @@ -1,826 +0,0 @@ -// +build linux - -/* - Copyright The containerd Authors. - Copyright 2018 Google LLC - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package v2 - -import ( - "context" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "sync" - "syscall" - "time" - - "github.com/BurntSushi/toml" - "github.com/containerd/cgroups" - "github.com/containerd/console" - eventstypes "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/api/types/task" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/runtime" - "github.com/containerd/containerd/runtime/linux/runctypes" - rproc "github.com/containerd/containerd/runtime/proc" - "github.com/containerd/containerd/runtime/v2/shim" - taskAPI "github.com/containerd/containerd/runtime/v2/task" - runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1" - "github.com/containerd/typeurl" - ptypes "github.com/gogo/protobuf/types" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - - runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" - "github.com/google/gvisor-containerd-shim/pkg/v1/proc" - "github.com/google/gvisor-containerd-shim/pkg/v1/utils" - "github.com/google/gvisor-containerd-shim/pkg/v2/options" -) - -var ( - empty = &ptypes.Empty{} - bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, - } -) - -var _ = (taskAPI.TaskService)(&service{}) - -// configFile is the default config file name. For containerd 1.2, -// we assume that a config.toml should exist in the runtime root. 
-const configFile = "config.toml" - -// New returns a new shim service that can be used via GRPC -func New(ctx context.Context, id string, publisher events.Publisher) (shim.Shim, error) { - ep, err := newOOMEpoller(publisher) - if err != nil { - return nil, err - } - ctx, cancel := context.WithCancel(ctx) - go ep.run(ctx) - s := &service{ - id: id, - context: ctx, - processes: make(map[string]rproc.Process), - events: make(chan interface{}, 128), - ec: proc.ExitCh, - oomPoller: ep, - cancel: cancel, - } - go s.processExits() - runsc.Monitor = shim.Default - if err := s.initPlatform(); err != nil { - cancel() - return nil, errors.Wrap(err, "failed to initialized platform behavior") - } - go s.forward(publisher) - return s, nil -} - -// service is the shim implementation of a remote shim over GRPC -type service struct { - mu sync.Mutex - - context context.Context - task rproc.Process - processes map[string]rproc.Process - events chan interface{} - platform rproc.Platform - opts options.Options - ec chan proc.Exit - oomPoller *epoller - - id string - bundle string - cancel func() -} - -func newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) { - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, err - } - self, err := os.Executable() - if err != nil { - return nil, err - } - cwd, err := os.Getwd() - if err != nil { - return nil, err - } - args := []string{ - "-namespace", ns, - "-address", containerdAddress, - "-publish-binary", containerdBinary, - } - cmd := exec.Command(self, args...) - cmd.Dir = cwd - cmd.Env = append(os.Environ(), "GOMAXPROCS=2") - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - return cmd, nil -} - -func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress string) (string, error) { - cmd, err := newCommand(ctx, containerdBinary, containerdAddress) - if err != nil { - return "", err - } - address, err := shim.SocketAddress(ctx, id) - if err != nil { - return "", err - } - socket, err := shim.NewSocket(address) - if err != nil { - return "", err - } - defer socket.Close() - f, err := socket.File() - if err != nil { - return "", err - } - defer f.Close() - - cmd.ExtraFiles = append(cmd.ExtraFiles, f) - - if err := cmd.Start(); err != nil { - return "", err - } - defer func() { - if err != nil { - cmd.Process.Kill() - } - }() - // make sure to wait after start - go cmd.Wait() - if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { - return "", err - } - if err := shim.WriteAddress("address", address); err != nil { - return "", err - } - if err := shim.SetScore(cmd.Process.Pid); err != nil { - return "", errors.Wrap(err, "failed to set OOM Score on shim") - } - return address, nil -} - -func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) { - path, err := os.Getwd() - if err != nil { - return nil, err - } - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, err - } - runtime, err := s.readRuntime(path) - if err != nil { - return nil, err - } - r := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil) - if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{ - Force: true, - }); err != nil { - logrus.WithError(err).Warn("failed to remove runc container") - } - if err := mount.UnmountAll(filepath.Join(path, "rootfs"), 0); err != nil { - logrus.WithError(err).Warn("failed to cleanup rootfs mount") - } - return &taskAPI.DeleteResponse{ - ExitedAt: time.Now(), - ExitStatus: 128 + uint32(unix.SIGKILL), - }, 
nil -} - -func (s *service) readRuntime(path string) (string, error) { - data, err := ioutil.ReadFile(filepath.Join(path, "runtime")) - if err != nil { - return "", err - } - return string(data), nil -} - -func (s *service) writeRuntime(path, runtime string) error { - return ioutil.WriteFile(filepath.Join(path, "runtime"), []byte(runtime), 0600) -} - -// Create a new initial process and container with the underlying OCI runtime -func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { - s.mu.Lock() - defer s.mu.Unlock() - - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, errors.Wrap(err, "create namespace") - } - - // Read from root for now. - var opts options.Options - if r.Options != nil { - v, err := typeurl.UnmarshalAny(r.Options) - if err != nil { - return nil, err - } - var path string - switch o := v.(type) { - case *runctypes.CreateOptions: // containerd 1.2.x - opts.IoUid = o.IoUid - opts.IoGid = o.IoGid - opts.ShimCgroup = o.ShimCgroup - case *runctypes.RuncOptions: // containerd 1.2.x - root := proc.RunscRoot - if o.RuntimeRoot != "" { - root = o.RuntimeRoot - } - - opts.BinaryName = o.Runtime - - path = filepath.Join(root, configFile) - if _, err := os.Stat(path); err != nil { - if !os.IsNotExist(err) { - return nil, errors.Wrapf(err, "stat config file %q", path) - } - // A config file in runtime root is not required. - path = "" - } - case *runtimeoptions.Options: // containerd 1.3.x+ - if o.ConfigPath == "" { - break - } - if o.TypeUrl != options.OptionType { - return nil, errors.Errorf("unsupported runtimeoptions %q", o.TypeUrl) - } - path = o.ConfigPath - default: - return nil, errors.Errorf("unsupported option type %q", r.Options.TypeUrl) - } - if path != "" { - if _, err = toml.DecodeFile(path, &opts); err != nil { - return nil, errors.Wrapf(err, "decode config file %q", path) - } - } - } - - var mounts []proc.Mount - for _, m := range r.Rootfs { - mounts = append(mounts, proc.Mount{ - Type: m.Type, - Source: m.Source, - Target: m.Target, - Options: m.Options, - }) - } - - rootfs := filepath.Join(r.Bundle, "rootfs") - if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { - return nil, err - } - - config := &proc.CreateConfig{ - ID: r.ID, - Bundle: r.Bundle, - Runtime: opts.BinaryName, - Rootfs: mounts, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Options: r.Options, - } - if err := s.writeRuntime(r.Bundle, opts.BinaryName); err != nil { - return nil, err - } - defer func() { - if err != nil { - if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { - logrus.WithError(err2).Warn("failed to cleanup rootfs mount") - } - } - }() - for _, rm := range mounts { - m := &mount.Mount{ - Type: rm.Type, - Source: rm.Source, - Options: rm.Options, - } - if err := m.Mount(rootfs); err != nil { - return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) - } - } - process, err := newInit( - ctx, - r.Bundle, - filepath.Join(r.Bundle, "work"), - ns, - s.platform, - config, - &opts, - rootfs, - ) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - if err := process.Create(ctx, config); err != nil { - return nil, errdefs.ToGRPC(err) - } - // save the main task id and bundle to the shim for additional requests - s.id = r.ID - s.bundle = r.Bundle - - // Set up OOM notification on the sandbox's cgroup. This is done on sandbox - // create since the sandbox process will be created here. 
- pid := process.Pid() - if pid > 0 { - cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid)) - if err != nil { - return nil, errors.Wrapf(err, "loading cgroup for %d", pid) - } - if err := s.oomPoller.add(s.id, cg); err != nil { - return nil, errors.Wrapf(err, "add cg to OOM monitor") - } - } - s.task = process - s.opts = opts - return &taskAPI.CreateTaskResponse{ - Pid: uint32(process.Pid()), - }, nil - -} - -// Start a process -func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if err := p.Start(ctx); err != nil { - return nil, err - } - // TODO: Set the cgroup and oom notifications on restore. - // https://github.com/google/gvisor-containerd-shim/issues/58 - return &taskAPI.StartResponse{ - Pid: uint32(p.Pid()), - }, nil -} - -// Delete the initial process and container -func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - isTask := r.ExecID == "" - if !isTask { - s.mu.Lock() - delete(s.processes, r.ExecID) - s.mu.Unlock() - } - if isTask && s.platform != nil { - s.platform.Close() - } - return &taskAPI.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// Exec an additional process inside the container -func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*ptypes.Empty, error) { - s.mu.Lock() - p := s.processes[r.ExecID] - s.mu.Unlock() - if p != nil { - return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) - } - p = s.task - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - process, err := p.(*proc.Init).Exec(ctx, s.bundle, &proc.ExecConfig{ - ID: r.ExecID, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Spec: r.Spec, - }) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - s.mu.Lock() - s.processes[r.ExecID] = process - s.mu.Unlock() - return empty, nil -} - -// ResizePty of a process -func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*ptypes.Empty, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - ws := console.WinSize{ - Width: uint16(r.Width), - Height: uint16(r.Height), - } - if err := p.Resize(ws); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// State returns runtime state information for a process -func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - st, err := p.Status(ctx) - if err != nil { - return nil, err - } - status := task.StatusUnknown - switch st { - case "created": - status = task.StatusCreated - case "running": - status = task.StatusRunning - case "stopped": - status = task.StatusStopped - } - sio := p.Stdio() - return &taskAPI.StateResponse{ - ID: p.ID(), - Bundle: s.bundle, - Pid: uint32(p.Pid()), - Status: status, - Stdin: sio.Stdin, - Stdout: sio.Stdout, - Stderr: sio.Stderr, - Terminal: sio.Terminal, - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -// Pause the container 
-func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Resume the container -func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Kill a process with the provided signal -func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*ptypes.Empty, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// Pids returns all pids inside the container -func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) { - pids, err := s.getContainerPids(ctx, r.ID) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - var processes []*task.ProcessInfo - for _, pid := range pids { - pInfo := task.ProcessInfo{ - Pid: pid, - } - for _, p := range s.processes { - if p.Pid() == int(pid) { - d := &runctypes.ProcessDetails{ - ExecID: p.ID(), - } - a, err := typeurl.MarshalAny(d) - if err != nil { - return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) - } - pInfo.Info = a - break - } - } - processes = append(processes, &pInfo) - } - return &taskAPI.PidsResponse{ - Processes: processes, - }, nil -} - -// CloseIO of a process -func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*ptypes.Empty, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if stdin := p.Stdin(); stdin != nil { - if err := stdin.Close(); err != nil { - return nil, errors.Wrap(err, "close stdin") - } - } - return empty, nil -} - -// Checkpoint the container -func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Connect returns shim information such as the shim's pid -func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) { - var pid int - if s.task != nil { - pid = s.task.Pid() - } - return &taskAPI.ConnectResponse{ - ShimPid: uint32(os.Getpid()), - TaskPid: uint32(pid), - }, nil -} - -func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*ptypes.Empty, error) { - s.cancel() - os.Exit(0) - return empty, nil -} - -func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) { - path, err := os.Getwd() - if err != nil { - return nil, err - } - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, err - } - runtime, err := s.readRuntime(path) - if err != nil { - return nil, err - } - rs := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil) - stats, err := rs.Stats(ctx, s.id) - if err != nil { - return nil, err - } - - // gvisor currently (as of 2020-03-03) only returns the total memory - // usage and current PID value[0]. However, we copy the common fields here - // so that future updates will propagate correct information. We're - // using the cgroups.Metrics structure so we're returning the same type - // as runc. 
- // - // [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81 - data, err := typeurl.MarshalAny(&cgroups.Metrics{ - CPU: &cgroups.CPUStat{ - Usage: &cgroups.CPUUsage{ - Total: stats.Cpu.Usage.Total, - Kernel: stats.Cpu.Usage.Kernel, - User: stats.Cpu.Usage.User, - PerCPU: stats.Cpu.Usage.Percpu, - }, - Throttling: &cgroups.Throttle{ - Periods: stats.Cpu.Throttling.Periods, - ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods, - ThrottledTime: stats.Cpu.Throttling.ThrottledTime, - }, - }, - Memory: &cgroups.MemoryStat{ - Cache: stats.Memory.Cache, - Usage: &cgroups.MemoryEntry{ - Limit: stats.Memory.Usage.Limit, - Usage: stats.Memory.Usage.Usage, - Max: stats.Memory.Usage.Max, - Failcnt: stats.Memory.Usage.Failcnt, - }, - Swap: &cgroups.MemoryEntry{ - Limit: stats.Memory.Swap.Limit, - Usage: stats.Memory.Swap.Usage, - Max: stats.Memory.Swap.Max, - Failcnt: stats.Memory.Swap.Failcnt, - }, - Kernel: &cgroups.MemoryEntry{ - Limit: stats.Memory.Kernel.Limit, - Usage: stats.Memory.Kernel.Usage, - Max: stats.Memory.Kernel.Max, - Failcnt: stats.Memory.Kernel.Failcnt, - }, - KernelTCP: &cgroups.MemoryEntry{ - Limit: stats.Memory.KernelTCP.Limit, - Usage: stats.Memory.KernelTCP.Usage, - Max: stats.Memory.KernelTCP.Max, - Failcnt: stats.Memory.KernelTCP.Failcnt, - }, - }, - Pids: &cgroups.PidsStat{ - Current: stats.Pids.Current, - Limit: stats.Pids.Limit, - }, - }) - if err != nil { - return nil, err - } - return &taskAPI.StatsResponse{ - Stats: data, - }, nil -} - -// Update a running container -func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*ptypes.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Wait for a process to exit -func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) { - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - p.Wait() - - return &taskAPI.WaitResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -func (s *service) processExits() { - for e := range s.ec { - s.checkProcesses(e) - } -} - -func (s *service) checkProcesses(e proc.Exit) { - // TODO(random-liu): Add `shouldKillAll` logic if container pid - // namespace is supported. - for _, p := range s.allProcesses() { - if p.ID() == e.ID { - if ip, ok := p.(*proc.Init); ok { - // Ensure all children are killed - if err := ip.KillAll(s.context); err != nil { - log.G(s.context).WithError(err).WithField("id", ip.ID()). 
- Error("failed to kill init's children") - } - } - p.SetExited(e.Status) - s.events <- &eventstypes.TaskExit{ - ContainerID: s.id, - ID: p.ID(), - Pid: uint32(p.Pid()), - ExitStatus: uint32(e.Status), - ExitedAt: p.ExitedAt(), - } - return - } - } -} - -func (s *service) allProcesses() (o []rproc.Process) { - s.mu.Lock() - defer s.mu.Unlock() - for _, p := range s.processes { - o = append(o, p) - } - if s.task != nil { - o = append(o, s.task) - } - return o -} - -func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { - s.mu.Lock() - p := s.task - s.mu.Unlock() - if p == nil { - return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "container must be created") - } - ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) - if err != nil { - return nil, err - } - pids := make([]uint32, 0, len(ps)) - for _, pid := range ps { - pids = append(pids, uint32(pid)) - } - return pids, nil -} - -func (s *service) forward(publisher events.Publisher) { - for e := range s.events { - ctx, cancel := context.WithTimeout(s.context, 5*time.Second) - err := publisher.Publish(ctx, getTopic(e), e) - cancel() - if err != nil { - logrus.WithError(err).Error("post event") - } - } -} - -func (s *service) getProcess(execID string) (rproc.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - if execID == "" { - return s.task, nil - } - p := s.processes[execID] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) - } - return p, nil -} - -func getTopic(e interface{}) string { - switch e.(type) { - case *eventstypes.TaskCreate: - return runtime.TaskCreateEventTopic - case *eventstypes.TaskStart: - return runtime.TaskStartEventTopic - case *eventstypes.TaskOOM: - return runtime.TaskOOMEventTopic - case *eventstypes.TaskExit: - return runtime.TaskExitEventTopic - case *eventstypes.TaskDelete: - return runtime.TaskDeleteEventTopic - case *eventstypes.TaskExecAdded: - return runtime.TaskExecAddedEventTopic - case *eventstypes.TaskExecStarted: - return runtime.TaskExecStartedEventTopic - default: - logrus.Warnf("no topic for type %#v", e) - } - return runtime.TaskUnknownTopic -} - -func newInit(ctx context.Context, path, workDir, namespace string, platform rproc.Platform, r *proc.CreateConfig, options *options.Options, rootfs string) (*proc.Init, error) { - spec, err := utils.ReadSpec(r.Bundle) - if err != nil { - return nil, errors.Wrap(err, "read oci spec") - } - if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { - return nil, errors.Wrap(err, "update volume annotations") - } - runsc.FormatLogPath(r.ID, options.RunscConfig) - runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig) - p := proc.New(r.ID, runtime, rproc.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }) - p.Bundle = r.Bundle - p.Platform = platform - p.Rootfs = rootfs - p.WorkDir = workDir - p.IoUID = int(options.IoUid) - p.IoGID = int(options.IoGid) - p.Sandbox = utils.IsSandbox(spec) - p.UserLog = utils.UserLogPath(spec) - p.Monitor = shim.Default - return p, nil -} diff --git a/pkg/v2/service_linux.go b/pkg/v2/service_linux.go deleted file mode 100644 index cbd431537..000000000 --- a/pkg/v2/service_linux.go +++ /dev/null @@ -1,111 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package v2 - -import ( - "context" - "io" - "sync" - "syscall" - - "github.com/containerd/console" - "github.com/containerd/fifo" - "github.com/pkg/errors" -) - -type linuxPlatform struct { - epoller *console.Epoller -} - -func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { - if p.epoller == nil { - return nil, errors.New("uninitialized epoller") - } - - epollConsole, err := p.epoller.Add(console) - if err != nil { - return nil, err - } - - if stdin != "" { - in, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return nil, err - } - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(epollConsole, in, *p) - }() - } - - outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) - if err != nil { - return nil, err - } - outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - wg.Add(1) - cwg.Add(1) - go func() { - cwg.Done() - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(outw, epollConsole, *p) - epollConsole.Close() - outr.Close() - outw.Close() - wg.Done() - }() - return epollConsole, nil -} - -func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { - if p.epoller == nil { - return errors.New("uninitialized epoller") - } - epollConsole, ok := cons.(*console.EpollConsole) - if !ok { - return errors.Errorf("expected EpollConsole, got %#v", cons) - } - return epollConsole.Shutdown(p.epoller.CloseConsole) -} - -func (p *linuxPlatform) Close() error { - return p.epoller.Close() -} - -// initialize a single epoll fd to manage our consoles. `initPlatform` should -// only be called once. -func (s *service) initPlatform() error { - if s.platform != nil { - return nil - } - epoller, err := console.NewEpoller() - if err != nil { - return errors.Wrap(err, "failed to initialize epoller") - } - s.platform = &linuxPlatform{ - epoller: epoller, - } - go epoller.Wait() - return nil -} diff --git a/shim/README.md b/shim/README.md new file mode 100644 index 000000000..e446ec970 --- /dev/null +++ b/shim/README.md @@ -0,0 +1,16 @@ +# gvisor-containerd-shim + +gvisor-containerd-shim is a containerd shim. It implements the containerd v1 +shim API. It can be used as a drop-in replacement for +[containerd-shim][containerd-shim] +(though containerd-shim must still be installed). It allows the use of both +gVisor (runsc) and normal containers in the same containerd installation by +deferring to the runc shim if the desired runtime engine is not runsc. 
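+
+For example, once the runtime handler configuration from the quick starts below
+is in place (containerd 1.2+), a gVisor sandbox can be requested through CRI by
+naming the `runsc` handler. A minimal sketch (the full `sandbox.json` used for
+testing is shown in the quick starts):
+
+```shell
+sudo crictl runp --runtime runsc sandbox.json
+```
+
+The following quick starts and configuration guides walk through installation
+and configuration in detail: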
+ +- [Untrusted Workload Quick Start (containerd >=1.1)](docs/untrusted-workload-quickstart.md) +- [Runtime Handler/RuntimeClass Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md) +- [Runtime Handler/RuntimeClass Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md) +- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](docs/configure-containerd-shim-runsc-v1.md) +- [Configure gvisor-containerd-shim (shim v1) (containerd <= 1.2)](docs/configure-gvisor-containerd-shim.md) + +[containerd-shim]: https://github.com/containerd/containerd/tree/master/cmd/containerd-shim diff --git a/shim/configure-containerd-shim-runsc-v1.md b/shim/configure-containerd-shim-runsc-v1.md new file mode 100644 index 000000000..977ceacbd --- /dev/null +++ b/shim/configure-containerd-shim-runsc-v1.md @@ -0,0 +1,72 @@ +# Configure containerd-shim-runsc-v1 (Shim V2) + +This document describes how to configure runtime options for +`containerd-shim-runsc-v1`. This is follows on to the instructions of +[Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md) +and requires containerd 1.3 or later. + +### Update `/etc/containerd/config.toml` to point to a configuration file for `containerd-shim-runsc-v1`. + +`containerd-shim-runsc-v1` supports a few different configuration options based +on the version of containerd that is used. For versions >= 1.3, it supports a +configurable config path in the containerd runtime configuration. + +```shell +{ # Step 1: Update runtime options for runsc in containerd config.toml +cat < +[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(dev\)/ /^}/) +```shell +{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1 + make + sudo make install +} +``` + +### Configure containerd + +1. Update `/etc/containerd/config.toml`. Make sure `containerd-shim-runsc-v1` is + in `${PATH}`. + +[embedmd]:# (../test/e2e/runtime-handler-shim-v2/install.sh shell /{ # Step 1/ /^}/) +```shell +{ # Step 1: Create containerd config.toml +cat < 106 { + return errors.Errorf("%q: unix socket path too long (> 106)", path) + } + l, err = net.Listen("unix", "\x00"+path) + } + if err != nil { + return err + } + logrus.WithField("socket", path).Debug("serving api on unix socket") + go func() { + defer l.Close() + if err := server.Serve(context.Background(), l); err != nil && + !strings.Contains(err.Error(), "use of closed network connection") { + logrus.WithError(err).Fatal("gvisor-containerd-shim: ttrpc server failure") + } + }() + return nil +} + +// setupSignals creates a new signal handler for all signals and sets the shim as a +// sub-reaper so that the container processes are reparented +func setupSignals() (chan os.Signal, error) { + signals := make(chan os.Signal, 32) + signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) + // make sure runc is setup to use the monitor + // for waiting on processes + // TODO(random-liu): Move shim/reaper.go to a separate package. 
+ runsc.Monitor = containerdshim.Default + // set the shim as the subreaper for all orphaned processes created by the container + if err := system.SetSubreaper(1); err != nil { + return nil, err + } + return signals, nil +} + +func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error { + var ( + termOnce sync.Once + done = make(chan struct{}) + ) + + for { + select { + case <-done: + return nil + case s := <-signals: + switch s { + case unix.SIGCHLD: + if err := containerdshim.Reap(); err != nil { + logger.WithError(err).Error("reap exit status") + } + case unix.SIGTERM, unix.SIGINT: + go termOnce.Do(func() { + ctx := context.TODO() + if err := server.Shutdown(ctx); err != nil { + logger.WithError(err).Error("failed to shutdown server") + } + // Ensure our child is dead if any + sv.Kill(ctx, &shimapi.KillRequest{ + Signal: uint32(syscall.SIGKILL), + All: true, + }) + sv.Delete(context.Background(), &ptypes.Empty{}) + close(done) + }) + case unix.SIGPIPE: + } + } + } +} + +func dumpStacks(logger *logrus.Entry) { + var ( + buf []byte + stackSize int + ) + bufferLen := 16384 + for stackSize == len(buf) { + buf = make([]byte, bufferLen) + stackSize = runtime.Stack(buf, true) + bufferLen *= 2 + } + buf = buf[:stackSize] + logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) +} + +type remoteEventsPublisher struct { + address string +} + +func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { + ns, _ := namespaces.Namespace(ctx) + encoded, err := typeurl.MarshalAny(event) + if err != nil { + return err + } + data, err := encoded.Marshal() + if err != nil { + return err + } + cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) + cmd.Stdin = bytes.NewReader(data) + c, err := containerdshim.Default.Start(cmd) + if err != nil { + return err + } + status, err := containerdshim.Default.Wait(cmd, c) + if err != nil { + return err + } + if status != 0 { + return errors.New("failed to publish event") + } + return nil +} diff --git a/test/e2e/containerd-install.sh b/test/e2e/containerd-install.sh deleted file mode 100755 index 400819245..000000000 --- a/test/e2e/containerd-install.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# A script to install containerd and CNI plugins for e2e testing - -wget -q --https-only \ - https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz \ - https://github.com/containernetworking/plugins/releases/download/v0.7.0/cni-plugins-amd64-v0.7.0.tgz - -sudo mkdir -p /etc/containerd /etc/cni/net.d /opt/cni/bin -sudo tar -xvf cni-plugins-amd64-v0.7.0.tgz -C /opt/cni/bin/ -sudo tar -xvf containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz -C / - -cat </tmp/containerd-cri.log & diff --git a/test/e2e/crictl-install.sh b/test/e2e/crictl-install.sh deleted file mode 100755 index 1d63c889b..000000000 --- a/test/e2e/crictl-install.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -# A sample script for installing crictl. 
- -set -ex - -{ # Step 1: Download crictl -wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz -tar xf crictl-v1.13.0-linux-amd64.tar.gz -sudo mv crictl /usr/local/bin -} - -{ # Step 2: Configure crictl -cat < /tmp/containerd-cri.log & -} diff --git a/test/e2e/runtime-handler-shim-v2/test.sh b/test/e2e/runtime-handler-shim-v2/test.sh deleted file mode 100755 index e33655ec1..000000000 --- a/test/e2e/runtime-handler-shim-v2/test.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime -# handler. This should work on containerd 1.2+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/runtime-handler-shim-v2/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage (the same with runtime-handler) -. ./test/e2e/runtime-handler/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh -. ./test/e2e/runtime-handler-shim-v2/validate.sh diff --git a/test/e2e/runtime-handler-shim-v2/validate.sh b/test/e2e/runtime-handler-shim-v2/validate.sh deleted file mode 100755 index b74a059ef..000000000 --- a/test/e2e/runtime-handler-shim-v2/validate.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# A sample script to validating the running containerd-shim-runsc-v1. - -set -ex - -ps aux | grep [c]ontainerd-shim-runsc-v1 diff --git a/test/e2e/runtime-handler/install.sh b/test/e2e/runtime-handler/install.sh deleted file mode 100755 index ebe9d3580..000000000 --- a/test/e2e/runtime-handler/install.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# A sample script for installing and configuring the gvisor-containerd-shim to -# use the containerd runtime handler. - -set -ex - -{ # Step 1: Create containerd config.toml -cat < /tmp/containerd-cri.log & -} diff --git a/test/e2e/runtime-handler/test.sh b/test/e2e/runtime-handler/test.sh deleted file mode 100755 index 99f3565b6..000000000 --- a/test/e2e/runtime-handler/test.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime -# handler. This should work on containerd 1.2+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/runtime-handler/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage -. ./test/e2e/runtime-handler/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh diff --git a/test/e2e/runtime-handler/usage.sh b/test/e2e/runtime-handler/usage.sh deleted file mode 100755 index 350c720c2..000000000 --- a/test/e2e/runtime-handler/usage.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# A sample script for testing the gvisor-containerd-shim -# using runtime handler. 
- -set -ex - -{ # Step 1: Pull the nginx image -sudo crictl pull nginx -} - -{ # Step 2: Create sandbox.json -cat </tmp/containerd-cri.log & -} diff --git a/test/e2e/untrusted-workload/test.sh b/test/e2e/untrusted-workload/test.sh deleted file mode 100755 index 6e312cf6d..000000000 --- a/test/e2e/untrusted-workload/test.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test using the -# untrusted workload extension. This should work on containerd 1.1+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/untrusted-workload/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage -. ./test/e2e/untrusted-workload/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh diff --git a/test/e2e/untrusted-workload/usage.sh b/test/e2e/untrusted-workload/usage.sh deleted file mode 100755 index db8206964..000000000 --- a/test/e2e/untrusted-workload/usage.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# A sample script for testing the gvisor-containerd-shim # using untrusted -# workload extension. - -set -ex - -{ # Step 1: Pull the nginx image -sudo crictl pull nginx -} - -{ # Step 2: Create sandbox.json -cat </tmp/containerd-cri.log & diff --git a/test/shim/crictl-install.sh b/test/shim/crictl-install.sh new file mode 100755 index 000000000..1d63c889b --- /dev/null +++ b/test/shim/crictl-install.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# A sample script for installing crictl. + +set -ex + +{ # Step 1: Download crictl +wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz +tar xf crictl-v1.13.0-linux-amd64.tar.gz +sudo mv crictl /usr/local/bin +} + +{ # Step 2: Configure crictl +cat < /tmp/containerd-cri.log & +} diff --git a/test/shim/runtime-handler-shim-v2/test.sh b/test/shim/runtime-handler-shim-v2/test.sh new file mode 100755 index 000000000..e33655ec1 --- /dev/null +++ b/test/shim/runtime-handler-shim-v2/test.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime +# handler. This should work on containerd 1.2+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/runtime-handler-shim-v2/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage (the same with runtime-handler) +. ./test/e2e/runtime-handler/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh +. ./test/e2e/runtime-handler-shim-v2/validate.sh diff --git a/test/shim/runtime-handler-shim-v2/validate.sh b/test/shim/runtime-handler-shim-v2/validate.sh new file mode 100755 index 000000000..b74a059ef --- /dev/null +++ b/test/shim/runtime-handler-shim-v2/validate.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# A sample script to validating the running containerd-shim-runsc-v1. 
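+# The [c] in the grep pattern below keeps grep from matching its own command
+# line, so the check only succeeds if a containerd-shim-runsc-v1 process is
+# actually running.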
+ +set -ex + +ps aux | grep [c]ontainerd-shim-runsc-v1 diff --git a/test/shim/runtime-handler/install.sh b/test/shim/runtime-handler/install.sh new file mode 100755 index 000000000..ebe9d3580 --- /dev/null +++ b/test/shim/runtime-handler/install.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# A sample script for installing and configuring the gvisor-containerd-shim to +# use the containerd runtime handler. + +set -ex + +{ # Step 1: Create containerd config.toml +cat < /tmp/containerd-cri.log & +} diff --git a/test/shim/runtime-handler/test.sh b/test/shim/runtime-handler/test.sh new file mode 100755 index 000000000..99f3565b6 --- /dev/null +++ b/test/shim/runtime-handler/test.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime +# handler. This should work on containerd 1.2+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/runtime-handler/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage +. ./test/e2e/runtime-handler/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh diff --git a/test/shim/runtime-handler/usage.sh b/test/shim/runtime-handler/usage.sh new file mode 100755 index 000000000..350c720c2 --- /dev/null +++ b/test/shim/runtime-handler/usage.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# A sample script for testing the gvisor-containerd-shim +# using runtime handler. + +set -ex + +{ # Step 1: Pull the nginx image +sudo crictl pull nginx +} + +{ # Step 2: Create sandbox.json +cat </tmp/containerd-cri.log & +} diff --git a/test/shim/untrusted-workload/test.sh b/test/shim/untrusted-workload/test.sh new file mode 100755 index 000000000..6e312cf6d --- /dev/null +++ b/test/shim/untrusted-workload/test.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Runs end-to-end tests for gvisor-containerd-shim to test using the +# untrusted workload extension. This should work on containerd 1.1+ + +# This is meant to be run in a VM as it makes a fairly invasive install of +# containerd. + +set -ex + +# Install containerd +. ./test/e2e/containerd-install.sh + +# Install gVisor +. ./test/e2e/runsc-install.sh + +# Install gvisor-containerd-shim +. ./test/e2e/shim-install.sh + +# Test installation/configuration +. ./test/e2e/untrusted-workload/install.sh + +# Install crictl +. ./test/e2e/crictl-install.sh + +# Test usage +. ./test/e2e/untrusted-workload/usage.sh + +# Run a container in the sandbox +. ./test/e2e/run-container.sh + +# Validate the pod and container +. ./test/e2e/validate.sh diff --git a/test/shim/untrusted-workload/usage.sh b/test/shim/untrusted-workload/usage.sh new file mode 100755 index 000000000..db8206964 --- /dev/null +++ b/test/shim/untrusted-workload/usage.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# A sample script for testing the gvisor-containerd-shim # using untrusted +# workload extension. + +set -ex + +{ # Step 1: Pull the nginx image +sudo crictl pull nginx +} + +{ # Step 2: Create sandbox.json +cat < Date: Wed, 20 May 2020 14:50:17 -0700 Subject: Internal change. 
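
This change propagates ICMP port unreachable errors to UDP endpoints: the
error-string to *tcpip.Error mapping moves out of the TCP endpoint state code
into tcpip.StringToError, connected UDP endpoints record ErrConnectionRefused
when a port unreachable control packet arrives, and the saved error is surfaced
through Read, Write and GetSockOpt(ErrorOption). This lets the
udp_icmp_error_propagation packetimpact test run against netstack without an
expected failure.
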
PiperOrigin-RevId: 312559963 --- pkg/tcpip/tcpip.go | 65 +++++++++++++++++++++++++++++ pkg/tcpip/transport/tcp/endpoint_state.go | 68 +------------------------------ pkg/tcpip/transport/udp/endpoint.go | 32 +++++++++++++++ pkg/tcpip/transport/udp/endpoint_state.go | 18 ++++++++ test/packetimpact/tests/BUILD | 2 - 5 files changed, 117 insertions(+), 68 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 45e930ad8..b7b227328 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -110,6 +110,71 @@ var ( ErrAddressFamilyNotSupported = &Error{msg: "address family not supported by protocol"} ) +var messageToError map[string]*Error + +var populate sync.Once + +// StringToError converts an error message to the error. +func StringToError(s string) *Error { + populate.Do(func() { + var errors = []*Error{ + ErrUnknownProtocol, + ErrUnknownNICID, + ErrUnknownDevice, + ErrUnknownProtocolOption, + ErrDuplicateNICID, + ErrDuplicateAddress, + ErrNoRoute, + ErrBadLinkEndpoint, + ErrAlreadyBound, + ErrInvalidEndpointState, + ErrAlreadyConnecting, + ErrAlreadyConnected, + ErrNoPortAvailable, + ErrPortInUse, + ErrBadLocalAddress, + ErrClosedForSend, + ErrClosedForReceive, + ErrWouldBlock, + ErrConnectionRefused, + ErrTimeout, + ErrAborted, + ErrConnectStarted, + ErrDestinationRequired, + ErrNotSupported, + ErrQueueSizeNotSupported, + ErrNotConnected, + ErrConnectionReset, + ErrConnectionAborted, + ErrNoSuchFile, + ErrInvalidOptionValue, + ErrNoLinkAddress, + ErrBadAddress, + ErrNetworkUnreachable, + ErrMessageTooLong, + ErrNoBufferSpace, + ErrBroadcastDisabled, + ErrNotPermitted, + ErrAddressFamilyNotSupported, + } + + messageToError = make(map[string]*Error) + for _, e := range errors { + if messageToError[e.String()] != nil { + panic("tcpip errors with duplicated message: " + e.String()) + } + messageToError[e.String()] = e + } + }) + + e, ok := messageToError[s] + if !ok { + panic("unknown error message: " + s) + } + + return e +} + // Errors related to Subnet var ( errSubnetLengthMismatch = errors.New("subnet length of address and mask differ") diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index 8b7562396..fc43c11e2 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -314,7 +314,7 @@ func (e *endpoint) loadLastError(s string) { return } - e.lastError = loadError(s) + e.lastError = tcpip.StringToError(s) } // saveHardError is invoked by stateify. 
@@ -332,71 +332,7 @@ func (e *EndpointInfo) loadHardError(s string) { return } - e.HardError = loadError(s) -} - -var messageToError map[string]*tcpip.Error - -var populate sync.Once - -func loadError(s string) *tcpip.Error { - populate.Do(func() { - var errors = []*tcpip.Error{ - tcpip.ErrUnknownProtocol, - tcpip.ErrUnknownNICID, - tcpip.ErrUnknownDevice, - tcpip.ErrUnknownProtocolOption, - tcpip.ErrDuplicateNICID, - tcpip.ErrDuplicateAddress, - tcpip.ErrNoRoute, - tcpip.ErrBadLinkEndpoint, - tcpip.ErrAlreadyBound, - tcpip.ErrInvalidEndpointState, - tcpip.ErrAlreadyConnecting, - tcpip.ErrAlreadyConnected, - tcpip.ErrNoPortAvailable, - tcpip.ErrPortInUse, - tcpip.ErrBadLocalAddress, - tcpip.ErrClosedForSend, - tcpip.ErrClosedForReceive, - tcpip.ErrWouldBlock, - tcpip.ErrConnectionRefused, - tcpip.ErrTimeout, - tcpip.ErrAborted, - tcpip.ErrConnectStarted, - tcpip.ErrDestinationRequired, - tcpip.ErrNotSupported, - tcpip.ErrQueueSizeNotSupported, - tcpip.ErrNotConnected, - tcpip.ErrConnectionReset, - tcpip.ErrConnectionAborted, - tcpip.ErrNoSuchFile, - tcpip.ErrInvalidOptionValue, - tcpip.ErrNoLinkAddress, - tcpip.ErrBadAddress, - tcpip.ErrNetworkUnreachable, - tcpip.ErrMessageTooLong, - tcpip.ErrNoBufferSpace, - tcpip.ErrBroadcastDisabled, - tcpip.ErrNotPermitted, - tcpip.ErrAddressFamilyNotSupported, - } - - messageToError = make(map[string]*tcpip.Error) - for _, e := range errors { - if messageToError[e.String()] != nil { - panic("tcpip errors with duplicated message: " + e.String()) - } - messageToError[e.String()] = e - } - }) - - e, ok := messageToError[s] - if !ok { - panic("unknown error message: " + s) - } - - return e + e.HardError = tcpip.StringToError(s) } // saveMeasureTime is invoked by stateify. diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 756ab913a..647b2067a 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -106,6 +106,9 @@ type endpoint struct { bindToDevice tcpip.NICID broadcast bool + lastErrorMu sync.Mutex `state:"nosave"` + lastError *tcpip.Error `state:".(string)"` + // Values used to reserve a port or register a transport endpoint. // (which ever happens first). boundBindToDevice tcpip.NICID @@ -188,6 +191,15 @@ func (e *endpoint) UniqueID() uint64 { return e.uniqueID } +func (e *endpoint) takeLastError() *tcpip.Error { + e.lastErrorMu.Lock() + defer e.lastErrorMu.Unlock() + + err := e.lastError + e.lastError = nil + return err +} + // Abort implements stack.TransportEndpoint.Abort. func (e *endpoint) Abort() { e.Close() @@ -243,6 +255,10 @@ func (e *endpoint) IPTables() (stack.IPTables, error) { // Read reads data from the endpoint. This method does not block if // there is no data pending. func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { + if err := e.takeLastError(); err != nil { + return buffer.View{}, tcpip.ControlMessages{}, err + } + e.rcvMu.Lock() if e.rcvList.Empty() { @@ -382,6 +398,10 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c } func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) { + if err := e.takeLastError(); err != nil { + return 0, nil, err + } + // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.) 
if opts.More { return 0, nil, tcpip.ErrInvalidOptionValue @@ -853,6 +873,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { switch o := opt.(type) { case tcpip.ErrorOption: + return e.takeLastError() case *tcpip.MulticastInterfaceOption: e.mu.Lock() *o = tcpip.MulticastInterfaceOption{ @@ -1316,6 +1337,17 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk // HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket. func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) { + if typ == stack.ControlPortUnreachable { + e.mu.RLock() + defer e.mu.RUnlock() + + if e.state == StateConnected { + e.lastErrorMu.Lock() + defer e.lastErrorMu.Unlock() + + e.lastError = tcpip.ErrConnectionRefused + } + } } // State implements tcpip.Endpoint.State. diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go index 466bd9381..851e6b635 100644 --- a/pkg/tcpip/transport/udp/endpoint_state.go +++ b/pkg/tcpip/transport/udp/endpoint_state.go @@ -37,6 +37,24 @@ func (u *udpPacket) loadData(data buffer.VectorisedView) { u.data = data } +// saveLastError is invoked by stateify. +func (e *endpoint) saveLastError() string { + if e.lastError == nil { + return "" + } + + return e.lastError.String() +} + +// loadLastError is invoked by stateify. +func (e *endpoint) loadLastError(s string) { + if s == "" { + return + } + + e.lastError = tcpip.StringToError(s) +} + // beforeSave is invoked by stateify. func (e *endpoint) beforeSave() { // Stop incoming packets from being handled (and mutate endpoint state). diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index d4fcf31fa..852ae4a74 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -31,8 +31,6 @@ packetimpact_go_test( packetimpact_go_test( name = "udp_icmp_error_propagation", srcs = ["udp_icmp_error_propagation_test.go"], - # TODO(b/153926291): Fix netstack then remove the line below. - expect_netstack_failure = True, deps = [ "//pkg/tcpip", "//pkg/tcpip/header", -- cgit v1.2.3 From 5f3eeb47286cf9696154f3d9569b655b84ac7d0c Mon Sep 17 00:00:00 2001 From: Zeling Feng Date: Wed, 20 May 2020 17:52:05 -0700 Subject: Test that we have PAWS mechanism If there is a Timestamps option in the arriving segment and SEG.TSval < TS.Recent and if TS.Recent is valid, then treat the arriving segment as not acceptable: Send an acknowledgement in reply as specified in RFC-793 page 69 and drop the segment. https://tools.ietf.org/html/rfc1323#page-19 PiperOrigin-RevId: 312590678 --- pkg/tcpip/header/tcp.go | 29 +++--- test/packetimpact/tests/BUILD | 13 +++ test/packetimpact/tests/tcp_paws_mechanism_test.go | 109 +++++++++++++++++++++ 3 files changed, 139 insertions(+), 12 deletions(-) create mode 100644 test/packetimpact/tests/tcp_paws_mechanism_test.go (limited to 'test') diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 29454c4b9..4c6f808e5 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -66,6 +66,14 @@ const ( TCPOptionSACK = 5 ) +// Option Lengths. +const ( + TCPOptionMSSLength = 4 + TCPOptionTSLength = 10 + TCPOptionWSLength = 3 + TCPOptionSackPermittedLength = 2 +) + // TCPFields contains the fields of a TCP packet. It is used to describe the // fields of a packet that needs to be encoded. 
type TCPFields struct { @@ -494,14 +502,11 @@ func ParseTCPOptions(b []byte) TCPOptions { // returns without encoding anything. It returns the number of bytes written to // the provided buffer. func EncodeMSSOption(mss uint32, b []byte) int { - // mssOptionSize is the number of bytes in a valid MSS option. - const mssOptionSize = 4 - - if len(b) < mssOptionSize { + if len(b) < TCPOptionMSSLength { return 0 } - b[0], b[1], b[2], b[3] = TCPOptionMSS, mssOptionSize, byte(mss>>8), byte(mss) - return mssOptionSize + b[0], b[1], b[2], b[3] = TCPOptionMSS, TCPOptionMSSLength, byte(mss>>8), byte(mss) + return TCPOptionMSSLength } // EncodeWSOption encodes the WS TCP option with the WS value in the @@ -509,10 +514,10 @@ func EncodeMSSOption(mss uint32, b []byte) int { // returns without encoding anything. It returns the number of bytes written to // the provided buffer. func EncodeWSOption(ws int, b []byte) int { - if len(b) < 3 { + if len(b) < TCPOptionWSLength { return 0 } - b[0], b[1], b[2] = TCPOptionWS, 3, uint8(ws) + b[0], b[1], b[2] = TCPOptionWS, TCPOptionWSLength, uint8(ws) return int(b[1]) } @@ -521,10 +526,10 @@ func EncodeWSOption(ws int, b []byte) int { // just returns without encoding anything. It returns the number of bytes // written to the provided buffer. func EncodeTSOption(tsVal, tsEcr uint32, b []byte) int { - if len(b) < 10 { + if len(b) < TCPOptionTSLength { return 0 } - b[0], b[1] = TCPOptionTS, 10 + b[0], b[1] = TCPOptionTS, TCPOptionTSLength binary.BigEndian.PutUint32(b[2:], tsVal) binary.BigEndian.PutUint32(b[6:], tsEcr) return int(b[1]) @@ -535,11 +540,11 @@ func EncodeTSOption(tsVal, tsEcr uint32, b []byte) int { // encoding anything. It returns the number of bytes written to the provided // buffer. func EncodeSACKPermittedOption(b []byte) int { - if len(b) < 2 { + if len(b) < TCPOptionSackPermittedLength { return 0 } - b[0], b[1] = TCPOptionSACKPermitted, 2 + b[0], b[1] = TCPOptionSACKPermitted, TCPOptionSackPermittedLength return int(b[1]) } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 852ae4a74..c4ffda17e 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -133,6 +133,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_paws_mechanism", + srcs = ["tcp_paws_mechanism_test.go"], + # TODO(b/156682000): Fix netstack then remove the line below. + expect_netstack_failure = True, + deps = [ + "//pkg/tcpip/header", + "//pkg/tcpip/seqnum", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "tcp_user_timeout", srcs = ["tcp_user_timeout_test.go"], diff --git a/test/packetimpact/tests/tcp_paws_mechanism_test.go b/test/packetimpact/tests/tcp_paws_mechanism_test.go new file mode 100644 index 000000000..0a668adcf --- /dev/null +++ b/test/packetimpact/tests/tcp_paws_mechanism_test.go @@ -0,0 +1,109 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
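+
+// This test exercises the PAWS check described in RFC 1323: when an arriving
+// segment carries a timestamp (TSval) older than TS.Recent, the DUT must drop
+// the segment and send an acknowledgement in reply.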
+ +package tcp_paws_mechanism_test + +import ( + "encoding/hex" + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +func TestPAWSMechanism(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + options := make([]byte, header.TCPOptionTSLength) + header.EncodeTSOption(currentTS(), 0, options) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn), Options: options}) + synAck, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) + if err != nil { + t.Fatalf("didn't get synack during handshake: %s", err) + } + parsedSynOpts := header.ParseSynOptions(synAck.Options, true) + if !parsedSynOpts.TS { + t.Fatalf("expected TSOpt from DUT, options we got:\n%s", hex.Dump(synAck.Options)) + } + tsecr := parsedSynOpts.TSVal + header.EncodeTSOption(currentTS(), tsecr, options) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options}) + acceptFD, _ := dut.Accept(listenFD) + defer dut.Close(acceptFD) + + sampleData := []byte("Sample Data") + sentTSVal := currentTS() + header.EncodeTSOption(sentTSVal, tsecr, options) + // 3ms here is chosen arbitrarily to make sure we have increasing timestamps + // every time we send one, it should not cause any flakiness because timestamps + // only need to be non-decreasing. + time.Sleep(3 * time.Millisecond) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options}, &tb.Payload{Bytes: sampleData}) + + gotTCP, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) + if err != nil { + t.Fatalf("expected an ACK but got none: %s", err) + } + + parsedOpts := header.ParseTCPOptions(gotTCP.Options) + if !parsedOpts.TS { + t.Fatalf("expected TS option in response, options we got:\n%s", hex.Dump(gotTCP.Options)) + } + if parsedOpts.TSVal < tsecr { + t.Fatalf("TSVal should be non-decreasing, but %d < %d", parsedOpts.TSVal, tsecr) + } + if parsedOpts.TSEcr != sentTSVal { + t.Fatalf("TSEcr should match our sent TSVal, %d != %d", parsedOpts.TSEcr, sentTSVal) + } + tsecr = parsedOpts.TSVal + lastAckNum := gotTCP.AckNum + + badTSVal := sentTSVal - 100 + header.EncodeTSOption(badTSVal, tsecr, options) + // 3ms here is chosen arbitrarily and this time.Sleep() should not cause flakiness + // due to the exact same reasoning discussed above. 
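+	// badTSVal is older than the timestamp the DUT has already seen, so PAWS
+	// requires this segment to be dropped; the expectation below checks that
+	// the DUT replies with the previous acknowledgement number rather than
+	// acknowledging the payload.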
+ time.Sleep(3 * time.Millisecond) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options}, &tb.Payload{Bytes: sampleData}) + + gotTCP, err = conn.Expect(tb.TCP{AckNum: lastAckNum, Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) + if err != nil { + t.Fatalf("expected segment with AckNum %d but got none: %s", lastAckNum, err) + } + parsedOpts = header.ParseTCPOptions(gotTCP.Options) + if !parsedOpts.TS { + t.Fatalf("expected TS option in response, options we got:\n%s", hex.Dump(gotTCP.Options)) + } + if parsedOpts.TSVal < tsecr { + t.Fatalf("TSVal should be non-decreasing, but %d < %d", parsedOpts.TSVal, tsecr) + } + if parsedOpts.TSEcr != sentTSVal { + t.Fatalf("TSEcr should match our sent TSVal, %d != %d", parsedOpts.TSEcr, sentTSVal) + } +} + +func currentTS() uint32 { + return uint32(time.Now().UnixNano() / 1e6) +} -- cgit v1.2.3 From 8298c5bd4d1f836ee4c531a7bf04acff05d7099b Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Wed, 20 May 2020 18:37:22 -0700 Subject: Avoid all caps FIONREAD as test name. PiperOrigin-RevId: 312596169 --- test/syscalls/linux/udp_socket_test_cases.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 4a3ae5f3f..42521efef 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1142,7 +1142,9 @@ TEST_P(UdpSocketTest, FIONREADWriteShutdown) { EXPECT_EQ(n, sizeof(str)); } -TEST_P(UdpSocketTest, FIONREAD) { +// NOTE: Do not use `FIONREAD` as test name because it will be replaced by the +// corresponding macro and become `0x541B`. +TEST_P(UdpSocketTest, Fionread) { // Bind s_ to loopback:TestPort. ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); -- cgit v1.2.3 From 7bde26934ae6d39649a31f6c77a4bc210277085a Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 21 May 2020 10:46:45 -0700 Subject: Add IsRunningWithVFS1 to test util VFS2 is adding more functionality than VFS1. In order to test new functionality, it's required to skip some tests with VFS1. To skip tests, use: SKIP_IF(IsRunningWithVFS1()); The test will run in Linux and gVisor with VFS2 enabled. Updates #1035 PiperOrigin-RevId: 312698616 --- test/util/test_util.cc | 14 ++++++++++++-- test/util/test_util.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/util/test_util.cc b/test/util/test_util.cc index 95e1e0c96..b20758626 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -42,12 +42,13 @@ namespace testing { #define TEST_ON_GVISOR "TEST_ON_GVISOR" #define GVISOR_NETWORK "GVISOR_NETWORK" +#define GVISOR_VFS "GVISOR_VFS" bool IsRunningOnGvisor() { return GvisorPlatform() != Platform::kNative; } const std::string GvisorPlatform() { // Set by runner.go. - char* env = getenv(TEST_ON_GVISOR); + const char* env = getenv(TEST_ON_GVISOR); if (!env) { return Platform::kNative; } @@ -55,10 +56,19 @@ const std::string GvisorPlatform() { } bool IsRunningWithHostinet() { - char* env = getenv(GVISOR_NETWORK); + const char* env = getenv(GVISOR_NETWORK); return env && strcmp(env, "host") == 0; } +bool IsRunningWithVFS1() { + const char* env = getenv(GVISOR_VFS); + if (env == nullptr) { + // If not set, it's running on Linux. + return false; + } + return strcmp(env, "VFS1") == 0; +} + // Inline cpuid instruction. Preserve %ebx/%rbx register. In PIC compilations // %ebx contains the address of the global offset table. 
%rbx is occasionally // used to address stack variables in presence of dynamic allocas. diff --git a/test/util/test_util.h b/test/util/test_util.h index c5cb9d6d6..8e3245b27 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -220,6 +220,7 @@ constexpr char kKVM[] = "kvm"; bool IsRunningOnGvisor(); const std::string GvisorPlatform(); bool IsRunningWithHostinet(); +bool IsRunningWithVFS1(); #ifdef __linux__ void SetupGvisorDeathTest(); -- cgit v1.2.3 From cdf48e851670f8f333f61e7621e0aa7d495d98fe Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 21 May 2020 11:06:28 -0700 Subject: Fix TestTmpFile Split check for file in /tmp from working directory test. Fix readonly case which should not fail to create working dir. PiperOrigin-RevId: 312702930 --- images/tmpfile/Dockerfile | 4 +++ pkg/test/dockerutil/dockerutil.go | 6 ++++- test/e2e/integration_test.go | 56 ++++++++++++++++++++++++++++----------- 3 files changed, 50 insertions(+), 16 deletions(-) create mode 100644 images/tmpfile/Dockerfile (limited to 'test') diff --git a/images/tmpfile/Dockerfile b/images/tmpfile/Dockerfile new file mode 100644 index 000000000..e3816c8cb --- /dev/null +++ b/images/tmpfile/Dockerfile @@ -0,0 +1,4 @@ +# Create file under /tmp to ensure files inside '/tmp' are not overridden. +FROM alpine:3.11.5 +RUN mkdir -p /tmp/foo \ + && echo 123 > /tmp/foo/file.txt diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 5f2af9f3b..06f81d28d 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -162,9 +162,13 @@ type Docker struct { // // Names of containers will be unique. func MakeDocker(logger testutil.Logger) *Docker { + // Slashes are not allowed in container names. + name := testutil.RandomID(logger.Name()) + name = strings.ReplaceAll(name, "/", "-") + return &Docker{ logger: logger, - Name: testutil.RandomID(logger.Name()), + Name: name, Runtime: *runtime, } } diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index ff856883a..9cbb2ed5b 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -337,27 +337,53 @@ func TestJobControl(t *testing.T) { } } -// TestTmpFile checks that files inside '/tmp' are not overridden. In addition, -// it checks that working dir is created if it doesn't exit. +// TestWorkingDirCreation checks that working dir is created if it doesn't exit. +func TestWorkingDirCreation(t *testing.T) { + for _, tc := range []struct { + name string + workingDir string + }{ + {name: "root", workingDir: "/foo"}, + {name: "tmp", workingDir: "/tmp/foo"}, + } { + for _, readonly := range []bool{true, false} { + name := tc.name + if readonly { + name += "-readonly" + } + t.Run(name, func(t *testing.T) { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + opts := dockerutil.RunOpts{ + Image: "basic/alpine", + WorkDir: tc.workingDir, + ReadOnly: readonly, + } + got, err := d.Run(opts, "sh", "-c", "echo ${PWD}") + if err != nil { + t.Fatalf("docker run failed: %v", err) + } + if want := tc.workingDir + "\n"; want != got { + t.Errorf("invalid working dir, want: %q, got: %q", want, got) + } + }) + } + } +} + +// TestTmpFile checks that files inside '/tmp' are not overridden. 
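+// It runs the tmpfile image, whose Dockerfile creates /tmp/foo/file.txt at
+// build time, and verifies that the file still has its original contents.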
func TestTmpFile(t *testing.T) { d := dockerutil.MakeDocker(t) defer d.CleanUp() - // Should work without ReadOnly - if _, err := d.Run(dockerutil.RunOpts{ - Image: "basic/alpine", - WorkDir: "/tmp/foo/bar", - }, "touch", "/tmp/foo/bar/file"); err != nil { + opts := dockerutil.RunOpts{Image: "tmpfile"} + got, err := d.Run(opts, "cat", "/tmp/foo/file.txt") + if err != nil { t.Fatalf("docker run failed: %v", err) } - - // Expect failure. - if _, err := d.Run(dockerutil.RunOpts{ - Image: "basic/alpine", - WorkDir: "/tmp/foo/bar", - ReadOnly: true, - }, "touch", "/tmp/foo/bar/file"); err == nil { - t.Fatalf("docker run expected failure, but succeeded") + if want := "123\n"; want != got { + t.Errorf("invalid file content, want: %q, got: %q", want, got) } } -- cgit v1.2.3 From 198642df7689852062be9e3847a98e02d1bcbfd9 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Thu, 21 May 2020 14:01:26 -0700 Subject: Fix IsRunningWithVFS1() on the runsc-based test runner. Updates #1035 PiperOrigin-RevId: 312736450 --- test/runner/runner.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/runner/runner.go b/test/runner/runner.go index 14c9cbc47..e4f04cd2a 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -341,11 +341,13 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { } } - // Set environment variables that indicate we are - // running in gVisor with the given platform and network. + // Set environment variables that indicate we are running in gVisor with + // the given platform, network, and filesystem stack. + // TODO(gvisor.dev/issue/1487): Update this when the runner supports VFS2. platformVar := "TEST_ON_GVISOR" networkVar := "GVISOR_NETWORK" - env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network) + vfsVar := "GVISOR_VFS" + env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network, vfsVar+"=VFS1") // Remove env variables that cause the gunit binary to write output // files, since they will stomp on eachother, and on the output files -- cgit v1.2.3 From ba2bf9fc13c204ad05d9fbb7199b890e6faf1d76 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 21 May 2020 16:31:13 -0700 Subject: Skip socket tests only if running on vfs1. PiperOrigin-RevId: 312763249 --- test/syscalls/linux/socket.cc | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc index afa59c1da..e0a4d0985 100644 --- a/test/syscalls/linux/socket.cc +++ b/test/syscalls/linux/socket.cc @@ -62,9 +62,7 @@ TEST(SocketTest, ProtocolInet) { } TEST(SocketTest, UnixSocketStat) { - // TODO(gvisor.dev/issue/1624): Re-enable this test once VFS1 is deleted. It - // should pass in VFS2. - SKIP_IF(IsRunningOnGvisor()); + SKIP_IF(IsRunningWithVFS1()); FileDescriptor bound = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); @@ -94,9 +92,7 @@ TEST(SocketTest, UnixSocketStat) { } TEST(SocketTest, UnixConnectNeedsWritePerm) { - // TODO(gvisor.dev/issue/1624): Re-enable this test once VFS1 is deleted. It - // should succeed in VFS2. - SKIP_IF(IsRunningOnGvisor()); + SKIP_IF(IsRunningWithVFS1()); FileDescriptor bound = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); @@ -128,10 +124,7 @@ using SocketOpenTest = ::testing::TestWithParam; // UDS cannot be opened. TEST_P(SocketOpenTest, Unix) { // FIXME(b/142001530): Open incorrectly succeeds on gVisor. 
- // - // TODO(gvisor.dev/issue/1624): Re-enable this test once VFS1 is deleted. It - // should succeed in VFS2. - SKIP_IF(IsRunningOnGvisor()); + SKIP_IF(IsRunningWithVFS1()); FileDescriptor bound = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); -- cgit v1.2.3 From 92bafd79293b99aac0ddeada11dfbe1fd9b67f13 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 26 May 2020 17:39:19 -0700 Subject: Automated rollback of changelist 311424257 PiperOrigin-RevId: 313300554 --- pkg/test/dockerutil/dockerutil.go | 110 ++++++++- test/packetimpact/README.md | 21 ++ test/packetimpact/netdevs/BUILD | 15 ++ test/packetimpact/netdevs/netdevs.go | 104 +++++++++ test/packetimpact/runner/BUILD | 20 ++ test/packetimpact/runner/defs.bzl | 136 +++++++++++ test/packetimpact/runner/packetimpact_test.go | 312 +++++++++++++++++++++++++ test/packetimpact/testbench/BUILD | 1 + test/packetimpact/testbench/connections.go | 4 +- test/packetimpact/testbench/dut.go | 6 + test/packetimpact/testbench/rawsockets.go | 3 - test/packetimpact/testbench/testbench.go | 31 ++- test/packetimpact/tests/BUILD | 7 +- test/packetimpact/tests/defs.bzl | 137 ----------- test/packetimpact/tests/test_runner.sh | 325 -------------------------- 15 files changed, 756 insertions(+), 476 deletions(-) create mode 100644 test/packetimpact/netdevs/BUILD create mode 100644 test/packetimpact/netdevs/netdevs.go create mode 100644 test/packetimpact/runner/BUILD create mode 100644 test/packetimpact/runner/defs.bzl create mode 100644 test/packetimpact/runner/packetimpact_test.go delete mode 100644 test/packetimpact/tests/defs.bzl delete mode 100755 test/packetimpact/tests/test_runner.sh (limited to 'test') diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 06f81d28d..c45d2ecbc 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -148,6 +148,62 @@ func (m MountMode) String() string { panic(fmt.Sprintf("invalid mode: %d", m)) } +// DockerNetwork contains the name of a docker network. +type DockerNetwork struct { + logger testutil.Logger + Name string + Subnet *net.IPNet + containers []*Docker +} + +// NewDockerNetwork sets up the struct for a Docker network. Names of networks +// will be unique. +func NewDockerNetwork(logger testutil.Logger) *DockerNetwork { + return &DockerNetwork{ + logger: logger, + Name: testutil.RandomID(logger.Name()), + } +} + +// Create calls 'docker network create'. +func (n *DockerNetwork) Create(args ...string) error { + a := []string{"docker", "network", "create"} + if n.Subnet != nil { + a = append(a, fmt.Sprintf("--subnet=%s", n.Subnet)) + } + a = append(a, args...) + a = append(a, n.Name) + return testutil.Command(n.logger, a...).Run() +} + +// Connect calls 'docker network connect' with the arguments provided. +func (n *DockerNetwork) Connect(container *Docker, args ...string) error { + a := []string{"docker", "network", "connect"} + a = append(a, args...) + a = append(a, n.Name, container.Name) + if err := testutil.Command(n.logger, a...).Run(); err != nil { + return err + } + n.containers = append(n.containers, container) + return nil +} + +// Cleanup cleans up the docker network and all the containers attached to it. +func (n *DockerNetwork) Cleanup() error { + for _, c := range n.containers { + // Don't propagate the error, it might be that the container + // was already cleaned up. 
+ if err := c.Kill(); err != nil { + n.logger.Logf("unable to kill container during cleanup: %s", err) + } + } + + if err := testutil.Command(n.logger, "docker", "network", "rm", n.Name).Run(); err != nil { + return err + } + return nil +} + // Docker contains the name and the runtime of a docker container. type Docker struct { logger testutil.Logger @@ -313,7 +369,9 @@ func (d *Docker) argsFor(r *RunOpts, command string, p []string) (rv []string) { rv = append(rv, d.Name) } else { rv = append(rv, d.mounts...) - rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) + if len(d.Runtime) > 0 { + rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) + } rv = append(rv, fmt.Sprintf("--name=%s", d.Name)) rv = append(rv, testutil.ImageByName(r.Image)) } @@ -481,6 +539,56 @@ func (d *Docker) FindIP() (net.IP, error) { return ip, nil } +// A NetworkInterface is container's network interface information. +type NetworkInterface struct { + IPv4 net.IP + MAC net.HardwareAddr +} + +// ListNetworks returns the network interfaces of the container, keyed by +// Docker network name. +func (d *Docker) ListNetworks() (map[string]NetworkInterface, error) { + const format = `{{json .NetworkSettings.Networks}}` + out, err := testutil.Command(d.logger, "docker", "inspect", "-f", format, d.Name).CombinedOutput() + if err != nil { + return nil, fmt.Errorf("error network interfaces: %q: %w", string(out), err) + } + + networks := map[string]map[string]string{} + if err := json.Unmarshal(out, &networks); err != nil { + return nil, fmt.Errorf("error decoding network interfaces: %w", err) + } + + interfaces := map[string]NetworkInterface{} + for name, iface := range networks { + var netface NetworkInterface + + rawIP := strings.TrimSpace(iface["IPAddress"]) + if rawIP != "" { + ip := net.ParseIP(rawIP) + if ip == nil { + return nil, fmt.Errorf("invalid IP: %q", rawIP) + } + // Docker's IPAddress field is IPv4. The IPv6 address + // is stored in the GlobalIPv6Address field. + netface.IPv4 = ip + } + + rawMAC := strings.TrimSpace(iface["MacAddress"]) + if rawMAC != "" { + mac, err := net.ParseMAC(rawMAC) + if err != nil { + return nil, fmt.Errorf("invalid MAC: %q: %w", rawMAC, err) + } + netface.MAC = mac + } + + interfaces[name] = netface + } + + return interfaces, nil +} + // SandboxPid returns the PID to the sandbox process. func (d *Docker) SandboxPid() (int, error) { out, err := testutil.Command(d.logger, "docker", "inspect", "-f={{.State.Pid}}", d.Name).CombinedOutput() diff --git a/test/packetimpact/README.md b/test/packetimpact/README.md index a82ad996a..f46c67a0c 100644 --- a/test/packetimpact/README.md +++ b/test/packetimpact/README.md @@ -18,6 +18,27 @@ Packetimpact aims to provide: * **Control-flow** like for loops, conditionals, and variables. * **Flexibilty** to specify every byte in a packet or use multiple sockets. +## How to run packetimpact tests? + +Build the test container image by running the following at the root of the +repository: + +```bash +$ make load-packetimpact +``` + +Run a test, e.g. `fin_wait2_timeout`, against Linux: + +```bash +$ bazel test //test/packetimpact/tests:fin_wait2_timeout_linux_test +``` + +Run the same test, but against gVisor: + +```bash +$ bazel test //test/packetimpact/tests:fin_wait2_timeout_netstack_test +``` + ## When to use packetimpact? 
There are a few ways to write networking tests for gVisor currently: diff --git a/test/packetimpact/netdevs/BUILD b/test/packetimpact/netdevs/BUILD new file mode 100644 index 000000000..422bb9b0c --- /dev/null +++ b/test/packetimpact/netdevs/BUILD @@ -0,0 +1,15 @@ +load("//tools:defs.bzl", "go_library") + +package( + licenses = ["notice"], +) + +go_library( + name = "netdevs", + srcs = ["netdevs.go"], + visibility = ["//test/packetimpact:__subpackages__"], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/header", + ], +) diff --git a/test/packetimpact/netdevs/netdevs.go b/test/packetimpact/netdevs/netdevs.go new file mode 100644 index 000000000..d2c9cfeaf --- /dev/null +++ b/test/packetimpact/netdevs/netdevs.go @@ -0,0 +1,104 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package netdevs contains utilities for working with network devices. +package netdevs + +import ( + "fmt" + "net" + "regexp" + "strings" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" +) + +// A DeviceInfo represents a network device. +type DeviceInfo struct { + MAC net.HardwareAddr + IPv4Addr net.IP + IPv4Net *net.IPNet + IPv6Addr net.IP + IPv6Net *net.IPNet +} + +var ( + deviceLine = regexp.MustCompile(`^\s*\d+: (\w+)`) + linkLine = regexp.MustCompile(`^\s*link/\w+ ([0-9a-fA-F:]+)`) + inetLine = regexp.MustCompile(`^\s*inet ([0-9./]+)`) + inet6Line = regexp.MustCompile(`^\s*inet6 ([0-9a-fA-Z:/]+)`) +) + +// ParseDevices parses the output from `ip addr show` into a map from device +// name to information about the device. 
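+// The parser expects the usual `ip addr show` layout: a numbered interface
+// line (e.g. "2: eth0: ..."), followed by indented "link/", "inet" and
+// "inet6" lines for that interface.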
+func ParseDevices(cmdOutput string) (map[string]DeviceInfo, error) { + var currentDevice string + var currentInfo DeviceInfo + deviceInfos := make(map[string]DeviceInfo) + for _, line := range strings.Split(cmdOutput, "\n") { + if m := deviceLine.FindStringSubmatch(line); m != nil { + if currentDevice != "" { + deviceInfos[currentDevice] = currentInfo + } + currentInfo = DeviceInfo{} + currentDevice = m[1] + } else if m := linkLine.FindStringSubmatch(line); m != nil { + mac, err := net.ParseMAC(m[1]) + if err != nil { + return nil, err + } + currentInfo.MAC = mac + } else if m := inetLine.FindStringSubmatch(line); m != nil { + ipv4Addr, ipv4Net, err := net.ParseCIDR(m[1]) + if err != nil { + return nil, err + } + currentInfo.IPv4Addr = ipv4Addr + currentInfo.IPv4Net = ipv4Net + } else if m := inet6Line.FindStringSubmatch(line); m != nil { + ipv6Addr, ipv6Net, err := net.ParseCIDR(m[1]) + if err != nil { + return nil, err + } + currentInfo.IPv6Addr = ipv6Addr + currentInfo.IPv6Net = ipv6Net + } + } + if currentDevice != "" { + deviceInfos[currentDevice] = currentInfo + } + return deviceInfos, nil +} + +// MACToIP converts the MAC address to an IPv6 link local address as described +// in RFC 4291 page 20: https://tools.ietf.org/html/rfc4291#page-20 +func MACToIP(mac net.HardwareAddr) net.IP { + addr := make([]byte, header.IPv6AddressSize) + addr[0] = 0xfe + addr[1] = 0x80 + header.EthernetAdddressToModifiedEUI64IntoBuf(tcpip.LinkAddress(mac), addr[8:]) + return net.IP(addr) +} + +// FindDeviceByIP finds a DeviceInfo and device name from an IP address in the +// output of ParseDevices. +func FindDeviceByIP(ip net.IP, devices map[string]DeviceInfo) (string, DeviceInfo, error) { + for dev, info := range devices { + if info.IPv4Addr.Equal(ip) { + return dev, info, nil + } + } + return "", DeviceInfo{}, fmt.Errorf("can't find %s on any interface", ip) +} diff --git a/test/packetimpact/runner/BUILD b/test/packetimpact/runner/BUILD new file mode 100644 index 000000000..0b68a760a --- /dev/null +++ b/test/packetimpact/runner/BUILD @@ -0,0 +1,20 @@ +load("//tools:defs.bzl", "go_test") + +package( + default_visibility = ["//test/packetimpact:__subpackages__"], + licenses = ["notice"], +) + +go_test( + name = "packetimpact_test", + srcs = ["packetimpact_test.go"], + tags = [ + # Not intended to be run directly. + "local", + "manual", + ], + deps = [ + "//pkg/test/dockerutil", + "//test/packetimpact/netdevs", + ], +) diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl new file mode 100644 index 000000000..ea66b9756 --- /dev/null +++ b/test/packetimpact/runner/defs.bzl @@ -0,0 +1,136 @@ +"""Defines rules for packetimpact test targets.""" + +load("//tools:defs.bzl", "go_test") + +def _packetimpact_test_impl(ctx): + test_runner = ctx.executable._test_runner + bench = ctx.actions.declare_file("%s-bench" % ctx.label.name) + bench_content = "\n".join([ + "#!/bin/bash", + # This test will run part in a distinct user namespace. This can cause + # permission problems, because all runfiles may not be owned by the + # current user, and no other users will be mapped in that namespace. + # Make sure that everything is readable here. + "find . 
-type f -or -type d -exec chmod a+rx {} \\;", + "%s %s --testbench_binary %s $@\n" % ( + test_runner.short_path, + " ".join(ctx.attr.flags), + ctx.files.testbench_binary[0].short_path, + ), + ]) + ctx.actions.write(bench, bench_content, is_executable = True) + + transitive_files = depset() + if hasattr(ctx.attr._test_runner, "data_runfiles"): + transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + runfiles = ctx.runfiles( + files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, + transitive_files = transitive_files, + collect_default = True, + collect_data = True, + ) + return [DefaultInfo(executable = bench, runfiles = runfiles)] + +_packetimpact_test = rule( + attrs = { + "_test_runner": attr.label( + executable = True, + cfg = "target", + default = ":packetimpact_test", + ), + "_posix_server_binary": attr.label( + cfg = "target", + default = "//test/packetimpact/dut:posix_server", + ), + "testbench_binary": attr.label( + cfg = "target", + mandatory = True, + ), + "flags": attr.string_list( + mandatory = False, + default = [], + ), + }, + test = True, + implementation = _packetimpact_test_impl, +) + +PACKETIMPACT_TAGS = ["local", "manual"] + +def packetimpact_linux_test( + name, + testbench_binary, + expect_failure = False, + **kwargs): + """Add a packetimpact test on linux. + + Args: + name: name of the test + testbench_binary: the testbench binary + expect_failure: the test must fail + **kwargs: all the other args, forwarded to _packetimpact_test + """ + expect_failure_flag = ["--expect_failure"] if expect_failure else [] + _packetimpact_test( + name = name + "_linux_test", + testbench_binary = testbench_binary, + flags = ["--dut_platform", "linux"] + expect_failure_flag, + tags = PACKETIMPACT_TAGS + ["packetimpact"], + **kwargs + ) + +def packetimpact_netstack_test( + name, + testbench_binary, + expect_failure = False, + **kwargs): + """Add a packetimpact test on netstack. + + Args: + name: name of the test + testbench_binary: the testbench binary + expect_failure: the test must fail + **kwargs: all the other args, forwarded to _packetimpact_test + """ + expect_failure_flag = [] + if expect_failure: + expect_failure_flag = ["--expect_failure"] + _packetimpact_test( + name = name + "_netstack_test", + testbench_binary = testbench_binary, + # This is the default runtime unless + # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. + flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, + tags = PACKETIMPACT_TAGS + ["packetimpact"], + **kwargs + ) + +def packetimpact_go_test(name, size = "small", pure = True, expect_linux_failure = False, expect_netstack_failure = False, **kwargs): + """Add packetimpact tests written in go. 
+ + Args: + name: name of the test + size: size of the test + pure: make a static go binary + expect_linux_failure: the test must fail for Linux + expect_netstack_failure: the test must fail for Netstack + **kwargs: all the other args, forwarded to go_test + """ + testbench_binary = name + "_test" + go_test( + name = testbench_binary, + size = size, + pure = pure, + tags = PACKETIMPACT_TAGS, + **kwargs + ) + packetimpact_linux_test( + name = name, + expect_failure = expect_linux_failure, + testbench_binary = testbench_binary, + ) + packetimpact_netstack_test( + name = name, + expect_failure = expect_netstack_failure, + testbench_binary = testbench_binary, + ) diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go new file mode 100644 index 000000000..ac13c8543 --- /dev/null +++ b/test/packetimpact/runner/packetimpact_test.go @@ -0,0 +1,312 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The runner starts docker containers and networking for a packetimpact test. +package packetimpact_test + +import ( + "flag" + "fmt" + "log" + "math/rand" + "net" + "path" + "strings" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/packetimpact/netdevs" +) + +// stringList implements flag.Value. +type stringList []string + +// String implements flag.Value.String. +func (l *stringList) String() string { + return strings.Join(*l, ",") +} + +// Set implements flag.Value.Set. +func (l *stringList) Set(value string) error { + *l = append(*l, value) + return nil +} + +var ( + dutPlatform = flag.String("dut_platform", "", "either \"linux\" or \"netstack\"") + testbenchBinary = flag.String("testbench_binary", "", "path to the testbench binary") + tshark = flag.Bool("tshark", false, "use more verbose tshark in logs instead of tcpdump") + extraTestArgs = stringList{} + expectFailure = flag.Bool("expect_failure", false, "expect that the test will fail when run") + + dutAddr = net.IPv4(0, 0, 0, 10) + testbenchAddr = net.IPv4(0, 0, 0, 20) +) + +const ctrlPort = "40000" + +// logger implements testutil.Logger. +// +// Labels logs based on their source and formats multi-line logs. +type logger string + +// Name implements testutil.Logger.Name. +func (l logger) Name() string { + return string(l) +} + +// Logf implements testutil.Logger.Logf. 
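The runner's logger, whose Logf follows, prefixes each message with its source and indents continuation lines underneath it. A self-contained mirror of that formatting, for illustration only; prefixLogger is a stand-in and not part of the patch:

```go
package main

import (
	"fmt"
	"log"
	"strings"
)

// prefixLogger mirrors the runner's logger type: the first line of a message
// gets a "name: " prefix and continuation lines are indented by the width of
// the name so multi-line output stays readable.
type prefixLogger string

func (l prefixLogger) Logf(format string, args ...interface{}) {
	lines := strings.Split(fmt.Sprintf(format, args...), "\n")
	log.Printf("%s: %s", l, lines[0])
	for _, line := range lines[1:] {
		log.Printf("%*s %s", len(l), "", line)
	}
}

func main() {
	prefixLogger("dut").Logf("starting posix_server\nport: %d", 40000)
	// After the timestamp the standard log package adds, this prints:
	//
	//	dut: starting posix_server
	//	    port: 40000
}
```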
+func (l logger) Logf(format string, args ...interface{}) { + lines := strings.Split(fmt.Sprintf(format, args...), "\n") + log.Printf("%s: %s", l, lines[0]) + for _, line := range lines[1:] { + log.Printf("%*s %s", len(l), "", line) + } +} + +func TestOne(t *testing.T) { + flag.Var(&extraTestArgs, "extra_test_arg", "extra arguments to pass to the testbench") + flag.Parse() + if *dutPlatform != "linux" && *dutPlatform != "netstack" { + t.Fatal("--dut_platform should be either linux or netstack") + } + if *testbenchBinary == "" { + t.Fatal("--testbench_binary is missing") + } + if *dutPlatform == "netstack" { + if _, err := dockerutil.RuntimePath(); err != nil { + t.Fatal("--runtime is missing or invalid with --dut_platform=netstack:", err) + } + } + dockerutil.EnsureSupportedDockerVersion() + + // Create the networks needed for the test. One control network is needed for + // the gRPC control packets and one test network on which to transmit the test + // packets. + ctrlNet := dockerutil.NewDockerNetwork(logger("ctrlNet")) + testNet := dockerutil.NewDockerNetwork(logger("testNet")) + for _, dn := range []*dockerutil.DockerNetwork{ctrlNet, testNet} { + for { + if err := createDockerNetwork(dn); err != nil { + t.Log("creating docker network:", err) + const wait = 100 * time.Millisecond + t.Logf("sleeping %s and will try creating docker network again", wait) + // This can fail if another docker network claimed the same IP so we'll + // just try again. + time.Sleep(wait) + continue + } + break + } + defer func(dn *dockerutil.DockerNetwork) { + if err := dn.Cleanup(); err != nil { + t.Errorf("unable to cleanup container %s: %s", dn.Name, err) + } + }(dn) + } + + runOpts := dockerutil.RunOpts{ + Image: "packetimpact", + CapAdd: []string{"NET_ADMIN"}, + Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm"}, + Foreground: true, + } + + // Create the Docker container for the DUT. + dut := dockerutil.MakeDocker(logger("dut")) + if *dutPlatform == "linux" { + dut.Runtime = "" + } + + const containerPosixServerBinary = "/packetimpact/posix_server" + dut.CopyFiles("/packetimpact", "/test/packetimpact/dut/posix_server") + + if err := dut.Create(runOpts, containerPosixServerBinary, "--ip=0.0.0.0", "--port="+ctrlPort); err != nil { + t.Fatalf("unable to create container %s: %s", dut.Name, err) + } + defer dut.CleanUp() + + // Add ctrlNet as eth1 and testNet as eth2. + const testNetDev = "eth2" + if err := addNetworks(dut, dutAddr, []*dockerutil.DockerNetwork{ctrlNet, testNet}); err != nil { + t.Fatal(err) + } + + if err := dut.Start(); err != nil { + t.Fatalf("unable to start container %s: %s", dut.Name, err) + } + + if _, err := dut.WaitForOutput("Server listening.*\n", 60*time.Second); err != nil { + t.Fatalf("%s on container %s never listened: %s", containerPosixServerBinary, dut.Name, err) + } + + dutTestDevice, dutDeviceInfo, err := deviceByIP(dut, addressInSubnet(dutAddr, *testNet.Subnet)) + if err != nil { + t.Fatal(err) + } + + remoteMAC := dutDeviceInfo.MAC + remoteIPv6 := dutDeviceInfo.IPv6Addr + // Netstack as DUT doesn't assign IPv6 addresses automatically so do it if + // needed. + if remoteIPv6 == nil { + if _, err := dut.Exec(dockerutil.RunOpts{}, "ip", "addr", "add", netdevs.MACToIP(remoteMAC).String(), "scope", "link", "dev", dutTestDevice); err != nil { + t.Fatalf("unable to ip addr add on container %s: %s", dut.Name, err) + } + // Now try again, to make sure that it worked. 
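The link-local address assigned by the `ip addr add` just above comes from netdevs.MACToIP. A worked example of that conversion, not part of the patch, using a made-up Docker-style MAC:

```go
package main

import (
	"fmt"
	"net"

	"gvisor.dev/gvisor/test/packetimpact/netdevs"
)

func main() {
	// A made-up Docker-style MAC address.
	mac, err := net.ParseMAC("02:42:c0:a8:09:02")
	if err != nil {
		panic(err)
	}
	// Modified EUI-64 (RFC 4291): flip the universal/local bit of the first
	// octet and splice ff:fe into the middle of the MAC, then prepend fe80::.
	fmt.Println(netdevs.MACToIP(mac)) // fe80::42:c0ff:fea8:902
}
```

This is the same conversion the deleted test_runner.sh performed with printf when the DUT had no IPv6 address.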
+ _, dutDeviceInfo, err = deviceByIP(dut, addressInSubnet(dutAddr, *testNet.Subnet)) + if err != nil { + t.Fatal(err) + } + remoteIPv6 = dutDeviceInfo.IPv6Addr + if remoteIPv6 == nil { + t.Fatal("unable to set IPv6 address on container", dut.Name) + } + } + + // Create the Docker container for the testbench. + testbench := dockerutil.MakeDocker(logger("testbench")) + testbench.Runtime = "" // The testbench always runs on Linux. + + tbb := path.Base(*testbenchBinary) + containerTestbenchBinary := "/packetimpact/" + tbb + testbench.CopyFiles("/packetimpact", "/test/packetimpact/tests/"+tbb) + + // Run tcpdump in the test bench unbuffered, without DNS resolution, just on + // the interface with the test packets. + snifferArgs := []string{ + "tcpdump", "-S", "-vvv", "-U", "-n", "-i", testNetDev, + } + snifferRegex := "tcpdump: listening.*\n" + if *tshark { + // Run tshark in the test bench unbuffered, without DNS resolution, just on + // the interface with the test packets. + snifferArgs = []string{ + "tshark", "-V", "-l", "-n", "-i", testNetDev, + "-o", "tcp.check_checksum:TRUE", + "-o", "udp.check_checksum:TRUE", + } + snifferRegex = "Capturing on.*\n" + } + + if err := testbench.Create(runOpts, snifferArgs...); err != nil { + t.Fatalf("unable to create container %s: %s", testbench.Name, err) + } + defer testbench.CleanUp() + + // Add ctrlNet as eth1 and testNet as eth2. + if err := addNetworks(testbench, testbenchAddr, []*dockerutil.DockerNetwork{ctrlNet, testNet}); err != nil { + t.Fatal(err) + } + + if err := testbench.Start(); err != nil { + t.Fatalf("unable to start container %s: %s", testbench.Name, err) + } + + // Kill so that it will flush output. + defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + + if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil { + t.Fatalf("sniffer on %s never listened: %s", dut.Name, err) + } + + // Because the Linux kernel receives the SYN-ACK but didn't send the SYN it + // will issue a RST. To prevent this IPtables can be used to filter out all + // incoming packets. The raw socket that packetimpact tests use will still see + // everything. + if _, err := testbench.Exec(dockerutil.RunOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil { + t.Fatalf("unable to Exec iptables on container %s: %s", testbench.Name, err) + } + + // FIXME(b/156449515): Some piece of the system has a race. The old + // bash script version had a sleep, so we have one too. The race should + // be fixed and this sleep removed. + time.Sleep(time.Second) + + // Start a packetimpact test on the test bench. The packetimpact test sends + // and receives packets and also sends POSIX socket commands to the + // posix_server to be executed on the DUT. + testArgs := []string{containerTestbenchBinary} + testArgs = append(testArgs, extraTestArgs...) + testArgs = append(testArgs, + "--posix_server_ip", addressInSubnet(dutAddr, *ctrlNet.Subnet).String(), + "--posix_server_port", ctrlPort, + "--remote_ipv4", addressInSubnet(dutAddr, *testNet.Subnet).String(), + "--local_ipv4", addressInSubnet(testbenchAddr, *testNet.Subnet).String(), + "--remote_ipv6", remoteIPv6.String(), + "--remote_mac", remoteMAC.String(), + "--device", testNetDev, + ) + _, err = testbench.Exec(dockerutil.RunOpts{}, testArgs...) 
+ if !*expectFailure && err != nil { + t.Fatal("test failed:", err) + } + if *expectFailure && err == nil { + t.Fatal("test failure expected but the test succeeded, enable the test and mark the corresponding bug as fixed") + } +} + +func addNetworks(d *dockerutil.Docker, addr net.IP, networks []*dockerutil.DockerNetwork) error { + for _, dn := range networks { + ip := addressInSubnet(addr, *dn.Subnet) + // Connect to the network with the specified IP address. + if err := dn.Connect(d, "--ip", ip.String()); err != nil { + return fmt.Errorf("unable to connect container %s to network %s: %w", d.Name, dn.Name, err) + } + } + return nil +} + +// addressInSubnet combines the subnet provided with the address and returns a +// new address. The return address bits come from the subnet where the mask is 1 +// and from the ip address where the mask is 0. +func addressInSubnet(addr net.IP, subnet net.IPNet) net.IP { + var octets []byte + for i := 0; i < 4; i++ { + octets = append(octets, (subnet.IP.To4()[i]&subnet.Mask[i])+(addr.To4()[i]&(^subnet.Mask[i]))) + } + return net.IP(octets) +} + +// makeDockerNetwork makes a randomly-named network that will start with the +// namePrefix. The network will be a random /24 subnet. +func createDockerNetwork(n *dockerutil.DockerNetwork) error { + randSource := rand.NewSource(time.Now().UnixNano()) + r1 := rand.New(randSource) + // Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. + ip := net.IPv4(byte(r1.Intn(224-192)+192), byte(r1.Intn(256)), byte(r1.Intn(256)), 0) + n.Subnet = &net.IPNet{ + IP: ip, + Mask: ip.DefaultMask(), + } + return n.Create() +} + +// deviceByIP finds a deviceInfo and device name from an IP address. +func deviceByIP(d *dockerutil.Docker, ip net.IP) (string, netdevs.DeviceInfo, error) { + out, err := d.Exec(dockerutil.RunOpts{}, "ip", "addr", "show") + if err != nil { + return "", netdevs.DeviceInfo{}, fmt.Errorf("listing devices on %s container: %w", d.Name, err) + } + devs, err := netdevs.ParseDevices(out) + if err != nil { + return "", netdevs.DeviceInfo{}, fmt.Errorf("parsing devices from %s container: %w", d.Name, err) + } + testDevice, deviceInfo, err := netdevs.FindDeviceByIP(ip, devs) + if err != nil { + return "", netdevs.DeviceInfo{}, fmt.Errorf("can't find deviceInfo for container %s: %w", d.Name, err) + } + return testDevice, deviceInfo, nil +} diff --git a/test/packetimpact/testbench/BUILD b/test/packetimpact/testbench/BUILD index 682933067..d19ec07d4 100644 --- a/test/packetimpact/testbench/BUILD +++ b/test/packetimpact/testbench/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/tcpip/header", "//pkg/tcpip/seqnum", "//pkg/usermem", + "//test/packetimpact/netdevs", "//test/packetimpact/proto:posix_server_go_proto", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 463fd0556..bf104e5ca 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -114,12 +114,12 @@ var _ layerState = (*etherState)(nil) func newEtherState(out, in Ether) (*etherState, error) { lMAC, err := tcpip.ParseMACAddress(LocalMAC) if err != nil { - return nil, err + return nil, fmt.Errorf("parsing local MAC: %q: %w", LocalMAC, err) } rMAC, err := tcpip.ParseMACAddress(RemoteMAC) if err != nil { - return nil, err + return nil, fmt.Errorf("parsing remote MAC: %q: %w", RemoteMAC, err) } s := etherState{ out: Ether{SrcAddr: &lMAC, 
DstAddr: &rMAC}, diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index a78b7d7ee..b919a3c2e 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -16,6 +16,7 @@ package testbench import ( "context" + "flag" "net" "strconv" "syscall" @@ -37,6 +38,11 @@ type DUT struct { // NewDUT creates a new connection with the DUT over gRPC. func NewDUT(t *testing.T) DUT { + flag.Parse() + if err := genPseudoFlags(); err != nil { + t.Fatal("generating psuedo flags:", err) + } + posixServerAddress := POSIXServerIP + ":" + strconv.Itoa(POSIXServerPort) conn, err := grpc.Dial(posixServerAddress, grpc.WithInsecure(), grpc.WithKeepaliveParams(keepalive.ClientParameters{Timeout: RPCKeepalive})) if err != nil { diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index 4665f60b2..278229b7e 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -16,7 +16,6 @@ package testbench import ( "encoding/binary" - "flag" "fmt" "math" "net" @@ -41,7 +40,6 @@ func htons(x uint16) uint16 { // NewSniffer creates a Sniffer connected to *device. func NewSniffer(t *testing.T) (Sniffer, error) { - flag.Parse() snifferFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL))) if err != nil { return Sniffer{}, err @@ -136,7 +134,6 @@ type Injector struct { // NewInjector creates a new injector on *device. func NewInjector(t *testing.T) (Injector, error) { - flag.Parse() ifInfo, err := net.InterfaceByName(Device) if err != nil { return Injector{}, err diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go index a1242b189..4de2aa1d3 100644 --- a/test/packetimpact/testbench/testbench.go +++ b/test/packetimpact/testbench/testbench.go @@ -16,7 +16,12 @@ package testbench import ( "flag" + "fmt" + "net" + "os/exec" "time" + + "gvisor.dev/gvisor/test/packetimpact/netdevs" ) var ( @@ -55,9 +60,31 @@ func RegisterFlags(fs *flag.FlagSet) { fs.DurationVar(&RPCKeepalive, "rpc_keepalive", RPCKeepalive, "gRPC keepalive") fs.StringVar(&LocalIPv4, "local_ipv4", LocalIPv4, "local IPv4 address for test packets") fs.StringVar(&RemoteIPv4, "remote_ipv4", RemoteIPv4, "remote IPv4 address for test packets") - fs.StringVar(&LocalIPv6, "local_ipv6", LocalIPv6, "local IPv6 address for test packets") fs.StringVar(&RemoteIPv6, "remote_ipv6", RemoteIPv6, "remote IPv6 address for test packets") - fs.StringVar(&LocalMAC, "local_mac", LocalMAC, "local mac address for test packets") fs.StringVar(&RemoteMAC, "remote_mac", RemoteMAC, "remote mac address for test packets") fs.StringVar(&Device, "device", Device, "local device for test packets") } + +// genPseudoFlags populates flag-like global config based on real flags. +// +// genPseudoFlags must only be called after flag.Parse. 
+func genPseudoFlags() error { + out, err := exec.Command("ip", "addr", "show").CombinedOutput() + if err != nil { + return fmt.Errorf("listing devices: %q: %w", string(out), err) + } + devs, err := netdevs.ParseDevices(string(out)) + if err != nil { + return fmt.Errorf("parsing devices: %w", err) + } + + _, deviceInfo, err := netdevs.FindDeviceByIP(net.ParseIP(LocalIPv4), devs) + if err != nil { + return fmt.Errorf("can't find deviceInfo: %w", err) + } + + LocalMAC = deviceInfo.MAC.String() + LocalIPv6 = deviceInfo.IPv6Addr.String() + + return nil +} diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index c4ffda17e..3a0e9cb07 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -1,4 +1,4 @@ -load("defs.bzl", "packetimpact_go_test") +load("//test/packetimpact/runner:defs.bzl", "packetimpact_go_test") package( default_visibility = ["//test/packetimpact:__subpackages__"], @@ -177,8 +177,3 @@ packetimpact_go_test( "@org_golang_x_sys//unix:go_default_library", ], ) - -sh_binary( - name = "test_runner", - srcs = ["test_runner.sh"], -) diff --git a/test/packetimpact/tests/defs.bzl b/test/packetimpact/tests/defs.bzl deleted file mode 100644 index 45dce64ab..000000000 --- a/test/packetimpact/tests/defs.bzl +++ /dev/null @@ -1,137 +0,0 @@ -"""Defines rules for packetimpact test targets.""" - -load("//tools:defs.bzl", "go_test") - -def _packetimpact_test_impl(ctx): - test_runner = ctx.executable._test_runner - bench = ctx.actions.declare_file("%s-bench" % ctx.label.name) - bench_content = "\n".join([ - "#!/bin/bash", - # This test will run part in a distinct user namespace. This can cause - # permission problems, because all runfiles may not be owned by the - # current user, and no other users will be mapped in that namespace. - # Make sure that everything is readable here. - "find . -type f -exec chmod a+rx {} \\;", - "find . -type d -exec chmod a+rx {} \\;", - "%s %s --posix_server_binary %s --testbench_binary %s $@\n" % ( - test_runner.short_path, - " ".join(ctx.attr.flags), - ctx.files._posix_server_binary[0].short_path, - ctx.files.testbench_binary[0].short_path, - ), - ]) - ctx.actions.write(bench, bench_content, is_executable = True) - - transitive_files = depset() - if hasattr(ctx.attr._test_runner, "data_runfiles"): - transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) - runfiles = ctx.runfiles( - files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, - transitive_files = transitive_files, - collect_default = True, - collect_data = True, - ) - return [DefaultInfo(executable = bench, runfiles = runfiles)] - -_packetimpact_test = rule( - attrs = { - "_test_runner": attr.label( - executable = True, - cfg = "target", - default = ":test_runner", - ), - "_posix_server_binary": attr.label( - cfg = "target", - default = "//test/packetimpact/dut:posix_server", - ), - "testbench_binary": attr.label( - cfg = "target", - mandatory = True, - ), - "flags": attr.string_list( - mandatory = False, - default = [], - ), - }, - test = True, - implementation = _packetimpact_test_impl, -) - -PACKETIMPACT_TAGS = ["local", "manual"] - -def packetimpact_linux_test( - name, - testbench_binary, - expect_failure = False, - **kwargs): - """Add a packetimpact test on linux. 
- - Args: - name: name of the test - testbench_binary: the testbench binary - **kwargs: all the other args, forwarded to _packetimpact_test - """ - expect_failure_flag = ["--expect_failure"] if expect_failure else [] - _packetimpact_test( - name = name + "_linux_test", - testbench_binary = testbench_binary, - flags = ["--dut_platform", "linux"] + expect_failure_flag, - tags = PACKETIMPACT_TAGS + ["packetimpact"], - **kwargs - ) - -def packetimpact_netstack_test( - name, - testbench_binary, - expect_failure = False, - **kwargs): - """Add a packetimpact test on netstack. - - Args: - name: name of the test - testbench_binary: the testbench binary - expect_failure: the test must fail - **kwargs: all the other args, forwarded to _packetimpact_test - """ - expect_failure_flag = [] - if expect_failure: - expect_failure_flag = ["--expect_failure"] - _packetimpact_test( - name = name + "_netstack_test", - testbench_binary = testbench_binary, - # This is the default runtime unless - # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. - flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, - tags = PACKETIMPACT_TAGS + ["packetimpact"], - **kwargs - ) - -def packetimpact_go_test(name, size = "small", pure = True, expect_linux_failure = False, expect_netstack_failure = False, **kwargs): - """Add packetimpact tests written in go. - - Args: - name: name of the test - size: size of the test - pure: make a static go binary - expect_linux_failure: expect the test to fail for Linux - expect_netstack_failure: expect the test to fail for Netstack - **kwargs: all the other args, forwarded to go_test - """ - testbench_binary = name + "_test" - go_test( - name = testbench_binary, - size = size, - pure = pure, - tags = PACKETIMPACT_TAGS, - **kwargs - ) - packetimpact_linux_test( - name = name, - expect_failure = expect_linux_failure, - testbench_binary = testbench_binary, - ) - packetimpact_netstack_test( - name = name, - expect_failure = expect_netstack_failure, - testbench_binary = testbench_binary, - ) diff --git a/test/packetimpact/tests/test_runner.sh b/test/packetimpact/tests/test_runner.sh deleted file mode 100755 index 706441cce..000000000 --- a/test/packetimpact/tests/test_runner.sh +++ /dev/null @@ -1,325 +0,0 @@ -#!/bin/bash - -# Copyright 2020 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Run a packetimpact test. Two docker containers are made, one for the -# Device-Under-Test (DUT) and one for the test bench. Each is attached with -# two networks, one for control packets that aid the test and one for test -# packets which are sent as part of the test and observed for correctness. 
- -set -euxo pipefail - -function failure() { - local lineno=$1 - local msg=$2 - local filename="$0" - echo "FAIL: $filename:$lineno: $msg" -} -trap 'failure ${LINENO} "$BASH_COMMAND"' ERR - -declare -r LONGOPTS="dut_platform:,posix_server_binary:,testbench_binary:,runtime:,tshark,extra_test_arg:,expect_failure" - -# Don't use declare below so that the error from getopt will end the script. -PARSED=$(getopt --options "" --longoptions=$LONGOPTS --name "$0" -- "$@") - -eval set -- "$PARSED" - -declare -a EXTRA_TEST_ARGS - -while true; do - case "$1" in - --dut_platform) - # Either "linux" or "netstack". - declare -r DUT_PLATFORM="$2" - shift 2 - ;; - --posix_server_binary) - declare -r POSIX_SERVER_BINARY="$2" - shift 2 - ;; - --testbench_binary) - declare -r TESTBENCH_BINARY="$2" - shift 2 - ;; - --runtime) - # Not readonly because there might be multiple --runtime arguments and we - # want to use just the last one. Only used if --dut_platform is - # "netstack". - declare RUNTIME="$2" - shift 2 - ;; - --tshark) - declare -r TSHARK="1" - shift 1 - ;; - --extra_test_arg) - EXTRA_TEST_ARGS+="$2" - shift 2 - ;; - --expect_failure) - declare -r EXPECT_FAILURE="1" - shift 1 - ;; - --) - shift - break - ;; - *) - echo "Programming error" - exit 3 - esac -done - -# All the other arguments are scripts. -declare -r scripts="$@" - -# Check that the required flags are defined in a way that is safe for "set -u". -if [[ "${DUT_PLATFORM-}" == "netstack" ]]; then - if [[ -z "${RUNTIME-}" ]]; then - echo "FAIL: Missing --runtime argument: ${RUNTIME-}" - exit 2 - fi - declare -r RUNTIME_ARG="--runtime ${RUNTIME}" -elif [[ "${DUT_PLATFORM-}" == "linux" ]]; then - declare -r RUNTIME_ARG="" -else - echo "FAIL: Bad or missing --dut_platform argument: ${DUT_PLATFORM-}" - exit 2 -fi -if [[ ! -f "${POSIX_SERVER_BINARY-}" ]]; then - echo "FAIL: Bad or missing --posix_server_binary: ${POSIX_SERVER-}" - exit 2 -fi -if [[ ! -f "${TESTBENCH_BINARY-}" ]]; then - echo "FAIL: Bad or missing --testbench_binary: ${TESTBENCH_BINARY-}" - exit 2 -fi - -function new_net_prefix() { - # Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. - echo "$(shuf -i 192-223 -n 1).$(shuf -i 0-255 -n 1).$(shuf -i 0-255 -n 1)" -} - -# Variables specific to the control network and interface start with CTRL_. -# Variables specific to the test network and interface start with TEST_. -# Variables specific to the DUT start with DUT_. -# Variables specific to the test bench start with TESTBENCH_. -# Use random numbers so that test networks don't collide. -declare CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" -declare CTRL_NET_PREFIX=$(new_net_prefix) -declare TEST_NET="test_net-${RANDOM}${RANDOM}" -declare TEST_NET_PREFIX=$(new_net_prefix) -# On both DUT and test bench, testing packets are on the eth2 interface. -declare -r TEST_DEVICE="eth2" -# Number of bits in the *_NET_PREFIX variables. -declare -r NET_MASK="24" -# Last bits of the DUT's IP address. -declare -r DUT_NET_SUFFIX=".10" -# Control port. -declare -r CTRL_PORT="40000" -# Last bits of the test bench's IP address. -declare -r TESTBENCH_NET_SUFFIX=".20" -declare -r TIMEOUT="60" -declare -r IMAGE_TAG="gcr.io/gvisor-presubmit/packetimpact" - -# Make sure that docker is installed. -docker --version - -function finish { - local cleanup_success=1 - - if [[ -z "${TSHARK-}" ]]; then - # Kill tcpdump so that it will flush output. - docker exec -t "${TESTBENCH}" \ - killall tcpdump || \ - cleanup_success=0 - else - # Kill tshark so that it will flush output. 
- docker exec -t "${TESTBENCH}" \ - killall tshark || \ - cleanup_success=0 - fi - - for net in "${CTRL_NET}" "${TEST_NET}"; do - # Kill all processes attached to ${net}. - for docker_command in "kill" "rm"; do - (docker network inspect "${net}" \ - --format '{{range $key, $value := .Containers}}{{$key}} {{end}}' \ - | xargs -r docker "${docker_command}") || \ - cleanup_success=0 - done - # Remove the network. - docker network rm "${net}" || \ - cleanup_success=0 - done - - if ((!$cleanup_success)); then - echo "FAIL: Cleanup command failed" - exit 4 - fi -} -trap finish EXIT - -# Subnet for control packets between test bench and DUT. -while ! docker network create \ - "--subnet=${CTRL_NET_PREFIX}.0/${NET_MASK}" "${CTRL_NET}"; do - sleep 0.1 - CTRL_NET_PREFIX=$(new_net_prefix) - CTRL_NET="ctrl_net-${RANDOM}${RANDOM}" -done - -# Subnet for the packets that are part of the test. -while ! docker network create \ - "--subnet=${TEST_NET_PREFIX}.0/${NET_MASK}" "${TEST_NET}"; do - sleep 0.1 - TEST_NET_PREFIX=$(new_net_prefix) - TEST_NET="test_net-${RANDOM}${RANDOM}" -done - -docker pull "${IMAGE_TAG}" - -# Create the DUT container and connect to network. -DUT=$(docker create ${RUNTIME_ARG} --privileged --rm \ - --cap-add NET_ADMIN \ - --sysctl net.ipv6.conf.all.disable_ipv6=0 \ - --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) -docker network connect "${CTRL_NET}" \ - --ip "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ - || (docker kill ${DUT}; docker rm ${DUT}; false) -docker network connect "${TEST_NET}" \ - --ip "${TEST_NET_PREFIX}${DUT_NET_SUFFIX}" "${DUT}" \ - || (docker kill ${DUT}; docker rm ${DUT}; false) -docker start "${DUT}" - -# Create the test bench container and connect to network. -TESTBENCH=$(docker create --privileged --rm \ - --cap-add NET_ADMIN \ - --sysctl net.ipv6.conf.all.disable_ipv6=0 \ - --stop-timeout ${TIMEOUT} -it ${IMAGE_TAG}) -docker network connect "${CTRL_NET}" \ - --ip "${CTRL_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ - || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false) -docker network connect "${TEST_NET}" \ - --ip "${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX}" "${TESTBENCH}" \ - || (docker kill ${TESTBENCH}; docker rm ${TESTBENCH}; false) -docker start "${TESTBENCH}" - -# Start the posix_server in the DUT. -declare -r DOCKER_POSIX_SERVER_BINARY="/$(basename ${POSIX_SERVER_BINARY})" -docker cp -L ${POSIX_SERVER_BINARY} "${DUT}:${DOCKER_POSIX_SERVER_BINARY}" - -docker exec -t "${DUT}" \ - /bin/bash -c "${DOCKER_POSIX_SERVER_BINARY} \ - --ip ${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ - --port ${CTRL_PORT}" & - -# Because the Linux kernel receives the SYN-ACK but didn't send the SYN it will -# issue a RST. To prevent this IPtables can be used to filter those out. -docker exec "${TESTBENCH}" \ - iptables -A INPUT -i ${TEST_DEVICE} -j DROP - -# Wait for the DUT server to come up. Attempt to connect to it from the test -# bench every 100 milliseconds until success. -while ! 
docker exec "${TESTBENCH}" \ - nc -zv "${CTRL_NET_PREFIX}${DUT_NET_SUFFIX}" "${CTRL_PORT}"; do - sleep 0.1 -done - -declare -r REMOTE_MAC=$(docker exec -t "${DUT}" ip link show \ - "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) -declare -r LOCAL_MAC=$(docker exec -t "${TESTBENCH}" ip link show \ - "${TEST_DEVICE}" | tail -1 | cut -d' ' -f6) -declare REMOTE_IPV6=$(docker exec -t "${DUT}" ip addr show scope link \ - "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) -declare -r LOCAL_IPV6=$(docker exec -t "${TESTBENCH}" ip addr show scope link \ - "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) - -# Netstack as DUT doesn't assign IPv6 addresses automatically so do it if -# needed. Convert the MAC address to an IPv6 link local address as described in -# RFC 4291 page 20: https://tools.ietf.org/html/rfc4291#page-20 -if [[ -z "${REMOTE_IPV6}" ]]; then - # Split the octets of the MAC into an array of strings. - IFS=":" read -a REMOTE_OCTETS <<< "${REMOTE_MAC}" - # Flip the global bit. - REMOTE_OCTETS[0]=$(printf '%x' "$((0x${REMOTE_OCTETS[0]} ^ 2))") - # Add the IPv6 address. - docker exec "${DUT}" \ - ip addr add $(printf 'fe80::%02x%02x:%02xff:fe%02x:%02x%02x/64' \ - "0x${REMOTE_OCTETS[0]}" "0x${REMOTE_OCTETS[1]}" "0x${REMOTE_OCTETS[2]}" \ - "0x${REMOTE_OCTETS[3]}" "0x${REMOTE_OCTETS[4]}" "0x${REMOTE_OCTETS[5]}") \ - scope link \ - dev "${TEST_DEVICE}" - # Re-extract the IPv6 address. - # TODO(eyalsoha): Add "scope link" below when netstack supports correctly - # creating link-local IPv6 addresses. - REMOTE_IPV6=$(docker exec -t "${DUT}" ip addr show \ - "${TEST_DEVICE}" | grep inet6 | cut -d' ' -f6 | cut -d'/' -f1) -fi - -declare -r DOCKER_TESTBENCH_BINARY="/$(basename ${TESTBENCH_BINARY})" -docker cp -L "${TESTBENCH_BINARY}" "${TESTBENCH}:${DOCKER_TESTBENCH_BINARY}" - -if [[ -z "${TSHARK-}" ]]; then - # Run tcpdump in the test bench unbuffered, without dns resolution, just on - # the interface with the test packets. - docker exec -t "${TESTBENCH}" \ - tcpdump -S -vvv -U -n -i "${TEST_DEVICE}" \ - net "${TEST_NET_PREFIX}/24" or \ - host "${REMOTE_IPV6}" or \ - host "${LOCAL_IPV6}" & -else - # Run tshark in the test bench unbuffered, without dns resolution, just on the - # interface with the test packets. - docker exec -t "${TESTBENCH}" \ - tshark -V -l -n -i "${TEST_DEVICE}" \ - -o tcp.check_checksum:TRUE \ - -o udp.check_checksum:TRUE \ - net "${TEST_NET_PREFIX}/24" or \ - host "${REMOTE_IPV6}" or \ - host "${LOCAL_IPV6}" & -fi - -# tcpdump and tshark take time to startup -sleep 3 - -# Start a packetimpact test on the test bench. The packetimpact test sends and -# receives packets and also sends POSIX socket commands to the posix_server to -# be executed on the DUT. -docker exec \ - -e XML_OUTPUT_FILE="/test.xml" \ - -e TEST_TARGET \ - -t "${TESTBENCH}" \ - /bin/bash -c "${DOCKER_TESTBENCH_BINARY} \ - ${EXTRA_TEST_ARGS[@]-} \ - --posix_server_ip=${CTRL_NET_PREFIX}${DUT_NET_SUFFIX} \ - --posix_server_port=${CTRL_PORT} \ - --remote_ipv4=${TEST_NET_PREFIX}${DUT_NET_SUFFIX} \ - --local_ipv4=${TEST_NET_PREFIX}${TESTBENCH_NET_SUFFIX} \ - --remote_ipv6=${REMOTE_IPV6} \ - --local_ipv6=${LOCAL_IPV6} \ - --remote_mac=${REMOTE_MAC} \ - --local_mac=${LOCAL_MAC} \ - --device=${TEST_DEVICE}" && true -declare -r TEST_RESULT="${?}" -if [[ -z "${EXPECT_FAILURE-}" && "${TEST_RESULT}" != 0 ]]; then - echo 'FAIL: This test was expected to pass.' - exit ${TEST_RESULT} -fi -if [[ ! 
-z "${EXPECT_FAILURE-}" && "${TEST_RESULT}" == 0 ]]; then - echo 'FAIL: This test was expected to fail but passed. Enable the test and' \ - 'mark the corresponding bug as fixed.' - exit 1 -fi -echo PASS: No errors. -- cgit v1.2.3 From af3121a52383fb60579d769994be5d91bd788015 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Tue, 26 May 2020 21:42:07 -0700 Subject: Implement splice(2) and tee(2) for VFS2. Updates #138 PiperOrigin-RevId: 313326354 --- pkg/buffer/safemem.go | 82 ++++----- pkg/sentry/fsimpl/tmpfs/regular_file.go | 2 +- pkg/sentry/kernel/pipe/BUILD | 2 + pkg/sentry/kernel/pipe/pipe.go | 6 + pkg/sentry/kernel/pipe/pipe_unsafe.go | 35 ++++ pkg/sentry/kernel/pipe/vfs.go | 219 ++++++++++++++++++++++- pkg/sentry/syscalls/linux/vfs2/BUILD | 1 + pkg/sentry/syscalls/linux/vfs2/splice.go | 286 +++++++++++++++++++++++++++++++ pkg/sentry/syscalls/linux/vfs2/vfs2.go | 4 +- pkg/sentry/vfs/file_description.go | 5 + test/syscalls/linux/splice.cc | 49 ++++++ 11 files changed, 648 insertions(+), 43 deletions(-) create mode 100644 pkg/sentry/kernel/pipe/pipe_unsafe.go create mode 100644 pkg/sentry/syscalls/linux/vfs2/splice.go (limited to 'test') diff --git a/pkg/buffer/safemem.go b/pkg/buffer/safemem.go index 0e5b86344..b789e56e9 100644 --- a/pkg/buffer/safemem.go +++ b/pkg/buffer/safemem.go @@ -28,12 +28,11 @@ func (b *buffer) ReadBlock() safemem.Block { return safemem.BlockFromSafeSlice(b.ReadSlice()) } -// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. -// -// This will advance the write index. -func (v *View) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { - need := int(srcs.NumBytes()) - if need == 0 { +// WriteFromSafememReader writes up to count bytes from r to v and advances the +// write index by the number of bytes written. It calls r.ReadToBlocks() at +// most once. +func (v *View) WriteFromSafememReader(r safemem.Reader, count uint64) (uint64, error) { + if count == 0 { return 0, nil } @@ -50,32 +49,33 @@ func (v *View) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { } // Does the last block have sufficient capacity alone? - if l := firstBuf.WriteSize(); l >= need { - dst = safemem.BlockSeqOf(firstBuf.WriteBlock()) + if l := uint64(firstBuf.WriteSize()); l >= count { + dst = safemem.BlockSeqOf(firstBuf.WriteBlock().TakeFirst64(count)) } else { // Append blocks until sufficient. - need -= l + count -= l blocks = append(blocks, firstBuf.WriteBlock()) - for need > 0 { + for count > 0 { emptyBuf := bufferPool.Get().(*buffer) v.data.PushBack(emptyBuf) - need -= emptyBuf.WriteSize() - blocks = append(blocks, emptyBuf.WriteBlock()) + block := emptyBuf.WriteBlock().TakeFirst64(count) + count -= uint64(block.Len()) + blocks = append(blocks, block) } dst = safemem.BlockSeqFromSlice(blocks) } - // Perform the copy. - n, err := safemem.CopySeq(dst, srcs) + // Perform I/O. + n, err := r.ReadToBlocks(dst) v.size += int64(n) // Update all indices. - for left := int(n); left > 0; firstBuf = firstBuf.Next() { - if l := firstBuf.WriteSize(); left >= l { + for left := n; left > 0; firstBuf = firstBuf.Next() { + if l := firstBuf.WriteSize(); left >= uint64(l) { firstBuf.WriteMove(l) // Whole block. - left -= l + left -= uint64(l) } else { - firstBuf.WriteMove(left) // Partial block. + firstBuf.WriteMove(int(left)) // Partial block. left = 0 } } @@ -83,14 +83,16 @@ func (v *View) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { return n, err } -// ReadToBlocks implements safemem.Reader.ReadToBlocks. 
-// -// This will not advance the read index; the caller should follow -// this call with a call to TrimFront in order to remove the read -// data from the buffer. This is done to support pipe sematics. -func (v *View) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { - need := int(dsts.NumBytes()) - if need == 0 { +// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. It advances the +// write index by the number of bytes written. +func (v *View) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { + return v.WriteFromSafememReader(&safemem.BlockSeqReader{srcs}, srcs.NumBytes()) +} + +// ReadToSafememWriter reads up to count bytes from v to w. It does not advance +// the read index. It calls w.WriteFromBlocks() at most once. +func (v *View) ReadToSafememWriter(w safemem.Writer, count uint64) (uint64, error) { + if count == 0 { return 0, nil } @@ -105,25 +107,27 @@ func (v *View) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { } // Is all the data in a single block? - if l := firstBuf.ReadSize(); l >= need { - src = safemem.BlockSeqOf(firstBuf.ReadBlock()) + if l := uint64(firstBuf.ReadSize()); l >= count { + src = safemem.BlockSeqOf(firstBuf.ReadBlock().TakeFirst64(count)) } else { // Build a list of all the buffers. - need -= l + count -= l blocks = append(blocks, firstBuf.ReadBlock()) - for buf := firstBuf.Next(); buf != nil && need > 0; buf = buf.Next() { - need -= buf.ReadSize() - blocks = append(blocks, buf.ReadBlock()) + for buf := firstBuf.Next(); buf != nil && count > 0; buf = buf.Next() { + block := buf.ReadBlock().TakeFirst64(count) + count -= uint64(block.Len()) + blocks = append(blocks, block) } src = safemem.BlockSeqFromSlice(blocks) } - // Perform the copy. - n, err := safemem.CopySeq(dsts, src) - - // See above: we would normally advance the read index here, but we - // don't do that in order to support pipe semantics. We rely on a - // separate call to TrimFront() in this case. + // Perform I/O. As documented, we don't advance the read index. + return w.WriteFromBlocks(src) +} - return n, err +// ReadToBlocks implements safemem.Reader.ReadToBlocks. It does not advance the +// read index by the number of bytes read, such that it's only safe to call if +// the caller guarantees that ReadToBlocks will only be called once. +func (v *View) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { + return v.ReadToSafememWriter(&safemem.BlockSeqWriter{dsts}, dsts.NumBytes()) } diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 3f433d666..fee174375 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -312,7 +312,7 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off f := fd.inode().impl.(*regularFile) if end := offset + srclen; end < offset { // Overflow. 
- return 0, syserror.EFBIG + return 0, syserror.EINVAL } var err error diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index f29dc0472..7bfa9075a 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -8,6 +8,7 @@ go_library( "device.go", "node.go", "pipe.go", + "pipe_unsafe.go", "pipe_util.go", "reader.go", "reader_writer.go", @@ -20,6 +21,7 @@ go_library( "//pkg/amutex", "//pkg/buffer", "//pkg/context", + "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/device", "//pkg/sentry/fs", diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 62c8691f1..79645d7d2 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -207,7 +207,10 @@ func (p *Pipe) read(ctx context.Context, ops readOps) (int64, error) { p.mu.Lock() defer p.mu.Unlock() + return p.readLocked(ctx, ops) +} +func (p *Pipe) readLocked(ctx context.Context, ops readOps) (int64, error) { // Is the pipe empty? if p.view.Size() == 0 { if !p.HasWriters() { @@ -246,7 +249,10 @@ type writeOps struct { func (p *Pipe) write(ctx context.Context, ops writeOps) (int64, error) { p.mu.Lock() defer p.mu.Unlock() + return p.writeLocked(ctx, ops) +} +func (p *Pipe) writeLocked(ctx context.Context, ops writeOps) (int64, error) { // Can't write to a pipe with no readers. if !p.HasReaders() { return 0, syscall.EPIPE diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe.go b/pkg/sentry/kernel/pipe/pipe_unsafe.go new file mode 100644 index 000000000..dd60cba24 --- /dev/null +++ b/pkg/sentry/kernel/pipe/pipe_unsafe.go @@ -0,0 +1,35 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pipe + +import ( + "unsafe" +) + +// lockTwoPipes locks both x.mu and y.mu in an order that is guaranteed to be +// consistent for both lockTwoPipes(x, y) and lockTwoPipes(y, x), such that +// concurrent calls cannot deadlock. +// +// Preconditions: x != y. +func lockTwoPipes(x, y *Pipe) { + // Lock the two pipes in order of increasing address. + if uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y)) { + x.mu.Lock() + y.mu.Lock() + } else { + y.mu.Lock() + x.mu.Lock() + } +} diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index b54f08a30..2602bed72 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -16,7 +16,9 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/buffer" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" @@ -150,7 +152,9 @@ func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) * return &fd.vfsfd } -// VFSPipeFD implements vfs.FileDescriptionImpl for pipes. +// VFSPipeFD implements vfs.FileDescriptionImpl for pipes. It also implements +// non-atomic usermem.IO methods, allowing it to be passed as usermem.IO to +// other FileDescriptions for splice(2) and tee(2). 
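The comment above is the heart of this change: because VFSPipeFD also speaks usermem.IO, the splice and tee paths added later in this patch can hand a pipe directly to another file description's Read/Write and skip any staging buffer. Condensed from the vfs2/splice.go hunk below (not new code, just the shape of the call, with the offset-handling branches dropped):

```go
// When exactly one end of splice(2) is a pipe, the pipe FD itself backs the
// usermem.IO of the sequence passed to the other file, so the other file's
// ordinary Read/Write path moves the bytes in and out of the pipe.
switch {
case inIsPipe && outIsPipe:
	n, err = pipe.Splice(t, outPipeFD, inPipeFD, count)
case inIsPipe: // pipe -> file: the file "copies in" from the pipe.
	n, err = outFile.Write(t, inPipeFD.IOSequence(count), vfs.WriteOptions{})
case outIsPipe: // file -> pipe: the file "copies out" into the pipe.
	n, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{})
}
```

The address range in IOSequence is never dereferenced; the pipe-backed CopyIn/CopyOut ignore the address argument, so only the byte count matters, and a partial transfer with no underlying error is reported as ErrWouldBlock so the splice loop can either block or surface EAGAIN.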
type VFSPipeFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl @@ -229,3 +233,216 @@ func (fd *VFSPipeFD) PipeSize() int64 { func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) { return fd.pipe.SetFifoSize(size) } + +// IOSequence returns a useremm.IOSequence that reads up to count bytes from, +// or writes up to count bytes to, fd. +func (fd *VFSPipeFD) IOSequence(count int64) usermem.IOSequence { + return usermem.IOSequence{ + IO: fd, + Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}), + } +} + +// CopyIn implements usermem.IO.CopyIn. +func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) { + origCount := int64(len(dst)) + n, err := fd.pipe.read(ctx, readOps{ + left: func() int64 { + return int64(len(dst)) + }, + limit: func(l int64) { + dst = dst[:l] + }, + read: func(view *buffer.View) (int64, error) { + n, err := view.ReadAt(dst, 0) + view.TrimFront(int64(n)) + return int64(n), err + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventOut) + } + if err == nil && n != origCount { + return int(n), syserror.ErrWouldBlock + } + return int(n), err +} + +// CopyOut implements usermem.IO.CopyOut. +func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) { + origCount := int64(len(src)) + n, err := fd.pipe.write(ctx, writeOps{ + left: func() int64 { + return int64(len(src)) + }, + limit: func(l int64) { + src = src[:l] + }, + write: func(view *buffer.View) (int64, error) { + view.Append(src) + return int64(len(src)), nil + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventIn) + } + if err == nil && n != origCount { + return int(n), syserror.ErrWouldBlock + } + return int(n), err +} + +// ZeroOut implements usermem.IO.ZeroOut. +func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { + origCount := toZero + n, err := fd.pipe.write(ctx, writeOps{ + left: func() int64 { + return toZero + }, + limit: func(l int64) { + toZero = l + }, + write: func(view *buffer.View) (int64, error) { + view.Grow(view.Size()+toZero, true /* zero */) + return toZero, nil + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventIn) + } + if err == nil && n != origCount { + return n, syserror.ErrWouldBlock + } + return n, err +} + +// CopyInTo implements usermem.IO.CopyInTo. +func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { + count := ars.NumBytes() + if count == 0 { + return 0, nil + } + origCount := count + n, err := fd.pipe.read(ctx, readOps{ + left: func() int64 { + return count + }, + limit: func(l int64) { + count = l + }, + read: func(view *buffer.View) (int64, error) { + n, err := view.ReadToSafememWriter(dst, uint64(count)) + view.TrimFront(int64(n)) + return int64(n), err + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventOut) + } + if err == nil && n != origCount { + return n, syserror.ErrWouldBlock + } + return n, err +} + +// CopyOutFrom implements usermem.IO.CopyOutFrom. 
+func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { + count := ars.NumBytes() + if count == 0 { + return 0, nil + } + origCount := count + n, err := fd.pipe.write(ctx, writeOps{ + left: func() int64 { + return count + }, + limit: func(l int64) { + count = l + }, + write: func(view *buffer.View) (int64, error) { + n, err := view.WriteFromSafememReader(src, uint64(count)) + return int64(n), err + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventIn) + } + if err == nil && n != origCount { + return n, syserror.ErrWouldBlock + } + return n, err +} + +// SwapUint32 implements usermem.IO.SwapUint32. +func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr usermem.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { + // How did a pipe get passed as the virtual address space to futex(2)? + panic("VFSPipeFD.SwapUint32 called unexpectedly") +} + +// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32. +func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr usermem.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { + panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly") +} + +// LoadUint32 implements usermem.IO.LoadUint32. +func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) { + panic("VFSPipeFD.LoadUint32 called unexpectedly") +} + +// Splice reads up to count bytes from src and writes them to dst. It returns +// the number of bytes moved. +// +// Preconditions: count > 0. +func Splice(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { + return spliceOrTee(ctx, dst, src, count, true /* removeFromSrc */) +} + +// Tee reads up to count bytes from src and writes them to dst, without +// removing the read bytes from src. It returns the number of bytes copied. +// +// Preconditions: count > 0. +func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { + return spliceOrTee(ctx, dst, src, count, false /* removeFromSrc */) +} + +// Preconditions: count > 0. 
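spliceOrTee, which follows, has to hold both pipes' mutexes at once and takes them through lockTwoPipes from pipe_unsafe.go earlier in this patch. A self-contained illustration of why acquiring the locks in address order avoids deadlock; fakePipe and lockBoth are stand-ins, not the real types:

```go
package main

import (
	"fmt"
	"sync"
	"unsafe"
)

// fakePipe stands in for pipe.Pipe; only the mutex matters here.
type fakePipe struct{ mu sync.Mutex }

// lockBoth mimics lockTwoPipes: whichever order the caller names the pipes
// in, the locks are acquired in increasing address order, so lockBoth(a, b)
// in one goroutine cannot deadlock against lockBoth(b, a) in another.
func lockBoth(x, y *fakePipe) {
	if uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y)) {
		x.mu.Lock()
		y.mu.Lock()
	} else {
		y.mu.Lock()
		x.mu.Lock()
	}
}

func main() {
	a, b := new(fakePipe), new(fakePipe)
	var wg sync.WaitGroup
	for i := 0; i < 1000; i++ {
		wg.Add(2)
		// With naive "lock first argument, then second" ordering these two
		// goroutines could deadlock; with address ordering they cannot.
		go func() { defer wg.Done(); lockBoth(a, b); b.mu.Unlock(); a.mu.Unlock() }()
		go func() { defer wg.Done(); lockBoth(b, a); a.mu.Unlock(); b.mu.Unlock() }()
	}
	wg.Wait()
	fmt.Println("no deadlock")
}
```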
+func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) { + if dst.pipe == src.pipe { + return 0, syserror.EINVAL + } + + lockTwoPipes(dst.pipe, src.pipe) + defer dst.pipe.mu.Unlock() + defer src.pipe.mu.Unlock() + + n, err := dst.pipe.writeLocked(ctx, writeOps{ + left: func() int64 { + return count + }, + limit: func(l int64) { + count = l + }, + write: func(dstView *buffer.View) (int64, error) { + return src.pipe.readLocked(ctx, readOps{ + left: func() int64 { + return count + }, + limit: func(l int64) { + count = l + }, + read: func(srcView *buffer.View) (int64, error) { + n, err := srcView.ReadToSafememWriter(dstView, uint64(count)) + if n > 0 && removeFromSrc { + srcView.TrimFront(int64(n)) + } + return int64(n), err + }, + }) + }, + }) + if n > 0 { + dst.pipe.Notify(waiter.EventIn) + src.pipe.Notify(waiter.EventOut) + } + return n, err +} diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index f882ef840..d56927ff5 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -22,6 +22,7 @@ go_library( "setstat.go", "signal.go", "socket.go", + "splice.go", "stat.go", "stat_amd64.go", "stat_arm64.go", diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go new file mode 100644 index 000000000..8f3c22a02 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -0,0 +1,286 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" +) + +// Splice implements Linux syscall splice(2). +func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + inFD := args[0].Int() + inOffsetPtr := args[1].Pointer() + outFD := args[2].Int() + outOffsetPtr := args[3].Pointer() + count := int64(args[4].SizeT()) + flags := args[5].Int() + + if count == 0 { + return 0, nil, nil + } + if count > int64(kernel.MAX_RW_COUNT) { + count = int64(kernel.MAX_RW_COUNT) + } + + // Check for invalid flags. + if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { + return 0, nil, syserror.EINVAL + } + + // Get file descriptions. + inFile := t.GetFileVFS2(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + outFile := t.GetFileVFS2(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + + // Check that both files support the required directionality. + if !inFile.IsReadable() || !outFile.IsWritable() { + return 0, nil, syserror.EBADF + } + + // The operation is non-blocking if anything is non-blocking. + // + // N.B. 
This is a rather simplistic heuristic that avoids some + // poor edge case behavior since the exact semantics here are + // underspecified and vary between versions of Linux itself. + nonBlock := ((inFile.StatusFlags()|outFile.StatusFlags())&linux.O_NONBLOCK != 0) || (flags&linux.SPLICE_F_NONBLOCK != 0) + + // At least one file description must represent a pipe. + inPipeFD, inIsPipe := inFile.Impl().(*pipe.VFSPipeFD) + outPipeFD, outIsPipe := outFile.Impl().(*pipe.VFSPipeFD) + if !inIsPipe && !outIsPipe { + return 0, nil, syserror.EINVAL + } + + // Copy in offsets. + inOffset := int64(-1) + if inOffsetPtr != 0 { + if inIsPipe { + return 0, nil, syserror.ESPIPE + } + if inFile.Options().DenyPRead { + return 0, nil, syserror.EINVAL + } + if _, err := t.CopyIn(inOffsetPtr, &inOffset); err != nil { + return 0, nil, err + } + if inOffset < 0 { + return 0, nil, syserror.EINVAL + } + } + outOffset := int64(-1) + if outOffsetPtr != 0 { + if outIsPipe { + return 0, nil, syserror.ESPIPE + } + if outFile.Options().DenyPWrite { + return 0, nil, syserror.EINVAL + } + if _, err := t.CopyIn(outOffsetPtr, &outOffset); err != nil { + return 0, nil, err + } + if outOffset < 0 { + return 0, nil, syserror.EINVAL + } + } + + // Move data. + var ( + n int64 + err error + inCh chan struct{} + outCh chan struct{} + ) + for { + // If both input and output are pipes, delegate to the pipe + // implementation. Otherwise, exactly one end is a pipe, which we + // ensure is consistently ordered after the non-pipe FD's locks by + // passing the pipe FD as usermem.IO to the non-pipe end. + switch { + case inIsPipe && outIsPipe: + n, err = pipe.Splice(t, outPipeFD, inPipeFD, count) + case inIsPipe: + if outOffset != -1 { + n, err = outFile.PWrite(t, inPipeFD.IOSequence(count), outOffset, vfs.WriteOptions{}) + outOffset += n + } else { + n, err = outFile.Write(t, inPipeFD.IOSequence(count), vfs.WriteOptions{}) + } + case outIsPipe: + if inOffset != -1 { + n, err = inFile.PRead(t, outPipeFD.IOSequence(count), inOffset, vfs.ReadOptions{}) + inOffset += n + } else { + n, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{}) + } + } + if n != 0 || err != syserror.ErrWouldBlock || nonBlock { + break + } + + // Note that the blocking behavior here is a bit different than the + // normal pattern. Because we need to have both data to read and data + // to write simultaneously, we actually explicitly block on both of + // these cases in turn before returning to the splice operation. + if inFile.Readiness(eventMaskRead)&eventMaskRead == 0 { + if inCh == nil { + inCh = make(chan struct{}, 1) + inW, _ := waiter.NewChannelEntry(inCh) + inFile.EventRegister(&inW, eventMaskRead) + defer inFile.EventUnregister(&inW) + continue // Need to refresh readiness. + } + if err = t.Block(inCh); err != nil { + break + } + } + if outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 { + if outCh == nil { + outCh = make(chan struct{}, 1) + outW, _ := waiter.NewChannelEntry(outCh) + outFile.EventRegister(&outW, eventMaskWrite) + defer outFile.EventUnregister(&outW) + continue // Need to refresh readiness. + } + if err = t.Block(outCh); err != nil { + break + } + } + } + + // Copy updated offsets out. 
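	// Per splice(2), when an offset pointer is supplied the starting offset is
	// read from userspace and the updated offset is written back, while the
	// file description's own offset is left untouched; the write-back below
	// happens regardless of how much data was actually moved.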
+ if inOffsetPtr != 0 { + if _, err := t.CopyOut(inOffsetPtr, &inOffset); err != nil { + return 0, nil, err + } + } + if outOffsetPtr != 0 { + if _, err := t.CopyOut(outOffsetPtr, &outOffset); err != nil { + return 0, nil, err + } + } + + if n == 0 { + return 0, nil, err + } + return uintptr(n), nil, nil +} + +// Tee implements Linux syscall tee(2). +func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + inFD := args[0].Int() + outFD := args[1].Int() + count := int64(args[2].SizeT()) + flags := args[3].Int() + + if count == 0 { + return 0, nil, nil + } + if count > int64(kernel.MAX_RW_COUNT) { + count = int64(kernel.MAX_RW_COUNT) + } + + // Check for invalid flags. + if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { + return 0, nil, syserror.EINVAL + } + + // Get file descriptions. + inFile := t.GetFileVFS2(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + outFile := t.GetFileVFS2(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + + // Check that both files support the required directionality. + if !inFile.IsReadable() || !outFile.IsWritable() { + return 0, nil, syserror.EBADF + } + + // The operation is non-blocking if anything is non-blocking. + // + // N.B. This is a rather simplistic heuristic that avoids some + // poor edge case behavior since the exact semantics here are + // underspecified and vary between versions of Linux itself. + nonBlock := ((inFile.StatusFlags()|outFile.StatusFlags())&linux.O_NONBLOCK != 0) || (flags&linux.SPLICE_F_NONBLOCK != 0) + + // Both file descriptions must represent pipes. + inPipeFD, inIsPipe := inFile.Impl().(*pipe.VFSPipeFD) + outPipeFD, outIsPipe := outFile.Impl().(*pipe.VFSPipeFD) + if !inIsPipe || !outIsPipe { + return 0, nil, syserror.EINVAL + } + + // Copy data. + var ( + inCh chan struct{} + outCh chan struct{} + ) + for { + n, err := pipe.Tee(t, outPipeFD, inPipeFD, count) + if n != 0 { + return uintptr(n), nil, nil + } + if err != syserror.ErrWouldBlock || nonBlock { + return 0, nil, err + } + + // Note that the blocking behavior here is a bit different than the + // normal pattern. Because we need to have both data to read and data + // to write simultaneously, we actually explicitly block on both of + // these cases in turn before returning to the tee operation. + if inFile.Readiness(eventMaskRead)&eventMaskRead == 0 { + if inCh == nil { + inCh = make(chan struct{}, 1) + inW, _ := waiter.NewChannelEntry(inCh) + inFile.EventRegister(&inW, eventMaskRead) + defer inFile.EventUnregister(&inW) + continue // Need to refresh readiness. + } + if err := t.Block(inCh); err != nil { + return 0, nil, err + } + } + if outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 { + if outCh == nil { + outCh = make(chan struct{}, 1) + outW, _ := waiter.NewChannelEntry(outCh) + outFile.EventRegister(&outW, eventMaskWrite) + defer outFile.EventUnregister(&outW) + continue // Need to refresh readiness. 
+ } + if err := t.Block(outCh); err != nil { + return 0, nil, err + } + } + } +} diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index a332d01bd..083fdcf82 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -134,8 +134,8 @@ func Override() { s.Table[269] = syscalls.Supported("faccessat", Faccessat) s.Table[270] = syscalls.Supported("pselect", Pselect) s.Table[271] = syscalls.Supported("ppoll", Ppoll) - delete(s.Table, 275) // splice - delete(s.Table, 276) // tee + s.Table[275] = syscalls.Supported("splice", Splice) + s.Table[276] = syscalls.Supported("tee", Tee) s.Table[277] = syscalls.Supported("sync_file_range", SyncFileRange) s.Table[280] = syscalls.Supported("utimensat", Utimensat) s.Table[281] = syscalls.Supported("epoll_pwait", EpollPwait) diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index cfabd936c..bb294563d 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -210,6 +210,11 @@ func (fd *FileDescription) VirtualDentry() VirtualDentry { return fd.vd } +// Options returns the options passed to fd.Init(). +func (fd *FileDescription) Options() FileDescriptionOptions { + return fd.opts +} + // StatusFlags returns file description status flags, as for fcntl(F_GETFL). func (fd *FileDescription) StatusFlags() uint32 { return atomic.LoadUint32(&fd.statusFlags) diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index f103e2e56..08fc4b1b7 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -430,6 +430,55 @@ TEST(SpliceTest, TwoPipes) { EXPECT_EQ(memcmp(rbuf.data(), buf.data(), kPageSize), 0); } +TEST(SpliceTest, TwoPipesCircular) { + // This test deadlocks the sentry on VFS1 because VFS1 splice ordering is + // based on fs.File.UniqueID, which does not prevent circular ordering between + // e.g. inode-level locks taken by fs.FileOperations. + SKIP_IF(IsRunningWithVFS1()); + + // Create two pipes. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor first_rfd(fds[0]); + const FileDescriptor first_wfd(fds[1]); + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor second_rfd(fds[0]); + const FileDescriptor second_wfd(fds[1]); + + // On Linux, each pipe is normally limited to + // include/linux/pipe_fs_i.h:PIPE_DEF_BUFFERS buffers worth of data. + constexpr size_t PIPE_DEF_BUFFERS = 16; + + // Write some data to each pipe. Below we splice 1 byte at a time between + // pipes, which very quickly causes each byte to be stored in a separate + // buffer, so we must ensure that the total amount of data in the system is <= + // PIPE_DEF_BUFFERS bytes. + std::vector buf(PIPE_DEF_BUFFERS / 2); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(first_wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT(write(second_wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Have another thread splice from the second pipe to the first, while we + // splice from the first to the second. The test passes if this does not + // deadlock. 
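  // Without a globally consistent lock order across pipe pairs, one task could
  // hold the first pipe's lock while waiting for the second and the other task
  // the reverse, so neither splice would ever make progress.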
+ const int kIterations = 1000; + DisableSave ds; + ScopedThread t([&]() { + for (int i = 0; i < kIterations; i++) { + ASSERT_THAT( + splice(second_rfd.get(), nullptr, first_wfd.get(), nullptr, 1, 0), + SyscallSucceedsWithValue(1)); + } + }); + for (int i = 0; i < kIterations; i++) { + ASSERT_THAT( + splice(first_rfd.get(), nullptr, second_wfd.get(), nullptr, 1, 0), + SyscallSucceedsWithValue(1)); + } +} + TEST(SpliceTest, Blocking) { // Create two new pipes. int first[2], second[2]; -- cgit v1.2.3 From 744e8d6e4311f0aef47215a227b0eef582034cfd Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Wed, 27 May 2020 05:21:33 -0400 Subject: minor changes in exec_binary test case for Arm64 Signed-off-by: Bin Lu --- test/syscalls/linux/exec_binary.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc index 1a9f203b9..18d2f22c1 100644 --- a/test/syscalls/linux/exec_binary.cc +++ b/test/syscalls/linux/exec_binary.cc @@ -438,7 +438,12 @@ TEST(ElfTest, MissingText) { ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceedsWithValue(child)); // It runs off the end of the zeroes filling the end of the page. +#if defined(__x86_64__) EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) << status; +#elif defined(__aarch64__) + // 0 is an invalid instruction opcode on arm64. + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGILL) << status; +#endif } // Typical ELF with a data + bss segment -- cgit v1.2.3 From 7b79370c105b28a1cffa2d12d81898fc6b278728 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Thu, 28 May 2020 14:42:01 -0700 Subject: Add pcap logging to pcaketimpact. This makes debugging packetimpact tests much easier. PiperOrigin-RevId: 313662654 --- test/packetimpact/runner/packetimpact_test.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index ac13c8543..e58a1fb1b 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -18,9 +18,12 @@ package packetimpact_test import ( "flag" "fmt" + "io/ioutil" "log" "math/rand" "net" + "os" + "os/exec" "path" "strings" "testing" @@ -117,10 +120,18 @@ func TestOne(t *testing.T) { }(dn) } + tmpDir, err := ioutil.TempDir("", "container-output") + if err != nil { + t.Fatal("creating temp dir:", err) + } + defer os.RemoveAll(tmpDir) + + const testOutputDir = "/tmp/testoutput" + runOpts := dockerutil.RunOpts{ Image: "packetimpact", CapAdd: []string{"NET_ADMIN"}, - Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm"}, + Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm", "-v", tmpDir + ":" + testOutputDir}, Foreground: true, } @@ -187,7 +198,10 @@ func TestOne(t *testing.T) { // Run tcpdump in the test bench unbuffered, without DNS resolution, just on // the interface with the test packets. 
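	// -S prints absolute TCP sequence numbers, -U flushes each packet as it is
	// captured, and -w streams the raw capture into testOutputDir, which is
	// bind-mounted from tmpDir above and copied to TEST_UNDECLARED_OUTPUTS_DIR
	// by the deferred cleanup below.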
snifferArgs := []string{ - "tcpdump", "-S", "-vvv", "-U", "-n", "-i", testNetDev, + "tcpdump", + "-S", "-vvv", "-U", "-n", + "-i", testNetDev, + "-w", testOutputDir + "/dump.pcap", } snifferRegex := "tcpdump: listening.*\n" if *tshark { @@ -201,6 +215,12 @@ func TestOne(t *testing.T) { snifferRegex = "Capturing on.*\n" } + defer func() { + if err := exec.Command("/bin/cp", "-r", tmpDir, os.Getenv("TEST_UNDECLARED_OUTPUTS_DIR")).Run(); err != nil { + t.Error("unable to copy container output files:", err) + } + }() + if err := testbench.Create(runOpts, snifferArgs...); err != nil { t.Fatalf("unable to create container %s: %s", testbench.Name, err) } -- cgit v1.2.3 From f7418e21590e271302a3c375323950c209ce5ced Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 28 May 2020 14:45:52 -0700 Subject: Move Cleanup to its own package PiperOrigin-RevId: 313663382 --- pkg/cleanup/BUILD | 17 +++++++++ pkg/cleanup/cleanup.go | 60 ++++++++++++++++++++++++++++++ pkg/cleanup/cleanup_test.go | 66 +++++++++++++++++++++++++++++++++ runsc/cgroup/BUILD | 2 +- runsc/cgroup/cgroup.go | 4 +- runsc/container/BUILD | 2 + runsc/container/container.go | 7 ++-- runsc/container/multi_container_test.go | 26 ++++--------- runsc/fsgofer/BUILD | 2 +- runsc/fsgofer/fsgofer.go | 8 ++-- runsc/sandbox/BUILD | 1 + runsc/sandbox/sandbox.go | 3 +- runsc/specutils/specutils.go | 30 --------------- test/root/BUILD | 1 + test/root/crictl_test.go | 27 +++++--------- test/root/oom_score_adj_test.go | 30 ++++----------- 16 files changed, 186 insertions(+), 100 deletions(-) create mode 100644 pkg/cleanup/BUILD create mode 100644 pkg/cleanup/cleanup.go create mode 100644 pkg/cleanup/cleanup_test.go (limited to 'test') diff --git a/pkg/cleanup/BUILD b/pkg/cleanup/BUILD new file mode 100644 index 000000000..5c34b9872 --- /dev/null +++ b/pkg/cleanup/BUILD @@ -0,0 +1,17 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "cleanup", + srcs = ["cleanup.go"], + visibility = ["//:sandbox"], + deps = [ + ], +) + +go_test( + name = "cleanup_test", + srcs = ["cleanup_test.go"], + library = ":cleanup", +) diff --git a/pkg/cleanup/cleanup.go b/pkg/cleanup/cleanup.go new file mode 100644 index 000000000..14a05f076 --- /dev/null +++ b/pkg/cleanup/cleanup.go @@ -0,0 +1,60 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package cleanup provides utilities to clean "stuff" on defers. +package cleanup + +// Cleanup allows defers to be aborted when cleanup needs to happen +// conditionally. Usage: +// cu := cleanup.Make(func() { f.Close() }) +// defer cu.Clean() // failure before release is called will close the file. +// ... +// cu.Add(func() { f2.Close() }) // Adds another cleanup function +// ... +// cu.Release() // on success, aborts closing the file. +// return f +type Cleanup struct { + cleaners []func() +} + +// Make creates a new Cleanup object. 
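// The zero value Cleanup is also ready to use, so callers that only aggregate
// cleanups and hand them off can write, for example:
//
//	cu := Cleanup{}
//	defer cu.Clean()
//	cu.Add(func() { os.RemoveAll(dir) })
//	...
//	return cu.Release() // transfers responsibility for cleanup to the caller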
+func Make(f func()) Cleanup { + return Cleanup{cleaners: []func(){f}} +} + +// Add adds a new function to be called on Clean(). +func (c *Cleanup) Add(f func()) { + c.cleaners = append(c.cleaners, f) +} + +// Clean calls all cleanup functions in reverse order. +func (c *Cleanup) Clean() { + clean(c.cleaners) + c.cleaners = nil +} + +// Release releases the cleanup from its duties, i.e. cleanup functions are not +// called after this point. Returns a function that calls all registered +// functions in case the caller has use for them. +func (c *Cleanup) Release() func() { + old := c.cleaners + c.cleaners = nil + return func() { clean(old) } +} + +func clean(cleaners []func()) { + for i := len(cleaners) - 1; i >= 0; i-- { + cleaners[i]() + } +} diff --git a/pkg/cleanup/cleanup_test.go b/pkg/cleanup/cleanup_test.go new file mode 100644 index 000000000..ab3d9ed95 --- /dev/null +++ b/pkg/cleanup/cleanup_test.go @@ -0,0 +1,66 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cleanup + +import "testing" + +func testCleanupHelper(clean, cleanAdd *bool, release bool) func() { + cu := Make(func() { + *clean = true + }) + cu.Add(func() { + *cleanAdd = true + }) + defer cu.Clean() + if release { + return cu.Release() + } + return nil +} + +func TestCleanup(t *testing.T) { + clean := false + cleanAdd := false + testCleanupHelper(&clean, &cleanAdd, false) + if !clean { + t.Fatalf("cleanup function was not called.") + } + if !cleanAdd { + t.Fatalf("added cleanup function was not called.") + } +} + +func TestRelease(t *testing.T) { + clean := false + cleanAdd := false + cleaner := testCleanupHelper(&clean, &cleanAdd, true) + + // Check that clean was not called after release. + if clean { + t.Fatalf("cleanup function was called.") + } + if cleanAdd { + t.Fatalf("added cleanup function was called.") + } + + // Call the cleaner function and check that both cleanup functions are called. 
+ cleaner() + if !clean { + t.Fatalf("cleanup function was not called.") + } + if !cleanAdd { + t.Fatalf("added cleanup function was not called.") + } +} diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index d4c7bdfbb..c087e1a3c 100644 --- a/runsc/cgroup/BUILD +++ b/runsc/cgroup/BUILD @@ -7,8 +7,8 @@ go_library( srcs = ["cgroup.go"], visibility = ["//:sandbox"], deps = [ + "//pkg/cleanup", "//pkg/log", - "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", ], diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 19c8b0db6..ef01820ef 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -31,8 +31,8 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -246,7 +246,7 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { // The Cleanup object cleans up partially created cgroups when an error occurs. // Errors occuring during cleanup itself are ignored. - clean := specutils.MakeCleanup(func() { _ = c.Uninstall() }) + clean := cleanup.Make(func() { _ = c.Uninstall() }) defer clean.Clean() for key, cfg := range controllers { diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 46154df60..9a856d65c 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -16,6 +16,7 @@ go_library( ], deps = [ "//pkg/abi/linux", + "//pkg/cleanup", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/sighandling", @@ -53,6 +54,7 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/bits", + "//pkg/cleanup", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/kernel", diff --git a/runsc/container/container.go b/runsc/container/container.go index 8539f252d..6d297d0df 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -31,6 +31,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/sighandling" @@ -293,7 +294,7 @@ func New(conf *boot.Config, args Args) (*Container, error) { } // The Cleanup object cleans up partially created containers when an error // occurs. Any errors occurring during cleanup itself are ignored. - cu := specutils.MakeCleanup(func() { _ = c.Destroy() }) + cu := cleanup.Make(func() { _ = c.Destroy() }) defer cu.Clean() // Lock the container metadata file to prevent concurrent creations of @@ -402,7 +403,7 @@ func (c *Container) Start(conf *boot.Config) error { if err := c.Saver.lock(); err != nil { return err } - unlock := specutils.MakeCleanup(func() { c.Saver.unlock() }) + unlock := cleanup.Make(func() { c.Saver.unlock() }) defer unlock.Clean() if err := c.requireStatus("start", Created); err != nil { @@ -506,7 +507,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) { } // Clean up partially created container if an error occurs. // Any errors returned by Destroy() itself are ignored. 
- cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { c.Destroy() }) defer cu.Clean() diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index e3704b453..dc825abd9 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -27,6 +27,7 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" @@ -64,29 +65,16 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.") } - var ( - containers []*Container - cleanups []func() - ) - cleanups = append(cleanups, func() { - for _, c := range containers { - c.Destroy() - } - }) - cleanupAll := func() { - for _, c := range cleanups { - c() - } - } - localClean := specutils.MakeCleanup(cleanupAll) - defer localClean.Clean() + cu := cleanup.Cleanup{} + defer cu.Clean() + var containers []*Container for i, spec := range specs { bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { return nil, nil, fmt.Errorf("error setting up container: %v", err) } - cleanups = append(cleanups, cleanup) + cu.Add(cleanup) args := Args{ ID: ids[i], @@ -97,6 +85,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C if err != nil { return nil, nil, fmt.Errorf("error creating container: %v", err) } + cu.Add(func() { cont.Destroy() }) containers = append(containers, cont) if err := cont.Start(conf); err != nil { @@ -104,8 +93,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C } } - localClean.Release() - return containers, cleanupAll, nil + return containers, cu.Release(), nil } type execDesc struct { diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index 64a406ae2..1036b0630 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -13,12 +13,12 @@ go_library( visibility = ["//runsc:__subpackages__"], deps = [ "//pkg/abi/linux", + "//pkg/cleanup", "//pkg/fd", "//pkg/log", "//pkg/p9", "//pkg/sync", "//pkg/syserr", - "//runsc/specutils", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 1942f50d7..edc239013 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -33,11 +33,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -439,7 +439,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid if err != nil { return nil, nil, p9.QID{}, 0, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { child.Close() // Best effort attempt to remove the file in case of failure. if err := syscall.Unlinkat(l.file.FD(), name); err != nil { @@ -480,7 +480,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil { return p9.QID{}, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { // Best effort attempt to remove the dir in case of failure. 
if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil { log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err) @@ -864,7 +864,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9. if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil { return p9.QID{}, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { // Best effort attempt to remove the symlink in case of failure. if err := syscall.Unlinkat(l.file.FD(), newName); err != nil { log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err) diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index c95d50294..035dcd3e3 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -13,6 +13,7 @@ go_library( "//runsc:__subpackages__", ], deps = [ + "//pkg/cleanup", "//pkg/control/client", "//pkg/control/server", "//pkg/log", diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index e4ec16e2f..6e1a2af25 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -30,6 +30,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/control/client" "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/log" @@ -119,7 +120,7 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) { s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup} // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. - c := specutils.MakeCleanup(func() { + c := cleanup.Make(func() { err := s.destroy() log.Warningf("error destroying sandbox: %v", err) }) diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 837d5e238..f1fa573c5 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -444,36 +444,6 @@ func ContainsStr(strs []string, str string) bool { return false } -// Cleanup allows defers to be aborted when cleanup needs to happen -// conditionally. Usage: -// c := MakeCleanup(func() { f.Close() }) -// defer c.Clean() // any failure before release is called will close the file. -// ... -// c.Release() // on success, aborts closing the file and return it. -// return f -type Cleanup struct { - clean func() -} - -// MakeCleanup creates a new Cleanup object. -func MakeCleanup(f func()) Cleanup { - return Cleanup{clean: f} -} - -// Clean calls the cleanup function. -func (c *Cleanup) Clean() { - if c.clean != nil { - c.clean() - c.clean = nil - } -} - -// Release releases the cleanup from its duties, i.e. cleanup function is not -// called after this point. -func (c *Cleanup) Release() { - c.clean = nil -} - // RetryEintr retries the function until an error different than EINTR is // returned. 
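// A typical call site wraps a raw syscall; an illustrative sketch (the wrapped
// syscall, fd and buf are placeholders, not taken from this change):
//
//	n, _, err := RetryEintr(func() (uintptr, uintptr, error) {
//		r1, r2, errno := syscall.Syscall(syscall.SYS_READ, uintptr(fd),
//			uintptr(unsafe.Pointer(&buf[0])), uintptr(len(buf)))
//		if errno != 0 {
//			return r1, r2, errno
//		}
//		return r1, r2, nil
//	})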
func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { diff --git a/test/root/BUILD b/test/root/BUILD index 639e293e3..a9e91ccd6 100644 --- a/test/root/BUILD +++ b/test/root/BUILD @@ -33,6 +33,7 @@ go_test( ], visibility = ["//:sandbox"], deps = [ + "//pkg/cleanup", "//pkg/test/criutil", "//pkg/test/dockerutil", "//pkg/test/testutil", diff --git a/test/root/crictl_test.go b/test/root/crictl_test.go index 85007dcce..c138e02dc 100644 --- a/test/root/crictl_test.go +++ b/test/root/crictl_test.go @@ -30,10 +30,10 @@ import ( "testing" "time" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/test/criutil" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/pkg/test/testutil" - "gvisor.dev/gvisor/runsc/specutils" ) // Tests for crictl have to be run as root (rather than in a user namespace) @@ -272,27 +272,20 @@ disabled_plugins = ["restart"] // * Runs containerd and waits for it to reach a "ready" state for testing. // * Returns a cleanup function that should be called at the end of the test. func setup(t *testing.T) (*criutil.Crictl, func(), error) { - var cleanups []func() - cleanupFunc := func() { - for i := len(cleanups) - 1; i >= 0; i-- { - cleanups[i]() - } - } - cleanup := specutils.MakeCleanup(cleanupFunc) - defer cleanup.Clean() - // Create temporary containerd root and state directories, and a socket // via which crictl and containerd communicate. containerdRoot, err := ioutil.TempDir(testutil.TmpDir(), "containerd-root") if err != nil { t.Fatalf("failed to create containerd root: %v", err) } - cleanups = append(cleanups, func() { os.RemoveAll(containerdRoot) }) + cu := cleanup.Make(func() { os.RemoveAll(containerdRoot) }) + defer cu.Clean() + containerdState, err := ioutil.TempDir(testutil.TmpDir(), "containerd-state") if err != nil { t.Fatalf("failed to create containerd state: %v", err) } - cleanups = append(cleanups, func() { os.RemoveAll(containerdState) }) + cu.Add(func() { os.RemoveAll(containerdState) }) sockAddr := filepath.Join(testutil.TmpDir(), "containerd-test.sock") // We rewrite a configuration. This is based on the current docker @@ -305,7 +298,7 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) { if err != nil { t.Fatalf("failed to write containerd config") } - cleanups = append(cleanups, configCleanup) + cu.Add(configCleanup) // Start containerd. cmd := exec.Command(getContainerd(), @@ -321,7 +314,8 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) { stdout := &bytes.Buffer{} cmd.Stderr = io.MultiWriter(startupW, stderr) cmd.Stdout = io.MultiWriter(startupW, stdout) - cleanups = append(cleanups, func() { + cu.Add(func() { + // Log output in case of failure. t.Logf("containerd stdout: %s", stdout.String()) t.Logf("containerd stderr: %s", stderr.String()) }) @@ -338,15 +332,14 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) { // Kill must be the last cleanup (as it will be executed first). cc := criutil.NewCrictl(t, sockAddr) - cleanups = append(cleanups, func() { + cu.Add(func() { cc.CleanUp() // Remove tmp files, etc. if err := testutil.KillCommand(cmd); err != nil { log.Printf("error killing containerd: %v", err) } }) - cleanup.Release() - return cc, cleanupFunc, nil + return cc, cu.Release(), nil } // httpGet GETs the contents of a file served from a pod on port 80. 
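For reference, a test that consumes the refactored setup helper above uses the
returned release function in the usual way. A minimal caller-side sketch (the
test name and body are placeholders, not part of this change):

func TestExample(t *testing.T) {
	cc, cleanup, err := setup(t)
	if err != nil {
		t.Fatalf("failed to set up containerd: %v", err)
	}
	defer cleanup()

	// Use the crictl wrapper; all containerd state registered during setup is
	// torn down by the single deferred cleanup, in reverse registration order.
	_ = cc
}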
diff --git a/test/root/oom_score_adj_test.go b/test/root/oom_score_adj_test.go index 9a3cecd97..4243eb59e 100644 --- a/test/root/oom_score_adj_test.go +++ b/test/root/oom_score_adj_test.go @@ -20,6 +20,7 @@ import ( "testing" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/specutils" @@ -324,40 +325,26 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { } func startContainers(t *testing.T, specs []*specs.Spec, ids []string) ([]*container.Container, func(), error) { - var ( - containers []*container.Container - cleanups []func() - ) - cleanups = append(cleanups, func() { - for _, c := range containers { - c.Destroy() - } - }) - cleanupAll := func() { - for _, c := range cleanups { - c() - } - } - localClean := specutils.MakeCleanup(cleanupAll) - defer localClean.Clean() + var containers []*container.Container // All containers must share the same root. - rootDir, cleanup, err := testutil.SetupRootDir() + rootDir, clean, err := testutil.SetupRootDir() if err != nil { t.Fatalf("error creating root dir: %v", err) } - cleanups = append(cleanups, cleanup) + cu := cleanup.Make(clean) + defer cu.Clean() // Point this to from the configuration. conf := testutil.TestConfig(t) conf.RootDir = rootDir for i, spec := range specs { - bundleDir, cleanup, err := testutil.SetupBundleDir(spec) + bundleDir, clean, err := testutil.SetupBundleDir(spec) if err != nil { return nil, nil, fmt.Errorf("error setting up bundle: %v", err) } - cleanups = append(cleanups, cleanup) + cu.Add(clean) args := container.Args{ ID: ids[i], @@ -375,6 +362,5 @@ func startContainers(t *testing.T, specs []*specs.Spec, ids []string) ([]*contai } } - localClean.Release() - return containers, cleanupAll, nil + return containers, cu.Release(), nil } -- cgit v1.2.3 From c55b84e16aeb4481106661e3877c50edbf281762 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 28 May 2020 16:44:15 -0700 Subject: Enable iptables source filtering (-s/--source) --- pkg/sentry/socket/netfilter/netfilter.go | 21 ++++++++--- pkg/tcpip/stack/iptables.go | 47 +----------------------- pkg/tcpip/stack/iptables_types.go | 62 ++++++++++++++++++++++++++++++++ test/iptables/filter_input.go | 60 +++++++++++++++++++++++++++++++ test/iptables/iptables_test.go | 8 +++++ 5 files changed, 147 insertions(+), 51 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 789bb94c8..47ff48c00 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -64,6 +64,8 @@ const enableLogging = false var emptyFilter = stack.IPHeaderFilter{ Dst: "\x00\x00\x00\x00", DstMask: "\x00\x00\x00\x00", + Src: "\x00\x00\x00\x00", + SrcMask: "\x00\x00\x00\x00", } // nflog logs messages related to the writing and reading of iptables. 
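// A zeroed address/mask pair matches every packet: filterAddress (added to
// iptables_types.go later in this change) checks addr&mask == filterAddr byte
// by byte, and with Src "\x00\x00\x00\x00" and SrcMask "\x00\x00\x00\x00"
// every source address trivially satisfies addr&0 == 0.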
@@ -214,11 +216,16 @@ func convertNetstackToBinary(tablename string, table stack.Table) (linux.KernelI } copy(entry.IPTEntry.IP.Dst[:], rule.Filter.Dst) copy(entry.IPTEntry.IP.DstMask[:], rule.Filter.DstMask) + copy(entry.IPTEntry.IP.Src[:], rule.Filter.Src) + copy(entry.IPTEntry.IP.SrcMask[:], rule.Filter.SrcMask) copy(entry.IPTEntry.IP.OutputInterface[:], rule.Filter.OutputInterface) copy(entry.IPTEntry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask) if rule.Filter.DstInvert { entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_DSTIP } + if rule.Filter.SrcInvert { + entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_SRCIP + } if rule.Filter.OutputInterfaceInvert { entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_VIA_OUT } @@ -737,6 +744,9 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) { if len(iptip.Dst) != header.IPv4AddressSize || len(iptip.DstMask) != header.IPv4AddressSize { return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask)) } + if len(iptip.Src) != header.IPv4AddressSize || len(iptip.SrcMask) != header.IPv4AddressSize { + return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask)) + } n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0) if n == -1 { @@ -755,6 +765,9 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) { Dst: tcpip.Address(iptip.Dst[:]), DstMask: tcpip.Address(iptip.DstMask[:]), DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0, + Src: tcpip.Address(iptip.Src[:]), + SrcMask: tcpip.Address(iptip.SrcMask[:]), + SrcInvert: iptip.InverseFlags&linux.IPT_INV_SRCIP != 0, OutputInterface: ifname, OutputInterfaceMask: ifnameMask, OutputInterfaceInvert: iptip.InverseFlags&linux.IPT_INV_VIA_OUT != 0, @@ -765,15 +778,13 @@ func containsUnsupportedFields(iptip linux.IPTIP) bool { // The following features are supported: // - Protocol // - Dst and DstMask + // - Src and SrcMask // - The inverse destination IP check flag // - OutputInterface, OutputInterfaceMask and its inverse. - var emptyInetAddr = linux.InetAddr{} var emptyInterface = [linux.IFNAMSIZ]byte{} // Disable any supported inverse flags. - inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_VIA_OUT) - return iptip.Src != emptyInetAddr || - iptip.SrcMask != emptyInetAddr || - iptip.InputInterface != emptyInterface || + inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_SRCIP) | uint8(linux.IPT_INV_VIA_OUT) + return iptip.InputInterface != emptyInterface || iptip.InputInterfaceMask != emptyInterface || iptip.Flags != 0 || iptip.InverseFlags&^inverseMask != 0 diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go index 443423b3c..709ede3fa 100644 --- a/pkg/tcpip/stack/iptables.go +++ b/pkg/tcpip/stack/iptables.go @@ -16,7 +16,6 @@ package stack import ( "fmt" - "strings" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -314,7 +313,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx } // Check whether the packet matches the IP header filter. - if !filterMatch(rule.Filter, header.IPv4(pkt.NetworkHeader), hook, nicName) { + if !rule.Filter.match(header.IPv4(pkt.NetworkHeader), hook, nicName) { // Continue on to the next rule. 
return RuleJump, ruleIdx + 1 } @@ -335,47 +334,3 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx // All the matchers matched, so run the target. return rule.Target.Action(pkt, &it.connections, hook, gso, r, address) } - -func filterMatch(filter IPHeaderFilter, hdr header.IPv4, hook Hook, nicName string) bool { - // TODO(gvisor.dev/issue/170): Support other fields of the filter. - // Check the transport protocol. - if filter.Protocol != 0 && filter.Protocol != hdr.TransportProtocol() { - return false - } - - // Check the destination IP. - dest := hdr.DestinationAddress() - matches := true - for i := range filter.Dst { - if dest[i]&filter.DstMask[i] != filter.Dst[i] { - matches = false - break - } - } - if matches == filter.DstInvert { - return false - } - - // Check the output interface. - // TODO(gvisor.dev/issue/170): Add the check for FORWARD and POSTROUTING - // hooks after supported. - if hook == Output { - n := len(filter.OutputInterface) - if n == 0 { - return true - } - - // If the interface name ends with '+', any interface which begins - // with the name should be matched. - ifName := filter.OutputInterface - matches = true - if strings.HasSuffix(ifName, "+") { - matches = strings.HasPrefix(nicName, ifName[:n-1]) - } else { - matches = nicName == ifName - } - return filter.OutputInterfaceInvert != matches - } - - return true -} diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go index fe06007ae..a3bd3e700 100644 --- a/pkg/tcpip/stack/iptables_types.go +++ b/pkg/tcpip/stack/iptables_types.go @@ -15,7 +15,10 @@ package stack import ( + "strings" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" ) // A Hook specifies one of the hooks built into the network stack. @@ -159,6 +162,16 @@ type IPHeaderFilter struct { // comparison. DstInvert bool + // Src matches the source IP address. + Src tcpip.Address + + // SrcMask masks bits of the source IP address when comparing with Src. + SrcMask tcpip.Address + + // SrcInvert inverts the meaning of the source IP check, i.e. when true the + // filter will match packets that fail the source comparison. + SrcInvert bool + // OutputInterface matches the name of the outgoing interface for the // packet. OutputInterface string @@ -173,6 +186,55 @@ type IPHeaderFilter struct { OutputInterfaceInvert bool } +// match returns whether hdr matches the filter. +func (fl IPHeaderFilter) match(hdr header.IPv4, hook Hook, nicName string) bool { + // TODO(gvisor.dev/issue/170): Support other fields of the filter. + // Check the transport protocol. + if fl.Protocol != 0 && fl.Protocol != hdr.TransportProtocol() { + return false + } + + // Check the source and destination IPs. + if !filterAddress(hdr.DestinationAddress(), fl.DstMask, fl.Dst, fl.DstInvert) || !filterAddress(hdr.SourceAddress(), fl.SrcMask, fl.Src, fl.SrcInvert) { + return false + } + + // Check the output interface. + // TODO(gvisor.dev/issue/170): Add the check for FORWARD and POSTROUTING + // hooks after supported. + if hook == Output { + n := len(fl.OutputInterface) + if n == 0 { + return true + } + + // If the interface name ends with '+', any interface which begins + // with the name should be matched. 
+ ifName := fl.OutputInterface + matches := true + if strings.HasSuffix(ifName, "+") { + matches = strings.HasPrefix(nicName, ifName[:n-1]) + } else { + matches = nicName == ifName + } + return fl.OutputInterfaceInvert != matches + } + + return true +} + +// filterAddress returns whether addr matches the filter. +func filterAddress(addr, mask, filterAddr tcpip.Address, invert bool) bool { + matches := true + for i := range filterAddr { + if addr[i]&mask[i] != filterAddr[i] { + matches = false + break + } + } + return matches != invert +} + // A Matcher is the interface for matching packets. type Matcher interface { // Name returns the name of the Matcher. diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 41e0cfa8d..14e385f5a 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -49,6 +49,8 @@ func init() { RegisterTestCase(FilterInputJumpTwice{}) RegisterTestCase(FilterInputDestination{}) RegisterTestCase(FilterInputInvertDestination{}) + RegisterTestCase(FilterInputSource{}) + RegisterTestCase(FilterInputInvertSource{}) } // FilterInputDropUDP tests that we can drop UDP traffic. @@ -667,3 +669,61 @@ func (FilterInputInvertDestination) ContainerAction(ip net.IP) error { func (FilterInputInvertDestination) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// FilterInputSource verifies that we can filter packets via `-d +// `. +type FilterInputSource struct{} + +// Name implements TestCase.Name. +func (FilterInputSource) Name() string { + return "FilterInputSource" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputSource) ContainerAction(ip net.IP) error { + // Make INPUT's default action DROP, then ACCEPT all packets from this + // machine. + rules := [][]string{ + {"-P", "INPUT", "DROP"}, + {"-A", "INPUT", "-s", fmt.Sprintf("%v", ip), "-j", "ACCEPT"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (FilterInputSource) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} + +// FilterInputInvertSource verifies that we can filter packets via `! -d +// `. +type FilterInputInvertSource struct{} + +// Name implements TestCase.Name. +func (FilterInputInvertSource) Name() string { + return "FilterInputInvertSource" +} + +// ContainerAction implements TestCase.ContainerAction. +func (FilterInputInvertSource) ContainerAction(ip net.IP) error { + // Make INPUT's default action DROP, then ACCEPT all packets not bound + // for 127.0.0.1. + rules := [][]string{ + {"-P", "INPUT", "DROP"}, + {"-A", "INPUT", "!", "-s", localIP, "-j", "ACCEPT"}, + } + if err := filterTableRules(rules); err != nil { + return err + } + + return listenUDP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. 
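// The test bench sends UDP from its own address, which is not localIP, so the
// inverted "! -s localIP" rule installed by ContainerAction accepts the
// traffic; only packets actually sourced from localIP fall through to the
// DROP policy.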
+func (FilterInputInvertSource) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 4fd2cb46a..172ad9e16 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -302,3 +302,11 @@ func TestNATPreRedirectInvert(t *testing.T) { func TestNATRedirectRequiresProtocol(t *testing.T) { singleTest(t, NATRedirectRequiresProtocol{}) } + +func TestInputSource(t *testing.T) { + singleTest(t, FilterInputSource{}) +} + +func TestInputInvertSource(t *testing.T) { + singleTest(t, FilterInputInvertSource{}) +} -- cgit v1.2.3 From fe464f44b7d3696bafd9a2faf3750e1dc4d56d80 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 29 May 2020 08:07:44 -0700 Subject: Port inotify to vfs2, with support in tmpfs. Support in other filesystem impls is still needed. Unlike in Linux and vfs1, we need to plumb inotify down to each filesystem implementation in order to keep track of links/inode structures properly. IN_EXCL_UNLINK still needs to be implemented, as well as a few inotify hooks that are not present in either vfs1 or vfs2. Those will be addressed in subsequent changes. Updates #1479. PiperOrigin-RevId: 313781995 --- pkg/sentry/fsimpl/ext/dentry.go | 12 + pkg/sentry/fsimpl/gofer/gofer.go | 12 + pkg/sentry/fsimpl/kernfs/kernfs.go | 12 + pkg/sentry/fsimpl/tmpfs/BUILD | 1 + pkg/sentry/fsimpl/tmpfs/directory.go | 1 + pkg/sentry/fsimpl/tmpfs/filesystem.go | 40 +- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 60 ++- pkg/sentry/kernel/fd_table.go | 8 +- pkg/sentry/syscalls/linux/vfs2/BUILD | 1 + pkg/sentry/syscalls/linux/vfs2/inotify.go | 134 ++++++ pkg/sentry/syscalls/linux/vfs2/read_write.go | 36 ++ pkg/sentry/syscalls/linux/vfs2/vfs2.go | 8 +- pkg/sentry/vfs/BUILD | 15 + pkg/sentry/vfs/anonfs.go | 12 + pkg/sentry/vfs/dentry.go | 27 ++ pkg/sentry/vfs/inotify.go | 675 +++++++++++++++++++++++++++ pkg/sentry/vfs/vfs.go | 1 + test/syscalls/linux/BUILD | 5 +- test/syscalls/linux/inotify.cc | 176 ++++++- 19 files changed, 1211 insertions(+), 25 deletions(-) create mode 100644 pkg/sentry/syscalls/linux/vfs2/inotify.go create mode 100644 pkg/sentry/vfs/inotify.go (limited to 'test') diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go index bfbd7c3d4..4d0deaf03 100644 --- a/pkg/sentry/fsimpl/ext/dentry.go +++ b/pkg/sentry/fsimpl/ext/dentry.go @@ -60,3 +60,15 @@ func (d *dentry) DecRef() { // inode.decRef(). d.inode.decRef() } + +// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *dentry) InotifyWithParent(events uint32, cookie uint32) {} + +// Watches implements vfs.DentryImpl.Watches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *dentry) Watches() *vfs.Watches { + return nil +} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 131da332f..850482a19 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1039,6 +1039,18 @@ func (d *dentry) decRefLocked() { } } +// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *dentry) InotifyWithParent(events uint32, cookie uint32) {} + +// Watches implements vfs.DentryImpl.Watches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. 
+func (d *dentry) Watches() *vfs.Watches { + return nil +} + // checkCachingLocked should be called after d's reference count becomes 0 or it // becomes disowned. // diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index a83151ad3..682545994 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -225,6 +225,18 @@ func (d *Dentry) destroy() { } } +// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *Dentry) InotifyWithParent(events uint32, cookie uint32) {} + +// Watches implements vfs.DentryImpl.Watches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *Dentry) Watches() *vfs.Watches { + return nil +} + // InsertChild inserts child into the vfs dentry cache with the given name under // this dentry. This does not update the directory inode, so calling this on // it's own isn't sufficient to insert a child into a directory. InsertChild diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 007be1572..062321cbc 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -59,6 +59,7 @@ go_library( "//pkg/sentry/pgalloc", "//pkg/sentry/platform", "//pkg/sentry/socket/unix/transport", + "//pkg/sentry/uniqueid", "//pkg/sentry/usage", "//pkg/sentry/vfs", "//pkg/sentry/vfs/lock", diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index f2399981b..8bc475f88 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -112,6 +112,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba dir.iterMu.Lock() defer dir.iterMu.Unlock() + fd.dentry().InotifyWithParent(linux.IN_ACCESS, 0) fd.inode().touchAtime(fd.vfsfd.Mount()) if fd.off == 0 { diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 7c04570f1..b4159f904 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -177,6 +177,12 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa if err := create(parentDir, name); err != nil { return err } + + ev := linux.IN_CREATE + if dir { + ev |= linux.IN_ISDIR + } + parentDir.inode.watches.Notify(name, uint32(ev), 0) parentDir.inode.touchCMtime() return nil } @@ -241,6 +247,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EMLINK } d.inode.incLinksLocked() + d.inode.watches.Notify("", linux.IN_ATTRIB, 0) parentDir.insertChildLocked(fs.newDentry(d.inode), name) return nil }) @@ -354,6 +361,7 @@ afterTrailingSymlink: if err != nil { return nil, err } + parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0) parentDir.inode.touchCMtime() return fd, nil } @@ -559,6 +567,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa newParentDir.inode.touchCMtime() } renamed.inode.touchCtime() + + vfs.InotifyRename(ctx, &renamed.inode.watches, &oldParentDir.inode.watches, &newParentDir.inode.watches, oldName, newName, renamed.inode.isDir()) return nil } @@ -603,6 +613,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } parentDir.removeChildLocked(child) + parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0) // Remove links for child, child/., and child/.. 
child.inode.decLinksLocked() child.inode.decLinksLocked() @@ -620,7 +631,14 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts if err != nil { return err } - return d.inode.setStat(ctx, rp.Credentials(), &opts.Stat) + if err := d.inode.setStat(ctx, rp.Credentials(), &opts.Stat); err != nil { + return err + } + + if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { + d.InotifyWithParent(ev, 0) + } + return nil } // StatAt implements vfs.FilesystemImpl.StatAt. @@ -700,6 +718,12 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { return err } + + // Generate inotify events. Note that this must take place before the link + // count of the child is decremented, or else the watches may be dropped + // before these events are added. + vfs.InotifyRemoveChild(&child.inode.watches, &parentDir.inode.watches, name) + parentDir.removeChildLocked(child) child.inode.decLinksLocked() vfsObj.CommitDeleteDentry(&child.vfsd) @@ -756,7 +780,12 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt if err != nil { return err } - return d.inode.setxattr(rp.Credentials(), &opts) + if err := d.inode.setxattr(rp.Credentials(), &opts); err != nil { + return err + } + + d.InotifyWithParent(linux.IN_ATTRIB, 0) + return nil } // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. @@ -767,7 +796,12 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, if err != nil { return err } - return d.inode.removexattr(rp.Credentials(), name) + if err := d.inode.removexattr(rp.Credentials(), name); err != nil { + return err + } + + d.InotifyWithParent(linux.IN_ATTRIB, 0) + return nil } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index b739095b7..1d83b6840 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -201,6 +201,26 @@ func (d *dentry) DecRef() { d.inode.decRef() } +// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. +func (d *dentry) InotifyWithParent(events uint32, cookie uint32) { + if d.inode.isDir() { + events |= linux.IN_ISDIR + } + + // The ordering below is important, Linux always notifies the parent first. + if d.parent != nil { + // Note that d.parent or d.name may be stale if there is a concurrent + // rename operation. Inotify does not provide consistency guarantees. + d.parent.inode.watches.Notify(d.name, events, cookie) + } + d.inode.watches.Notify("", events, cookie) +} + +// Watches implements vfs.DentryImpl.Watches. +func (d *dentry) Watches() *vfs.Watches { + return &d.inode.watches +} + // inode represents a filesystem object. type inode struct { // fs is the owning filesystem. fs is immutable. @@ -236,6 +256,9 @@ type inode struct { // Advisory file locks, which lock at the inode level. locks lock.FileLocks + // Inotify watches for this inode. + watches vfs.Watches + impl interface{} // immutable } @@ -257,6 +280,7 @@ func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, i.ctime = now i.mtime = now // i.nlink initialized by caller + i.watches = vfs.Watches{} i.impl = impl } @@ -307,6 +331,7 @@ func (i *inode) tryIncRef() bool { func (i *inode) decRef() { if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { + i.watches.HandleDeletion() if regFile, ok := i.impl.(*regularFile); ok { // Release memory used by regFile to store data. 
Since regFile is // no longer usable, we don't need to grab any locks or update any @@ -628,8 +653,12 @@ func (fd *fileDescription) filesystem() *filesystem { return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) } +func (fd *fileDescription) dentry() *dentry { + return fd.vfsfd.Dentry().Impl().(*dentry) +} + func (fd *fileDescription) inode() *inode { - return fd.vfsfd.Dentry().Impl().(*dentry).inode + return fd.dentry().inode } // Stat implements vfs.FileDescriptionImpl.Stat. @@ -642,7 +671,16 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu // SetStat implements vfs.FileDescriptionImpl.SetStat. func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { creds := auth.CredentialsFromContext(ctx) - return fd.inode().setStat(ctx, creds, &opts.Stat) + d := fd.dentry() + if err := d.inode.setStat(ctx, creds, &opts.Stat); err != nil { + return err + } + + // Generate inotify events. + if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { + d.InotifyWithParent(ev, 0) + } + return nil } // Listxattr implements vfs.FileDescriptionImpl.Listxattr. @@ -657,12 +695,26 @@ func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOption // Setxattr implements vfs.FileDescriptionImpl.Setxattr. func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { - return fd.inode().setxattr(auth.CredentialsFromContext(ctx), &opts) + d := fd.dentry() + if err := d.inode.setxattr(auth.CredentialsFromContext(ctx), &opts); err != nil { + return err + } + + // Generate inotify events. + d.InotifyWithParent(linux.IN_ATTRIB, 0) + return nil } // Removexattr implements vfs.FileDescriptionImpl.Removexattr. func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { - return fd.inode().removexattr(auth.CredentialsFromContext(ctx), name) + d := fd.dentry() + if err := d.inode.removexattr(auth.CredentialsFromContext(ctx), name); err != nil { + return err + } + + // Generate inotify events. + d.InotifyWithParent(linux.IN_ATTRIB, 0) + return nil } // NewMemfd creates a new tmpfs regular file and file description that can back diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index ed40b5303..ef73e1169 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -152,7 +152,13 @@ func (f *FDTable) drop(file *fs.File) { // dropVFS2 drops the table reference. func (f *FDTable) dropVFS2(file *vfs.FileDescription) { // TODO(gvisor.dev/issue/1480): Release locks. - // TODO(gvisor.dev/issue/1479): Send inotify events. + + // Generate inotify events. + ev := uint32(linux.IN_CLOSE_NOWRITE) + if file.IsWritable() { + ev = linux.IN_CLOSE_WRITE + } + file.Dentry().InotifyWithParent(ev, 0) // Drop the table reference. file.DecRef() diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index d56927ff5..9c8b44f64 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -12,6 +12,7 @@ go_library( "filesystem.go", "fscontext.go", "getdents.go", + "inotify.go", "ioctl.go", "memfd.go", "mmap.go", diff --git a/pkg/sentry/syscalls/linux/vfs2/inotify.go b/pkg/sentry/syscalls/linux/vfs2/inotify.go new file mode 100644 index 000000000..7d50b6a16 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/inotify.go @@ -0,0 +1,134 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +const allFlags = linux.IN_NONBLOCK | linux.IN_CLOEXEC + +// InotifyInit1 implements the inotify_init1() syscalls. +func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + flags := args[0].Int() + if flags&^allFlags != 0 { + return 0, nil, syserror.EINVAL + } + + ino, err := vfs.NewInotifyFD(t, t.Kernel().VFS(), uint32(flags)) + if err != nil { + return 0, nil, err + } + defer ino.DecRef() + + fd, err := t.NewFDFromVFS2(0, ino, kernel.FDFlags{ + CloseOnExec: flags&linux.IN_CLOEXEC != 0, + }) + + if err != nil { + return 0, nil, err + } + + return uintptr(fd), nil, nil +} + +// InotifyInit implements the inotify_init() syscalls. +func InotifyInit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + args[0].Value = 0 + return InotifyInit1(t, args) +} + +// fdToInotify resolves an fd to an inotify object. If successful, the file will +// have an extra ref and the caller is responsible for releasing the ref. +func fdToInotify(t *kernel.Task, fd int32) (*vfs.Inotify, *vfs.FileDescription, error) { + f := t.GetFileVFS2(fd) + if f == nil { + // Invalid fd. + return nil, nil, syserror.EBADF + } + + ino, ok := f.Impl().(*vfs.Inotify) + if !ok { + // Not an inotify fd. + f.DecRef() + return nil, nil, syserror.EINVAL + } + + return ino, f, nil +} + +// InotifyAddWatch implements the inotify_add_watch() syscall. +func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + addr := args[1].Pointer() + mask := args[2].Uint() + + // "EINVAL: The given event mask contains no valid events." + // -- inotify_add_watch(2) + if validBits := mask & linux.ALL_INOTIFY_BITS; validBits == 0 { + return 0, nil, syserror.EINVAL + } + + // "IN_DONT_FOLLOW: Don't dereference pathname if it is a symbolic link." + // -- inotify(7) + follow := followFinalSymlink + if mask&linux.IN_DONT_FOLLOW == 0 { + follow = nofollowFinalSymlink + } + + ino, f, err := fdToInotify(t, fd) + if err != nil { + return 0, nil, err + } + defer f.DecRef() + + path, err := copyInPath(t, addr) + if err != nil { + return 0, nil, err + } + if mask&linux.IN_ONLYDIR != 0 { + path.Dir = true + } + tpop, err := getTaskPathOperation(t, linux.AT_FDCWD, path, disallowEmptyPath, follow) + if err != nil { + return 0, nil, err + } + defer tpop.Release() + d, err := t.Kernel().VFS().GetDentryAt(t, t.Credentials(), &tpop.pop, &vfs.GetDentryOptions{}) + if err != nil { + return 0, nil, err + } + defer d.DecRef() + + fd = ino.AddWatch(d.Dentry(), mask) + return uintptr(fd), nil, err +} + +// InotifyRmWatch implements the inotify_rm_watch() syscall. 
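Together, InotifyInit1, InotifyAddWatch and the InotifyRmWatch implementation that follows expose the standard inotify syscall surface through VFS2, so the whole flow can be exercised with ordinary userspace code. A minimal usage sketch, assuming a Linux host (or a gVisor sandbox with these syscalls enabled) and the golang.org/x/sys/unix wrappers; the watched path and event mask are illustrative only:

package main

import (
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	// inotify_init1(2): only IN_NONBLOCK and IN_CLOEXEC are accepted,
	// mirroring the allFlags check above.
	fd, err := unix.InotifyInit1(unix.IN_NONBLOCK | unix.IN_CLOEXEC)
	if err != nil {
		log.Fatalf("inotify_init1: %v", err)
	}
	defer unix.Close(fd)

	// inotify_add_watch(2): watch descriptors are numbered from 1 per
	// instance.
	wd, err := unix.InotifyAddWatch(fd, "/tmp", unix.IN_CREATE|unix.IN_DELETE|unix.IN_ONLYDIR)
	if err != nil {
		log.Fatalf("inotify_add_watch: %v", err)
	}

	// inotify_rm_watch(2): removing the watch queues an IN_IGNORED event
	// on the instance.
	if _, err := unix.InotifyRmWatch(fd, uint32(wd)); err != nil {
		log.Fatalf("inotify_rm_watch: %v", err)
	}
}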
+func InotifyRmWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + wd := args[1].Int() + + ino, f, err := fdToInotify(t, fd) + if err != nil { + return 0, nil, err + } + defer f.DecRef() + return 0, nil, ino.RmWatch(wd) +} diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index 3a7ef24f5..92b5631a3 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -93,11 +93,17 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { n, err := file.Read(t, dst, opts) if err != syserror.ErrWouldBlock { + if n > 0 { + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + } return n, err } allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { + if n > 0 { + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + } return n, err } @@ -128,6 +134,9 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt } file.EventUnregister(&w) + if total > 0 { + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + } return total, err } @@ -248,11 +257,17 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { n, err := file.PRead(t, dst, offset, opts) if err != syserror.ErrWouldBlock { + if n > 0 { + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + } return n, err } allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { + if n > 0 { + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + } return n, err } @@ -283,6 +298,9 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of } file.EventUnregister(&w) + if total > 0 { + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + } return total, err } @@ -345,11 +363,17 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { n, err := file.Write(t, src, opts) if err != syserror.ErrWouldBlock { + if n > 0 { + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0) + } return n, err } allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { + if n > 0 { + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0) + } return n, err } @@ -380,6 +404,9 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op } file.EventUnregister(&w) + if total > 0 { + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0) + } return total, err } @@ -500,11 +527,17 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { n, err := file.PWrite(t, src, offset, opts) if err != syserror.ErrWouldBlock { + if n > 0 { + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0) + } return n, err } allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { + if n > 0 { + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + } return n, err } @@ -535,6 +568,9 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o } file.EventUnregister(&w) + if total > 0 { + 
file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + } return total, err } diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index 083fdcf82..ef8358b8a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -116,9 +116,9 @@ func Override() { s.Table[232] = syscalls.Supported("epoll_wait", EpollWait) s.Table[233] = syscalls.Supported("epoll_ctl", EpollCtl) s.Table[235] = syscalls.Supported("utimes", Utimes) - delete(s.Table, 253) // inotify_init - delete(s.Table, 254) // inotify_add_watch - delete(s.Table, 255) // inotify_rm_watch + s.Table[253] = syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil) + s.Table[254] = syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil) + s.Table[255] = syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil) s.Table[257] = syscalls.Supported("openat", Openat) s.Table[258] = syscalls.Supported("mkdirat", Mkdirat) s.Table[259] = syscalls.Supported("mknodat", Mknodat) @@ -151,7 +151,7 @@ func Override() { s.Table[291] = syscalls.Supported("epoll_create1", EpollCreate1) s.Table[292] = syscalls.Supported("dup3", Dup3) s.Table[293] = syscalls.Supported("pipe2", Pipe2) - delete(s.Table, 294) // inotify_init1 + s.Table[294] = syscalls.PartiallySupported("inotify_init1", InotifyInit1, "inotify events are only available inside the sandbox.", nil) s.Table[295] = syscalls.Supported("preadv", Preadv) s.Table[296] = syscalls.Supported("pwritev", Pwritev) s.Table[299] = syscalls.Supported("recvmmsg", RecvMMsg) diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 94d69c1cc..774cc66cc 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -15,6 +15,18 @@ go_template_instance( }, ) +go_template_instance( + name = "event_list", + out = "event_list.go", + package = "vfs", + prefix = "event", + template = "//pkg/ilist:generic_list", + types = { + "Element": "*Event", + "Linker": "*Event", + }, +) + go_library( name = "vfs", srcs = [ @@ -25,11 +37,13 @@ go_library( "device.go", "epoll.go", "epoll_interest_list.go", + "event_list.go", "file_description.go", "file_description_impl_util.go", "filesystem.go", "filesystem_impl_util.go", "filesystem_type.go", + "inotify.go", "mount.go", "mount_unsafe.go", "options.go", @@ -57,6 +71,7 @@ go_library( "//pkg/sentry/limits", "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", + "//pkg/sentry/uniqueid", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index caf770fd5..55a3d54cc 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -297,3 +297,15 @@ func (d *anonDentry) TryIncRef() bool { func (d *anonDentry) DecRef() { // no-op } + +// InotifyWithParent implements DentryImpl.InotifyWithParent. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *anonDentry) InotifyWithParent(events uint32, cookie uint32) {} + +// Watches implements DentryImpl.Watches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *anonDentry) Watches() *Watches { + return nil +} diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go index 8624dbd5d..d61b9e09b 100644 --- a/pkg/sentry/vfs/dentry.go +++ b/pkg/sentry/vfs/dentry.go @@ -103,6 +103,22 @@ type DentryImpl interface { // DecRef decrements the Dentry's reference count. 
DecRef() + + // InotifyWithParent notifies all watches on the targets represented by this + // dentry and its parent. The parent's watches are notified first, followed + // by this dentry's. + // + // InotifyWithParent automatically adds the IN_ISDIR flag for dentries + // representing directories. + // + // Note that the events may not actually propagate up to the user, depending + // on the event masks. + InotifyWithParent(events uint32, cookie uint32) + + // Watches returns the set of inotify watches for the file corresponding to + // the Dentry. Dentries that are hard links to the same underlying file + // share the same watches. + Watches() *Watches } // IncRef increments d's reference count. @@ -133,6 +149,17 @@ func (d *Dentry) isMounted() bool { return atomic.LoadUint32(&d.mounts) != 0 } +// InotifyWithParent notifies all watches on the inodes for this dentry and +// its parent of events. +func (d *Dentry) InotifyWithParent(events uint32, cookie uint32) { + d.impl.InotifyWithParent(events, cookie) +} + +// Watches returns the set of inotify watches associated with d. +func (d *Dentry) Watches() *Watches { + return d.impl.Watches() +} + // The following functions are exported so that filesystem implementations can // use them. The vfs package, and users of VFS, should not call these // functions. diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go new file mode 100644 index 000000000..1d28ccb46 --- /dev/null +++ b/pkg/sentry/vfs/inotify.go @@ -0,0 +1,675 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs + +import ( + "bytes" + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +// inotifyEventBaseSize is the base size of linux's struct inotify_event. This +// must be a power 2 for rounding below. +const inotifyEventBaseSize = 16 + +// Inotify represents an inotify instance created by inotify_init(2) or +// inotify_init1(2). Inotify implements FileDescriptionImpl. +// +// Lock ordering: +// Inotify.mu -> Watches.mu -> Inotify.evMu +// +// +stateify savable +type Inotify struct { + vfsfd FileDescription + FileDescriptionDefaultImpl + DentryMetadataFileDescriptionImpl + + // Unique identifier for this inotify instance. We don't just reuse the + // inotify fd because fds can be duped. These should not be exposed to the + // user, since we may aggressively reuse an id on S/R. + id uint64 + + // queue is used to notify interested parties when the inotify instance + // becomes readable or writable. + queue waiter.Queue `state:"nosave"` + + // evMu *only* protects the events list. We need a separate lock while + // queuing events: using mu may violate lock ordering, since at that point + // the calling goroutine may already hold Watches.mu. 
+ evMu sync.Mutex `state:"nosave"` + + // A list of pending events for this inotify instance. Protected by evMu. + events eventList + + // A scratch buffer, used to serialize inotify events. Allocate this + // ahead of time for the sake of performance. Protected by evMu. + scratch []byte + + // mu protects the fields below. + mu sync.Mutex `state:"nosave"` + + // nextWatchMinusOne is used to allocate watch descriptors on this Inotify + // instance. Note that Linux starts numbering watch descriptors from 1. + nextWatchMinusOne int32 + + // Map from watch descriptors to watch objects. + watches map[int32]*Watch +} + +var _ FileDescriptionImpl = (*Inotify)(nil) + +// NewInotifyFD constructs a new Inotify instance. +func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32) (*FileDescription, error) { + // O_CLOEXEC affects file descriptors, so it must be handled outside of vfs. + flags &^= linux.O_CLOEXEC + if flags&^linux.O_NONBLOCK != 0 { + return nil, syserror.EINVAL + } + + id := uniqueid.GlobalFromContext(ctx) + vd := vfsObj.NewAnonVirtualDentry(fmt.Sprintf("[inotifyfd:%d]", id)) + defer vd.DecRef() + fd := &Inotify{ + id: id, + scratch: make([]byte, inotifyEventBaseSize), + watches: make(map[int32]*Watch), + } + if err := fd.vfsfd.Init(fd, flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{ + UseDentryMetadata: true, + DenyPRead: true, + DenyPWrite: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil +} + +// Release implements FileDescriptionImpl.Release. Release removes all +// watches and frees all resources for an inotify instance. +func (i *Inotify) Release() { + // We need to hold i.mu to avoid a race with concurrent calls to + // Inotify.handleDeletion from Watches. There's no risk of Watches + // accessing this Inotify after the destructor ends, because we remove all + // references to it below. + i.mu.Lock() + defer i.mu.Unlock() + for _, w := range i.watches { + // Remove references to the watch from the watches set on the target. We + // don't need to worry about the references from i.watches, since this + // file description is about to be destroyed. + w.set.Remove(i.id) + } +} + +// EventRegister implements waiter.Waitable. +func (i *Inotify) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + i.queue.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable. +func (i *Inotify) EventUnregister(e *waiter.Entry) { + i.queue.EventUnregister(e) +} + +// Readiness implements waiter.Waitable.Readiness. +// +// Readiness indicates whether there are pending events for an inotify instance. +func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask { + ready := waiter.EventMask(0) + + i.evMu.Lock() + defer i.evMu.Unlock() + + if !i.events.Empty() { + ready |= waiter.EventIn + } + + return mask & ready +} + +// PRead implements FileDescriptionImpl. +func (*Inotify) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { + return 0, syserror.ESPIPE +} + +// PWrite implements FileDescriptionImpl. +func (*Inotify) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { + return 0, syserror.ESPIPE +} + +// Write implements FileDescriptionImpl.Write. +func (*Inotify) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { + return 0, syserror.EBADF +} + +// Read implements FileDescriptionImpl.Read. 
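The Read implementation that follows drains queued events into the caller's buffer as a stream of struct inotify_event records: a 16-byte fixed header carrying wd, mask, cookie and len, followed by len bytes of NUL-padded name (len is zero when the event carries no name). A consumer-side parsing sketch, assuming a little-endian host since the fields are written in host byte order; parseEvents is a hypothetical helper fed by one read(2) on the inotify fd:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// parseEvents decodes the records produced by a single read on an inotify
// fd: a 16-byte header (wd, mask, cookie, len), then len bytes of padded name.
func parseEvents(buf []byte) {
	const hdrSize = 16
	for len(buf) >= hdrSize {
		wd := int32(binary.LittleEndian.Uint32(buf[0:]))
		mask := binary.LittleEndian.Uint32(buf[4:])
		cookie := binary.LittleEndian.Uint32(buf[8:])
		nameLen := int(binary.LittleEndian.Uint32(buf[12:]))
		if hdrSize+nameLen > len(buf) {
			break // truncated record; a well-formed read never produces this
		}
		// Names are NUL-padded so each record's size stays a multiple of 16.
		name := string(bytes.TrimRight(buf[hdrSize:hdrSize+nameLen], "\x00"))
		fmt.Printf("wd=%d mask=%#x cookie=%d name=%q\n", wd, mask, cookie, name)
		buf = buf[hdrSize+nameLen:]
	}
}

func main() {
	// One synthetic record (wd=1, mask=IN_CREATE=0x100, name "foo" padded to
	// 16 bytes), just to exercise the parser.
	rec := make([]byte, 32)
	binary.LittleEndian.PutUint32(rec[0:], 1)
	binary.LittleEndian.PutUint32(rec[4:], 0x100)
	binary.LittleEndian.PutUint32(rec[12:], 16)
	copy(rec[16:], "foo")
	parseEvents(rec)
}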
+func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { + if dst.NumBytes() < inotifyEventBaseSize { + return 0, syserror.EINVAL + } + + i.evMu.Lock() + defer i.evMu.Unlock() + + if i.events.Empty() { + // Nothing to read yet, tell caller to block. + return 0, syserror.ErrWouldBlock + } + + var writeLen int64 + for it := i.events.Front(); it != nil; { + // Advance `it` before the element is removed from the list, or else + // it.Next() will always be nil. + event := it + it = it.Next() + + // Does the buffer have enough remaining space to hold the event we're + // about to write out? + if dst.NumBytes() < int64(event.sizeOf()) { + if writeLen > 0 { + // Buffer wasn't big enough for all pending events, but we did + // write some events out. + return writeLen, nil + } + return 0, syserror.EINVAL + } + + // Linux always dequeues an available event as long as there's enough + // buffer space to copy it out, even if the copy below fails. Emulate + // this behaviour. + i.events.Remove(event) + + // Buffer has enough space, copy event to the read buffer. + n, err := event.CopyTo(ctx, i.scratch, dst) + if err != nil { + return 0, err + } + + writeLen += n + dst = dst.DropFirst64(n) + } + return writeLen, nil +} + +// Ioctl implements fs.FileOperations.Ioctl. +func (i *Inotify) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + switch args[1].Int() { + case linux.FIONREAD: + i.evMu.Lock() + defer i.evMu.Unlock() + var n uint32 + for e := i.events.Front(); e != nil; e = e.Next() { + n += uint32(e.sizeOf()) + } + var buf [4]byte + usermem.ByteOrder.PutUint32(buf[:], n) + _, err := uio.CopyOut(ctx, args[2].Pointer(), buf[:], usermem.IOOpts{}) + return 0, err + + default: + return 0, syserror.ENOTTY + } +} + +func (i *Inotify) queueEvent(ev *Event) { + i.evMu.Lock() + + // Check if we should coalesce the event we're about to queue with the last + // one currently in the queue. Events are coalesced if they are identical. + if last := i.events.Back(); last != nil { + if ev.equals(last) { + // "Coalesce" the two events by simply not queuing the new one. We + // don't need to raise a waiter.EventIn notification because no new + // data is available for reading. + i.evMu.Unlock() + return + } + } + + i.events.PushBack(ev) + + // Release mutex before notifying waiters because we don't control what they + // can do. + i.evMu.Unlock() + + i.queue.Notify(waiter.EventIn) +} + +// newWatchLocked creates and adds a new watch to target. +// +// Precondition: i.mu must be locked. +func (i *Inotify) newWatchLocked(target *Dentry, mask uint32) *Watch { + targetWatches := target.Watches() + w := &Watch{ + owner: i, + wd: i.nextWatchIDLocked(), + set: targetWatches, + mask: mask, + } + + // Hold the watch in this inotify instance as well as the watch set on the + // target. + i.watches[w.wd] = w + targetWatches.Add(w) + return w +} + +// newWatchIDLocked allocates and returns a new watch descriptor. +// +// Precondition: i.mu must be locked. +func (i *Inotify) nextWatchIDLocked() int32 { + i.nextWatchMinusOne++ + return i.nextWatchMinusOne +} + +// handleDeletion handles the deletion of the target of watch w. It removes w +// from i.watches and a watch removal event is generated. 
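Because the FIONREAD ioctl above reports the summed serialized size of every queued event, a reader can size its buffer to drain the queue in one read. A short sketch of that pattern, assuming golang.org/x/sys/unix and an fd that already refers to an inotify instance:

package main

import (
	"fmt"
	"log"

	"golang.org/x/sys/unix"
)

// readAllPending asks FIONREAD for the number of pending bytes and then
// drains the inotify queue with a single read sized to match.
func readAllPending(fd int) ([]byte, error) {
	n, err := unix.IoctlGetInt(fd, unix.FIONREAD)
	if err != nil {
		return nil, err
	}
	if n == 0 {
		return nil, nil // nothing queued
	}
	buf := make([]byte, n)
	m, err := unix.Read(fd, buf)
	if err != nil {
		return nil, err
	}
	return buf[:m], nil
}

func main() {
	fd, err := unix.InotifyInit1(unix.IN_NONBLOCK)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)

	buf, err := readAllPending(fd)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("read %d bytes of pending events\n", len(buf))
}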
+func (i *Inotify) handleDeletion(w *Watch) { + i.mu.Lock() + _, found := i.watches[w.wd] + delete(i.watches, w.wd) + i.mu.Unlock() + + if found { + i.queueEvent(newEvent(w.wd, "", linux.IN_IGNORED, 0)) + } +} + +// AddWatch constructs a new inotify watch and adds it to the target. It +// returns the watch descriptor returned by inotify_add_watch(2). +func (i *Inotify) AddWatch(target *Dentry, mask uint32) int32 { + // Note: Locking this inotify instance protects the result returned by + // Lookup() below. With the lock held, we know for sure the lookup result + // won't become stale because it's impossible for *this* instance to + // add/remove watches on target. + i.mu.Lock() + defer i.mu.Unlock() + + // Does the target already have a watch from this inotify instance? + if existing := target.Watches().Lookup(i.id); existing != nil { + newmask := mask + if mask&linux.IN_MASK_ADD != 0 { + // "Add (OR) events to watch mask for this pathname if it already + // exists (instead of replacing mask)." -- inotify(7) + newmask |= atomic.LoadUint32(&existing.mask) + } + atomic.StoreUint32(&existing.mask, newmask) + return existing.wd + } + + // No existing watch, create a new watch. + w := i.newWatchLocked(target, mask) + return w.wd +} + +// RmWatch looks up an inotify watch for the given 'wd' and configures the +// target to stop sending events to this inotify instance. +func (i *Inotify) RmWatch(wd int32) error { + i.mu.Lock() + + // Find the watch we were asked to removed. + w, ok := i.watches[wd] + if !ok { + i.mu.Unlock() + return syserror.EINVAL + } + + // Remove the watch from this instance. + delete(i.watches, wd) + + // Remove the watch from the watch target. + w.set.Remove(w.OwnerID()) + i.mu.Unlock() + + // Generate the event for the removal. + i.queueEvent(newEvent(wd, "", linux.IN_IGNORED, 0)) + + return nil +} + +// Watches is the collection of all inotify watches on a single file. +// +// +stateify savable +type Watches struct { + // mu protects the fields below. + mu sync.RWMutex `state:"nosave"` + + // ws is the map of active watches in this collection, keyed by the inotify + // instance id of the owner. + ws map[uint64]*Watch +} + +// Lookup returns the watch owned by an inotify instance with the given id. +// Returns nil if no such watch exists. +// +// Precondition: the inotify instance with the given id must be locked to +// prevent the returned watch from being concurrently modified or replaced in +// Inotify.watches. +func (w *Watches) Lookup(id uint64) *Watch { + w.mu.Lock() + defer w.mu.Unlock() + return w.ws[id] +} + +// Add adds watch into this set of watches. +// +// Precondition: the inotify instance with the given id must be locked. +func (w *Watches) Add(watch *Watch) { + w.mu.Lock() + defer w.mu.Unlock() + + owner := watch.OwnerID() + // Sanity check, we should never have two watches for one owner on the + // same target. + if _, exists := w.ws[owner]; exists { + panic(fmt.Sprintf("Watch collision with ID %+v", owner)) + } + if w.ws == nil { + w.ws = make(map[uint64]*Watch) + } + w.ws[owner] = watch +} + +// Remove removes a watch with the given id from this set of watches and +// releases it. The caller is responsible for generating any watch removal +// event, as appropriate. The provided id must match an existing watch in this +// collection. +// +// Precondition: the inotify instance with the given id must be locked. +func (w *Watches) Remove(id uint64) { + w.mu.Lock() + defer w.mu.Unlock() + + if w.ws == nil { + // This watch set is being destroyed. 
The thread executing the + // destructor is already in the process of deleting all our watches. We + // got here with no references on the target because we raced with the + // destructor notifying all the watch owners of destruction. See the + // comment in Watches.HandleDeletion for why this race exists. + return + } + + if _, ok := w.ws[id]; !ok { + // While there's technically no problem with silently ignoring a missing + // watch, this is almost certainly a bug. + panic(fmt.Sprintf("Attempt to remove a watch, but no watch found with provided id %+v.", id)) + } + delete(w.ws, id) +} + +// Notify queues a new event with all watches in this set. +func (w *Watches) Notify(name string, events, cookie uint32) { + // N.B. We don't defer the unlocks because Notify is in the hot path of + // all IO operations, and the defer costs too much for small IO + // operations. + w.mu.RLock() + for _, watch := range w.ws { + // TODO(gvisor.dev/issue/1479): Skip for IN_EXCL_UNLINK cases. + watch.Notify(name, events, cookie) + } + w.mu.RUnlock() +} + +// HandleDeletion is called when the watch target is destroyed to emit +// the appropriate events. +func (w *Watches) HandleDeletion() { + w.Notify("", linux.IN_DELETE_SELF, 0) + + // TODO(gvisor.dev/issue/1479): This doesn't work because maps are not copied + // by value. Ideally, we wouldn't have this circular locking so we can just + // notify of IN_DELETE_SELF in the same loop below. + // + // We can't hold w.mu while calling watch.handleDeletion to preserve lock + // ordering w.r.t to the owner inotify instances. Instead, atomically move + // the watches map into a local variable so we can iterate over it safely. + // + // Because of this however, it is possible for the watches' owners to reach + // this inode while the inode has no refs. This is still safe because the + // owners can only reach the inode until this function finishes calling + // watch.handleDeletion below and the inode is guaranteed to exist in the + // meantime. But we still have to be very careful not to rely on inode state + // that may have been already destroyed. + var ws map[uint64]*Watch + w.mu.Lock() + ws = w.ws + w.ws = nil + w.mu.Unlock() + + for _, watch := range ws { + // TODO(gvisor.dev/issue/1479): consider refactoring this. + watch.handleDeletion() + } +} + +// Watch represent a particular inotify watch created by inotify_add_watch. +// +// +stateify savable +type Watch struct { + // Inotify instance which owns this watch. + owner *Inotify + + // Descriptor for this watch. This is unique across an inotify instance. + wd int32 + + // set is the watch set containing this watch. It belongs to the target file + // of this watch. + set *Watches + + // Events being monitored via this watch. Must be accessed with atomic + // memory operations. + mask uint32 +} + +// OwnerID returns the id of the inotify instance that owns this watch. +func (w *Watch) OwnerID() uint64 { + return w.owner.id +} + +// ExcludeUnlinkedChildren indicates whether the watched object should continue +// to be notified of events of its children after they have been unlinked, e.g. +// for an open file descriptor. +// +// TODO(gvisor.dev/issue/1479): Implement IN_EXCL_UNLINK. +// We can do this by keeping track of the set of unlinked children in Watches +// to skip notification. +func (w *Watch) ExcludeUnlinkedChildren() bool { + return atomic.LoadUint32(&w.mask)&linux.IN_EXCL_UNLINK != 0 +} + +// Notify queues a new event on this watch. 
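One consequence of keying ws by the owning instance's id, above, is that a single inotify instance holds at most one Watch per file: AddWatch updates the existing watch's mask (OR-ing it when IN_MASK_ADD is set) rather than inserting a second entry, and hard links, which share the file's watch set, resolve to the same watch descriptor. A small userspace illustration of that behavior, assuming golang.org/x/sys/unix and that the two paths are hard links to the same file:

package main

import (
	"fmt"
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.InotifyInit1(0)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)

	// The second add finds the existing watch for the shared file and, with
	// IN_MASK_ADD, ORs in the new events instead of replacing the mask.
	wd1, err := unix.InotifyAddWatch(fd, "/tmp/data", unix.IN_MODIFY)
	if err != nil {
		log.Fatal(err)
	}
	wd2, err := unix.InotifyAddWatch(fd, "/tmp/data-hardlink", unix.IN_ATTRIB|unix.IN_MASK_ADD)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(wd1 == wd2) // true: one watch, now delivering IN_MODIFY and IN_ATTRIB
}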
+func (w *Watch) Notify(name string, events uint32, cookie uint32) { + mask := atomic.LoadUint32(&w.mask) + if mask&events == 0 { + // We weren't watching for this event. + return + } + + // Event mask should include bits matched from the watch plus all control + // event bits. + unmaskableBits := ^uint32(0) &^ linux.IN_ALL_EVENTS + effectiveMask := unmaskableBits | mask + matchedEvents := effectiveMask & events + w.owner.queueEvent(newEvent(w.wd, name, matchedEvents, cookie)) +} + +// handleDeletion handles the deletion of w's target. +func (w *Watch) handleDeletion() { + w.owner.handleDeletion(w) +} + +// Event represents a struct inotify_event from linux. +// +// +stateify savable +type Event struct { + eventEntry + + wd int32 + mask uint32 + cookie uint32 + + // len is computed based on the name field is set automatically by + // Event.setName. It should be 0 when no name is set; otherwise it is the + // length of the name slice. + len uint32 + + // The name field has special padding requirements and should only be set by + // calling Event.setName. + name []byte +} + +func newEvent(wd int32, name string, events, cookie uint32) *Event { + e := &Event{ + wd: wd, + mask: events, + cookie: cookie, + } + if name != "" { + e.setName(name) + } + return e +} + +// paddedBytes converts a go string to a null-terminated c-string, padded with +// null bytes to a total size of 'l'. 'l' must be large enough for all the bytes +// in the 's' plus at least one null byte. +func paddedBytes(s string, l uint32) []byte { + if l < uint32(len(s)+1) { + panic("Converting string to byte array results in truncation, this can lead to buffer-overflow due to the missing null-byte!") + } + b := make([]byte, l) + copy(b, s) + + // b was zero-value initialized during make(), so the rest of the slice is + // already filled with null bytes. + + return b +} + +// setName sets the optional name for this event. +func (e *Event) setName(name string) { + // We need to pad the name such that the entire event length ends up a + // multiple of inotifyEventBaseSize. + unpaddedLen := len(name) + 1 + // Round up to nearest multiple of inotifyEventBaseSize. + e.len = uint32((unpaddedLen + inotifyEventBaseSize - 1) & ^(inotifyEventBaseSize - 1)) + // Make sure we haven't overflowed and wrapped around when rounding. + if unpaddedLen > int(e.len) { + panic("Overflow when rounding inotify event size, the 'name' field was too big.") + } + e.name = paddedBytes(name, e.len) +} + +func (e *Event) sizeOf() int { + s := inotifyEventBaseSize + int(e.len) + if s < inotifyEventBaseSize { + panic("overflow") + } + return s +} + +// CopyTo serializes this event to dst. buf is used as a scratch buffer to +// construct the output. We use a buffer allocated ahead of time for +// performance. buf must be at least inotifyEventBaseSize bytes. +func (e *Event) CopyTo(ctx context.Context, buf []byte, dst usermem.IOSequence) (int64, error) { + usermem.ByteOrder.PutUint32(buf[0:], uint32(e.wd)) + usermem.ByteOrder.PutUint32(buf[4:], e.mask) + usermem.ByteOrder.PutUint32(buf[8:], e.cookie) + usermem.ByteOrder.PutUint32(buf[12:], e.len) + + writeLen := 0 + + n, err := dst.CopyOut(ctx, buf) + if err != nil { + return 0, err + } + writeLen += n + dst = dst.DropFirst(n) + + if e.len > 0 { + n, err = dst.CopyOut(ctx, e.name) + if err != nil { + return 0, err + } + writeLen += n + } + + // Santiy check. 
+ if writeLen != e.sizeOf() { + panic(fmt.Sprintf("Serialized unexpected amount of data for an event, expected %d, wrote %d.", e.sizeOf(), writeLen)) + } + + return int64(writeLen), nil +} + +func (e *Event) equals(other *Event) bool { + return e.wd == other.wd && + e.mask == other.mask && + e.cookie == other.cookie && + e.len == other.len && + bytes.Equal(e.name, other.name) +} + +// InotifyEventFromStatMask generates the appropriate events for an operation +// that set the stats specified in mask. +func InotifyEventFromStatMask(mask uint32) uint32 { + var ev uint32 + if mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_MODE) != 0 { + ev |= linux.IN_ATTRIB + } + if mask&linux.STATX_SIZE != 0 { + ev |= linux.IN_MODIFY + } + + if (mask & (linux.STATX_ATIME | linux.STATX_MTIME)) == (linux.STATX_ATIME | linux.STATX_MTIME) { + // Both times indicates a utime(s) call. + ev |= linux.IN_ATTRIB + } else if mask&linux.STATX_ATIME != 0 { + ev |= linux.IN_ACCESS + } else if mask&linux.STATX_MTIME != 0 { + mask |= linux.IN_MODIFY + } + return ev +} + +// InotifyRemoveChild sends the appriopriate notifications to the watch sets of +// the child being removed and its parent. +func InotifyRemoveChild(self, parent *Watches, name string) { + self.Notify("", linux.IN_ATTRIB, 0) + parent.Notify(name, linux.IN_DELETE, 0) + // TODO(gvisor.dev/issue/1479): implement IN_EXCL_UNLINK. +} + +// InotifyRename sends the appriopriate notifications to the watch sets of the +// file being renamed and its old/new parents. +func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches, oldName, newName string, isDir bool) { + var dirEv uint32 + if isDir { + dirEv = linux.IN_ISDIR + } + cookie := uniqueid.InotifyCookie(ctx) + oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie) + newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie) + // Somewhat surprisingly, self move events do not have a cookie. 
+ renamed.Notify("", linux.IN_MOVE_SELF, 0) +} diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 6d2ba53ea..be6f21dba 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -422,6 +422,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential } } + fd.Dentry().InotifyWithParent(linux.IN_OPEN, 0) return fd, nil } if !rp.handleError(err) { diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 5acdb8438..f4b5de18d 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -951,6 +951,7 @@ cc_binary( "//test/util:epoll_util", "//test/util:file_descriptor", "//test/util:fs_util", + "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", @@ -1382,7 +1383,7 @@ cc_binary( srcs = ["partial_bad_buffer.cc"], linkstatic = 1, deps = [ - "//test/syscalls/linux:socket_test_util", + ":socket_test_util", "//test/util:file_descriptor", "//test/util:fs_util", "@com_google_absl//absl/time", @@ -3461,7 +3462,7 @@ cc_binary( deps = [ ":socket_test_util", gtest, - "//test/syscalls/linux:socket_netlink_route_util", + ":socket_netlink_route_util", "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 0e13ad190..e4565467b 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -33,6 +33,7 @@ #include "test/util/epoll_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" +#include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -335,6 +336,11 @@ TEST(Inotify, InotifyFdNotWritable) { EXPECT_THAT(write(fd.get(), "x", 1), SyscallFailsWithErrno(EBADF)); } +TEST(Inotify, InitFlags) { + EXPECT_THAT(inotify_init1(IN_NONBLOCK | IN_CLOEXEC), SyscallSucceeds()); + EXPECT_THAT(inotify_init1(12345), SyscallFailsWithErrno(EINVAL)); +} + TEST(Inotify, NonBlockingReadReturnsEagain) { const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); @@ -395,7 +401,7 @@ TEST(Inotify, CanDeleteFileAfterRemovingWatch) { file1.reset(); } -TEST(Inotify, CanRemoveWatchAfterDeletingFile) { +TEST(Inotify, RemoveWatchAfterDeletingFileFails) { const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); @@ -491,17 +497,23 @@ TEST(Inotify, DeletingChildGeneratesEvents) { Event(IN_DELETE, root_wd, Basename(file1_path))})); } +// Creating a file in "parent/child" should generate events for child, but not +// parent. TEST(Inotify, CreatingFileGeneratesEvents) { - const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath child = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path())); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), parent.path(), IN_ALL_EVENTS)); const int wd = ASSERT_NO_ERRNO_AND_VALUE( - InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + InotifyAddWatch(fd.get(), child.path(), IN_ALL_EVENTS)); // Create a new file in the directory. 
const TempPath file1 = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(child.path())); const std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); @@ -554,6 +566,47 @@ TEST(Inotify, WritingFileGeneratesModifyEvent) { ASSERT_THAT(events, Are({Event(IN_MODIFY, wd, Basename(file1.path()))})); } +TEST(Inotify, SizeZeroReadWriteGeneratesNothing) { + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDWR)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + + // Read from the empty file. + int val; + ASSERT_THAT(read(file1_fd.get(), &val, sizeof(val)), + SyscallSucceedsWithValue(0)); + + // Write zero bytes. + ASSERT_THAT(write(file1_fd.get(), "", 0), SyscallSucceedsWithValue(0)); + + const std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({})); +} + +TEST(Inotify, FailedFileCreationGeneratesNoEvents) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), dir.path(), IN_ALL_EVENTS)); + + const char* p = dir.path().c_str(); + ASSERT_THAT(mkdir(p, 0777), SyscallFails()); + ASSERT_THAT(mknod(p, S_IFIFO, 0777), SyscallFails()); + ASSERT_THAT(symlink(p, p), SyscallFails()); + ASSERT_THAT(link(p, p), SyscallFails()); + std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({})); +} + TEST(Inotify, WatchSetAfterOpenReportsCloseFdEvent) { const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const FileDescriptor fd = @@ -602,7 +655,7 @@ TEST(Inotify, ChildrenDeletionInWatchedDirGeneratesEvent) { Event(IN_DELETE | IN_ISDIR, wd, Basename(dir1_path))})); } -TEST(Inotify, WatchTargetDeletionGeneratesEvent) { +TEST(Inotify, RmdirOnWatchedTargetGeneratesEvent) { const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); @@ -1228,7 +1281,7 @@ TEST(Inotify, LinkGeneratesAttribAndCreateEvents) { InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); const int rc = link(file1.path().c_str(), link1.path().c_str()); - // link(2) is only supported on tmpfs in the sandbox. + // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox. SKIP_IF(IsRunningOnGvisor() && rc != 0 && (errno == EPERM || errno == ENOENT)); ASSERT_THAT(rc, SyscallSucceeds()); @@ -1322,21 +1375,27 @@ TEST(Inotify, HardlinksReuseSameWatch) { Event(IN_DELETE, root_wd, Basename(file1_path))})); } +// Calling mkdir within "parent/child" should generate an event for child, but +// not parent. 
TEST(Inotify, MkdirGeneratesCreateEventWithDirFlag) { - const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath child = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path())); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); - const int root_wd = ASSERT_NO_ERRNO_AND_VALUE( - InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), parent.path(), IN_ALL_EVENTS)); + const int child_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), child.path(), IN_ALL_EVENTS)); - const TempPath dir1(NewTempAbsPathInDir(root.path())); + const TempPath dir1(NewTempAbsPathInDir(child.path())); ASSERT_THAT(mkdir(dir1.path().c_str(), 0777), SyscallSucceeds()); const std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); ASSERT_THAT( events, - Are({Event(IN_CREATE | IN_ISDIR, root_wd, Basename(dir1.path()))})); + Are({Event(IN_CREATE | IN_ISDIR, child_wd, Basename(dir1.path()))})); } TEST(Inotify, MultipleInotifyInstancesAndWatchesAllGetEvents) { @@ -1597,6 +1656,8 @@ TEST(Inotify, EpollNoDeadlock) { } TEST(Inotify, SpliceEvent) { + // TODO(gvisor.dev/issue/138): Implement splice in VFS2. + SKIP_IF(IsRunningOnGvisor() && !IsRunningWithVFS1()); int pipes[2]; ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); @@ -1624,6 +1685,99 @@ TEST(Inotify, SpliceEvent) { ASSERT_THAT(events, Are({Event(IN_ACCESS, watcher)})); } +// Watches on a parent should not be triggered by actions on a hard link to one +// of its children that has a different parent. +TEST(Inotify, LinkOnOtherParent) { + const TempPath dir1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath dir2 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir1.path())); + std::string link_path = NewTempAbsPathInDir(dir2.path()); + + const int rc = link(file.path().c_str(), link_path.c_str()); + // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox. + SKIP_IF(IsRunningOnGvisor() && rc != 0 && + (errno == EPERM || errno == ENOENT)); + ASSERT_THAT(rc, SyscallSucceeds()); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), dir1.path(), IN_ALL_EVENTS)); + + // Perform various actions on the link outside of dir1, which should trigger + // no inotify events. + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(link_path.c_str(), O_RDWR)); + int val = 0; + ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds()); + ASSERT_THAT(unlink(link_path.c_str()), SyscallSucceeds()); + const std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); +} + +TEST(Inotify, Exec) { + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath bin = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(dir.path(), "/bin/true")); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), bin.path(), IN_ALL_EVENTS)); + + // Perform exec. 
+ ScopedThread t([&bin]() { + ASSERT_THAT(execl(bin.path().c_str(), bin.path().c_str(), (char*)nullptr), + SyscallSucceeds()); + }); + t.Join(); + + std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + EXPECT_THAT(events, Are({Event(IN_OPEN, wd), Event(IN_ACCESS, wd)})); +} + +// Watches without IN_EXCL_UNLINK, should continue to emit events for file +// descriptors after their corresponding files have been unlinked. +// +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. +TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) { + const DisableSave ds; + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(dir.path(), "123", TempPath::kDefaultFileMode)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS)); + + ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); + int val = 0; + ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + const std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, file_wd), + Event(IN_DELETE, dir_wd, Basename(file.path())), + Event(IN_ACCESS, dir_wd, Basename(file.path())), + Event(IN_ACCESS, file_wd), + Event(IN_MODIFY, dir_wd, Basename(file.path())), + Event(IN_MODIFY, file_wd), + })); +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From a2b3b67b3f543a54ab0f09500b6c9e18bc33097f Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Fri, 29 May 2020 12:36:49 -0400 Subject: runner: fix goroutine leak Signed-off-by: Gaurav Singh --- test/runner/runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/runner/runner.go b/test/runner/runner.go index e4f04cd2a..e048e5a9c 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -204,7 +204,7 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { return } log.Warningf("%s: Got signal: %v", name, s) - done := make(chan bool) + done := make(chan bool, 1) dArgs := append([]string{}, args...) dArgs = append(dArgs, "-alsologtostderr=true", "debug", "--stacks", id) go func(dArgs []string) { -- cgit v1.2.3 From 0baba92ad9fc3c92347cada47364514a85603462 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Fri, 29 May 2020 11:51:02 -0700 Subject: Internal change. 
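This change adds the IP_MTU_DISCOVER values from uapi/linux/in.h to pkg/abi/linux and a packetimpact test that clears path-MTU discovery so the DUT sends DF=0 segments, whose retransmits must then carry distinct IPv4 IDs (RFC 6864 section 4.2). The same socket option can be set from plain userspace Go; a hedged sketch assuming golang.org/x/sys/unix, noting that gVisor currently returns ENOTSUP for the option (it already sends DF=0 by default), which the test tolerates:

package main

import (
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)

	// IP_PMTUDISC_DONT disables path-MTU discovery on this socket, so the
	// stack sends its packets with the DF bit cleared.
	err = unix.SetsockoptInt(fd, unix.IPPROTO_IP, unix.IP_MTU_DISCOVER, unix.IP_PMTUDISC_DONT)
	if err != nil && err != unix.ENOTSUP {
		log.Fatalf("setsockopt(IP_MTU_DISCOVER, IP_PMTUDISC_DONT): %v", err)
	}
}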
PiperOrigin-RevId: 313821986 --- pkg/abi/linux/ip.go | 10 ++ test/packetimpact/tests/BUILD | 13 +++ test/packetimpact/tests/ipv4_id_uniqueness_test.go | 111 +++++++++++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 test/packetimpact/tests/ipv4_id_uniqueness_test.go (limited to 'test') diff --git a/pkg/abi/linux/ip.go b/pkg/abi/linux/ip.go index 31e56ffa6..ef6d1093e 100644 --- a/pkg/abi/linux/ip.go +++ b/pkg/abi/linux/ip.go @@ -92,6 +92,16 @@ const ( IP_UNICAST_IF = 50 ) +// IP_MTU_DISCOVER values from uapi/linux/in.h +const ( + IP_PMTUDISC_DONT = 0 + IP_PMTUDISC_WANT = 1 + IP_PMTUDISC_DO = 2 + IP_PMTUDISC_PROBE = 3 + IP_PMTUDISC_INTERFACE = 4 + IP_PMTUDISC_OMIT = 5 +) + // Socket options from uapi/linux/in6.h const ( IPV6_ADDRFORM = 1 diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 3a0e9cb07..e2a320159 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -15,6 +15,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "ipv4_id_uniqueness", + srcs = ["ipv4_id_uniqueness_test.go"], + # TODO(b/157506701) Fix netstack then remove the line below. + expect_netstack_failure = True, + deps = [ + "//pkg/abi/linux", + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "udp_recv_multicast", srcs = ["udp_recv_multicast_test.go"], diff --git a/test/packetimpact/tests/ipv4_id_uniqueness_test.go b/test/packetimpact/tests/ipv4_id_uniqueness_test.go new file mode 100644 index 000000000..49e481d0b --- /dev/null +++ b/test/packetimpact/tests/ipv4_id_uniqueness_test.go @@ -0,0 +1,111 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipv4_id_uniqueness_test + +import ( + "context" + "flag" + "fmt" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +func recvTCPSegment(conn *tb.TCPIPv4, expect *tb.TCP, expectPayload *tb.Payload) (uint16, error) { + layers, err := conn.ExpectData(expect, expectPayload, time.Second) + if err != nil { + return 0, fmt.Errorf("failed to receive TCP segment: %s", err) + } + if len(layers) < 2 { + return 0, fmt.Errorf("got packet with layers: %v, expected to have at least 2 layers (link and network)", layers) + } + ipv4, ok := layers[1].(*tb.IPv4) + if !ok { + return 0, fmt.Errorf("got network layer: %T, expected: *IPv4", layers[1]) + } + if *ipv4.Flags&header.IPv4FlagDontFragment != 0 { + return 0, fmt.Errorf("got IPv4 DF=1, expected DF=0") + } + return *ipv4.ID, nil +} + +// RFC 6864 section 4.2 states: "The IPv4 ID of non-atomic datagrams MUST NOT +// be reused when sending a copy of an earlier non-atomic datagram." 
+// +// This test creates a TCP connection, uses the IP_MTU_DISCOVER socket option +// to force the DF bit to be 0, and checks that a retransmitted segment has a +// different IPv4 Identification value than the original segment. +func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) + + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + remoteFD, _ := dut.Accept(listenFD) + defer dut.Close(remoteFD) + + dut.SetSockOptInt(remoteFD, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + + // TODO(b/129291778) The following socket option clears the DF bit on + // IP packets sent over the socket, and is currently not supported by + // gVisor. gVisor by default sends packets with DF=0 anyway, so the + // socket option being not supported does not affect the operation of + // this test. Once the socket option is supported, the following call + // can be changed to simply assert success. + ret, errno := dut.SetSockOptIntWithErrno(context.Background(), remoteFD, unix.IPPROTO_IP, linux.IP_MTU_DISCOVER, linux.IP_PMTUDISC_DONT) + if ret == -1 && errno != unix.ENOTSUP { + t.Fatalf("failed to set IP_MTU_DISCOVER socket option to IP_PMTUDISC_DONT: %s", errno) + } + + sampleData := []byte("Sample Data") + samplePayload := &tb.Payload{Bytes: sampleData} + + dut.Send(remoteFD, sampleData, 0) + if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("failed to receive TCP segment sent for RTT calculation: %s", err) + } + // Let the DUT estimate RTO with RTT from the DATA-ACK. + // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which + // we can skip sending this ACK. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + + expectTCP := &tb.TCP{SeqNum: tb.Uint32(uint32(*conn.RemoteSeqNum()))} + dut.Send(remoteFD, sampleData, 0) + originalID, err := recvTCPSegment(&conn, expectTCP, samplePayload) + if err != nil { + t.Fatalf("failed to receive TCP segment: %s", err) + } + + retransmitID, err := recvTCPSegment(&conn, expectTCP, samplePayload) + if err != nil { + t.Fatalf("failed to receive retransmitted TCP segment: %s", err) + } + if originalID == retransmitID { + t.Fatalf("unexpectedly got retransmitted TCP segment with same IPv4 ID field=%d", originalID) + } +} -- cgit v1.2.3 From ccf69bdd7e05a4e5f404fbef89a7f49f218645e2 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 29 May 2020 12:27:15 -0700 Subject: Implement IN_EXCL_UNLINK inotify option in vfs2. Limited to tmpfs. Inotify support in other filesystem implementations to follow. 
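For context, IN_EXCL_UNLINK is a per-watch flag passed to inotify_add_watch(2): by default a watch on a directory keeps reporting events that reach it through open file descriptors of children even after those children have been unlinked, and the flag suppresses exactly those events. A minimal sketch of requesting it from userspace, assuming golang.org/x/sys/unix and an illustrative path:

package main

import (
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.InotifyInit1(unix.IN_NONBLOCK)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)

	// Without IN_EXCL_UNLINK, reads and writes through an fd opened before
	// a child was unlinked still raise IN_ACCESS/IN_MODIFY on this watch;
	// with it, those post-unlink events are filtered out.
	if _, err := unix.InotifyAddWatch(fd, "/tmp/watched-dir", unix.IN_ALL_EVENTS|unix.IN_EXCL_UNLINK); err != nil {
		log.Fatal(err)
	}
}

The tmpfs changes below track this with an unlinked bit on the dentry and route parent notifications through Watches.NotifyWithExclusions, so events generated through already-unlinked children can be excluded for watches that asked for IN_EXCL_UNLINK.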
Updates #1479 PiperOrigin-RevId: 313828648 --- pkg/sentry/fsimpl/ext/dentry.go | 2 +- pkg/sentry/fsimpl/gofer/gofer.go | 2 +- pkg/sentry/fsimpl/kernfs/kernfs.go | 2 +- pkg/sentry/fsimpl/tmpfs/directory.go | 3 +- pkg/sentry/fsimpl/tmpfs/filesystem.go | 14 +-- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 18 +-- pkg/sentry/kernel/fd_table.go | 2 +- pkg/sentry/syscalls/linux/vfs2/read_write.go | 24 ++-- pkg/sentry/vfs/anonfs.go | 2 +- pkg/sentry/vfs/dentry.go | 6 +- pkg/sentry/vfs/inotify.go | 38 +++++-- pkg/sentry/vfs/vfs.go | 2 +- test/syscalls/linux/inotify.cc | 164 +++++++++++++++++++++++++++ 13 files changed, 235 insertions(+), 44 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go index 4d0deaf03..6bd1a9fc6 100644 --- a/pkg/sentry/fsimpl/ext/dentry.go +++ b/pkg/sentry/fsimpl/ext/dentry.go @@ -64,7 +64,7 @@ func (d *dentry) DecRef() { // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. // // TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *dentry) InotifyWithParent(events uint32, cookie uint32) {} +func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} // Watches implements vfs.DentryImpl.Watches. // diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 850482a19..3f3bd56f0 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1042,7 +1042,7 @@ func (d *dentry) decRefLocked() { // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. // // TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *dentry) InotifyWithParent(events uint32, cookie uint32) {} +func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} // Watches implements vfs.DentryImpl.Watches. // diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 3f1220791..bbee8ccda 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -228,7 +228,7 @@ func (d *Dentry) destroy() { // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. // // TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *Dentry) InotifyWithParent(events uint32, cookie uint32) {} +func (d *Dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} // Watches implements vfs.DentryImpl.Watches. 
// diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index 8bc475f88..70387cb9c 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -79,6 +79,7 @@ func (dir *directory) removeChildLocked(child *dentry) { dir.iterMu.Lock() dir.childList.Remove(child) dir.iterMu.Unlock() + child.unlinked = true } type directoryFD struct { @@ -112,7 +113,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba dir.iterMu.Lock() defer dir.iterMu.Unlock() - fd.dentry().InotifyWithParent(linux.IN_ACCESS, 0) + fd.dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) fd.inode().touchAtime(fd.vfsfd.Mount()) if fd.off == 0 { diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index b4159f904..183eb975c 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -182,7 +182,7 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa if dir { ev |= linux.IN_ISDIR } - parentDir.inode.watches.Notify(name, uint32(ev), 0) + parentDir.inode.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent) parentDir.inode.touchCMtime() return nil } @@ -247,7 +247,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EMLINK } d.inode.incLinksLocked() - d.inode.watches.Notify("", linux.IN_ATTRIB, 0) + d.inode.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent) parentDir.insertChildLocked(fs.newDentry(d.inode), name) return nil }) @@ -361,7 +361,7 @@ afterTrailingSymlink: if err != nil { return nil, err } - parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0) + parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent) parentDir.inode.touchCMtime() return fd, nil } @@ -613,7 +613,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } parentDir.removeChildLocked(child) - parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0) + parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent) // Remove links for child, child/., and child/.. child.inode.decLinksLocked() child.inode.decLinksLocked() @@ -636,7 +636,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts } if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { - d.InotifyWithParent(ev, 0) + d.InotifyWithParent(ev, 0, vfs.InodeEvent) } return nil } @@ -784,7 +784,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt return err } - d.InotifyWithParent(linux.IN_ATTRIB, 0) + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } @@ -800,7 +800,7 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, return err } - d.InotifyWithParent(linux.IN_ATTRIB, 0) + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 1d83b6840..f0e098702 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -163,6 +163,11 @@ type dentry struct { // filesystem.mu. name string + // unlinked indicates whether this dentry has been unlinked from its parent. + // It is only set to true on an unlink operation, and never set from true to + // false. unlinked is protected by filesystem.mu. + unlinked bool + // dentryEntry (ugh) links dentries into their parent directory.childList. 
dentryEntry @@ -202,7 +207,7 @@ func (d *dentry) DecRef() { } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. -func (d *dentry) InotifyWithParent(events uint32, cookie uint32) { +func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) { if d.inode.isDir() { events |= linux.IN_ISDIR } @@ -211,9 +216,9 @@ func (d *dentry) InotifyWithParent(events uint32, cookie uint32) { if d.parent != nil { // Note that d.parent or d.name may be stale if there is a concurrent // rename operation. Inotify does not provide consistency guarantees. - d.parent.inode.watches.Notify(d.name, events, cookie) + d.parent.inode.watches.NotifyWithExclusions(d.name, events, cookie, et, d.unlinked) } - d.inode.watches.Notify("", events, cookie) + d.inode.watches.Notify("", events, cookie, et) } // Watches implements vfs.DentryImpl.Watches. @@ -676,9 +681,8 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) return err } - // Generate inotify events. if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { - d.InotifyWithParent(ev, 0) + d.InotifyWithParent(ev, 0, vfs.InodeEvent) } return nil } @@ -701,7 +705,7 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption } // Generate inotify events. - d.InotifyWithParent(linux.IN_ATTRIB, 0) + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } @@ -713,7 +717,7 @@ func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { } // Generate inotify events. - d.InotifyWithParent(linux.IN_ATTRIB, 0) + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index ef73e1169..dbfcef0fa 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -158,7 +158,7 @@ func (f *FDTable) dropVFS2(file *vfs.FileDescription) { if file.IsWritable() { ev = linux.IN_CLOSE_WRITE } - file.Dentry().InotifyWithParent(ev, 0) + file.Dentry().InotifyWithParent(ev, 0, vfs.PathEvent) // Drop the table reference. 
file.DecRef() diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index 92b5631a3..7f9debd4a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -94,7 +94,7 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt n, err := file.Read(t, dst, opts) if err != syserror.ErrWouldBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -102,7 +102,7 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -135,7 +135,7 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt file.EventUnregister(&w) if total > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) } return total, err } @@ -258,7 +258,7 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of n, err := file.PRead(t, dst, offset, opts) if err != syserror.ErrWouldBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -266,7 +266,7 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -299,7 +299,7 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of file.EventUnregister(&w) if total > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) } return total, err } @@ -364,7 +364,7 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op n, err := file.Write(t, src, opts) if err != syserror.ErrWouldBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0) + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) } return n, err } @@ -372,7 +372,7 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0) + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) } return n, err } @@ -405,7 +405,7 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op file.EventUnregister(&w) if total > 0 { - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0) + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) } return total, err } @@ -528,7 +528,7 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o n, err := file.PWrite(t, src, offset, opts) if err != syserror.ErrWouldBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0) + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) } return n, err } @@ -536,7 +536,7 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o allowBlock, deadline, hasDeadline := 
blockPolicy(t, file) if !allowBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -569,7 +569,7 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o file.EventUnregister(&w) if total > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0) + file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) } return total, err } diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index 55a3d54cc..b7c6b60b8 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -301,7 +301,7 @@ func (d *anonDentry) DecRef() { // InotifyWithParent implements DentryImpl.InotifyWithParent. // // TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *anonDentry) InotifyWithParent(events uint32, cookie uint32) {} +func (d *anonDentry) InotifyWithParent(events uint32, cookie uint32, et EventType) {} // Watches implements DentryImpl.Watches. // diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go index d61b9e09b..24af13eb1 100644 --- a/pkg/sentry/vfs/dentry.go +++ b/pkg/sentry/vfs/dentry.go @@ -113,7 +113,7 @@ type DentryImpl interface { // // Note that the events may not actually propagate up to the user, depending // on the event masks. - InotifyWithParent(events uint32, cookie uint32) + InotifyWithParent(events uint32, cookie uint32, et EventType) // Watches returns the set of inotify watches for the file corresponding to // the Dentry. Dentries that are hard links to the same underlying file @@ -151,8 +151,8 @@ func (d *Dentry) isMounted() bool { // InotifyWithParent notifies all watches on the inodes for this dentry and // its parent of events. -func (d *Dentry) InotifyWithParent(events uint32, cookie uint32) { - d.impl.InotifyWithParent(events, cookie) +func (d *Dentry) InotifyWithParent(events uint32, cookie uint32, et EventType) { + d.impl.InotifyWithParent(events, cookie, et) } // Watches returns the set of inotify watches associated with d. diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 1d28ccb46..05a3051a4 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -33,6 +33,19 @@ import ( // must be a power 2 for rounding below. const inotifyEventBaseSize = 16 +// EventType defines different kinds of inotfiy events. +// +// The way events are labelled appears somewhat arbitrary, but they must match +// Linux so that IN_EXCL_UNLINK behaves as it does in Linux. +type EventType uint8 + +// PathEvent and InodeEvent correspond to FSNOTIFY_EVENT_PATH and +// FSNOTIFY_EVENT_INODE in Linux. +const ( + PathEvent EventType = iota + InodeEvent EventType = iota +) + // Inotify represents an inotify instance created by inotify_init(2) or // inotify_init1(2). Inotify implements FileDescriptionImpl. // @@ -419,13 +432,22 @@ func (w *Watches) Remove(id uint64) { } // Notify queues a new event with all watches in this set. -func (w *Watches) Notify(name string, events, cookie uint32) { +func (w *Watches) Notify(name string, events, cookie uint32, et EventType) { + w.NotifyWithExclusions(name, events, cookie, et, false) +} + +// NotifyWithExclusions queues a new event with watches in this set. Watches +// with IN_EXCL_UNLINK are skipped if the event is coming from a child that +// has been unlinked. +func (w *Watches) NotifyWithExclusions(name string, events, cookie uint32, et EventType, unlinked bool) { // N.B. 
We don't defer the unlocks because Notify is in the hot path of // all IO operations, and the defer costs too much for small IO // operations. w.mu.RLock() for _, watch := range w.ws { - // TODO(gvisor.dev/issue/1479): Skip for IN_EXCL_UNLINK cases. + if unlinked && watch.ExcludeUnlinkedChildren() && et == PathEvent { + continue + } watch.Notify(name, events, cookie) } w.mu.RUnlock() @@ -434,7 +456,7 @@ func (w *Watches) Notify(name string, events, cookie uint32) { // HandleDeletion is called when the watch target is destroyed to emit // the appropriate events. func (w *Watches) HandleDeletion() { - w.Notify("", linux.IN_DELETE_SELF, 0) + w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent) // TODO(gvisor.dev/issue/1479): This doesn't work because maps are not copied // by value. Ideally, we wouldn't have this circular locking so we can just @@ -655,8 +677,8 @@ func InotifyEventFromStatMask(mask uint32) uint32 { // InotifyRemoveChild sends the appriopriate notifications to the watch sets of // the child being removed and its parent. func InotifyRemoveChild(self, parent *Watches, name string) { - self.Notify("", linux.IN_ATTRIB, 0) - parent.Notify(name, linux.IN_DELETE, 0) + self.Notify("", linux.IN_ATTRIB, 0, InodeEvent) + parent.Notify(name, linux.IN_DELETE, 0, InodeEvent) // TODO(gvisor.dev/issue/1479): implement IN_EXCL_UNLINK. } @@ -668,8 +690,8 @@ func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches, dirEv = linux.IN_ISDIR } cookie := uniqueid.InotifyCookie(ctx) - oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie) - newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie) + oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent) + newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent) // Somewhat surprisingly, self move events do not have a cookie. - renamed.Notify("", linux.IN_MOVE_SELF, 0) + renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent) } diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index be6f21dba..52643a7c5 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -422,7 +422,7 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential } } - fd.Dentry().InotifyWithParent(linux.IN_OPEN, 0) + fd.Dentry().InotifyWithParent(linux.IN_OPEN, 0, PathEvent) return fd, nil } if !rp.handleError(err) { diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index e4565467b..2306d9cab 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -1778,6 +1778,170 @@ TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) { })); } +// Watches created with IN_EXCL_UNLINK will stop emitting events on fds for +// children that have already been unlinked. +// +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. +TEST(Inotify, ExcludeUnlink_NoRandomSave) { + const DisableSave ds; + // TODO(gvisor.dev/issue/1624): This test fails on VFS1. 
+ SKIP_IF(IsRunningWithVFS1()); + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + + // Unlink the child, which should cause further operations on the open file + // descriptor to be ignored. + ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); + int val = 0; + ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds()); + const std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_DELETE, wd, Basename(file.path()))})); +} + +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. +TEST(Inotify, ExcludeUnlinkDirectory_NoRandomSave) { + const DisableSave ds; + + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + TempPath dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(parent.path())); + std::string dirPath = dir.path(); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(dirPath.c_str(), O_RDONLY | O_DIRECTORY)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), parent.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + + // Unlink the dir, and then close the open fd. + ASSERT_THAT(rmdir(dirPath.c_str()), SyscallSucceeds()); + dir.reset(); + + const std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + // No close event should appear. + ASSERT_THAT(events, + Are({Event(IN_DELETE | IN_ISDIR, wd, Basename(dirPath))})); +} + +// If "dir/child" and "dir/child2" are links to the same file, and "dir/child" +// is unlinked, a watch on "dir" with IN_EXCL_UNLINK will exclude future events +// for fds on "dir/child" but not "dir/child2". +// +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. +TEST(Inotify, ExcludeUnlinkMultipleChildren_NoRandomSave) { + const DisableSave ds; + // TODO(gvisor.dev/issue/1624): This test fails on VFS1. + SKIP_IF(IsRunningWithVFS1()); + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + std::string path1 = file.path(); + std::string path2 = NewTempAbsPathInDir(dir.path()); + + const int rc = link(path1.c_str(), path2.c_str()); + // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox. 
+ SKIP_IF(IsRunningOnGvisor() && rc != 0 && + (errno == EPERM || errno == ENOENT)); + ASSERT_THAT(rc, SyscallSucceeds()); + const FileDescriptor fd1 = + ASSERT_NO_ERRNO_AND_VALUE(Open(path1.c_str(), O_RDWR)); + const FileDescriptor fd2 = + ASSERT_NO_ERRNO_AND_VALUE(Open(path2.c_str(), O_RDWR)); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + + // After unlinking path1, only events on the fd for path2 should be generated. + ASSERT_THAT(unlink(path1.c_str()), SyscallSucceeds()); + ASSERT_THAT(write(fd1.get(), "x", 1), SyscallSucceeds()); + ASSERT_THAT(write(fd2.get(), "x", 1), SyscallSucceeds()); + + const std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_DELETE, wd, Basename(path1)), + Event(IN_MODIFY, wd, Basename(path2)), + })); +} + +// On native Linux, actions of data type FSNOTIFY_EVENT_INODE are not affected +// by IN_EXCL_UNLINK (see +// fs/notify/inotify/inotify_fsnotify.c:inotify_handle_event). Inode-level +// events include changes to metadata and extended attributes. +// +// We need to disable S/R because there are filesystems where we cannot re-open +// fds to an unlinked file across S/R, e.g. gofer-backed filesytems. +TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) { + const DisableSave ds; + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR)); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + + // NOTE(b/157163751): Create another link before unlinking. This is needed for + // the gofer filesystem in gVisor, where open fds will not work once the link + // count hits zero. In VFS2, we end up skipping the gofer test anyway, because + // hard links are not supported for gofer fs. + if (IsRunningOnGvisor()) { + std::string link_path = NewTempAbsPath(); + const int rc = link(file.path().c_str(), link_path.c_str()); + // NOTE(b/34861058): link(2) is only supported on tmpfs in the sandbox. + SKIP_IF(rc != 0 && (errno == EPERM || errno == ENOENT)); + ASSERT_THAT(rc, SyscallSucceeds()); + } + + // Even after unlinking, inode-level operations will trigger events regardless + // of IN_EXCL_UNLINK. + ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); + + // Perform various actions on fd. + ASSERT_THAT(ftruncate(fd.get(), 12345), SyscallSucceeds()); + std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_DELETE, wd, Basename(file.path())), + Event(IN_MODIFY, wd, Basename(file.path())), + })); + + struct timeval times[2] = {{1, 0}, {2, 0}}; + ASSERT_THAT(futimes(fd.get(), times), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))})); + + // S/R is disabled on this entire test due to behavior with unlink; it must + // also be disabled after this point because of fchmod. 
+ ASSERT_THAT(fchmod(fd.get(), 0777), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))})); +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 089c88f2e87fb14cead02caea7f9dba0a5957395 Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Fri, 29 May 2020 12:27:55 -0700 Subject: Move TCP to CLOSED from SYN-RCVD on RST. RST handling is broken when the TCP state transitions from SYN-SENT to SYN-RCVD in case of simultaneous open. An incoming RST should trigger cleanup of the endpoint. RFC793, section 3.9, page 70. Fixes #2814 PiperOrigin-RevId: 313828777 --- pkg/tcpip/transport/tcp/connect.go | 1 + test/packetimpact/testbench/dut.go | 4 +- test/packetimpact/tests/BUILD | 20 ++++++ test/packetimpact/tests/tcp_synrcvd_reset_test.go | 52 ++++++++++++++ test/packetimpact/tests/tcp_synsent_reset_test.go | 88 +++++++++++++++++++++++ 5 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 test/packetimpact/tests/tcp_synrcvd_reset_test.go create mode 100644 test/packetimpact/tests/tcp_synsent_reset_test.go (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index a7e088d4e..e4a06c9e1 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -1347,6 +1347,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ e.setEndpointState(StateError) e.HardError = err + e.workerCleanup = true // Lock released below. epilogue() return err diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index b919a3c2e..f3e326adf 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -241,7 +241,9 @@ func (dut *DUT) Connect(fd int32, sa unix.Sockaddr) { ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() ret, err := dut.ConnectWithErrno(ctx, fd, sa) - if ret != 0 { + // Ignore 'operation in progress' error that can be returned when the socket + // is non-blocking. + if err != syscall.Errno(unix.EINPROGRESS) && ret != 0 { dut.t.Fatalf("failed to connect socket: %s", err) } } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index e2a320159..9cd695200 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -169,6 +169,26 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_synsent_reset", + srcs = ["tcp_synsent_reset_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +packetimpact_go_test( + name = "tcp_synrcvd_reset", + srcs = ["tcp_synrcvd_reset_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "icmpv6_param_problem", srcs = ["icmpv6_param_problem_test.go"], diff --git a/test/packetimpact/tests/tcp_synrcvd_reset_test.go b/test/packetimpact/tests/tcp_synrcvd_reset_test.go new file mode 100644 index 000000000..e6ba84cab --- /dev/null +++ b/test/packetimpact/tests/tcp_synrcvd_reset_test.go @@ -0,0 +1,52 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_syn_reset_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +// TestTCPSynRcvdReset tests transition from SYN-RCVD to CLOSED. +func TestTCPSynRcvdReset(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + // Expect dut connection to have transitioned to SYN-RCVD state. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + t.Fatalf("expected SYN-ACK %s", err) + } + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}) + // Expect the connection to have transitioned SYN-RCVD to CLOSED. + // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + t.Fatalf("expected a TCP RST %s", err) + } +} diff --git a/test/packetimpact/tests/tcp_synsent_reset_test.go b/test/packetimpact/tests/tcp_synsent_reset_test.go new file mode 100644 index 000000000..6898a2239 --- /dev/null +++ b/test/packetimpact/tests/tcp_synsent_reset_test.go @@ -0,0 +1,88 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_synsent_reset_test + +import ( + "flag" + "net" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +// dutSynSentState sets up the dut connection in SYN-SENT state. +func dutSynSentState(t *testing.T) (*tb.DUT, *tb.TCPIPv4, uint16, uint16) { + dut := tb.NewDUT(t) + + clientFD, clientPort := dut.CreateBoundSocket(unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(tb.RemoteIPv4)) + port := uint16(9001) + conn := tb.NewTCPIPv4(t, tb.TCP{SrcPort: &port, DstPort: &clientPort}, tb.TCP{SrcPort: &clientPort, DstPort: &port}) + + sa := unix.SockaddrInet4{Port: int(port)} + copy(sa.Addr[:], net.IP(net.ParseIP(tb.LocalIPv4)).To4()) + // Bring the dut to SYN-SENT state with a non-blocking connect. 
+ dut.Connect(clientFD, &sa) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}, nil, time.Second); err != nil { + t.Fatalf("expected SYN\n") + } + + return &dut, &conn, port, clientPort +} + +// TestTCPSynSentReset tests RFC793, p67: SYN-SENT to CLOSED transition. +func TestTCPSynSentReset(t *testing.T) { + dut, conn, _, _ := dutSynSentState(t) + defer conn.Close() + defer dut.TearDown() + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)}) + // Expect the connection to have closed. + // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + t.Fatalf("expected a TCP RST") + } +} + +// TestTCPSynSentRcvdReset tests RFC793, p70, SYN-SENT to SYN-RCVD to CLOSED +// transitions. +func TestTCPSynSentRcvdReset(t *testing.T) { + dut, c, remotePort, clientPort := dutSynSentState(t) + defer dut.TearDown() + defer c.Close() + + conn := tb.NewTCPIPv4(t, tb.TCP{SrcPort: &remotePort, DstPort: &clientPort}, tb.TCP{SrcPort: &clientPort, DstPort: &remotePort}) + defer conn.Close() + // Initiate new SYN connection with the same port pair + // (simultaneous open case), expect the dut connection to move to + // SYN-RCVD state + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + t.Fatalf("expected SYN-ACK %s\n", err) + } + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}) + // Expect the connection to have transitioned SYN-RCVD to CLOSED. + // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + t.Fatalf("expected a TCP RST") + } +} -- cgit v1.2.3 From 790811f75783cd3cb82b6aba5e8152129b2d1d4d Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 29 May 2020 15:37:34 -0700 Subject: Fix copied comment mistakes. PiperOrigin-RevId: 313862843 --- test/iptables/filter_input.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 14e385f5a..872021358 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -670,7 +670,7 @@ func (FilterInputInvertDestination) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } -// FilterInputSource verifies that we can filter packets via `-d +// FilterInputSource verifies that we can filter packets via `-s // `. type FilterInputSource struct{} @@ -699,7 +699,7 @@ func (FilterInputSource) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } -// FilterInputInvertSource verifies that we can filter packets via `! -d +// FilterInputInvertSource verifies that we can filter packets via `! -s // `. 
type FilterInputInvertSource struct{} -- cgit v1.2.3 From a9b47390c821942d60784e308f681f213645049c Mon Sep 17 00:00:00 2001 From: Zeling Feng Date: Fri, 29 May 2020 17:22:56 -0700 Subject: Test TCP should queue RECEIVE request in SYN-SENT PiperOrigin-RevId: 313878910 --- test/packetimpact/dut/posix_server.cc | 8 ++ test/packetimpact/proto/posix_server.proto | 13 ++++ test/packetimpact/testbench/connections.go | 20 +++-- test/packetimpact/testbench/dut.go | 42 +++++++++++ test/packetimpact/tests/BUILD | 12 +++ .../tests/tcp_queue_receive_in_syn_sent_test.go | 87 ++++++++++++++++++++++ 6 files changed, 175 insertions(+), 7 deletions(-) create mode 100644 test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go (limited to 'test') diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index dc3024f44..a34307bb2 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -158,6 +158,14 @@ class PosixImpl final : public posix_server::Posix::Service { return ::grpc::Status::OK; } + ::grpc::Status Fcntl(grpc_impl::ServerContext *context, + const ::posix_server::FcntlRequest *request, + ::posix_server::FcntlResponse *response) override { + response->set_ret(::fcntl(request->fd(), request->cmd(), request->arg())); + response->set_errno_(errno); + return ::grpc::Status::OK; + } + ::grpc::Status GetSockName( grpc_impl::ServerContext *context, const ::posix_server::GetSockNameRequest *request, diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto index 9dca563f1..77da0fb3a 100644 --- a/test/packetimpact/proto/posix_server.proto +++ b/test/packetimpact/proto/posix_server.proto @@ -91,6 +91,17 @@ message ConnectResponse { int32 errno_ = 2; // "errno" may fail to compile in c++. } +message FcntlRequest { + int32 fd = 1; + int32 cmd = 2; + int32 arg = 3; +} + +message FcntlResponse { + int32 ret = 1; + int32 errno_ = 2; +} + message GetSockNameRequest { int32 sockfd = 1; } @@ -198,6 +209,8 @@ service Posix { rpc Close(CloseRequest) returns (CloseResponse); // Call connect() on the DUT. rpc Connect(ConnectRequest) returns (ConnectResponse); + // Call fcntl() on the DUT. + rpc Fcntl(FcntlRequest) returns (FcntlResponse); // Call getsockname() on the DUT. rpc GetSockName(GetSockNameRequest) returns (GetSockNameResponse); // Call getsockopt() on the DUT. diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index bf104e5ca..fb32964e9 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -266,14 +266,14 @@ func SeqNumValue(v seqnum.Value) *seqnum.Value { } // newTCPState creates a new TCPState. 
-func newTCPState(domain int, out, in TCP) (*tcpState, error) { +func newTCPState(domain int, out, in TCP) (*tcpState, unix.Sockaddr, error) { portPickerFD, localAddr, err := pickPort(domain, unix.SOCK_STREAM) if err != nil { - return nil, err + return nil, nil, err } localPort, err := portFromSockaddr(localAddr) if err != nil { - return nil, err + return nil, nil, err } s := tcpState{ out: TCP{SrcPort: &localPort}, @@ -283,12 +283,12 @@ func newTCPState(domain int, out, in TCP) (*tcpState, error) { finSent: false, } if err := s.out.merge(&out); err != nil { - return nil, err + return nil, nil, err } if err := s.in.merge(&in); err != nil { - return nil, err + return nil, nil, err } - return &s, nil + return &s, localAddr, nil } func (s *tcpState) outgoing() Layer { @@ -606,7 +606,7 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { if err != nil { t.Fatalf("can't make ipv4State: %s", err) } - tcpState, err := newTCPState(unix.AF_INET, outgoingTCP, incomingTCP) + tcpState, localAddr, err := newTCPState(unix.AF_INET, outgoingTCP, incomingTCP) if err != nil { t.Fatalf("can't make tcpState: %s", err) } @@ -623,6 +623,7 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { layerStates: []layerState{etherState, ipv4State, tcpState}, injector: injector, sniffer: sniffer, + localAddr: localAddr, t: t, } } @@ -703,6 +704,11 @@ func (conn *TCPIPv4) SynAck() *TCP { return conn.state().synAck } +// LocalAddr gets the local socket address of this connection. +func (conn *TCPIPv4) LocalAddr() unix.Sockaddr { + return conn.localAddr +} + // IPv6Conn maintains the state for all the layers in a IPv6 connection. type IPv6Conn Connection diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index f3e326adf..2a2afecb5 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -262,6 +262,35 @@ func (dut *DUT) ConnectWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } +// Fcntl calls fcntl on the DUT and causes a fatal test failure if it +// doesn't succeed. If more control over the timeout or error handling is +// needed, use FcntlWithErrno. +func (dut *DUT) Fcntl(fd, cmd, arg int32) int32 { + dut.t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) + defer cancel() + ret, err := dut.FcntlWithErrno(ctx, fd, cmd, arg) + if ret == -1 { + dut.t.Fatalf("failed to Fcntl: ret=%d, errno=%s", ret, err) + } + return ret +} + +// FcntlWithErrno calls fcntl on the DUT. +func (dut *DUT) FcntlWithErrno(ctx context.Context, fd, cmd, arg int32) (int32, error) { + dut.t.Helper() + req := pb.FcntlRequest{ + Fd: fd, + Cmd: cmd, + Arg: arg, + } + resp, err := dut.posixServer.Fcntl(ctx, &req) + if err != nil { + dut.t.Fatalf("failed to call Fcntl: %s", err) + } + return resp.GetRet(), syscall.Errno(resp.GetErrno_()) +} + // GetSockName calls getsockname on the DUT and causes a fatal test failure if // it doesn't succeed. If more control over the timeout or error handling is // needed, use GetSockNameWithErrno. @@ -478,6 +507,19 @@ func (dut *DUT) SendToWithErrno(ctx context.Context, sockfd int32, buf []byte, f return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } +// SetNonBlocking will set O_NONBLOCK flag for fd if nonblocking +// is true, otherwise it will clear the flag. 
+func (dut *DUT) SetNonBlocking(fd int32, nonblocking bool) { + dut.t.Helper() + flags := dut.Fcntl(fd, unix.F_GETFL, 0) + if nonblocking { + flags |= unix.O_NONBLOCK + } else { + flags &= ^unix.O_NONBLOCK + } + dut.Fcntl(fd, unix.F_SETFL, flags) +} + func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, optval *pb.SockOptVal) (int32, error) { dut.t.Helper() req := pb.SetSockOptRequest{ diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 9cd695200..1598c61e9 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -169,6 +169,18 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_queue_receive_in_syn_sent", + srcs = ["tcp_queue_receive_in_syn_sent_test.go"], + # TODO(b/157658105): Fix netstack then remove the line below. + expect_netstack_failure = True, + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "tcp_synsent_reset", srcs = ["tcp_synsent_reset_test.go"], diff --git a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go new file mode 100644 index 000000000..5cc93fa24 --- /dev/null +++ b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go @@ -0,0 +1,87 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_queue_receive_in_syn_sent_test + +import ( + "bytes" + "context" + "encoding/hex" + "errors" + "flag" + "net" + "sync" + "syscall" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +func TestQueueReceiveInSynSent(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + + socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(tb.RemoteIPv4)) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + sampleData := []byte("Sample Data") + + dut.SetNonBlocking(socket, true) + if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) { + t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err) + } + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { + t.Fatalf("expected a SYN from DUT, but got none: %s", err) + } + + // Issue RECEIVE call in SYN-SENT, this should be queued for process until the connection + // is established. 
+ dut.SetNonBlocking(socket, false) + var wg sync.WaitGroup + defer wg.Wait() + wg.Add(1) + go func() { + defer wg.Done() + ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) + defer cancel() + n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0) + if n == -1 { + t.Fatalf("failed to recv on DUT: %s", err) + } + if got := buff[:n]; !bytes.Equal(got, sampleData) { + t.Fatalf("received data don't match, got:\n%s, want:\n%s", hex.Dump(got), hex.Dump(sampleData)) + } + }() + + // The following sleep is used to prevent the connection from being established while the + // RPC is in flight. + time.Sleep(time.Second) + + // Bring the connection to Established. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) + if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + t.Fatalf("expected an ACK from DUT, but got none: %s", err) + } + + // Send sample data to DUT. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, &tb.Payload{Bytes: sampleData}) +} -- cgit v1.2.3 From 474d9b260930dc220660df3eab8dc55405c34215 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 1 Jun 2020 10:40:38 -0700 Subject: Internal change. PiperOrigin-RevId: 314157710 --- test/packetimpact/dut/posix_server.cc | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index a34307bb2..a1a5c3612 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -61,7 +61,7 @@ } ::grpc::Status proto_to_sockaddr(const posix_server::Sockaddr &sockaddr_proto, - sockaddr_storage *addr) { + sockaddr_storage *addr, socklen_t *addr_len) { switch (sockaddr_proto.sockaddr_case()) { case posix_server::Sockaddr::SockaddrCase::kIn: { auto proto_in = sockaddr_proto.in(); @@ -74,6 +74,7 @@ addr_in->sin_port = htons(proto_in.port()); proto_in.addr().copy(reinterpret_cast(&addr_in->sin_addr.s_addr), 4); + *addr_len = sizeof(*addr_in); break; } case posix_server::Sockaddr::SockaddrCase::kIn6: { @@ -89,6 +90,7 @@ proto_in6.addr().copy( reinterpret_cast(&addr_in6->sin6_addr.s6_addr), 16); addr_in6->sin6_scope_id = htonl(proto_in6.scope_id()); + *addr_len = sizeof(*addr_in6); break; } case posix_server::Sockaddr::SockaddrCase::SOCKADDR_NOT_SET: @@ -120,13 +122,14 @@ class PosixImpl final : public posix_server::Posix::Service { } sockaddr_storage addr; - auto err = proto_to_sockaddr(request->addr(), &addr); + socklen_t addr_len; + auto err = proto_to_sockaddr(request->addr(), &addr, &addr_len); if (!err.ok()) { return err; } - response->set_ret(bind(request->sockfd(), - reinterpret_cast(&addr), sizeof(addr))); + response->set_ret( + bind(request->sockfd(), reinterpret_cast(&addr), addr_len)); response->set_errno_(errno); return ::grpc::Status::OK; } @@ -147,13 +150,14 @@ class PosixImpl final : public posix_server::Posix::Service { "Missing address"); } sockaddr_storage addr; - auto err = proto_to_sockaddr(request->addr(), &addr); + socklen_t addr_len; + auto err = proto_to_sockaddr(request->addr(), &addr, &addr_len); if (!err.ok()) { return err; } - response->set_ret(connect( - request->sockfd(), reinterpret_cast(&addr), sizeof(addr))); + response->set_ret(connect(request->sockfd(), + reinterpret_cast(&addr), addr_len)); response->set_errno_(errno); return ::grpc::Status::OK; } @@ -245,14 +249,15 @@ class PosixImpl final : public posix_server::Posix::Service { "Missing 
address"); } sockaddr_storage addr; - auto err = proto_to_sockaddr(request->dest_addr(), &addr); + socklen_t addr_len; + auto err = proto_to_sockaddr(request->dest_addr(), &addr, &addr_len); if (!err.ok()) { return err; } - response->set_ret(::sendto( - request->sockfd(), request->buf().data(), request->buf().size(), - request->flags(), reinterpret_cast(&addr), sizeof(addr))); + response->set_ret(::sendto(request->sockfd(), request->buf().data(), + request->buf().size(), request->flags(), + reinterpret_cast(&addr), addr_len)); response->set_errno_(errno); return ::grpc::Status::OK; } -- cgit v1.2.3 From 07c3b1dc5561f8ceb376cba1d79a41cd4b7a2533 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 1 Jun 2020 13:28:49 -0700 Subject: Skip proc/pid/fd socket test for VFS1 only. PiperOrigin-RevId: 314192359 --- test/syscalls/linux/socket_unix.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc index 8bf663e8b..591cab3fd 100644 --- a/test/syscalls/linux/socket_unix.cc +++ b/test/syscalls/linux/socket_unix.cc @@ -256,10 +256,9 @@ TEST_P(UnixSocketPairTest, ShutdownWrite) { } TEST_P(UnixSocketPairTest, SocketReopenFromProcfs) { - // TODO(b/122310852): We should be returning ENXIO and NOT EIO. - // TODO(github.dev/issue/1624): This should be resolved in VFS2. Verify - // that this is the case and delete the SKIP_IF once we delete VFS1. - SKIP_IF(IsRunningOnGvisor()); + // TODO(gvisor.dev/issue/1624): In VFS1, we return EIO instead of ENXIO (see + // b/122310852). Remove this skip once VFS1 is deleted. + SKIP_IF(IsRunningWithVFS1()); auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); // Opening a socket pair via /proc/self/fd/X is a ENXIO. -- cgit v1.2.3 From 35a3f462d9ccc5237f0200fcbeafaebb110b5134 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 1 Jun 2020 13:29:17 -0700 Subject: Fix inotify test. PiperOrigin-RevId: 314192441 --- test/syscalls/linux/inotify.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 2306d9cab..ac5ef2e51 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -593,12 +593,12 @@ TEST(Inotify, SizeZeroReadWriteGeneratesNothing) { TEST(Inotify, FailedFileCreationGeneratesNoEvents) { const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string dir_path = dir.path(); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); - ASSERT_NO_ERRNO_AND_VALUE( - InotifyAddWatch(fd.get(), dir.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), dir_path, IN_ALL_EVENTS)); - const char* p = dir.path().c_str(); + const char* p = dir_path.c_str(); ASSERT_THAT(mkdir(p, 0777), SyscallFails()); ASSERT_THAT(mknod(p, S_IFIFO, 0777), SyscallFails()); ASSERT_THAT(symlink(p, p), SyscallFails()); -- cgit v1.2.3 From 6ef5924725812f5885880cf57821fe2cd49b808d Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Mon, 1 Jun 2020 14:54:09 -0700 Subject: Deflake pty_test_linux. 
PiperOrigin-RevId: 314208973 --- test/syscalls/linux/pty.cc | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index b8a0159ba..aabfa6955 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -364,6 +364,12 @@ PosixErrorOr PollAndReadFd(int fd, void* buf, size_t count, ssize_t n = ReadFd(fd, static_cast(buf) + completed, count - completed); if (n < 0) { + if (errno == EAGAIN) { + // Linux sometimes returns EAGAIN from this read, despite the fact that + // poll returned success. Let's just do what do as we are told and try + // again. + continue; + } return PosixError(errno, "read failed"); } completed += n; -- cgit v1.2.3 From 050d8e6e331e01d732471e4641dc51346e7a7d3b Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 1 Jun 2020 18:30:09 -0700 Subject: Add inotify events for extended attributes and splice. Splice, setxattr and removexattr should generate events. Note that VFS2 already generates events for extended attributes. Updates #1479. PiperOrigin-RevId: 314244261 --- pkg/sentry/syscalls/linux/sys_splice.go | 6 ++ pkg/sentry/syscalls/linux/sys_xattr.go | 12 +++- pkg/sentry/syscalls/linux/vfs2/splice.go | 5 ++ test/syscalls/linux/inotify.cc | 94 +++++++++++++++++++++++++++++++- 4 files changed, 112 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 39f2b79ec..77c78889d 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -80,6 +80,12 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB } } + if total > 0 { + // On Linux, inotify behavior is not very consistent with splice(2). We try + // our best to emulate Linux for very basic calls to splice, where for some + // reason, events are generated for output files, but not input files. + outFile.Dirent.InotifyEvent(linux.IN_MODIFY, 0) + } return total, err } diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index 2de5e3422..c24946160 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -207,7 +207,11 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si return syserror.EOPNOTSUPP } - return d.Inode.SetXattr(t, d, name, value, flags) + if err := d.Inode.SetXattr(t, d, name, value, flags); err != nil { + return err + } + d.InotifyEvent(linux.IN_ATTRIB, 0) + return nil } func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { @@ -418,7 +422,11 @@ func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error { return syserror.EOPNOTSUPP } - return d.Inode.RemoveXattr(t, d, name) + if err := d.Inode.RemoveXattr(t, d, name); err != nil { + return err + } + d.InotifyEvent(linux.IN_ATTRIB, 0) + return nil } // LINT.ThenChange(vfs2/xattr.go) diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 8f3c22a02..945a364a7 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -187,6 +187,11 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if n == 0 { return 0, nil, err } + + // On Linux, inotify behavior is not very consistent with splice(2). 
We try + // our best to emulate Linux for very basic calls to splice, where for some + // reason, events are generated for output files, but not input files. + outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) return uintptr(n), nil, nil } diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index ac5ef2e51..1d1a7171d 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1655,9 +1656,44 @@ TEST(Inotify, EpollNoDeadlock) { } } -TEST(Inotify, SpliceEvent) { - // TODO(gvisor.dev/issue/138): Implement splice in VFS2. - SKIP_IF(IsRunningOnGvisor() && !IsRunningWithVFS1()); +// On Linux, inotify behavior is not very consistent with splice(2). We try our +// best to emulate Linux for very basic calls to splice. +TEST(Inotify, SpliceOnWatchTarget) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, 1, /*flags=*/0), + SyscallSucceedsWithValue(1)); + + // Surprisingly, events are not generated in Linux if we read from a file. + std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({})); + + EXPECT_THAT(splice(pipes[0], nullptr, fd.get(), nullptr, 1, /*flags=*/0), + SyscallSucceedsWithValue(1)); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({ + Event(IN_MODIFY, dir_wd, Basename(file.path())), + Event(IN_MODIFY, file_wd), + })); +} + +TEST(Inotify, SpliceOnInotifyFD) { int pipes[2]; ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); @@ -1719,6 +1755,58 @@ TEST(Inotify, LinkOnOtherParent) { EXPECT_THAT(events, Are({})); } +TEST(Inotify, Xattr) { + // TODO(gvisor.dev/issue/1636): Support extended attributes in runsc gofer. 
+ SKIP_IF(IsRunningOnGvisor()); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string path = file.path(); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_RDWR)); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), path, IN_ALL_EVENTS)); + + const char* cpath = path.c_str(); + const char* name = "user.test"; + int val = 123; + ASSERT_THAT(setxattr(cpath, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(getxattr(cpath, name, &val, sizeof(val)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + char list[100]; + ASSERT_THAT(listxattr(cpath, list, sizeof(list)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(removexattr(cpath, name), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(fsetxattr(fd.get(), name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(fgetxattr(fd.get(), name, &val, sizeof(val)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(flistxattr(fd.get(), list, sizeof(list)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); +} + TEST(Inotify, Exec) { const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const TempPath bin = ASSERT_NO_ERRNO_AND_VALUE( -- cgit v1.2.3 From 4b5eae39f201ffbe7f4a0e08a28380099469efe8 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Mon, 1 Jun 2020 23:01:35 -0700 Subject: Enable VFS2 to runsc syscall tests Updates #1487 PiperOrigin-RevId: 314271995 --- test/runner/defs.bzl | 49 ++++- test/runner/runner.go | 4 + test/syscalls/BUILD | 494 +++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 454 insertions(+), 93 deletions(-) (limited to 'test') diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 0a75b158f..402ba4064 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -60,7 +60,8 @@ def _syscall_test( network = "none", file_access = "exclusive", overlay = False, - add_uds_tree = False): + add_uds_tree = False, + vfs2 = False): # Prepend "runsc" to non-native platform names. full_platform = platform if platform == "native" else "runsc_" + platform @@ -70,6 +71,8 @@ def _syscall_test( name += "_shared" if overlay: name += "_overlay" + if vfs2: + name += "_vfs2" if network != "none": name += "_" + network + "net" @@ -102,6 +105,7 @@ def _syscall_test( "--file-access=" + file_access, "--overlay=" + str(overlay), "--add-uds-tree=" + str(add_uds_tree), + "--vfs2=" + str(vfs2), ] # Call the rule above. 
@@ -123,6 +127,7 @@ def syscall_test( add_overlay = False, add_uds_tree = False, add_hostinet = False, + vfs2 = False, tags = None): """syscall_test is a macro that will create targets for all platforms. @@ -160,6 +165,29 @@ def syscall_test( tags = platform_tags + tags, ) + vfs2_tags = list(tags) + if vfs2: + # Add tag to easily run VFS2 tests with --test_tag_filters=vfs2 + vfs2_tags.append("vfs2") + + else: + # Don't automatically run tests tests not yet passing. + vfs2_tags.append("manual") + vfs2_tags.append("noguitar") + vfs2_tags.append("notap") + + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = default_platform, + use_tmpfs = use_tmpfs, + add_uds_tree = add_uds_tree, + tags = platforms[default_platform] + vfs2_tags, + vfs2 = True, + ) + + # TODO(gvisor.dev/issue/1487): Enable VFS2 overlay tests. if add_overlay: _syscall_test( test = test, @@ -172,6 +200,18 @@ def syscall_test( overlay = True, ) + if add_hostinet: + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = default_platform, + use_tmpfs = use_tmpfs, + network = "host", + add_uds_tree = add_uds_tree, + tags = platforms[default_platform] + tags, + ) + if not use_tmpfs: # Also test shared gofer access. _syscall_test( @@ -184,15 +224,14 @@ def syscall_test( tags = platforms[default_platform] + tags, file_access = "shared", ) - - if add_hostinet: _syscall_test( test = test, shard_count = shard_count, size = size, platform = default_platform, use_tmpfs = use_tmpfs, - network = "host", add_uds_tree = add_uds_tree, - tags = platforms[default_platform] + tags, + tags = platforms[default_platform] + vfs2_tags, + file_access = "shared", + vfs2 = True, ) diff --git a/test/runner/runner.go b/test/runner/runner.go index e048e5a9c..948e3a8ef 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -46,6 +46,7 @@ var ( useTmpfs = flag.Bool("use-tmpfs", false, "mounts tmpfs for /tmp") fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay") + vfs2 = flag.Bool("vfs2", false, "enable VFS2") parallel = flag.Bool("parallel", false, "run tests in parallel") runscPath = flag.String("runsc", "", "path to runsc binary") @@ -146,6 +147,9 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { if *overlay { args = append(args, "-overlay") } + if *vfs2 { + args = append(args, "-vfs2") + } if *debug { args = append(args, "-debug", "-log-packets=true") } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 9800a0cdf..3406a2de8 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -2,22 +2,33 @@ load("//test/runner:defs.bzl", "syscall_test") package(licenses = ["notice"]) -syscall_test(test = "//test/syscalls/linux:32bit_test") +syscall_test( + test = "//test/syscalls/linux:32bit_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:accept_bind_stream_test") +syscall_test( + test = "//test/syscalls/linux:accept_bind_stream_test", + vfs2 = "True", +) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:accept_bind_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:access_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:affinity_test") +syscall_test( + test = "//test/syscalls/linux:affinity_test", + vfs2 = "True", +) syscall_test( add_overlay = True, @@ -28,11 +39,18 @@ syscall_test( size = "medium", shard_count = 5, test = 
"//test/syscalls/linux:alarm_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:arch_prctl_test") +syscall_test( + test = "//test/syscalls/linux:arch_prctl_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:bad_test") +syscall_test( + test = "//test/syscalls/linux:bad_test", + vfs2 = "True", +) syscall_test( size = "large", @@ -40,9 +58,15 @@ syscall_test( test = "//test/syscalls/linux:bind_test", ) -syscall_test(test = "//test/syscalls/linux:brk_test") +syscall_test( + test = "//test/syscalls/linux:brk_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_test") +syscall_test( + test = "//test/syscalls/linux:socket_test", + vfs2 = "True", +) syscall_test( size = "large", @@ -51,16 +75,19 @@ syscall_test( # involve much concurrency, TSAN's usefulness here is limited anyway. tags = ["nogotsan"], test = "//test/syscalls/linux:socket_stress_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:chdir_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:chmod_test", + vfs2 = "True", ) syscall_test( @@ -68,6 +95,7 @@ syscall_test( add_overlay = True, test = "//test/syscalls/linux:chown_test", use_tmpfs = True, # chwon tests require gofer to be running as root. + vfs2 = "True", ) syscall_test( @@ -75,45 +103,70 @@ syscall_test( test = "//test/syscalls/linux:chroot_test", ) -syscall_test(test = "//test/syscalls/linux:clock_getres_test") +syscall_test( + test = "//test/syscalls/linux:clock_getres_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:clock_gettime_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:clock_nanosleep_test") +syscall_test( + test = "//test/syscalls/linux:clock_nanosleep_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:concurrency_test") +syscall_test( + test = "//test/syscalls/linux:concurrency_test", + vfs2 = "True", +) syscall_test( add_uds_tree = True, test = "//test/syscalls/linux:connect_external_test", use_tmpfs = True, + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:creat_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:dev_test") +syscall_test( + test = "//test/syscalls/linux:dev_test", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:dup_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:epoll_test") +syscall_test( + test = "//test/syscalls/linux:epoll_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:eventfd_test") +syscall_test( + test = "//test/syscalls/linux:eventfd_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:exceptions_test") +syscall_test( + test = "//test/syscalls/linux:exceptions_test", + vfs2 = "True", +) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:exec_test", + vfs2 = "True", ) syscall_test( @@ -122,7 +175,10 @@ syscall_test( test = "//test/syscalls/linux:exec_binary_test", ) -syscall_test(test = "//test/syscalls/linux:exit_test") +syscall_test( + test = "//test/syscalls/linux:exit_test", + vfs2 = "True", +) syscall_test( add_overlay = True, @@ -134,11 +190,15 @@ syscall_test( test = "//test/syscalls/linux:fallocate_test", ) -syscall_test(test = "//test/syscalls/linux:fault_test") +syscall_test( + test = "//test/syscalls/linux:fault_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:fchdir_test", + vfs2 = "True", ) syscall_test( @@ 
-152,11 +212,20 @@ syscall_test( test = "//test/syscalls/linux:flock_test", ) -syscall_test(test = "//test/syscalls/linux:fork_test") +syscall_test( + test = "//test/syscalls/linux:fork_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:fpsig_fork_test") +syscall_test( + test = "//test/syscalls/linux:fpsig_fork_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:fpsig_nested_test") +syscall_test( + test = "//test/syscalls/linux:fpsig_nested_test", + vfs2 = "True", +) syscall_test( add_overlay = True, @@ -167,20 +236,33 @@ syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:futex_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:getcpu_host_test") +syscall_test( + test = "//test/syscalls/linux:getcpu_host_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:getcpu_test") +syscall_test( + test = "//test/syscalls/linux:getcpu_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:getdents_test", ) -syscall_test(test = "//test/syscalls/linux:getrandom_test") +syscall_test( + test = "//test/syscalls/linux:getrandom_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:getrusage_test") +syscall_test( + test = "//test/syscalls/linux:getrusage_test", + vfs2 = "True", +) syscall_test( size = "medium", @@ -196,15 +278,20 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:iptables_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 5, test = "//test/syscalls/linux:itimer_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:kill_test") +syscall_test( + test = "//test/syscalls/linux:kill_test", + vfs2 = "True", +) syscall_test( add_overlay = True, @@ -215,19 +302,33 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:lseek_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:madvise_test") +syscall_test( + test = "//test/syscalls/linux:madvise_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:memory_accounting_test") +syscall_test( + test = "//test/syscalls/linux:memory_accounting_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:mempolicy_test") +syscall_test( + test = "//test/syscalls/linux:mempolicy_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:mincore_test") +syscall_test( + test = "//test/syscalls/linux:mincore_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:mkdir_test", + vfs2 = "True", ) syscall_test( @@ -249,20 +350,29 @@ syscall_test( syscall_test( size = "medium", test = "//test/syscalls/linux:mremap_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:msync_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:munmap_test") +syscall_test( + test = "//test/syscalls/linux:munmap_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:network_namespace_test") +syscall_test( + test = "//test/syscalls/linux:network_namespace_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:open_create_test", + vfs2 = "True", ) syscall_test( @@ -270,40 +380,65 @@ syscall_test( test = "//test/syscalls/linux:open_test", ) -syscall_test(test = "//test/syscalls/linux:packet_socket_raw_test") +syscall_test( + test = "//test/syscalls/linux:packet_socket_raw_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:packet_socket_test") +syscall_test( + 
test = "//test/syscalls/linux:packet_socket_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:partial_bad_buffer_test") +syscall_test( + test = "//test/syscalls/linux:partial_bad_buffer_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:pause_test") +syscall_test( + test = "//test/syscalls/linux:pause_test", + vfs2 = "True", +) syscall_test( size = "large", add_overlay = True, shard_count = 5, test = "//test/syscalls/linux:pipe_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:poll_test") +syscall_test( + test = "//test/syscalls/linux:poll_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:ppoll_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:prctl_setuid_test") +syscall_test( + test = "//test/syscalls/linux:prctl_setuid_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:prctl_test") +syscall_test( + test = "//test/syscalls/linux:prctl_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:pread64_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:preadv_test", + vfs2 = "True", ) syscall_test( @@ -311,36 +446,56 @@ syscall_test( test = "//test/syscalls/linux:preadv2_test", ) -syscall_test(test = "//test/syscalls/linux:priority_test") +syscall_test( + test = "//test/syscalls/linux:priority_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:proc_test", ) -syscall_test(test = "//test/syscalls/linux:proc_net_test") +syscall_test( + test = "//test/syscalls/linux:proc_net_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:proc_pid_oomscore_test") +syscall_test( + test = "//test/syscalls/linux:proc_pid_oomscore_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:proc_pid_smaps_test") +syscall_test( + test = "//test/syscalls/linux:proc_pid_smaps_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:proc_pid_uid_gid_map_test") +syscall_test( + test = "//test/syscalls/linux:proc_pid_uid_gid_map_test", +) syscall_test( size = "medium", test = "//test/syscalls/linux:pselect_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:ptrace_test") +syscall_test( + test = "//test/syscalls/linux:ptrace_test", + vfs2 = "True", +) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:pty_test", + vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:pty_root_test", + vfs2 = "True", ) syscall_test( @@ -351,17 +506,28 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:pwrite64_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:raw_socket_hdrincl_test") +syscall_test( + test = "//test/syscalls/linux:raw_socket_hdrincl_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:raw_socket_icmp_test") +syscall_test( + test = "//test/syscalls/linux:raw_socket_icmp_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:raw_socket_ipv4_test") +syscall_test( + test = "//test/syscalls/linux:raw_socket_ipv4_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:read_test", + vfs2 = "True", ) syscall_test( @@ -373,12 +539,14 @@ syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:readv_socket_test", + vfs2 = "True", ) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:readv_test", + vfs2 = "True", ) 
syscall_test( @@ -387,25 +555,50 @@ syscall_test( test = "//test/syscalls/linux:rename_test", ) -syscall_test(test = "//test/syscalls/linux:rlimits_test") +syscall_test( + test = "//test/syscalls/linux:rlimits_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:rseq_test") +syscall_test( + test = "//test/syscalls/linux:rseq_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:rtsignal_test") +syscall_test( + test = "//test/syscalls/linux:rtsignal_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:signalfd_test") +syscall_test( + test = "//test/syscalls/linux:signalfd_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:sched_test") +syscall_test( + test = "//test/syscalls/linux:sched_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:sched_yield_test") +syscall_test( + test = "//test/syscalls/linux:sched_yield_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:seccomp_test") +syscall_test( + test = "//test/syscalls/linux:seccomp_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:select_test") +syscall_test( + test = "//test/syscalls/linux:select_test", + vfs2 = "True", +) syscall_test( shard_count = 20, test = "//test/syscalls/linux:semaphore_test", + vfs2 = "True", ) syscall_test( @@ -421,49 +614,68 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:splice_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:sigaction_test") +syscall_test( + test = "//test/syscalls/linux:sigaction_test", + vfs2 = "True", +) # TODO(b/119826902): Enable once the test passes in runsc. -# syscall_test(test = "//test/syscalls/linux:sigaltstack_test") +# syscall_test(vfs2="True",test = "//test/syscalls/linux:sigaltstack_test") -syscall_test(test = "//test/syscalls/linux:sigiret_test") +syscall_test( + test = "//test/syscalls/linux:sigiret_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:sigprocmask_test") +syscall_test( + test = "//test/syscalls/linux:sigprocmask_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:sigstop_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:sigtimedwait_test") +syscall_test( + test = "//test/syscalls/linux:sigtimedwait_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:shm_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_abstract_non_blocking_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_abstract_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_domain_non_blocking_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_domain_test", + vfs2 = "True", ) syscall_test( @@ -489,58 +701,90 @@ syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_generic_loopback_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_ip_tcp_loopback_non_blocking_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_loopback_test", + vfs2 = "True", ) syscall_test( size = "medium", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_udp_generic_loopback_test", + vfs2 = "True", ) syscall_test( size = "medium", test = 
"//test/syscalls/linux:socket_ip_udp_loopback_non_blocking_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_ip_udp_loopback_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:socket_ip_unbound_test") +syscall_test( + test = "//test/syscalls/linux:socket_ip_unbound_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_netdevice_test") +syscall_test( + test = "//test/syscalls/linux:socket_netdevice_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_netlink_test") +syscall_test( + test = "//test/syscalls/linux:socket_netlink_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_netlink_route_test") +syscall_test( + test = "//test/syscalls/linux:socket_netlink_route_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_netlink_uevent_test") +syscall_test( + test = "//test/syscalls/linux:socket_netlink_uevent_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_blocking_local_test") +syscall_test( + test = "//test/syscalls/linux:socket_blocking_local_test", +) -syscall_test(test = "//test/syscalls/linux:socket_blocking_ip_test") +syscall_test( + test = "//test/syscalls/linux:socket_blocking_ip_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_non_stream_blocking_local_test") +syscall_test( + test = "//test/syscalls/linux:socket_non_stream_blocking_local_test", +) -syscall_test(test = "//test/syscalls/linux:socket_non_stream_blocking_udp_test") +syscall_test( + test = "//test/syscalls/linux:socket_non_stream_blocking_udp_test", + vfs2 = "True", +) syscall_test( size = "large", @@ -550,6 +794,7 @@ syscall_test( syscall_test( size = "large", test = "//test/syscalls/linux:socket_stream_blocking_tcp_test", + vfs2 = "True", ) syscall_test( @@ -572,6 +817,7 @@ syscall_test( syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_dgram_non_blocking_test", + vfs2 = "True", ) syscall_test( @@ -579,6 +825,7 @@ syscall_test( add_overlay = True, shard_count = 50, test = "//test/syscalls/linux:socket_unix_pair_test", + vfs2 = "True", ) syscall_test( @@ -596,11 +843,13 @@ syscall_test( syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_unbound_abstract_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_unbound_dgram_test", + vfs2 = "True", ) syscall_test( @@ -612,6 +861,7 @@ syscall_test( size = "medium", shard_count = 10, test = "//test/syscalls/linux:socket_unix_unbound_seqpacket_test", + vfs2 = "True", ) syscall_test( @@ -623,6 +873,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:statfs_test", + vfs2 = "True", ) syscall_test( @@ -633,6 +884,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:stat_times_test", + vfs2 = "True", ) syscall_test( @@ -648,6 +900,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:sync_test", + vfs2 = "True", ) syscall_test( @@ -655,86 +908,151 @@ syscall_test( test = "//test/syscalls/linux:sync_file_range_test", ) -syscall_test(test = "//test/syscalls/linux:sysinfo_test") +syscall_test( + test = "//test/syscalls/linux:sysinfo_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:syslog_test") +syscall_test( + test = 
"//test/syscalls/linux:syslog_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:sysret_test") +syscall_test( + test = "//test/syscalls/linux:sysret_test", + vfs2 = "True", +) syscall_test( size = "medium", shard_count = 10, test = "//test/syscalls/linux:tcp_socket_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:tgkill_test") +syscall_test( + test = "//test/syscalls/linux:tgkill_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:timerfd_test") +syscall_test( + test = "//test/syscalls/linux:timerfd_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:timers_test") +syscall_test( + test = "//test/syscalls/linux:timers_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:time_test") +syscall_test( + test = "//test/syscalls/linux:time_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:tkill_test") +syscall_test( + test = "//test/syscalls/linux:tkill_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:truncate_test", ) -syscall_test(test = "//test/syscalls/linux:tuntap_test") +syscall_test( + test = "//test/syscalls/linux:tuntap_test", +) syscall_test( add_hostinet = True, test = "//test/syscalls/linux:tuntap_hostinet_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:udp_bind_test") +syscall_test( + test = "//test/syscalls/linux:udp_bind_test", + vfs2 = "True", +) syscall_test( size = "medium", add_hostinet = True, shard_count = 10, test = "//test/syscalls/linux:udp_socket_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:uidgid_test") +syscall_test( + test = "//test/syscalls/linux:uidgid_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:uname_test") +syscall_test( + test = "//test/syscalls/linux:uname_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:unlink_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:unshare_test") +syscall_test( + test = "//test/syscalls/linux:unshare_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:utimes_test") +syscall_test( + test = "//test/syscalls/linux:utimes_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:vdso_clock_gettime_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:vdso_test") +syscall_test( + test = "//test/syscalls/linux:vdso_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:vsyscall_test") +syscall_test( + test = "//test/syscalls/linux:vsyscall_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:vfork_test") +syscall_test( + test = "//test/syscalls/linux:vfork_test", + vfs2 = "True", +) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:wait_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:write_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:proc_net_unix_test") +syscall_test( + test = "//test/syscalls/linux:proc_net_unix_test", +) -syscall_test(test = "//test/syscalls/linux:proc_net_tcp_test") +syscall_test( + test = "//test/syscalls/linux:proc_net_tcp_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:proc_net_udp_test") +syscall_test( + test = "//test/syscalls/linux:proc_net_udp_test", + vfs2 = "True", +) -- cgit v1.2.3 From e6334e81ca8d951e56f03d8ea0629e3c85556cf1 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 2 Jun 2020 19:17:53 -0700 
Subject: Check that two sockets with different types can't be connected to each other PiperOrigin-RevId: 314450191 --- pkg/sentry/socket/unix/transport/connectioned.go | 2 +- pkg/sentry/socket/unix/unix.go | 13 +++++++- test/syscalls/linux/accept_bind.cc | 42 ++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index ce5b94ee7..09c6d3b27 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -252,7 +252,7 @@ func (e *connectionedEndpoint) Close() { // BidirectionalConnect implements BoundEndpoint.BidirectionalConnect. func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error { if ce.Type() != e.stype { - return syserr.ErrConnectionRefused + return syserr.ErrWrongProtocolForSocket } // Check if ce is e to avoid a deadlock. diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 5b29e9d7f..c4c9db81b 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -417,7 +417,18 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool defer ep.Release() // Connect the server endpoint. - return s.ep.Connect(t, ep) + err = s.ep.Connect(t, ep) + + if err == syserr.ErrWrongProtocolForSocket { + // Linux for abstract sockets returns ErrConnectionRefused + // instead of ErrWrongProtocolForSocket. + path, _ := extractPath(sockaddr) + if len(path) > 0 && path[0] == 0 { + err = syserr.ErrConnectionRefused + } + } + + return err } // Write implements fs.FileOperations.Write. diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc index e08c578f0..f65a14fb8 100644 --- a/test/syscalls/linux/accept_bind.cc +++ b/test/syscalls/linux/accept_bind.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include #include @@ -141,6 +142,47 @@ TEST_P(AllSocketPairTest, Connect) { SyscallSucceeds()); } +TEST_P(AllSocketPairTest, ConnectWithWrongType) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int type; + socklen_t typelen = sizeof(type); + EXPECT_THAT( + getsockopt(sockets->first_fd(), SOL_SOCKET, SO_TYPE, &type, &typelen), + SyscallSucceeds()); + switch (type) { + case SOCK_STREAM: + type = SOCK_SEQPACKET; + break; + case SOCK_SEQPACKET: + type = SOCK_STREAM; + break; + } + + const FileDescriptor another_socket = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, type, 0)); + + ASSERT_THAT(bind(sockets->first_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); + + ASSERT_THAT(listen(sockets->first_fd(), 5), SyscallSucceeds()); + + if (sockets->first_addr()->sa_data[0] != 0) { + ASSERT_THAT(connect(another_socket.get(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(EPROTOTYPE)); + } else { + ASSERT_THAT(connect(another_socket.get(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallFailsWithErrno(ECONNREFUSED)); + } + + ASSERT_THAT(connect(sockets->second_fd(), sockets->first_addr(), + sockets->first_addr_size()), + SyscallSucceeds()); +} + TEST_P(AllSocketPairTest, ConnectNonListening) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); -- cgit v1.2.3 From 162848e129e50e25c3bb9c5fdc337584b3531da0 Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Wed, 3 Jun 2020 08:48:23 -0700 Subject: Avoid TCP segment split when out of sender window. If the entire segment cannot be accommodated in the receiver advertised window and if there are still unacknowledged pending segments, skip splitting the segment. The segment transmit would get retried by the retransmit handler. PiperOrigin-RevId: 314538523 --- pkg/tcpip/transport/tcp/snd.go | 19 ++++ test/packetimpact/tests/BUILD | 6 +- .../tests/tcp_send_window_sizes_piggyback_test.go | 105 +++++++++++++++++++++ .../tests/tcp_should_piggyback_test.go | 64 ------------- 4 files changed, 126 insertions(+), 68 deletions(-) create mode 100644 test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go delete mode 100644 test/packetimpact/tests/tcp_should_piggyback_test.go (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 06dc9b7d7..3a19c4468 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -816,6 +816,25 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se panic("Netstack queues FIN segments without data.") } + segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size())) + // If the entire segment cannot be accomodated in the receiver + // advertized window, skip splitting and sending of the segment. + // ref: net/ipv4/tcp_output.c::tcp_snd_wnd_test() + // + // Linux checks this for all segment transmits not triggered + // by a probe timer. On this condition, it defers the segment + // split and transmit to a short probe timer. + // ref: include/net/tcp.h::tcp_check_probe_timer() + // ref: net/ipv4/tcp_output.c::tcp_write_wakeup() + // + // Instead of defining a new transmit timer, we attempt to split the + // segment right here if there are no pending segments. + // If there are pending segments, segment transmits are deferred + // to the retransmit timer handler. 
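+	// For example (illustrative numbers): with sndUna=1000 and sndNxt=1500
+	// there are 500 bytes of unacknowledged data in flight; if this segment
+	// also does not end strictly before the window limit (end), we return
+	// false here and rely on the retransmit timer to retry the transmit.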
+ if s.sndUna != s.sndNxt && !segEnd.LessThan(end) { + return false + } + if !seg.sequenceNumber.LessThan(end) { return false } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 1598c61e9..fb39ee4b8 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -124,10 +124,8 @@ packetimpact_go_test( ) packetimpact_go_test( - name = "tcp_should_piggyback", - srcs = ["tcp_should_piggyback_test.go"], - # TODO(b/153680566): Fix netstack then remove the line below. - expect_netstack_failure = True, + name = "tcp_send_window_sizes_piggyback", + srcs = ["tcp_send_window_sizes_piggyback_test.go"], deps = [ "//pkg/tcpip/header", "//test/packetimpact/testbench", diff --git a/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go new file mode 100644 index 000000000..b7e91712d --- /dev/null +++ b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go @@ -0,0 +1,105 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_send_window_sizes_piggyback_test + +import ( + "flag" + "fmt" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +// TestSendWindowSizesPiggyback tests cases where segment sizes are close to +// sender window size and checks for ACK piggybacking for each of those case. +func TestSendWindowSizesPiggyback(t *testing.T) { + sampleData := []byte("Sample Data") + segmentSize := uint16(len(sampleData)) + // Advertise receive window sizes that are lesser, equal to or greater than + // enqueued segment size and check for segment transmits. The test attempts + // to enqueue a segment on the dut before acknowledging previous segment and + // lets the dut piggyback any ACKs along with the enqueued segment. + for _, tt := range []struct { + description string + windowSize uint16 + expectedPayload1 []byte + expectedPayload2 []byte + enqueue bool + }{ + // Expect the first segment to be split as it cannot be accomodated in + // the sender window. This means we need not enqueue a new segment after + // the first segment. + {"WindowSmallerThanSegment", segmentSize - 1, sampleData[:(segmentSize - 1)], sampleData[(segmentSize - 1):], false /* enqueue */}, + + {"WindowEqualToSegment", segmentSize, sampleData, sampleData, true /* enqueue */}, + + // Expect the second segment to not be split as its size is greater than + // the available sender window size. The segments should not be split + // when there is pending unacknowledged data and the segment-size is + // greater than available sender window. 
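+		// (Illustrative: with windowSize = segmentSize + 1 only one byte of
+		// window remains once the first segment is in flight, so rather than
+		// sending a one-byte split the dut is expected to hold the whole
+		// second segment until our ACK reopens the window.)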
+ {"WindowGreaterThanSegment", segmentSize + 1, sampleData, sampleData, true /* enqueue */}, + } { + t.Run(fmt.Sprintf("%s%d", tt.description, tt.windowSize), func(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort, WindowSize: tb.Uint16(tt.windowSize)}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + conn.Handshake() + acceptFd, _ := dut.Accept(listenFd) + defer dut.Close(acceptFd) + + dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + + expectedTCP := tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)} + + dut.Send(acceptFd, sampleData, 0) + expectedPayload := tb.Payload{Bytes: tt.expectedPayload1} + if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { + t.Fatalf("Expected %s but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err) + } + + // Expect any enqueued segment to be transmitted by the dut along with + // piggybacked ACK for our data. + + if tt.enqueue { + // Enqueue a segment for the dut to transmit. + dut.Send(acceptFd, sampleData, 0) + } + + // Send ACK for the previous segment along with data for the dut to + // receive and ACK back. Sending this ACK would make room for the dut + // to transmit any enqueued segment. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh), WindowSize: tb.Uint16(tt.windowSize)}, &tb.Payload{Bytes: sampleData}) + + // Expect the dut to piggyback the ACK for received data along with + // the segment enqueued for transmit. + expectedPayload = tb.Payload{Bytes: tt.expectedPayload2} + if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { + t.Fatalf("Expected %s but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err) + } + }) + } +} diff --git a/test/packetimpact/tests/tcp_should_piggyback_test.go b/test/packetimpact/tests/tcp_should_piggyback_test.go deleted file mode 100644 index 0240dc2f9..000000000 --- a/test/packetimpact/tests/tcp_should_piggyback_test.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tcp_should_piggyback_test - -import ( - "flag" - "testing" - "time" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" -) - -func init() { - tb.RegisterFlags(flag.CommandLine) -} - -func TestPiggyback(t *testing.T) { - dut := tb.NewDUT(t) - defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort, WindowSize: tb.Uint16(12)}, tb.TCP{SrcPort: &remotePort}) - defer conn.Close() - - conn.Handshake() - acceptFd, _ := dut.Accept(listenFd) - defer dut.Close(acceptFd) - - dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) - - sampleData := []byte("Sample Data") - - dut.Send(acceptFd, sampleData, 0) - expectedTCP := tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)} - expectedPayload := tb.Payload{Bytes: sampleData} - if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { - t.Fatalf("Expected %v but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err) - } - - // Cause DUT to send us more data as soon as we ACK their first data segment because we have - // a small window. - dut.Send(acceptFd, sampleData, 0) - - // DUT should ACK our segment by piggybacking ACK to their outstanding data segment instead of - // sending a separate ACK packet. - conn.Send(expectedTCP, &expectedPayload) - if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { - t.Fatalf("Expected %v but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err) - } -} -- cgit v1.2.3 From d8d86f0f3afdf0d46a556e7925ed54c5f4dc0bbf Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Wed, 3 Jun 2020 14:54:46 -0700 Subject: Add test for O_TRUNC b/36576592 calls out an edge case previously not supported by HostFS. HostFS is currently being removed, meaning gVisor supports this feature. Simply add the test to open_test. PiperOrigin-RevId: 314610226 --- test/syscalls/linux/open.cc | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index 640fe6bfc..670c0284b 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -416,6 +416,29 @@ TEST_F(OpenTest, CanTruncateWriteOnlyNoReadPermission_NoRandomSave) { EXPECT_EQ(stat.st_size, 0); } +TEST_F(OpenTest, CanTruncateWithStrangePermissions) { + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); + const DisableSave ds; // Permissions are dropped. + std::string path = NewTempAbsPath(); + int fd; + // Create a file without user permissions. + EXPECT_THAT( // SAVE_BELOW + fd = open(path.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 055), + SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); + + // Cannot open file because we are owner and have no permissions set. + EXPECT_THAT(open(path.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES)); + + // We *can* chmod the file, because we are the owner. + EXPECT_THAT(chmod(path.c_str(), 0755), SyscallSucceeds()); + + // Now we can open the file again. 
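+  // (The owner class has no permission bits in mode 055, which is why the
+  // O_RDONLY open above fails; after chmod 0755 the owner has read/write
+  // access, so the O_RDWR open below is expected to succeed.)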
+ EXPECT_THAT(fd = open(path.c_str(), O_RDWR), SyscallSucceeds()); + EXPECT_THAT(close(fd), SyscallSucceeds()); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 9e66ac4c20dcfa2f4aaa4b149736f34f6b2bc451 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 4 Jun 2020 17:16:57 -0700 Subject: test/syscall: run hostnet tests in separate network namespaces A few tests use hard coded port numbers, so we need to guruantee that these ports will not be used for somthing else. --- test/runner/defs.bzl | 1 + 1 file changed, 1 insertion(+) (limited to 'test') diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 402ba4064..5a83f8060 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -93,6 +93,7 @@ def _syscall_test( # we figure out how to request ipv4 sockets on Guitar machines. if network == "host": tags.append("noguitar") + tags.append("block-network") # Disable off-host networking. tags.append("requires-net:loopback") -- cgit v1.2.3 From f7663660917a5b2e250513d7c8cc98ff379ca46f Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Thu, 4 Jun 2020 14:22:46 -0700 Subject: Handle TCP segment split cases as per MSS. - Always split segments larger than MSS. Currently, we base the segment split decision as a function of the send congestion window and MSS, which could be greater than the MSS advertised by remote. - While splitting segments, ensure the PSH flag is reset when there are segments that are queued to be sent. - With TCP_CORK, hold up segments up until MSS. Fix a bug in computing available send space before attempting to coalesce segments. Fixes #2832 PiperOrigin-RevId: 314802928 --- pkg/tcpip/transport/tcp/snd.go | 31 +++++++- test/packetimpact/testbench/connections.go | 52 +++++++++++++- test/packetimpact/testbench/layers.go | 5 ++ test/packetimpact/tests/BUILD | 20 ++++++ test/packetimpact/tests/fin_wait2_timeout_test.go | 20 +++--- .../tests/icmpv6_param_problem_test.go | 26 +++---- test/packetimpact/tests/ipv4_id_uniqueness_test.go | 22 +++--- test/packetimpact/tests/tcp_close_wait_ack_test.go | 36 +++++----- test/packetimpact/tests/tcp_cork_mss_test.go | 84 ++++++++++++++++++++++ .../tests/tcp_noaccept_close_rst_test.go | 12 ++-- .../tests/tcp_outside_the_window_test.go | 28 ++++---- test/packetimpact/tests/tcp_paws_mechanism_test.go | 22 +++--- .../tests/tcp_queue_receive_in_syn_sent_test.go | 18 ++--- test/packetimpact/tests/tcp_retransmits_test.go | 22 +++--- .../tests/tcp_send_window_sizes_piggyback_test.go | 22 +++--- test/packetimpact/tests/tcp_splitseg_mss_test.go | 71 ++++++++++++++++++ test/packetimpact/tests/tcp_synrcvd_reset_test.go | 18 ++--- test/packetimpact/tests/tcp_user_timeout_test.go | 22 +++--- test/packetimpact/tests/tcp_window_shrink_test.go | 24 +++---- .../tests/tcp_zero_window_probe_retransmit_test.go | 33 ++++----- .../tests/tcp_zero_window_probe_test.go | 38 +++++----- .../tcp_zero_window_probe_usertimeout_test.go | 30 ++++---- .../tests/udp_icmp_error_propagation_test.go | 46 ++++++------ test/packetimpact/tests/udp_recv_multicast_test.go | 12 ++-- .../packetimpact/tests/udp_send_recv_dgram_test.go | 16 ++--- 25 files changed, 491 insertions(+), 239 deletions(-) create mode 100644 test/packetimpact/tests/tcp_cork_mss_test.go create mode 100644 test/packetimpact/tests/tcp_splitseg_mss_test.go (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 3a19c4468..acacb42e4 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -618,6 +618,20 @@ func (s *sender) 
splitSeg(seg *segment, size int) { nSeg.data.TrimFront(size) nSeg.sequenceNumber.UpdateForward(seqnum.Size(size)) s.writeList.InsertAfter(seg, nSeg) + + // The segment being split does not carry PUSH flag because it is + // followed by the newly split segment. + // RFC1122 section 4.2.2.2: MUST set the PSH bit in the last buffered + // segment (i.e., when there is no more queued data to be sent). + // Linux removes PSH flag only when the segment is being split over MSS + // and retains it when we are splitting the segment over lack of sender + // window space. + // ref: net/ipv4/tcp_output.c::tcp_write_xmit(), tcp_mss_split_point() + // ref: net/ipv4/tcp_output.c::tcp_write_wakeup(), tcp_snd_wnd_test() + if seg.data.Size() > s.maxPayloadSize { + seg.flags ^= header.TCPFlagPsh + } + seg.data.CapLength(size) } @@ -739,7 +753,7 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se if !s.isAssignedSequenceNumber(seg) { // Merge segments if allowed. if seg.data.Size() != 0 { - available := int(seg.sequenceNumber.Size(end)) + available := int(s.sndNxt.Size(end)) if available > limit { available = limit } @@ -782,8 +796,11 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se // sent all at once. return false } - if atomic.LoadUint32(&s.ep.cork) != 0 { - // Hold back the segment until full. + // With TCP_CORK, hold back until minimum of the available + // send space and MSS. + // TODO(gvisor.dev/issue/2833): Drain the held segments after a + // timeout. + if seg.data.Size() < s.maxPayloadSize && atomic.LoadUint32(&s.ep.cork) != 0 { return false } } @@ -843,9 +860,17 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se if available == 0 { return false } + + // The segment size limit is computed as a function of sender congestion + // window and MSS. When sender congestion window is > 1, this limit can + // be larger than MSS. Ensure that the currently available send space + // is not greater than minimum of this limit and MSS. if available > limit { available = limit } + if available > s.maxPayloadSize { + available = s.maxPayloadSize + } if seg.data.Size() > available { s.splitSeg(seg, available) diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index fb32964e9..6e85d6fab 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -628,15 +628,36 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { } } -// Handshake performs a TCP 3-way handshake. The input Connection should have a +// Connect performs a TCP 3-way handshake. The input Connection should have a // final TCP Layer. -func (conn *TCPIPv4) Handshake() { +func (conn *TCPIPv4) Connect() { + conn.t.Helper() + // Send the SYN. conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn)}) // Wait for the SYN-ACK. synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) - if synAck == nil { + if err != nil { + conn.t.Fatalf("didn't get synack during handshake: %s", err) + } + conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck + + // Send an ACK. + conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)}) +} + +// ConnectWithOptions performs a TCP 3-way handshake with given TCP options. +// The input Connection should have a final TCP Layer. +func (conn *TCPIPv4) ConnectWithOptions(options []byte) { + conn.t.Helper() + + // Send the SYN. 
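+	// The SYN carries the caller-supplied options verbatim, e.g. an MSS
+	// option encoded with header.EncodeMSSOption as in the tcp_cork_mss test.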
+ conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn), Options: options}) + + // Wait for the SYN-ACK. + synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) + if err != nil { conn.t.Fatalf("didn't get synack during handshake: %s", err) } conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck @@ -656,6 +677,31 @@ func (conn *TCPIPv4) ExpectData(tcp *TCP, payload *Payload, timeout time.Duratio return (*Connection)(conn).ExpectFrame(expected, timeout) } +// ExpectNextData attempts to receive the next incoming segment for the +// connection and expects that to match the given layers. +// +// It differs from ExpectData() in that here we are only interested in the next +// received segment, while ExpectData() can receive multiple segments for the +// connection until there is a match with given layers or a timeout. +func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { + // Receive the first incoming TCP segment for this connection. + got, err := conn.ExpectData(&TCP{}, nil, timeout) + if err != nil { + return nil, err + } + + expected := make([]Layer, len(conn.layerStates)) + expected[len(expected)-1] = tcp + if payload != nil { + expected = append(expected, payload) + tcp.SeqNum = Uint32(uint32(*conn.RemoteSeqNum()) - uint32(payload.Length())) + } + if !(*Connection)(conn).match(expected, got) { + return nil, fmt.Errorf("next frame is not matching %s during %s: got %s", expected, timeout, got) + } + return got, nil +} + // Send a packet with reasonable defaults. Potentially override the TCP layer in // the connection with the provided layer and add additionLayers. func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 1b0e5b8fc..560c4111b 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -939,6 +939,11 @@ func (l *Payload) ToBytes() ([]byte, error) { return l.Bytes, nil } +// Length returns payload byte length. 
+func (l *Payload) Length() int { + return l.length() +} + func (l *Payload) match(other Layer) bool { return equalLayer(l, other) } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index fb39ee4b8..2a41ef326 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -199,6 +199,26 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_splitseg_mss", + srcs = ["tcp_splitseg_mss_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +packetimpact_go_test( + name = "tcp_cork_mss", + srcs = ["tcp_cork_mss_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "icmpv6_param_problem", srcs = ["icmpv6_param_problem_test.go"], diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go index c26ab78d9..407565078 100644 --- a/test/packetimpact/tests/fin_wait2_timeout_test.go +++ b/test/packetimpact/tests/fin_wait2_timeout_test.go @@ -21,11 +21,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } func TestFinWait2Timeout(t *testing.T) { @@ -37,13 +37,13 @@ func TestFinWait2Timeout(t *testing.T) { {"WithoutLinger2", false}, } { t.Run(tt.description, func(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFd, _ := dut.Accept(listenFd) if tt.linger2 { @@ -52,21 +52,21 @@ func TestFinWait2Timeout(t *testing.T) { } dut.Close(acceptFd) - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { t.Fatalf("expected a FIN-ACK within 1 second but got none: %s", err) } - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) time.Sleep(5 * time.Second) conn.Drain() - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) if tt.linger2 { - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); err != nil { + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil { t.Fatalf("expected a RST packet within a second but got none: %s", err) } } else { - if got, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, 10*time.Second); got != nil || err == nil { + if got, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, 10*time.Second); got != nil || err == nil { t.Fatalf("expected no RST packets within ten seconds but got one: %s", got) } } diff --git a/test/packetimpact/tests/icmpv6_param_problem_test.go b/test/packetimpact/tests/icmpv6_param_problem_test.go 
index bb1fc26fc..961059fc1 100644 --- a/test/packetimpact/tests/icmpv6_param_problem_test.go +++ b/test/packetimpact/tests/icmpv6_param_problem_test.go @@ -21,27 +21,27 @@ import ( "time" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } // TestICMPv6ParamProblemTest sends a packet with a bad next header. The DUT // should respond with an ICMPv6 Parameter Problem message. func TestICMPv6ParamProblemTest(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() - conn := tb.NewIPv6Conn(t, tb.IPv6{}, tb.IPv6{}) + conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{}) defer conn.Close() - ipv6 := tb.IPv6{ + ipv6 := testbench.IPv6{ // 254 is reserved and used for experimentation and testing. This should // cause an error. - NextHeader: tb.Uint8(254), + NextHeader: testbench.Uint8(254), } - icmpv6 := tb.ICMPv6{ - Type: tb.ICMPv6Type(header.ICMPv6EchoRequest), + icmpv6 := testbench.ICMPv6{ + Type: testbench.ICMPv6Type(header.ICMPv6EchoRequest), NDPPayload: []byte("hello world"), } @@ -61,14 +61,14 @@ func TestICMPv6ParamProblemTest(t *testing.T) { b := make([]byte, 4) binary.BigEndian.PutUint32(b, header.IPv6NextHeaderOffset) expectedPayload = append(b, expectedPayload...) - expectedICMPv6 := tb.ICMPv6{ - Type: tb.ICMPv6Type(header.ICMPv6ParamProblem), + expectedICMPv6 := testbench.ICMPv6{ + Type: testbench.ICMPv6Type(header.ICMPv6ParamProblem), NDPPayload: expectedPayload, } - paramProblem := tb.Layers{ - &tb.Ether{}, - &tb.IPv6{}, + paramProblem := testbench.Layers{ + &testbench.Ether{}, + &testbench.IPv6{}, &expectedICMPv6, } timeout := time.Second diff --git a/test/packetimpact/tests/ipv4_id_uniqueness_test.go b/test/packetimpact/tests/ipv4_id_uniqueness_test.go index 49e481d0b..4efb9829c 100644 --- a/test/packetimpact/tests/ipv4_id_uniqueness_test.go +++ b/test/packetimpact/tests/ipv4_id_uniqueness_test.go @@ -24,14 +24,14 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } -func recvTCPSegment(conn *tb.TCPIPv4, expect *tb.TCP, expectPayload *tb.Payload) (uint16, error) { +func recvTCPSegment(conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayload *testbench.Payload) (uint16, error) { layers, err := conn.ExpectData(expect, expectPayload, time.Second) if err != nil { return 0, fmt.Errorf("failed to receive TCP segment: %s", err) @@ -39,7 +39,7 @@ func recvTCPSegment(conn *tb.TCPIPv4, expect *tb.TCP, expectPayload *tb.Payload) if len(layers) < 2 { return 0, fmt.Errorf("got packet with layers: %v, expected to have at least 2 layers (link and network)", layers) } - ipv4, ok := layers[1].(*tb.IPv4) + ipv4, ok := layers[1].(*testbench.IPv4) if !ok { return 0, fmt.Errorf("got network layer: %T, expected: *IPv4", layers[1]) } @@ -56,16 +56,16 @@ func recvTCPSegment(conn *tb.TCPIPv4, expect *tb.TCP, expectPayload *tb.Payload) // to force the DF bit to be 0, and checks that a retransmitted segment has a // different IPv4 Identification value than the original segment. 
func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFD) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() remoteFD, _ := dut.Accept(listenFD) defer dut.Close(remoteFD) @@ -83,18 +83,18 @@ func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) { } sampleData := []byte("Sample Data") - samplePayload := &tb.Payload{Bytes: sampleData} + samplePayload := &testbench.Payload{Bytes: sampleData} dut.Send(remoteFD, sampleData, 0) - if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("failed to receive TCP segment sent for RTT calculation: %s", err) } // Let the DUT estimate RTO with RTT from the DATA-ACK. // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which // we can skip sending this ACK. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - expectTCP := &tb.TCP{SeqNum: tb.Uint32(uint32(*conn.RemoteSeqNum()))} + expectTCP := &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum()))} dut.Send(remoteFD, sampleData, 0) originalID, err := recvTCPSegment(&conn, expectTCP, samplePayload) if err != nil { diff --git a/test/packetimpact/tests/tcp_close_wait_ack_test.go b/test/packetimpact/tests/tcp_close_wait_ack_test.go index 70a22a2db..6e7ff41d7 100644 --- a/test/packetimpact/tests/tcp_close_wait_ack_test.go +++ b/test/packetimpact/tests/tcp_close_wait_ack_test.go @@ -23,17 +23,17 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } func TestCloseWaitAck(t *testing.T) { for _, tt := range []struct { description string - makeTestingTCP func(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP + makeTestingTCP func(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP seqNumOffset seqnum.Size expectAck bool }{ @@ -45,27 +45,27 @@ func TestCloseWaitAck(t *testing.T) { {"ACK", GenerateUnaccACKSegment, 2, true}, } { t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFd, _ := dut.Accept(listenFd) // Send a FIN to DUT to intiate the active close - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagFin)}) - gotTCP, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}) + gotTCP, err := 
conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) if err != nil { t.Fatalf("expected an ACK for our fin and DUT should enter CLOSE_WAIT: %s", err) } windowSize := seqnum.Size(*gotTCP.WindowSize) // Send a segment with OTW Seq / unacc ACK and expect an ACK back - conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset, windowSize), &tb.Payload{Bytes: []byte("Sample Data")}) - gotAck, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) + conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset, windowSize), &testbench.Payload{Bytes: []byte("Sample Data")}) + gotAck, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) if tt.expectAck && err != nil { t.Fatalf("expected an ack but got none: %s", err) } @@ -75,14 +75,14 @@ func TestCloseWaitAck(t *testing.T) { // Now let's verify DUT is indeed in CLOSE_WAIT dut.Close(acceptFd) - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil { + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil { t.Fatalf("expected DUT to send a FIN: %s", err) } // Ack the FIN from DUT - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) // Send some extra data to DUT - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, &tb.Payload{Bytes: []byte("Sample Data")}) - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, time.Second); err != nil { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: []byte("Sample Data")}) + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil { t.Fatalf("expected DUT to send an RST: %s", err) } }) @@ -92,17 +92,17 @@ func TestCloseWaitAck(t *testing.T) { // This generates an segment with seqnum = RCV.NXT + RCV.WND + seqNumOffset, the // generated segment is only acceptable when seqNumOffset is 0, otherwise an ACK // is expected from the receiver. -func GenerateOTWSeqSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP { +func GenerateOTWSeqSegment(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP { lastAcceptable := conn.LocalSeqNum().Add(windowSize) otwSeq := uint32(lastAcceptable.Add(seqNumOffset)) - return tb.TCP{SeqNum: tb.Uint32(otwSeq), Flags: tb.Uint8(header.TCPFlagAck)} + return testbench.TCP{SeqNum: testbench.Uint32(otwSeq), Flags: testbench.Uint8(header.TCPFlagAck)} } // This generates an segment with acknum = SND.NXT + seqNumOffset, the generated // segment is only acceptable when seqNumOffset is 0, otherwise an ACK is // expected from the receiver. 
-func GenerateUnaccACKSegment(conn *tb.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) tb.TCP { +func GenerateUnaccACKSegment(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP { lastAcceptable := conn.RemoteSeqNum() unaccAck := uint32(lastAcceptable.Add(seqNumOffset)) - return tb.TCP{AckNum: tb.Uint32(unaccAck), Flags: tb.Uint8(header.TCPFlagAck)} + return testbench.TCP{AckNum: testbench.Uint32(unaccAck), Flags: testbench.Uint8(header.TCPFlagAck)} } diff --git a/test/packetimpact/tests/tcp_cork_mss_test.go b/test/packetimpact/tests/tcp_cork_mss_test.go new file mode 100644 index 000000000..fb8f48629 --- /dev/null +++ b/test/packetimpact/tests/tcp_cork_mss_test.go @@ -0,0 +1,84 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_cork_mss_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +// TestTCPCorkMSS tests for segment coalesce and split as per MSS. +func TestTCPCorkMSS(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + defer conn.Close() + + const mss = uint32(header.TCPDefaultMSS) + options := make([]byte, header.TCPOptionMSSLength) + header.EncodeMSSOption(mss, options) + conn.ConnectWithOptions(options) + + acceptFD, _ := dut.Accept(listenFD) + defer dut.Close(acceptFD) + + dut.SetSockOptInt(acceptFD, unix.IPPROTO_TCP, unix.TCP_CORK, 1) + + // Let the dut application send 2 small segments to be held up and coalesced + // until the application sends a larger segment to fill up to > MSS. + sampleData := []byte("Sample Data") + dut.Send(acceptFD, sampleData, 0) + dut.Send(acceptFD, sampleData, 0) + + expectedData := sampleData + expectedData = append(expectedData, sampleData...) + largeData := make([]byte, mss+1) + expectedData = append(expectedData, largeData...) + dut.Send(acceptFD, largeData, 0) + + // Expect the segments to be coalesced and sent and capped to MSS. + expectedPayload := testbench.Payload{Bytes: expectedData[:mss]} + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &expectedPayload, time.Second); err != nil { + t.Fatalf("expected payload was not received: %s", err) + } + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + // Expect the coalesced segment to be split and transmitted. 
+ expectedPayload = testbench.Payload{Bytes: expectedData[mss:]} + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil { + t.Fatalf("expected payload was not received: %s", err) + } + + // Check for segments to *not* be held up because of TCP_CORK when + // the current send window is less than MSS. + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(2 * len(sampleData)))}) + dut.Send(acceptFD, sampleData, 0) + dut.Send(acceptFD, sampleData, 0) + expectedPayload = testbench.Payload{Bytes: append(sampleData, sampleData...)} + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil { + t.Fatalf("expected payload was not received: %s", err) + } + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) +} diff --git a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go index 2c1ec27d3..b9b3e91d3 100644 --- a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go +++ b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go @@ -21,22 +21,22 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } func TestTcpNoAcceptCloseReset(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) - conn.Handshake() + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + conn.Connect() defer conn.Close() dut.Close(listenFd) - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil { + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil { t.Fatalf("expected a RST-ACK packet but got none: %s", err) } } diff --git a/test/packetimpact/tests/tcp_outside_the_window_test.go b/test/packetimpact/tests/tcp_outside_the_window_test.go index 351df193e..ad8c74234 100644 --- a/test/packetimpact/tests/tcp_outside_the_window_test.go +++ b/test/packetimpact/tests/tcp_outside_the_window_test.go @@ -23,11 +23,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } // TestTCPOutsideTheWindows tests the behavior of the DUT when packets arrive @@ -38,7 +38,7 @@ func TestTCPOutsideTheWindow(t *testing.T) { for _, tt := range []struct { description string tcpFlags uint8 - payload []tb.Layer + payload []testbench.Layer seqNumOffset seqnum.Size expectACK bool }{ @@ -46,28 +46,28 @@ func TestTCPOutsideTheWindow(t *testing.T) { {"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 0, true}, {"ACK", header.TCPFlagAck, nil, 0, false}, {"FIN", header.TCPFlagFin, nil, 0, false}, - {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 0, true}, + {"Data", 
header.TCPFlagAck, []testbench.Layer{&testbench.Payload{Bytes: []byte("abc123")}}, 0, true}, {"SYN", header.TCPFlagSyn, nil, 1, true}, {"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 1, true}, {"ACK", header.TCPFlagAck, nil, 1, true}, {"FIN", header.TCPFlagFin, nil, 1, false}, - {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 1, true}, + {"Data", header.TCPFlagAck, []testbench.Layer{&testbench.Payload{Bytes: []byte("abc123")}}, 1, true}, {"SYN", header.TCPFlagSyn, nil, 2, true}, {"SYNACK", header.TCPFlagSyn | header.TCPFlagAck, nil, 2, true}, {"ACK", header.TCPFlagAck, nil, 2, true}, {"FIN", header.TCPFlagFin, nil, 2, false}, - {"Data", header.TCPFlagAck, []tb.Layer{&tb.Payload{Bytes: []byte("abc123")}}, 2, true}, + {"Data", header.TCPFlagAck, []testbench.Layer{&testbench.Payload{Bytes: []byte("abc123")}}, 2, true}, } { t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFD) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFD, _ := dut.Accept(listenFD) defer dut.Close(acceptFD) @@ -75,13 +75,13 @@ func TestTCPOutsideTheWindow(t *testing.T) { conn.Drain() // Ignore whatever incrementing that this out-of-order packet might cause // to the AckNum. - localSeqNum := tb.Uint32(uint32(*conn.LocalSeqNum())) - conn.Send(tb.TCP{ - Flags: tb.Uint8(tt.tcpFlags), - SeqNum: tb.Uint32(uint32(conn.LocalSeqNum().Add(windowSize))), + localSeqNum := testbench.Uint32(uint32(*conn.LocalSeqNum())) + conn.Send(testbench.TCP{ + Flags: testbench.Uint8(tt.tcpFlags), + SeqNum: testbench.Uint32(uint32(conn.LocalSeqNum().Add(windowSize))), }, tt.payload...) 
timeout := 3 * time.Second - gotACK, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout) + gotACK, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout) if tt.expectACK && err != nil { t.Fatalf("expected an ACK packet within %s but got none: %s", timeout, err) } diff --git a/test/packetimpact/tests/tcp_paws_mechanism_test.go b/test/packetimpact/tests/tcp_paws_mechanism_test.go index 0a668adcf..55db4ece6 100644 --- a/test/packetimpact/tests/tcp_paws_mechanism_test.go +++ b/test/packetimpact/tests/tcp_paws_mechanism_test.go @@ -22,25 +22,25 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } func TestPAWSMechanism(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFD) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() options := make([]byte, header.TCPOptionTSLength) header.EncodeTSOption(currentTS(), 0, options) - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn), Options: options}) - synAck, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), Options: options}) + synAck, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) if err != nil { t.Fatalf("didn't get synack during handshake: %s", err) } @@ -50,7 +50,7 @@ func TestPAWSMechanism(t *testing.T) { } tsecr := parsedSynOpts.TSVal header.EncodeTSOption(currentTS(), tsecr, options) - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}) acceptFD, _ := dut.Accept(listenFD) defer dut.Close(acceptFD) @@ -61,9 +61,9 @@ func TestPAWSMechanism(t *testing.T) { // every time we send one, it should not cause any flakiness because timestamps // only need to be non-decreasing. time.Sleep(3 * time.Millisecond) - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options}, &tb.Payload{Bytes: sampleData}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData}) - gotTCP, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) + gotTCP, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) if err != nil { t.Fatalf("expected an ACK but got none: %s", err) } @@ -86,9 +86,9 @@ func TestPAWSMechanism(t *testing.T) { // 3ms here is chosen arbitrarily and this time.Sleep() should not cause flakiness // due to the exact same reasoning discussed above. 
time.Sleep(3 * time.Millisecond) - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), Options: options}, &tb.Payload{Bytes: sampleData}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData}) - gotTCP, err = conn.Expect(tb.TCP{AckNum: lastAckNum, Flags: tb.Uint8(header.TCPFlagAck)}, time.Second) + gotTCP, err = conn.Expect(testbench.TCP{AckNum: lastAckNum, Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) if err != nil { t.Fatalf("expected segment with AckNum %d but got none: %s", lastAckNum, err) } diff --git a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go index 5cc93fa24..b640d8673 100644 --- a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go +++ b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go @@ -28,19 +28,19 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } func TestQueueReceiveInSynSent(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() - socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(tb.RemoteIPv4)) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() sampleData := []byte("Sample Data") @@ -49,7 +49,7 @@ func TestQueueReceiveInSynSent(t *testing.T) { if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) { t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err) } - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { t.Fatalf("expected a SYN from DUT, but got none: %s", err) } @@ -77,11 +77,11 @@ func TestQueueReceiveInSynSent(t *testing.T) { time.Sleep(time.Second) // Bring the connection to Established. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) - if _, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { t.Fatalf("expected an ACK from DUT, but got none: %s", err) } // Send sample data to DUT. 
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, &tb.Payload{Bytes: sampleData}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData}) } diff --git a/test/packetimpact/tests/tcp_retransmits_test.go b/test/packetimpact/tests/tcp_retransmits_test.go index c043ad881..e51409b66 100644 --- a/test/packetimpact/tests/tcp_retransmits_test.go +++ b/test/packetimpact/tests/tcp_retransmits_test.go @@ -21,53 +21,53 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } // TestRetransmits tests retransmits occur at exponentially increasing // time intervals. func TestRetransmits(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFd, _ := dut.Accept(listenFd) defer dut.Close(acceptFd) dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") - samplePayload := &tb.Payload{Bytes: sampleData} + samplePayload := &testbench.Payload{Bytes: sampleData} dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } // Give a chance for the dut to estimate RTO with RTT from the DATA-ACK. // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which // we can skip sending this ACK. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) startRTO := time.Second current := startRTO first := time.Now() dut.Send(acceptFd, sampleData, 0) - seq := tb.Uint32(uint32(*conn.RemoteSeqNum())) - if _, err := conn.ExpectData(&tb.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil { + seq := testbench.Uint32(uint32(*conn.RemoteSeqNum())) + if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } // Expect retransmits of the same segment. 
for i := 0; i < 5; i++ { start := time.Now() - if _, err := conn.ExpectData(&tb.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil { t.Fatalf("expected a packet with payload %v: %s loop %d", samplePayload, err, i) } if i == 0 { diff --git a/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go index b7e91712d..90ab85419 100644 --- a/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go +++ b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go @@ -22,11 +22,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } // TestSendWindowSizesPiggyback tests cases where segment sizes are close to @@ -59,26 +59,26 @@ func TestSendWindowSizesPiggyback(t *testing.T) { {"WindowGreaterThanSegment", segmentSize + 1, sampleData, sampleData, true /* enqueue */}, } { t.Run(fmt.Sprintf("%s%d", tt.description, tt.windowSize), func(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort, WindowSize: tb.Uint16(tt.windowSize)}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort, WindowSize: testbench.Uint16(tt.windowSize)}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFd, _ := dut.Accept(listenFd) defer dut.Close(acceptFd) dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) - expectedTCP := tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)} + expectedTCP := testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)} dut.Send(acceptFd, sampleData, 0) - expectedPayload := tb.Payload{Bytes: tt.expectedPayload1} + expectedPayload := testbench.Payload{Bytes: tt.expectedPayload1} if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { - t.Fatalf("Expected %s but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err) + t.Fatalf("expected payload was not received: %s", err) } // Expect any enqueued segment to be transmitted by the dut along with @@ -92,13 +92,13 @@ func TestSendWindowSizesPiggyback(t *testing.T) { // Send ACK for the previous segment along with data for the dut to // receive and ACK back. Sending this ACK would make room for the dut // to transmit any enqueued segment. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh), WindowSize: tb.Uint16(tt.windowSize)}, &tb.Payload{Bytes: sampleData}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh), WindowSize: testbench.Uint16(tt.windowSize)}, &testbench.Payload{Bytes: sampleData}) // Expect the dut to piggyback the ACK for received data along with // the segment enqueued for transmit. 
- expectedPayload = tb.Payload{Bytes: tt.expectedPayload2} + expectedPayload = testbench.Payload{Bytes: tt.expectedPayload2} if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { - t.Fatalf("Expected %s but didn't get one: %s", tb.Layers{&expectedTCP, &expectedPayload}, err) + t.Fatalf("expected payload was not received: %s", err) } }) } diff --git a/test/packetimpact/tests/tcp_splitseg_mss_test.go b/test/packetimpact/tests/tcp_splitseg_mss_test.go new file mode 100644 index 000000000..9350d0988 --- /dev/null +++ b/test/packetimpact/tests/tcp_splitseg_mss_test.go @@ -0,0 +1,71 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_splitseg_mss_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +// TestTCPSplitSegMSS lets the dut try to send segments larger than MSS. +// It tests if the transmitted segments are capped at MSS and are split. +func TestTCPSplitSegMSS(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + defer conn.Close() + + const mss = uint32(header.TCPDefaultMSS) + options := make([]byte, header.TCPOptionMSSLength) + header.EncodeMSSOption(mss, options) + conn.ConnectWithOptions(options) + + acceptFD, _ := dut.Accept(listenFD) + defer dut.Close(acceptFD) + + // Let the dut send a segment larger than MSS. + largeData := make([]byte, mss+1) + for i := 0; i < 2; i++ { + dut.Send(acceptFD, largeData, 0) + if i == 0 { + // On Linux, the initial segment goes out beyond MSS and the segment + // split occurs on retransmission. Call ExpectData to wait to + // receive the split segment. 
+ if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil { + t.Fatalf("expected payload was not received: %s", err) + } + } else { + if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil { + t.Fatalf("expected payload was not received: %s", err) + } + } + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: largeData[mss:]}, time.Second); err != nil { + t.Fatalf("expected payload was not received: %s", err) + } + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + } +} diff --git a/test/packetimpact/tests/tcp_synrcvd_reset_test.go b/test/packetimpact/tests/tcp_synrcvd_reset_test.go index e6ba84cab..7d5deab01 100644 --- a/test/packetimpact/tests/tcp_synrcvd_reset_test.go +++ b/test/packetimpact/tests/tcp_synrcvd_reset_test.go @@ -21,32 +21,32 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } // TestTCPSynRcvdReset tests transition from SYN-RCVD to CLOSED. func TestTCPSynRcvdReset(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFD) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() // Expect dut connection to have transitioned to SYN-RCVD state. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}) + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected SYN-ACK %s", err) } - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}) // Expect the connection to have transitioned SYN-RCVD to CLOSED. // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side. 
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { t.Fatalf("expected a TCP RST %s", err) } } diff --git a/test/packetimpact/tests/tcp_user_timeout_test.go b/test/packetimpact/tests/tcp_user_timeout_test.go index ce31917ee..87e45d765 100644 --- a/test/packetimpact/tests/tcp_user_timeout_test.go +++ b/test/packetimpact/tests/tcp_user_timeout_test.go @@ -22,27 +22,27 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } -func sendPayload(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error { +func sendPayload(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error { sampleData := make([]byte, 100) for i := range sampleData { sampleData[i] = uint8(i) } conn.Drain() dut.Send(fd, sampleData, 0) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &tb.Payload{Bytes: sampleData}, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: sampleData}, time.Second); err != nil { return fmt.Errorf("expected data but got none: %w", err) } return nil } -func sendFIN(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error { +func sendFIN(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error { dut.Close(fd) return nil } @@ -59,20 +59,20 @@ func TestTCPUserTimeout(t *testing.T) { } { for _, ttf := range []struct { description string - f func(conn *tb.TCPIPv4, dut *tb.DUT, fd int32) error + f func(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error }{ {"AfterPayload", sendPayload}, {"AfterFIN", sendFIN}, } { t.Run(tt.description+ttf.description, func(t *testing.T) { // Create a socket, listen, TCP handshake, and accept. - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFD) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFD, _ := dut.Accept(listenFD) if tt.userTimeout != 0 { @@ -85,14 +85,14 @@ func TestTCPUserTimeout(t *testing.T) { time.Sleep(tt.sendDelay) conn.Drain() - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) // If TCP_USER_TIMEOUT was set and the above delay was longer than the // TCP_USER_TIMEOUT then the DUT should send a RST in response to the // testbench's packet. 
expectRST := tt.userTimeout != 0 && tt.sendDelay > tt.userTimeout expectTimeout := 5 * time.Second - got, err := conn.Expect(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, expectTimeout) + got, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, expectTimeout) if expectRST && err != nil { t.Errorf("expected RST packet within %s but got none: %s", expectTimeout, err) } diff --git a/test/packetimpact/tests/tcp_window_shrink_test.go b/test/packetimpact/tests/tcp_window_shrink_test.go index 58ec1d740..576ec1a8b 100644 --- a/test/packetimpact/tests/tcp_window_shrink_test.go +++ b/test/packetimpact/tests/tcp_window_shrink_test.go @@ -21,53 +21,53 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } func TestWindowShrink(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFd, _ := dut.Accept(listenFd) defer dut.Close(acceptFd) dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") - samplePayload := &tb.Payload{Bytes: sampleData} + samplePayload := &testbench.Payload{Bytes: sampleData} dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) dut.Send(acceptFd, sampleData, 0) dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } - if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } // We close our receiving window here - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) dut.Send(acceptFd, []byte("Sample Data"), 0) // Note: There is another kind of zero-window probing which Windows uses (by sending one // new byte at `RemoteSeqNum`), if netstack wants to go that way, we may want to change // the following lines. 
expectedRemoteSeqNum := *conn.RemoteSeqNum() - 1 - if _, err := conn.ExpectData(&tb.TCP{SeqNum: tb.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{SeqNum: testbench.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %v: %s", expectedRemoteSeqNum, err) } } diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go index dd43a24db..54cee138f 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go @@ -21,39 +21,39 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } // TestZeroWindowProbeRetransmit tests retransmits of zero window probes // to be sent at exponentially inreasing time intervals. func TestZeroWindowProbeRetransmit(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFd, _ := dut.Accept(listenFd) defer dut.Close(acceptFd) dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") - samplePayload := &tb.Payload{Bytes: sampleData} + samplePayload := &testbench.Payload{Bytes: sampleData} // Send and receive sample data to the dut. dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %s", err) } @@ -63,9 +63,9 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { // of the recorded first zero probe transmission duration. // // Advertize zero receive window again. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) - probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1)) - ackProbe := tb.Uint32(uint32(*conn.RemoteSeqNum())) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) + probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1)) + ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum())) startProbeDuration := time.Second current := startProbeDuration @@ -79,7 +79,7 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { // Expect zero-window probe with a timeout which is a function of the typical // first retransmission time. 
The retransmission times is supposed to // exponentially increase. - if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil { t.Fatalf("expected a probe with sequence number %v: loop %d", probeSeq, i) } if i == 0 { @@ -92,13 +92,14 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { t.Fatalf("zero probe came sooner interval %d probe %d\n", p, i) } // Acknowledge the zero-window probes from the dut. - conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) current *= 2 } // Advertize non-zero window. - conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)}) // Expect the dut to recover and transmit data. - if _, err := conn.ExpectData(&tb.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench. + TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } } diff --git a/test/packetimpact/tests/tcp_zero_window_probe_test.go b/test/packetimpact/tests/tcp_zero_window_probe_test.go index 6c453505d..c9b3b7af2 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_test.go @@ -21,41 +21,41 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } // TestZeroWindowProbe tests few cases of zero window probing over the // same connection. func TestZeroWindowProbe(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFd, _ := dut.Accept(listenFd) defer dut.Close(acceptFd) dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") - samplePayload := &tb.Payload{Bytes: sampleData} + samplePayload := &testbench.Payload{Bytes: sampleData} start := time.Now() // Send and receive sample data to the dut. 
dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } sendTime := time.Now().Sub(start) - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %s", err) } @@ -63,16 +63,16 @@ func TestZeroWindowProbe(t *testing.T) { // probe to be sent. // // Advertize zero window to the dut. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) // Expected sequence number of the zero window probe. - probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1)) + probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1)) // Expected ack number of the ACK for the probe. - ackProbe := tb.Uint32(uint32(*conn.RemoteSeqNum())) + ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum())) // Expect there are no zero-window probes sent until there is data to be sent out // from the dut. - if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil { + if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil { t.Fatalf("unexpected a packet with sequence number %v: %s", probeSeq, err) } @@ -80,7 +80,7 @@ func TestZeroWindowProbe(t *testing.T) { // Ask the dut to send out data. dut.Send(acceptFd, sampleData, 0) // Expect zero-window probe from the dut. - if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %v: %s", probeSeq, err) } // Expect the probe to be sent after some time. Compare against the previous @@ -94,9 +94,9 @@ func TestZeroWindowProbe(t *testing.T) { // and sends out the sample payload after the send window opens. // // Advertize non-zero window to the dut and ack the zero window probe. - conn.Send(tb.TCP{AckNum: ackProbe, Flags: tb.Uint8(header.TCPFlagAck)}) + conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)}) // Expect the dut to recover and transmit data. - if _, err := conn.ExpectData(&tb.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } @@ -104,9 +104,9 @@ func TestZeroWindowProbe(t *testing.T) { // Check if the dut responds as we do for a similar probe sent to it. // Basically with sequence number to one byte behind the unacknowledged // sequence number. 
- p := tb.Uint32(uint32(*conn.LocalSeqNum())) - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), SeqNum: tb.Uint32(uint32(*conn.LocalSeqNum() - 1))}) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil { + p := testbench.Uint32(uint32(*conn.LocalSeqNum())) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), SeqNum: testbench.Uint32(uint32(*conn.LocalSeqNum() - 1))}) + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil { t.Fatalf("expected a packet with ack number: %d: %s", p, err) } } diff --git a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go index 193427fb9..749281d9d 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go @@ -21,39 +21,39 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } // TestZeroWindowProbeUserTimeout sanity tests user timeout when we are // retransmitting zero window probes. func TestZeroWindowProbeUserTimeout(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) defer dut.Close(listenFd) - conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) defer conn.Close() - conn.Handshake() + conn.Connect() acceptFd, _ := dut.Accept(listenFd) defer dut.Close(acceptFd) dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") - samplePayload := &tb.Payload{Bytes: sampleData} + samplePayload := &testbench.Payload{Bytes: sampleData} // Send and receive sample data to the dut. dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&tb.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) } - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %s", err) } @@ -61,15 +61,15 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) { // probe to be sent. // // Advertize zero window to the dut. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) // Expected sequence number of the zero window probe. - probeSeq := tb.Uint32(uint32(*conn.RemoteSeqNum() - 1)) + probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1)) start := time.Now() // Ask the dut to send out data. 
dut.Send(acceptFd, sampleData, 0) // Expect zero-window probe from the dut. - if _, err := conn.ExpectData(&tb.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { + if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %v: %s", probeSeq, err) } // Record the duration for first probe, the dut sends the zero window probe after @@ -82,7 +82,7 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) { // Reduce the retransmit timeout. dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int32(startProbeDuration.Milliseconds())) // Advertize zero window again. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), WindowSize: tb.Uint16(0)}) + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) // Ask the dut to send out data that would trigger zero window probe retransmissions. dut.Send(acceptFd, sampleData, 0) @@ -91,8 +91,8 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) { // Expect the connection to have timed out and closed which would cause the dut // to reply with a RST to the ACK we send. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { t.Fatalf("expected a TCP RST") } } diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index ca4df2ab0..aedabf9de 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -26,11 +26,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } type connectionMode bool @@ -59,12 +59,12 @@ func (e icmpError) String() string { return "Unknown ICMP error" } -func (e icmpError) ToICMPv4() *tb.ICMPv4 { +func (e icmpError) ToICMPv4() *testbench.ICMPv4 { switch e { case portUnreachable: - return &tb.ICMPv4{Type: tb.ICMPv4Type(header.ICMPv4DstUnreachable), Code: tb.Uint8(header.ICMPv4PortUnreachable)} + return &testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), Code: testbench.Uint8(header.ICMPv4PortUnreachable)} case timeToLiveExceeded: - return &tb.ICMPv4{Type: tb.ICMPv4Type(header.ICMPv4TimeExceeded), Code: tb.Uint8(header.ICMPv4TTLExceeded)} + return &testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4TimeExceeded), Code: testbench.Uint8(header.ICMPv4TTLExceeded)} } return nil } @@ -76,8 +76,8 @@ type errorDetection struct { } type testData struct { - dut *tb.DUT - conn *tb.UDPIPv4 + dut *testbench.DUT + conn *testbench.UDPIPv4 remoteFD int32 remotePort uint16 cleanFD int32 @@ -95,9 +95,9 @@ func wantErrno(c connectionMode, icmpErr icmpError) syscall.Errno { } // sendICMPError sends an ICMP error message in response to a UDP datagram. 
-func sendICMPError(conn *tb.UDPIPv4, icmpErr icmpError, udp *tb.UDP) error { +func sendICMPError(conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UDP) error { if icmpErr == timeToLiveExceeded { - ip, ok := udp.Prev().(*tb.IPv4) + ip, ok := udp.Prev().(*testbench.IPv4) if !ok { return fmt.Errorf("expected %s to be IPv4", udp.Prev()) } @@ -123,10 +123,10 @@ func sendICMPError(conn *tb.UDPIPv4, icmpErr icmpError, udp *tb.UDP) error { // first recv should succeed immediately. func testRecv(ctx context.Context, d testData) error { // Check that receiving on the clean socket works. - d.conn.Send(tb.UDP{DstPort: &d.cleanPort}) + d.conn.Send(testbench.UDP{DstPort: &d.cleanPort}) d.dut.Recv(d.cleanFD, 100, 0) - d.conn.Send(tb.UDP{}) + d.conn.Send(testbench.UDP{}) if d.wantErrno != syscall.Errno(0) { ctx, cancel := context.WithTimeout(ctx, time.Second) @@ -151,7 +151,7 @@ func testRecv(ctx context.Context, d testData) error { func testSendTo(ctx context.Context, d testData) error { // Check that sending on the clean socket works. d.dut.SendTo(d.cleanFD, nil, 0, d.conn.LocalAddr()) - if _, err := d.conn.Expect(tb.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil { + if _, err := d.conn.Expect(testbench.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil { return fmt.Errorf("did not receive UDP packet from clean socket on DUT: %s", err) } @@ -169,7 +169,7 @@ func testSendTo(ctx context.Context, d testData) error { } d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr()) - if _, err := d.conn.Expect(tb.UDP{}, time.Second); err != nil { + if _, err := d.conn.Expect(testbench.UDP{}, time.Second); err != nil { return fmt.Errorf("did not receive UDP packet as expected: %s", err) } return nil @@ -187,7 +187,7 @@ func testSockOpt(_ context.Context, d testData) error { // Check that after clearing socket error, sending doesn't fail. d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr()) - if _, err := d.conn.Expect(tb.UDP{}, time.Second); err != nil { + if _, err := d.conn.Expect(testbench.UDP{}, time.Second); err != nil { return fmt.Errorf("did not receive UDP packet as expected: %s", err) } return nil @@ -223,7 +223,7 @@ func TestUDPICMPErrorPropagation(t *testing.T) { errorDetection{"SockOpt", false, testSockOpt}, } { t.Run(fmt.Sprintf("%s/%s/%s", connect, icmpErr, errDetect.name), func(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) @@ -234,7 +234,7 @@ func TestUDPICMPErrorPropagation(t *testing.T) { cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) defer dut.Close(cleanFD) - conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer conn.Close() if connect { @@ -243,7 +243,7 @@ func TestUDPICMPErrorPropagation(t *testing.T) { } dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) - udp, err := conn.Expect(tb.UDP{}, time.Second) + udp, err := conn.Expect(testbench.UDP{}, time.Second) if err != nil { t.Fatalf("did not receive message from DUT: %s", err) } @@ -258,7 +258,7 @@ func TestUDPICMPErrorPropagation(t *testing.T) { // involved in the generation of the ICMP error. As such, // interactions between it and the the DUT should be independent of // the ICMP error at least at the port level. 
- connClean := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + connClean := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer connClean.Close() errDetectConn = &connClean @@ -281,7 +281,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { wantErrno := wantErrno(connect, icmpErr) t.Run(fmt.Sprintf("%s/%s", connect, icmpErr), func(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) @@ -292,7 +292,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) defer dut.Close(cleanFD) - conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer conn.Close() if connect { @@ -301,7 +301,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { } dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) - udp, err := conn.Expect(tb.UDP{}, time.Second) + udp, err := conn.Expect(testbench.UDP{}, time.Second) if err != nil { t.Fatalf("did not receive message from DUT: %s", err) } @@ -355,8 +355,8 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { t.Fatal(err) } - conn.Send(tb.UDP{DstPort: &cleanPort}) - conn.Send(tb.UDP{}) + conn.Send(testbench.UDP{DstPort: &cleanPort}) + conn.Send(testbench.UDP{}) wg.Wait() }) } diff --git a/test/packetimpact/tests/udp_recv_multicast_test.go b/test/packetimpact/tests/udp_recv_multicast_test.go index 0bae18ba3..d51a34145 100644 --- a/test/packetimpact/tests/udp_recv_multicast_test.go +++ b/test/packetimpact/tests/udp_recv_multicast_test.go @@ -21,22 +21,22 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } func TestUDPRecvMulticast(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) defer dut.Close(boundFD) - conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer conn.Close() - frame := conn.CreateFrame(&tb.UDP{}, &tb.Payload{Bytes: []byte("hello world")}) - frame[1].(*tb.IPv4).DstAddr = tb.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4())) + frame := conn.CreateFrame(&testbench.UDP{}, &testbench.Payload{Bytes: []byte("hello world")}) + frame[1].(*testbench.IPv4).DstAddr = testbench.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4())) conn.SendFrame(frame) dut.Recv(boundFD, 100, 0) } diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go index 350875a6f..bf64803e2 100644 --- a/test/packetimpact/tests/udp_send_recv_dgram_test.go +++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go @@ -22,11 +22,11 @@ import ( "time" "golang.org/x/sys/unix" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } func generateRandomPayload(t 
*testing.T, n int) string { @@ -39,11 +39,11 @@ func generateRandomPayload(t *testing.T, n int) string { } func TestUDPRecv(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) defer dut.Close(boundFD) - conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer conn.Close() testCases := []struct { @@ -59,7 +59,7 @@ func TestUDPRecv(t *testing.T) { } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - frame := conn.CreateFrame(&tb.UDP{}, &tb.Payload{Bytes: []byte(tc.payload)}) + frame := conn.CreateFrame(&testbench.UDP{}, &testbench.Payload{Bytes: []byte(tc.payload)}) conn.SendFrame(frame) if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), tc.payload; got != want { t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want) @@ -69,11 +69,11 @@ func TestUDPRecv(t *testing.T) { } func TestUDPSend(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) defer dut.Close(boundFD) - conn := tb.NewUDPIPv4(t, tb.UDP{DstPort: &remotePort}, tb.UDP{SrcPort: &remotePort}) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer conn.Close() testCases := []struct { @@ -93,7 +93,7 @@ func TestUDPSend(t *testing.T) { if got, want := int(dut.SendTo(boundFD, []byte(tc.payload), 0, conn.LocalAddr())), len(tc.payload); got != want { t.Fatalf("short write got: %d, want: %d", got, want) } - if _, err := conn.ExpectData(tb.UDP{SrcPort: &remotePort}, tb.Payload{Bytes: []byte(tc.payload)}, 1*time.Second); err != nil { + if _, err := conn.ExpectData(testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: []byte(tc.payload)}, 1*time.Second); err != nil { t.Fatal(err) } }) -- cgit v1.2.3 From 526df4f52a07a02687dd43ceb752621a41883f95 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Fri, 5 Jun 2020 13:41:19 -0700 Subject: Fix error code returned due to Port exhaustion. For TCP sockets gVisor incorrectly returns EAGAIN when no ephemeral ports are available to bind during a connect. Linux returns EADDRNOTAVAIL. This change fixes gVisor to return the correct code and adds a test for the same. This change also fixes a minor bug for ping sockets where connect() would fail with EINVAL unless the socket was bound first. Also added tests for testing UDP Port exhaustion and Ping socket port exhaustion. 
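For illustration only (this sketch is not part of the change; the 127.0.0.1:8080 listener and a sufficiently high
file-descriptor limit are assumed), an application that keeps dialing a single destination until the ephemeral
port range runs out should now see EADDRNOTAVAIL from the failing connect, matching Linux, rather than EAGAIN:

// Hypothetical sketch: dial one destination until ephemeral ports are
// exhausted, then check which error the failing connect reports.
package main

import (
	"errors"
	"fmt"
	"net"
	"syscall"
)

func main() {
	var conns []net.Conn
	defer func() {
		for _, c := range conns {
			c.Close()
		}
	}()
	for {
		// Assumes a TCP listener on 127.0.0.1:8080 and an open-file limit
		// high enough to exhaust the ephemeral port range first.
		c, err := net.Dial("tcp", "127.0.0.1:8080")
		if err != nil {
			if errors.Is(err, syscall.EADDRNOTAVAIL) {
				fmt.Println("ephemeral ports exhausted: got EADDRNOTAVAIL as expected")
			} else {
				fmt.Printf("ephemeral ports exhausted: got unexpected error %v\n", err)
			}
			return
		}
		conns = append(conns, c)
	}
}

The tests added below cover the TCP, UDP, and ping socket cases directly against the socket API.
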
PiperOrigin-RevId: 314988525 --- pkg/sentry/socket/netstack/BUILD | 1 + pkg/sentry/socket/netstack/netstack.go | 9 ++ pkg/tcpip/transport/icmp/endpoint.go | 1 + test/syscalls/BUILD | 16 ++ test/syscalls/linux/BUILD | 35 +++++ test/syscalls/linux/ping_socket.cc | 91 +++++++++++ test/syscalls/linux/poll.cc | 6 +- .../linux/socket_inet_loopback_nogotsan.cc | 171 +++++++++++++++++++++ test/syscalls/linux/socket_ipv4_udp_unbound.cc | 33 ++++ 9 files changed, 360 insertions(+), 3 deletions(-) create mode 100644 test/syscalls/linux/ping_socket.cc create mode 100644 test/syscalls/linux/socket_inet_loopback_nogotsan.cc (limited to 'test') diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 333e0042e..8f0f5466e 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -50,5 +50,6 @@ go_library( "//pkg/tcpip/transport/udp", "//pkg/usermem", "//pkg/waiter", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 60df51dae..e1e0c5931 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -33,6 +33,7 @@ import ( "syscall" "time" + "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/binary" @@ -719,6 +720,14 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool defer s.EventUnregister(&e) if err := s.Endpoint.Connect(addr); err != tcpip.ErrConnectStarted && err != tcpip.ErrAlreadyConnecting { + if (s.family == unix.AF_INET || s.family == unix.AF_INET6) && s.skType == linux.SOCK_STREAM { + // TCP unlike UDP returns EADDRNOTAVAIL when it can't + // find an available local ephemeral port. + if err == tcpip.ErrNoPortAvailable { + return syserr.ErrAddressNotAvailable + } + } + return syserr.TranslateNetstackError(err) } diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index 3bc72bc19..57e0a069b 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -506,6 +506,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { nicID := addr.NIC localPort := uint16(0) switch e.state { + case stateInitial: case stateBound, stateConnected: localPort = e.ID.LocalPort if e.BindNICID == 0 { diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 3406a2de8..d68afbe44 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -400,6 +400,14 @@ syscall_test( vfs2 = "True", ) +syscall_test( + size = "medium", + # Takes too long under gotsan to run. + tags = ["nogotsan"], + test = "//test/syscalls/linux:ping_socket_test", + vfs2 = "True", +) + syscall_test( size = "large", add_overlay = True, @@ -697,6 +705,14 @@ syscall_test( test = "//test/syscalls/linux:socket_inet_loopback_test", ) +syscall_test( + size = "large", + shard_count = 50, + # Takes too long for TSAN. Creates a lot of TCP sockets. 
+ tags = ["nogotsan"], + test = "//test/syscalls/linux:socket_inet_loopback_nogotsan_test", +) + syscall_test( size = "large", shard_count = 50, diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index f4b5de18d..ae2aa44dc 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1411,6 +1411,21 @@ cc_binary( ], ) +cc_binary( + name = "ping_socket_test", + testonly = 1, + srcs = ["ping_socket.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:file_descriptor", + gtest, + "//test/util:save_util", + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_binary( name = "pipe_test", testonly = 1, @@ -2780,6 +2795,26 @@ cc_binary( ], ) +cc_binary( + name = "socket_inet_loopback_nogotsan_test", + testonly = 1, + srcs = ["socket_inet_loopback_nogotsan.cc"], + linkstatic = 1, + deps = [ + ":ip_socket_test_util", + ":socket_test_util", + "//test/util:file_descriptor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + gtest, + "//test/util:posix_error", + "//test/util:save_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + ], +) + cc_binary( name = "socket_netlink_test", testonly = 1, diff --git a/test/syscalls/linux/ping_socket.cc b/test/syscalls/linux/ping_socket.cc new file mode 100644 index 000000000..a9bfdb37b --- /dev/null +++ b/test/syscalls/linux/ping_socket.cc @@ -0,0 +1,91 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +class PingSocket : public ::testing::Test { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // The loopback address. + struct sockaddr_in addr_; +}; + +void PingSocket::SetUp() { + // On some hosts ping sockets are restricted to specific groups using the + // sysctl "ping_group_range". + int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); + if (s < 0 && errno == EPERM) { + GTEST_SKIP(); + } + close(s); + + addr_ = {}; + // Just a random port as the destination port number is irrelevant for ping + // sockets. + addr_.sin_port = 12345; + addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr_.sin_family = AF_INET; +} + +void PingSocket::TearDown() {} + +// Test ICMP port exhaustion returns EAGAIN. +// +// We disable both random/cooperative S/R for this test as it makes way too many +// syscalls. 
+TEST_F(PingSocket, ICMPPortExhaustion_NoRandomSave) { + DisableSave ds; + std::vector sockets; + constexpr int kSockets = 65536; + addr_.sin_port = 0; + for (int i = 0; i < kSockets; i++) { + auto s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP)); + int ret = connect(s.get(), reinterpret_cast(&addr_), + sizeof(addr_)); + if (ret == 0) { + sockets.push_back(std::move(s)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN)); + break; + } +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/poll.cc b/test/syscalls/linux/poll.cc index 1e35a4a8b..7a316427d 100644 --- a/test/syscalls/linux/poll.cc +++ b/test/syscalls/linux/poll.cc @@ -259,9 +259,9 @@ TEST_F(PollTest, Nfds) { TEST_PCHECK(getrlimit(RLIMIT_NOFILE, &rlim) == 0); // gVisor caps the number of FDs that epoll can use beyond RLIMIT_NOFILE. - constexpr rlim_t gVisorMax = 1048576; - if (rlim.rlim_cur > gVisorMax) { - rlim.rlim_cur = gVisorMax; + constexpr rlim_t maxFD = 4096; + if (rlim.rlim_cur > maxFD) { + rlim.rlim_cur = maxFD; TEST_PCHECK(setrlimit(RLIMIT_NOFILE, &rlim) == 0); } diff --git a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc new file mode 100644 index 000000000..2324c7f6a --- /dev/null +++ b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc @@ -0,0 +1,171 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "test/syscalls/linux/ip_socket_test_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using ::testing::Gt; + +PosixErrorOr AddrPort(int family, sockaddr_storage const& addr) { + switch (family) { + case AF_INET: + return static_cast( + reinterpret_cast(&addr)->sin_port); + case AF_INET6: + return static_cast( + reinterpret_cast(&addr)->sin6_port); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { + switch (family) { + case AF_INET: + reinterpret_cast(addr)->sin_port = port; + return NoError(); + case AF_INET6: + reinterpret_cast(addr)->sin6_port = port; + return NoError(); + default: + return PosixError(EINVAL, + absl::StrCat("unknown socket family: ", family)); + } +} + +struct TestParam { + TestAddress listener; + TestAddress connector; +}; + +std::string DescribeTestParam(::testing::TestParamInfo const& info) { + return absl::StrCat("Listen", info.param.listener.description, "_Connect", + info.param.connector.description); +} + +using SocketInetLoopbackTest = ::testing::TestWithParam; + +// This test verifies that connect returns EADDRNOTAVAIL if all local ephemeral +// ports are already in use for a given destination ip/port. +// We disable S/R because this test creates a large number of sockets. +TEST_P(SocketInetLoopbackTest, TestTCPPortExhaustion_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + constexpr int kBacklog = 10; + constexpr int kClients = 65536; + + // Create the listening socket. + auto listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), kBacklog), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Disable cooperative S/R as we are making too many syscalls. + DisableSave ds; + + // Now we keep opening connections till we run out of local ephemeral ports. + // and assert the error we get back. 
+ sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + std::vector clients; + std::vector servers; + + for (int i = 0; i < kClients; i++) { + FileDescriptor client = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + int ret = connect(client.get(), reinterpret_cast(&conn_addr), + connector.addr_len); + if (ret == 0) { + clients.push_back(std::move(client)); + FileDescriptor server = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + servers.push_back(std::move(server)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRNOTAVAIL)); + break; + } +} + +INSTANTIATE_TEST_SUITE_P( + All, SocketInetLoopbackTest, + ::testing::Values( + // Listeners bound to IPv4 addresses refuse connections using IPv6 + // addresses. + TestParam{V4Any(), V4Any()}, TestParam{V4Any(), V4Loopback()}, + TestParam{V4Any(), V4MappedAny()}, + TestParam{V4Any(), V4MappedLoopback()}, + TestParam{V4Loopback(), V4Any()}, TestParam{V4Loopback(), V4Loopback()}, + TestParam{V4Loopback(), V4MappedLoopback()}, + TestParam{V4MappedAny(), V4Any()}, + TestParam{V4MappedAny(), V4Loopback()}, + TestParam{V4MappedAny(), V4MappedAny()}, + TestParam{V4MappedAny(), V4MappedLoopback()}, + TestParam{V4MappedLoopback(), V4Any()}, + TestParam{V4MappedLoopback(), V4Loopback()}, + TestParam{V4MappedLoopback(), V4MappedLoopback()}, + + // Listeners bound to IN6ADDR_ANY accept all connections. + TestParam{V6Any(), V4Any()}, TestParam{V6Any(), V4Loopback()}, + TestParam{V6Any(), V4MappedAny()}, + TestParam{V6Any(), V4MappedLoopback()}, TestParam{V6Any(), V6Any()}, + TestParam{V6Any(), V6Loopback()}, + + // Listeners bound to IN6ADDR_LOOPBACK refuse connections using IPv4 + // addresses. + TestParam{V6Loopback(), V6Any()}, + TestParam{V6Loopback(), V6Loopback()}), + DescribeTestParam); + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index bc4b07a62..1294d9050 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -2129,6 +2129,39 @@ TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) { SyscallSucceedsWithValue(kMessageSize)); } +// Check that connect returns EADDRNOTAVAIL when out of local ephemeral ports. +// We disable S/R because this test creates a large number of sockets. +TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) { + auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + constexpr int kClients = 65536; + // Bind the first socket to the loopback and take note of the selected port. + auto addr = V4Loopback(); + ASSERT_THAT(bind(receiver1->get(), reinterpret_cast(&addr.addr), + addr.addr_len), + SyscallSucceeds()); + socklen_t addr_len = addr.addr_len; + ASSERT_THAT(getsockname(receiver1->get(), + reinterpret_cast(&addr.addr), &addr_len), + SyscallSucceeds()); + EXPECT_EQ(addr_len, addr.addr_len); + + // Disable cooperative S/R as we are making too many syscalls. + DisableSave ds; + std::vector> sockets; + for (int i = 0; i < kClients; i++) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int ret = connect(s->get(), reinterpret_cast(&addr.addr), + addr.addr_len); + if (ret == 0) { + sockets.push_back(std::move(s)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EAGAIN)); + break; + } +} + // Test that socket will receive packet info control message. 
TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) { // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. -- cgit v1.2.3 From 8c1f5b5cd8b634a5e7255944f42e82c5c9de3149 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 5 Jun 2020 14:43:56 -0700 Subject: Unshare files on exec The current task can share its fdtable with a few other tasks, but after exec, this should be a completely separate process. PiperOrigin-RevId: 314999565 --- pkg/sentry/kernel/task_exec.go | 4 ++++ test/syscalls/linux/exec.cc | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'test') diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go index 00c425cca..9b69f3cbe 100644 --- a/pkg/sentry/kernel/task_exec.go +++ b/pkg/sentry/kernel/task_exec.go @@ -198,6 +198,10 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState { t.tg.oldRSeqCritical.Store(&OldRSeqCriticalRegion{}) t.tg.pidns.owner.mu.Unlock() + oldFDTable := t.fdTable + t.fdTable = t.fdTable.Fork() + oldFDTable.DecRef() + // Remove FDs with the CloseOnExec flag set. t.fdTable.RemoveIf(func(_ *fs.File, _ *vfs.FileDescription, flags FDFlags) bool { return flags.CloseOnExec diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index 12c9b05ca..e09afafe9 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -673,6 +673,33 @@ TEST(ExecveatTest, SymlinkNoFollowWithRelativePath) { EXPECT_EQ(execve_errno, ELOOP); } +TEST(ExecveatTest, UnshareFiles) { + TempPath tempFile = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(GetAbsoluteTestTmpdir(), "bar", 0755)); + const FileDescriptor fd_closed_on_exec = + ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY | O_CLOEXEC)); + + pid_t child; + EXPECT_THAT(child = syscall(__NR_clone, SIGCHLD | CLONE_VFORK | CLONE_FILES, + 0, 0, 0, 0), + SyscallSucceeds()); + if (child == 0) { + ExecveArray argv = {"test"}; + ExecveArray envp; + ASSERT_THAT( + execve(RunfilePath(kBasicWorkload).c_str(), argv.get(), envp.get()), + SyscallSucceeds()); + _exit(1); + } + + int status; + ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); + EXPECT_EQ(status, 0); + + struct stat st; + EXPECT_THAT(fstat(fd_closed_on_exec.get(), &st), SyscallSucceeds()); +} + TEST(ExecveatTest, SymlinkNoFollowWithAbsolutePath) { std::string parent_dir = "/tmp"; TempPath link = ASSERT_NO_ERRNO_AND_VALUE( -- cgit v1.2.3 From 9aaca5a6da39c6154ce08f4386e702ee4a18d03a Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Fri, 5 Jun 2020 15:38:38 -0700 Subject: Use top-down allocation for pgalloc. This change has multiple small components. First, the chunk size is bumped to 1GB in order to avoid creating excessive VMAs in the Sentry, which can lead to VMA exhaustion (and hitting limits). Second, gap-tracking is added to the usage set in order to efficiently scan for available regions. Third, reclaim is moved to a simple segment set. This is done to allow the order of reclaim to align with the Allocate order (which becomes much more complex when trying to track a "max page" as opposed to "min page", so we just track explicit segments instead, which should make reclaim scanning faster anyways). Finally, the findAvailable function attempts to scan from the top-down, in order to maximize opportunities for VMA merging in applications (hopefully preventing the same VMA exhaustion that can affect the Sentry). 
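To make the top-down scan concrete, here is a simplified standalone sketch that searches a plain sorted slice of in-use ranges for the highest suitably aligned gap. It is a model of the idea only: the real findAvailableRange walks gaps in the gap-tracking usage segment set and also handles file growth and overflow, and none of the names below are taken from the change.

// topdown_sketch.go: given sorted, non-overlapping in-use ranges inside
// [0, fileSize), return the highest aligned range of the requested length
// that fits in a gap. alignment must be a power of two.
package main

import "fmt"

type rng struct{ start, end uint64 } // half-open [start, end)

func findTopDown(used []rng, fileSize, length, alignment uint64) (rng, bool) {
	mask := alignment - 1
	gapEnd := fileSize
	// Visit gaps from the top of the file downwards.
	for i := len(used); i >= 0; i-- {
		gapStart := uint64(0)
		if i > 0 {
			gapStart = used[i-1].end
		}
		if gapEnd >= length {
			// Place the allocation at the top of the gap, aligned down.
			start := (gapEnd - length) &^ mask
			if start >= gapStart {
				return rng{start, start + length}, true
			}
		}
		if i > 0 {
			gapEnd = used[i-1].start
		}
	}
	return rng{}, false
}

func main() {
	used := []rng{{0x1000, 0x2000}, {0x4000, 0x5000}}
	r, ok := findTopDown(used, 0x8000, 0x1000, 0x1000)
	fmt.Printf("%#x-%#x %v\n", r.start, r.end, ok) // 0x7000-0x8000 true
}

Preferring the highest gap keeps successive allocations adjacent at the top of the file, which is what gives the host a chance to merge the corresponding VMAs.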
PiperOrigin-RevId: 315009249 --- pkg/sentry/pgalloc/BUILD | 22 ++++ pkg/sentry/pgalloc/pgalloc.go | 215 ++++++++++++++++++++----------------- pkg/sentry/pgalloc/pgalloc_test.go | 198 +++++++++++++++++++++++----------- test/e2e/integration_test.go | 4 +- 4 files changed, 273 insertions(+), 166 deletions(-) (limited to 'test') diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index 1eeb9f317..a9836ba71 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -33,6 +33,7 @@ go_template_instance( out = "usage_set.go", consts = { "minDegree": "10", + "trackGaps": "1", }, imports = { "platform": "gvisor.dev/gvisor/pkg/sentry/platform", @@ -48,6 +49,26 @@ go_template_instance( }, ) +go_template_instance( + name = "reclaim_set", + out = "reclaim_set.go", + consts = { + "minDegree": "10", + }, + imports = { + "platform": "gvisor.dev/gvisor/pkg/sentry/platform", + }, + package = "pgalloc", + prefix = "reclaim", + template = "//pkg/segment:generic_set", + types = { + "Key": "uint64", + "Range": "platform.FileRange", + "Value": "reclaimSetValue", + "Functions": "reclaimSetFunctions", + }, +) + go_library( name = "pgalloc", srcs = [ @@ -56,6 +77,7 @@ go_library( "evictable_range_set.go", "pgalloc.go", "pgalloc_unsafe.go", + "reclaim_set.go", "save_restore.go", "usage_set.go", ], diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index 2b11ea4ae..c8d9facc2 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -108,12 +108,6 @@ type MemoryFile struct { usageSwapped uint64 usageLast time.Time - // minUnallocatedPage is the minimum page that may be unallocated. - // i.e., there are no unallocated pages below minUnallocatedPage. - // - // minUnallocatedPage is protected by mu. - minUnallocatedPage uint64 - // fileSize is the size of the backing memory file in bytes. fileSize is // always a power-of-two multiple of chunkSize. // @@ -146,11 +140,9 @@ type MemoryFile struct { // is protected by mu. reclaimable bool - // minReclaimablePage is the minimum page that may be reclaimable. - // i.e., all reclaimable pages are >= minReclaimablePage. - // - // minReclaimablePage is protected by mu. - minReclaimablePage uint64 + // relcaim is the collection of regions for reclaim. relcaim is protected + // by mu. + reclaim reclaimSet // reclaimCond is signaled (with mu locked) when reclaimable or destroyed // transitions from false to true. @@ -273,12 +265,10 @@ type evictableMemoryUserInfo struct { } const ( - chunkShift = 24 - chunkSize = 1 << chunkShift // 16 MB + chunkShift = 30 + chunkSize = 1 << chunkShift // 1 GB chunkMask = chunkSize - 1 - initialSize = chunkSize - // maxPage is the highest 64-bit page. maxPage = math.MaxUint64 &^ (usermem.PageSize - 1) ) @@ -302,19 +292,12 @@ func NewMemoryFile(file *os.File, opts MemoryFileOpts) (*MemoryFile, error) { if err := file.Truncate(0); err != nil { return nil, err } - if err := file.Truncate(initialSize); err != nil { - return nil, err - } f := &MemoryFile{ - opts: opts, - fileSize: initialSize, - file: file, - // No pages are reclaimable. DecRef will always be able to - // decrease minReclaimablePage from this point. 
- minReclaimablePage: maxPage, - evictable: make(map[EvictableMemoryUser]*evictableMemoryUserInfo), + opts: opts, + file: file, + evictable: make(map[EvictableMemoryUser]*evictableMemoryUserInfo), } - f.mappings.Store(make([]uintptr, initialSize/chunkSize)) + f.mappings.Store(make([]uintptr, 0)) f.reclaimCond.L = &f.mu if f.opts.DelayedEviction == DelayedEvictionEnabled && f.opts.UseHostMemcgPressure { @@ -404,39 +387,28 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi alignment = usermem.HugePageSize } - start, minUnallocatedPage := findUnallocatedRange(&f.usage, f.minUnallocatedPage, length, alignment) - end := start + length - // File offsets are int64s. Since length must be strictly positive, end - // cannot legitimately be 0. - if end < start || int64(end) <= 0 { + // Find a range in the underlying file. + fr, ok := findAvailableRange(&f.usage, f.fileSize, length, alignment) + if !ok { return platform.FileRange{}, syserror.ENOMEM } - // Expand the file if needed. Double the file size on each expansion; - // uncommitted pages have effectively no cost. - fileSize := f.fileSize - for int64(end) > fileSize { - if fileSize >= 2*fileSize { - // fileSize overflow. - return platform.FileRange{}, syserror.ENOMEM - } - fileSize *= 2 - } - if fileSize > f.fileSize { - if err := f.file.Truncate(fileSize); err != nil { + // Expand the file if needed. Note that findAvailableRange will + // appropriately double the fileSize when required. + if int64(fr.End) > f.fileSize { + if err := f.file.Truncate(int64(fr.End)); err != nil { return platform.FileRange{}, err } - f.fileSize = fileSize + f.fileSize = int64(fr.End) f.mappingsMu.Lock() oldMappings := f.mappings.Load().([]uintptr) - newMappings := make([]uintptr, fileSize>>chunkShift) + newMappings := make([]uintptr, f.fileSize>>chunkShift) copy(newMappings, oldMappings) f.mappings.Store(newMappings) f.mappingsMu.Unlock() } // Mark selected pages as in use. - fr := platform.FileRange{start, end} if f.opts.ManualZeroing { if err := f.forEachMappingSlice(fr, func(bs []byte) { for i := range bs { @@ -453,49 +425,71 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi panic(fmt.Sprintf("allocating %v: failed to insert into usage set:\n%v", fr, &f.usage)) } - if minUnallocatedPage < start { - f.minUnallocatedPage = minUnallocatedPage - } else { - // start was the first unallocated page. The next must be - // somewhere beyond end. - f.minUnallocatedPage = end - } - return fr, nil } -// findUnallocatedRange returns the first unallocated page in usage of the -// specified length and alignment beginning at page start and the first single -// unallocated page. -func findUnallocatedRange(usage *usageSet, start, length, alignment uint64) (uint64, uint64) { - // Only searched until the first page is found. - firstPage := start - foundFirstPage := false - alignMask := alignment - 1 - for seg := usage.LowerBoundSegment(start); seg.Ok(); seg = seg.NextSegment() { - r := seg.Range() +// findAvailableRange returns an available range in the usageSet. +// +// Note that scanning for available slots takes place from end first backwards, +// then forwards. This heuristic has important consequence for how sequential +// mappings can be merged in the host VMAs, given that addresses for both +// application and sentry mappings are allocated top-down (from higher to +// lower addresses). The file is also grown expoentially in order to create +// space for mappings to be allocated downwards. 
+// +// Precondition: alignment must be a power of 2. +func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint64) (platform.FileRange, bool) { + alignmentMask := alignment - 1 + for gap := usage.UpperBoundGap(uint64(fileSize)); gap.Ok(); gap = gap.PrevLargeEnoughGap(length) { + // Start searching only at end of file. + end := gap.End() + if end > uint64(fileSize) { + end = uint64(fileSize) + } - if !foundFirstPage && r.Start > firstPage { - foundFirstPage = true + // Start at the top and align downwards. + start := end - length + if start > end { + break // Underflow. } + start &^= alignmentMask - if start >= r.End { - // start was rounded up to an alignment boundary from the end - // of a previous segment and is now beyond r.End. + // Is the gap still sufficient? + if start < gap.Start() { continue } - // This segment represents allocated or reclaimable pages; only the - // range from start to the segment's beginning is allocatable, and the - // next allocatable range begins after the segment. - if r.Start > start && r.Start-start >= length { - break + + // Allocate in the given gap. + return platform.FileRange{start, start + length}, true + } + + // Check that it's possible to fit this allocation at the end of a file of any size. + min := usage.LastGap().Start() + min = (min + alignmentMask) &^ alignmentMask + if min+length < min { + // Overflow. + return platform.FileRange{}, false + } + + // Determine the minimum file size required to fit this allocation at its end. + for { + if fileSize >= 2*fileSize { + // Is this because it's initially empty? + if fileSize == 0 { + fileSize += chunkSize + } else { + // fileSize overflow. + return platform.FileRange{}, false + } + } else { + // Double the current fileSize. + fileSize *= 2 } - start = (r.End + alignMask) &^ alignMask - if !foundFirstPage { - firstPage = r.End + start := (uint64(fileSize) - length) &^ alignmentMask + if start >= min { + return platform.FileRange{start, start + length}, true } } - return start, firstPage } // AllocateAndFill allocates memory of the given kind and fills it by calling @@ -616,6 +610,7 @@ func (f *MemoryFile) DecRef(fr platform.FileRange) { } val.refs-- if val.refs == 0 { + f.reclaim.Add(seg.Range(), reclaimSetValue{}) freed = true // Reclassify memory as System, until it's freed by the reclaim // goroutine. @@ -628,10 +623,6 @@ func (f *MemoryFile) DecRef(fr platform.FileRange) { f.usage.MergeAdjacent(fr) if freed { - if fr.Start < f.minReclaimablePage { - // We've freed at least one lower page. - f.minReclaimablePage = fr.Start - } f.reclaimable = true f.reclaimCond.Signal() } @@ -1030,6 +1021,7 @@ func (f *MemoryFile) String() string { // for allocation. func (f *MemoryFile) runReclaim() { for { + // N.B. We must call f.markReclaimed on the returned FrameRange. fr, ok := f.findReclaimable() if !ok { break @@ -1085,6 +1077,10 @@ func (f *MemoryFile) runReclaim() { } } +// findReclaimable finds memory that has been marked for reclaim. +// +// Note that there returned range will be removed from tracking. It +// must be reclaimed (removed from f.usage) at this point. func (f *MemoryFile) findReclaimable() (platform.FileRange, bool) { f.mu.Lock() defer f.mu.Unlock() @@ -1103,18 +1099,15 @@ func (f *MemoryFile) findReclaimable() (platform.FileRange, bool) { } f.reclaimCond.Wait() } - // Allocate returns the first usable range in offset order and is - // currently a linear scan, so reclaiming from the beginning of the - // file minimizes the expected latency of Allocate. 
- for seg := f.usage.LowerBoundSegment(f.minReclaimablePage); seg.Ok(); seg = seg.NextSegment() { - if seg.ValuePtr().refs == 0 { - f.minReclaimablePage = seg.End() - return seg.Range(), true - } + // Allocate works from the back of the file inwards, so reclaim + // preserves this order to minimize the cost of the search. + if seg := f.reclaim.LastSegment(); seg.Ok() { + fr := seg.Range() + f.reclaim.Remove(seg) + return fr, true } - // No pages are reclaimable. + // Nothing is reclaimable. f.reclaimable = false - f.minReclaimablePage = maxPage } } @@ -1122,8 +1115,8 @@ func (f *MemoryFile) markReclaimed(fr platform.FileRange) { f.mu.Lock() defer f.mu.Unlock() seg := f.usage.FindSegment(fr.Start) - // All of fr should be mapped to a single uncommitted reclaimable segment - // accounted to System. + // All of fr should be mapped to a single uncommitted reclaimable + // segment accounted to System. if !seg.Ok() { panic(fmt.Sprintf("reclaimed pages %v include unreferenced pages:\n%v", fr, &f.usage)) } @@ -1137,14 +1130,10 @@ func (f *MemoryFile) markReclaimed(fr platform.FileRange) { }); got != want { panic(fmt.Sprintf("reclaimed pages %v in segment %v has incorrect state %v, wanted %v:\n%v", fr, seg.Range(), got, want, &f.usage)) } - // Deallocate reclaimed pages. Even though all of seg is reclaimable, the - // caller of markReclaimed may not have decommitted it, so we can only mark - // fr as reclaimed. + // Deallocate reclaimed pages. Even though all of seg is reclaimable, + // the caller of markReclaimed may not have decommitted it, so we can + // only mark fr as reclaimed. f.usage.Remove(f.usage.Isolate(seg, fr)) - if fr.Start < f.minUnallocatedPage { - // We've deallocated at least one lower page. - f.minUnallocatedPage = fr.Start - } } // StartEvictions requests that f evict all evictable allocations. It does not @@ -1255,3 +1244,27 @@ func (evictableRangeSetFunctions) Merge(_ EvictableRange, _ evictableRangeSetVal func (evictableRangeSetFunctions) Split(_ EvictableRange, _ evictableRangeSetValue, _ uint64) (evictableRangeSetValue, evictableRangeSetValue) { return evictableRangeSetValue{}, evictableRangeSetValue{} } + +// reclaimSetValue is the value type of reclaimSet. 
+type reclaimSetValue struct{} + +type reclaimSetFunctions struct{} + +func (reclaimSetFunctions) MinKey() uint64 { + return 0 +} + +func (reclaimSetFunctions) MaxKey() uint64 { + return math.MaxUint64 +} + +func (reclaimSetFunctions) ClearValue(val *reclaimSetValue) { +} + +func (reclaimSetFunctions) Merge(_ platform.FileRange, _ reclaimSetValue, _ platform.FileRange, _ reclaimSetValue) (reclaimSetValue, bool) { + return reclaimSetValue{}, true +} + +func (reclaimSetFunctions) Split(_ platform.FileRange, _ reclaimSetValue, _ uint64) (reclaimSetValue, reclaimSetValue) { + return reclaimSetValue{}, reclaimSetValue{} +} diff --git a/pkg/sentry/pgalloc/pgalloc_test.go b/pkg/sentry/pgalloc/pgalloc_test.go index 293f22c6b..b5b68eb52 100644 --- a/pkg/sentry/pgalloc/pgalloc_test.go +++ b/pkg/sentry/pgalloc/pgalloc_test.go @@ -23,39 +23,49 @@ import ( const ( page = usermem.PageSize hugepage = usermem.HugePageSize + topPage = (1 << 63) - page ) func TestFindUnallocatedRange(t *testing.T) { for _, test := range []struct { - desc string - usage *usageSegmentDataSlices - start uint64 - length uint64 - alignment uint64 - unallocated uint64 - minUnallocated uint64 + desc string + usage *usageSegmentDataSlices + fileSize int64 + length uint64 + alignment uint64 + start uint64 + expectFail bool }{ { - desc: "Initial allocation succeeds", - usage: &usageSegmentDataSlices{}, - start: 0, - length: page, - alignment: page, - unallocated: 0, - minUnallocated: 0, + desc: "Initial allocation succeeds", + usage: &usageSegmentDataSlices{}, + length: page, + alignment: page, + start: chunkSize - page, // Grows by chunkSize, allocate down. }, { - desc: "Allocation begins at start of file", + desc: "Allocation finds empty space at start of file", usage: &usageSegmentDataSlices{ Start: []uint64{page}, End: []uint64{2 * page}, Values: []usageInfo{{refs: 1}}, }, - start: 0, - length: page, - alignment: page, - unallocated: 0, - minUnallocated: 0, + fileSize: 2 * page, + length: page, + alignment: page, + start: 0, + }, + { + desc: "Allocation finds empty space at end of file", + usage: &usageSegmentDataSlices{ + Start: []uint64{0}, + End: []uint64{page}, + Values: []usageInfo{{refs: 1}}, + }, + fileSize: 2 * page, + length: page, + alignment: page, + start: page, }, { desc: "In-use frames are not allocatable", @@ -64,11 +74,10 @@ func TestFindUnallocatedRange(t *testing.T) { End: []uint64{page, 2 * page}, Values: []usageInfo{{refs: 1}, {refs: 2}}, }, - start: 0, - length: page, - alignment: page, - unallocated: 2 * page, - minUnallocated: 2 * page, + fileSize: 2 * page, + length: page, + alignment: page, + start: 3 * page, // Double fileSize, allocate top-down. }, { desc: "Reclaimable frames are not allocatable", @@ -77,11 +86,10 @@ func TestFindUnallocatedRange(t *testing.T) { End: []uint64{page, 2 * page, 3 * page}, Values: []usageInfo{{refs: 1}, {refs: 0}, {refs: 1}}, }, - start: 0, - length: page, - alignment: page, - unallocated: 3 * page, - minUnallocated: 3 * page, + fileSize: 3 * page, + length: page, + alignment: page, + start: 5 * page, // Double fileSize, grow down. 
}, { desc: "Gaps between in-use frames are allocatable", @@ -90,11 +98,10 @@ func TestFindUnallocatedRange(t *testing.T) { End: []uint64{page, 3 * page}, Values: []usageInfo{{refs: 1}, {refs: 1}}, }, - start: 0, - length: page, - alignment: page, - unallocated: page, - minUnallocated: page, + fileSize: 3 * page, + length: page, + alignment: page, + start: page, }, { desc: "Inadequately-sized gaps are rejected", @@ -103,14 +110,13 @@ func TestFindUnallocatedRange(t *testing.T) { End: []uint64{page, 3 * page}, Values: []usageInfo{{refs: 1}, {refs: 1}}, }, - start: 0, - length: 2 * page, - alignment: page, - unallocated: 3 * page, - minUnallocated: page, + fileSize: 3 * page, + length: 2 * page, + alignment: page, + start: 4 * page, // Double fileSize, grow down. }, { - desc: "Hugepage alignment is honored", + desc: "Alignment is honored at end of file", usage: &usageSegmentDataSlices{ Start: []uint64{0, hugepage + page}, // Hugepage-sized gap here that shouldn't be allocated from @@ -118,37 +124,95 @@ func TestFindUnallocatedRange(t *testing.T) { End: []uint64{page, hugepage + 2*page}, Values: []usageInfo{{refs: 1}, {refs: 1}}, }, - start: 0, - length: hugepage, - alignment: hugepage, - unallocated: 2 * hugepage, - minUnallocated: page, + fileSize: hugepage + 2*page, + length: hugepage, + alignment: hugepage, + start: 3 * hugepage, // Double fileSize until alignment is satisfied, grow down. + }, + { + desc: "Alignment is honored before end of file", + usage: &usageSegmentDataSlices{ + Start: []uint64{0, 2*hugepage + page}, + // Page will need to be shifted down from top. + End: []uint64{page, 2*hugepage + 2*page}, + Values: []usageInfo{{refs: 1}, {refs: 1}}, + }, + fileSize: 2*hugepage + 2*page, + length: hugepage, + alignment: hugepage, + start: hugepage, }, { - desc: "Pages before start ignored", + desc: "Allocations are compact if possible", usage: &usageSegmentDataSlices{ Start: []uint64{page, 3 * page}, End: []uint64{2 * page, 4 * page}, Values: []usageInfo{{refs: 1}, {refs: 2}}, }, - start: page, - length: page, - alignment: page, - unallocated: 2 * page, - minUnallocated: 2 * page, + fileSize: 4 * page, + length: page, + alignment: page, + start: 2 * page, + }, + { + desc: "Top-down allocation within one gap", + usage: &usageSegmentDataSlices{ + Start: []uint64{page, 4 * page, 7 * page}, + End: []uint64{2 * page, 5 * page, 8 * page}, + Values: []usageInfo{{refs: 1}, {refs: 2}, {refs: 1}}, + }, + fileSize: 8 * page, + length: page, + alignment: page, + start: 6 * page, + }, + { + desc: "Top-down allocation between multiple gaps", + usage: &usageSegmentDataSlices{ + Start: []uint64{page, 3 * page, 5 * page}, + End: []uint64{2 * page, 4 * page, 6 * page}, + Values: []usageInfo{{refs: 1}, {refs: 2}, {refs: 1}}, + }, + fileSize: 6 * page, + length: page, + alignment: page, + start: 4 * page, }, { - desc: "start may be in the middle of segment", + desc: "Top-down allocation with large top gap", usage: &usageSegmentDataSlices{ - Start: []uint64{0, 3 * page}, + Start: []uint64{page, 3 * page}, End: []uint64{2 * page, 4 * page}, Values: []usageInfo{{refs: 1}, {refs: 2}}, }, - start: page, - length: page, - alignment: page, - unallocated: 2 * page, - minUnallocated: 2 * page, + fileSize: 8 * page, + length: page, + alignment: page, + start: 7 * page, + }, + { + desc: "Gaps found with possible overflow", + usage: &usageSegmentDataSlices{ + Start: []uint64{page, topPage - page}, + End: []uint64{2 * page, topPage}, + Values: []usageInfo{{refs: 1}, {refs: 1}}, + }, + fileSize: topPage, + length: 
page, + alignment: page, + start: topPage - 2*page, + }, + { + desc: "Overflow detected", + usage: &usageSegmentDataSlices{ + Start: []uint64{page}, + End: []uint64{topPage}, + Values: []usageInfo{{refs: 1}}, + }, + fileSize: topPage, + length: 2 * page, + alignment: page, + expectFail: true, }, } { t.Run(test.desc, func(t *testing.T) { @@ -156,12 +220,18 @@ func TestFindUnallocatedRange(t *testing.T) { if err := usage.ImportSortedSlices(test.usage); err != nil { t.Fatalf("Failed to initialize usage from %v: %v", test.usage, err) } - unallocated, minUnallocated := findUnallocatedRange(&usage, test.start, test.length, test.alignment) - if unallocated != test.unallocated { - t.Errorf("findUnallocatedRange(%v, %x, %x, %x): got unallocated %x, wanted %x", test.usage, test.start, test.length, test.alignment, unallocated, test.unallocated) + fr, ok := findAvailableRange(&usage, test.fileSize, test.length, test.alignment) + if !test.expectFail && !ok { + t.Fatalf("findAvailableRange(%v, %x, %x, %x): got %x, false wanted %x, true", test.usage, test.fileSize, test.length, test.alignment, fr.Start, test.start) + } + if test.expectFail && ok { + t.Fatalf("findAvailableRange(%v, %x, %x, %x): got %x, true wanted %x, false", test.usage, test.fileSize, test.length, test.alignment, fr.Start, test.start) + } + if ok && fr.Start != test.start { + t.Errorf("findAvailableRange(%v, %x, %x, %x): got start=%x, wanted %x", test.usage, test.fileSize, test.length, test.alignment, fr.Start, test.start) } - if minUnallocated != test.minUnallocated { - t.Errorf("findUnallocatedRange(%v, %x, %x, %x): got minUnallocated %x, wanted %x", test.usage, test.start, test.length, test.alignment, minUnallocated, test.minUnallocated) + if ok && fr.End != test.start+test.length { + t.Errorf("findAvailableRange(%v, %x, %x, %x): got end=%x, wanted %x", test.usage, test.fileSize, test.length, test.alignment, fr.End, test.start+test.length) } }) } diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 9cbb2ed5b..91c956e10 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -239,7 +239,9 @@ func TestMemLimit(t *testing.T) { d := dockerutil.MakeDocker(t) defer d.CleanUp() - allocMemory := 500 * 1024 + // N.B. Because the size of the memory file may grow in large chunks, + // there is a minimum threshold of 1GB for the MemTotal figure. + allocMemory := 1024 * 1024 out, err := d.Run(dockerutil.RunOpts{ Image: "basic/alpine", Memory: allocMemory, // In kB. -- cgit v1.2.3 From 74a7d76c9777820fcd7bd6002481eb959f58e247 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 3 Jun 2020 19:57:39 -0700 Subject: iptables: loopback traffic skips prerouting chain Loopback traffic is not affected by rules in the PREROUTING chain. This change is also necessary for istio's envoy to talk to other components in the same pod. 
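The behavior can also be checked outside the iptables test framework with a sketch along these lines, assuming root privileges, an iptables binary on PATH, and two free ports (the port numbers are illustrative): install a PREROUTING REDIRECT rule, then confirm that a TCP connection over loopback still reaches the original port.

// loopback_prerouting_sketch.go: add a PREROUTING REDIRECT rule and verify
// that a loopback TCP connection is not affected by it.
package main

import (
	"fmt"
	"net"
	"os/exec"
)

func main() {
	// Redirect all incoming TCP to an unused port. If loopback traffic were
	// subject to PREROUTING, the dial below would not reach the listener.
	rule := []string{"-t", "nat", "-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-port", "2401"}
	if out, err := exec.Command("iptables", rule...).CombinedOutput(); err != nil {
		fmt.Printf("iptables failed: %v: %s\n", err, out)
		return
	}
	// Best-effort cleanup of the rule on exit.
	defer exec.Command("iptables", append([]string{"-t", "nat", "-D"}, rule[3:]...)...).Run()

	ln, err := net.Listen("tcp", "127.0.0.1:2400")
	if err != nil {
		fmt.Println("listen:", err)
		return
	}
	defer ln.Close()

	conn, err := net.Dial("tcp", "127.0.0.1:2400")
	if err != nil {
		fmt.Println("dial failed; PREROUTING unexpectedly applied to loopback:", err)
		return
	}
	conn.Close()
	fmt.Println("loopback connection succeeded; PREROUTING was skipped")
}

The NATLoopbackSkipsPrerouting test added below performs the same check from inside the test framework.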
--- pkg/tcpip/network/ipv4/ipv4.go | 49 ++++++++++------------- pkg/tcpip/stack/nic.go | 3 +- test/iptables/iptables_test.go | 4 ++ test/iptables/nat.go | 89 ++++++++++++++++++++++++++++++------------ 4 files changed, 89 insertions(+), 56 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 9cd7592f4..959f7e007 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -258,38 +258,24 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw return nil } + // If the packet is manipulated as per NAT Ouput rules, handle packet + // based on destination address and do not send the packet to link layer. + // TODO(gvisor.dev/issue/170): We should do this for every packet, rather than + // only NATted packets, but removing this check short circuits broadcasts + // before they are sent out to other hosts. if pkt.NatDone { - // If the packet is manipulated as per NAT Ouput rules, handle packet - // based on destination address and do not send the packet to link layer. netHeader := header.IPv4(pkt.NetworkHeader) ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress()) if err == nil { - src := netHeader.SourceAddress() - dst := netHeader.DestinationAddress() - route := r.ReverseRoute(src, dst) - - views := make([]buffer.View, 1, 1+len(pkt.Data.Views())) - views[0] = pkt.Header.View() - views = append(views, pkt.Data.Views()...) - ep.HandlePacket(&route, &stack.PacketBuffer{ - Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views), - }) + route := r.ReverseRoute(netHeader.SourceAddress(), netHeader.DestinationAddress()) + handleLoopback(&route, pkt, ep) return nil } } if r.Loop&stack.PacketLoop != 0 { - // The inbound path expects the network header to still be in - // the PacketBuffer's Data field. - views := make([]buffer.View, 1, 1+len(pkt.Data.Views())) - views[0] = pkt.Header.View() - views = append(views, pkt.Data.Views()...) loopedR := r.MakeLoopedRoute() - - e.HandlePacket(&loopedR, &stack.PacketBuffer{ - Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views), - }) - + handleLoopback(&loopedR, pkt, e) loopedR.Release() } if r.Loop&stack.PacketOut == 0 { @@ -305,6 +291,17 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw return nil } +func handleLoopback(route *stack.Route, pkt *stack.PacketBuffer, ep stack.NetworkEndpoint) { + // The inbound path expects the network header to still be in + // the PacketBuffer's Data field. + views := make([]buffer.View, 1, 1+len(pkt.Data.Views())) + views[0] = pkt.Header.View() + views = append(views, pkt.Data.Views()...) + ep.HandlePacket(route, &stack.PacketBuffer{ + Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views), + }) +} + // WritePackets implements stack.NetworkEndpoint.WritePackets. func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, *tcpip.Error) { if r.Loop&stack.PacketLoop != 0 { @@ -347,13 +344,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe src := netHeader.SourceAddress() dst := netHeader.DestinationAddress() route := r.ReverseRoute(src, dst) - - views := make([]buffer.View, 1, 1+len(pkt.Data.Views())) - views[0] = pkt.Header.View() - views = append(views, pkt.Data.Views()...) 
- ep.HandlePacket(&route, &stack.PacketBuffer{ - Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views), - }) + handleLoopback(&route, pkt, ep) n++ continue } diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index ec8e3cb85..6664aea06 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -1229,7 +1229,8 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp } // TODO(gvisor.dev/issue/170): Not supporting iptables for IPv6 yet. - if protocol == header.IPv4ProtocolNumber { + // Loopback traffic skips the prerouting chain. + if protocol == header.IPv4ProtocolNumber && !n.isLoopback() { // iptables filtering. ipt := n.stack.IPTables() address := n.primaryAddress(protocol) diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 172ad9e16..38319a3b2 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -303,6 +303,10 @@ func TestNATRedirectRequiresProtocol(t *testing.T) { singleTest(t, NATRedirectRequiresProtocol{}) } +func TestNATLoopbackSkipsPrerouting(t *testing.T) { + singleTest(t, NATLoopbackSkipsPrerouting{}) +} + func TestInputSource(t *testing.T) { singleTest(t, FilterInputSource{}) } diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 0a10ce7fe..5e54a3963 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -39,6 +39,7 @@ func init() { RegisterTestCase(NATOutDontRedirectIP{}) RegisterTestCase(NATOutRedirectInvert{}) RegisterTestCase(NATRedirectRequiresProtocol{}) + RegisterTestCase(NATLoopbackSkipsPrerouting{}) } // NATPreRedirectUDPPort tests that packets are redirected to different port. @@ -326,32 +327,6 @@ func (NATRedirectRequiresProtocol) LocalAction(ip net.IP) error { return nil } -// loopbackTests runs an iptables rule and ensures that packets sent to -// dest:dropPort are received by localhost:acceptPort. -func loopbackTest(dest net.IP, args ...string) error { - if err := natTable(args...); err != nil { - return err - } - sendCh := make(chan error) - listenCh := make(chan error) - go func() { - sendCh <- sendUDPLoop(dest, dropPort, sendloopDuration) - }() - go func() { - listenCh <- listenUDP(acceptPort, sendloopDuration) - }() - select { - case err := <-listenCh: - if err != nil { - return err - } - case <-time.After(sendloopDuration): - return errors.New("timed out") - } - // sendCh will always take the full sendloop time. - return <-sendCh -} - // NATOutRedirectTCPPort tests that connections are redirected on specified ports. type NATOutRedirectTCPPort struct{} @@ -400,3 +375,65 @@ func (NATOutRedirectTCPPort) ContainerAction(ip net.IP) error { func (NATOutRedirectTCPPort) LocalAction(ip net.IP) error { return nil } + +// NATLoopbackSkipsPrerouting tests that packets sent via loopback aren't +// affected by PREROUTING rules. +type NATLoopbackSkipsPrerouting struct{} + +// Name implements TestCase.Name. +func (NATLoopbackSkipsPrerouting) Name() string { + return "NATLoopbackSkipsPrerouting" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATLoopbackSkipsPrerouting) ContainerAction(ip net.IP) error { + // Redirect anything sent to localhost to an unused port. + dest := []byte{127, 0, 0, 1} + if err := natTable("-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil { + return err + } + + // Establish a connection via localhost. If the PREROUTING rule did apply to + // loopback traffic, the connection would fail. 
+ sendCh := make(chan error) + go func() { + sendCh <- connectTCP(dest, acceptPort, sendloopDuration) + }() + + if err := listenTCP(acceptPort, sendloopDuration); err != nil { + return err + } + return <-sendCh +} + +// LocalAction implements TestCase.LocalAction. +func (NATLoopbackSkipsPrerouting) LocalAction(ip net.IP) error { + // No-op. + return nil +} + +// loopbackTests runs an iptables rule and ensures that packets sent to +// dest:dropPort are received by localhost:acceptPort. +func loopbackTest(dest net.IP, args ...string) error { + if err := natTable(args...); err != nil { + return err + } + sendCh := make(chan error) + listenCh := make(chan error) + go func() { + sendCh <- sendUDPLoop(dest, dropPort, sendloopDuration) + }() + go func() { + listenCh <- listenUDP(acceptPort, sendloopDuration) + }() + select { + case err := <-listenCh: + if err != nil { + return err + } + case <-time.After(sendloopDuration): + return errors.New("timed out") + } + // sendCh will always take the full sendloop time. + return <-sendCh +} -- cgit v1.2.3 From 12f90947612bc5d1d7122871e8c7c1f6b9e4e303 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 8 Jun 2020 14:27:41 -0700 Subject: test/runtimes/proctor: remove an unknown nocgo attribute from go_test rule PiperOrigin-RevId: 315353408 --- test/runtimes/proctor/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/runtimes/proctor/BUILD b/test/runtimes/proctor/BUILD index da1e331e1..f76e2ddc0 100644 --- a/test/runtimes/proctor/BUILD +++ b/test/runtimes/proctor/BUILD @@ -21,7 +21,7 @@ go_test( size = "small", srcs = ["proctor_test.go"], library = ":proctor", - nocgo = 1, + pure = True, deps = [ "//pkg/test/testutil", ], -- cgit v1.2.3 From 4950ccde75b3a85852f10bce6a76f033cf2f9ac6 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 9 Jun 2020 17:00:50 -0700 Subject: Fix write hang bug found by syzkaller. After this change e.mu is only promoted to exclusively locked during route.Resolve. It downgrades back to read-lock afterwards. This prevents the second RLock() call gets stuck later in the stack. https://syzkaller.appspot.com/bug?id=065b893bd8d1d04a4e0a1d53c578537cde1efe99 Syzkaller logs does not contain interesting stack traces. The following stack trace is obtained by running repro locally. goroutine 53 [semacquire, 3 minutes]: runtime.gopark(0xfd4278, 0x1896320, 0xc000301912, 0x4) GOROOT/src/runtime/proc.go:304 +0xe0 fp=0xc0000e25f8 sp=0xc0000e25d8 pc=0x437170 runtime.goparkunlock(...) GOROOT/src/runtime/proc.go:310 runtime.semacquire1(0xc0001220b0, 0xc00000a300, 0x1, 0x0) GOROOT/src/runtime/sema.go:144 +0x1c0 fp=0xc0000e2660 sp=0xc0000e25f8 pc=0x4484e0 sync.runtime_Semacquire(0xc0001220b0) GOROOT/src/runtime/sema.go:56 +0x42 fp=0xc0000e2690 sp=0xc0000e2660 pc=0x448132 gvisor.dev/gvisor/pkg/sync.(*RWMutex).RLock(...) pkg/sync/rwmutex_unsafe.go:76 gvisor.dev/gvisor/pkg/tcpip/transport/udp.(*endpoint).HandleControlPacket(0xc000122000, 0x7ee5, 0xc00053c16c, 0x4, 0x5e21, 0xc00053c224, 0x4, 0x1, 0x0, 0xc00007ed00) pkg/tcpip/transport/udp/endpoint.go:1345 +0x169 fp=0xc0000e26d8 sp=0xc0000e2690 pc=0x9843f9 ...... gvisor.dev/gvisor/pkg/tcpip/transport/udp.(*protocol).HandleUnknownDestinationPacket(0x18bb5a0, 0xc000556540, 0x5e21, 0xc00053c16c, 0x4, 0x7ee5, 0xc00053c1ec, 0x4, 0xc00007e680, 0x4) pkg/tcpip/transport/udp/protocol.go:143 +0xb9a fp=0xc0000e8260 sp=0xc0000e7510 pc=0x9859ba ...... 
gvisor.dev/gvisor/pkg/tcpip/transport/udp.sendUDP(0xc0001220d0, 0xc00053ece0, 0x1, 0x1, 0x883, 0x1405e217ee5, 0x11100a0, 0xc000592000, 0xf88780) pkg/tcpip/transport/udp/endpoint.go:924 +0x3b0 fp=0xc0000ed390 sp=0xc0000ec750 pc=0x981af0 gvisor.dev/gvisor/pkg/tcpip/transport/udp.(*endpoint).write(0xc000122000, 0x11104e0, 0xc00020a460, 0x0, 0x0, 0x0, 0x0, 0x0) pkg/tcpip/transport/udp/endpoint.go:510 +0x4ad fp=0xc0000ed658 sp=0xc0000ed390 pc=0x97f2dd PiperOrigin-RevId: 315590041 --- pkg/tcpip/transport/udp/endpoint.go | 29 ++++++++++++++++++++--------- test/syscalls/linux/tuntap.cc | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 8c7895713..c5e3c73ef 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -15,6 +15,7 @@ package udp import ( + "gvisor.dev/gvisor/pkg/sleep" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -425,24 +426,33 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c } var route *stack.Route + var resolve func(waker *sleep.Waker) (ch <-chan struct{}, err *tcpip.Error) var dstPort uint16 if to == nil { route = &e.route dstPort = e.dstPort - - if route.IsResolutionRequired() { - // Promote lock to exclusive if using a shared route, given that it may need to - // change in Route.Resolve() call below. + resolve = func(waker *sleep.Waker) (ch <-chan struct{}, err *tcpip.Error) { + // Promote lock to exclusive if using a shared route, given that it may + // need to change in Route.Resolve() call below. e.mu.RUnlock() - defer e.mu.RLock() - e.mu.Lock() - defer e.mu.Unlock() // Recheck state after lock was re-acquired. if e.state != StateConnected { - return 0, nil, tcpip.ErrInvalidEndpointState + err = tcpip.ErrInvalidEndpointState + } + if err == nil && route.IsResolutionRequired() { + ch, err = route.Resolve(waker) + } + + e.mu.Unlock() + e.mu.RLock() + + // Recheck state after lock was re-acquired. + if e.state != StateConnected { + err = tcpip.ErrInvalidEndpointState } + return } } else { // Reject destination address if it goes through a different @@ -473,10 +483,11 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c route = &r dstPort = dst.Port + resolve = route.Resolve } if route.IsResolutionRequired() { - if ch, err := route.Resolve(nil); err != nil { + if ch, err := resolve(nil); err != nil { if err == tcpip.ErrWouldBlock { return 0, ch, tcpip.ErrNoLinkAddress } diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index 6195b11e1..97d554e72 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -398,5 +398,25 @@ TEST_F(TuntapTest, SendUdpTriggersArpResolution) { } } +// Write hang bug found by syskaller: b/155928773 +// https://syzkaller.appspot.com/bug?id=065b893bd8d1d04a4e0a1d53c578537cde1efe99 +TEST_F(TuntapTest, WriteHangBug155928773) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTap(kTapName, "10.0.0.1")); + + int sock = socket(AF_INET, SOCK_DGRAM, 0); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_in remote = {}; + remote.sin_family = AF_INET; + remote.sin_port = htons(42); + inet_pton(AF_INET, "10.0.0.1", &remote.sin_addr); + // Return values do not matter in this test. 
+ connect(sock, reinterpret_cast(&remote), sizeof(remote)); + write(sock, "hello", 5); +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 67565078bbcdd8f797206d996605df8f6658d00a Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 9 Jun 2020 18:44:57 -0700 Subject: Implement flock(2) in VFS2 LockFD is the generic implementation that can be embedded in FileDescriptionImpl implementations. Unique lock ID is maintained in vfs.FileDescription and is created on demand. Updates #1480 PiperOrigin-RevId: 315604825 --- pkg/sentry/devices/memdev/full.go | 1 + pkg/sentry/devices/memdev/null.go | 1 + pkg/sentry/devices/memdev/random.go | 1 + pkg/sentry/devices/memdev/zero.go | 1 + pkg/sentry/fs/file.go | 2 +- pkg/sentry/fs/lock/lock.go | 41 +++----- pkg/sentry/fs/lock/lock_set_functions.go | 8 +- pkg/sentry/fs/lock/lock_test.go | 111 +++++++++++----------- pkg/sentry/fsimpl/devpts/BUILD | 1 + pkg/sentry/fsimpl/devpts/devpts.go | 5 +- pkg/sentry/fsimpl/devpts/master.go | 5 + pkg/sentry/fsimpl/devpts/slave.go | 5 + pkg/sentry/fsimpl/eventfd/eventfd.go | 1 + pkg/sentry/fsimpl/ext/BUILD | 1 + pkg/sentry/fsimpl/ext/file_description.go | 1 + pkg/sentry/fsimpl/ext/inode.go | 6 ++ pkg/sentry/fsimpl/ext/regular_file.go | 1 + pkg/sentry/fsimpl/gofer/BUILD | 2 + pkg/sentry/fsimpl/gofer/filesystem.go | 9 +- pkg/sentry/fsimpl/gofer/gofer.go | 23 +++++ pkg/sentry/fsimpl/gofer/special_file.go | 4 +- pkg/sentry/fsimpl/host/BUILD | 1 + pkg/sentry/fsimpl/host/host.go | 8 +- pkg/sentry/fsimpl/kernfs/BUILD | 2 + pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 10 +- pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 9 +- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 5 +- pkg/sentry/fsimpl/kernfs/kernfs_test.go | 13 ++- pkg/sentry/fsimpl/pipefs/BUILD | 1 + pkg/sentry/fsimpl/pipefs/pipefs.go | 6 +- pkg/sentry/fsimpl/proc/BUILD | 1 + pkg/sentry/fsimpl/proc/subtasks.go | 5 +- pkg/sentry/fsimpl/proc/task.go | 5 +- pkg/sentry/fsimpl/proc/task_fds.go | 7 +- pkg/sentry/fsimpl/proc/task_files.go | 10 +- pkg/sentry/fsimpl/proc/tasks.go | 5 +- pkg/sentry/fsimpl/signalfd/signalfd.go | 1 + pkg/sentry/fsimpl/sys/BUILD | 1 + pkg/sentry/fsimpl/sys/sys.go | 7 +- pkg/sentry/fsimpl/timerfd/timerfd.go | 1 + pkg/sentry/fsimpl/tmpfs/filesystem.go | 6 +- pkg/sentry/fsimpl/tmpfs/regular_file.go | 23 ----- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 41 +------- pkg/sentry/kernel/fd_table.go | 15 +-- pkg/sentry/kernel/kernel.go | 5 - pkg/sentry/kernel/pipe/BUILD | 1 + pkg/sentry/kernel/pipe/vfs.go | 13 ++- pkg/sentry/socket/hostinet/BUILD | 1 + pkg/sentry/socket/hostinet/socket_vfs2.go | 3 + pkg/sentry/socket/netlink/BUILD | 1 + pkg/sentry/socket/netlink/socket_vfs2.go | 8 +- pkg/sentry/socket/netstack/BUILD | 1 + pkg/sentry/socket/netstack/netstack_vfs2.go | 3 + pkg/sentry/socket/unix/BUILD | 1 + pkg/sentry/socket/unix/unix_vfs2.go | 7 +- pkg/sentry/syscalls/linux/sys_file.go | 39 ++------ pkg/sentry/syscalls/linux/vfs2/BUILD | 2 + pkg/sentry/syscalls/linux/vfs2/lock.go | 64 +++++++++++++ pkg/sentry/syscalls/linux/vfs2/vfs2.go | 2 +- pkg/sentry/vfs/BUILD | 1 + pkg/sentry/vfs/epoll.go | 1 + pkg/sentry/vfs/file_description.go | 25 ++++- pkg/sentry/vfs/file_description_impl_util.go | 80 ++++++++++++---- pkg/sentry/vfs/file_description_impl_util_test.go | 1 + pkg/sentry/vfs/inotify.go | 1 + test/syscalls/linux/BUILD | 3 + test/syscalls/linux/flock.cc | 75 ++++++++++++--- 67 files changed, 470 insertions(+), 281 deletions(-) create mode 100644 pkg/sentry/syscalls/linux/vfs2/lock.go (limited to 'test') diff --git 
a/pkg/sentry/devices/memdev/full.go b/pkg/sentry/devices/memdev/full.go index c7e197691..af66fe4dc 100644 --- a/pkg/sentry/devices/memdev/full.go +++ b/pkg/sentry/devices/memdev/full.go @@ -42,6 +42,7 @@ type fullFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD } // Release implements vfs.FileDescriptionImpl.Release. diff --git a/pkg/sentry/devices/memdev/null.go b/pkg/sentry/devices/memdev/null.go index 33d060d02..92d3d71be 100644 --- a/pkg/sentry/devices/memdev/null.go +++ b/pkg/sentry/devices/memdev/null.go @@ -43,6 +43,7 @@ type nullFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD } // Release implements vfs.FileDescriptionImpl.Release. diff --git a/pkg/sentry/devices/memdev/random.go b/pkg/sentry/devices/memdev/random.go index acfa23149..6b81da5ef 100644 --- a/pkg/sentry/devices/memdev/random.go +++ b/pkg/sentry/devices/memdev/random.go @@ -48,6 +48,7 @@ type randomFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD // off is the "file offset". off is accessed using atomic memory // operations. diff --git a/pkg/sentry/devices/memdev/zero.go b/pkg/sentry/devices/memdev/zero.go index 3b1372b9e..c6f15054d 100644 --- a/pkg/sentry/devices/memdev/zero.go +++ b/pkg/sentry/devices/memdev/zero.go @@ -44,6 +44,7 @@ type zeroFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD } // Release implements vfs.FileDescriptionImpl.Release. diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 2a278fbe3..ca41520b4 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -146,7 +146,7 @@ func (f *File) DecRef() { f.DecRefWithDestructor(func() { // Drop BSD style locks. lockRng := lock.LockRange{Start: 0, End: lock.LockEOF} - f.Dirent.Inode.LockCtx.BSD.UnlockRegion(lock.UniqueID(f.UniqueID), lockRng) + f.Dirent.Inode.LockCtx.BSD.UnlockRegion(f, lockRng) // Release resources held by the FileOperations. f.FileOperations.Release() diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go index 926538d90..8a5d9c7eb 100644 --- a/pkg/sentry/fs/lock/lock.go +++ b/pkg/sentry/fs/lock/lock.go @@ -62,7 +62,7 @@ import ( type LockType int // UniqueID is a unique identifier of the holder of a regional file lock. -type UniqueID uint64 +type UniqueID interface{} const ( // ReadLock describes a POSIX regional file lock to be taken @@ -98,12 +98,7 @@ type Lock struct { // If len(Readers) > 0 then HasWriter must be false. Readers map[UniqueID]bool - // HasWriter indicates that this is a write lock held by a single - // UniqueID. - HasWriter bool - - // Writer is only valid if HasWriter is true. It identifies a - // single write lock holder. + // Writer holds the writer unique ID. It's nil if there are no writers. Writer UniqueID } @@ -186,7 +181,6 @@ func makeLock(uid UniqueID, t LockType) Lock { case ReadLock: value.Readers[uid] = true case WriteLock: - value.HasWriter = true value.Writer = uid default: panic(fmt.Sprintf("makeLock: invalid lock type %d", t)) @@ -196,10 +190,7 @@ func makeLock(uid UniqueID, t LockType) Lock { // isHeld returns true if uid is a holder of Lock. func (l Lock) isHeld(uid UniqueID) bool { - if l.HasWriter && l.Writer == uid { - return true - } - return l.Readers[uid] + return l.Writer == uid || l.Readers[uid] } // lock sets uid as a holder of a typed lock on Lock. 
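For illustration, here is a minimal, self-contained Go sketch of the ownership model introduced above. It uses stand-in types only (not the real pkg/sentry/fs/lock package): with UniqueID widened to an empty interface, any comparable value (such as the *File or *FDTable pointers that the fs/file.go and fd_table.go hunks now pass directly) can identify a lock holder, and the deleted HasWriter flag becomes a plain comparison of Writer against nil or the caller.

package main

import "fmt"

// UniqueID mirrors the new definition: any comparable value may identify a
// lock owner.
type UniqueID interface{}

// lockState is a stripped-down stand-in for lock.Lock.
type lockState struct {
	readers map[UniqueID]bool
	writer  UniqueID // nil when no write lock is held
}

// isHeld shows how the nil-able Writer replaces the old HasWriter flag.
func (l *lockState) isHeld(uid UniqueID) bool {
	return l.writer == uid || l.readers[uid]
}

// fdTable is a hypothetical owner type standing in for kernel.FDTable.
type fdTable struct{ name string }

func main() {
	owner := &fdTable{name: "task-1"}
	l := &lockState{readers: map[UniqueID]bool{}}
	l.writer = owner

	fmt.Println(l.isHeld(owner))      // true: same owner value
	fmt.Println(l.isHeld(&fdTable{})) // false: a different owner
}

Keying ownership on the owning object itself is also what allows the later fd_table.go and kernel.go hunks to drop the fdMapUids counter that previously generated numeric IDs.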
@@ -214,20 +205,20 @@ func (l *Lock) lock(uid UniqueID, t LockType) { } // We cannot downgrade a write lock to a read lock unless the // uid is the same. - if l.HasWriter { + if l.Writer != nil { if l.Writer != uid { panic(fmt.Sprintf("lock: cannot downgrade write lock to read lock for uid %d, writer is %d", uid, l.Writer)) } // Ensure that there is only one reader if upgrading. l.Readers = make(map[UniqueID]bool) // Ensure that there is no longer a writer. - l.HasWriter = false + l.Writer = nil } l.Readers[uid] = true return case WriteLock: // If we are already the writer, then this is a no-op. - if l.HasWriter && l.Writer == uid { + if l.Writer == uid { return } // We can only upgrade a read lock to a write lock if there @@ -243,7 +234,6 @@ func (l *Lock) lock(uid UniqueID, t LockType) { } // Ensure that there is only a writer. l.Readers = make(map[UniqueID]bool) - l.HasWriter = true l.Writer = uid default: panic(fmt.Sprintf("lock: invalid lock type %d", t)) @@ -277,9 +267,8 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool { switch t { case ReadLock: return l.lockable(r, func(value Lock) bool { - // If there is no writer, there's no problem adding - // another reader. - if !value.HasWriter { + // If there is no writer, there's no problem adding another reader. + if value.Writer == nil { return true } // If there is a writer, then it must be the same uid @@ -289,10 +278,9 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool { case WriteLock: return l.lockable(r, func(value Lock) bool { // If there are only readers. - if !value.HasWriter { - // Then this uid can only take a write lock if - // this is a private upgrade, meaning that the - // only reader is uid. + if value.Writer == nil { + // Then this uid can only take a write lock if this is a private + // upgrade, meaning that the only reader is uid. return len(value.Readers) == 1 && value.Readers[uid] } // If the uid is already a writer on this region, then @@ -304,7 +292,8 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool { } } -// lock returns true if uid took a lock of type t on the entire range of LockRange. +// lock returns true if uid took a lock of type t on the entire range of +// LockRange. // // Preconditions: r.Start <= r.End (will panic otherwise). func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool { @@ -339,7 +328,7 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool { seg, _ = l.SplitUnchecked(seg, r.End) } - // Set the lock on the segment. This is guaranteed to + // Set the lock on the segment. This is guaranteed to // always be safe, given canLock above. value := seg.ValuePtr() value.lock(uid, t) @@ -386,7 +375,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) { value := seg.Value() var remove bool - if value.HasWriter && value.Writer == uid { + if value.Writer == uid { // If we are unlocking a writer, then since there can // only ever be one writer and no readers, then this // lock should always be removed from the set. 
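Most of the remaining diffs apply one recurring pattern: the filesystem keeps a single lock.FileLocks per inode, and every file description opened on that inode calls LockFD.Init with a pointer to it, so a lock taken through one descriptor is visible to every other descriptor for the same file. The following rough sketch uses hypothetical stand-in types (not the real vfs.LockFD or lock.FileLocks API) purely to show why the pointer is shared:

package main

import (
	"fmt"
	"sync"
)

// fileLocks stands in for the per-inode lock state.
type fileLocks struct {
	mu     sync.Mutex
	holder interface{} // current exclusive holder, nil if none
}

// lockFD stands in for the embedded LockFD mixin: it only remembers which
// fileLocks instance its file description should operate on.
type lockFD struct{ locks *fileLocks }

func (fd *lockFD) init(l *fileLocks) { fd.locks = l }

// tryLockExclusive grants the lock if it is free or already held by owner.
func (fd *lockFD) tryLockExclusive(owner interface{}) bool {
	fd.locks.mu.Lock()
	defer fd.locks.mu.Unlock()
	if fd.locks.holder == nil || fd.locks.holder == owner {
		fd.locks.holder = owner
		return true
	}
	return false
}

// inode owns the lock state: exactly one fileLocks per file.
type inode struct{ locks fileLocks }

func main() {
	var ino inode
	var fd1, fd2 lockFD
	fd1.init(&ino.locks) // the role played by fd.LockFD.Init(&i.locks) below
	fd2.init(&ino.locks)

	fmt.Println(fd1.tryLockExclusive("owner-A")) // true: lock acquired
	fmt.Println(fd2.tryLockExclusive("owner-B")) // false: other FD, same inode
}

Connected pipe endpoints follow the same idea, except that the reader and writer descriptors returned by ReaderWriterPair are initialized with one FileLocks allocated up front, as the pipe/vfs.go hunk notes ("Connected pipes share the same locks.").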
diff --git a/pkg/sentry/fs/lock/lock_set_functions.go b/pkg/sentry/fs/lock/lock_set_functions.go index 8a3ace0c1..50a16e662 100644 --- a/pkg/sentry/fs/lock/lock_set_functions.go +++ b/pkg/sentry/fs/lock/lock_set_functions.go @@ -44,14 +44,9 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock) return Lock{}, false } } - if val1.HasWriter != val2.HasWriter { + if val1.Writer != val2.Writer { return Lock{}, false } - if val1.HasWriter { - if val1.Writer != val2.Writer { - return Lock{}, false - } - } return val1, true } @@ -62,7 +57,6 @@ func (lockSetFunctions) Split(r LockRange, val Lock, split uint64) (Lock, Lock) for k, v := range val.Readers { val0.Readers[k] = v } - val0.HasWriter = val.HasWriter val0.Writer = val.Writer return val, val0 diff --git a/pkg/sentry/fs/lock/lock_test.go b/pkg/sentry/fs/lock/lock_test.go index ba002aeb7..fad90984b 100644 --- a/pkg/sentry/fs/lock/lock_test.go +++ b/pkg/sentry/fs/lock/lock_test.go @@ -42,9 +42,6 @@ func equals(e0, e1 []entry) bool { if !reflect.DeepEqual(e0[i].LockRange, e1[i].LockRange) { return false } - if e0[i].Lock.HasWriter != e1[i].Lock.HasWriter { - return false - } if e0[i].Lock.Writer != e1[i].Lock.Writer { return false } @@ -105,7 +102,7 @@ func TestCanLock(t *testing.T) { LockRange: LockRange{2048, 3072}, }, { - Lock: Lock{HasWriter: true, Writer: 1}, + Lock: Lock{Writer: 1}, LockRange: LockRange{3072, 4096}, }, }) @@ -241,7 +238,7 @@ func TestSetLock(t *testing.T) { // 0 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -254,7 +251,7 @@ func TestSetLock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -273,7 +270,7 @@ func TestSetLock(t *testing.T) { LockRange: LockRange{0, 4096}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{4096, LockEOF}, }, }, @@ -301,7 +298,7 @@ func TestSetLock(t *testing.T) { // 0 4096 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 4096}, }, { @@ -318,7 +315,7 @@ func TestSetLock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -550,7 +547,7 @@ func TestSetLock(t *testing.T) { LockRange: LockRange{0, 1024}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{1024, 4096}, }, { @@ -594,7 +591,7 @@ func TestSetLock(t *testing.T) { LockRange: LockRange{0, 1024}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{1024, 3072}, }, { @@ -633,7 +630,7 @@ func TestSetLock(t *testing.T) { // 0 1024 2048 4096 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { @@ -663,11 +660,11 @@ func TestSetLock(t *testing.T) { // 0 1024 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{1024, LockEOF}, }, }, @@ -675,28 +672,30 @@ func TestSetLock(t *testing.T) { } for _, test := range tests { - l := fill(test.before) + t.Run(test.name, func(t *testing.T) { + l := fill(test.before) - r := LockRange{Start: test.start, End: test.end} - success := 
l.lock(test.uid, test.lockType, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } + r := LockRange{Start: test.start, End: test.end} + success := l.lock(test.uid, test.lockType, r) + var got []entry + for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + got = append(got, entry{ + Lock: seg.Value(), + LockRange: seg.Range(), + }) + } - if success != test.success { - t.Errorf("%s: setlock(%v, %+v, %d, %d) got success %v, want %v", test.name, test.before, r, test.uid, test.lockType, success, test.success) - continue - } + if success != test.success { + t.Errorf("setlock(%v, %+v, %d, %d) got success %v, want %v", test.before, r, test.uid, test.lockType, success, test.success) + return + } - if success { - if !equals(got, test.after) { - t.Errorf("%s: got set %+v, want %+v", test.name, got, test.after) + if success { + if !equals(got, test.after) { + t.Errorf("got set %+v, want %+v", got, test.after) + } } - } + }) } } @@ -782,7 +781,7 @@ func TestUnlock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -824,7 +823,7 @@ func TestUnlock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -837,7 +836,7 @@ func TestUnlock(t *testing.T) { // 0 4096 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{4096, LockEOF}, }, }, @@ -876,7 +875,7 @@ func TestUnlock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -889,7 +888,7 @@ func TestUnlock(t *testing.T) { // 0 4096 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 4096}, }, }, @@ -906,7 +905,7 @@ func TestUnlock(t *testing.T) { LockRange: LockRange{0, 1024}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{1024, 4096}, }, { @@ -974,7 +973,7 @@ func TestUnlock(t *testing.T) { // 0 1024 4096 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { @@ -991,7 +990,7 @@ func TestUnlock(t *testing.T) { // 0 8 4096 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 8}, }, { @@ -1008,7 +1007,7 @@ func TestUnlock(t *testing.T) { // 0 1024 4096 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { @@ -1025,7 +1024,7 @@ func TestUnlock(t *testing.T) { // 0 1024 4096 8192 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { @@ -1041,19 +1040,21 @@ func TestUnlock(t *testing.T) { } for _, test := range tests { - l := fill(test.before) + t.Run(test.name, func(t *testing.T) { + l := fill(test.before) - r := LockRange{Start: test.start, End: test.end} - l.unlock(test.uid, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } - if !equals(got, test.after) { - t.Errorf("%s: got set %+v, want %+v", test.name, got, test.after) - } + r 
:= LockRange{Start: test.start, End: test.end} + l.unlock(test.uid, r) + var got []entry + for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + got = append(got, entry{ + Lock: seg.Value(), + LockRange: seg.Range(), + }) + } + if !equals(got, test.after) { + t.Errorf("got set %+v, want %+v", got, test.after) + } + }) } } diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index 585764223..cf440dce8 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -23,6 +23,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/unimpl", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index c03c65445..9b0e0cca2 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -28,6 +28,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -116,6 +117,8 @@ type rootInode struct { kernfs.InodeNotSymlink kernfs.OrderedChildren + locks lock.FileLocks + // Keep a reference to this inode's dentry. dentry kernfs.Dentry @@ -183,7 +186,7 @@ func (i *rootInode) masterClose(t *Terminal) { // Open implements kernfs.Inode.Open. func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 7a7ce5d81..1d22adbe3 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -34,6 +35,8 @@ type masterInode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink + locks lock.FileLocks + // Keep a reference to this inode's dentry. 
dentry kernfs.Dentry @@ -55,6 +58,7 @@ func (mi *masterInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vf inode: mi, t: t, } + fd.LockFD.Init(&mi.locks) if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { mi.DecRef() return nil, err @@ -85,6 +89,7 @@ func (mi *masterInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds type masterFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD inode *masterInode t *Terminal diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go index 526cd406c..7fe475080 100644 --- a/pkg/sentry/fsimpl/devpts/slave.go +++ b/pkg/sentry/fsimpl/devpts/slave.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -33,6 +34,8 @@ type slaveInode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink + locks lock.FileLocks + // Keep a reference to this inode's dentry. dentry kernfs.Dentry @@ -51,6 +54,7 @@ func (si *slaveInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs fd := &slaveFileDescription{ inode: si, } + fd.LockFD.Init(&si.locks) if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { si.DecRef() return nil, err @@ -91,6 +95,7 @@ func (si *slaveInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds type slaveFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD inode *slaveInode } diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go index c573d7935..d12d78b84 100644 --- a/pkg/sentry/fsimpl/eventfd/eventfd.go +++ b/pkg/sentry/fsimpl/eventfd/eventfd.go @@ -37,6 +37,7 @@ type EventFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD // queue is used to notify interested parties when the event object // becomes readable or writable. diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index ff861d0fe..973fa0def 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -60,6 +60,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/syscalls/linux", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go index 92f7da40d..90b086468 100644 --- a/pkg/sentry/fsimpl/ext/file_description.go +++ b/pkg/sentry/fsimpl/ext/file_description.go @@ -26,6 +26,7 @@ import ( type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD } func (fd *fileDescription) filesystem() *filesystem { diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go index 485f86f4b..e4b434b13 100644 --- a/pkg/sentry/fsimpl/ext/inode.go +++ b/pkg/sentry/fsimpl/ext/inode.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -54,6 +55,8 @@ type inode struct { // diskInode gives us access to the inode struct on disk. Immutable. 
diskInode disklayout.Inode + locks lock.FileLocks + // This is immutable. The first field of the implementations must have inode // as the first field to ensure temporality. impl interface{} @@ -157,6 +160,7 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOpt switch in.impl.(type) { case *regularFile: var fd regularFileFD + fd.LockFD.Init(&in.locks) if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -168,6 +172,7 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOpt return nil, syserror.EISDIR } var fd directoryFD + fd.LockFD.Init(&in.locks) if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -178,6 +183,7 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOpt return nil, syserror.ELOOP } var fd symlinkFD + fd.LockFD.Init(&in.locks) fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) return &fd.vfsfd, nil default: diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go index 30135ddb0..f7015c44f 100644 --- a/pkg/sentry/fsimpl/ext/regular_file.go +++ b/pkg/sentry/fsimpl/ext/regular_file.go @@ -77,6 +77,7 @@ func (in *inode) isRegular() bool { // vfs.FileDescriptionImpl. type regularFileFD struct { fileDescription + vfs.LockFD // off is the file offset. off is accessed using atomic memory operations. off int64 diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index f5f35a3bc..5cdeeaeb5 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -54,6 +54,7 @@ go_library( "//pkg/p9", "//pkg/safemem", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/host", "//pkg/sentry/hostfd", "//pkg/sentry/kernel", @@ -68,6 +69,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/unet", diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 36e0e1856..40933b74b 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -801,6 +801,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf return nil, err } fd := ®ularFileFD{} + fd.LockFD.Init(&d.locks) if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ AllowDirectIO: true, }); err != nil { @@ -826,6 +827,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf } } fd := &directoryFD{} + fd.LockFD.Init(&d.locks) if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -842,7 +844,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf } case linux.S_IFIFO: if d.isSynthetic() { - return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags) + return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks) } } return d.openSpecialFileLocked(ctx, mnt, opts) @@ -902,7 +904,7 @@ retry: return nil, err } } - fd, err := newSpecialFileFD(h, mnt, d, opts.Flags) + fd, err := newSpecialFileFD(h, mnt, d, &d.locks, opts.Flags) if err != nil { h.close(ctx) return nil, err @@ -989,6 +991,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving var childVFSFD *vfs.FileDescription if useRegularFileFD { fd := ®ularFileFD{} + 
fd.LockFD.Init(&child.locks) if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{ AllowDirectIO: true, }); err != nil { @@ -1003,7 +1006,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving if fdobj != nil { h.fd = int32(fdobj.Release()) } - fd, err := newSpecialFileFD(h, mnt, child, opts.Flags) + fd, err := newSpecialFileFD(h, mnt, child, &d.locks, opts.Flags) if err != nil { h.close(ctx) return nil, err diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 3f3bd56f0..0d88a328e 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -45,6 +45,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -52,6 +53,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/usermem" @@ -662,6 +664,8 @@ type dentry struct { // If this dentry represents a synthetic named pipe, pipe is the pipe // endpoint bound to this file. pipe *pipe.VFSPipe + + locks lock.FileLocks } // dentryAttrMask returns a p9.AttrMask enabling all attributes used by the @@ -1366,6 +1370,9 @@ func (d *dentry) decLinks() { type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD + + lockLogging sync.Once } func (fd *fileDescription) filesystem() *filesystem { @@ -1416,3 +1423,19 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { return fd.dentry().removexattr(ctx, auth.CredentialsFromContext(ctx), name) } + +// LockBSD implements vfs.FileDescriptionImpl.LockBSD. +func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + fd.lockLogging.Do(func() { + log.Infof("File lock using gofer file handled internally.") + }) + return fd.LockFD.LockBSD(ctx, uid, t, block) +} + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. 
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + fd.lockLogging.Do(func() { + log.Infof("Range lock using gofer file handled internally.") + }) + return fd.LockFD.LockPOSIX(ctx, uid, t, rng, block) +} diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index ff6126b87..289efdd25 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -51,7 +52,7 @@ type specialFileFD struct { off int64 } -func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) { +func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *lock.FileLocks, flags uint32) (*specialFileFD, error) { ftype := d.fileType() seekable := ftype == linux.S_IFREG mayBlock := ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK @@ -60,6 +61,7 @@ func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*speci seekable: seekable, mayBlock: mayBlock, } + fd.LockFD.Init(locks) if mayBlock && h.fd >= 0 { if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil { return nil, err diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index ca0fe6d2b..54f16ad63 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -39,6 +39,7 @@ go_library( "//pkg/sentry/unimpl", "//pkg/sentry/uniqueid", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 18b127521..5ec5100b8 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -34,6 +34,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -182,6 +183,8 @@ type inode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink + locks lock.FileLocks + // When the reference count reaches zero, the host fd is closed. refs.AtomicRefCount @@ -468,7 +471,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u return nil, err } // Currently, we only allow Unix sockets to be imported. 
- return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d) + return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d, &i.locks) } // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that @@ -478,6 +481,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u fileDescription: fileDescription{inode: i}, termios: linux.DefaultSlaveTermios, } + fd.LockFD.Init(&i.locks) vfsfd := &fd.vfsfd if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil { return nil, err @@ -486,6 +490,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u } fd := &fileDescription{inode: i} + fd.LockFD.Init(&i.locks) vfsfd := &fd.vfsfd if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil { return nil, err @@ -497,6 +502,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD // inode is vfsfd.Dentry().Impl().(*kernfs.Dentry).Inode().(*inode), but // cached to reduce indirections and casting. fileDescription does not hold diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index ef34cb28a..0299dbde9 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -49,6 +49,7 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", @@ -67,6 +68,7 @@ go_test( "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/usermem", "@com_github_google_go-cmp//cmp:go_default_library", diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 1568a9d49..6418de0a3 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -38,7 +39,8 @@ type DynamicBytesFile struct { InodeNotDirectory InodeNotSymlink - data vfs.DynamicBytesSource + locks lock.FileLocks + data vfs.DynamicBytesSource } var _ Inode = (*DynamicBytesFile)(nil) @@ -55,7 +57,7 @@ func (f *DynamicBytesFile) Init(creds *auth.Credentials, devMajor, devMinor uint // Open implements Inode.Open. func (f *DynamicBytesFile) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { fd := &DynamicBytesFD{} - if err := fd.Init(rp.Mount(), vfsd, f.data, opts.Flags); err != nil { + if err := fd.Init(rp.Mount(), vfsd, f.data, &f.locks, opts.Flags); err != nil { return nil, err } return &fd.vfsfd, nil @@ -77,13 +79,15 @@ func (*DynamicBytesFile) SetStat(context.Context, *vfs.Filesystem, *auth.Credent type DynamicBytesFD struct { vfs.FileDescriptionDefaultImpl vfs.DynamicBytesFileDescriptionImpl + vfs.LockFD vfsfd vfs.FileDescription inode Inode } // Init initializes a DynamicBytesFD. 
-func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, flags uint32) error { +func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, locks *lock.FileLocks, flags uint32) error { + fd.LockFD.Init(locks) if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { return err } diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 8284e76a7..33a5968ca 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -42,6 +43,7 @@ import ( type GenericDirectoryFD struct { vfs.FileDescriptionDefaultImpl vfs.DirectoryFileDescriptionDefaultImpl + vfs.LockFD vfsfd vfs.FileDescription children *OrderedChildren @@ -55,9 +57,9 @@ type GenericDirectoryFD struct { // NewGenericDirectoryFD creates a new GenericDirectoryFD and returns its // dentry. -func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) { +func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, locks *lock.FileLocks, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) { fd := &GenericDirectoryFD{} - if err := fd.Init(children, opts); err != nil { + if err := fd.Init(children, locks, opts); err != nil { return nil, err } if err := fd.vfsfd.Init(fd, opts.Flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { @@ -69,11 +71,12 @@ func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildre // Init initializes a GenericDirectoryFD. Use it when overriding // GenericDirectoryFD. Caller must call fd.VFSFileDescription.Init() with the // correct implementation. -func (fd *GenericDirectoryFD) Init(children *OrderedChildren, opts *vfs.OpenOptions) error { +func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *lock.FileLocks, opts *vfs.OpenOptions) error { if vfs.AccessTypesForOpenFlags(opts)&vfs.MayWrite != 0 { // Can't open directories for writing. return syserror.EISDIR } + fd.LockFD.Init(locks) fd.children = children return nil } diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 982daa2e6..0e4927215 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -555,6 +556,8 @@ type StaticDirectory struct { InodeAttrs InodeNoDynamicLookup OrderedChildren + + locks lock.FileLocks } var _ Inode = (*StaticDirectory)(nil) @@ -584,7 +587,7 @@ func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint3 // Open implements kernfs.Inode. 
func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := NewGenericDirectoryFD(rp.Mount(), vfsd, &s.OrderedChildren, &opts) + fd, err := NewGenericDirectoryFD(rp.Mount(), vfsd, &s.OrderedChildren, &s.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index 412cf6ac9..6749facf7 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -100,8 +101,10 @@ type readonlyDir struct { kernfs.InodeNotSymlink kernfs.InodeNoDynamicLookup kernfs.InodeDirectoryNoNewChildren - kernfs.OrderedChildren + + locks lock.FileLocks + dentry kernfs.Dentry } @@ -117,7 +120,7 @@ func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMod } func (d *readonlyDir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts) if err != nil { return nil, err } @@ -128,10 +131,12 @@ type dir struct { attrs kernfs.InodeNotSymlink kernfs.InodeNoDynamicLookup + kernfs.OrderedChildren + + locks lock.FileLocks fs *filesystem dentry kernfs.Dentry - kernfs.OrderedChildren } func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry { @@ -147,7 +152,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte } func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD index 5950a2d59..c618dbe6c 100644 --- a/pkg/sentry/fsimpl/pipefs/BUILD +++ b/pkg/sentry/fsimpl/pipefs/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/usermem", ], diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index cab771211..e4dabaa33 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -81,7 +82,8 @@ type inode struct { kernfs.InodeNotSymlink kernfs.InodeNoopRefCount - pipe *pipe.VFSPipe + locks lock.FileLocks + pipe *pipe.VFSPipe ino uint64 uid auth.KUID @@ -147,7 +149,7 @@ func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth. // Open implements kernfs.Inode.Open. 
func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - return i.pipe.Open(ctx, rp.Mount(), vfsd, opts.Flags) + return i.pipe.Open(ctx, rp.Mount(), vfsd, opts.Flags, &i.locks) } // NewConnectedPipeFDs returns a pair of FileDescriptions representing the read diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 17c1342b5..351ba4ee9 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -35,6 +35,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/tcpip/header", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index 36a911db4..e2cdb7ee9 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -37,6 +38,8 @@ type subtasksInode struct { kernfs.OrderedChildren kernfs.AlwaysValid + locks lock.FileLocks + fs *filesystem task *kernel.Task pidns *kernel.PIDNamespace @@ -153,7 +156,7 @@ func (fd *subtasksFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) erro // Open implements kernfs.Inode. func (i *subtasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { fd := &subtasksFD{task: i.task} - if err := fd.Init(&i.OrderedChildren, &opts); err != nil { + if err := fd.Init(&i.OrderedChildren, &i.locks, &opts); err != nil { return nil, err } if err := fd.VFSFileDescription().Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 482055db1..44078a765 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -38,6 +39,8 @@ type taskInode struct { kernfs.InodeAttrs kernfs.OrderedChildren + locks lock.FileLocks + task *kernel.Task } @@ -103,7 +106,7 @@ func (i *taskInode) Valid(ctx context.Context) bool { // Open implements kernfs.Inode. 
func (i *taskInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index 44ccc9e4a..ef6c1d04f 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -53,6 +54,8 @@ func taskFDExists(t *kernel.Task, fd int32) bool { } type fdDir struct { + locks lock.FileLocks + fs *filesystem task *kernel.Task @@ -143,7 +146,7 @@ func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro // Open implements kernfs.Inode. func (i *fdDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } @@ -270,7 +273,7 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, // Open implements kernfs.Inode. func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 2f297e48a..e5eaa91cd 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -30,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -775,6 +776,8 @@ type namespaceInode struct { kernfs.InodeNoopRefCount kernfs.InodeNotDirectory kernfs.InodeNotSymlink + + locks lock.FileLocks } var _ kernfs.Inode = (*namespaceInode)(nil) @@ -791,6 +794,7 @@ func (i *namespaceInode) Init(creds *auth.Credentials, devMajor, devMinor uint32 func (i *namespaceInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { fd := &namespaceFD{inode: i} i.IncRef() + fd.LockFD.Init(&i.locks) if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -801,6 +805,7 @@ func (i *namespaceInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd * // /proc/[pid]/ns/*. type namespaceFD struct { vfs.FileDescriptionDefaultImpl + vfs.LockFD vfsfd vfs.FileDescription inode *namespaceInode @@ -825,8 +830,3 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err func (fd *namespaceFD) Release() { fd.inode.DecRef() } - -// OnClose implements FileDescriptionImpl. 
-func (*namespaceFD) OnClose(context.Context) error { - return nil -} diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index b51d43954..58c8b9d05 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -43,6 +44,8 @@ type tasksInode struct { kernfs.OrderedChildren kernfs.AlwaysValid + locks lock.FileLocks + fs *filesystem pidns *kernel.PIDNamespace @@ -197,7 +200,7 @@ func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback // Open implements kernfs.Inode. func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/signalfd/signalfd.go b/pkg/sentry/fsimpl/signalfd/signalfd.go index d29ef3f83..242ba9b5d 100644 --- a/pkg/sentry/fsimpl/signalfd/signalfd.go +++ b/pkg/sentry/fsimpl/signalfd/signalfd.go @@ -31,6 +31,7 @@ type SignalFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD // target is the original signal target task. // diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index a741e2bb6..237f17def 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserror", ], ) diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index 0af373604..b84463d3a 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -98,8 +99,10 @@ type dir struct { kernfs.InodeNoDynamicLookup kernfs.InodeNotSymlink kernfs.InodeDirectoryNoNewChildren - kernfs.OrderedChildren + + locks lock.FileLocks + dentry kernfs.Dentry } @@ -121,7 +124,7 @@ func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.Set // Open implements kernfs.Inode.Open. 
func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go index 60c92d626..2dc90d484 100644 --- a/pkg/sentry/fsimpl/timerfd/timerfd.go +++ b/pkg/sentry/fsimpl/timerfd/timerfd.go @@ -32,6 +32,7 @@ type TimerFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD events waiter.Queue timer *ktime.Timer diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index e801680e8..72399b321 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -399,6 +399,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open switch impl := d.inode.impl.(type) { case *regularFile: var fd regularFileFD + fd.LockFD.Init(&d.inode.locks) if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -414,15 +415,16 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open return nil, syserror.EISDIR } var fd directoryFD + fd.LockFD.Init(&d.inode.locks) if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } return &fd.vfsfd, nil case *symlink: - // Can't open symlinks without O_PATH (which is unimplemented). + // TODO(gvisor.dev/issue/2782): Can't open symlinks without O_PATH. return nil, syserror.ELOOP case *namedPipe: - return impl.pipe.Open(ctx, rp.Mount(), &d.vfsd, opts.Flags) + return impl.pipe.Open(ctx, rp.Mount(), &d.vfsd, opts.Flags, &d.inode.locks) case *deviceFile: return rp.VirtualFilesystem().OpenDeviceSpecialFile(ctx, rp.Mount(), &d.vfsd, impl.kind, impl.major, impl.minor, opts) case *socketFile: diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 4f2ae04d2..77447b32c 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -366,28 +365,6 @@ func (fd *regularFileFD) Sync(ctx context.Context) error { return nil } -// LockBSD implements vfs.FileDescriptionImpl.LockBSD. -func (fd *regularFileFD) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error { - return fd.inode().lockBSD(uid, t, block) -} - -// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. -func (fd *regularFileFD) UnlockBSD(ctx context.Context, uid lock.UniqueID) error { - fd.inode().unlockBSD(uid) - return nil -} - -// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error { - return fd.inode().lockPOSIX(uid, t, rng, block) -} - -// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
-func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error { - fd.inode().unlockPOSIX(uid, rng) - return nil -} - // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { file := fd.inode().impl.(*regularFile) diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 7ce1b86c7..71a7522af 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -36,7 +36,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -311,7 +310,6 @@ type inode struct { ctime int64 // nanoseconds mtime int64 // nanoseconds - // Advisory file locks, which lock at the inode level. locks lock.FileLocks // Inotify watches for this inode. @@ -539,44 +537,6 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linu return nil } -// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. -func (i *inode) lockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { - switch i.impl.(type) { - case *regularFile: - return i.locks.LockBSD(uid, t, block) - } - return syserror.EBADF -} - -// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. -func (i *inode) unlockBSD(uid fslock.UniqueID) error { - switch i.impl.(type) { - case *regularFile: - i.locks.UnlockBSD(uid) - return nil - } - return syserror.EBADF -} - -// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. -func (i *inode) lockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { - switch i.impl.(type) { - case *regularFile: - return i.locks.LockPOSIX(uid, t, rng, block) - } - return syserror.EBADF -} - -// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. -func (i *inode) unlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) error { - switch i.impl.(type) { - case *regularFile: - i.locks.UnlockPOSIX(uid, rng) - return nil - } - return syserror.EBADF -} - // allocatedBlocksForSize returns the number of 512B blocks needed to // accommodate the given size in bytes, as appropriate for struct // stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block @@ -708,6 +668,7 @@ func (i *inode) userXattrSupported() bool { type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD } func (fd *fileDescription) filesystem() *filesystem { diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index dbfcef0fa..b35afafe3 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -80,9 +80,6 @@ type FDTable struct { refs.AtomicRefCount k *Kernel - // uid is a unique identifier. - uid uint64 - // mu protects below. mu sync.Mutex `state:"nosave"` @@ -130,7 +127,7 @@ func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { // drop drops the table reference. func (f *FDTable) drop(file *fs.File) { // Release locks. - file.Dirent.Inode.LockCtx.Posix.UnlockRegion(lock.UniqueID(f.uid), lock.LockRange{0, lock.LockEOF}) + file.Dirent.Inode.LockCtx.Posix.UnlockRegion(f, lock.LockRange{0, lock.LockEOF}) // Send inotify events. 
d := file.Dirent @@ -164,17 +161,9 @@ func (f *FDTable) dropVFS2(file *vfs.FileDescription) { file.DecRef() } -// ID returns a unique identifier for this FDTable. -func (f *FDTable) ID() uint64 { - return f.uid -} - // NewFDTable allocates a new FDTable that may be used by tasks in k. func (k *Kernel) NewFDTable() *FDTable { - f := &FDTable{ - k: k, - uid: atomic.AddUint64(&k.fdMapUids, 1), - } + f := &FDTable{k: k} f.init() return f } diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 5efeb3767..bcbeb6a39 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -194,11 +194,6 @@ type Kernel struct { // cpuClockTickerSetting is protected by runningTasksMu. cpuClockTickerSetting ktime.Setting - // fdMapUids is an ever-increasing counter for generating FDTable uids. - // - // fdMapUids is mutable, and is accessed using atomic memory operations. - fdMapUids uint64 - // uniqueID is used to generate unique identifiers. // // uniqueID is mutable, and is accessed using atomic memory operations. diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 7bfa9075a..0db546b98 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -27,6 +27,7 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 2602bed72..c0e9ee1f4 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -61,11 +62,13 @@ func NewVFSPipe(isNamed bool, sizeBytes, atomicIOBytes int64) *VFSPipe { // // Preconditions: statusFlags should not contain an open access mode. func (vp *VFSPipe) ReaderWriterPair(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription) { - return vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags), vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags) + // Connected pipes share the same locks. + locks := &lock.FileLocks{} + return vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks), vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks) } // Open opens the pipe represented by vp. -func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, error) { +func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *lock.FileLocks) (*vfs.FileDescription, error) { vp.mu.Lock() defer vp.mu.Unlock() @@ -75,7 +78,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s return nil, syserror.EINVAL } - fd := vp.newFD(mnt, vfsd, statusFlags) + fd := vp.newFD(mnt, vfsd, statusFlags, locks) // Named pipes have special blocking semantics during open: // @@ -127,10 +130,11 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s } // Preconditions: vp.mu must be held. 
-func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) *vfs.FileDescription { +func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *lock.FileLocks) *vfs.FileDescription { fd := &VFSPipeFD{ pipe: &vp.pipe, } + fd.LockFD.Init(locks) fd.vfsfd.Init(fd, statusFlags, mnt, vfsd, &vfs.FileDescriptionOptions{ DenyPRead: true, DenyPWrite: true, @@ -159,6 +163,7 @@ type VFSPipeFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.LockFD pipe *Pipe } diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index e82d6cd1e..60c9896fc 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -34,6 +34,7 @@ go_library( "//pkg/sentry/socket", "//pkg/sentry/socket/control", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip/stack", diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go index 677743113..027add1fd 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -35,6 +36,7 @@ import ( type socketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD // We store metadata for hostinet sockets internally. Technically, we should // access metadata (e.g. through stat, chmod) on the host for correctness, @@ -59,6 +61,7 @@ func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol in fd: fd, }, } + s.LockFD.Init(&lock.FileLocks{}) if err := fdnotifier.AddFD(int32(fd), &s.queue); err != nil { return nil, syserr.FromError(err) } diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 7212d8644..420e573c9 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -29,6 +29,7 @@ go_library( "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go index b854bf990..8bfee5193 100644 --- a/pkg/sentry/socket/netlink/socket_vfs2.go +++ b/pkg/sentry/socket/netlink/socket_vfs2.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -40,6 +41,7 @@ type SocketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.LockFD socketOpsCommon } @@ -66,7 +68,7 @@ func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketV return nil, err } - return &SocketVFS2{ + fd := &SocketVFS2{ socketOpsCommon: socketOpsCommon{ ports: t.Kernel().NetlinkPorts(), protocol: protocol, @@ -75,7 +77,9 @@ func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketV connection: connection, sendBufferSize: defaultSendBufferSize, }, - }, nil + } + fd.LockFD.Init(&lock.FileLocks{}) + return fd, nil } // Readiness implements 
waiter.Waitable.Readiness. diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 8f0f5466e..0f592ecc3 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -37,6 +37,7 @@ go_library( "//pkg/sentry/socket/netfilter", "//pkg/sentry/unimpl", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index fcd8013c0..1412a4810 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -38,6 +39,7 @@ type SocketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.LockFD socketOpsCommon } @@ -64,6 +66,7 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu protocol: protocol, }, } + s.LockFD.Init(&lock.FileLocks{}) vfsfd := &s.vfsfd if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{ DenyPRead: true, diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index de2cc4bdf..7d4cc80fe 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -29,6 +29,7 @@ go_library( "//pkg/sentry/socket/netstack", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 45e109361..8c32371a2 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -39,6 +40,7 @@ type SocketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.LockFD socketOpsCommon } @@ -51,7 +53,7 @@ func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) mnt := t.Kernel().SocketMount() d := sockfs.NewDentry(t.Credentials(), mnt) - fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d) + fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &lock.FileLocks{}) if err != nil { return nil, syserr.FromError(err) } @@ -60,7 +62,7 @@ func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) // NewFileDescription creates and returns a socket file description // corresponding to the given mount and dentry. -func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry) (*vfs.FileDescription, error) { +func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *lock.FileLocks) (*vfs.FileDescription, error) { // You can create AF_UNIX, SOCK_RAW sockets. They're the same as // SOCK_DGRAM and don't require CAP_NET_RAW. 
if stype == linux.SOCK_RAW { @@ -73,6 +75,7 @@ func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint3 stype: stype, }, } + sock.LockFD.Init(locks) vfsfd := &sock.vfsfd if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{ DenyPRead: true, diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 35a98212a..8347617bd 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -998,9 +998,6 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } - // The lock uid is that of the Task's FDTable. - lockUniqueID := lock.UniqueID(t.FDTable().ID()) - // These locks don't block; execute the non-blocking operation using the inode's lock // context directly. switch flock.Type { @@ -1010,12 +1007,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.ReadLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, nil) { return 0, nil, syserror.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.ReadLock, rng, t) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, t) { return 0, nil, syserror.EINTR } } @@ -1026,18 +1023,18 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.WriteLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, nil) { return 0, nil, syserror.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.WriteLock, rng, t) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, t) { return 0, nil, syserror.EINTR } } return 0, nil, nil case linux.F_UNLCK: - file.Dirent.Inode.LockCtx.Posix.UnlockRegion(lockUniqueID, rng) + file.Dirent.Inode.LockCtx.Posix.UnlockRegion(t.FDTable(), rng) return 0, nil, nil default: return 0, nil, syserror.EINVAL @@ -2157,22 +2154,6 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall nonblocking := operation&linux.LOCK_NB != 0 operation &^= linux.LOCK_NB - // flock(2): - // Locks created by flock() are associated with an open file table entry. This means that - // duplicate file descriptors (created by, for example, fork(2) or dup(2)) refer to the - // same lock, and this lock may be modified or released using any of these descriptors. Furthermore, - // the lock is released either by an explicit LOCK_UN operation on any of these duplicate - // descriptors, or when all such descriptors have been closed. - // - // If a process uses open(2) (or similar) to obtain more than one descriptor for the same file, - // these descriptors are treated independently by flock(). An attempt to lock the file using - // one of these file descriptors may be denied by a lock that the calling process has already placed via - // another descriptor. - // - // We use the File UniqueID as the lock UniqueID because it needs to reference the same lock across dup(2) - // and fork(2). 
- lockUniqueID := lock.UniqueID(file.UniqueID) - // A BSD style lock spans the entire file. rng := lock.LockRange{ Start: 0, @@ -2183,29 +2164,29 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.LOCK_EX: if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(lockUniqueID, lock.WriteLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, nil) { return 0, nil, syserror.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(lockUniqueID, lock.WriteLock, rng, t) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, t) { return 0, nil, syserror.EINTR } } case linux.LOCK_SH: if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(lockUniqueID, lock.ReadLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, nil) { return 0, nil, syserror.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(lockUniqueID, lock.ReadLock, rng, t) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, t) { return 0, nil, syserror.EINTR } } case linux.LOCK_UN: - file.Dirent.Inode.LockCtx.BSD.UnlockRegion(lockUniqueID, rng) + file.Dirent.Inode.LockCtx.BSD.UnlockRegion(file, rng) default: // flock(2): EINVAL operation is invalid. return 0, nil, syserror.EINVAL diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index c0d005247..9f93f4354 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -14,6 +14,7 @@ go_library( "getdents.go", "inotify.go", "ioctl.go", + "lock.go", "memfd.go", "mmap.go", "mount.go", @@ -42,6 +43,7 @@ go_library( "//pkg/fspath", "//pkg/gohacks", "//pkg/sentry/arch", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/eventfd", "//pkg/sentry/fsimpl/pipefs", diff --git a/pkg/sentry/syscalls/linux/vfs2/lock.go b/pkg/sentry/syscalls/linux/vfs2/lock.go new file mode 100644 index 000000000..bf19028c4 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/lock.go @@ -0,0 +1,64 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Flock implements linux syscall flock(2). +func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + operation := args[1].Int() + + file := t.GetFileVFS2(fd) + if file == nil { + // flock(2): EBADF fd is not an open file descriptor. 
+ return 0, nil, syserror.EBADF + } + defer file.DecRef() + + nonblocking := operation&linux.LOCK_NB != 0 + operation &^= linux.LOCK_NB + + var blocker lock.Blocker + if !nonblocking { + blocker = t + } + + switch operation { + case linux.LOCK_EX: + if err := file.LockBSD(t, lock.WriteLock, blocker); err != nil { + return 0, nil, err + } + case linux.LOCK_SH: + if err := file.LockBSD(t, lock.ReadLock, blocker); err != nil { + return 0, nil, err + } + case linux.LOCK_UN: + if err := file.UnlockBSD(t); err != nil { + return 0, nil, err + } + default: + // flock(2): EINVAL operation is invalid. + return 0, nil, syserror.EINVAL + } + + return 0, nil, nil +} diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index 7b6e7571a..954c82f97 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -62,7 +62,7 @@ func Override() { s.Table[55] = syscalls.Supported("getsockopt", GetSockOpt) s.Table[59] = syscalls.Supported("execve", Execve) s.Table[72] = syscalls.Supported("fcntl", Fcntl) - delete(s.Table, 73) // flock + s.Table[73] = syscalls.Supported("fcntl", Flock) s.Table[74] = syscalls.Supported("fsync", Fsync) s.Table[75] = syscalls.Supported("fdatasync", Fdatasync) s.Table[76] = syscalls.Supported("truncate", Truncate) diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 774cc66cc..16d9f3a28 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -72,6 +72,7 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/uniqueid", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go index 8297f964b..599c3131c 100644 --- a/pkg/sentry/vfs/epoll.go +++ b/pkg/sentry/vfs/epoll.go @@ -31,6 +31,7 @@ type EpollInstance struct { vfsfd FileDescription FileDescriptionDefaultImpl DentryMetadataFileDescriptionImpl + NoLockFD // q holds waiters on this EpollInstance. q waiter.Queue diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index bb294563d..97b9b18d7 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -73,6 +73,8 @@ type FileDescription struct { // writable is analogous to Linux's FMODE_WRITE. writable bool + usedLockBSD uint32 + // impl is the FileDescriptionImpl associated with this Filesystem. impl is // immutable. This should be the last field in FileDescription. impl FileDescriptionImpl @@ -175,6 +177,12 @@ func (fd *FileDescription) DecRef() { } ep.interestMu.Unlock() } + + // If BSD locks were used, release any lock that it may have acquired. + if atomic.LoadUint32(&fd.usedLockBSD) != 0 { + fd.impl.UnlockBSD(context.Background(), fd) + } + // Release implementation resources. fd.impl.Release() if fd.writable { @@ -420,13 +428,9 @@ type FileDescriptionImpl interface { Removexattr(ctx context.Context, name string) error // LockBSD tries to acquire a BSD-style advisory file lock. - // - // TODO(gvisor.dev/issue/1480): BSD-style file locking LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error - // LockBSD releases a BSD-style advisory file lock. - // - // TODO(gvisor.dev/issue/1480): BSD-style file locking + // UnlockBSD releases a BSD-style advisory file lock. UnlockBSD(ctx context.Context, uid lock.UniqueID) error // LockPOSIX tries to acquire a POSIX-style advisory file lock. 
@@ -736,3 +740,14 @@ func (fd *FileDescription) InodeID() uint64 { func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) error { return fd.Sync(ctx) } + +// LockBSD tries to acquire a BSD-style advisory file lock. +func (fd *FileDescription) LockBSD(ctx context.Context, lockType lock.LockType, blocker lock.Blocker) error { + atomic.StoreUint32(&fd.usedLockBSD, 1) + return fd.impl.LockBSD(ctx, fd, lockType, blocker) +} + +// UnlockBSD releases a BSD-style advisory file lock. +func (fd *FileDescription) UnlockBSD(ctx context.Context) error { + return fd.impl.UnlockBSD(ctx, fd) +} diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index f4c111926..af7213dfd 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -21,8 +21,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/fs/lock" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -153,26 +154,6 @@ func (FileDescriptionDefaultImpl) Removexattr(ctx context.Context, name string) return syserror.ENOTSUP } -// LockBSD implements FileDescriptionImpl.LockBSD. -func (FileDescriptionDefaultImpl) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error { - return syserror.EBADF -} - -// UnlockBSD implements FileDescriptionImpl.UnlockBSD. -func (FileDescriptionDefaultImpl) UnlockBSD(ctx context.Context, uid lock.UniqueID) error { - return syserror.EBADF -} - -// LockPOSIX implements FileDescriptionImpl.LockPOSIX. -func (FileDescriptionDefaultImpl) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error { - return syserror.EBADF -} - -// UnlockPOSIX implements FileDescriptionImpl.UnlockPOSIX. -func (FileDescriptionDefaultImpl) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error { - return syserror.EBADF -} - // DirectoryFileDescriptionDefaultImpl may be embedded by implementations of // FileDescriptionImpl that always represent directories to obtain // implementations of non-directory I/O methods that return EISDIR. @@ -384,3 +365,60 @@ func GenericConfigureMMap(fd *FileDescription, m memmap.Mappable, opts *memmap.M fd.IncRef() return nil } + +// LockFD may be used by most implementations of FileDescriptionImpl.Lock* +// functions. Caller must call Init(). +type LockFD struct { + locks *lock.FileLocks +} + +// Init initializes fd with FileLocks to use. +func (fd *LockFD) Init(locks *lock.FileLocks) { + fd.locks = locks +} + +// LockBSD implements vfs.FileDescriptionImpl.LockBSD. +func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + return fd.locks.LockBSD(uid, t, block) +} + +// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. +func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { + fd.locks.UnlockBSD(uid) + return nil +} + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + return fd.locks.LockPOSIX(uid, t, rng, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
+func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, rng fslock.LockRange) error { + fd.locks.UnlockPOSIX(uid, rng) + return nil +} + +// NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface +// returning ENOLCK. +type NoLockFD struct{} + +// LockBSD implements vfs.FileDescriptionImpl.LockBSD. +func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + return syserror.ENOLCK +} + +// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. +func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { + return syserror.ENOLCK +} + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + return syserror.ENOLCK +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, rng fslock.LockRange) error { + return syserror.ENOLCK +} diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 3a75d4d62..5061f6ac9 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -33,6 +33,7 @@ import ( type fileDescription struct { vfsfd FileDescription FileDescriptionDefaultImpl + NoLockFD } // genCount contains the number of times its DynamicBytesSource.Generate() diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 05a3051a4..7fa7d2d0c 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -57,6 +57,7 @@ type Inotify struct { vfsfd FileDescription FileDescriptionDefaultImpl DentryMetadataFileDescriptionImpl + NoLockFD // Unique identifier for this inotify instance. We don't just reuse the // inotify fd because fds can be duped. These should not be exposed to the diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ae2aa44dc..4a1486e14 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -802,10 +802,13 @@ cc_binary( ], linkstatic = 1, deps = [ + ":socket_test_util", "//test/util:file_descriptor", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, + "//test/util:epoll_util", + "//test/util:eventfd_util", "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc index 3ecb8db8e..638a93979 100644 --- a/test/syscalls/linux/flock.cc +++ b/test/syscalls/linux/flock.cc @@ -21,6 +21,7 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -34,11 +35,6 @@ namespace { class FlockTest : public FileTest {}; -TEST_F(FlockTest, BadFD) { - // EBADF: fd is not an open file descriptor. - ASSERT_THAT(flock(-1, 0), SyscallFailsWithErrno(EBADF)); -} - TEST_F(FlockTest, InvalidOpCombinations) { // The operation cannot be both exclusive and shared. 
EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_SH | LOCK_NB), @@ -57,15 +53,6 @@ TEST_F(FlockTest, NoOperationSpecified) { SyscallFailsWithErrno(EINVAL)); } -TEST(FlockTestNoFixture, FlockSupportsPipes) { - int fds[2]; - ASSERT_THAT(pipe(fds), SyscallSucceeds()); - - EXPECT_THAT(flock(fds[0], LOCK_EX | LOCK_NB), SyscallSucceeds()); - EXPECT_THAT(close(fds[0]), SyscallSucceeds()); - EXPECT_THAT(close(fds[1]), SyscallSucceeds()); -} - TEST_F(FlockTest, TestSimpleExLock) { // Test that we can obtain an exclusive lock (no other holders) // and that we can unlock it. @@ -583,6 +570,66 @@ TEST_F(FlockTest, BlockingLockFirstExclusiveSecondExclusive_NoRandomSave) { EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); } +TEST(FlockTestNoFixture, BadFD) { + // EBADF: fd is not an open file descriptor. + ASSERT_THAT(flock(-1, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST(FlockTestNoFixture, FlockDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockSymlink) { + // TODO(gvisor.dev/issue/2782): Replace with IsRunningWithVFS1() when O_PATH + // is supported. + SKIP_IF(IsRunningOnGvisor()); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto symlink = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path())); + + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(symlink.path(), O_RDONLY | O_PATH, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallFailsWithErrno(EBADF)); +} + +TEST(FlockTestNoFixture, FlockProc) { + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/status", O_RDONLY, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockPipe) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + EXPECT_THAT(flock(fds[0], LOCK_EX | LOCK_NB), SyscallSucceeds()); + // Check that the pipe was locked above. + EXPECT_THAT(flock(fds[1], LOCK_EX | LOCK_NB), SyscallFailsWithErrno(EAGAIN)); + + EXPECT_THAT(flock(fds[0], LOCK_UN), SyscallSucceeds()); + EXPECT_THAT(flock(fds[1], LOCK_EX | LOCK_NB), SyscallSucceeds()); + + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockSocket) { + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(true /* abstract */, AF_UNIX)); + ASSERT_THAT( + bind(sock, reinterpret_cast(&addr), sizeof(addr)), + SyscallSucceeds()); + + EXPECT_THAT(flock(sock, LOCK_EX | LOCK_NB), SyscallSucceeds()); + EXPECT_THAT(close(sock), SyscallSucceeds()); +} + } // namespace } // namespace testing -- cgit v1.2.3 From a5a4f804879f8d1b5e6de6005aef6d3e14e7dca2 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 10 Jun 2020 13:25:17 -0700 Subject: socket/unix: handle sendto address argument for connected sockets In case of SOCK_SEQPACKET, it has to be ignored. In case of SOCK_STREAM, EISCONN or EOPNOTSUPP has to be returned. 
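As a rough illustration of the behavior described above, the sketch below exercises sendto(2) with a destination address on already-connected unix sockets. It is not part of the patch; it simply assumes a standard Linux sockets API, and the abstract-namespace path is an arbitrary placeholder.

// Sketch: sendto(2) with an address on connected AF_UNIX sockets.
// Expected per the description above: SOCK_STREAM fails with EISCONN,
// SOCK_SEQPACKET ignores the address and delivers to the peer.
#include <cassert>
#include <cerrno>
#include <cstring>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

int main() {
  int stream[2];
  int seqpacket[2];
  assert(socketpair(AF_UNIX, SOCK_STREAM, 0, stream) == 0);
  assert(socketpair(AF_UNIX, SOCK_SEQPACKET, 0, seqpacket) == 0);

  // Arbitrary (nonexistent) abstract-namespace destination.
  struct sockaddr_un addr = {};
  addr.sun_family = AF_UNIX;
  const char kPath[] = "\0nonexistent";
  memcpy(addr.sun_path, kPath, sizeof(kPath));

  const char kStr[] = "abc";

  // Connected SOCK_STREAM: the explicit address is rejected.
  assert(sendto(stream[0], kStr, 3, 0,
                reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)) == -1);
  assert(errno == EISCONN);

  // Connected SOCK_SEQPACKET: the address is ignored and the data flows.
  assert(sendto(seqpacket[0], kStr, 3, 0,
                reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)) == 3);
  char buf[8] = {};
  assert(read(seqpacket[1], buf, sizeof(buf)) == 3);

  close(stream[0]);
  close(stream[1]);
  close(seqpacket[0]);
  close(seqpacket[1]);
  return 0;
}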
PiperOrigin-RevId: 315755972 --- pkg/sentry/socket/unix/unix.go | 26 ++++++++++++++++++-------- test/syscalls/linux/socket_unix_seqpacket.cc | 18 ++++++++++++++++++ test/syscalls/linux/socket_unix_stream.cc | 14 ++++++++++++++ 3 files changed, 50 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index c4c9db81b..4bb2b6ff4 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -459,15 +459,25 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b To: nil, } if len(to) > 0 { - ep, err := extractEndpoint(t, to) - if err != nil { - return 0, err - } - defer ep.Release() - w.To = ep + switch s.stype { + case linux.SOCK_SEQPACKET: + to = nil + case linux.SOCK_STREAM: + if s.State() == linux.SS_CONNECTED { + return 0, syserr.ErrAlreadyConnected + } + return 0, syserr.ErrNotSupported + default: + ep, err := extractEndpoint(t, to) + if err != nil { + return 0, err + } + defer ep.Release() + w.To = ep - if ep.Passcred() && w.Control.Credentials == nil { - w.Control.Credentials = control.MakeCreds(t) + if ep.Passcred() && w.Control.Credentials == nil { + w.Control.Credentials = control.MakeCreds(t) + } } } diff --git a/test/syscalls/linux/socket_unix_seqpacket.cc b/test/syscalls/linux/socket_unix_seqpacket.cc index 84d3a569e..6d03df4d9 100644 --- a/test/syscalls/linux/socket_unix_seqpacket.cc +++ b/test/syscalls/linux/socket_unix_seqpacket.cc @@ -43,6 +43,24 @@ TEST_P(SeqpacketUnixSocketPairTest, ReadOneSideClosed) { SyscallSucceedsWithValue(0)); } +TEST_P(SeqpacketUnixSocketPairTest, Sendto) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + constexpr char kPath[] = "\0nonexistent"; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + constexpr char kStr[] = "abc"; + ASSERT_THAT(sendto(sockets->second_fd(), kStr, 3, 0, (struct sockaddr*)&addr, + sizeof(addr)), + SyscallSucceedsWithValue(3)); + + char data[10] = {}; + ASSERT_THAT(read(sockets->first_fd(), data, sizeof(data)), + SyscallSucceedsWithValue(3)); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/socket_unix_stream.cc b/test/syscalls/linux/socket_unix_stream.cc index 563467365..99e77b89e 100644 --- a/test/syscalls/linux/socket_unix_stream.cc +++ b/test/syscalls/linux/socket_unix_stream.cc @@ -89,6 +89,20 @@ TEST_P(StreamUnixSocketPairTest, ReadOneSideClosedWithUnreadData) { SyscallFailsWithErrno(ECONNRESET)); } +TEST_P(StreamUnixSocketPairTest, Sendto) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + struct sockaddr_un addr = {}; + addr.sun_family = AF_UNIX; + constexpr char kPath[] = "\0nonexistent"; + memcpy(addr.sun_path, kPath, sizeof(kPath)); + + constexpr char kStr[] = "abc"; + ASSERT_THAT(sendto(sockets->second_fd(), kStr, 3, 0, (struct sockaddr*)&addr, + sizeof(addr)), + SyscallFailsWithErrno(EISCONN)); +} + INSTANTIATE_TEST_SUITE_P( AllUnixDomainSockets, StreamUnixSocketPairTest, ::testing::ValuesIn(IncludeReversals(VecCat( -- cgit v1.2.3 From 4b9652d63b319414e764696f1b77ee39cd36d96d Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Wed, 10 Jun 2020 13:36:02 -0700 Subject: {S,G}etsockopt for TCP_KEEPCNT option. TCP_KEEPCNT is used to set the maximum keepalive probes to be sent before dropping the connection. 
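For context, the sketch below shows how an application would exercise the option once this change is in place. It is only an illustration against the standard sockets API and is not taken from the patch; the 1..127 range and the default of 9 probes mirror the values used in the tests added here.

// Sketch: configuring the number of TCP keepalive probes (TCP_KEEPCNT).
#include <cassert>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

int main() {
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  assert(fd >= 0);

  // The default is 9 probes; values outside 1..127 are rejected with EINVAL.
  int cnt = 0;
  socklen_t len = sizeof(cnt);
  assert(getsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, &len) == 0);

  // Drop an idle connection after 3 unanswered keepalive probes.
  cnt = 3;
  assert(setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) == 0);

  // Probes are only sent once keepalive itself is enabled.
  int on = 1;
  assert(setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) == 0);

  close(fd);
  return 0;
}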
WANT_LGTM=jchacon PiperOrigin-RevId: 315758094 --- pkg/abi/linux/tcp.go | 1 + pkg/sentry/socket/netstack/netstack.go | 35 ++++++++----- test/syscalls/linux/socket_ip_tcp_generic.cc | 73 ++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/tcp.go b/pkg/abi/linux/tcp.go index 174d470e2..2a8d4708b 100644 --- a/pkg/abi/linux/tcp.go +++ b/pkg/abi/linux/tcp.go @@ -57,4 +57,5 @@ const ( const ( MAX_TCP_KEEPIDLE = 32767 MAX_TCP_KEEPINTVL = 32767 + MAX_TCP_KEEPCNT = 127 ) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index e1e0c5931..738277391 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1246,6 +1246,18 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa return int32(time.Duration(v) / time.Second), nil + case linux.TCP_KEEPCNT: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptInt(tcpip.KeepaliveCountOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(v), nil + case linux.TCP_USER_TIMEOUT: if outLen < sizeOfInt32 { return nil, syserr.ErrInvalidArgument @@ -1786,6 +1798,17 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.KeepaliveIntervalOption(time.Second * time.Duration(v)))) + case linux.TCP_KEEPCNT: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + v := usermem.ByteOrder.Uint32(optVal) + if v < 1 || v > linux.MAX_TCP_KEEPCNT { + return syserr.ErrInvalidArgument + } + return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.KeepaliveCountOption, int(v))) + case linux.TCP_USER_TIMEOUT: if len(optVal) < sizeOfInt32 { return syserr.ErrInvalidArgument @@ -2115,30 +2138,20 @@ func emitUnimplementedEventTCP(t *kernel.Task, name int) { switch name { case linux.TCP_CONGESTION, linux.TCP_CORK, - linux.TCP_DEFER_ACCEPT, linux.TCP_FASTOPEN, linux.TCP_FASTOPEN_CONNECT, linux.TCP_FASTOPEN_KEY, linux.TCP_FASTOPEN_NO_COOKIE, - linux.TCP_KEEPCNT, - linux.TCP_KEEPIDLE, - linux.TCP_KEEPINTVL, - linux.TCP_LINGER2, - linux.TCP_MAXSEG, linux.TCP_QUEUE_SEQ, - linux.TCP_QUICKACK, linux.TCP_REPAIR, linux.TCP_REPAIR_QUEUE, linux.TCP_REPAIR_WINDOW, linux.TCP_SAVED_SYN, linux.TCP_SAVE_SYN, - linux.TCP_SYNCNT, linux.TCP_THIN_DUPACK, linux.TCP_THIN_LINEAR_TIMEOUTS, linux.TCP_TIMESTAMP, - linux.TCP_ULP, - linux.TCP_USER_TIMEOUT, - linux.TCP_WINDOW_CLAMP: + linux.TCP_ULP: t.Kernel().EmitUnimplementedEvent(t) } diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index fa81845fd..15adc8d0e 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -524,6 +524,7 @@ TEST_P(TCPSocketPairTest, SetTCPKeepintvlZero) { // Copied from include/net/tcp.h. 
constexpr int MAX_TCP_KEEPIDLE = 32767; constexpr int MAX_TCP_KEEPINTVL = 32767; +constexpr int MAX_TCP_KEEPCNT = 127; TEST_P(TCPSocketPairTest, SetTCPKeepidleAboveMax) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -575,6 +576,78 @@ TEST_P(TCPSocketPairTest, SetTCPKeepintvlToMax) { EXPECT_EQ(get, MAX_TCP_KEEPINTVL); } +TEST_P(TCPSocketPairTest, TCPKeepcountDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 9); // 9 keepalive probes. +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountZero) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kZero = 0; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &kZero, + sizeof(kZero)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountAboveMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + constexpr int kAboveMax = MAX_TCP_KEEPCNT + 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, + &kAboveMax, sizeof(kAboveMax)), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountToMax) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, + &MAX_TCP_KEEPCNT, sizeof(MAX_TCP_KEEPCNT)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, MAX_TCP_KEEPCNT); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountToOne) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int keepaliveCount = 1; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, + &keepaliveCount, sizeof(keepaliveCount)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT( + getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, keepaliveCount); +} + +TEST_P(TCPSocketPairTest, SetTCPKeepcountToNegative) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int keepaliveCount = -5; + EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_KEEPCNT, + &keepaliveCount, sizeof(keepaliveCount)), + SyscallFailsWithErrno(EINVAL)); +} + TEST_P(TCPSocketPairTest, SetOOBInline) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); -- cgit v1.2.3 From 9338854ea31059d6b6b5bf59a12512455b632f49 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Wed, 10 Jun 2020 15:05:20 -0700 Subject: Fix the error code for syscall test with null TOS. The setsockopt with nullptr can fail with either EFAULT or zero. 
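To make the accepted behaviors concrete, the sketch below (not part of the patch; plain sockets API) performs the call in question and tolerates both results, mirroring the relaxed expectation in the test change that follows.

// Sketch: setsockopt(IP_TOS) with a null optval and non-zero optlen.
// Depending on the kernel (or on gVisor), the call may return 0 or fail
// with EFAULT, so a portable caller accepts either outcome.
#include <cerrno>
#include <cstdio>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <sys/socket.h>
#include <unistd.h>

int main() {
  int fd = socket(AF_INET, SOCK_DGRAM, 0);
  if (fd < 0) return 1;

  int ret = setsockopt(fd, IPPROTO_IP, IP_TOS, nullptr, sizeof(int));
  if (ret == 0) {
    printf("null TOS accepted (returned 0)\n");
  } else if (errno == EFAULT) {
    printf("null TOS rejected with EFAULT\n");
  } else {
    printf("unexpected errno %d\n", errno);
  }

  close(fd);
  return 0;
}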
PiperOrigin-RevId: 315777107 --- test/syscalls/linux/socket_ip_unbound.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc index ca597e267..af8dda19c 100644 --- a/test/syscalls/linux/socket_ip_unbound.cc +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -377,8 +377,11 @@ TEST_P(IPUnboundSocketTest, NullTOS) { // // Linux's implementation would need fixing as passing a nullptr as optval // and non-zero optlen may not be valid. - EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, nullptr, set_sz), - SyscallSucceedsWithValue(0)); + // TODO(b/158666797): Combine the gVisor and linux cases for IPv6. + // Some kernel versions return EFAULT, so we handle both. + EXPECT_THAT( + setsockopt(socket->get(), t.level, t.option, nullptr, set_sz), + AnyOf(SyscallFailsWithErrno(EFAULT), SyscallSucceedsWithValue(0))); } } socklen_t get_sz = sizeof(int); -- cgit v1.2.3 From a085e562d0592bccc99e9e0380706a8025f70d53 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Wed, 10 Jun 2020 23:48:03 -0700 Subject: Add support for SO_REUSEADDR to UDP sockets/endpoints. On UDP sockets, SO_REUSEADDR allows multiple sockets to bind to the same address, but only delivers packets to the most recently bound socket. This differs from the behavior of SO_REUSEADDR on TCP sockets. SO_REUSEADDR for TCP sockets will likely need an almost completely independent implementation. SO_REUSEADDR has some odd interactions with the similar SO_REUSEPORT. These interactions are tested fairly extensively and all but one particularly odd one (that honestly seems like a bug) behave the same on gVisor and Linux. PiperOrigin-RevId: 315844832 --- pkg/tcpip/ports/ports.go | 116 ++++++++++++++------- pkg/tcpip/stack/BUILD | 1 + pkg/tcpip/stack/stack.go | 12 +-- pkg/tcpip/stack/transport_demuxer.go | 95 ++++++++--------- pkg/tcpip/stack/transport_demuxer_test.go | 3 +- pkg/tcpip/stack/transport_test.go | 7 +- pkg/tcpip/transport/icmp/BUILD | 1 + pkg/tcpip/transport/icmp/endpoint.go | 7 +- pkg/tcpip/transport/tcp/accept.go | 4 +- pkg/tcpip/transport/tcp/endpoint.go | 19 +++- pkg/tcpip/transport/udp/endpoint.go | 42 +++++--- pkg/tcpip/transport/udp/forwarder.go | 3 +- .../linux/socket_bind_to_device_sequence.cc | 29 ++++-- test/syscalls/linux/socket_inet_loopback.cc | 9 -- test/syscalls/linux/socket_ip_udp_generic.cc | 6 -- test/syscalls/linux/socket_ip_unbound.cc | 56 +++++++--- test/syscalls/linux/socket_ipv4_udp_unbound.cc | 23 ++-- 17 files changed, 255 insertions(+), 178 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index b937cb84b..edc29ad27 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -54,17 +54,27 @@ type Flags struct { LoadBalanced bool } -func (f Flags) bits() reuseFlag { - var rf reuseFlag +// Bits converts the Flags to their bitset form. +func (f Flags) Bits() BitFlags { + var rf BitFlags if f.MostRecent { - rf |= mostRecentFlag + rf |= MostRecentFlag } if f.LoadBalanced { - rf |= loadBalancedFlag + rf |= LoadBalancedFlag } return rf } +// Effective returns the effective behavior of a flag config. +func (f Flags) Effective() Flags { + e := f + if e.LoadBalanced && e.MostRecent { + e.MostRecent = false + } + return e +} + // PortManager manages allocating, reserving and releasing ports. 
type PortManager struct { mu sync.RWMutex @@ -78,56 +88,88 @@ type PortManager struct { hint uint32 } -type reuseFlag int +// BitFlags is a bitset representation of Flags. +type BitFlags uint32 const ( - mostRecentFlag reuseFlag = 1 << iota - loadBalancedFlag + // MostRecentFlag represents Flags.MostRecent. + MostRecentFlag BitFlags = 1 << iota + + // LoadBalancedFlag represents Flags.LoadBalanced. + LoadBalancedFlag + + // nextFlag is the value that the next added flag will have. + // + // It is used to calculate FlagMask below. It is also the number of + // valid flag states. nextFlag - flagMask = nextFlag - 1 + // FlagMask is a bit mask for BitFlags. + FlagMask = nextFlag - 1 ) -type portNode struct { - // refs stores the count for each possible flag combination. +// ToFlags converts the bitset into a Flags struct. +func (f BitFlags) ToFlags() Flags { + return Flags{ + MostRecent: f&MostRecentFlag != 0, + LoadBalanced: f&LoadBalancedFlag != 0, + } +} + +// FlagCounter counts how many references each flag combination has. +type FlagCounter struct { + // refs stores the count for each possible flag combination, (0 though + // FlagMask). refs [nextFlag]int } -func (p portNode) totalRefs() int { +// AddRef increases the reference count for a specific flag combination. +func (c *FlagCounter) AddRef(flags BitFlags) { + c.refs[flags]++ +} + +// DropRef decreases the reference count for a specific flag combination. +func (c *FlagCounter) DropRef(flags BitFlags) { + c.refs[flags]-- +} + +// TotalRefs calculates the total number of references for all flag +// combinations. +func (c FlagCounter) TotalRefs() int { var total int - for _, r := range p.refs { + for _, r := range c.refs { total += r } return total } -// flagRefs returns the number of references with all specified flags. -func (p portNode) flagRefs(flags reuseFlag) int { +// FlagRefs returns the number of references with all specified flags. +func (c FlagCounter) FlagRefs(flags BitFlags) int { var total int - for i, r := range p.refs { - if reuseFlag(i)&flags == flags { + for i, r := range c.refs { + if BitFlags(i)&flags == flags { total += r } } return total } -// allRefsHave returns if all references have all specified flags. -func (p portNode) allRefsHave(flags reuseFlag) bool { - for i, r := range p.refs { - if reuseFlag(i)&flags == flags && r > 0 { +// AllRefsHave returns if all references have all specified flags. +func (c FlagCounter) AllRefsHave(flags BitFlags) bool { + for i, r := range c.refs { + if BitFlags(i)&flags != flags && r > 0 { return false } } return true } -// intersectionRefs returns the set of flags shared by all references. -func (p portNode) intersectionRefs() reuseFlag { - intersection := flagMask - for i, r := range p.refs { +// IntersectionRefs returns the set of flags shared by all references. +func (c FlagCounter) IntersectionRefs() BitFlags { + intersection := FlagMask + for i, r := range c.refs { if r > 0 { - intersection &= reuseFlag(i) + intersection &= BitFlags(i) } } return intersection @@ -135,26 +177,26 @@ func (p portNode) intersectionRefs() reuseFlag { // deviceNode is never empty. When it has no elements, it is removed from the // map that references it. -type deviceNode map[tcpip.NICID]portNode +type deviceNode map[tcpip.NICID]FlagCounter // isAvailable checks whether binding is possible by device. If not binding to a -// device, check against all portNodes. If binding to a specific device, check +// device, check against all FlagCounters. 
If binding to a specific device, check // against the unspecified device and the provided device. // // If either of the port reuse flags is enabled on any of the nodes, all nodes // sharing a port must share at least one reuse flag. This matches Linux's // behavior. func (d deviceNode) isAvailable(flags Flags, bindToDevice tcpip.NICID) bool { - flagBits := flags.bits() + flagBits := flags.Bits() if bindToDevice == 0 { // Trying to binding all devices. if flagBits == 0 { // Can't bind because the (addr,port) is already bound. return false } - intersection := flagMask + intersection := FlagMask for _, p := range d { - i := p.intersectionRefs() + i := p.IntersectionRefs() intersection &= i if intersection&flagBits == 0 { // Can't bind because the (addr,port) was @@ -165,17 +207,17 @@ func (d deviceNode) isAvailable(flags Flags, bindToDevice tcpip.NICID) bool { return true } - intersection := flagMask + intersection := FlagMask if p, ok := d[0]; ok { - intersection = p.intersectionRefs() + intersection = p.IntersectionRefs() if intersection&flagBits == 0 { return false } } if p, ok := d[bindToDevice]; ok { - i := p.intersectionRefs() + i := p.IntersectionRefs() intersection &= i if intersection&flagBits == 0 { return false @@ -324,7 +366,7 @@ func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber if !s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice) { return false } - flagBits := flags.bits() + flagBits := flags.Bits() // Reserve port on all network protocols. for _, network := range networks { @@ -340,7 +382,7 @@ func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber m[addr] = d } n := d[bindToDevice] - n.refs[flagBits]++ + n.AddRef(flagBits) d[bindToDevice] = n } @@ -353,7 +395,7 @@ func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transp s.mu.Lock() defer s.mu.Unlock() - flagBits := flags.bits() + flagBits := flags.Bits() for _, network := range networks { desc := portDescriptor{network, transport, port} @@ -368,7 +410,7 @@ func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transp } n.refs[flagBits]-- d[bindToDevice] = n - if n.refs == [nextFlag]int{} { + if n.TotalRefs() == 0 { delete(d, bindToDevice) } if len(d) == 0 { diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index afca925ad..24f52b735 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -89,6 +89,7 @@ go_test( "//pkg/tcpip/link/loopback", "//pkg/tcpip/network/ipv4", "//pkg/tcpip/network/ipv6", + "//pkg/tcpip/ports", "//pkg/tcpip/transport/icmp", "//pkg/tcpip/transport/udp", "//pkg/waiter", diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 648791302..a2190341c 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -1404,25 +1404,25 @@ func (s *Stack) RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep. // transport dispatcher. Received packets that match the provided id will be // delivered to the given endpoint; specifying a nic is optional, but // nic-specific IDs have precedence over global ones. 
-func (s *Stack) RegisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, reusePort bool, bindToDevice tcpip.NICID) *tcpip.Error { - return s.demux.registerEndpoint(netProtos, protocol, id, ep, reusePort, bindToDevice) +func (s *Stack) RegisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { + return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) } // UnregisterTransportEndpoint removes the endpoint with the given id from the // stack transport dispatcher. -func (s *Stack) UnregisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, bindToDevice tcpip.NICID) { - s.demux.unregisterEndpoint(netProtos, protocol, id, ep, bindToDevice) +func (s *Stack) UnregisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { + s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) } // StartTransportEndpointCleanup removes the endpoint with the given id from // the stack transport dispatcher. It also transitions it to the cleanup stage. -func (s *Stack) StartTransportEndpointCleanup(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, bindToDevice tcpip.NICID) { +func (s *Stack) StartTransportEndpointCleanup(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { s.mu.Lock() defer s.mu.Unlock() s.cleanupEndpoints[ep] = struct{}{} - s.demux.unregisterEndpoint(netProtos, protocol, id, ep, bindToDevice) + s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) } // CompleteTransportEndpointCleanup removes the endpoint from the cleanup diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index e09866405..118b449d5 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -15,7 +15,6 @@ package stack import ( - "container/heap" "fmt" "math/rand" @@ -23,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/ports" ) type protocolIDs struct { @@ -43,14 +43,14 @@ type transportEndpoints struct { // unregisterEndpoint unregisters the endpoint with the given id such that it // won't receive any more packets. 
-func (eps *transportEndpoints) unregisterEndpoint(id TransportEndpointID, ep TransportEndpoint, bindToDevice tcpip.NICID) { +func (eps *transportEndpoints) unregisterEndpoint(id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { eps.mu.Lock() defer eps.mu.Unlock() epsByNIC, ok := eps.endpoints[id] if !ok { return } - if !epsByNIC.unregisterEndpoint(bindToDevice, ep) { + if !epsByNIC.unregisterEndpoint(bindToDevice, ep, flags) { return } delete(eps.endpoints, id) @@ -204,7 +204,7 @@ func (epsByNIC *endpointsByNIC) handleControlPacket(n *NIC, id TransportEndpoint // registerEndpoint returns true if it succeeds. It fails and returns // false if ep already has an element with the same key. -func (epsByNIC *endpointsByNIC) registerEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, t TransportEndpoint, reusePort bool, bindToDevice tcpip.NICID) *tcpip.Error { +func (epsByNIC *endpointsByNIC) registerEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, t TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { epsByNIC.mu.Lock() defer epsByNIC.mu.Unlock() @@ -214,23 +214,22 @@ func (epsByNIC *endpointsByNIC) registerEndpoint(d *transportDemuxer, netProto t demux: d, netProto: netProto, transProto: transProto, - reuse: reusePort, } epsByNIC.endpoints[bindToDevice] = multiPortEp } - return multiPortEp.singleRegisterEndpoint(t, reusePort) + return multiPortEp.singleRegisterEndpoint(t, flags) } // unregisterEndpoint returns true if endpointsByNIC has to be unregistered. -func (epsByNIC *endpointsByNIC) unregisterEndpoint(bindToDevice tcpip.NICID, t TransportEndpoint) bool { +func (epsByNIC *endpointsByNIC) unregisterEndpoint(bindToDevice tcpip.NICID, t TransportEndpoint, flags ports.Flags) bool { epsByNIC.mu.Lock() defer epsByNIC.mu.Unlock() multiPortEp, ok := epsByNIC.endpoints[bindToDevice] if !ok { return false } - if multiPortEp.unregisterEndpoint(t) { + if multiPortEp.unregisterEndpoint(t, flags) { delete(epsByNIC.endpoints, bindToDevice) } return len(epsByNIC.endpoints) == 0 @@ -279,10 +278,10 @@ func newTransportDemuxer(stack *Stack) *transportDemuxer { // registerEndpoint registers the given endpoint with the dispatcher such that // packets that match the endpoint ID are delivered to it. 
-func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, reusePort bool, bindToDevice tcpip.NICID) *tcpip.Error { +func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { for i, n := range netProtos { - if err := d.singleRegisterEndpoint(n, protocol, id, ep, reusePort, bindToDevice); err != nil { - d.unregisterEndpoint(netProtos[:i], protocol, id, ep, bindToDevice) + if err := d.singleRegisterEndpoint(n, protocol, id, ep, flags, bindToDevice); err != nil { + d.unregisterEndpoint(netProtos[:i], protocol, id, ep, flags, bindToDevice) return err } } @@ -290,35 +289,6 @@ func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNum return nil } -type transportEndpointHeap []TransportEndpoint - -var _ heap.Interface = (*transportEndpointHeap)(nil) - -func (h *transportEndpointHeap) Len() int { - return len(*h) -} - -func (h *transportEndpointHeap) Less(i, j int) bool { - return (*h)[i].UniqueID() < (*h)[j].UniqueID() -} - -func (h *transportEndpointHeap) Swap(i, j int) { - (*h)[i], (*h)[j] = (*h)[j], (*h)[i] -} - -func (h *transportEndpointHeap) Push(x interface{}) { - *h = append(*h, x.(TransportEndpoint)) -} - -func (h *transportEndpointHeap) Pop() interface{} { - old := *h - n := len(old) - x := old[n-1] - old[n-1] = nil - *h = old[:n-1] - return x -} - // multiPortEndpoint is a container for TransportEndpoints which are bound to // the same pair of address and port. endpointsArr always has at least one // element. @@ -334,9 +304,10 @@ type multiPortEndpoint struct { netProto tcpip.NetworkProtocolNumber transProto tcpip.TransportProtocolNumber - endpoints transportEndpointHeap - // reuse indicates if more than one endpoint is allowed. - reuse bool + // endpoints stores the transport endpoints in the order in which they + // were bound. This is required for UDP SO_REUSEADDR. + endpoints []TransportEndpoint + flags ports.FlagCounter } func (ep *multiPortEndpoint) transportEndpoints() []TransportEndpoint { @@ -362,6 +333,10 @@ func selectEndpoint(id TransportEndpointID, mpep *multiPortEndpoint, seed uint32 return mpep.endpoints[0] } + if mpep.flags.IntersectionRefs().ToFlags().Effective().MostRecent { + return mpep.endpoints[len(mpep.endpoints)-1] + } + payload := []byte{ byte(id.LocalPort), byte(id.LocalPort >> 8), @@ -401,40 +376,47 @@ func (ep *multiPortEndpoint) handlePacketAll(r *Route, id TransportEndpointID, p // singleRegisterEndpoint tries to add an endpoint to the multiPortEndpoint // list. The list might be empty already. -func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, reusePort bool) *tcpip.Error { +func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, flags ports.Flags) *tcpip.Error { ep.mu.Lock() defer ep.mu.Unlock() + bits := flags.Bits() + if len(ep.endpoints) != 0 { // If it was previously bound, we need to check if we can bind again. - if !ep.reuse || !reusePort { + if ep.flags.TotalRefs() > 0 && bits&ep.flags.IntersectionRefs() == 0 { return tcpip.ErrPortInUse } } - heap.Push(&ep.endpoints, t) + ep.endpoints = append(ep.endpoints, t) + ep.flags.AddRef(bits) return nil } // unregisterEndpoint returns true if multiPortEndpoint has to be unregistered. 
-func (ep *multiPortEndpoint) unregisterEndpoint(t TransportEndpoint) bool { +func (ep *multiPortEndpoint) unregisterEndpoint(t TransportEndpoint, flags ports.Flags) bool { ep.mu.Lock() defer ep.mu.Unlock() for i, endpoint := range ep.endpoints { if endpoint == t { - heap.Remove(&ep.endpoints, i) + copy(ep.endpoints[i:], ep.endpoints[i+1:]) + ep.endpoints[len(ep.endpoints)-1] = nil + ep.endpoints = ep.endpoints[:len(ep.endpoints)-1] + + ep.flags.DropRef(flags.Bits()) break } } return len(ep.endpoints) == 0 } -func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, reusePort bool, bindToDevice tcpip.NICID) *tcpip.Error { +func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { if id.RemotePort != 0 { - // TODO(eyalsoha): Why? - reusePort = false + // SO_REUSEPORT only applies to bound/listening endpoints. + flags.LoadBalanced = false } eps, ok := d.protocol[protocolIDs{netProto, protocol}] @@ -454,15 +436,20 @@ func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocol eps.endpoints[id] = epsByNIC } - return epsByNIC.registerEndpoint(d, netProto, protocol, ep, reusePort, bindToDevice) + return epsByNIC.registerEndpoint(d, netProto, protocol, ep, flags, bindToDevice) } // unregisterEndpoint unregisters the endpoint with the given id such that it // won't receive any more packets. -func (d *transportDemuxer) unregisterEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, bindToDevice tcpip.NICID) { +func (d *transportDemuxer) unregisterEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { + if id.RemotePort != 0 { + // SO_REUSEPORT only applies to bound/listening endpoints. + flags.LoadBalanced = false + } + for _, n := range netProtos { if eps, ok := d.protocol[protocolIDs{n, protocol}]; ok { - eps.unregisterEndpoint(id, ep, bindToDevice) + eps.unregisterEndpoint(id, ep, flags, bindToDevice) } } } diff --git a/pkg/tcpip/stack/transport_demuxer_test.go b/pkg/tcpip/stack/transport_demuxer_test.go index 67d778137..73dada928 100644 --- a/pkg/tcpip/stack/transport_demuxer_test.go +++ b/pkg/tcpip/stack/transport_demuxer_test.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/link/channel" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" + "gvisor.dev/gvisor/pkg/tcpip/ports" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/pkg/waiter" @@ -195,7 +196,7 @@ func TestTransportDemuxerRegister(t *testing.T) { if !ok { t.Fatalf("%T does not implement stack.TransportEndpoint", ep) } - if got, want := s.RegisterTransportEndpoint(0, []tcpip.NetworkProtocolNumber{test.proto}, udp.ProtocolNumber, stack.TransportEndpointID{}, tEP, false, 0), test.want; got != want { + if got, want := s.RegisterTransportEndpoint(0, []tcpip.NetworkProtocolNumber{test.proto}, udp.ProtocolNumber, stack.TransportEndpointID{}, tEP, ports.Flags{}, 0), test.want; got != want { t.Fatalf("s.RegisterTransportEndpoint(...) 
= %s, want %s", got, want) } }) diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go index ad61c09d6..7e8b84867 100644 --- a/pkg/tcpip/stack/transport_test.go +++ b/pkg/tcpip/stack/transport_test.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/link/channel" "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/ports" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/waiter" ) @@ -154,7 +155,7 @@ func (f *fakeTransportEndpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { // Try to register so that we can start receiving packets. f.ID.RemoteAddress = addr.Addr - err = f.stack.RegisterTransportEndpoint(0, []tcpip.NetworkProtocolNumber{fakeNetNumber}, fakeTransNumber, f.ID, f, false /* reuse */, 0 /* bindToDevice */) + err = f.stack.RegisterTransportEndpoint(0, []tcpip.NetworkProtocolNumber{fakeNetNumber}, fakeTransNumber, f.ID, f, ports.Flags{}, 0 /* bindToDevice */) if err != nil { return err } @@ -199,8 +200,8 @@ func (f *fakeTransportEndpoint) Bind(a tcpip.FullAddress) *tcpip.Error { fakeTransNumber, stack.TransportEndpointID{LocalAddress: a.Addr}, f, - false, /* reuse */ - 0, /* bindtoDevice */ + ports.Flags{}, + 0, /* bindtoDevice */ ); err != nil { return err } diff --git a/pkg/tcpip/transport/icmp/BUILD b/pkg/tcpip/transport/icmp/BUILD index 9ce625c17..7e5c79776 100644 --- a/pkg/tcpip/transport/icmp/BUILD +++ b/pkg/tcpip/transport/icmp/BUILD @@ -31,6 +31,7 @@ go_library( "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", + "//pkg/tcpip/ports", "//pkg/tcpip/stack", "//pkg/tcpip/transport/raw", "//pkg/tcpip/transport/tcp", diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index 57e0a069b..8ce294002 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/ports" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/waiter" ) @@ -110,7 +111,7 @@ func (e *endpoint) Close() { e.shutdownFlags = tcpip.ShutdownRead | tcpip.ShutdownWrite switch e.state { case stateBound, stateConnected: - e.stack.UnregisterTransportEndpoint(e.RegisterNICID, []tcpip.NetworkProtocolNumber{e.NetProto}, e.TransProto, e.ID, e, 0 /* bindToDevice */) + e.stack.UnregisterTransportEndpoint(e.RegisterNICID, []tcpip.NetworkProtocolNumber{e.NetProto}, e.TransProto, e.ID, e, ports.Flags{}, 0 /* bindToDevice */) } // Close the receive list and drain it. @@ -607,14 +608,14 @@ func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.Networ if id.LocalPort != 0 { // The endpoint already has a local port, just attempt to // register it. - err := e.stack.RegisterTransportEndpoint(nicID, netProtos, e.TransProto, id, e, false /* reuse */, 0 /* bindToDevice */) + err := e.stack.RegisterTransportEndpoint(nicID, netProtos, e.TransProto, id, e, ports.Flags{}, 0 /* bindToDevice */) return id, err } // We need to find a port for the endpoint. 
_, err := e.stack.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) { id.LocalPort = p - err := e.stack.RegisterTransportEndpoint(nicID, netProtos, e.TransProto, id, e, false /* reuse */, 0 /* bindtodevice */) + err := e.stack.RegisterTransportEndpoint(nicID, netProtos, e.TransProto, id, e, ports.Flags{}, 0 /* bindtodevice */) switch err { case nil: return true, nil diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index e6a23c978..ad197e8db 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/ports" "gvisor.dev/gvisor/pkg/tcpip/seqnum" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/waiter" @@ -238,13 +239,14 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i n.mu.Lock() // Register new endpoint so that packets are routed to it. - if err := n.stack.RegisterTransportEndpoint(n.boundNICID, n.effectiveNetProtos, ProtocolNumber, n.ID, n, n.reusePort, n.boundBindToDevice); err != nil { + if err := n.stack.RegisterTransportEndpoint(n.boundNICID, n.effectiveNetProtos, ProtocolNumber, n.ID, n, ports.Flags{LoadBalanced: n.reusePort}, n.boundBindToDevice); err != nil { n.mu.Unlock() n.Close() return nil, err } n.isRegistered = true + n.registeredReusePort = n.reusePort return n, nil } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 19f7bf449..6e4d607da 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -465,6 +465,10 @@ type endpoint struct { // reusePort is set to true if SO_REUSEPORT is enabled. reusePort bool + // registeredReusePort is set if the current endpoint registration was + // done with SO_REUSEPORT enabled. + registeredReusePort bool + // bindToDevice is set to the NIC on which to bind or disabled if 0. bindToDevice tcpip.NICID @@ -1021,8 +1025,9 @@ func (e *endpoint) closeNoShutdownLocked() { // in Listen() when trying to register. if e.EndpointState() == StateListen && e.isPortReserved { if e.isRegistered { - e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundBindToDevice) + e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, ports.Flags{LoadBalanced: e.registeredReusePort}, e.boundBindToDevice) e.isRegistered = false + e.registeredReusePort = false } e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) @@ -1086,8 +1091,9 @@ func (e *endpoint) cleanupLocked() { e.workerCleanup = false if e.isRegistered { - e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundBindToDevice) + e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, ports.Flags{LoadBalanced: e.registeredReusePort}, e.boundBindToDevice) e.isRegistered = false + e.registeredReusePort = false } if e.isPortReserved { @@ -2088,10 +2094,11 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc if e.ID.LocalPort != 0 { // The endpoint is bound to a port, attempt to register it. 
- err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, e.ID, e, e.reusePort, e.boundBindToDevice) + err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, e.ID, e, ports.Flags{LoadBalanced: e.reusePort}, e.boundBindToDevice) if err != nil { return err } + e.registeredReusePort = e.reusePort } else { // The endpoint doesn't have a local port yet, so try to get // one. Make sure that it isn't one that will result in the same @@ -2123,12 +2130,13 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc id := e.ID id.LocalPort = p - switch e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, id, e, e.reusePort, e.bindToDevice) { + switch e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, id, e, ports.Flags{LoadBalanced: e.reusePort}, e.bindToDevice) { case nil: // Port picking successful. Save the details of // the selected port. e.ID = id e.boundBindToDevice = e.bindToDevice + e.registeredReusePort = e.reusePort return true, nil case tcpip.ErrPortInUse: return false, nil @@ -2326,12 +2334,13 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { } // Register the endpoint. - if err := e.stack.RegisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.reusePort, e.boundBindToDevice); err != nil { + if err := e.stack.RegisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, ports.Flags{LoadBalanced: e.reusePort}, e.boundBindToDevice); err != nil { return err } e.isRegistered = true e.setEndpointState(StateListen) + e.registeredReusePort = e.reusePort // The channel may be non-nil when we're restoring the endpoint, and it // may be pre-populated with some previously accepted (but not Accepted) diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index c5e3c73ef..df5efbf6a 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -103,7 +103,7 @@ type endpoint struct { multicastAddr tcpip.Address multicastNICID tcpip.NICID multicastLoop bool - reusePort bool + portFlags ports.Flags bindToDevice tcpip.NICID broadcast bool @@ -214,7 +214,7 @@ func (e *endpoint) Close() { switch e.state { case StateBound, StateConnected: - e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundBindToDevice) + e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundPortFlags, e.boundBindToDevice) e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) e.boundBindToDevice = 0 e.boundPortFlags = ports.Flags{} @@ -558,10 +558,13 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { e.mu.Unlock() case tcpip.ReuseAddressOption: + e.mu.Lock() + e.portFlags.MostRecent = v + e.mu.Unlock() case tcpip.ReusePortOption: e.mu.Lock() - e.reusePort = v + e.portFlags.LoadBalanced = v e.mu.Unlock() case tcpip.V6OnlyOption: @@ -795,11 +798,15 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { return v, nil case tcpip.ReuseAddressOption: - return false, nil + e.mu.RLock() + v := e.portFlags.MostRecent + e.mu.RUnlock() + + return v, nil case tcpip.ReusePortOption: e.mu.RLock() - v := e.reusePort + v := e.portFlags.LoadBalanced e.mu.RUnlock() return v, nil @@ -968,6 +975,11 @@ func (e *endpoint) Disconnect() *tcpip.Error { id stack.TransportEndpointID btd tcpip.NICID ) + + 
// We change this value below and we need the old value to unregister + // the endpoint. + boundPortFlags := e.boundPortFlags + // Exclude ephemerally bound endpoints. if e.BindNICID != 0 || e.ID.LocalAddress == "" { var err *tcpip.Error @@ -980,16 +992,17 @@ func (e *endpoint) Disconnect() *tcpip.Error { return err } e.state = StateBound + boundPortFlags = e.boundPortFlags } else { if e.ID.LocalPort != 0 { // Release the ephemeral port. - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, boundPortFlags, e.boundBindToDevice) e.boundPortFlags = ports.Flags{} } e.state = StateInitial } - e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundBindToDevice) + e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, boundPortFlags, e.boundBindToDevice) e.ID = id e.boundBindToDevice = btd e.route.Release() @@ -1061,6 +1074,8 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { } } + oldPortFlags := e.boundPortFlags + id, btd, err := e.registerWithStack(nicID, netProtos, id) if err != nil { return err @@ -1068,7 +1083,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { // Remove the old registration. if e.ID.LocalPort != 0 { - e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundBindToDevice) + e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, oldPortFlags, e.boundBindToDevice) } e.ID = id @@ -1132,20 +1147,15 @@ func (*endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.NICID, *tcpip.Error) { if e.ID.LocalPort == 0 { - flags := ports.Flags{ - LoadBalanced: e.reusePort, - // FIXME(b/129164367): Support SO_REUSEADDR. - MostRecent: false, - } - port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, flags, e.bindToDevice) + port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.portFlags, e.bindToDevice) if err != nil { return id, e.bindToDevice, err } - e.boundPortFlags = flags id.LocalPort = port } + e.boundPortFlags = e.portFlags - err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, id, e, e.reusePort, e.bindToDevice) + err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, id, e, e.boundPortFlags, e.bindToDevice) if err != nil { e.stack.ReleasePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.boundPortFlags, e.bindToDevice) e.boundPortFlags = ports.Flags{} diff --git a/pkg/tcpip/transport/udp/forwarder.go b/pkg/tcpip/transport/udp/forwarder.go index 7abfa0ed2..c67e0ba95 100644 --- a/pkg/tcpip/transport/udp/forwarder.go +++ b/pkg/tcpip/transport/udp/forwarder.go @@ -73,7 +73,7 @@ func (r *ForwarderRequest) ID() stack.TransportEndpointID { // CreateEndpoint creates a connected UDP endpoint for the session request. 
func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { ep := newEndpoint(r.stack, r.route.NetProto, queue) - if err := r.stack.RegisterTransportEndpoint(r.route.NICID(), []tcpip.NetworkProtocolNumber{r.route.NetProto}, ProtocolNumber, r.id, ep, ep.reusePort, ep.bindToDevice); err != nil { + if err := r.stack.RegisterTransportEndpoint(r.route.NICID(), []tcpip.NetworkProtocolNumber{r.route.NetProto}, ProtocolNumber, r.id, ep, ep.portFlags, ep.bindToDevice); err != nil { ep.Close() return nil, err } @@ -82,6 +82,7 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, ep.route = r.route.Clone() ep.dstPort = r.id.RemotePort ep.RegisterNICID = r.route.NICID() + ep.boundPortFlags = ep.portFlags ep.state = StateConnected diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc index 1967329ee..4b2cb0606 100644 --- a/test/syscalls/linux/socket_bind_to_device_sequence.cc +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -363,8 +363,9 @@ TEST_P(BindToDeviceSequenceTest, BindTwiceWithReuseOnce) { } TEST_P(BindToDeviceSequenceTest, BindWithReuseAddr) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); + // FIXME(b/158253835): Support SO_REUSEADDR on TCP sockets. + SKIP_IF(IsRunningOnGvisor() && + (GetParam().type & SOCK_STREAM) == SOCK_STREAM); ASSERT_NO_FATAL_FAILURE( BindSocket(/* reusePort */ false, /* reuse_addr */ true)); @@ -405,8 +406,9 @@ TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReusePort) { } TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReuseAddr) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); + // FIXME(b/158253835): Support SO_REUSEADDR on TCP sockets. + SKIP_IF(IsRunningOnGvisor() && + (GetParam().type & SOCK_STREAM) == SOCK_STREAM); ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, /* reuse_addr */ true, @@ -434,8 +436,9 @@ TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReusePort) { } TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReuseAddr) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); + // FIXME(b/158253835): Support SO_REUSEADDR on TCP sockets. + SKIP_IF(IsRunningOnGvisor() && + (GetParam().type & SOCK_STREAM) == SOCK_STREAM); ASSERT_NO_FATAL_FAILURE(BindSocket( /* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0)); @@ -469,10 +472,20 @@ TEST_P(BindToDeviceSequenceTest, BindReuseAddrThenReuseAddr) { /* bind_to_device */ 0, EADDRINUSE)); } -// This behavior seems like a bug? TEST_P(BindToDeviceSequenceTest, BindReuseAddrThenReuseAddrReusePortThenReuseAddr) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. + // The behavior described in this test seems like a Linux bug. It doesn't + // make any sense and it is unlikely that any applications rely on it. + // + // Both SO_REUSEADDR and SO_REUSEPORT allow binding multiple UDP sockets to + // the same address and deliver each packet to exactly one of the bound + // sockets. If both are enabled, one of the strategies is selected to route + // packets. The strategy is selected dynamically based on the settings of the + // currently bound sockets. 
Usually, the strategy is selected based on the + // common setting (SO_REUSEADDR or SO_REUSEPORT) amongst the sockets, but for + // some reason, Linux allows binding sets of sockets with no overlapping + // settings in some situations. In this case, it is not obvious which strategy + // would be selected as the configured setting is a contradiction. SKIP_IF(IsRunningOnGvisor()); ASSERT_NO_FATAL_FAILURE(BindSocket( diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index fa890ec98..a914d9a12 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -1900,9 +1900,6 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) { TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReservedReuseAddr) { auto const& param = GetParam(); - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_DGRAM); - // Bind the v6 loopback on a dual stack socket. TestAddress const& test_addr = V6Loopback(); sockaddr_storage bound_addr = test_addr.addr; @@ -2091,9 +2088,6 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReservedResueAddr) { auto const& param = GetParam(); - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_DGRAM); - // Bind the v4 loopback on a dual stack socket. TestAddress const& test_addr = V4MappedLoopback(); sockaddr_storage bound_addr = test_addr.addr; @@ -2283,9 +2277,6 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReservedReuseAddr) { auto const& param = GetParam(); - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_DGRAM); - // Bind the v4 loopback on a v4 socket. TestAddress const& test_addr = V4Loopback(); sockaddr_storage bound_addr = test_addr.addr; diff --git a/test/syscalls/linux/socket_ip_udp_generic.cc b/test/syscalls/linux/socket_ip_udp_generic.cc index 1c533fdf2..edb86aded 100644 --- a/test/syscalls/linux/socket_ip_udp_generic.cc +++ b/test/syscalls/linux/socket_ip_udp_generic.cc @@ -225,9 +225,6 @@ TEST_P(UDPSocketPairTest, ReuseAddrDefault) { TEST_P(UDPSocketPairTest, SetReuseAddr) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); - ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceeds()); @@ -292,9 +289,6 @@ TEST_P(UDPSocketPairTest, SetReusePort) { TEST_P(UDPSocketPairTest, SetReuseAddrReusePort) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. 
- SKIP_IF(IsRunningOnGvisor()); - ASSERT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, sizeof(kSockOptOn)), SyscallSucceeds()); diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc index af8dda19c..5536ab53a 100644 --- a/test/syscalls/linux/socket_ip_unbound.cc +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -157,7 +157,7 @@ TEST_P(IPUnboundSocketTest, TOSDefault) { int get = -1; socklen_t get_sz = sizeof(get); constexpr int kDefaultTOS = 0; - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, sizeof(get)); EXPECT_EQ(get, kDefaultTOS); @@ -173,7 +173,7 @@ TEST_P(IPUnboundSocketTest, SetTOS) { int get = -1; socklen_t get_sz = sizeof(get); - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, sizeof(get)); EXPECT_EQ(get, set); @@ -188,7 +188,7 @@ TEST_P(IPUnboundSocketTest, ZeroTOS) { SyscallSucceedsWithValue(0)); int get = -1; socklen_t get_sz = sizeof(get); - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, sizeof(get)); EXPECT_EQ(get, set); @@ -210,7 +210,7 @@ TEST_P(IPUnboundSocketTest, InvalidLargeTOS) { } int get = -1; socklen_t get_sz = sizeof(get); - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, sizeof(get)); EXPECT_EQ(get, kDefaultTOS); @@ -229,7 +229,7 @@ TEST_P(IPUnboundSocketTest, CheckSkipECN) { } int get = -1; socklen_t get_sz = sizeof(get); - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, sizeof(get)); EXPECT_EQ(get, expect); @@ -249,7 +249,7 @@ TEST_P(IPUnboundSocketTest, ZeroTOSOptionSize) { } int get = -1; socklen_t get_sz = 0; - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, 0); EXPECT_EQ(get, -1); @@ -276,7 +276,7 @@ TEST_P(IPUnboundSocketTest, SmallTOSOptionSize) { } uint get = -1; socklen_t get_sz = i; - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, expect_sz); // Account for partial copies by getsockopt, retrieve the lower @@ -297,7 +297,7 @@ TEST_P(IPUnboundSocketTest, LargeTOSOptionSize) { // We expect the system call handler to only copy atmost sizeof(int) bytes // as asserted by the check below. Hence, we do not expect the copy to // overflow in getsockopt. 
- EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, sizeof(int)); EXPECT_EQ(get, set); @@ -325,7 +325,7 @@ TEST_P(IPUnboundSocketTest, NegativeTOS) { } int get = -1; socklen_t get_sz = sizeof(get); - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, sizeof(get)); EXPECT_EQ(get, expect); @@ -338,20 +338,20 @@ TEST_P(IPUnboundSocketTest, InvalidNegativeTOS) { TOSOption t = GetTOSOption(GetParam().domain); int expect; if (GetParam().domain == AF_INET) { - EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + ASSERT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), SyscallSucceedsWithValue(0)); expect = static_cast(set); if (GetParam().protocol == IPPROTO_TCP) { expect &= ~INET_ECN_MASK; } } else { - EXPECT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), + ASSERT_THAT(setsockopt(socket->get(), t.level, t.option, &set, set_sz), SyscallFailsWithErrno(EINVAL)); expect = 0; } int get = 0; socklen_t get_sz = sizeof(get); - EXPECT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), + ASSERT_THAT(getsockopt(socket->get(), t.level, t.option, &get, &get_sz), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_sz, sizeof(get)); EXPECT_EQ(get, expect); @@ -422,6 +422,38 @@ TEST_P(IPUnboundSocketTest, InsufficientBufferTOS) { EXPECT_THAT(sendmsg(socket->get(), &msg, 0), SyscallFailsWithErrno(EINVAL)); } +TEST_P(IPUnboundSocketTest, ReuseAddrDefault) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // FIXME(b/76031995): gVisor TCP sockets default to SO_REUSEADDR enabled. + SKIP_IF(IsRunningOnGvisor() && + (GetParam().type & SOCK_STREAM) == SOCK_STREAM); + + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT( + getsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOff); + EXPECT_EQ(get_sz, sizeof(get)); +} + +TEST_P(IPUnboundSocketTest, SetReuseAddr) { + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + ASSERT_THAT(setsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceedsWithValue(0)); + + int get = -1; + socklen_t get_sz = sizeof(get); + ASSERT_THAT( + getsockopt(socket->get(), SOL_SOCKET, SO_REUSEADDR, &get, &get_sz), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get, kSockOptOn); + EXPECT_EQ(get_sz, sizeof(get)); +} + INSTANTIATE_TEST_SUITE_P( IPUnboundSockets, IPUnboundSocketTest, ::testing::ValuesIn(VecCat(VecCat( diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index 1294d9050..4db9553e1 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -1672,10 +1672,10 @@ TEST_P(IPv4UDPUnboundSocketTest, TestBindToBcastThenSend) { } // Check that SO_REUSEADDR always delivers to the most recently bound socket. -TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrDistribution) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); - +// +// FIXME(gvisor.dev/issue/873): Endpoint order is not restored correctly. Enable +// random and co-op save (below) once that is fixed. 
+TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrDistribution_NoRandomSave) { std::vector> sockets; sockets.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(NewSocket())); @@ -1696,6 +1696,9 @@ TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrDistribution) { constexpr int kMessageSize = 200; + // FIXME(gvisor.dev/issue/873): Endpoint order is not restored correctly. + const DisableSave ds; + for (int i = 0; i < 10; i++) { // Add a new receiver. sockets.emplace_back(ASSERT_NO_ERRNO_AND_VALUE(NewSocket())); @@ -1836,9 +1839,6 @@ TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReusePort) { } TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReuseAddr) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); - auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -1880,9 +1880,6 @@ TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConvertibleToReuseAddr) { } TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable1) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); - auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -1927,9 +1924,6 @@ TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable1) { } TEST_P(IPv4UDPUnboundSocketTest, BindReuseAddrReusePortConversionReversable2) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); - auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -2020,9 +2014,6 @@ TEST_P(IPv4UDPUnboundSocketTest, BindDoubleReuseAddrReusePortThenReusePort) { } TEST_P(IPv4UDPUnboundSocketTest, BindDoubleReuseAddrReusePortThenReuseAddr) { - // FIXME(b/129164367): Support SO_REUSEADDR on UDP sockets. - SKIP_IF(IsRunningOnGvisor()); - auto socket1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto socket3 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); -- cgit v1.2.3 From 44575bf726a6ddb259a0c6419be4517a05c1b9bc Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 11 Jun 2020 13:46:56 -0700 Subject: Refactor packetimpact Connection types Reorganize the Connection types such that the defined types no longer expose the lower-level functions SendFrame and CreateFrame. These methods are still exported on the underlying Connection type, and thus can be accessed via a type-cast. In future, defined types should have one or more type-safe versions of the send() method on Connection, e.g. UDPIPv4 has Send() which allows the UDP header to be overridden and SendIP() which allows both the IPv4 and UDP headers to be modified. testbench.Connection gets a SendFrameStateless method which sends frames without updating the state of any of the layers. This should be used when sending out-of-band control messages such as ICMP messages, as using the normal Send method can result in errors when attempting to update the TCP state using an ICMP packet. Also remove the localAddr field of testbench.Connection and instead compute it on the fly as needed for UDPIPv4 and TCPIPv4. 
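To make the override semantics concrete, here is a small standalone sketch (illustrative only, not testbench code; the states and override names are invented) of how a shorter override list lines up against a connection's layers: the tails are aligned, so for an Ethernet/IPv4/UDP connection an override list of [UDP] merges into just the innermost layer and [IPv4, UDP] merges into the two innermost layers.

// align.go: demonstrates the tail-aligned index mapping used when merging
// override layers into a connection's layer stack.
package main

import "fmt"

func main() {
	states := []string{"Ethernet", "IPv4", "UDP"}

	for _, override := range [][]string{nil, {"udp-override"}, {"ipv4-override", "udp-override"}} {
		merged := make([]string, 0, len(states))
		for i, layer := range states {
			// Tails are aligned: connection layer i pairs with override
			// index j = len(override) - (len(states) - i), when j >= 0.
			if j := len(override) - (len(states) - i); j >= 0 {
				layer += "+" + override[j]
			}
			merged = append(merged, layer)
		}
		fmt.Println(merged)
	}
}

Only the innermost layers need to be named, which is why the type-safe wrappers can take a single TCP or UDP value and still produce a full frame.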
PiperOrigin-RevId: 315969714 --- test/packetimpact/testbench/connections.go | 211 ++++++++++++--------- .../tests/icmpv6_param_problem_test.go | 4 +- .../tests/udp_icmp_error_propagation_test.go | 25 +-- test/packetimpact/tests/udp_recv_multicast_test.go | 4 +- .../packetimpact/tests/udp_send_recv_dgram_test.go | 3 +- 5 files changed, 137 insertions(+), 110 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 6e85d6fab..8b4a4d905 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -43,16 +43,17 @@ func portFromSockaddr(sa unix.Sockaddr) (uint16, error) { // pickPort makes a new socket and returns the socket FD and port. The domain should be AF_INET or AF_INET6. The caller must close the FD when done with // the port if there is no error. -func pickPort(domain, typ int) (fd int, sa unix.Sockaddr, err error) { - fd, err = unix.Socket(domain, typ, 0) +func pickPort(domain, typ int) (int, uint16, error) { + fd, err := unix.Socket(domain, typ, 0) if err != nil { - return -1, nil, err + return -1, 0, err } defer func() { if err != nil { err = multierr.Append(err, unix.Close(fd)) } }() + var sa unix.Sockaddr switch domain { case unix.AF_INET: var sa4 unix.SockaddrInet4 @@ -63,16 +64,20 @@ func pickPort(domain, typ int) (fd int, sa unix.Sockaddr, err error) { copy(sa6.Addr[:], net.ParseIP(LocalIPv6).To16()) sa = &sa6 default: - return -1, nil, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) + return -1, 0, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) } if err = unix.Bind(fd, sa); err != nil { - return -1, nil, err + return -1, 0, err } sa, err = unix.Getsockname(fd) if err != nil { - return -1, nil, err + return -1, 0, err } - return fd, sa, nil + port, err := portFromSockaddr(sa) + if err != nil { + return -1, 0, err + } + return fd, port, nil } // layerState stores the state of a layer of a connection. @@ -266,14 +271,10 @@ func SeqNumValue(v seqnum.Value) *seqnum.Value { } // newTCPState creates a new TCPState. -func newTCPState(domain int, out, in TCP) (*tcpState, unix.Sockaddr, error) { - portPickerFD, localAddr, err := pickPort(domain, unix.SOCK_STREAM) - if err != nil { - return nil, nil, err - } - localPort, err := portFromSockaddr(localAddr) +func newTCPState(domain int, out, in TCP) (*tcpState, error) { + portPickerFD, localPort, err := pickPort(domain, unix.SOCK_STREAM) if err != nil { - return nil, nil, err + return nil, err } s := tcpState{ out: TCP{SrcPort: &localPort}, @@ -283,12 +284,12 @@ func newTCPState(domain int, out, in TCP) (*tcpState, unix.Sockaddr, error) { finSent: false, } if err := s.out.merge(&out); err != nil { - return nil, nil, err + return nil, err } if err := s.in.merge(&in); err != nil { - return nil, nil, err + return nil, err } - return &s, localAddr, nil + return &s, nil } func (s *tcpState) outgoing() Layer { @@ -374,14 +375,10 @@ type udpState struct { var _ layerState = (*udpState)(nil) // newUDPState creates a new udpState. 
-func newUDPState(domain int, out, in UDP) (*udpState, unix.Sockaddr, error) { - portPickerFD, localAddr, err := pickPort(domain, unix.SOCK_DGRAM) +func newUDPState(domain int, out, in UDP) (*udpState, error) { + portPickerFD, localPort, err := pickPort(domain, unix.SOCK_DGRAM) if err != nil { - return nil, nil, err - } - localPort, err := portFromSockaddr(localAddr) - if err != nil { - return nil, nil, err + return nil, err } s := udpState{ out: UDP{SrcPort: &localPort}, @@ -389,12 +386,12 @@ func newUDPState(domain int, out, in UDP) (*udpState, unix.Sockaddr, error) { portPickerFD: portPickerFD, } if err := s.out.merge(&out); err != nil { - return nil, nil, err + return nil, err } if err := s.in.merge(&in); err != nil { - return nil, nil, err + return nil, err } - return &s, localAddr, nil + return &s, nil } func (s *udpState) outgoing() Layer { @@ -429,7 +426,6 @@ type Connection struct { layerStates []layerState injector Injector sniffer Sniffer - localAddr unix.Sockaddr t *testing.T } @@ -475,20 +471,45 @@ func (conn *Connection) Close() { } } -// CreateFrame builds a frame for the connection with layer overriding defaults -// of the innermost layer and additionalLayers added after it. -func (conn *Connection) CreateFrame(layer Layer, additionalLayers ...Layer) Layers { +// CreateFrame builds a frame for the connection with defaults overriden +// from the innermost layer out, and additionalLayers added after it. +// +// Note that overrideLayers can have a length that is less than the number +// of layers in this connection, and in such cases the innermost layers are +// overriden first. As an example, valid values of overrideLayers for a TCP- +// over-IPv4-over-Ethernet connection are: nil, [TCP], [IPv4, TCP], and +// [Ethernet, IPv4, TCP]. +func (conn *Connection) CreateFrame(overrideLayers Layers, additionalLayers ...Layer) Layers { var layersToSend Layers - for _, s := range conn.layerStates { - layersToSend = append(layersToSend, s.outgoing()) - } - if err := layersToSend[len(layersToSend)-1].merge(layer); err != nil { - conn.t.Fatalf("can't merge %+v into %+v: %s", layer, layersToSend[len(layersToSend)-1], err) + for i, s := range conn.layerStates { + layer := s.outgoing() + // overrideLayers and conn.layerStates have their tails aligned, so + // to find the index we move backwards by the distance i is to the + // end. + if j := len(overrideLayers) - (len(conn.layerStates) - i); j >= 0 { + if err := layer.merge(overrideLayers[j]); err != nil { + conn.t.Fatalf("can't merge %+v into %+v: %s", layer, overrideLayers[j], err) + } + } + layersToSend = append(layersToSend, layer) } layersToSend = append(layersToSend, additionalLayers...) return layersToSend } +// SendFrameStateless sends a frame without updating any of the layer states. +// +// This method is useful for sending out-of-band control messages such as +// ICMP packets, where it would not make sense to update the transport layer's +// state using the ICMP header. +func (conn *Connection) SendFrameStateless(frame Layers) { + outBytes, err := frame.ToBytes() + if err != nil { + conn.t.Fatalf("can't build outgoing packet: %s", err) + } + conn.injector.Send(outBytes) +} + // SendFrame sends a frame on the wire and updates the state of all layers. func (conn *Connection) SendFrame(frame Layers) { outBytes, err := frame.ToBytes() @@ -509,10 +530,13 @@ func (conn *Connection) SendFrame(frame Layers) { } } -// Send a packet with reasonable defaults. 
Potentially override the final layer -// in the connection with the provided layer and add additionLayers. -func (conn *Connection) Send(layer Layer, additionalLayers ...Layer) { - conn.SendFrame(conn.CreateFrame(layer, additionalLayers...)) +// send sends a packet, possibly with layers of this connection overridden and +// additional layers added. +// +// Types defined with Connection as the underlying type should expose +// type-safe versions of this method. +func (conn *Connection) send(overrideLayers Layers, additionalLayers ...Layer) { + conn.SendFrame(conn.CreateFrame(overrideLayers, additionalLayers...)) } // recvFrame gets the next successfully parsed frame (of type Layers) within the @@ -606,7 +630,7 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { if err != nil { t.Fatalf("can't make ipv4State: %s", err) } - tcpState, localAddr, err := newTCPState(unix.AF_INET, outgoingTCP, incomingTCP) + tcpState, err := newTCPState(unix.AF_INET, outgoingTCP, incomingTCP) if err != nil { t.Fatalf("can't make tcpState: %s", err) } @@ -623,7 +647,6 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { layerStates: []layerState{etherState, ipv4State, tcpState}, injector: injector, sniffer: sniffer, - localAddr: localAddr, t: t, } } @@ -705,7 +728,7 @@ func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Dur // Send a packet with reasonable defaults. Potentially override the TCP layer in // the connection with the provided layer and add additionLayers. func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { - (*Connection)(conn).Send(&tcp, additionalLayers...) + (*Connection)(conn).send(Layers{&tcp}, additionalLayers...) } // Close frees associated resources held by the TCPIPv4 connection. @@ -727,32 +750,48 @@ func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) { return gotTCP, err } -func (conn *TCPIPv4) state() *tcpState { - state, ok := conn.layerStates[len(conn.layerStates)-1].(*tcpState) +func (conn *TCPIPv4) tcpState() *tcpState { + state, ok := conn.layerStates[2].(*tcpState) + if !ok { + conn.t.Fatalf("got transport-layer state type=%T, expected tcpState", conn.layerStates[2]) + } + return state +} + +func (conn *TCPIPv4) ipv4State() *ipv4State { + state, ok := conn.layerStates[1].(*ipv4State) if !ok { - conn.t.Fatalf("expected final state of %v to be tcpState", conn.layerStates) + conn.t.Fatalf("expected network-layer state type=%T, expected ipv4State", conn.layerStates[1]) } return state } // RemoteSeqNum returns the next expected sequence number from the DUT. func (conn *TCPIPv4) RemoteSeqNum() *seqnum.Value { - return conn.state().remoteSeqNum + return conn.tcpState().remoteSeqNum } // LocalSeqNum returns the next sequence number to send from the testbench. func (conn *TCPIPv4) LocalSeqNum() *seqnum.Value { - return conn.state().localSeqNum + return conn.tcpState().localSeqNum } // SynAck returns the SynAck that was part of the handshake. func (conn *TCPIPv4) SynAck() *TCP { - return conn.state().synAck + return conn.tcpState().synAck } // LocalAddr gets the local socket address of this connection. -func (conn *TCPIPv4) LocalAddr() unix.Sockaddr { - return conn.localAddr +func (conn *TCPIPv4) LocalAddr() *unix.SockaddrInet4 { + sa := &unix.SockaddrInet4{Port: int(*conn.tcpState().out.SrcPort)} + copy(sa.Addr[:], *conn.ipv4State().out.SrcAddr) + return sa +} + +// Drain drains the sniffer's receive buffer by receiving packets until there's +// nothing else to receive. 
+func (conn *TCPIPv4) Drain() { + conn.sniffer.Drain() } // IPv6Conn maintains the state for all the layers in a IPv6 connection. @@ -786,15 +825,10 @@ func NewIPv6Conn(t *testing.T, outgoingIPv6, incomingIPv6 IPv6) IPv6Conn { } } -// SendFrame sends a frame on the wire and updates the state of all layers. -func (conn *IPv6Conn) SendFrame(frame Layers) { - (*Connection)(conn).SendFrame(frame) -} - -// CreateFrame builds a frame for the connection with ipv6 overriding the ipv6 -// layer defaults and additionalLayers added after it. -func (conn *IPv6Conn) CreateFrame(ipv6 IPv6, additionalLayers ...Layer) Layers { - return (*Connection)(conn).CreateFrame(&ipv6, additionalLayers...) +// Send sends a frame with ipv6 overriding the IPv6 layer defaults and +// additionalLayers added after it. +func (conn *IPv6Conn) Send(ipv6 IPv6, additionalLayers ...Layer) { + (*Connection)(conn).send(Layers{&ipv6}, additionalLayers...) } // Close to clean up any resources held. @@ -808,12 +842,6 @@ func (conn *IPv6Conn) ExpectFrame(frame Layers, timeout time.Duration) (Layers, return (*Connection)(conn).ExpectFrame(frame, timeout) } -// Drain drains the sniffer's receive buffer by receiving packets until there's -// nothing else to receive. -func (conn *TCPIPv4) Drain() { - conn.sniffer.Drain() -} - // UDPIPv4 maintains the state for all the layers in a UDP/IPv4 connection. type UDPIPv4 Connection @@ -827,7 +855,7 @@ func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { if err != nil { t.Fatalf("can't make ipv4State: %s", err) } - udpState, localAddr, err := newUDPState(unix.AF_INET, outgoingUDP, incomingUDP) + udpState, err := newUDPState(unix.AF_INET, outgoingUDP, incomingUDP) if err != nil { t.Fatalf("can't make udpState: %s", err) } @@ -844,42 +872,43 @@ func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { layerStates: []layerState{etherState, ipv4State, udpState}, injector: injector, sniffer: sniffer, - localAddr: localAddr, t: t, } } -// LocalAddr gets the local socket address of this connection. -func (conn *UDPIPv4) LocalAddr() unix.Sockaddr { - return conn.localAddr +func (conn *UDPIPv4) udpState() *udpState { + state, ok := conn.layerStates[2].(*udpState) + if !ok { + conn.t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2]) + } + return state } -// CreateFrame builds a frame for the connection with layer overriding defaults -// of the innermost layer and additionalLayers added after it. -func (conn *UDPIPv4) CreateFrame(layer Layer, additionalLayers ...Layer) Layers { - return (*Connection)(conn).CreateFrame(layer, additionalLayers...) +func (conn *UDPIPv4) ipv4State() *ipv4State { + state, ok := conn.layerStates[1].(*ipv4State) + if !ok { + conn.t.Fatalf("got network-layer state type=%T, expected ipv4State", conn.layerStates[1]) + } + return state } -// Send a packet with reasonable defaults. Potentially override the UDP layer in -// the connection with the provided layer and add additionLayers. -func (conn *UDPIPv4) Send(udp UDP, additionalLayers ...Layer) { - (*Connection)(conn).Send(&udp, additionalLayers...) +// LocalAddr gets the local socket address of this connection. +func (conn *UDPIPv4) LocalAddr() *unix.SockaddrInet4 { + sa := &unix.SockaddrInet4{Port: int(*conn.udpState().out.SrcPort)} + copy(sa.Addr[:], *conn.ipv4State().out.SrcAddr) + return sa } -// SendFrame sends a frame on the wire and updates the state of all layers. 
-func (conn *UDPIPv4) SendFrame(frame Layers) { - (*Connection)(conn).SendFrame(frame) +// Send sends a packet with reasonable defaults, potentially overriding the UDP +// layer and adding additionLayers. +func (conn *UDPIPv4) Send(udp UDP, additionalLayers ...Layer) { + (*Connection)(conn).send(Layers{&udp}, additionalLayers...) } -// SendIP sends a packet with additionalLayers following the IP layer in the -// connection. -func (conn *UDPIPv4) SendIP(additionalLayers ...Layer) { - var layersToSend Layers - for _, s := range conn.layerStates[:len(conn.layerStates)-1] { - layersToSend = append(layersToSend, s.outgoing()) - } - layersToSend = append(layersToSend, additionalLayers...) - conn.SendFrame(layersToSend) +// SendIP sends a packet with reasonable defaults, potentially overriding the +// UDP and IPv4 headers and adding additionLayers. +func (conn *UDPIPv4) SendIP(ip IPv4, udp UDP, additionalLayers ...Layer) { + (*Connection)(conn).send(Layers{&ip, &udp}, additionalLayers...) } // Expect expects a frame with the UDP layer matching the provided UDP within diff --git a/test/packetimpact/tests/icmpv6_param_problem_test.go b/test/packetimpact/tests/icmpv6_param_problem_test.go index 961059fc1..4d1d9a7f5 100644 --- a/test/packetimpact/tests/icmpv6_param_problem_test.go +++ b/test/packetimpact/tests/icmpv6_param_problem_test.go @@ -45,8 +45,8 @@ func TestICMPv6ParamProblemTest(t *testing.T) { NDPPayload: []byte("hello world"), } - toSend := conn.CreateFrame(ipv6, &icmpv6) - conn.SendFrame(toSend) + toSend := (*testbench.Connection)(&conn).CreateFrame(testbench.Layers{&ipv6}, &icmpv6) + (*testbench.Connection)(&conn).SendFrame(toSend) // Build the expected ICMPv6 payload, which includes an index to the // problematic byte and also the problematic packet as described in diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index aedabf9de..b754918f6 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -96,24 +96,25 @@ func wantErrno(c connectionMode, icmpErr icmpError) syscall.Errno { // sendICMPError sends an ICMP error message in response to a UDP datagram. func sendICMPError(conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UDP) error { + layers := (*testbench.Connection)(conn).CreateFrame(nil) + layers = layers[:len(layers)-1] + ip, ok := udp.Prev().(*testbench.IPv4) + if !ok { + return fmt.Errorf("expected %s to be IPv4", udp.Prev()) + } if icmpErr == timeToLiveExceeded { - ip, ok := udp.Prev().(*testbench.IPv4) - if !ok { - return fmt.Errorf("expected %s to be IPv4", udp.Prev()) - } *ip.TTL = 1 // Let serialization recalculate the checksum since we set the TTL // to 1. ip.Checksum = nil - - // Note that the ICMP payload is valid in this case because the UDP - // payload is empty. If the UDP payload were not empty, the packet - // length during serialization may not be calculated correctly, - // resulting in a mal-formed packet. - conn.SendIP(icmpErr.ToICMPv4(), ip, udp) - } else { - conn.SendIP(icmpErr.ToICMPv4(), udp.Prev(), udp) } + // Note that the ICMP payload is valid in this case because the UDP + // payload is empty. If the UDP payload were not empty, the packet + // length during serialization may not be calculated correctly, + // resulting in a mal-formed packet. 
+ layers = append(layers, icmpErr.ToICMPv4(), ip, udp) + + (*testbench.Connection)(conn).SendFrameStateless(layers) return nil } diff --git a/test/packetimpact/tests/udp_recv_multicast_test.go b/test/packetimpact/tests/udp_recv_multicast_test.go index d51a34145..77a9bfa1d 100644 --- a/test/packetimpact/tests/udp_recv_multicast_test.go +++ b/test/packetimpact/tests/udp_recv_multicast_test.go @@ -35,8 +35,6 @@ func TestUDPRecvMulticast(t *testing.T) { defer dut.Close(boundFD) conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer conn.Close() - frame := conn.CreateFrame(&testbench.UDP{}, &testbench.Payload{Bytes: []byte("hello world")}) - frame[1].(*testbench.IPv4).DstAddr = testbench.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4())) - conn.SendFrame(frame) + conn.SendIP(testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4()))}, testbench.UDP{}) dut.Recv(boundFD, 100, 0) } diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go index bf64803e2..a7db384ad 100644 --- a/test/packetimpact/tests/udp_send_recv_dgram_test.go +++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go @@ -59,8 +59,7 @@ func TestUDPRecv(t *testing.T) { } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - frame := conn.CreateFrame(&testbench.UDP{}, &testbench.Payload{Bytes: []byte(tc.payload)}) - conn.SendFrame(frame) + conn.Send(testbench.UDP{}, &testbench.Payload{Bytes: []byte(tc.payload)}) if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), tc.payload; got != want { t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want) } -- cgit v1.2.3 From dc4e0157ef09632a25575810a70846ea81c4dd6b Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Thu, 11 Jun 2020 18:02:58 -0700 Subject: Add test for reordering. Tests the effect of reordering on retransmission and window size. Test covers the expected behavior of both Linux and netstack, however, netstack does not behave as expected. Further, the current expected behavior of netstack is not ideal and should be adjusted in the future. PiperOrigin-RevId: 316015184 --- test/packetimpact/proto/posix_server.proto | 2 +- test/packetimpact/runner/packetimpact_test.go | 1 + test/packetimpact/testbench/testbench.go | 3 + test/packetimpact/tests/BUILD | 13 ++ test/packetimpact/tests/tcp_reordering_test.go | 174 +++++++++++++++++++++++++ 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 test/packetimpact/tests/tcp_reordering_test.go (limited to 'test') diff --git a/test/packetimpact/proto/posix_server.proto b/test/packetimpact/proto/posix_server.proto index 77da0fb3a..ccd20b10d 100644 --- a/test/packetimpact/proto/posix_server.proto +++ b/test/packetimpact/proto/posix_server.proto @@ -150,7 +150,7 @@ message SendRequest { message SendResponse { int32 ret = 1; - int32 errno_ = 2; + int32 errno_ = 2; // "errno" may fail to compile in c++. } message SendToRequest { diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index e58a1fb1b..75617f6de 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -268,6 +268,7 @@ func TestOne(t *testing.T) { "--remote_ipv6", remoteIPv6.String(), "--remote_mac", remoteMAC.String(), "--device", testNetDev, + "--dut_type", *dutPlatform, ) _, err = testbench.Exec(dockerutil.RunOpts{}, testArgs...) 
if !*expectFailure && err != nil { diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go index 4de2aa1d3..82eda6565 100644 --- a/test/packetimpact/testbench/testbench.go +++ b/test/packetimpact/testbench/testbench.go @@ -25,6 +25,8 @@ import ( ) var ( + // DUTType is the type of device under test. + DUTType = "" // Device is the local device on the test network. Device = "" // LocalIPv4 is the local IPv4 address on the test network. @@ -63,6 +65,7 @@ func RegisterFlags(fs *flag.FlagSet) { fs.StringVar(&RemoteIPv6, "remote_ipv6", RemoteIPv6, "remote IPv6 address for test packets") fs.StringVar(&RemoteMAC, "remote_mac", RemoteMAC, "remote mac address for test packets") fs.StringVar(&Device, "device", Device, "local device for test packets") + fs.StringVar(&DUTType, "dut_type", DUTType, "type of device under test") } // genPseudoFlags populates flag-like global config based on real flags. diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 2a41ef326..95dd6c440 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -52,6 +52,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_reordering", + srcs = ["tcp_reordering_test.go"], + # TODO(b/139368047): Fix netstack then remove the line below. + expect_netstack_failure = True, + deps = [ + "//pkg/tcpip/header", + "//pkg/tcpip/seqnum", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "tcp_window_shrink", srcs = ["tcp_window_shrink_test.go"], diff --git a/test/packetimpact/tests/tcp_reordering_test.go b/test/packetimpact/tests/tcp_reordering_test.go new file mode 100644 index 000000000..a5378a9dd --- /dev/null +++ b/test/packetimpact/tests/tcp_reordering_test.go @@ -0,0 +1,174 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package reordering_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +func TestReorderingWindow(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFd) + conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) + defer conn.Close() + + // Enable SACK. + opts := make([]byte, 40) + optsOff := 0 + optsOff += header.EncodeNOP(opts[optsOff:]) + optsOff += header.EncodeNOP(opts[optsOff:]) + optsOff += header.EncodeSACKPermittedOption(opts[optsOff:]) + + // Ethernet guarantees that the MTU is at least 1500 bytes. 
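// Worked out, assuming the standard minimum header sizes (both
// header.IPv4MinimumSize and header.TCPMinimumSize are 20 bytes):
//   mss = minMTU - header.IPv4MinimumSize - header.TCPMinimumSize
//       = 1500 - 20 - 20
//       = 1460 bytes of TCP payload per segment.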
+ const minMTU = 1500 + const mss = minMTU - header.IPv4MinimumSize - header.TCPMinimumSize + optsOff += header.EncodeMSSOption(mss, opts[optsOff:]) + + conn.ConnectWithOptions(opts[:optsOff]) + + acceptFd, _ := dut.Accept(listenFd) + defer dut.Close(acceptFd) + + if tb.DUTType == "linux" { + // Linux has changed its handling of reordering, force the old behavior. + dut.SetSockOpt(acceptFd, unix.IPPROTO_TCP, unix.TCP_CONGESTION, []byte("reno")) + } + + pls := dut.GetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_MAXSEG) + if tb.DUTType == "netstack" { + // netstack does not impliment TCP_MAXSEG correctly. Fake it + // here. Netstack uses the max SACK size which is 32. The MSS + // option is 8 bytes, making the total 36 bytes. + pls = mss - 36 + } + + payload := make([]byte, pls) + + seqNum1 := *conn.RemoteSeqNum() + const numPkts = 10 + // Send some packets, checking that we receive each. + for i, sn := 0, seqNum1; i < numPkts; i++ { + dut.Send(acceptFd, payload, 0) + + gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) + sn.UpdateForward(seqnum.Size(len(payload))) + if err != nil { + t.Errorf("Expect #%d: %s", i+1, err) + continue + } + if gotOne == nil { + t.Errorf("#%d: expected a packet within a second but got none", i+1) + } + } + + seqNum2 := *conn.RemoteSeqNum() + + // SACK packets #2-4. + sackBlock := make([]byte, 40) + sbOff := 0 + sbOff += header.EncodeNOP(sackBlock[sbOff:]) + sbOff += header.EncodeNOP(sackBlock[sbOff:]) + sbOff += header.EncodeSACKBlocks([]header.SACKBlock{{ + seqNum1.Add(seqnum.Size(len(payload))), + seqNum1.Add(seqnum.Size(4 * len(payload))), + }}, sackBlock[sbOff:]) + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1)), Options: sackBlock[:sbOff]}) + + // ACK first packet. + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1) + uint32(len(payload)))}) + + // Check for retransmit. + gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(seqNum1))}, time.Second) + if err != nil { + t.Error("Expect for retransmit:", err) + } + if gotOne == nil { + t.Error("expected a retransmitted packet within a second but got none") + } + + // ACK all send packets with a DSACK block for packet #1. This tells + // the other end that we got both the original and retransmit for + // packet #1. + dsackBlock := make([]byte, 40) + dsbOff := 0 + dsbOff += header.EncodeNOP(dsackBlock[dsbOff:]) + dsbOff += header.EncodeNOP(dsackBlock[dsbOff:]) + dsbOff += header.EncodeSACKBlocks([]header.SACKBlock{{ + seqNum1.Add(seqnum.Size(len(payload))), + seqNum1.Add(seqnum.Size(4 * len(payload))), + }}, dsackBlock[dsbOff:]) + + conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum2)), Options: dsackBlock[:dsbOff]}) + + // Send half of the original window of packets, checking that we + // received each. + for i, sn := 0, seqNum2; i < numPkts/2; i++ { + dut.Send(acceptFd, payload, 0) + + gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) + sn.UpdateForward(seqnum.Size(len(payload))) + if err != nil { + t.Errorf("Expect #%d: %s", i+1, err) + continue + } + if gotOne == nil { + t.Errorf("#%d: expected a packet within a second but got none", i+1) + } + } + + if tb.DUTType == "netstack" { + // The window should now be halved, so we should receive any + // more, even if we send them. 
+ dut.Send(acceptFd, payload, 0) + if got, err := conn.Expect(tb.TCP{}, 100*time.Millisecond); got != nil || err == nil { + t.Fatalf("expected no packets within 100 millisecond, but got one: %s", got) + } + return + } + + // Linux reduces the window by three. Check that we can receive the rest. + for i, sn := 0, seqNum2.Add(seqnum.Size(numPkts/2*len(payload))); i < 2; i++ { + dut.Send(acceptFd, payload, 0) + + gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) + sn.UpdateForward(seqnum.Size(len(payload))) + if err != nil { + t.Errorf("Expect #%d: %s", i+1, err) + continue + } + if gotOne == nil { + t.Errorf("#%d: expected a packet within a second but got none", i+1) + } + } + + // The window should now be full. + dut.Send(acceptFd, payload, 0) + if got, err := conn.Expect(tb.TCP{}, 100*time.Millisecond); got != nil || err == nil { + t.Fatalf("expected no packets within 100 millisecond, but got one: %s", got) + } +} -- cgit v1.2.3 From 5a894e35a090232085fbb20c71d1787c266bd995 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 11 Jun 2020 19:03:46 -0700 Subject: Remove generated logs when test succeeds. PiperOrigin-RevId: 316022884 --- test/runner/runner.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/runner/runner.go b/test/runner/runner.go index 948e3a8ef..332f1df89 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -160,8 +160,9 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { args = append(args, "-fsgofer-host-uds") } - if outDir, ok := syscall.Getenv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { - tdir := filepath.Join(outDir, strings.Replace(name, "/", "_", -1)) + undeclaredOutputsDir, ok := syscall.Getenv("TEST_UNDECLARED_OUTPUTS_DIR") + if ok { + tdir := filepath.Join(undeclaredOutputsDir, strings.Replace(name, "/", "_", -1)) if err := os.MkdirAll(tdir, 0755); err != nil { return fmt.Errorf("could not create test dir: %v", err) } @@ -201,7 +202,9 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr sig := make(chan os.Signal, 1) + defer close(sig) signal.Notify(sig, syscall.SIGTERM) + defer signal.Stop(sig) go func() { s, ok := <-sig if !ok { @@ -237,9 +240,11 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { }() err = cmd.Run() - - signal.Stop(sig) - close(sig) + if err == nil { + // If the test passed, then we erase the log directory. This speeds up + // uploading logs in continuous integration & saves on disk space. + os.RemoveAll(undeclaredOutputsDir) + } return err } -- cgit v1.2.3 From 8ea99d58ffd708aa7a26be58d89cb817d8eceec6 Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Thu, 11 Jun 2020 19:29:34 -0700 Subject: Set the HOME environment variable for sub-containers. Fixes #701 PiperOrigin-RevId: 316025635 --- pkg/test/criutil/criutil.go | 11 ++++ pkg/test/testutil/testutil.go | 5 +- runsc/boot/loader.go | 15 +++++ runsc/container/multi_container_test.go | 80 +++++++++++++++++++++++++++ test/root/crictl_test.go | 97 +++++++++++++++++++-------------- 5 files changed, 166 insertions(+), 42 deletions(-) (limited to 'test') diff --git a/pkg/test/criutil/criutil.go b/pkg/test/criutil/criutil.go index bebebb48e..8fed29ff5 100644 --- a/pkg/test/criutil/criutil.go +++ b/pkg/test/criutil/criutil.go @@ -113,6 +113,17 @@ func (cc *Crictl) Exec(contID string, args ...string) (string, error) { return output, nil } +// Logs retrieves the container logs. It corresponds to `crictl logs`. 
+func (cc *Crictl) Logs(contID string, args ...string) (string, error) { + a := []string{"logs", contID} + a = append(a, args...) + output, err := cc.run(a...) + if err != nil { + return "", fmt.Errorf("logs failed: %v", err) + } + return output, nil +} + // Rm removes a container. It corresponds to `crictl rm`. func (cc *Crictl) Rm(contID string) error { _, err := cc.run("rm", contID) diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go index ee8c78014..f21d6769a 100644 --- a/pkg/test/testutil/testutil.go +++ b/pkg/test/testutil/testutil.go @@ -251,7 +251,10 @@ func RandomID(prefix string) string { if _, err := rand.Read(b); err != nil { panic("rand.Read failed: " + err.Error()) } - return fmt.Sprintf("%s-%s", prefix, base32.StdEncoding.EncodeToString(b)) + if prefix != "" { + prefix = prefix + "-" + } + return fmt.Sprintf("%s%s", prefix, base32.StdEncoding.EncodeToString(b)) } // RandomContainerID generates a random container id for each test. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 002479612..b05a8bd45 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -754,6 +754,21 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file return err } + // Add the HOME enviroment variable if it is not already set. + var envv []string + if kernel.VFS2Enabled { + envv, err = user.MaybeAddExecUserHomeVFS2(ctx, procArgs.MountNamespaceVFS2, + procArgs.Credentials.RealKUID, procArgs.Envv) + + } else { + envv, err = user.MaybeAddExecUserHome(ctx, procArgs.MountNamespace, + procArgs.Credentials.RealKUID, procArgs.Envv) + } + if err != nil { + return err + } + procArgs.Envv = envv + // Create and start the new process. tg, _, err := l.k.CreateProcess(procArgs) if err != nil { diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index c2b54696c..a27a01942 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -1698,3 +1698,83 @@ func TestMultiContainerRunNonRoot(t *testing.T) { t.Fatalf("child container failed, waitStatus: %v", ws) } } + +// TestMultiContainerHomeEnvDir tests that the HOME environment variable is set +// for root containers, sub-containers, and execed processes. +func TestMultiContainerHomeEnvDir(t *testing.T) { + // TODO(gvisor.dev/issue/1487): VFSv2 configs failing. + // NOTE: Don't use overlay since we need changes to persist to the temp dir + // outside the sandbox. + for testName, conf := range configs(t, noOverlay...) { + t.Run(testName, func(t *testing.T) { + + rootDir, cleanup, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer cleanup() + conf.RootDir = rootDir + + // Create temp files we can write the value of $HOME to. + homeDirs := map[string]*os.File{} + for _, name := range []string{"root", "sub", "exec"} { + homeFile, err := ioutil.TempFile(testutil.TmpDir(), name) + if err != nil { + t.Fatalf("creating temp file: %v", err) + } + homeDirs[name] = homeFile + } + + // We will sleep in the root container in order to ensure that + // the root container doesn't terminate before sub containers can be + // created. 
+ rootCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s; sleep 1000", homeDirs["root"].Name())} + subCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["sub"].Name())} + execCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["exec"].Name())} + + // Setup the containers, a root container and sub container. + specConfig, ids := createSpecs(rootCmd, subCmd) + containers, cleanup, err := startContainers(conf, specConfig, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + // Exec into the root container synchronously. + args := &control.ExecArgs{Argv: execCmd} + if _, err := containers[0].executeSync(args); err != nil { + t.Errorf("error executing %+v: %v", args, err) + } + + // Wait for the subcontainer to finish. + _, err = containers[1].Wait() + if err != nil { + t.Errorf("wait on child container: %v", err) + } + + // Wait for the root container to run. + expectedPL := []*control.Process{ + newProcessBuilder().Cmd("sh").Process(), + newProcessBuilder().Cmd("sleep").Process(), + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + + // Check the written files. + for name, tmpFile := range homeDirs { + dirBytes, err := ioutil.ReadAll(tmpFile) + if err != nil { + t.Fatalf("reading %s temp file: %v", name, err) + } + got := string(dirBytes) + + want := "/" + if got != want { + t.Errorf("%s $HOME incorrect: got: %q, want: %q", name, got, want) + } + } + + }) + } +} diff --git a/test/root/crictl_test.go b/test/root/crictl_test.go index c138e02dc..732fae821 100644 --- a/test/root/crictl_test.go +++ b/test/root/crictl_test.go @@ -39,6 +39,29 @@ import ( // Tests for crictl have to be run as root (rather than in a user namespace) // because crictl creates named network namespaces in /var/run/netns/. +// Sandbox returns a JSON config for a simple sandbox. Sandbox names must be +// unique so different names should be used when running tests on the same +// containerd instance. +func Sandbox(name string) string { + // Sandbox is a default JSON config for a sandbox. + s := map[string]interface{}{ + "metadata": map[string]string{ + "name": name, + "namespace": "default", + "uid": testutil.RandomID(""), + }, + "linux": map[string]string{}, + "log_directory": "/tmp", + } + + v, err := json.Marshal(s) + if err != nil { + // This shouldn't happen. + panic(err) + } + return string(v) +} + // SimpleSpec returns a JSON config for a simple container that runs the // specified command in the specified image. func SimpleSpec(name, image string, cmd []string, extra map[string]interface{}) string { @@ -49,7 +72,9 @@ func SimpleSpec(name, image string, cmd []string, extra map[string]interface{}) "image": map[string]string{ "image": testutil.ImageByName(image), }, - "log_path": fmt.Sprintf("%s.log", name), + // Log files are not deleted after root tests are run. Log to random + // paths to ensure logs are fresh. + "log_path": fmt.Sprintf("%s.log", testutil.RandomID(name)), } if len(cmd) > 0 { // Omit if empty. s["command"] = cmd @@ -65,20 +90,6 @@ func SimpleSpec(name, image string, cmd []string, extra map[string]interface{}) return string(v) } -// Sandbox is a default JSON config for a sandbox. 
-var Sandbox = `{ - "metadata": { - "name": "default-sandbox", - "namespace": "default", - "attempt": 1, - "uid": "hdishd83djaidwnduwk28bcsb" - }, - "linux": { - }, - "log_directory": "/tmp" -} -` - // Httpd is a JSON config for an httpd container. var Httpd = SimpleSpec("httpd", "basic/httpd", nil, nil) @@ -90,7 +101,7 @@ func TestCrictlSanity(t *testing.T) { t.Fatalf("failed to setup crictl: %v", err) } defer cleanup() - podID, contID, err := crictl.StartPodAndContainer("basic/httpd", Sandbox, Httpd) + podID, contID, err := crictl.StartPodAndContainer("basic/httpd", Sandbox("default"), Httpd) if err != nil { t.Fatalf("start failed: %v", err) } @@ -142,7 +153,7 @@ func TestMountPaths(t *testing.T) { t.Fatalf("failed to setup crictl: %v", err) } defer cleanup() - podID, contID, err := crictl.StartPodAndContainer("basic/httpd", Sandbox, HttpdMountPaths) + podID, contID, err := crictl.StartPodAndContainer("basic/httpd", Sandbox("default"), HttpdMountPaths) if err != nil { t.Fatalf("start failed: %v", err) } @@ -168,7 +179,7 @@ func TestMountOverSymlinks(t *testing.T) { defer cleanup() spec := SimpleSpec("busybox", "basic/resolv", []string{"sleep", "1000"}, nil) - podID, contID, err := crictl.StartPodAndContainer("basic/resolv", Sandbox, spec) + podID, contID, err := crictl.StartPodAndContainer("basic/resolv", Sandbox("default"), spec) if err != nil { t.Fatalf("start failed: %v", err) } @@ -200,7 +211,7 @@ func TestMountOverSymlinks(t *testing.T) { } // TestHomeDir tests that the HOME environment variable is set for -// multi-containers. +// Pod containers. func TestHomeDir(t *testing.T) { // Setup containerd and crictl. crictl, cleanup, err := setup(t) @@ -208,48 +219,52 @@ func TestHomeDir(t *testing.T) { t.Fatalf("failed to setup crictl: %v", err) } defer cleanup() - contSpec := SimpleSpec("root", "basic/busybox", []string{"sleep", "1000"}, nil) - podID, contID, err := crictl.StartPodAndContainer("basic/busybox", Sandbox, contSpec) - if err != nil { - t.Fatalf("start failed: %v", err) - } - t.Run("root container", func(t *testing.T) { - out, err := crictl.Exec(contID, "sh", "-c", "echo $HOME") + // Note that container ID returned here is a sub-container. All Pod + // containers are sub-containers. The root container of the sandbox is the + // pause container. + t.Run("sub-container", func(t *testing.T) { + contSpec := SimpleSpec("subcontainer", "basic/busybox", []string{"sh", "-c", "echo $HOME"}, nil) + podID, contID, err := crictl.StartPodAndContainer("basic/busybox", Sandbox("subcont-sandbox"), contSpec) if err != nil { - t.Fatalf("exec failed: %v, out: %s", err, out) + t.Fatalf("start failed: %v", err) + } + + out, err := crictl.Logs(contID) + if err != nil { + t.Fatalf("failed retrieving container logs: %v, out: %s", err, out) } if got, want := strings.TrimSpace(string(out)), "/root"; got != want { t.Fatalf("Home directory invalid. Got %q, Want : %q", got, want) } + + // Stop everything. + if err := crictl.StopPodAndContainer(podID, contID); err != nil { + t.Fatalf("stop failed: %v", err) + } }) - t.Run("sub-container", func(t *testing.T) { - // Create a sub container in the same pod. - subContSpec := SimpleSpec("subcontainer", "basic/busybox", []string{"sleep", "1000"}, nil) - subContID, err := crictl.StartContainer(podID, "basic/busybox", Sandbox, subContSpec) + // Tests that HOME is set for the exec process. 
+ t.Run("exec", func(t *testing.T) { + contSpec := SimpleSpec("exec", "basic/busybox", []string{"sleep", "1000"}, nil) + podID, contID, err := crictl.StartPodAndContainer("basic/busybox", Sandbox("exec-sandbox"), contSpec) if err != nil { t.Fatalf("start failed: %v", err) } - out, err := crictl.Exec(subContID, "sh", "-c", "echo $HOME") + out, err := crictl.Exec(contID, "sh", "-c", "echo $HOME") if err != nil { - t.Fatalf("exec failed: %v, out: %s", err, out) + t.Fatalf("failed retrieving container logs: %v, out: %s", err, out) } if got, want := strings.TrimSpace(string(out)), "/root"; got != want { - t.Fatalf("Home directory invalid. Got %q, Want: %q", got, want) + t.Fatalf("Home directory invalid. Got %q, Want : %q", got, want) } - if err := crictl.StopContainer(subContID); err != nil { + // Stop everything. + if err := crictl.StopPodAndContainer(podID, contID); err != nil { t.Fatalf("stop failed: %v", err) } }) - - // Stop everything. - if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatalf("stop failed: %v", err) - } - } // containerdConfigTemplate is a .toml config for containerd. It contains a -- cgit v1.2.3 From 61d6c059ac3434ddf0c206975a116a09d0838338 Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Thu, 11 Jun 2020 19:45:06 -0700 Subject: Replace use of %v in packetimpact tests PiperOrigin-RevId: 316027588 --- test/packetimpact/tests/ipv4_id_uniqueness_test.go | 2 +- test/packetimpact/tests/tcp_retransmits_test.go | 8 ++++---- test/packetimpact/tests/tcp_window_shrink_test.go | 8 ++++---- .../tests/tcp_zero_window_probe_retransmit_test.go | 8 ++++---- test/packetimpact/tests/tcp_zero_window_probe_test.go | 12 ++++++------ .../tests/tcp_zero_window_probe_usertimeout_test.go | 6 +++--- 6 files changed, 22 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/ipv4_id_uniqueness_test.go b/test/packetimpact/tests/ipv4_id_uniqueness_test.go index 4efb9829c..7db7ae02d 100644 --- a/test/packetimpact/tests/ipv4_id_uniqueness_test.go +++ b/test/packetimpact/tests/ipv4_id_uniqueness_test.go @@ -37,7 +37,7 @@ func recvTCPSegment(conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayloa return 0, fmt.Errorf("failed to receive TCP segment: %s", err) } if len(layers) < 2 { - return 0, fmt.Errorf("got packet with layers: %v, expected to have at least 2 layers (link and network)", layers) + return 0, fmt.Errorf("got packet with layers: %s, expected to have at least 2 layers (link and network)", layers) } ipv4, ok := layers[1].(*testbench.IPv4) if !ok { diff --git a/test/packetimpact/tests/tcp_retransmits_test.go b/test/packetimpact/tests/tcp_retransmits_test.go index e51409b66..6940eb7fb 100644 --- a/test/packetimpact/tests/tcp_retransmits_test.go +++ b/test/packetimpact/tests/tcp_retransmits_test.go @@ -49,7 +49,7 @@ func TestRetransmits(t *testing.T) { dut.Send(acceptFd, sampleData, 0) if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } // Give a chance for the dut to estimate RTO with RTT from the DATA-ACK. 
// TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which @@ -62,13 +62,13 @@ func TestRetransmits(t *testing.T) { dut.Send(acceptFd, sampleData, 0) seq := testbench.Uint32(uint32(*conn.RemoteSeqNum())) if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } // Expect retransmits of the same segment. for i := 0; i < 5; i++ { start := time.Now() if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil { - t.Fatalf("expected a packet with payload %v: %s loop %d", samplePayload, err, i) + t.Fatalf("expected payload was not received: %s loop %d", err, i) } if i == 0 { startRTO = time.Now().Sub(first) @@ -77,7 +77,7 @@ func TestRetransmits(t *testing.T) { } // Check if the probes came at exponentially increasing intervals. if p := time.Since(start); p < current-startRTO { - t.Fatalf("retransmit came sooner interval %d probe %d\n", p, i) + t.Fatalf("retransmit came sooner interval %d probe %d", p, i) } current *= 2 } diff --git a/test/packetimpact/tests/tcp_window_shrink_test.go b/test/packetimpact/tests/tcp_window_shrink_test.go index 576ec1a8b..e78d04756 100644 --- a/test/packetimpact/tests/tcp_window_shrink_test.go +++ b/test/packetimpact/tests/tcp_window_shrink_test.go @@ -47,17 +47,17 @@ func TestWindowShrink(t *testing.T) { dut.Send(acceptFd, sampleData, 0) if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) dut.Send(acceptFd, sampleData, 0) dut.Send(acceptFd, sampleData, 0) if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } // We close our receiving window here conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) @@ -68,6 +68,6 @@ func TestWindowShrink(t *testing.T) { // the following lines. expectedRemoteSeqNum := *conn.RemoteSeqNum() - 1 if _, err := conn.ExpectData(&testbench.TCP{SeqNum: testbench.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil { - t.Fatalf("expected a packet with sequence number %v: %s", expectedRemoteSeqNum, err) + t.Fatalf("expected a packet with sequence number %d: %s", expectedRemoteSeqNum, err) } } diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go index 54cee138f..5ab193181 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go @@ -50,11 +50,11 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { // Send and receive sample data to the dut. 
dut.Send(acceptFd, sampleData, 0) if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { - t.Fatalf("expected a packet with sequence number %s", err) + t.Fatalf("expected packet was not received: %s", err) } // Check for the dut to keep the connection alive as long as the zero window @@ -80,7 +80,7 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { // first retransmission time. The retransmission times is supposed to // exponentially increase. if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil { - t.Fatalf("expected a probe with sequence number %v: loop %d", probeSeq, i) + t.Fatalf("expected a probe with sequence number %d: loop %d", probeSeq, i) } if i == 0 { startProbeDuration = time.Now().Sub(first) @@ -100,6 +100,6 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { // Expect the dut to recover and transmit data. if _, err := conn.ExpectData(&testbench. TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } } diff --git a/test/packetimpact/tests/tcp_zero_window_probe_test.go b/test/packetimpact/tests/tcp_zero_window_probe_test.go index c9b3b7af2..649fd5699 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_test.go @@ -51,12 +51,12 @@ func TestZeroWindowProbe(t *testing.T) { // Send and receive sample data to the dut. dut.Send(acceptFd, sampleData, 0) if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } sendTime := time.Now().Sub(start) conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { - t.Fatalf("expected a packet with sequence number %s", err) + t.Fatalf("expected packet was not received: %s", err) } // Test 1: Check for receive of a zero window probe, record the duration for @@ -73,7 +73,7 @@ func TestZeroWindowProbe(t *testing.T) { // Expect there are no zero-window probes sent until there is data to be sent out // from the dut. if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil { - t.Fatalf("unexpected a packet with sequence number %v: %s", probeSeq, err) + t.Fatalf("unexpected packet with sequence number %d: %s", probeSeq, err) } start = time.Now() @@ -81,13 +81,13 @@ func TestZeroWindowProbe(t *testing.T) { dut.Send(acceptFd, sampleData, 0) // Expect zero-window probe from the dut. if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { - t.Fatalf("expected a packet with sequence number %v: %s", probeSeq, err) + t.Fatalf("expected a packet with sequence number %d: %s", probeSeq, err) } // Expect the probe to be sent after some time. 
Compare against the previous // time recorded when the dut immediately sends out data on receiving the // send command. if startProbeDuration := time.Now().Sub(start); startProbeDuration <= sendTime { - t.Fatalf("expected the first probe to be sent out after retransmission interval, got %v want > %v\n", startProbeDuration, sendTime) + t.Fatalf("expected the first probe to be sent out after retransmission interval, got %s want > %s", startProbeDuration, sendTime) } // Test 2: Check if the dut recovers on advertizing non-zero receive window. @@ -97,7 +97,7 @@ func TestZeroWindowProbe(t *testing.T) { conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)}) // Expect the dut to recover and transmit data. if _, err := conn.ExpectData(&testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } // Test 3: Sanity check for dut's processing of a similar probe it sent. diff --git a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go index 749281d9d..3c467b14f 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go @@ -50,11 +50,11 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) { // Send and receive sample data to the dut. dut.Send(acceptFd, sampleData, 0) if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { - t.Fatalf("expected a packet with payload %v: %s", samplePayload, err) + t.Fatalf("expected payload was not received: %s", err) } conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { - t.Fatalf("expected a packet with sequence number %s", err) + t.Fatalf("expected packet was not received: %s", err) } // Test 1: Check for receive of a zero window probe, record the duration for @@ -70,7 +70,7 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) { dut.Send(acceptFd, sampleData, 0) // Expect zero-window probe from the dut. if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { - t.Fatalf("expected a packet with sequence number %v: %s", probeSeq, err) + t.Fatalf("expected a packet with sequence number %d: %s", probeSeq, err) } // Record the duration for first probe, the dut sends the zero window probe after // a retransmission time interval. -- cgit v1.2.3 From 6ec9d60403fdf7a33072eaa023e62bfd56ed9f5c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 12 Jun 2020 11:56:43 -0700 Subject: vfs2: implement fcntl(fd, F_SETFL, flags) PiperOrigin-RevId: 316148074 --- pkg/sentry/kernel/fd_table.go | 23 +++++++++++++++++++++++ pkg/sentry/syscalls/linux/sys_file.go | 4 ++-- pkg/sentry/syscalls/linux/vfs2/fd.go | 4 ++-- test/syscalls/linux/fcntl.cc | 9 +++++++++ 4 files changed, 36 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index b35afafe3..48911240f 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -458,6 +458,29 @@ func (f *FDTable) SetFlags(fd int32, flags FDFlags) error { return nil } +// SetFlagsVFS2 sets the flags for the given file descriptor. +// +// True is returned iff flags were changed. 
+func (f *FDTable) SetFlagsVFS2(fd int32, flags FDFlags) error { + if fd < 0 { + // Don't accept negative FDs. + return syscall.EBADF + } + + f.mu.Lock() + defer f.mu.Unlock() + + file, _, _ := f.getVFS2(fd) + if file == nil { + // No file found. + return syscall.EBADF + } + + // Update the flags. + f.setVFS2(fd, file, flags) + return nil +} + // Get returns a reference to the file and the flags for the FD or nil if no // file is defined for the given fd. // diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 8347617bd..696e1c8d3 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -935,10 +935,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return uintptr(flags.ToLinuxFDFlags()), nil, nil case linux.F_SETFD: flags := args[2].Uint() - t.FDTable().SetFlags(fd, kernel.FDFlags{ + err := t.FDTable().SetFlags(fd, kernel.FDFlags{ CloseOnExec: flags&linux.FD_CLOEXEC != 0, }) - return 0, nil, nil + return 0, nil, err case linux.F_GETFL: return uintptr(file.Flags().ToLinux()), nil, nil case linux.F_SETFL: diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 6006758a5..f9ccb303c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -134,10 +134,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return uintptr(flags.ToLinuxFDFlags()), nil, nil case linux.F_SETFD: flags := args[2].Uint() - t.FDTable().SetFlags(fd, kernel.FDFlags{ + err := t.FDTable().SetFlagsVFS2(fd, kernel.FDFlags{ CloseOnExec: flags&linux.FD_CLOEXEC != 0, }) - return 0, nil, nil + return 0, nil, err case linux.F_GETFL: return uintptr(file.StatusFlags()), nil, nil case linux.F_SETFL: diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index c7cc5816e..35e8a4ff3 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -115,6 +115,15 @@ PosixErrorOr SubprocessLock(std::string const& path, bool for_write, return std::move(cleanup); } +TEST(FcntlTest, SetCloExecBadFD) { + // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set. + FileDescriptor f = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + auto fd = f.get(); + f.reset(); + ASSERT_THAT(fcntl(fd, F_GETFD), SyscallFailsWithErrno(EBADF)); + ASSERT_THAT(fcntl(fd, F_SETFD, FD_CLOEXEC), SyscallFailsWithErrno(EBADF)); +} + TEST(FcntlTest, SetCloExec) { // Open an eventfd file descriptor with FD_CLOEXEC descriptor flag not set. FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); -- cgit v1.2.3 From 82313667ea4bd56d64dd1f7898aa80942684ca2c Mon Sep 17 00:00:00 2001 From: Tony Gong Date: Fri, 12 Jun 2020 15:06:01 -0700 Subject: Make GenerateRandomPayload available to all tests Moved the function for generating a payload of random byets of a specified length into the testbench package so that it's availbale for all tests to use. Added a test case to the IPv4 ID uniqueness test which uses a payload length of 512 bytes. This test case passes for gVisor currently, whereas the test case with a small payload of 11 bytes fails because gVisor only assigns the ID field if the IP payload is sufficiently large. 
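One property of the helper introduced below is worth noting: it draws from math/rand's global source, which Go seeds deterministically, so the "random" payload is identical from run to run unless a test seeds it explicitly. A minimal sketch of a seeded variant, shown purely for illustration (this function is hypothetical and not part of the change):

    package testbench

    import (
        "math/rand"
        "testing"
    )

    // GenerateSeededPayload is a hypothetical variant of GenerateRandomPayload
    // that takes an explicit seed, so a payload that triggers a failure can be
    // regenerated exactly from the logged seed.
    func GenerateSeededPayload(t *testing.T, n int, seed int64) []byte {
        t.Helper()
        buf := make([]byte, n)
        if _, err := rand.New(rand.NewSource(seed)).Read(buf); err != nil {
            t.Fatalf("rand.Read(buf) failed: %s", err)
        }
        return buf
    }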
PiperOrigin-RevId: 316185097 --- test/packetimpact/testbench/testbench.go | 13 +++ test/packetimpact/tests/ipv4_id_uniqueness_test.go | 97 ++++++++++++---------- .../packetimpact/tests/udp_send_recv_dgram_test.go | 34 +++----- 3 files changed, 79 insertions(+), 65 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go index 82eda6565..d64f32a5b 100644 --- a/test/packetimpact/testbench/testbench.go +++ b/test/packetimpact/testbench/testbench.go @@ -17,8 +17,10 @@ package testbench import ( "flag" "fmt" + "math/rand" "net" "os/exec" + "testing" "time" "gvisor.dev/gvisor/test/packetimpact/netdevs" @@ -91,3 +93,14 @@ func genPseudoFlags() error { return nil } + +// GenerateRandomPayload generates a random byte slice of the specified length, +// causing a fatal test failure if it is unable to do so. +func GenerateRandomPayload(t *testing.T, n int) []byte { + t.Helper() + buf := make([]byte, n) + if _, err := rand.Read(buf); err != nil { + t.Fatalf("rand.Read(buf) failed: %s", err) + } + return buf +} diff --git a/test/packetimpact/tests/ipv4_id_uniqueness_test.go b/test/packetimpact/tests/ipv4_id_uniqueness_test.go index 7db7ae02d..70f6df5e0 100644 --- a/test/packetimpact/tests/ipv4_id_uniqueness_test.go +++ b/test/packetimpact/tests/ipv4_id_uniqueness_test.go @@ -37,7 +37,7 @@ func recvTCPSegment(conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayloa return 0, fmt.Errorf("failed to receive TCP segment: %s", err) } if len(layers) < 2 { - return 0, fmt.Errorf("got packet with layers: %s, expected to have at least 2 layers (link and network)", layers) + return 0, fmt.Errorf("got packet with layers: %v, expected to have at least 2 layers (link and network)", layers) } ipv4, ok := layers[1].(*testbench.IPv4) if !ok { @@ -56,56 +56,67 @@ func recvTCPSegment(conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayloa // to force the DF bit to be 0, and checks that a retransmitted segment has a // different IPv4 Identification value than the original segment. func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) { - dut := testbench.NewDUT(t) - defer dut.TearDown() + for _, tc := range []struct { + name string + payload []byte + }{ + {"SmallPayload", []byte("sample data")}, + // 512 bytes is chosen because sending more than this in a single segment + // causes the retransmission to send less than the original amount. + {"512BytePayload", testbench.GenerateRandomPayload(t, 512)}, + } { + t.Run(tc.name, func(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) - conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + defer conn.Close() - conn.Connect() - remoteFD, _ := dut.Accept(listenFD) - defer dut.Close(remoteFD) + conn.Connect() + remoteFD, _ := dut.Accept(listenFD) + defer dut.Close(remoteFD) - dut.SetSockOptInt(remoteFD, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + dut.SetSockOptInt(remoteFD, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) - // TODO(b/129291778) The following socket option clears the DF bit on - // IP packets sent over the socket, and is currently not supported by - // gVisor. 
gVisor by default sends packets with DF=0 anyway, so the - // socket option being not supported does not affect the operation of - // this test. Once the socket option is supported, the following call - // can be changed to simply assert success. - ret, errno := dut.SetSockOptIntWithErrno(context.Background(), remoteFD, unix.IPPROTO_IP, linux.IP_MTU_DISCOVER, linux.IP_PMTUDISC_DONT) - if ret == -1 && errno != unix.ENOTSUP { - t.Fatalf("failed to set IP_MTU_DISCOVER socket option to IP_PMTUDISC_DONT: %s", errno) - } + // TODO(b/129291778) The following socket option clears the DF bit on + // IP packets sent over the socket, and is currently not supported by + // gVisor. gVisor by default sends packets with DF=0 anyway, so the + // socket option being not supported does not affect the operation of + // this test. Once the socket option is supported, the following call + // can be changed to simply assert success. + ret, errno := dut.SetSockOptIntWithErrno(context.Background(), remoteFD, unix.IPPROTO_IP, linux.IP_MTU_DISCOVER, linux.IP_PMTUDISC_DONT) + if ret == -1 && errno != unix.ENOTSUP { + t.Fatalf("failed to set IP_MTU_DISCOVER socket option to IP_PMTUDISC_DONT: %s", errno) + } - sampleData := []byte("Sample Data") - samplePayload := &testbench.Payload{Bytes: sampleData} + samplePayload := &testbench.Payload{Bytes: tc.payload} - dut.Send(remoteFD, sampleData, 0) - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { - t.Fatalf("failed to receive TCP segment sent for RTT calculation: %s", err) - } - // Let the DUT estimate RTO with RTT from the DATA-ACK. - // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which - // we can skip sending this ACK. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + dut.Send(remoteFD, tc.payload, 0) + if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + t.Fatalf("failed to receive TCP segment sent for RTT calculation: %s", err) + } + // Let the DUT estimate RTO with RTT from the DATA-ACK. + // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which + // we can skip sending this ACK. 
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - expectTCP := &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum()))} - dut.Send(remoteFD, sampleData, 0) - originalID, err := recvTCPSegment(&conn, expectTCP, samplePayload) - if err != nil { - t.Fatalf("failed to receive TCP segment: %s", err) - } + dut.Send(remoteFD, tc.payload, 0) + expectTCP := &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum()))} + originalID, err := recvTCPSegment(&conn, expectTCP, samplePayload) + if err != nil { + t.Fatalf("failed to receive TCP segment: %s", err) + } - retransmitID, err := recvTCPSegment(&conn, expectTCP, samplePayload) - if err != nil { - t.Fatalf("failed to receive retransmitted TCP segment: %s", err) - } - if originalID == retransmitID { - t.Fatalf("unexpectedly got retransmitted TCP segment with same IPv4 ID field=%d", originalID) + retransmitID, err := recvTCPSegment(&conn, expectTCP, samplePayload) + if err != nil { + t.Fatalf("failed to receive retransmitted TCP segment: %s", err) + } + if originalID == retransmitID { + t.Fatalf("unexpectedly got retransmitted TCP segment with same IPv4 ID field=%d", originalID) + } + }) } } diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go index a7db384ad..224feef85 100644 --- a/test/packetimpact/tests/udp_send_recv_dgram_test.go +++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go @@ -16,7 +16,6 @@ package udp_send_recv_dgram_test import ( "flag" - "math/rand" "net" "testing" "time" @@ -29,15 +28,6 @@ func init() { testbench.RegisterFlags(flag.CommandLine) } -func generateRandomPayload(t *testing.T, n int) string { - t.Helper() - buf := make([]byte, n) - if _, err := rand.Read(buf); err != nil { - t.Fatalf("rand.Read(buf) failed: %s", err) - } - return string(buf) -} - func TestUDPRecv(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() @@ -48,19 +38,19 @@ func TestUDPRecv(t *testing.T) { testCases := []struct { name string - payload string + payload []byte }{ - {"emptypayload", ""}, - {"small payload", "hello world"}, - {"1kPayload", generateRandomPayload(t, 1<<10)}, + {"emptypayload", nil}, + {"small payload", []byte("hello world")}, + {"1kPayload", testbench.GenerateRandomPayload(t, 1<<10)}, // Even though UDP allows larger dgrams we don't test it here as // they need to be fragmented and written out as individual // frames. } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - conn.Send(testbench.UDP{}, &testbench.Payload{Bytes: []byte(tc.payload)}) - if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), tc.payload; got != want { + conn.Send(testbench.UDP{}, &testbench.Payload{Bytes: tc.payload}) + if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), string(tc.payload); got != want { t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want) } }) @@ -77,11 +67,11 @@ func TestUDPSend(t *testing.T) { testCases := []struct { name string - payload string + payload []byte }{ - {"emptypayload", ""}, - {"small payload", "hello world"}, - {"1kPayload", generateRandomPayload(t, 1<<10)}, + {"emptypayload", nil}, + {"small payload", []byte("hello world")}, + {"1kPayload", testbench.GenerateRandomPayload(t, 1<<10)}, // Even though UDP allows larger dgrams we don't test it here as // they need to be fragmented and written out as individual // frames. 
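The fragmentation caveat in the comment above comes down to simple arithmetic: with a 1500-byte Ethernet MTU, one frame carries at most 1500 - 20 (minimum IPv4 header) - 8 (UDP header) = 1472 bytes of UDP payload, so the 1 KiB case still fits in a single frame while anything larger must be fragmented. A minimal sketch of that calculation using the same header package (illustrative, not part of the test):

    package main

    import (
        "fmt"

        "gvisor.dev/gvisor/pkg/tcpip/header"
    )

    func main() {
        const mtu = 1500
        // Largest UDP payload that fits in one IPv4 frame without fragmentation.
        maxPayload := mtu - header.IPv4MinimumSize - header.UDPMinimumSize
        fmt.Println(maxPayload) // 1472
    }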
@@ -89,10 +79,10 @@ func TestUDPSend(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { conn.Drain() - if got, want := int(dut.SendTo(boundFD, []byte(tc.payload), 0, conn.LocalAddr())), len(tc.payload); got != want { + if got, want := int(dut.SendTo(boundFD, tc.payload, 0, conn.LocalAddr())), len(tc.payload); got != want { t.Fatalf("short write got: %d, want: %d", got, want) } - if _, err := conn.ExpectData(testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: []byte(tc.payload)}, 1*time.Second); err != nil { + if _, err := conn.ExpectData(testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: tc.payload}, 1*time.Second); err != nil { t.Fatal(err) } }) -- cgit v1.2.3 From 3b5eaad3c8b49367d1812dbaf4182fc7282b7f00 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Fri, 12 Jun 2020 20:47:28 -0700 Subject: Allow reading IP_MULTICAST_LOOP and IP_MULTICAST_TTL on TCP sockets. I am not really sure what the point of this is, but someone filed a bug about it, so I assume something relies on it. PiperOrigin-RevId: 316225127 --- pkg/tcpip/transport/tcp/endpoint.go | 6 ++++++ test/syscalls/linux/socket_ip_tcp_generic.cc | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 6e4d607da..f225b00e7 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1855,6 +1855,9 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { return v, nil + case tcpip.MulticastLoopOption: + return true, nil + default: return false, tcpip.ErrUnknownProtocolOption } @@ -1922,6 +1925,9 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { e.UnlockUser() return v, nil + case tcpip.MulticastTTLOption: + return 1, nil + default: return -1, tcpip.ErrUnknownProtocolOption } diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index 15adc8d0e..c2ecb639f 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -994,6 +994,30 @@ TEST_P(TCPSocketPairTest, SetTCPWindowClampBelowMinRcvBufConnectedSocket) { } } +TEST_P(TCPSocketPairTest, IpMulticastTtlDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_TTL, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_GT(get, 0); +} + +TEST_P(TCPSocketPairTest, IpMulticastLoopDefault) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + + int get = -1; + socklen_t get_len = sizeof(get); + EXPECT_THAT(getsockopt(sockets->first_fd(), IPPROTO_IP, IP_MULTICAST_LOOP, + &get, &get_len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(get_len, sizeof(get)); + EXPECT_EQ(get, 1); +} + TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) { DisableSave ds; // Too many syscalls. constexpr int kThreadCount = 1000; -- cgit v1.2.3 From f23f62c2c2bcfb3196b67e64b4a73f820f150caa Mon Sep 17 00:00:00 2001 From: Rahat Mahmood Date: Mon, 15 Jun 2020 01:08:00 -0700 Subject: Correctly set the test VFS environment variable. Also fix test bugs uncovered now that they aren't silently skipped on VFS2. Updates #1487. 
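With GVISOR_VFS now populated for both configurations, tests can branch on the sandbox setup through the environment instead of being silently skipped. The C++ tests do this via helpers such as IsRunningWithVFS1(); a rough Go equivalent reading the same variables might look like the sketch below (the helper names and the unset-variable convention are assumptions, not existing code):

    package testutil

    import "os"

    // These helpers mirror the variables exported by test/runner:
    // TEST_ON_GVISOR, GVISOR_NETWORK and GVISOR_VFS. When the runner is not
    // in use the variables are unset, which is treated here as a native run.
    func RunningOnGvisor() bool { return os.Getenv("TEST_ON_GVISOR") != "" }

    func RunningWithVFS1() bool { return os.Getenv("GVISOR_VFS") == "VFS1" }

    func RunningWithVFS2() bool { return os.Getenv("GVISOR_VFS") == "VFS2" }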
PiperOrigin-RevId: 316415807 --- test/runner/runner.go | 8 +++-- test/syscalls/BUILD | 5 +++ test/syscalls/linux/BUILD | 17 ++++++++- test/syscalls/linux/socket.cc | 37 ++++--------------- test/syscalls/linux/socket_capability.cc | 61 ++++++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+), 34 deletions(-) create mode 100644 test/syscalls/linux/socket_capability.cc (limited to 'test') diff --git a/test/runner/runner.go b/test/runner/runner.go index 332f1df89..f8baf61b0 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -352,11 +352,15 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { // Set environment variables that indicate we are running in gVisor with // the given platform, network, and filesystem stack. - // TODO(gvisor.dev/issue/1487): Update this when the runner supports VFS2. platformVar := "TEST_ON_GVISOR" networkVar := "GVISOR_NETWORK" + env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network) vfsVar := "GVISOR_VFS" - env := append(os.Environ(), platformVar+"="+*platform, networkVar+"="+*network, vfsVar+"=VFS1") + if *vfs2 { + env = append(env, vfsVar+"=VFS2") + } else { + env = append(env, vfsVar+"=VFS1") + } // Remove env variables that cause the gunit binary to write output // files, since they will stomp on eachother, and on the output files diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index d68afbe44..6d80572e7 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -68,6 +68,11 @@ syscall_test( vfs2 = "True", ) +syscall_test( + test = "//test/syscalls/linux:socket_capability_test", + vfs2 = "True", +) + syscall_test( size = "large", shard_count = 50, diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 6d051f48b..96044928e 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -363,14 +363,29 @@ cc_binary( linkstatic = 1, deps = [ ":socket_test_util", - "//test/util:file_descriptor", gtest, + "//test/util:file_descriptor", "//test/util:temp_umask", "//test/util:test_main", "//test/util:test_util", ], ) +cc_binary( + name = "socket_capability_test", + testonly = 1, + srcs = ["socket_capability.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_binary( name = "brk_test", testonly = 1, diff --git a/test/syscalls/linux/socket.cc b/test/syscalls/linux/socket.cc index e0a4d0985..c20cd3fcc 100644 --- a/test/syscalls/linux/socket.cc +++ b/test/syscalls/linux/socket.cc @@ -86,37 +86,12 @@ TEST(SocketTest, UnixSocketStat) { EXPECT_EQ(statbuf.st_mode, S_IFSOCK | sock_perm & ~mask); // Timestamps should be equal and non-zero. - EXPECT_NE(statbuf.st_atime, 0); - EXPECT_EQ(statbuf.st_atime, statbuf.st_mtime); - EXPECT_EQ(statbuf.st_atime, statbuf.st_ctime); -} - -TEST(SocketTest, UnixConnectNeedsWritePerm) { - SKIP_IF(IsRunningWithVFS1()); - - FileDescriptor bound = - ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); - - struct sockaddr_un addr = - ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX)); - ASSERT_THAT(bind(bound.get(), reinterpret_cast(&addr), - sizeof(addr)), - SyscallSucceeds()); - ASSERT_THAT(listen(bound.get(), 1), SyscallSucceeds()); - - // Connect should fail without write perms. 
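As context for this removal and for the new socket_capability.cc test below: connect(2) on a filesystem Unix socket requires write permission on the socket file, and CAP_DAC_OVERRIDE lets a privileged caller bypass that check, which is why the new test drops the capability before asserting EACCES. A minimal standalone Go sketch of the permission behavior for an unprivileged process (the socket path is illustrative):

    package main

    import (
        "fmt"
        "net"
        "os"
    )

    func main() {
        const path = "/tmp/perm.sock"
        l, err := net.Listen("unix", path)
        if err != nil {
            panic(err)
        }
        defer l.Close()

        // Without write permission, connect should fail with EACCES for a
        // caller that cannot override file permissions.
        if err := os.Chmod(path, 0500); err != nil {
            panic(err)
        }
        if _, err := net.Dial("unix", path); err == nil {
            fmt.Println("unexpected: connect succeeded without write permission")
        } else {
            fmt.Println("connect failed as expected:", err)
        }

        // With write permission restored, connect should succeed.
        if err := os.Chmod(path, 0200); err != nil {
            panic(err)
        }
        c, err := net.Dial("unix", path)
        if err != nil {
            panic(err)
        }
        c.Close()
    }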
- ASSERT_THAT(chmod(addr.sun_path, 0500), SyscallSucceeds()); - FileDescriptor client = - ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); - EXPECT_THAT(connect(client.get(), reinterpret_cast(&addr), - sizeof(addr)), - SyscallFailsWithErrno(EACCES)); - - // Connect should succeed with write perms. - ASSERT_THAT(chmod(addr.sun_path, 0200), SyscallSucceeds()); - EXPECT_THAT(connect(client.get(), reinterpret_cast(&addr), - sizeof(addr)), - SyscallSucceeds()); + // TODO(b/158882152): Sockets currently don't implement timestamps. + if (!IsRunningOnGvisor()) { + EXPECT_NE(statbuf.st_atime, 0); + EXPECT_EQ(statbuf.st_atime, statbuf.st_mtime); + EXPECT_EQ(statbuf.st_atime, statbuf.st_ctime); + } } using SocketOpenTest = ::testing::TestWithParam; diff --git a/test/syscalls/linux/socket_capability.cc b/test/syscalls/linux/socket_capability.cc new file mode 100644 index 000000000..84b5b2b21 --- /dev/null +++ b/test/syscalls/linux/socket_capability.cc @@ -0,0 +1,61 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Subset of socket tests that need Linux-specific headers (compared to POSIX +// headers). + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +TEST(SocketTest, UnixConnectNeedsWritePerm) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor bound = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(/*abstract=*/false, AF_UNIX)); + ASSERT_THAT(bind(bound.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallSucceeds()); + ASSERT_THAT(listen(bound.get(), 1), SyscallSucceeds()); + + // Drop capabilites that allow us to override permision checks. Otherwise if + // the test is run as root, the connect below will bypass permission checks + // and succeed unexpectedly. + ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); + + // Connect should fail without write perms. + ASSERT_THAT(chmod(addr.sun_path, 0500), SyscallSucceeds()); + FileDescriptor client = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_UNIX, SOCK_STREAM, PF_UNIX)); + ASSERT_THAT(connect(client.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallFailsWithErrno(EACCES)); + + // Connect should succeed with write perms. + ASSERT_THAT(chmod(addr.sun_path, 0200), SyscallSucceeds()); + EXPECT_THAT(connect(client.get(), reinterpret_cast(&addr), + sizeof(addr)), + SyscallSucceeds()); +} + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From eb6d3d7710946c39d0ffb6e0a6307e0c2f3bd135 Mon Sep 17 00:00:00 2001 From: Martijn Vels Date: Mon, 15 Jun 2020 10:25:26 -0700 Subject: Internal change. 
PiperOrigin-RevId: 316492839 --- test/syscalls/linux/proc_net.cc | 68 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index cac394910..02a326fc3 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -20,8 +20,13 @@ #include #include +#include + #include "gtest/gtest.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" #include "absl/time/clock.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/capability_util.h" @@ -404,6 +409,69 @@ TEST(ProcNetSnmp, UdpIn_NoRandomSave) { EXPECT_EQ(oldInDatagrams, newInDatagrams - 1); } +TEST(ProcNetSnmp, CheckNetStat) { + // TODO(b/155123175): SNMP and netstat don't work on gVisor. + SKIP_IF(IsRunningOnGvisor()); + + std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/netstat")); + + int name_count = 0; + int value_count = 0; + std::vector lines = absl::StrSplit(contents, '\n'); + for (int i = 0; i + 1 < lines.size(); i += 2) { + std::vector names = + absl::StrSplit(lines[i], absl::ByAnyChar("\t ")); + std::vector values = + absl::StrSplit(lines[i + 1], absl::ByAnyChar("\t ")); + EXPECT_EQ(names.size(), values.size()) << " mismatch in lines '" << lines[i] + << "' and '" << lines[i + 1] << "'"; + for (int j = 0; j < names.size() && j < values.size(); ++j) { + if (names[j] == "TCPOrigDataSent" || names[j] == "TCPSynRetrans" || + names[j] == "TCPDSACKRecv" || names[j] == "TCPDSACKOfoRecv") { + ++name_count; + int64_t val; + if (absl::SimpleAtoi(values[j], &val)) { + ++value_count; + } + } + } + } + EXPECT_EQ(name_count, 4); + EXPECT_EQ(value_count, 4); +} + +TEST(ProcNetSnmp, CheckSnmp) { + // TODO(b/155123175): SNMP and netstat don't work on gVisor. 
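Both CheckNetStat and CheckSnmp rely on the same layout: /proc/net/netstat and /proc/net/snmp emit counters as pairs of lines, a header line of counter names followed by a line of values, with each pair prefixed by a protocol or extension tag such as Tcp: or TcpExt:, so names and values match up positionally. A minimal standalone Go sketch of that parsing (not the test's own helper):

    package main

    import (
        "fmt"
        "io/ioutil"
        "strings"
    )

    func main() {
        data, err := ioutil.ReadFile("/proc/net/snmp")
        if err != nil {
            panic(err)
        }
        lines := strings.Split(string(data), "\n")
        counters := map[string]string{}
        // Lines come in (names, values) pairs; field 0 is the "Tcp:"-style prefix.
        for i := 0; i+1 < len(lines); i += 2 {
            names := strings.Fields(lines[i])
            values := strings.Fields(lines[i+1])
            for j := 1; j < len(names) && j < len(values); j++ {
                counters[names[0]+names[j]] = values[j]
            }
        }
        fmt.Println("Tcp RetransSegs =", counters["Tcp:RetransSegs"])
    }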
+ SKIP_IF(IsRunningOnGvisor()); + + std::string contents = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/net/snmp")); + + int name_count = 0; + int value_count = 0; + std::vector lines = absl::StrSplit(contents, '\n'); + for (int i = 0; i + 1 < lines.size(); i += 2) { + std::vector names = + absl::StrSplit(lines[i], absl::ByAnyChar("\t ")); + std::vector values = + absl::StrSplit(lines[i + 1], absl::ByAnyChar("\t ")); + EXPECT_EQ(names.size(), values.size()) << " mismatch in lines '" << lines[i] + << "' and '" << lines[i + 1] << "'"; + for (int j = 0; j < names.size() && j < values.size(); ++j) { + if (names[j] == "RetransSegs") { + ++name_count; + int64_t val; + if (absl::SimpleAtoi(values[j], &val)) { + ++value_count; + } + } + } + } + EXPECT_EQ(name_count, 1); + EXPECT_EQ(value_count, 1); +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 885605c5e412f9ced42766094368c86be1002cff Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Mon, 15 Jun 2020 13:34:33 -0700 Subject: Remove blacklist from //test/runtimes Updates #2972 PiperOrigin-RevId: 316534165 --- test/runtimes/BUILD | 10 +-- test/runtimes/blacklist_go1.12.csv | 16 ---- test/runtimes/blacklist_java11.csv | 126 ------------------------------- test/runtimes/blacklist_nodejs12.4.0.csv | 47 ------------ test/runtimes/blacklist_php7.3.6.csv | 29 ------- test/runtimes/blacklist_python3.7.3.csv | 27 ------- test/runtimes/defs.bzl | 22 +++--- test/runtimes/exclude_go1.12.csv | 16 ++++ test/runtimes/exclude_java11.csv | 126 +++++++++++++++++++++++++++++++ test/runtimes/exclude_nodejs12.4.0.csv | 47 ++++++++++++ test/runtimes/exclude_php7.3.6.csv | 29 +++++++ test/runtimes/exclude_python3.7.3.csv | 27 +++++++ test/runtimes/runner/BUILD | 4 +- test/runtimes/runner/blacklist_test.go | 37 --------- test/runtimes/runner/exclude_test.go | 37 +++++++++ test/runtimes/runner/main.go | 38 +++++----- 16 files changed, 319 insertions(+), 319 deletions(-) delete mode 100644 test/runtimes/blacklist_go1.12.csv delete mode 100644 test/runtimes/blacklist_java11.csv delete mode 100644 test/runtimes/blacklist_nodejs12.4.0.csv delete mode 100644 test/runtimes/blacklist_php7.3.6.csv delete mode 100644 test/runtimes/blacklist_python3.7.3.csv create mode 100644 test/runtimes/exclude_go1.12.csv create mode 100644 test/runtimes/exclude_java11.csv create mode 100644 test/runtimes/exclude_nodejs12.4.0.csv create mode 100644 test/runtimes/exclude_php7.3.6.csv create mode 100644 test/runtimes/exclude_python3.7.3.csv delete mode 100644 test/runtimes/runner/blacklist_test.go create mode 100644 test/runtimes/runner/exclude_test.go (limited to 'test') diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 4cd627222..022de5ff7 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -4,30 +4,30 @@ package(licenses = ["notice"]) runtime_test( name = "go1.12", - blacklist_file = "blacklist_go1.12.csv", + exclude_file = "exclude_go1.12.csv", lang = "go", ) runtime_test( name = "java11", - blacklist_file = "blacklist_java11.csv", + exclude_file = "exclude_java11.csv", lang = "java", ) runtime_test( name = "nodejs12.4.0", - blacklist_file = "blacklist_nodejs12.4.0.csv", + exclude_file = "exclude_nodejs12.4.0.csv", lang = "nodejs", ) runtime_test( name = "php7.3.6", - blacklist_file = "blacklist_php7.3.6.csv", + exclude_file = "exclude_php7.3.6.csv", lang = "php", ) runtime_test( name = "python3.7.3", - blacklist_file = "blacklist_python3.7.3.csv", + exclude_file = "exclude_python3.7.3.csv", lang = "python", ) diff --git 
a/test/runtimes/blacklist_go1.12.csv b/test/runtimes/blacklist_go1.12.csv deleted file mode 100644 index 8c8ae0c5d..000000000 --- a/test/runtimes/blacklist_go1.12.csv +++ /dev/null @@ -1,16 +0,0 @@ -test name,bug id,comment -cgo_errors,,FLAKY -cgo_test,,FLAKY -go_test:cmd/go,,FLAKY -go_test:cmd/vendor/golang.org/x/sys/unix,b/118783622,/dev devices missing -go_test:net,b/118784196,socket: invalid argument. Works as intended: see bug. -go_test:os,b/118780122,we have a pollable filesystem but that's a surprise -go_test:os/signal,b/118780860,/dev/pts not properly supported -go_test:runtime,b/118782341,sigtrap not reported or caught or something -go_test:syscall,b/118781998,bad bytes -- bad mem addr -race,b/118782931,thread sanitizer. Works as intended: b/62219744. -runtime:cpu124,b/118778254,segmentation fault -test:0_1,,FLAKY -testasan,, -testcarchive,b/118782924,no sigpipe -testshared,,FLAKY diff --git a/test/runtimes/blacklist_java11.csv b/test/runtimes/blacklist_java11.csv deleted file mode 100644 index c012e5a56..000000000 --- a/test/runtimes/blacklist_java11.csv +++ /dev/null @@ -1,126 +0,0 @@ -test name,bug id,comment -com/sun/crypto/provider/Cipher/PBE/PKCS12Cipher.java,,Fails in Docker -com/sun/jdi/NashornPopFrameTest.java,, -com/sun/jdi/ProcessAttachTest.java,, -com/sun/management/HotSpotDiagnosticMXBean/CheckOrigin.java,,Fails in Docker -com/sun/management/OperatingSystemMXBean/GetCommittedVirtualMemorySize.java,, -com/sun/management/UnixOperatingSystemMXBean/GetMaxFileDescriptorCount.sh,, -com/sun/tools/attach/AttachSelf.java,, -com/sun/tools/attach/BasicTests.java,, -com/sun/tools/attach/PermissionTest.java,, -com/sun/tools/attach/StartManagementAgent.java,, -com/sun/tools/attach/TempDirTest.java,, -com/sun/tools/attach/modules/Driver.java,, -java/lang/Character/CheckScript.java,,Fails in Docker -java/lang/Character/CheckUnicode.java,,Fails in Docker -java/lang/Class/GetPackageBootLoaderChildLayer.java,, -java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker -java/lang/String/nativeEncoding/StringPlatformChars.java,, -java/net/DatagramSocket/ReuseAddressTest.java,, -java/net/DatagramSocket/SendDatagramToBadAddress.java,b/78473345, -java/net/Inet4Address/PingThis.java,, -java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103, -java/net/MulticastSocket/MulticastTTL.java,, -java/net/MulticastSocket/Promiscuous.java,, -java/net/MulticastSocket/SetLoopbackMode.java,, -java/net/MulticastSocket/SetTTLAndGetTTL.java,, -java/net/MulticastSocket/Test.java,, -java/net/MulticastSocket/TestDefaults.java,, -java/net/MulticastSocket/TimeToLive.java,, -java/net/NetworkInterface/NetworkInterfaceStreamTest.java,, -java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported -java/net/Socket/TrafficClass.java,b/78527818,Not supported on gVisor -java/net/Socket/UrgentDataTest.java,b/111515323, -java/net/Socket/setReuseAddress/Basic.java,b/78519214,SO_REUSEADDR enabled by default -java/net/SocketOption/OptionsTest.java,,Fails in Docker -java/net/SocketOption/TcpKeepAliveTest.java,, -java/net/SocketPermission/SocketPermissionTest.java,, -java/net/URLConnection/6212146/TestDriver.java,,Fails in Docker -java/net/httpclient/RequestBuilderTest.java,,Fails in Docker -java/net/httpclient/ShortResponseBody.java,, -java/net/httpclient/ShortResponseBodyWithRetry.java,, -java/nio/channels/AsyncCloseAndInterrupt.java,, -java/nio/channels/AsynchronousServerSocketChannel/Basic.java,, -java/nio/channels/AsynchronousSocketChannel/Basic.java,b/77921528,SO_KEEPALIVE is not 
settable -java/nio/channels/DatagramChannel/BasicMulticastTests.java,, -java/nio/channels/DatagramChannel/SocketOptionTests.java,,Fails in Docker -java/nio/channels/DatagramChannel/UseDGWithIPv6.java,, -java/nio/channels/FileChannel/directio/DirectIOTest.java,,Fails in Docker -java/nio/channels/Selector/OutOfBand.java,, -java/nio/channels/Selector/SelectWithConsumer.java,,Flaky -java/nio/channels/ServerSocketChannel/SocketOptionTests.java,, -java/nio/channels/SocketChannel/LingerOnClose.java,, -java/nio/channels/SocketChannel/SocketOptionTests.java,b/77965901, -java/nio/channels/spi/SelectorProvider/inheritedChannel/InheritedChannelTest.java,,Fails in Docker -java/rmi/activation/Activatable/extLoadedImpl/ext.sh,, -java/rmi/transport/checkLeaseInfoLeak/CheckLeaseLeak.java,, -java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker -java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker -java/util/Calendar/JapaneseEraNameTest.java,, -java/util/Currency/CurrencyTest.java,,Fails in Docker -java/util/Currency/ValidateISO4217.java,,Fails in Docker -java/util/Locale/LSRDataTest.java,, -java/util/concurrent/locks/Lock/TimedAcquireLeak.java,, -java/util/jar/JarFile/mrjar/MultiReleaseJarAPI.java,,Fails in Docker -java/util/logging/LogManager/Configuration/updateConfiguration/SimpleUpdateConfigWithInputStreamTest.java,, -java/util/logging/TestLoggerWeakRefLeak.java,, -javax/imageio/AppletResourceTest.java,, -javax/management/security/HashedPasswordFileTest.java,, -javax/net/ssl/SSLSession/JSSERenegotiate.java,,Fails in Docker -javax/sound/sampled/AudioInputStream/FrameLengthAfterConversion.java,, -jdk/jfr/event/runtime/TestNetworkUtilizationEvent.java,, -jdk/jfr/event/runtime/TestThreadParkEvent.java,, -jdk/jfr/event/sampling/TestNative.java,, -jdk/jfr/jcmd/TestJcmdChangeLogLevel.java,, -jdk/jfr/jcmd/TestJcmdConfigure.java,, -jdk/jfr/jcmd/TestJcmdDump.java,, -jdk/jfr/jcmd/TestJcmdDumpGeneratedFilename.java,, -jdk/jfr/jcmd/TestJcmdDumpLimited.java,, -jdk/jfr/jcmd/TestJcmdDumpPathToGCRoots.java,, -jdk/jfr/jcmd/TestJcmdLegacy.java,, -jdk/jfr/jcmd/TestJcmdSaveToFile.java,, -jdk/jfr/jcmd/TestJcmdStartDirNotExist.java,, -jdk/jfr/jcmd/TestJcmdStartInvaldFile.java,, -jdk/jfr/jcmd/TestJcmdStartPathToGCRoots.java,, -jdk/jfr/jcmd/TestJcmdStartStopDefault.java,, -jdk/jfr/jcmd/TestJcmdStartWithOptions.java,, -jdk/jfr/jcmd/TestJcmdStartWithSettings.java,, -jdk/jfr/jcmd/TestJcmdStopInvalidFile.java,, -jdk/jfr/jvm/TestJfrJavaBase.java,, -jdk/jfr/startupargs/TestStartRecording.java,, -jdk/modules/incubator/ImageModules.java,, -jdk/net/Sockets/ExtOptionTest.java,, -jdk/net/Sockets/QuickAckTest.java,, -lib/security/cacerts/VerifyCACerts.java,, -sun/management/jmxremote/bootstrap/CustomLauncherTest.java,, -sun/management/jmxremote/bootstrap/JvmstatCountersTest.java,, -sun/management/jmxremote/bootstrap/LocalManagementTest.java,, -sun/management/jmxremote/bootstrap/RmiRegistrySslTest.java,, -sun/management/jmxremote/bootstrap/RmiSslBootstrapTest.sh,, -sun/management/jmxremote/startstop/JMXStartStopTest.java,, -sun/management/jmxremote/startstop/JMXStatusPerfCountersTest.java,, -sun/management/jmxremote/startstop/JMXStatusTest.java,, -sun/text/resources/LocaleDataTest.java,, -sun/tools/jcmd/TestJcmdSanity.java,, -sun/tools/jhsdb/AlternateHashingTest.java,, -sun/tools/jhsdb/BasicLauncherTest.java,, -sun/tools/jhsdb/HeapDumpTest.java,, -sun/tools/jhsdb/heapconfig/JMapHeapConfigTest.java,, -sun/tools/jinfo/BasicJInfoTest.java,, -sun/tools/jinfo/JInfoTest.java,, -sun/tools/jmap/BasicJMapTest.java,, 
-sun/tools/jstack/BasicJStackTest.java,, -sun/tools/jstack/DeadlockDetectionTest.java,, -sun/tools/jstatd/TestJstatdExternalRegistry.java,, -sun/tools/jstatd/TestJstatdPort.java,,Flaky -sun/tools/jstatd/TestJstatdPortAndServer.java,,Flaky -sun/util/calendar/zi/TestZoneInfo310.java,, -tools/jar/modularJar/Basic.java,, -tools/jar/multiRelease/Basic.java,, -tools/jimage/JImageExtractTest.java,, -tools/jimage/JImageTest.java,, -tools/jlink/JLinkTest.java,, -tools/jlink/plugins/IncludeLocalesPluginTest.java,, -tools/jmod/hashes/HashesTest.java,, -tools/launcher/BigJar.java,b/111611473, -tools/launcher/modules/patch/systemmodules/PatchSystemModules.java,, diff --git a/test/runtimes/blacklist_nodejs12.4.0.csv b/test/runtimes/blacklist_nodejs12.4.0.csv deleted file mode 100644 index 4ab4e2927..000000000 --- a/test/runtimes/blacklist_nodejs12.4.0.csv +++ /dev/null @@ -1,47 +0,0 @@ -test name,bug id,comment -benchmark/test-benchmark-fs.js,, -benchmark/test-benchmark-module.js,, -benchmark/test-benchmark-napi.js,, -doctool/test-make-doc.js,b/68848110,Expected to fail. -fixtures/test-error-first-line-offset.js,, -fixtures/test-fs-readfile-error.js,, -fixtures/test-fs-stat-sync-overflow.js,, -internet/test-dgram-broadcast-multi-process.js,, -internet/test-dgram-multicast-multi-process.js,, -internet/test-dgram-multicast-set-interface-lo.js,, -parallel/test-cluster-dgram-reuse.js,b/64024294, -parallel/test-dgram-bind-fd.js,b/132447356, -parallel/test-dgram-create-socket-handle-fd.js,b/132447238, -parallel/test-dgram-createSocket-type.js,b/68847739, -parallel/test-dgram-socket-buffer-size.js,b/68847921, -parallel/test-fs-access.js,, -parallel/test-fs-write-stream-double-close.js,, -parallel/test-fs-write-stream-throw-type-error.js,b/110226209, -parallel/test-fs-write-stream.js,, -parallel/test-http2-respond-file-error-pipe-offset.js,, -parallel/test-os.js,, -parallel/test-process-uid-gid.js,, -pseudo-tty/test-assert-colors.js,, -pseudo-tty/test-assert-no-color.js,, -pseudo-tty/test-assert-position-indicator.js,, -pseudo-tty/test-async-wrap-getasyncid-tty.js,, -pseudo-tty/test-fatal-error.js,, -pseudo-tty/test-handle-wrap-isrefed-tty.js,, -pseudo-tty/test-readable-tty-keepalive.js,, -pseudo-tty/test-set-raw-mode-reset-process-exit.js,, -pseudo-tty/test-set-raw-mode-reset-signal.js,, -pseudo-tty/test-set-raw-mode-reset.js,, -pseudo-tty/test-stderr-stdout-handle-sigwinch.js,, -pseudo-tty/test-stdout-read.js,, -pseudo-tty/test-tty-color-support.js,, -pseudo-tty/test-tty-isatty.js,, -pseudo-tty/test-tty-stdin-call-end.js,, -pseudo-tty/test-tty-stdin-end.js,, -pseudo-tty/test-stdin-write.js,, -pseudo-tty/test-tty-stdout-end.js,, -pseudo-tty/test-tty-stdout-resize.js,, -pseudo-tty/test-tty-stream-constructors.js,, -pseudo-tty/test-tty-window-size.js,, -pseudo-tty/test-tty-wrap.js,, -pummel/test-net-pingpong.js,, -pummel/test-vm-memleak.js,, diff --git a/test/runtimes/blacklist_php7.3.6.csv b/test/runtimes/blacklist_php7.3.6.csv deleted file mode 100644 index 456bf7487..000000000 --- a/test/runtimes/blacklist_php7.3.6.csv +++ /dev/null @@ -1,29 +0,0 @@ -test name,bug id,comment -ext/intl/tests/bug77895.phpt,, -ext/intl/tests/dateformat_bug65683_2.phpt,, -ext/mbstring/tests/bug76319.phpt,, -ext/mbstring/tests/bug76958.phpt,, -ext/mbstring/tests/bug77025.phpt,, -ext/mbstring/tests/bug77165.phpt,, -ext/mbstring/tests/bug77454.phpt,, -ext/mbstring/tests/mb_convert_encoding_leak.phpt,, -ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, -ext/standard/tests/file/filetype_variation.phpt,, 
-ext/standard/tests/file/fopen_variation19.phpt,, -ext/standard/tests/file/php_fd_wrapper_01.phpt,, -ext/standard/tests/file/php_fd_wrapper_02.phpt,, -ext/standard/tests/file/php_fd_wrapper_03.phpt,, -ext/standard/tests/file/php_fd_wrapper_04.phpt,, -ext/standard/tests/file/realpath_bug77484.phpt,, -ext/standard/tests/file/rename_variation.phpt,b/68717309, -ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,, -ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, -ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, -ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, -ext/standard/tests/network/bug20134.phpt,, -tests/output/stream_isatty_err.phpt,b/68720279, -tests/output/stream_isatty_in-err.phpt,b/68720282, -tests/output/stream_isatty_in-out-err.phpt,, -tests/output/stream_isatty_in-out.phpt,b/68720299, -tests/output/stream_isatty_out-err.phpt,b/68720311, -tests/output/stream_isatty_out.phpt,b/68720325, diff --git a/test/runtimes/blacklist_python3.7.3.csv b/test/runtimes/blacklist_python3.7.3.csv deleted file mode 100644 index 2b9947212..000000000 --- a/test/runtimes/blacklist_python3.7.3.csv +++ /dev/null @@ -1,27 +0,0 @@ -test name,bug id,comment -test_asynchat,b/76031995,SO_REUSEADDR -test_asyncio,,Fails on Docker. -test_asyncore,b/76031995,SO_REUSEADDR -test_epoll,, -test_fcntl,,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. -test_ftplib,,Fails in Docker -test_httplib,b/76031995,SO_REUSEADDR -test_imaplib,, -test_logging,, -test_multiprocessing_fork,,Flaky. Sometimes times out. -test_multiprocessing_forkserver,,Flaky. Sometimes times out. -test_multiprocessing_main_handling,,Flaky. Sometimes times out. -test_multiprocessing_spawn,,Flaky. Sometimes times out. -test_nntplib,b/76031995,tests should not set SO_REUSEADDR -test_poplib,,Fails on Docker -test_posix,b/76174079,posix.sched_get_priority_min not implemented + posix.sched_rr_get_interval not permitted -test_pty,b/76157709,out of pty devices -test_readline,b/76157709,out of pty devices -test_resource,b/76174079, -test_selectors,b/76116849,OSError not raised with epoll -test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets -test_socket,b/75983380, -test_ssl,b/76031995,SO_REUSEADDR -test_subprocess,, -test_support,b/76031995,SO_REUSEADDR -test_telnetlib,b/76031995,SO_REUSEADDR diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl index f836dd952..dc3667f05 100644 --- a/test/runtimes/defs.bzl +++ b/test/runtimes/defs.bzl @@ -10,10 +10,10 @@ def _runtime_test_impl(ctx): "--image", ctx.attr.image, ] - if ctx.attr.blacklist_file: + if ctx.attr.exclude_file: args += [ - "--blacklist_file", - ctx.files.blacklist_file[0].short_path, + "--exclude_file", + ctx.files.exclude_file[0].short_path, ] # Build a runner. 
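(Editor's aside, not part of this patch: the --exclude_file flag wired up in the hunk above points the test runner at one of the three-column "test name,bug id,comment" CSVs being renamed in this change. The real consumer is getExcludes in test/runtimes/runner/main.go further down; the standalone sketch below only illustrates the shape of that lookup, and names such as loadExcludes are made up for this note.)

    package main

    import (
        "encoding/csv"
        "fmt"
        "os"
    )

    // loadExcludes reads a "test name,bug id,comment" CSV and returns the set
    // of test names to skip. Standalone illustration only; the real logic is
    // getExcludes in test/runtimes/runner/main.go.
    func loadExcludes(path string) (map[string]struct{}, error) {
        excludes := make(map[string]struct{})
        if path == "" {
            return excludes, nil // no exclude file: nothing is skipped
        }
        f, err := os.Open(path)
        if err != nil {
            return nil, err
        }
        defer f.Close()

        records, err := csv.NewReader(f).ReadAll()
        if err != nil {
            return nil, err
        }
        for i, rec := range records {
            if i == 0 {
                continue // skip the "test name,bug id,comment" header row
            }
            excludes[rec[0]] = struct{}{}
        }
        return excludes, nil
    }

    func main() {
        ex, err := loadExcludes("exclude_go1.12.csv")
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        fmt.Printf("excluding %d tests\n", len(ex))
    }
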
@@ -28,7 +28,7 @@ def _runtime_test_impl(ctx): return [DefaultInfo( executable = runner, runfiles = ctx.runfiles( - files = ctx.files._runner + ctx.files.blacklist_file + ctx.files._proctor, + files = ctx.files._runner + ctx.files.exclude_file + ctx.files._proctor, collect_default = True, collect_data = True, ), @@ -43,7 +43,7 @@ _runtime_test = rule( "lang": attr.string( mandatory = True, ), - "blacklist_file": attr.label( + "exclude_file": attr.label( mandatory = False, allow_single_file = True, ), @@ -68,12 +68,12 @@ def runtime_test(name, **kwargs): **kwargs ) -def blacklist_test(name, blacklist_file): - """Test that a blacklist parses correctly.""" +def exclude_test(name, exclude_file): + """Test that a exclude file parses correctly.""" go_test( - name = name + "_blacklist_test", + name = name + "_exclude_test", library = ":runner", - srcs = ["blacklist_test.go"], - args = ["--blacklist_file", "test/runtimes/" + blacklist_file], - data = [blacklist_file], + srcs = ["exclude_test.go"], + args = ["--exclude_file", "test/runtimes/" + exclude_file], + data = [exclude_file], ) diff --git a/test/runtimes/exclude_go1.12.csv b/test/runtimes/exclude_go1.12.csv new file mode 100644 index 000000000..8c8ae0c5d --- /dev/null +++ b/test/runtimes/exclude_go1.12.csv @@ -0,0 +1,16 @@ +test name,bug id,comment +cgo_errors,,FLAKY +cgo_test,,FLAKY +go_test:cmd/go,,FLAKY +go_test:cmd/vendor/golang.org/x/sys/unix,b/118783622,/dev devices missing +go_test:net,b/118784196,socket: invalid argument. Works as intended: see bug. +go_test:os,b/118780122,we have a pollable filesystem but that's a surprise +go_test:os/signal,b/118780860,/dev/pts not properly supported +go_test:runtime,b/118782341,sigtrap not reported or caught or something +go_test:syscall,b/118781998,bad bytes -- bad mem addr +race,b/118782931,thread sanitizer. Works as intended: b/62219744. 
+runtime:cpu124,b/118778254,segmentation fault +test:0_1,,FLAKY +testasan,, +testcarchive,b/118782924,no sigpipe +testshared,,FLAKY diff --git a/test/runtimes/exclude_java11.csv b/test/runtimes/exclude_java11.csv new file mode 100644 index 000000000..c012e5a56 --- /dev/null +++ b/test/runtimes/exclude_java11.csv @@ -0,0 +1,126 @@ +test name,bug id,comment +com/sun/crypto/provider/Cipher/PBE/PKCS12Cipher.java,,Fails in Docker +com/sun/jdi/NashornPopFrameTest.java,, +com/sun/jdi/ProcessAttachTest.java,, +com/sun/management/HotSpotDiagnosticMXBean/CheckOrigin.java,,Fails in Docker +com/sun/management/OperatingSystemMXBean/GetCommittedVirtualMemorySize.java,, +com/sun/management/UnixOperatingSystemMXBean/GetMaxFileDescriptorCount.sh,, +com/sun/tools/attach/AttachSelf.java,, +com/sun/tools/attach/BasicTests.java,, +com/sun/tools/attach/PermissionTest.java,, +com/sun/tools/attach/StartManagementAgent.java,, +com/sun/tools/attach/TempDirTest.java,, +com/sun/tools/attach/modules/Driver.java,, +java/lang/Character/CheckScript.java,,Fails in Docker +java/lang/Character/CheckUnicode.java,,Fails in Docker +java/lang/Class/GetPackageBootLoaderChildLayer.java,, +java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker +java/lang/String/nativeEncoding/StringPlatformChars.java,, +java/net/DatagramSocket/ReuseAddressTest.java,, +java/net/DatagramSocket/SendDatagramToBadAddress.java,b/78473345, +java/net/Inet4Address/PingThis.java,, +java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103, +java/net/MulticastSocket/MulticastTTL.java,, +java/net/MulticastSocket/Promiscuous.java,, +java/net/MulticastSocket/SetLoopbackMode.java,, +java/net/MulticastSocket/SetTTLAndGetTTL.java,, +java/net/MulticastSocket/Test.java,, +java/net/MulticastSocket/TestDefaults.java,, +java/net/MulticastSocket/TimeToLive.java,, +java/net/NetworkInterface/NetworkInterfaceStreamTest.java,, +java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported +java/net/Socket/TrafficClass.java,b/78527818,Not supported on gVisor +java/net/Socket/UrgentDataTest.java,b/111515323, +java/net/Socket/setReuseAddress/Basic.java,b/78519214,SO_REUSEADDR enabled by default +java/net/SocketOption/OptionsTest.java,,Fails in Docker +java/net/SocketOption/TcpKeepAliveTest.java,, +java/net/SocketPermission/SocketPermissionTest.java,, +java/net/URLConnection/6212146/TestDriver.java,,Fails in Docker +java/net/httpclient/RequestBuilderTest.java,,Fails in Docker +java/net/httpclient/ShortResponseBody.java,, +java/net/httpclient/ShortResponseBodyWithRetry.java,, +java/nio/channels/AsyncCloseAndInterrupt.java,, +java/nio/channels/AsynchronousServerSocketChannel/Basic.java,, +java/nio/channels/AsynchronousSocketChannel/Basic.java,b/77921528,SO_KEEPALIVE is not settable +java/nio/channels/DatagramChannel/BasicMulticastTests.java,, +java/nio/channels/DatagramChannel/SocketOptionTests.java,,Fails in Docker +java/nio/channels/DatagramChannel/UseDGWithIPv6.java,, +java/nio/channels/FileChannel/directio/DirectIOTest.java,,Fails in Docker +java/nio/channels/Selector/OutOfBand.java,, +java/nio/channels/Selector/SelectWithConsumer.java,,Flaky +java/nio/channels/ServerSocketChannel/SocketOptionTests.java,, +java/nio/channels/SocketChannel/LingerOnClose.java,, +java/nio/channels/SocketChannel/SocketOptionTests.java,b/77965901, +java/nio/channels/spi/SelectorProvider/inheritedChannel/InheritedChannelTest.java,,Fails in Docker +java/rmi/activation/Activatable/extLoadedImpl/ext.sh,, +java/rmi/transport/checkLeaseInfoLeak/CheckLeaseLeak.java,, 
+java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker +java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker +java/util/Calendar/JapaneseEraNameTest.java,, +java/util/Currency/CurrencyTest.java,,Fails in Docker +java/util/Currency/ValidateISO4217.java,,Fails in Docker +java/util/Locale/LSRDataTest.java,, +java/util/concurrent/locks/Lock/TimedAcquireLeak.java,, +java/util/jar/JarFile/mrjar/MultiReleaseJarAPI.java,,Fails in Docker +java/util/logging/LogManager/Configuration/updateConfiguration/SimpleUpdateConfigWithInputStreamTest.java,, +java/util/logging/TestLoggerWeakRefLeak.java,, +javax/imageio/AppletResourceTest.java,, +javax/management/security/HashedPasswordFileTest.java,, +javax/net/ssl/SSLSession/JSSERenegotiate.java,,Fails in Docker +javax/sound/sampled/AudioInputStream/FrameLengthAfterConversion.java,, +jdk/jfr/event/runtime/TestNetworkUtilizationEvent.java,, +jdk/jfr/event/runtime/TestThreadParkEvent.java,, +jdk/jfr/event/sampling/TestNative.java,, +jdk/jfr/jcmd/TestJcmdChangeLogLevel.java,, +jdk/jfr/jcmd/TestJcmdConfigure.java,, +jdk/jfr/jcmd/TestJcmdDump.java,, +jdk/jfr/jcmd/TestJcmdDumpGeneratedFilename.java,, +jdk/jfr/jcmd/TestJcmdDumpLimited.java,, +jdk/jfr/jcmd/TestJcmdDumpPathToGCRoots.java,, +jdk/jfr/jcmd/TestJcmdLegacy.java,, +jdk/jfr/jcmd/TestJcmdSaveToFile.java,, +jdk/jfr/jcmd/TestJcmdStartDirNotExist.java,, +jdk/jfr/jcmd/TestJcmdStartInvaldFile.java,, +jdk/jfr/jcmd/TestJcmdStartPathToGCRoots.java,, +jdk/jfr/jcmd/TestJcmdStartStopDefault.java,, +jdk/jfr/jcmd/TestJcmdStartWithOptions.java,, +jdk/jfr/jcmd/TestJcmdStartWithSettings.java,, +jdk/jfr/jcmd/TestJcmdStopInvalidFile.java,, +jdk/jfr/jvm/TestJfrJavaBase.java,, +jdk/jfr/startupargs/TestStartRecording.java,, +jdk/modules/incubator/ImageModules.java,, +jdk/net/Sockets/ExtOptionTest.java,, +jdk/net/Sockets/QuickAckTest.java,, +lib/security/cacerts/VerifyCACerts.java,, +sun/management/jmxremote/bootstrap/CustomLauncherTest.java,, +sun/management/jmxremote/bootstrap/JvmstatCountersTest.java,, +sun/management/jmxremote/bootstrap/LocalManagementTest.java,, +sun/management/jmxremote/bootstrap/RmiRegistrySslTest.java,, +sun/management/jmxremote/bootstrap/RmiSslBootstrapTest.sh,, +sun/management/jmxremote/startstop/JMXStartStopTest.java,, +sun/management/jmxremote/startstop/JMXStatusPerfCountersTest.java,, +sun/management/jmxremote/startstop/JMXStatusTest.java,, +sun/text/resources/LocaleDataTest.java,, +sun/tools/jcmd/TestJcmdSanity.java,, +sun/tools/jhsdb/AlternateHashingTest.java,, +sun/tools/jhsdb/BasicLauncherTest.java,, +sun/tools/jhsdb/HeapDumpTest.java,, +sun/tools/jhsdb/heapconfig/JMapHeapConfigTest.java,, +sun/tools/jinfo/BasicJInfoTest.java,, +sun/tools/jinfo/JInfoTest.java,, +sun/tools/jmap/BasicJMapTest.java,, +sun/tools/jstack/BasicJStackTest.java,, +sun/tools/jstack/DeadlockDetectionTest.java,, +sun/tools/jstatd/TestJstatdExternalRegistry.java,, +sun/tools/jstatd/TestJstatdPort.java,,Flaky +sun/tools/jstatd/TestJstatdPortAndServer.java,,Flaky +sun/util/calendar/zi/TestZoneInfo310.java,, +tools/jar/modularJar/Basic.java,, +tools/jar/multiRelease/Basic.java,, +tools/jimage/JImageExtractTest.java,, +tools/jimage/JImageTest.java,, +tools/jlink/JLinkTest.java,, +tools/jlink/plugins/IncludeLocalesPluginTest.java,, +tools/jmod/hashes/HashesTest.java,, +tools/launcher/BigJar.java,b/111611473, +tools/launcher/modules/patch/systemmodules/PatchSystemModules.java,, diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv new file mode 100644 index 
000000000..4ab4e2927 --- /dev/null +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -0,0 +1,47 @@ +test name,bug id,comment +benchmark/test-benchmark-fs.js,, +benchmark/test-benchmark-module.js,, +benchmark/test-benchmark-napi.js,, +doctool/test-make-doc.js,b/68848110,Expected to fail. +fixtures/test-error-first-line-offset.js,, +fixtures/test-fs-readfile-error.js,, +fixtures/test-fs-stat-sync-overflow.js,, +internet/test-dgram-broadcast-multi-process.js,, +internet/test-dgram-multicast-multi-process.js,, +internet/test-dgram-multicast-set-interface-lo.js,, +parallel/test-cluster-dgram-reuse.js,b/64024294, +parallel/test-dgram-bind-fd.js,b/132447356, +parallel/test-dgram-create-socket-handle-fd.js,b/132447238, +parallel/test-dgram-createSocket-type.js,b/68847739, +parallel/test-dgram-socket-buffer-size.js,b/68847921, +parallel/test-fs-access.js,, +parallel/test-fs-write-stream-double-close.js,, +parallel/test-fs-write-stream-throw-type-error.js,b/110226209, +parallel/test-fs-write-stream.js,, +parallel/test-http2-respond-file-error-pipe-offset.js,, +parallel/test-os.js,, +parallel/test-process-uid-gid.js,, +pseudo-tty/test-assert-colors.js,, +pseudo-tty/test-assert-no-color.js,, +pseudo-tty/test-assert-position-indicator.js,, +pseudo-tty/test-async-wrap-getasyncid-tty.js,, +pseudo-tty/test-fatal-error.js,, +pseudo-tty/test-handle-wrap-isrefed-tty.js,, +pseudo-tty/test-readable-tty-keepalive.js,, +pseudo-tty/test-set-raw-mode-reset-process-exit.js,, +pseudo-tty/test-set-raw-mode-reset-signal.js,, +pseudo-tty/test-set-raw-mode-reset.js,, +pseudo-tty/test-stderr-stdout-handle-sigwinch.js,, +pseudo-tty/test-stdout-read.js,, +pseudo-tty/test-tty-color-support.js,, +pseudo-tty/test-tty-isatty.js,, +pseudo-tty/test-tty-stdin-call-end.js,, +pseudo-tty/test-tty-stdin-end.js,, +pseudo-tty/test-stdin-write.js,, +pseudo-tty/test-tty-stdout-end.js,, +pseudo-tty/test-tty-stdout-resize.js,, +pseudo-tty/test-tty-stream-constructors.js,, +pseudo-tty/test-tty-window-size.js,, +pseudo-tty/test-tty-wrap.js,, +pummel/test-net-pingpong.js,, +pummel/test-vm-memleak.js,, diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv new file mode 100644 index 000000000..456bf7487 --- /dev/null +++ b/test/runtimes/exclude_php7.3.6.csv @@ -0,0 +1,29 @@ +test name,bug id,comment +ext/intl/tests/bug77895.phpt,, +ext/intl/tests/dateformat_bug65683_2.phpt,, +ext/mbstring/tests/bug76319.phpt,, +ext/mbstring/tests/bug76958.phpt,, +ext/mbstring/tests/bug77025.phpt,, +ext/mbstring/tests/bug77165.phpt,, +ext/mbstring/tests/bug77454.phpt,, +ext/mbstring/tests/mb_convert_encoding_leak.phpt,, +ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, +ext/standard/tests/file/filetype_variation.phpt,, +ext/standard/tests/file/fopen_variation19.phpt,, +ext/standard/tests/file/php_fd_wrapper_01.phpt,, +ext/standard/tests/file/php_fd_wrapper_02.phpt,, +ext/standard/tests/file/php_fd_wrapper_03.phpt,, +ext/standard/tests/file/php_fd_wrapper_04.phpt,, +ext/standard/tests/file/realpath_bug77484.phpt,, +ext/standard/tests/file/rename_variation.phpt,b/68717309, +ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,, +ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, +ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, +ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, +ext/standard/tests/network/bug20134.phpt,, +tests/output/stream_isatty_err.phpt,b/68720279, +tests/output/stream_isatty_in-err.phpt,b/68720282, 
+tests/output/stream_isatty_in-out-err.phpt,, +tests/output/stream_isatty_in-out.phpt,b/68720299, +tests/output/stream_isatty_out-err.phpt,b/68720311, +tests/output/stream_isatty_out.phpt,b/68720325, diff --git a/test/runtimes/exclude_python3.7.3.csv b/test/runtimes/exclude_python3.7.3.csv new file mode 100644 index 000000000..2b9947212 --- /dev/null +++ b/test/runtimes/exclude_python3.7.3.csv @@ -0,0 +1,27 @@ +test name,bug id,comment +test_asynchat,b/76031995,SO_REUSEADDR +test_asyncio,,Fails on Docker. +test_asyncore,b/76031995,SO_REUSEADDR +test_epoll,, +test_fcntl,,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. +test_ftplib,,Fails in Docker +test_httplib,b/76031995,SO_REUSEADDR +test_imaplib,, +test_logging,, +test_multiprocessing_fork,,Flaky. Sometimes times out. +test_multiprocessing_forkserver,,Flaky. Sometimes times out. +test_multiprocessing_main_handling,,Flaky. Sometimes times out. +test_multiprocessing_spawn,,Flaky. Sometimes times out. +test_nntplib,b/76031995,tests should not set SO_REUSEADDR +test_poplib,,Fails on Docker +test_posix,b/76174079,posix.sched_get_priority_min not implemented + posix.sched_rr_get_interval not permitted +test_pty,b/76157709,out of pty devices +test_readline,b/76157709,out of pty devices +test_resource,b/76174079, +test_selectors,b/76116849,OSError not raised with epoll +test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets +test_socket,b/75983380, +test_ssl,b/76031995,SO_REUSEADDR +test_subprocess,, +test_support,b/76031995,SO_REUSEADDR +test_telnetlib,b/76031995,SO_REUSEADDR diff --git a/test/runtimes/runner/BUILD b/test/runtimes/runner/BUILD index 63924b9c5..3972244b9 100644 --- a/test/runtimes/runner/BUILD +++ b/test/runtimes/runner/BUILD @@ -14,8 +14,8 @@ go_binary( ) go_test( - name = "blacklist_test", + name = "exclude_test", size = "small", - srcs = ["blacklist_test.go"], + srcs = ["exclude_test.go"], library = ":runner", ) diff --git a/test/runtimes/runner/blacklist_test.go b/test/runtimes/runner/blacklist_test.go deleted file mode 100644 index 0ff69ab18..000000000 --- a/test/runtimes/runner/blacklist_test.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "flag" - "os" - "testing" -) - -func TestMain(m *testing.M) { - flag.Parse() - os.Exit(m.Run()) -} - -// Test that the blacklist parses without error. -func TestBlacklists(t *testing.T) { - bl, err := getBlacklist() - if err != nil { - t.Fatalf("error parsing blacklist: %v", err) - } - if *blacklistFile != "" && len(bl) == 0 { - t.Errorf("got empty blacklist for file %q", *blacklistFile) - } -} diff --git a/test/runtimes/runner/exclude_test.go b/test/runtimes/runner/exclude_test.go new file mode 100644 index 000000000..c08755894 --- /dev/null +++ b/test/runtimes/runner/exclude_test.go @@ -0,0 +1,37 @@ +// Copyright 2019 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "flag" + "os" + "testing" +) + +func TestMain(m *testing.M) { + flag.Parse() + os.Exit(m.Run()) +} + +// Test that the exclude file parses without error. +func TestBlacklists(t *testing.T) { + ex, err := getExcludes() + if err != nil { + t.Fatalf("error parsing exclude file: %v", err) + } + if *excludeFile != "" && len(ex) == 0 { + t.Errorf("got empty excludes for file %q", *excludeFile) + } +} diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go index 57540e00e..7989dca84 100644 --- a/test/runtimes/runner/main.go +++ b/test/runtimes/runner/main.go @@ -31,9 +31,9 @@ import ( ) var ( - lang = flag.String("lang", "", "language runtime to test") - image = flag.String("image", "", "docker image with runtime tests") - blacklistFile = flag.String("blacklist_file", "", "file containing blacklist of tests to exclude, in CSV format with fields: test name, bug id, comment") + lang = flag.String("lang", "", "language runtime to test") + image = flag.String("image", "", "docker image with runtime tests") + excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment") ) // Wait time for each test to run. @@ -52,10 +52,10 @@ func main() { // defered functions before exiting. It returns an exit code that should be // passed to os.Exit. func runTests() int { - // Get tests to blacklist. - blacklist, err := getBlacklist() + // Get tests to exclude.. + excludes, err := getExcludes() if err != nil { - fmt.Fprintf(os.Stderr, "Error getting blacklist: %s\n", err.Error()) + fmt.Fprintf(os.Stderr, "Error getting exclude list: %s\n", err.Error()) return 1 } @@ -66,7 +66,7 @@ func runTests() int { // Get a slice of tests to run. This will also start a single Docker // container that will be used to run each test. The final test will // stop the Docker container. - tests, err := getTests(d, blacklist) + tests, err := getTests(d, excludes) if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err.Error()) return 1 @@ -77,7 +77,7 @@ func runTests() int { } // getTests executes all tests as table tests. -func getTests(d *dockerutil.Docker, blacklist map[string]struct{}) ([]testing.InternalTest, error) { +func getTests(d *dockerutil.Docker, excludes map[string]struct{}) ([]testing.InternalTest, error) { // Start the container. d.CopyFiles("/proctor", "test/runtimes/proctor/proctor") if err := d.Spawn(dockerutil.RunOpts{ @@ -108,9 +108,9 @@ func getTests(d *dockerutil.Docker, blacklist map[string]struct{}) ([]testing.In itests = append(itests, testing.InternalTest{ Name: tc, F: func(t *testing.T) { - // Is the test blacklisted? - if _, ok := blacklist[tc]; ok { - t.Skipf("SKIP: blacklisted test %q", tc) + // Is the test excluded? 
+ if _, ok := excludes[tc]; ok { + t.Skipf("SKIP: excluded test %q", tc) } var ( @@ -143,14 +143,14 @@ func getTests(d *dockerutil.Docker, blacklist map[string]struct{}) ([]testing.In return itests, nil } -// getBlacklist reads the blacklist file and returns a set of test names to +// getBlacklist reads the exclude file and returns a set of test names to // exclude. -func getBlacklist() (map[string]struct{}, error) { - blacklist := make(map[string]struct{}) - if *blacklistFile == "" { - return blacklist, nil +func getExcludes() (map[string]struct{}, error) { + excludes := make(map[string]struct{}) + if *excludeFile == "" { + return excludes, nil } - f, err := os.Open(*blacklistFile) + f, err := os.Open(*excludeFile) if err != nil { return nil, err } @@ -171,9 +171,9 @@ func getBlacklist() (map[string]struct{}, error) { if err != nil { return nil, err } - blacklist[record[0]] = struct{}{} + excludes[record[0]] = struct{}{} } - return blacklist, nil + return excludes, nil } // testDeps implements testing.testDeps (an unexported interface), and is -- cgit v1.2.3 From 67f261a87d42118d2f587c9a8d20d94972e47498 Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Mon, 15 Jun 2020 16:18:25 -0700 Subject: TCP to honor updated window size during handshake. In passive open cases, we transition to Established state after initializing endpoint's sender and receiver. With this we lose out on any updates coming from the ACK that completes the handshake. This change ensures that we uniformly transition to Established in all cases and does minor cleanups. Fixes #2938 PiperOrigin-RevId: 316567014 --- pkg/tcpip/transport/tcp/accept.go | 28 +++++---- pkg/tcpip/transport/tcp/connect.go | 32 +++++------ test/packetimpact/tests/BUILD | 10 ++++ .../tests/tcp_handshake_window_size_test.go | 66 ++++++++++++++++++++++ 4 files changed, 107 insertions(+), 29 deletions(-) create mode 100644 test/packetimpact/tests/tcp_handshake_window_size_test.go (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index ad197e8db..7679fe169 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -222,12 +222,6 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i n.initGSO() - // Create sender and receiver. - // - // The receiver at least temporarily has a zero receive window scale, - // but the caller may change it (before starting the protocol loop). - n.snd = newSender(n, iss, irs, s.window, rcvdSynOpts.MSS, rcvdSynOpts.WS) - n.rcv = newReceiver(n, irs, seqnum.Size(n.initialReceiveWindow()), 0, seqnum.Size(n.receiveBufferSize())) // Bootstrap the auto tuning algorithm. Starting at zero will result in // a large step function on the first window adjustment causing the // window to grow to a really large value. @@ -295,7 +289,7 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head } // Perform the 3-way handshake. 
- h := newPassiveHandshake(ep, ep.rcv.rcvWnd, isn, irs, opts, deferAccept) + h := newPassiveHandshake(ep, seqnum.Size(ep.initialReceiveWindow()), isn, irs, opts, deferAccept) if err := h.execute(); err != nil { ep.mu.Unlock() ep.Close() @@ -536,6 +530,9 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { return } + iss := s.ackNumber - 1 + irs := s.sequenceNumber - 1 + // Since SYN cookies are in use this is potentially an ACK to a // SYN-ACK we sent but don't have a half open connection state // as cookies are being used to protect against a potential SYN @@ -546,7 +543,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { // when under a potential syn flood attack. // // Validate the cookie. - data, ok := ctx.isCookieValid(s.id, s.ackNumber-1, s.sequenceNumber-1) + data, ok := ctx.isCookieValid(s.id, iss, irs) if !ok || int(data) >= len(mssTable) { e.stack.Stats().TCP.ListenOverflowInvalidSynCookieRcvd.Increment() e.stack.Stats().DroppedPackets.Increment() @@ -571,7 +568,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { rcvdSynOptions.TSEcr = s.parsedOptions.TSEcr } - n, err := ctx.createConnectingEndpoint(s, s.ackNumber-1, s.sequenceNumber-1, rcvdSynOptions, &waiter.Queue{}) + n, err := ctx.createConnectingEndpoint(s, iss, irs, rcvdSynOptions, &waiter.Queue{}) if err != nil { e.stack.Stats().TCP.FailedConnectionAttempts.Increment() e.stats.FailedConnectionAttempts.Increment() @@ -589,10 +586,17 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { n.tsOffset = 0 // Switch state to connected. - // We do not use transitionToStateEstablishedLocked here as there is - // no handshake state available when doing a SYN cookie based accept. n.isConnectNotified = true - n.setEndpointState(StateEstablished) + n.transitionToStateEstablishedLocked(&handshake{ + ep: n, + iss: iss, + ackNum: irs + 1, + rcvWnd: seqnum.Size(n.initialReceiveWindow()), + sndWnd: s.window, + rcvWndScale: e.rcvWndScaleForHandshake(), + sndWndScale: rcvdSynOptions.WS, + mss: rcvdSynOptions.MSS, + }) // Do the delivery in a separate goroutine so // that we don't block the listen loop in case diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 7da93dcc4..91ee3b0be 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -995,24 +995,22 @@ func (e *endpoint) completeWorkerLocked() { // transitionToStateEstablisedLocked transitions a given endpoint // to an established state using the handshake parameters provided. -// It also initializes sender/receiver if required. +// It also initializes sender/receiver. func (e *endpoint) transitionToStateEstablishedLocked(h *handshake) { - if e.snd == nil { - // Transfer handshake state to TCP connection. We disable - // receive window scaling if the peer doesn't support it - // (indicated by a negative send window scale). - e.snd = newSender(e, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale) - } - if e.rcv == nil { - rcvBufSize := seqnum.Size(e.receiveBufferSize()) - e.rcvListMu.Lock() - e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale(), rcvBufSize) - // Bootstrap the auto tuning algorithm. Starting at zero will - // result in a really large receive window after the first auto - // tuning adjustment. - e.rcvAutoParams.prevCopied = int(h.rcvWnd) - e.rcvListMu.Unlock() - } + // Transfer handshake state to TCP connection. 
We disable + // receive window scaling if the peer doesn't support it + // (indicated by a negative send window scale). + e.snd = newSender(e, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale) + + rcvBufSize := seqnum.Size(e.receiveBufferSize()) + e.rcvListMu.Lock() + e.rcv = newReceiver(e, h.ackNum-1, h.rcvWnd, h.effectiveRcvWndScale(), rcvBufSize) + // Bootstrap the auto tuning algorithm. Starting at zero will + // result in a really large receive window after the first auto + // tuning adjustment. + e.rcvAutoParams.prevCopied = int(h.rcvWnd) + e.rcvListMu.Unlock() + e.setEndpointState(StateEstablished) } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 95dd6c440..09e91b832 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -232,6 +232,16 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_handshake_window_size", + srcs = ["tcp_handshake_window_size_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "icmpv6_param_problem", srcs = ["icmpv6_param_problem_test.go"], diff --git a/test/packetimpact/tests/tcp_handshake_window_size_test.go b/test/packetimpact/tests/tcp_handshake_window_size_test.go new file mode 100644 index 000000000..652b530d0 --- /dev/null +++ b/test/packetimpact/tests/tcp_handshake_window_size_test.go @@ -0,0 +1,66 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_handshake_window_size_test + +import ( + "flag" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +// TestTCPHandshakeWindowSize tests if the stack is honoring the window size +// communicated during handshake. +func TestTCPHandshakeWindowSize(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(listenFD) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + defer conn.Close() + + // Start handshake with zero window size. + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), WindowSize: testbench.Uint16(uint16(0))}) + if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + t.Fatalf("expected SYN-ACK: %s", err) + } + // Update the advertised window size to a non-zero value with the ACK that + // completes the handshake. + // + // Set the window size with MSB set and expect the dut to treat it as + // an unsigned value. 
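+	// Editor's note, not in the original commit: uint16(1 << 15) is 0x8000, i.e.
+	// 32768 with the most significant bit set. Read as a signed 16-bit value it
+	// would be negative; treating the field as unsigned is what lets the DUT send
+	// the payload below instead of falling back to zero-window probing.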
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(1 << 15))}) + + acceptFd, _ := dut.Accept(listenFD) + defer dut.Close(acceptFd) + + sampleData := []byte("Sample Data") + samplePayload := &testbench.Payload{Bytes: sampleData} + + // Since we advertised a zero window followed by a non-zero window, + // expect the dut to honor the recently advertised non-zero window + // and actually send out the data instead of probing for zero window. + dut.Send(acceptFd, sampleData, 0) + if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload, time.Second); err != nil { + t.Fatalf("expected payload was not received: %s", err) + } +} -- cgit v1.2.3 From 6d64028c941e5ba96132b255a70b18311e0a5475 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 17 Jun 2020 09:11:44 -0700 Subject: Fix typos in test/README.md. Closes #2996. PiperOrigin-RevId: 316900535 --- test/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/README.md b/test/README.md index 97fe7ea04..02bbf42ff 100644 --- a/test/README.md +++ b/test/README.md @@ -24,11 +24,11 @@ also used to run these tests in `kokoro`. To run image and integration tests, run: -`./scripts/docker_test.sh` +`./scripts/docker_tests.sh` To run root tests, run: -`./scripts/root_test.sh` +`./scripts/root_tests.sh` There are a few other interesting variations for image and integration tests: -- cgit v1.2.3 From 96519e2c9d3fa1f15537c4dfc081a19d8d1ce1a2 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 17 Jun 2020 10:02:41 -0700 Subject: Implement POSIX locks - Change FileDescriptionImpl Lock/UnlockPOSIX signature to take {start,length,whence}, so the correct offset can be calculated in the implementations. - Create PosixLocker interface to make it possible to share the same locking code from different implementations. 
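(Editor's illustration, not part of this commit or its message: the new signature hands implementations the raw {start, length, whence} triple so they can resolve the byte range themselves. The standalone Go sketch below shows one way that resolution can look. lockRange, computeLockRange, and the seek* constants are made-up names for this note, not gVisor APIs, and overflow handling is omitted.)

    package main

    import (
        "errors"
        "fmt"
    )

    // Hypothetical whence values, mirroring SEEK_SET/SEEK_CUR/SEEK_END.
    const (
        seekSet int16 = 0
        seekCur int16 = 1
        seekEnd int16 = 2
    )

    // lockRange is a [start, end) byte range; end == ^uint64(0) stands for
    // "to the end of the file, however far it grows" (fcntl's l_len == 0 case).
    type lockRange struct {
        start, end uint64
    }

    // computeLockRange resolves whence against a caller-supplied current offset
    // and file size, the kind of work an implementation has to do once it is
    // given the raw {start, length, whence} triple instead of a precomputed range.
    func computeLockRange(start, length uint64, whence int16, curOff, fileSize uint64) (lockRange, error) {
        var base uint64
        switch whence {
        case seekSet:
            base = 0
        case seekCur:
            base = curOff
        case seekEnd:
            base = fileSize
        default:
            return lockRange{}, errors.New("invalid whence")
        }
        abs := base + start
        if length == 0 {
            return lockRange{start: abs, end: ^uint64(0)}, nil
        }
        return lockRange{start: abs, end: abs + length}, nil
    }

    func main() {
        r, err := computeLockRange(100, 50, seekCur, 400, 1000)
        if err != nil {
            panic(err)
        }
        fmt.Printf("lock bytes [%d, %d)\n", r.start, r.end) // lock bytes [500, 550)
    }
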
Closes #1480 PiperOrigin-RevId: 316910286 --- pkg/sentry/fsimpl/devpts/BUILD | 2 +- pkg/sentry/fsimpl/devpts/devpts.go | 3 +- pkg/sentry/fsimpl/devpts/master.go | 14 +++- pkg/sentry/fsimpl/devpts/slave.go | 14 +++- pkg/sentry/fsimpl/ext/BUILD | 2 +- pkg/sentry/fsimpl/ext/directory.go | 11 +++ pkg/sentry/fsimpl/ext/inode.go | 3 +- pkg/sentry/fsimpl/ext/regular_file.go | 11 +++ pkg/sentry/fsimpl/ext/symlink.go | 1 + pkg/sentry/fsimpl/gofer/BUILD | 1 - pkg/sentry/fsimpl/gofer/gofer.go | 12 ++- pkg/sentry/fsimpl/gofer/special_file.go | 3 +- pkg/sentry/fsimpl/host/BUILD | 2 +- pkg/sentry/fsimpl/host/host.go | 14 +++- pkg/sentry/fsimpl/host/tty.go | 11 +++ pkg/sentry/fsimpl/kernfs/BUILD | 3 +- pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 16 +++- pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 16 +++- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 3 +- pkg/sentry/fsimpl/kernfs/kernfs_test.go | 5 +- pkg/sentry/fsimpl/overlay/BUILD | 2 +- pkg/sentry/fsimpl/overlay/overlay.go | 14 +++- pkg/sentry/fsimpl/pipefs/BUILD | 1 - pkg/sentry/fsimpl/pipefs/pipefs.go | 3 +- pkg/sentry/fsimpl/proc/BUILD | 2 +- pkg/sentry/fsimpl/proc/subtasks.go | 3 +- pkg/sentry/fsimpl/proc/task.go | 3 +- pkg/sentry/fsimpl/proc/task_fds.go | 3 +- pkg/sentry/fsimpl/proc/task_files.go | 14 +++- pkg/sentry/fsimpl/proc/tasks.go | 3 +- pkg/sentry/fsimpl/sys/BUILD | 1 - pkg/sentry/fsimpl/sys/sys.go | 3 +- pkg/sentry/fsimpl/tmpfs/BUILD | 1 - pkg/sentry/fsimpl/tmpfs/regular_file_test.go | 33 +++----- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 15 +++- pkg/sentry/kernel/fd_table.go | 10 ++- pkg/sentry/kernel/pipe/BUILD | 2 +- pkg/sentry/kernel/pipe/vfs.go | 18 +++- pkg/sentry/socket/hostinet/BUILD | 2 +- pkg/sentry/socket/hostinet/socket_vfs2.go | 14 +++- pkg/sentry/socket/netlink/BUILD | 2 +- pkg/sentry/socket/netlink/socket_vfs2.go | 14 +++- pkg/sentry/socket/netstack/BUILD | 2 +- pkg/sentry/socket/netstack/netstack_vfs2.go | 14 +++- pkg/sentry/socket/unix/BUILD | 2 +- pkg/sentry/socket/unix/unix_vfs2.go | 16 +++- pkg/sentry/syscalls/linux/vfs2/fd.go | 38 +++++++++ pkg/sentry/vfs/BUILD | 2 +- pkg/sentry/vfs/file_description.go | 18 ++-- pkg/sentry/vfs/file_description_impl_util.go | 25 ++---- pkg/sentry/vfs/lock.go | 109 +++++++++++++++++++++++++ pkg/sentry/vfs/lock/BUILD | 13 --- pkg/sentry/vfs/lock/lock.go | 72 ---------------- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/fcntl.cc | 96 +++++++++++++++------- 55 files changed, 484 insertions(+), 234 deletions(-) create mode 100644 pkg/sentry/vfs/lock.go delete mode 100644 pkg/sentry/vfs/lock/BUILD delete mode 100644 pkg/sentry/vfs/lock/lock.go (limited to 'test') diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index cf440dce8..93512c9b6 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -18,12 +18,12 @@ go_library( "//pkg/context", "//pkg/safemem", "//pkg/sentry/arch", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/unimpl", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index 9b0e0cca2..e6fda2b4f 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -117,7 +116,7 
@@ type rootInode struct { kernfs.InodeNotSymlink kernfs.OrderedChildren - locks lock.FileLocks + locks vfs.FileLocks // Keep a reference to this inode's dentry. dentry kernfs.Dentry diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 1d22adbe3..69879498a 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -18,11 +18,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -35,7 +35,7 @@ type masterInode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink - locks lock.FileLocks + locks vfs.FileLocks // Keep a reference to this inode's dentry. dentry kernfs.Dentry @@ -189,6 +189,16 @@ func (mfd *masterFileDescription) Stat(ctx context.Context, opts vfs.StatOptions return mfd.inode.Stat(fs, opts) } +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (mfd *masterFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return mfd.Locks().LockPOSIX(ctx, &mfd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (mfd *masterFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return mfd.Locks().UnlockPOSIX(ctx, &mfd.vfsfd, uid, start, length, whence) +} + // maybeEmitUnimplementedEvent emits unimplemented event if cmd is valid. func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) { switch cmd { diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go index 7fe475080..cf1a0f0ac 100644 --- a/pkg/sentry/fsimpl/devpts/slave.go +++ b/pkg/sentry/fsimpl/devpts/slave.go @@ -18,10 +18,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -34,7 +34,7 @@ type slaveInode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink - locks lock.FileLocks + locks vfs.FileLocks // Keep a reference to this inode's dentry. dentry kernfs.Dentry @@ -185,3 +185,13 @@ func (sfd *slaveFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem() return sfd.inode.Stat(fs, opts) } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (sfd *slaveFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return sfd.Locks().LockPOSIX(ctx, &sfd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
+func (sfd *slaveFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return sfd.Locks().UnlockPOSIX(ctx, &sfd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index 973fa0def..ef24f8159 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -54,13 +54,13 @@ go_library( "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/fs", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/ext/disklayout", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/syscalls/linux", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go index 43be6928a..357512c7e 100644 --- a/pkg/sentry/fsimpl/ext/directory.go +++ b/pkg/sentry/fsimpl/ext/directory.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" @@ -305,3 +306,13 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in fd.off = offset return offset, nil } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *directoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (fd *directoryFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go index 5caaf14ed..30636cf66 100644 --- a/pkg/sentry/fsimpl/ext/inode.go +++ b/pkg/sentry/fsimpl/ext/inode.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -55,7 +54,7 @@ type inode struct { // diskInode gives us access to the inode struct on disk. Immutable. diskInode disklayout.Inode - locks lock.FileLocks + locks vfs.FileLocks // This is immutable. The first field of the implementations must have inode // as the first field to ensure temporality. diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go index 152036b2e..66d14bb95 100644 --- a/pkg/sentry/fsimpl/ext/regular_file.go +++ b/pkg/sentry/fsimpl/ext/regular_file.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/safemem" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" @@ -149,3 +150,13 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt // TODO(b/134676337): Implement mmap(2). return syserror.ENODEV } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. 
+func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go index acb28d85b..62efd4095 100644 --- a/pkg/sentry/fsimpl/ext/symlink.go +++ b/pkg/sentry/fsimpl/ext/symlink.go @@ -66,6 +66,7 @@ func (in *inode) isSymlink() bool { // O_PATH. For this reason most of the functions return EBADF. type symlinkFD struct { fileDescription + vfs.NoLockFD } // Compiles only if symlinkFD implements vfs.FileDescriptionImpl. diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index 5cdeeaeb5..4a800dcf9 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -69,7 +69,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/unet", diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index ac051b3a7..d8ae475ed 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -53,7 +53,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/usermem" @@ -665,7 +664,7 @@ type dentry struct { // endpoint bound to this file. pipe *pipe.VFSPipe - locks lock.FileLocks + locks vfs.FileLocks } // dentryAttrMask returns a p9.AttrMask enabling all attributes used by the @@ -1439,9 +1438,14 @@ func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, t f } // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { +func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { fd.lockLogging.Do(func() { log.Infof("Range lock using gofer file handled internally.") }) - return fd.LockFD.LockPOSIX(ctx, uid, t, rng, block) + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) } diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 289efdd25..e6e29b329 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -52,7 +51,7 @@ type specialFileFD struct { off int64 } -func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *lock.FileLocks, flags uint32) (*specialFileFD, error) { +func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *vfs.FileLocks, flags uint32) (*specialFileFD, error) { ftype := d.fileType() seekable := ftype == linux.S_IFREG mayBlock := ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 54f16ad63..44a09d87a 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -27,6 +27,7 @@ go_library( "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/hostfd", "//pkg/sentry/kernel", @@ -39,7 +40,6 @@ go_library( "//pkg/sentry/unimpl", "//pkg/sentry/uniqueid", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 5ec5100b8..7906242c9 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -28,13 +28,13 @@ import ( "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/hostfd" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -183,7 +183,7 @@ type inode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink - locks lock.FileLocks + locks vfs.FileLocks // When the reference count reaches zero, the host fd is closed. refs.AtomicRefCount @@ -718,3 +718,13 @@ func (f *fileDescription) EventUnregister(e *waiter.Entry) { func (f *fileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { return fdnotifier.NonBlockingPoll(int32(f.inode.hostFD), mask) } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (f *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return f.Locks().LockPOSIX(ctx, &f.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
+func (f *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return f.Locks().UnlockPOSIX(ctx, &f.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 68af6e5af..0fbc543b1 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -18,6 +18,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -377,3 +378,13 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal) _ = pg.SendSignal(kernel.SignalInfoPriv(sig)) return kernel.ERESTARTSYS } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (t *TTYFileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, typ fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return t.Locks().LockPOSIX(ctx, &t.vfsfd, uid, typ, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (t *TTYFileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return t.Locks().UnlockPOSIX(ctx, &t.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 0299dbde9..179df6c1e 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -45,11 +45,11 @@ go_library( "//pkg/fspath", "//pkg/log", "//pkg/refs", + "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", @@ -68,7 +68,6 @@ go_test( "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/usermem", "@com_github_google_go-cmp//cmp:go_default_library", diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 6418de0a3..c1215b70a 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -19,9 +19,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -39,7 +39,7 @@ type DynamicBytesFile struct { InodeNotDirectory InodeNotSymlink - locks lock.FileLocks + locks vfs.FileLocks data vfs.DynamicBytesSource } @@ -86,7 +86,7 @@ type DynamicBytesFD struct { } // Init initializes a DynamicBytesFD. -func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, locks *lock.FileLocks, flags uint32) error { +func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, locks *vfs.FileLocks, flags uint32) error { fd.LockFD.Init(locks) if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { return err @@ -135,3 +135,13 @@ func (fd *DynamicBytesFD) SetStat(context.Context, vfs.SetStatOptions) error { // DynamicBytesFiles are immutable. 
return syserror.EPERM } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *DynamicBytesFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (fd *DynamicBytesFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 33a5968ca..5f7853a2a 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -19,10 +19,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -57,7 +57,7 @@ type GenericDirectoryFD struct { // NewGenericDirectoryFD creates a new GenericDirectoryFD and returns its // dentry. -func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, locks *lock.FileLocks, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) { +func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) { fd := &GenericDirectoryFD{} if err := fd.Init(children, locks, opts); err != nil { return nil, err @@ -71,7 +71,7 @@ func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildre // Init initializes a GenericDirectoryFD. Use it when overriding // GenericDirectoryFD. Caller must call fd.VFSFileDescription.Init() with the // correct implementation. -func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *lock.FileLocks, opts *vfs.OpenOptions) error { +func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions) error { if vfs.AccessTypesForOpenFlags(opts)&vfs.MayWrite != 0 { // Can't open directories for writing. return syserror.EISDIR @@ -235,3 +235,13 @@ func (fd *GenericDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptio inode := fd.vfsfd.VirtualDentry().Dentry().Impl().(*Dentry).inode return inode.SetStat(ctx, fd.filesystem(), creds, opts) } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *GenericDirectoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
+func (fd *GenericDirectoryFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 0e4927215..650bd7b88 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -557,7 +556,7 @@ type StaticDirectory struct { InodeNoDynamicLookup OrderedChildren - locks lock.FileLocks + locks vfs.FileLocks } var _ Inode = (*StaticDirectory)(nil) diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index 6749facf7..dc407eb1d 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -103,7 +102,7 @@ type readonlyDir struct { kernfs.InodeDirectoryNoNewChildren kernfs.OrderedChildren - locks lock.FileLocks + locks vfs.FileLocks dentry kernfs.Dentry } @@ -133,7 +132,7 @@ type dir struct { kernfs.InodeNoDynamicLookup kernfs.OrderedChildren - locks lock.FileLocks + locks vfs.FileLocks fs *filesystem dentry kernfs.Dentry diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD index f9413bbdd..8cf5b35d3 100644 --- a/pkg/sentry/fsimpl/overlay/BUILD +++ b/pkg/sentry/fsimpl/overlay/BUILD @@ -29,11 +29,11 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index e660d0e2c..e11a3ff19 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -35,9 +35,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -415,7 +415,7 @@ type dentry struct { devMinor uint32 ino uint64 - locks lock.FileLocks + locks vfs.FileLocks } // newDentry creates a new dentry. The dentry initially has no references; it @@ -610,3 +610,13 @@ func (fd *fileDescription) filesystem() *filesystem { func (fd *fileDescription) dentry() *dentry { return fd.vfsfd.Dentry().Impl().(*dentry) } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD index c618dbe6c..5950a2d59 100644 --- a/pkg/sentry/fsimpl/pipefs/BUILD +++ b/pkg/sentry/fsimpl/pipefs/BUILD @@ -15,7 +15,6 @@ go_library( "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/usermem", ], diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index e4dabaa33..dd7eaf4a8 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -82,7 +81,7 @@ type inode struct { kernfs.InodeNotSymlink kernfs.InodeNoopRefCount - locks lock.FileLocks + locks vfs.FileLocks pipe *pipe.VFSPipe ino uint64 diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 351ba4ee9..6014138ff 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -22,6 +22,7 @@ go_library( "//pkg/log", "//pkg/refs", "//pkg/safemem", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/inet", @@ -35,7 +36,6 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/tcpip/header", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index e2cdb7ee9..36a89540c 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -24,7 +24,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -38,7 +37,7 @@ type subtasksInode struct { kernfs.OrderedChildren kernfs.AlwaysValid - locks lock.FileLocks + locks vfs.FileLocks fs *filesystem task *kernel.Task diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 44078a765..8bb2b0ce1 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -39,7 +38,7 @@ type taskInode struct { kernfs.InodeAttrs kernfs.OrderedChildren - locks lock.FileLocks + locks vfs.FileLocks task *kernel.Task } diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index ef6c1d04f..7debdb07a 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -54,7 +53,7 @@ func taskFDExists(t *kernel.Task, fd int32) bool { } type fdDir struct { - locks lock.FileLocks + locks vfs.FileLocks fs *filesystem task *kernel.Task diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 
e5eaa91cd..ba4405026 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/safemem" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -30,7 +31,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -777,7 +777,7 @@ type namespaceInode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink - locks lock.FileLocks + locks vfs.FileLocks } var _ kernfs.Inode = (*namespaceInode)(nil) @@ -830,3 +830,13 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err func (fd *namespaceFD) Release() { fd.inode.DecRef() } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *namespaceFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (fd *namespaceFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index 58c8b9d05..2f214d0c2 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -44,7 +43,7 @@ type tasksInode struct { kernfs.OrderedChildren kernfs.AlwaysValid - locks lock.FileLocks + locks vfs.FileLocks fs *filesystem pidns *kernel.PIDNamespace diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index 237f17def..a741e2bb6 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -15,7 +15,6 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/syserror", ], ) diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index b84463d3a..fe02f7ee9 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -101,7 +100,7 @@ type dir struct { kernfs.InodeDirectoryNoNewChildren kernfs.OrderedChildren - locks lock.FileLocks + locks vfs.FileLocks dentry kernfs.Dentry } diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 062321cbc..e73732a6b 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -62,7 +62,6 @@ go_library( "//pkg/sentry/uniqueid", "//pkg/sentry/usage", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sentry/vfs/memxattr", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go index 64e1c40ad..146c7fdfe 100644 --- 
a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go @@ -138,48 +138,37 @@ func TestLocks(t *testing.T) { } defer cleanup() - var ( - uid1 lock.UniqueID - uid2 lock.UniqueID - // Non-blocking. - block lock.Blocker - ) - - uid1 = 123 - uid2 = 456 - - if err := fd.Impl().LockBSD(ctx, uid1, lock.ReadLock, block); err != nil { + uid1 := 123 + uid2 := 456 + if err := fd.Impl().LockBSD(ctx, uid1, lock.ReadLock, nil); err != nil { t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) } - if err := fd.Impl().LockBSD(ctx, uid2, lock.ReadLock, block); err != nil { + if err := fd.Impl().LockBSD(ctx, uid2, lock.ReadLock, nil); err != nil { t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) } - if got, want := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, block), syserror.ErrWouldBlock; got != want { + if got, want := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, nil), syserror.ErrWouldBlock; got != want { t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want) } if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil { t.Fatalf("fd.Impl().UnlockBSD failed: err = %v", err) } - if err := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, block); err != nil { + if err := fd.Impl().LockBSD(ctx, uid2, lock.WriteLock, nil); err != nil { t.Fatalf("fd.Impl().LockBSD failed: err = %v", err) } - rng1 := lock.LockRange{0, 1} - rng2 := lock.LockRange{1, 2} - - if err := fd.Impl().LockPOSIX(ctx, uid1, lock.ReadLock, rng1, block); err != nil { + if err := fd.Impl().LockPOSIX(ctx, uid1, lock.ReadLock, 0, 1, linux.SEEK_SET, nil); err != nil { t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) } - if err := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, rng2, block); err != nil { + if err := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, 1, 2, linux.SEEK_SET, nil); err != nil { t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) } - if err := fd.Impl().LockPOSIX(ctx, uid1, lock.WriteLock, rng1, block); err != nil { + if err := fd.Impl().LockPOSIX(ctx, uid1, lock.WriteLock, 0, 1, linux.SEEK_SET, nil); err != nil { t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err) } - if got, want := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, rng1, block), syserror.ErrWouldBlock; got != want { + if got, want := fd.Impl().LockPOSIX(ctx, uid2, lock.ReadLock, 0, 1, linux.SEEK_SET, nil), syserror.ErrWouldBlock; got != want { t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want) } - if err := fd.Impl().UnlockPOSIX(ctx, uid1, rng1); err != nil { + if err := fd.Impl().UnlockPOSIX(ctx, uid1, 0, 1, linux.SEEK_SET); err != nil { t.Fatalf("fd.Impl().UnlockPOSIX failed: err = %v", err) } } diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 71a7522af..d0a3e1a5c 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -36,11 +36,11 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sentry/vfs/memxattr" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -310,7 +310,7 @@ type inode struct { ctime int64 // nanoseconds mtime int64 // nanoseconds - locks lock.FileLocks + locks vfs.FileLocks // Inotify watches for this inode. 
watches vfs.Watches @@ -761,9 +761,20 @@ func NewMemfd(mount *vfs.Mount, creds *auth.Credentials, allowSeals bool, name s // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with // FMODE_READ | FMODE_WRITE. var fd regularFileFD + fd.Init(&inode.locks) flags := uint32(linux.O_RDWR) if err := fd.vfsfd.Init(&fd, flags, mount, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } return &fd.vfsfd, nil } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 48911240f..4b7d234a4 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -29,6 +29,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" ) // FDFlags define flags for an individual descriptor. @@ -148,7 +149,12 @@ func (f *FDTable) drop(file *fs.File) { // dropVFS2 drops the table reference. func (f *FDTable) dropVFS2(file *vfs.FileDescription) { - // TODO(gvisor.dev/issue/1480): Release locks. + // Release any POSIX lock possibly held by the FDTable. Range {0, 0} means the + // entire file. + err := file.UnlockPOSIX(context.Background(), f, 0, 0, linux.SEEK_SET) + if err != nil && err != syserror.ENOLCK { + panic(fmt.Sprintf("UnlockPOSIX failed: %v", err)) + } // Generate inotify events. ev := uint32(linux.IN_CLOSE_NOWRITE) @@ -157,7 +163,7 @@ func (f *FDTable) dropVFS2(file *vfs.FileDescription) { } file.Dentry().InotifyWithParent(ev, 0, vfs.PathEvent) - // Drop the table reference. + // Drop the table's reference. file.DecRef() } diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 0db546b98..449643118 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -26,8 +26,8 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index c0e9ee1f4..a4519363f 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -63,12 +63,12 @@ func NewVFSPipe(isNamed bool, sizeBytes, atomicIOBytes int64) *VFSPipe { // Preconditions: statusFlags should not contain an open access mode. func (vp *VFSPipe) ReaderWriterPair(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription) { // Connected pipes share the same locks. 
- locks := &lock.FileLocks{} + locks := &vfs.FileLocks{} return vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks), vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks) } // Open opens the pipe represented by vp. -func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *lock.FileLocks) (*vfs.FileDescription, error) { +func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) { vp.mu.Lock() defer vp.mu.Unlock() @@ -130,7 +130,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s } // Preconditions: vp.mu must be held. -func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *lock.FileLocks) *vfs.FileDescription { +func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) *vfs.FileDescription { fd := &VFSPipeFD{ pipe: &vp.pipe, } @@ -451,3 +451,13 @@ func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFr } return n, err } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *VFSPipeFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (fd *VFSPipeFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 60c9896fc..ff81ea6e6 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -26,6 +26,7 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/hostfd", "//pkg/sentry/inet", @@ -34,7 +35,6 @@ go_library( "//pkg/sentry/socket", "//pkg/sentry/socket/control", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip/stack", diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go index 027add1fd..ad5f64799 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -21,12 +21,12 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/hostfd" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -61,7 +61,7 @@ func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol in fd: fd, }, } - s.LockFD.Init(&lock.FileLocks{}) + s.LockFD.Init(&vfs.FileLocks{}) if err := fdnotifier.AddFD(int32(fd), &s.queue); err != nil { return nil, syserr.FromError(err) } @@ -134,6 +134,16 @@ func (s *socketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs return int64(n), err } +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. 
+func (s *socketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (s *socketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence) +} + type socketProviderVFS2 struct { family int } diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 420e573c9..d5ca3ac56 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -20,6 +20,7 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", @@ -29,7 +30,6 @@ go_library( "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go index 8bfee5193..dbcd8b49a 100644 --- a/pkg/sentry/socket/netlink/socket_vfs2.go +++ b/pkg/sentry/socket/netlink/socket_vfs2.go @@ -18,12 +18,12 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -78,7 +78,7 @@ func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketV sendBufferSize: defaultSendBufferSize, }, } - fd.LockFD.Init(&lock.FileLocks{}) + fd.LockFD.Init(&vfs.FileLocks{}) return fd, nil } @@ -140,3 +140,13 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs n, err := s.sendMsg(ctx, src, nil, 0, socket.ControlMessages{}) return int64(n), err.ToError() } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
+func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 0f592ecc3..ea6ebd0e2 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -28,6 +28,7 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/inet", "//pkg/sentry/kernel", @@ -37,7 +38,6 @@ go_library( "//pkg/sentry/socket/netfilter", "//pkg/sentry/unimpl", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index 1412a4810..d65a89316 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -19,13 +19,13 @@ import ( "gvisor.dev/gvisor/pkg/amutex" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -66,7 +66,7 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu protocol: protocol, }, } - s.LockFD.Init(&lock.FileLocks{}) + s.LockFD.Init(&vfs.FileLocks{}) vfsfd := &s.vfsfd if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{ DenyPRead: true, @@ -318,3 +318,13 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by return SetSockOpt(t, s, s.Endpoint, level, name, optVal) } + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
+func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence) +} diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index 7d4cc80fe..cca5e70f1 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/time", @@ -29,7 +30,6 @@ go_library( "//pkg/sentry/socket/netstack", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", - "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 8c32371a2..ff2149250 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/arch" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" @@ -26,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -53,7 +53,7 @@ func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) mnt := t.Kernel().SocketMount() d := sockfs.NewDentry(t.Credentials(), mnt) - fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &lock.FileLocks{}) + fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &vfs.FileLocks{}) if err != nil { return nil, syserr.FromError(err) } @@ -62,7 +62,7 @@ func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) // NewFileDescription creates and returns a socket file description // corresponding to the given mount and dentry. -func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *lock.FileLocks) (*vfs.FileDescription, error) { +func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *vfs.FileLocks) (*vfs.FileDescription, error) { // You can create AF_UNIX, SOCK_RAW sockets. They're the same as // SOCK_DGRAM and don't require CAP_NET_RAW. if stype == linux.SOCK_RAW { @@ -300,6 +300,16 @@ func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []by return netstack.SetSockOpt(t, s, s.ep, level, name, optVal) } +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { + return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence) +} + // providerVFS2 is a unix domain socket provider for VFS2. 
type providerVFS2 struct{} diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index f9ccb303c..f5eaa076b 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -17,10 +17,12 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" ) @@ -167,8 +169,44 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } err := tmpfs.AddSeals(file, args[2].Uint()) return 0, nil, err + case linux.F_SETLK, linux.F_SETLKW: + return 0, nil, posixLock(t, args, file, cmd) default: // TODO(gvisor.dev/issue/2920): Everything else is not yet supported. return 0, nil, syserror.EINVAL } } + +func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, cmd int32) error { + // Copy in the lock request. + flockAddr := args[2].Pointer() + var flock linux.Flock + if _, err := t.CopyIn(flockAddr, &flock); err != nil { + return err + } + + var blocker lock.Blocker + if cmd == linux.F_SETLKW { + blocker = t + } + + switch flock.Type { + case linux.F_RDLCK: + if !file.IsReadable() { + return syserror.EBADF + } + return file.LockPOSIX(t, t.FDTable(), lock.ReadLock, uint64(flock.Start), uint64(flock.Len), flock.Whence, blocker) + + case linux.F_WRLCK: + if !file.IsWritable() { + return syserror.EBADF + } + return file.LockPOSIX(t, t.FDTable(), lock.WriteLock, uint64(flock.Start), uint64(flock.Len), flock.Whence, blocker) + + case linux.F_UNLCK: + return file.UnlockPOSIX(t, t.FDTable(), uint64(flock.Start), uint64(flock.Len), flock.Whence) + + default: + return syserror.EINVAL + } +} diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 16d9f3a28..642769e7c 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -44,6 +44,7 @@ go_library( "filesystem_impl_util.go", "filesystem_type.go", "inotify.go", + "lock.go", "mount.go", "mount_unsafe.go", "options.go", @@ -72,7 +73,6 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/uniqueid", - "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 13c48824e..e0538ea53 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -438,14 +438,10 @@ type FileDescriptionImpl interface { UnlockBSD(ctx context.Context, uid lock.UniqueID) error // LockPOSIX tries to acquire a POSIX-style advisory file lock. - // - // TODO(gvisor.dev/issue/1480): POSIX-style file locking - LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error + LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, start, length uint64, whence int16, block lock.Blocker) error // UnlockPOSIX releases a POSIX-style advisory file lock. - // - // TODO(gvisor.dev/issue/1480): POSIX-style file locking - UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error + UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, length uint64, whence int16) error } // Dirent holds the information contained in struct linux_dirent64. 
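Editorial aside (not part of the patch): the F_SETLK/F_SETLKW plumbing added to Fcntl above is what ordinary applications reach through fcntl(2). The sketch below is a minimal, hypothetical userspace program written against golang.org/x/sys/unix (the file path is invented) showing the three request types the new posixLock helper dispatches on.

// fcntl_posix_lock_example.go - illustrative only, not part of this change.
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	f, err := os.OpenFile("/tmp/lockdemo", os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Request a write lock on bytes [0, 100) relative to the start of the
	// file. A Len of 0 would mean "through end of file".
	lk := unix.Flock_t{
		Type:   unix.F_WRLCK,
		Whence: unix.SEEK_SET,
		Start:  0,
		Len:    100,
	}

	// F_SETLK is the non-blocking variant (handled above with a nil Blocker);
	// F_SETLKW would pass the calling task so it can sleep until the range is
	// free.
	if err := unix.FcntlFlock(f.Fd(), unix.F_SETLK, &lk); err != nil {
		fmt.Println("lock is contended:", err)
		return
	}

	// F_UNLCK releases the same byte range via the UnlockPOSIX path.
	lk.Type = unix.F_UNLCK
	if err := unix.FcntlFlock(f.Fd(), unix.F_SETLK, &lk); err != nil {
		fmt.Println("unlock failed:", err)
	}
}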
@@ -764,3 +760,13 @@ func (fd *FileDescription) LockBSD(ctx context.Context, lockType lock.LockType, func (fd *FileDescription) UnlockBSD(ctx context.Context) error { return fd.impl.UnlockBSD(ctx, fd) } + +// LockPOSIX locks a POSIX-style file range lock. +func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, start, end uint64, whence int16, block lock.Blocker) error { + return fd.impl.LockPOSIX(ctx, uid, t, start, end, whence, block) +} + +// UnlockPOSIX unlocks a POSIX-style file range lock. +func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, end uint64, whence int16) error { + return fd.impl.UnlockPOSIX(ctx, uid, start, end, whence) +} diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index af7213dfd..1e66997ce 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -23,7 +23,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -369,14 +368,19 @@ func GenericConfigureMMap(fd *FileDescription, m memmap.Mappable, opts *memmap.M // LockFD may be used by most implementations of FileDescriptionImpl.Lock* // functions. Caller must call Init(). type LockFD struct { - locks *lock.FileLocks + locks *FileLocks } // Init initializes fd with FileLocks to use. -func (fd *LockFD) Init(locks *lock.FileLocks) { +func (fd *LockFD) Init(locks *FileLocks) { fd.locks = locks } +// Locks returns the locks associated with this file. +func (fd *LockFD) Locks() *FileLocks { + return fd.locks +} + // LockBSD implements vfs.FileDescriptionImpl.LockBSD. func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { return fd.locks.LockBSD(uid, t, block) @@ -388,17 +392,6 @@ func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { return nil } -// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { - return fd.locks.LockPOSIX(uid, t, rng, block) -} - -// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. -func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, rng fslock.LockRange) error { - fd.locks.UnlockPOSIX(uid, rng) - return nil -} - // NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface // returning ENOLCK. type NoLockFD struct{} @@ -414,11 +407,11 @@ func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { } // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { +func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { return syserror.ENOLCK } // UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. 
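Editorial aside (not part of the patch): the FileDescription wrappers above, together with the LockFD changes below, define the pattern every VFS2 file implementation in this patch follows. The inode owns one vfs.FileLocks, each file description embeds vfs.LockFD pointing at it, and LockPOSIX/UnlockPOSIX forward to Locks() along with the FD so that relative offsets can be resolved. A condensed sketch of that wiring follows; the type names myInode/myFD are hypothetical and the FD is trimmed to its lock-related parts only.

package myfs

import (
	"gvisor.dev/gvisor/pkg/context"
	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
)

// myInode owns one FileLocks; every FD opened on this inode shares it, so
// their POSIX and BSD locks are checked against each other.
type myInode struct {
	locks vfs.FileLocks
}

// myFD is the lock-related skeleton of a file description implementation.
type myFD struct {
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.LockFD // provides LockBSD/UnlockBSD and access to the shared locks.
}

func newFD(inode *myInode) *myFD {
	fd := &myFD{}
	fd.LockFD.Init(&inode.locks) // share the inode's lock state.
	return fd
}

// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
func (fd *myFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
	return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
}

// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
func (fd *myFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
	return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
}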
-func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, rng fslock.LockRange) error { +func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error { return syserror.ENOLCK } diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go new file mode 100644 index 000000000..6c7583a81 --- /dev/null +++ b/pkg/sentry/vfs/lock.go @@ -0,0 +1,109 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package lock provides POSIX and BSD style file locking for VFS2 file +// implementations. +// +// The actual implementations can be found in the lock package under +// sentry/fs/lock. +package vfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/syserror" +) + +// FileLocks supports POSIX and BSD style locks, which correspond to fcntl(2) +// and flock(2) respectively in Linux. It can be embedded into various file +// implementations for VFS2 that support locking. +// +// Note that in Linux these two types of locks are _not_ cooperative, because +// race and deadlock conditions make merging them prohibitive. We do the same +// and keep them oblivious to each other. +type FileLocks struct { + // bsd is a set of BSD-style advisory file wide locks, see flock(2). + bsd fslock.Locks + + // posix is a set of POSIX-style regional advisory locks, see fcntl(2). + posix fslock.Locks +} + +// LockBSD tries to acquire a BSD-style lock on the entire file. +func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) { + return nil + } + return syserror.ErrWouldBlock +} + +// UnlockBSD releases a BSD-style lock on the entire file. +// +// This operation is always successful, even if there did not exist a lock on +// the requested region held by uid in the first place. +func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) { + fl.bsd.UnlockRegion(uid, fslock.LockRange{0, fslock.LockEOF}) +} + +// LockPOSIX tries to acquire a POSIX-style lock on a file region. +func (fl *FileLocks) LockPOSIX(ctx context.Context, fd *FileDescription, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { + rng, err := computeRange(ctx, fd, start, length, whence) + if err != nil { + return err + } + if fl.posix.LockRegion(uid, t, rng, block) { + return nil + } + return syserror.ErrWouldBlock +} + +// UnlockPOSIX releases a POSIX-style lock on a file region. +// +// This operation is always successful, even if there did not exist a lock on +// the requested region held by uid in the first place. 
+func (fl *FileLocks) UnlockPOSIX(ctx context.Context, fd *FileDescription, uid fslock.UniqueID, start, length uint64, whence int16) error { + rng, err := computeRange(ctx, fd, start, length, whence) + if err != nil { + return err + } + fl.posix.UnlockRegion(uid, rng) + return nil +} + +func computeRange(ctx context.Context, fd *FileDescription, start uint64, length uint64, whence int16) (fslock.LockRange, error) { + var off int64 + switch whence { + case linux.SEEK_SET: + off = 0 + case linux.SEEK_CUR: + // Note that Linux does not hold any mutexes while retrieving the file + // offset, see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk. + curOff, err := fd.Seek(ctx, 0, linux.SEEK_CUR) + if err != nil { + return fslock.LockRange{}, err + } + off = curOff + case linux.SEEK_END: + stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_SIZE}) + if err != nil { + return fslock.LockRange{}, err + } + off = int64(stat.Size) + default: + return fslock.LockRange{}, syserror.EINVAL + } + + return fslock.ComputeRange(int64(start), int64(length), off) +} diff --git a/pkg/sentry/vfs/lock/BUILD b/pkg/sentry/vfs/lock/BUILD deleted file mode 100644 index d9ab063b7..000000000 --- a/pkg/sentry/vfs/lock/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "lock", - srcs = ["lock.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/sentry/fs/lock", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/vfs/lock/lock.go b/pkg/sentry/vfs/lock/lock.go deleted file mode 100644 index 724dfe743..000000000 --- a/pkg/sentry/vfs/lock/lock.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package lock provides POSIX and BSD style file locking for VFS2 file -// implementations. -// -// The actual implementations can be found in the lock package under -// sentry/fs/lock. -package lock - -import ( - fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" - "gvisor.dev/gvisor/pkg/syserror" -) - -// FileLocks supports POSIX and BSD style locks, which correspond to fcntl(2) -// and flock(2) respectively in Linux. It can be embedded into various file -// implementations for VFS2 that support locking. -// -// Note that in Linux these two types of locks are _not_ cooperative, because -// race and deadlock conditions make merging them prohibitive. We do the same -// and keep them oblivious to each other. -type FileLocks struct { - // bsd is a set of BSD-style advisory file wide locks, see flock(2). - bsd fslock.Locks - - // posix is a set of POSIX-style regional advisory locks, see fcntl(2). - posix fslock.Locks -} - -// LockBSD tries to acquire a BSD-style lock on the entire file. 
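Editorial aside (not part of the patch): computeRange above is what turns the raw (start, length, whence) triple from struct flock into an absolute fslock.LockRange. The whence value only selects the base offset (0 for SEEK_SET, the FD's current offset for SEEK_CUR, the file size for SEEK_END); the existing fslock.ComputeRange helper then adds start and treats a zero length as "through end of file". A small, hypothetical driver showing the arithmetic (the numbers are made up, and it assumes it is compiled inside the gVisor tree):

package main

import (
	"fmt"

	fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
)

func main() {
	// Emulate SEEK_END on a 400-byte file: the base offset is the file size.
	base := int64(400)

	// Lock 50 bytes starting 100 bytes past the base, i.e. bytes [500, 550).
	rng, err := fslock.ComputeRange(100, 50, base)
	fmt.Println(rng, err)

	// SEEK_SET with start 0 and length 0 covers the whole file (LockEOF).
	whole, _ := fslock.ComputeRange(0, 0, 0)
	fmt.Println(whole.Start, whole.End == fslock.LockEOF)
}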
-func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { - if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) { - return nil - } - return syserror.ErrWouldBlock -} - -// UnlockBSD releases a BSD-style lock on the entire file. -// -// This operation is always successful, even if there did not exist a lock on -// the requested region held by uid in the first place. -func (fl *FileLocks) UnlockBSD(uid fslock.UniqueID) { - fl.bsd.UnlockRegion(uid, fslock.LockRange{0, fslock.LockEOF}) -} - -// LockPOSIX tries to acquire a POSIX-style lock on a file region. -func (fl *FileLocks) LockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { - if fl.posix.LockRegion(uid, t, rng, block) { - return nil - } - return syserror.ErrWouldBlock -} - -// UnlockPOSIX releases a POSIX-style lock on a file region. -// -// This operation is always successful, even if there did not exist a lock on -// the requested region held by uid in the first place. -func (fl *FileLocks) UnlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) { - fl.posix.UnlockRegion(uid, rng) -} diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 96044928e..078e4a284 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -791,6 +791,7 @@ cc_binary( deps = [ ":socket_test_util", "//test/util:cleanup", + "//test/util:epoll_util", "//test/util:eventfd_util", "//test/util:fs_util", "@com_google_absl//absl/base:core_headers", diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 35e8a4ff3..25bef2522 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -191,45 +191,85 @@ TEST(FcntlTest, SetFlags) { EXPECT_EQ(rflags, expected); } -TEST_F(FcntlLockTest, SetLockBadFd) { +void TestLock(int fd, short lock_type = F_RDLCK) { // NOLINT, type in flock struct flock fl; - fl.l_type = F_WRLCK; + fl.l_type = lock_type; fl.l_whence = SEEK_SET; fl.l_start = 0; - // len 0 has a special meaning: lock all bytes despite how - // large the file grows. + // len 0 locks all bytes despite how large the file grows. fl.l_len = 0; - EXPECT_THAT(fcntl(-1, F_SETLK, &fl), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(fcntl(fd, F_SETLK, &fl), SyscallSucceeds()); } -TEST_F(FcntlLockTest, SetLockPipe) { - int fds[2]; - ASSERT_THAT(pipe(fds), SyscallSucceeds()); - +void TestLockBadFD(int fd, + short lock_type = F_RDLCK) { // NOLINT, type in flock struct flock fl; - fl.l_type = F_WRLCK; + fl.l_type = lock_type; fl.l_whence = SEEK_SET; fl.l_start = 0; - // Same as SetLockBadFd, but doesn't matter, we expect this to fail. + // len 0 locks all bytes despite how large the file grows. fl.l_len = 0; - EXPECT_THAT(fcntl(fds[0], F_SETLK, &fl), SyscallFailsWithErrno(EBADF)); - EXPECT_THAT(close(fds[0]), SyscallSucceeds()); - EXPECT_THAT(close(fds[1]), SyscallSucceeds()); + EXPECT_THAT(fcntl(fd, F_SETLK, &fl), SyscallFailsWithErrno(EBADF)); } +TEST_F(FcntlLockTest, SetLockBadFd) { TestLockBadFD(-1); } + TEST_F(FcntlLockTest, SetLockDir) { auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0666)); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0000)); + TestLock(fd.get()); +} - struct flock fl; - fl.l_type = F_RDLCK; - fl.l_whence = SEEK_SET; - fl.l_start = 0; - // Same as SetLockBadFd. 
- fl.l_len = 0; +TEST_F(FcntlLockTest, SetLockSymlink) { + // TODO(gvisor.dev/issue/2782): Replace with IsRunningWithVFS1() when O_PATH + // is supported. + SKIP_IF(IsRunningOnGvisor()); - EXPECT_THAT(fcntl(fd.get(), F_SETLK, &fl), SyscallSucceeds()); + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto symlink = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path())); + + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(symlink.path(), O_RDONLY | O_PATH, 0000)); + TestLockBadFD(fd.get()); +} + +TEST_F(FcntlLockTest, SetLockProc) { + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/status", O_RDONLY, 0000)); + TestLock(fd.get()); +} + +TEST_F(FcntlLockTest, SetLockPipe) { + SKIP_IF(IsRunningWithVFS1()); + + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + TestLock(fds[0]); + TestLockBadFD(fds[0], F_WRLCK); + + TestLock(fds[1], F_WRLCK); + TestLockBadFD(fds[1]); + + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST_F(FcntlLockTest, SetLockSocket) { + SKIP_IF(IsRunningWithVFS1()); + + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(true /* abstract */, AF_UNIX)); + ASSERT_THAT( + bind(sock, reinterpret_cast(&addr), sizeof(addr)), + SyscallSucceeds()); + + TestLock(sock); + EXPECT_THAT(close(sock), SyscallSucceeds()); } TEST_F(FcntlLockTest, SetLockBadOpenFlagsWrite) { @@ -241,8 +281,7 @@ TEST_F(FcntlLockTest, SetLockBadOpenFlagsWrite) { fl0.l_type = F_WRLCK; fl0.l_whence = SEEK_SET; fl0.l_start = 0; - // Same as SetLockBadFd. - fl0.l_len = 0; + fl0.l_len = 0; // Lock all file // Expect that setting a write lock using a read only file descriptor // won't work. @@ -704,7 +743,7 @@ TEST_F(FcntlLockTest, SetWriteLockThenBlockingWriteLock) { << "Exited with code: " << status; } -// This test will veirfy that blocking works as expected when another process +// This test will verify that blocking works as expected when another process // holds a read lock when obtaining a write lock. This test will hold the lock // for some amount of time and then wait for the second process to send over the // socket_fd the amount of time it was blocked for before the lock succeeded. @@ -1109,8 +1148,7 @@ int main(int argc, char** argv) { fl.l_start = absl::GetFlag(FLAGS_child_setlock_start); fl.l_len = absl::GetFlag(FLAGS_child_setlock_len); - // Test the fcntl, no need to log, the error is unambiguously - // from fcntl at this point. + // Test the fcntl. 
int err = 0; int ret = 0; @@ -1123,6 +1161,8 @@ int main(int argc, char** argv) { if (ret == -1 && errno != 0) { err = errno; + std::cerr << "CHILD lock " << setlock_on << " failed " << err + << std::endl; } // If there is a socket fd let's send back the time in microseconds it took -- cgit v1.2.3 From 6d806ee7198422973a2e4efa9b539de7792b933f Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 17 Jun 2020 12:32:59 -0700 Subject: Remove various uses of 'blacklist' Updates #2972 PiperOrigin-RevId: 316942245 --- pkg/sentry/fs/host/tty.go | 6 +++--- pkg/sentry/fsimpl/host/tty.go | 6 +++--- test/runner/runner.go | 6 +++--- tools/nogo/matchers.go | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index cb91355ab..82a02fcb2 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -308,9 +308,9 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e task := kernel.TaskFromContext(ctx) if task == nil { // No task? Linux does not have an analog for this case, but - // tty_check_change is more of a blacklist of cases than a - // whitelist, and is surprisingly permissive. Allowing the - // change seems most appropriate. + // tty_check_change only blocks specific cases and is + // surprisingly permissive. Allowing the change seems + // appropriate. return nil } diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 0fbc543b1..4ee9270cc 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -326,9 +326,9 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal) task := kernel.TaskFromContext(ctx) if task == nil { // No task? Linux does not have an analog for this case, but - // tty_check_change is more of a blacklist of cases than a - // whitelist, and is surprisingly permissive. Allowing the - // change seems most appropriate. + // tty_check_change only blocks specific cases and is + // surprisingly permissive. Allowing the change seems + // appropriate. return nil } diff --git a/test/runner/runner.go b/test/runner/runner.go index f8baf61b0..5456e46a6 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -391,12 +391,12 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { } } -// filterEnv returns an environment with the blacklisted variables removed. -func filterEnv(env, blacklist []string) []string { +// filterEnv returns an environment with the excluded variables removed. +func filterEnv(env, exclude []string) []string { var out []string for _, kv := range env { ok := true - for _, k := range blacklist { + for _, k := range exclude { if strings.HasPrefix(kv, k+"=") { ok = false break diff --git a/tools/nogo/matchers.go b/tools/nogo/matchers.go index bc5772303..32f099925 100644 --- a/tools/nogo/matchers.go +++ b/tools/nogo/matchers.go @@ -89,7 +89,7 @@ func (r resultExcluded) ShouldReport(d analysis.Diagnostic, _ *token.FileSet) bo return false } } - return true // Not blacklisted. + return true // Not excluded. } // andMatcher is a composite matcher. -- cgit v1.2.3 From 505e8f4e3d72400194a69c220ce2f4288e957c4c Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 17 Jun 2020 14:07:27 -0700 Subject: Fix TtlDefault test on linux. Different flavors of Linux seem to use different defaults, so we accept 64 or 127 as the TtlDefault in the test.
PiperOrigin-RevId: 316961150 --- test/syscalls/linux/socket_ip_unbound.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc index 5536ab53a..796598eee 100644 --- a/test/syscalls/linux/socket_ip_unbound.cc +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -40,7 +40,7 @@ TEST_P(IPUnboundSocketTest, TtlDefault) { socklen_t get_sz = sizeof(get); EXPECT_THAT(getsockopt(socket->get(), IPPROTO_IP, IP_TTL, &get, &get_sz), SyscallSucceedsWithValue(0)); - EXPECT_EQ(get, 64); + EXPECT_TRUE(get == 64 || get == 127); EXPECT_EQ(get_sz, sizeof(get)); } -- cgit v1.2.3 From 28a5c55bb6ef8110f4fb9385ff232796790bb7bc Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 17 Jun 2020 14:41:05 -0700 Subject: Proc test: Allow root mount pathname to be non-root. The test was expecting that the root mount pathname was "/", but it doesn't need to be. Only the mount point actually should be "/" (otherwise it is not the root). PiperOrigin-RevId: 316968025 --- test/syscalls/linux/proc.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 63642880a..d73d392d5 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1921,7 +1921,7 @@ TEST(ProcSelfMountinfo, RequiredFieldsArePresent) { AllOf( // Root mount. ContainsRegex( - R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"), + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ /\S* / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"), // Proc mount - always rw. ContainsRegex( R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)"))); -- cgit v1.2.3 From 02072fd243a1e88b635dd6a24dd512608fbfce5f Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Wed, 17 Jun 2020 15:14:17 -0700 Subject: Add test for stat("/proc/net/snmp"). PiperOrigin-RevId: 316974863 --- test/syscalls/linux/proc_net.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index 02a326fc3..3377b65cf 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -441,6 +441,11 @@ TEST(ProcNetSnmp, CheckNetStat) { EXPECT_EQ(value_count, 4); } +TEST(ProcNetSnmp, Stat) { + struct stat st = {}; + ASSERT_THAT(stat("/proc/net/snmp", &st), SyscallSucceeds()); +} + TEST(ProcNetSnmp, CheckSnmp) { // TODO(b/155123175): SNMP and netstat don't work on gVisor. SKIP_IF(IsRunningOnGvisor()); -- cgit v1.2.3 From 97f6b20e89d99c4d92d6491ef3fad0933c9ba53d Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 17 Jun 2020 18:41:55 -0700 Subject: Move mount configuration to RunOpts Separate mount configuration from links and move it to RunOpts, like the other options.
PiperOrigin-RevId: 317010158 --- pkg/test/dockerutil/dockerutil.go | 53 ++++++++++++++++++++------- test/image/image_test.go | 35 +++++++++++------- test/iptables/iptables_test.go | 7 ++-- test/packetimpact/runner/packetimpact_test.go | 10 ++++- test/runtimes/runner/main.go | 7 ++-- 5 files changed, 77 insertions(+), 35 deletions(-) (limited to 'test') diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index c45d2ecbc..819dd0a59 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -210,7 +210,6 @@ type Docker struct { Runtime string Name string copyErr error - mounts []string cleanups []func() } @@ -229,13 +228,8 @@ func MakeDocker(logger testutil.Logger) *Docker { } } -// Mount mounts the given source and makes it available in the container. -func (d *Docker) Mount(target, source string, mode MountMode) { - d.mounts = append(d.mounts, fmt.Sprintf("-v=%s:%s:%v", source, target, mode)) -} - // CopyFiles copies in and mounts the given files. They are always ReadOnly. -func (d *Docker) CopyFiles(target string, sources ...string) { +func (d *Docker) CopyFiles(opts *RunOpts, targetDir string, sources ...string) { dir, err := ioutil.TempDir("", d.Name) if err != nil { d.copyErr = fmt.Errorf("ioutil.TempDir failed: %v", err) @@ -259,12 +253,33 @@ func (d *Docker) CopyFiles(target string, sources ...string) { } d.logger.Logf("copy: %s -> %s", src, dst) } - d.Mount(target, dir, ReadOnly) + opts.Mounts = append(opts.Mounts, Mount{ + Source: dir, + Target: targetDir, + Mode: ReadOnly, + }) +} + +// Mount describes a mount point inside the container. +type Mount struct { + // Source is the path outside the container. + Source string + + // Target is the path inside the container. + Target string + + // Mode tells whether the mount inside the container should be readonly. + Mode MountMode } -// Link links the given target. -func (d *Docker) Link(target string, source *Docker) { - d.mounts = append(d.mounts, fmt.Sprintf("--link=%s:%s", source.Name, target)) +// Link informs dockers that a given container needs to be made accessible from +// the container being configured. +type Link struct { + // Source is the container to connect to. + Source *Docker + + // Target is the alias for the container. + Target string } // RunOpts are options for running a container. @@ -310,6 +325,12 @@ type RunOpts struct { // return value from the Run function. Foreground bool + // Mounts is the list of directories/files to be mounted inside the container. + Mounts []Mount + + // Links is the list of containers to be connected to the container. + Links []Link + // Extra are extra arguments that may be passed. Extra []string } @@ -368,7 +389,13 @@ func (d *Docker) argsFor(r *RunOpts, command string, p []string) (rv []string) { if isExec { rv = append(rv, d.Name) } else { - rv = append(rv, d.mounts...) + for _, m := range r.Mounts { + rv = append(rv, fmt.Sprintf("-v=%s:%s:%v", m.Source, m.Target, m.Mode)) + } + for _, l := range r.Links { + rv = append(rv, fmt.Sprintf("--link=%s:%s", l.Source.Name, l.Target)) + } + if len(d.Runtime) > 0 { rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) } @@ -501,8 +528,6 @@ func (d *Docker) CleanUp() { if err := d.Remove(); err != nil { d.logger.Logf("error removing container %q: %v", d.Name, err) } - // Forget all mounts. - d.mounts = nil // Execute all cleanups. 
for _, c := range d.cleanups { c() diff --git a/test/image/image_test.go b/test/image/image_test.go index 2e3543109..3e4321480 100644 --- a/test/image/image_test.go +++ b/test/image/image_test.go @@ -111,11 +111,12 @@ func TestHttpd(t *testing.T) { defer d.CleanUp() // Start the container. - d.CopyFiles("/usr/local/apache2/htdocs", "test/image/latin10k.txt") - if err := d.Spawn(dockerutil.RunOpts{ + opts := dockerutil.RunOpts{ Image: "basic/httpd", Ports: []int{80}, - }); err != nil { + } + d.CopyFiles(&opts, "/usr/local/apache2/htdocs", "test/image/latin10k.txt") + if err := d.Spawn(opts); err != nil { t.Fatalf("docker run failed: %v", err) } @@ -138,11 +139,12 @@ func TestNginx(t *testing.T) { defer d.CleanUp() // Start the container. - d.CopyFiles("/usr/share/nginx/html", "test/image/latin10k.txt") - if err := d.Spawn(dockerutil.RunOpts{ + opts := dockerutil.RunOpts{ Image: "basic/nginx", Ports: []int{80}, - }); err != nil { + } + d.CopyFiles(&opts, "/usr/share/nginx/html", "test/image/latin10k.txt") + if err := d.Spawn(opts); err != nil { t.Fatalf("docker run failed: %v", err) } @@ -183,11 +185,17 @@ func TestMysql(t *testing.T) { // Tell mysql client to connect to the server and execute the file in // verbose mode to verify the output. - client.CopyFiles("/sql", "test/image/mysql.sql") - client.Link("mysql", server) - if _, err := client.Run(dockerutil.RunOpts{ + opts := dockerutil.RunOpts{ Image: "basic/mysql", - }, "mysql", "-hmysql", "-uroot", "-pfoobar123", "-v", "-e", "source /sql/mysql.sql"); err != nil { + Links: []dockerutil.Link{ + { + Source: server, + Target: "mysql", + }, + }, + } + client.CopyFiles(&opts, "/sql", "test/image/mysql.sql") + if _, err := client.Run(opts, "mysql", "-hmysql", "-uroot", "-pfoobar123", "-v", "-e", "source /sql/mysql.sql"); err != nil { t.Fatalf("docker run failed: %v", err) } @@ -236,11 +244,12 @@ func TestRuby(t *testing.T) { defer d.CleanUp() // Execute the ruby workload. - d.CopyFiles("/src", "test/image/ruby.rb", "test/image/ruby.sh") - if err := d.Spawn(dockerutil.RunOpts{ + opts := dockerutil.RunOpts{ Image: "basic/ruby", Ports: []int{8080}, - }, "/src/ruby.sh"); err != nil { + } + d.CopyFiles(&opts, "/src", "test/image/ruby.rb", "test/image/ruby.sh") + if err := d.Spawn(opts, "/src/ruby.sh"); err != nil { t.Fatalf("docker run failed: %v", err) } diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 38319a3b2..340f9426e 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -41,11 +41,12 @@ func singleTest(t *testing.T, test TestCase) { defer d.CleanUp() // Create and start the container. 
- d.CopyFiles("/runner", "test/iptables/runner/runner") - if err := d.Spawn(dockerutil.RunOpts{ + opts := dockerutil.RunOpts{ Image: "iptables", CapAdd: []string{"NET_ADMIN"}, - }, "/runner/runner", "-name", test.Name()); err != nil { + } + d.CopyFiles(&opts, "/runner", "test/iptables/runner/runner") + if err := d.Spawn(opts, "/runner/runner", "-name", test.Name()); err != nil { t.Fatalf("docker run failed: %v", err) } diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 75617f6de..3cac5915f 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -142,7 +142,7 @@ func TestOne(t *testing.T) { } const containerPosixServerBinary = "/packetimpact/posix_server" - dut.CopyFiles("/packetimpact", "/test/packetimpact/dut/posix_server") + dut.CopyFiles(&runOpts, "/packetimpact", "/test/packetimpact/dut/posix_server") if err := dut.Create(runOpts, containerPosixServerBinary, "--ip=0.0.0.0", "--port="+ctrlPort); err != nil { t.Fatalf("unable to create container %s: %s", dut.Name, err) @@ -193,7 +193,13 @@ func TestOne(t *testing.T) { tbb := path.Base(*testbenchBinary) containerTestbenchBinary := "/packetimpact/" + tbb - testbench.CopyFiles("/packetimpact", "/test/packetimpact/tests/"+tbb) + runOpts = dockerutil.RunOpts{ + Image: "packetimpact", + CapAdd: []string{"NET_ADMIN"}, + Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm", "-v", tmpDir + ":" + testOutputDir}, + Foreground: true, + } + testbench.CopyFiles(&runOpts, "/packetimpact", "/test/packetimpact/tests/"+tbb) // Run tcpdump in the test bench unbuffered, without DNS resolution, just on // the interface with the test packets. diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go index 7989dca84..54d1169ef 100644 --- a/test/runtimes/runner/main.go +++ b/test/runtimes/runner/main.go @@ -79,10 +79,11 @@ func runTests() int { // getTests executes all tests as table tests. func getTests(d *dockerutil.Docker, excludes map[string]struct{}) ([]testing.InternalTest, error) { // Start the container. - d.CopyFiles("/proctor", "test/runtimes/proctor/proctor") - if err := d.Spawn(dockerutil.RunOpts{ + opts := dockerutil.RunOpts{ Image: fmt.Sprintf("runtimes/%s", *image), - }, "/proctor/proctor", "--pause"); err != nil { + } + d.CopyFiles(&opts, "/proctor", "test/runtimes/proctor/proctor") + if err := d.Spawn(opts, "/proctor/proctor", "--pause"); err != nil { return nil, fmt.Errorf("docker run failed: %v", err) } -- cgit v1.2.3 From 22b0bb21383614f6258dee27f4a254d2da97b586 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 17 Jun 2020 19:08:14 -0700 Subject: Add TempTmpMount test This currently doesn't work with VSF2. Add test to ensure it's not missed. Updates #1487 PiperOrigin-RevId: 317013792 --- test/e2e/integration_test.go | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'test') diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 91c956e10..60e739c6a 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -24,10 +24,12 @@ package integration import ( "flag" "fmt" + "io/ioutil" "net" "net/http" "os" "os/exec" + "path/filepath" "strconv" "strings" "syscall" @@ -389,6 +391,37 @@ func TestTmpFile(t *testing.T) { } } +// TestTmpMount checks that mounts inside '/tmp' are not overridden. 
+func TestTmpMount(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "tmp-mount") + if err != nil { + t.Fatalf("TempDir(): %v", err) + } + want := "123" + if err := ioutil.WriteFile(filepath.Join(dir, "file.txt"), []byte("123"), 0666); err != nil { + t.Fatalf("WriteFile(): %v", err) + } + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + opts := dockerutil.RunOpts{ + Image: "basic/alpine", + Mounts: []dockerutil.Mount{ + { + Source: dir, + Target: "/tmp/foo", + }, + }, + } + got, err := d.Run(opts, "cat", "/tmp/foo/file.txt") + if err != nil { + t.Fatalf("docker run failed: %v", err) + } + if want != got { + t.Errorf("invalid file content, want: %q, got: %q", want, got) + } +} + // TestHostOverlayfsCopyUp tests that the --overlayfs-stale-read option causes // runsc to hide the incoherence of FDs opened before and after overlayfs // copy-up on the host. -- cgit v1.2.3 From 07ff909e76d8233827e705476ec116fc2cecec2f Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 18 Jun 2020 06:05:47 -0700 Subject: Support setsockopt SO_SNDBUF/SO_RCVBUF for raw/udp sockets. Updates #173,#6 Fixes #2888 PiperOrigin-RevId: 317087652 --- benchmarks/tcp/tcp_proxy.go | 8 +- pkg/sentry/socket/netstack/stack.go | 12 +- pkg/tcpip/tcpip.go | 26 ++ pkg/tcpip/transport/raw/endpoint.go | 72 +++- pkg/tcpip/transport/tcp/connect.go | 2 +- pkg/tcpip/transport/tcp/endpoint.go | 16 +- pkg/tcpip/transport/tcp/endpoint_state.go | 2 +- pkg/tcpip/transport/tcp/protocol.go | 59 ++-- pkg/tcpip/transport/tcp/tcp_sack_test.go | 4 +- pkg/tcpip/transport/tcp/tcp_test.go | 26 +- pkg/tcpip/transport/tcp/testing/context/context.go | 10 +- pkg/tcpip/transport/udp/endpoint.go | 57 +++- pkg/tcpip/transport/udp/protocol.go | 70 +++- runsc/boot/loader.go | 6 +- test/syscalls/linux/raw_socket_ipv4.cc | 379 +++++++++++++++++++++ test/syscalls/linux/socket_ipv4_udp_unbound.cc | 216 ++++++++++++ test/syscalls/linux/udp_socket_test_cases.cc | 115 +++++++ 17 files changed, 984 insertions(+), 96 deletions(-) (limited to 'test') diff --git a/benchmarks/tcp/tcp_proxy.go b/benchmarks/tcp/tcp_proxy.go index f5aa0b515..b3a4dbea3 100644 --- a/benchmarks/tcp/tcp_proxy.go +++ b/benchmarks/tcp/tcp_proxy.go @@ -228,19 +228,19 @@ func newNetstackImpl(mode string) (impl, error) { }) // Set protocol options. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(*sack)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %v", err) + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(*sack)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %s", err) } // Enable Receive Buffer Auto-Tuning. if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(*moderateRecvBuf)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) + return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err) } // Set Congestion Control to cubic if requested. 
if *cubic { if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.CongestionControlOption("cubic")); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %v", err) + return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %s", err) } } diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 9b44c2b89..f97f9b6f3 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -144,7 +144,7 @@ func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error { // TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize. func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { - var rs tcp.ReceiveBufferSizeOption + var rs tcpip.StackReceiveBufferSizeOption err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &rs) return inet.TCPBufferSize{ Min: rs.Min, @@ -155,7 +155,7 @@ func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { // SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize. func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error { - rs := tcp.ReceiveBufferSizeOption{ + rs := tcpip.StackReceiveBufferSizeOption{ Min: size.Min, Default: size.Default, Max: size.Max, @@ -165,7 +165,7 @@ func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error { // TCPSendBufferSize implements inet.Stack.TCPSendBufferSize. func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { - var ss tcp.SendBufferSizeOption + var ss tcpip.StackSendBufferSizeOption err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &ss) return inet.TCPBufferSize{ Min: ss.Min, @@ -176,7 +176,7 @@ func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { // SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize. func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error { - ss := tcp.SendBufferSizeOption{ + ss := tcpip.StackSendBufferSizeOption{ Min: size.Min, Default: size.Default, Max: size.Max, @@ -186,14 +186,14 @@ func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error { // TCPSACKEnabled implements inet.Stack.TCPSACKEnabled. func (s *Stack) TCPSACKEnabled() (bool, error) { - var sack tcp.SACKEnabled + var sack tcpip.StackSACKEnabled err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &sack) return bool(sack), syserr.TranslateNetstackError(err).ToError() } // SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. func (s *Stack) SetTCPSACKEnabled(enabled bool) error { - return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enabled))).ToError() + return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(enabled))).ToError() } // Statistics implements inet.Stack.Statistics. diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index b7b227328..3ad130b23 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -813,6 +813,32 @@ type OutOfBandInlineOption int // a default TTL. type DefaultTTLOption uint8 +// StackSACKEnabled is used by stack.(*Stack).TransportProtocolOption to +// enable/disable SACK support in TCP. See: https://tools.ietf.org/html/rfc2018. +type StackSACKEnabled bool + +// StackDelayEnabled is used by stack.(Stack*).TransportProtocolOption to +// enable/disable Nagle's algorithm in TCP. 
+type StackDelayEnabled bool + +// StackSendBufferSizeOption is used by stack.(Stack*).TransportProtocolOption +// to get/set the default, min and max send buffer sizes. +type StackSendBufferSizeOption struct { + Min int + Default int + Max int +} + +// StackReceiveBufferSizeOption is used by +// stack.(Stack*).TransportProtocolOption to get/set the default, min and max +// receive buffer sizes. +type StackReceiveBufferSizeOption struct { + Min int + Default int + Max int +} + +// // IPPacketInfo is the message struture for IP_PKTINFO. // // +stateify savable diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index a406d815e..6a7977259 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -26,6 +26,8 @@ package raw import ( + "fmt" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -66,16 +68,17 @@ type endpoint struct { // protected by rcvMu. rcvMu sync.Mutex `state:"nosave"` rcvList rawPacketList - rcvBufSizeMax int `state:".(int)"` rcvBufSize int + rcvBufSizeMax int `state:".(int)"` rcvClosed bool // The following fields are protected by mu. - mu sync.RWMutex `state:"nosave"` - sndBufSize int - closed bool - connected bool - bound bool + mu sync.RWMutex `state:"nosave"` + sndBufSize int + sndBufSizeMax int + closed bool + connected bool + bound bool // route is the route to a remote network endpoint. It is set via // Connect(), and is valid only when conneted is true. route stack.Route `state:"manual"` @@ -103,10 +106,21 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt }, waiterQueue: waiterQueue, rcvBufSizeMax: 32 * 1024, - sndBufSize: 32 * 1024, + sndBufSizeMax: 32 * 1024, associated: associated, } + // Override with stack defaults. + var ss tcpip.StackSendBufferSizeOption + if err := s.TransportProtocolOption(transProto, &ss); err == nil { + e.sndBufSizeMax = ss.Default + } + + var rs tcpip.StackReceiveBufferSizeOption + if err := s.TransportProtocolOption(transProto, &rs); err == nil { + e.rcvBufSizeMax = rs.Default + } + // Unassociated endpoints are write-only and users call Write() with IP // headers included. Because they're write-only, We don't need to // register with the stack. @@ -523,7 +537,46 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption + switch opt { + case tcpip.SendBufferSizeOption: + // Make sure the send buffer size is within the min and max + // allowed. + var ss tcpip.StackSendBufferSizeOption + if err := e.stack.TransportProtocolOption(e.TransProto, &ss); err != nil { + panic(fmt.Sprintf("s.TransportProtocolOption(%d, %+v) = %s", e.TransProto, ss, err)) + } + if v > ss.Max { + v = ss.Max + } + if v < ss.Min { + v = ss.Min + } + e.mu.Lock() + e.sndBufSizeMax = v + e.mu.Unlock() + return nil + + case tcpip.ReceiveBufferSizeOption: + // Make sure the receive buffer size is within the min and max + // allowed. 
+ var rs tcpip.StackReceiveBufferSizeOption + if err := e.stack.TransportProtocolOption(e.TransProto, &rs); err != nil { + panic(fmt.Sprintf("s.TransportProtocolOption(%d, %+v) = %s", e.TransProto, rs, err)) + } + if v > rs.Max { + v = rs.Max + } + if v < rs.Min { + v = rs.Min + } + e.rcvMu.Lock() + e.rcvBufSizeMax = v + e.rcvMu.Unlock() + return nil + + default: + return tcpip.ErrUnknownProtocolOption + } } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. @@ -563,7 +616,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { case tcpip.SendBufferSizeOption: e.mu.Lock() - v := e.sndBufSize + v := e.sndBufSizeMax e.mu.Unlock() return v, nil @@ -636,7 +689,6 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) { e.rcvList.PushBack(packet) e.rcvBufSize += packet.data.Size() - e.rcvMu.Unlock() e.stats.PacketsReceived.Increment() // Notify waiters that there's data to be read. diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 9d4dce826..377643b82 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -521,7 +521,7 @@ func (h *handshake) execute() *tcpip.Error { s.AddWaker(&h.ep.newSegmentWaker, wakerForNewSegment) defer s.Done() - var sackEnabled SACKEnabled + var sackEnabled tcpip.StackSACKEnabled if err := h.ep.stack.TransportProtocolOption(ProtocolNumber, &sackEnabled); err != nil { // If stack returned an error when checking for SACKEnabled // status then just default to switching off SACK negotiation. diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index f225b00e7..10df2bcd5 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -851,12 +851,12 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue maxSynRetries: DefaultSynRetries, } - var ss SendBufferSizeOption + var ss tcpip.StackSendBufferSizeOption if err := s.TransportProtocolOption(ProtocolNumber, &ss); err == nil { e.sndBufSize = ss.Default } - var rs ReceiveBufferSizeOption + var rs tcpip.StackReceiveBufferSizeOption if err := s.TransportProtocolOption(ProtocolNumber, &rs); err == nil { e.rcvBufSize = rs.Default } @@ -871,7 +871,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue e.rcvAutoParams.disabled = !bool(mrb) } - var de DelayEnabled + var de tcpip.StackDelayEnabled if err := s.TransportProtocolOption(ProtocolNumber, &de); err == nil && de { e.SetSockOptBool(tcpip.DelayOption, true) } @@ -1588,7 +1588,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { case tcpip.ReceiveBufferSizeOption: // Make sure the receive buffer size is within the min and max // allowed. - var rs ReceiveBufferSizeOption + var rs tcpip.StackReceiveBufferSizeOption if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil { if v < rs.Min { v = rs.Min @@ -1638,7 +1638,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { case tcpip.SendBufferSizeOption: // Make sure the send buffer size is within the min and max // allowed. 
- var ss SendBufferSizeOption + var ss tcpip.StackSendBufferSizeOption if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil { if v < ss.Min { v = ss.Min @@ -1678,7 +1678,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { return tcpip.ErrInvalidOptionValue } } - var rs ReceiveBufferSizeOption + var rs tcpip.StackReceiveBufferSizeOption if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil { if v < rs.Min/2 { v = rs.Min / 2 @@ -2609,7 +2609,7 @@ func (e *endpoint) receiveBufferSize() int { } func (e *endpoint) maxReceiveBufferSize() int { - var rs ReceiveBufferSizeOption + var rs tcpip.StackReceiveBufferSizeOption if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil { // As a fallback return the hardcoded max buffer size. return MaxBufferSize @@ -2690,7 +2690,7 @@ func timeStampOffset() uint32 { // if the SYN options indicate that the SACK option was negotiated and the TCP // stack is configured to enable TCP SACK option. func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) { - var v SACKEnabled + var v tcpip.StackSACKEnabled if err := e.stack.TransportProtocolOption(ProtocolNumber, &v); err != nil { // Stack doesn't support SACK. So just return. return diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index cbb779666..0bebec2d1 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -186,7 +186,7 @@ func (e *endpoint) Resume(s *stack.Stack) { epState := e.origEndpointState switch epState { case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished: - var ss SendBufferSizeOption + var ss tcpip.StackSendBufferSizeOption if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil { if e.sndBufSize < ss.Min || e.sndBufSize > ss.Max { panic(fmt.Sprintf("endpoint.sndBufSize %d is outside the min and max allowed [%d, %d]", e.sndBufSize, ss.Min, ss.Max)) diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 73b8a6782..3cff55afa 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -71,29 +71,6 @@ const ( DefaultSynRetries = 6 ) -// SACKEnabled option can be used to enable SACK support in the TCP -// protocol. See: https://tools.ietf.org/html/rfc2018. -type SACKEnabled bool - -// DelayEnabled option can be used to enable Nagle's algorithm in the TCP protocol. -type DelayEnabled bool - -// SendBufferSizeOption allows the default, min and max send buffer sizes for -// TCP endpoints to be queried or configured. -type SendBufferSizeOption struct { - Min int - Default int - Max int -} - -// ReceiveBufferSizeOption allows the default, min and max receive buffer size -// for TCP endpoints to be queried or configured. -type ReceiveBufferSizeOption struct { - Min int - Default int - Max int -} - const ( ccReno = "reno" ccCubic = "cubic" @@ -160,8 +137,8 @@ type protocol struct { mu sync.RWMutex sackEnabled bool delayEnabled bool - sendBufferSize SendBufferSizeOption - recvBufferSize ReceiveBufferSizeOption + sendBufferSize tcpip.StackSendBufferSizeOption + recvBufferSize tcpip.StackReceiveBufferSizeOption congestionControl string availableCongestionControl []string moderateReceiveBuffer bool @@ -272,19 +249,19 @@ func replyWithReset(s *segment, tos, ttl uint8) { // SetOption implements stack.TransportProtocol.SetOption. 
func (p *protocol) SetOption(option interface{}) *tcpip.Error { switch v := option.(type) { - case SACKEnabled: + case tcpip.StackSACKEnabled: p.mu.Lock() p.sackEnabled = bool(v) p.mu.Unlock() return nil - case DelayEnabled: + case tcpip.StackDelayEnabled: p.mu.Lock() p.delayEnabled = bool(v) p.mu.Unlock() return nil - case SendBufferSizeOption: + case tcpip.StackSendBufferSizeOption: if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { return tcpip.ErrInvalidOptionValue } @@ -293,7 +270,7 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { p.mu.Unlock() return nil - case ReceiveBufferSizeOption: + case tcpip.StackReceiveBufferSizeOption: if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { return tcpip.ErrInvalidOptionValue } @@ -386,25 +363,25 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { // Option implements stack.TransportProtocol.Option. func (p *protocol) Option(option interface{}) *tcpip.Error { switch v := option.(type) { - case *SACKEnabled: + case *tcpip.StackSACKEnabled: p.mu.RLock() - *v = SACKEnabled(p.sackEnabled) + *v = tcpip.StackSACKEnabled(p.sackEnabled) p.mu.RUnlock() return nil - case *DelayEnabled: + case *tcpip.StackDelayEnabled: p.mu.RLock() - *v = DelayEnabled(p.delayEnabled) + *v = tcpip.StackDelayEnabled(p.delayEnabled) p.mu.RUnlock() return nil - case *SendBufferSizeOption: + case *tcpip.StackSendBufferSizeOption: p.mu.RLock() *v = p.sendBufferSize p.mu.RUnlock() return nil - case *ReceiveBufferSizeOption: + case *tcpip.StackReceiveBufferSizeOption: p.mu.RLock() *v = p.recvBufferSize p.mu.RUnlock() @@ -514,8 +491,16 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool { // NewProtocol returns a TCP transport protocol. func NewProtocol() stack.TransportProtocol { return &protocol{ - sendBufferSize: SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize}, - recvBufferSize: ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize}, + sendBufferSize: tcpip.StackSendBufferSizeOption{ + Min: MinBufferSize, + Default: DefaultSendBufferSize, + Max: MaxBufferSize, + }, + recvBufferSize: tcpip.StackReceiveBufferSizeOption{ + Min: MinBufferSize, + Default: DefaultReceiveBufferSize, + Max: MaxBufferSize, + }, congestionControl: ccReno, availableCongestionControl: []string{ccReno, ccCubic}, tcpLingerTimeout: DefaultTCPLingerTimeout, diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go index fcc165f17..812e503bc 100644 --- a/pkg/tcpip/transport/tcp/tcp_sack_test.go +++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go @@ -46,8 +46,8 @@ func createConnectedWithSACKAndTS(c *context.Context) *context.RawEndpoint { func setStackSACKPermitted(t *testing.T, c *context.Context, enable bool) { t.Helper() - if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enable)); err != nil { - t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, SACKEnabled(%v) = %v", enable, err) + if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(enable)); err != nil { + t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, StackSACKEnabled(%t) = %s", enable, err) } } diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 0668cedc9..aca6a7951 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -3987,7 +3987,10 @@ func TestDefaultBufferSizes(t *testing.T) { checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize) // Change 
the default send buffer size. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize * 2, tcp.DefaultSendBufferSize * 20}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{ + Min: 1, + Default: tcp.DefaultSendBufferSize * 2, + Max: tcp.DefaultSendBufferSize * 20}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %s", err) } @@ -4001,8 +4004,11 @@ func TestDefaultBufferSizes(t *testing.T) { checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize) // Change the default receive buffer size. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize * 3, tcp.DefaultReceiveBufferSize * 30}); err != nil { - t.Fatalf("SetTransportProtocolOption failed: %s", err) + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{ + Min: 1, + Default: tcp.DefaultReceiveBufferSize * 3, + Max: tcp.DefaultReceiveBufferSize * 30}); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %v", err) } ep.Close() @@ -4029,11 +4035,11 @@ func TestMinMaxBufferSizes(t *testing.T) { defer ep.Close() // Change the min/max values for send/receive - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultReceiveBufferSize * 2, tcp.DefaultReceiveBufferSize * 20}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 200, Default: tcp.DefaultReceiveBufferSize * 2, Max: tcp.DefaultReceiveBufferSize * 20}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %s", err) } - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultSendBufferSize * 3, tcp.DefaultSendBufferSize * 30}); err != nil { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{Min: 300, Default: tcp.DefaultSendBufferSize * 3, Max: tcp.DefaultSendBufferSize * 30}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %s", err) } @@ -5672,7 +5678,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) { // the segment queue holding unprocessed packets is limited to 500. const receiveBufferSize = 80 << 10 // 80KB. const maxReceiveBufferSize = receiveBufferSize * 10 - if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil { + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %s", err) } @@ -5793,7 +5799,7 @@ func TestReceiveBufferAutoTuning(t *testing.T) { // the segment queue holding unprocessed packets is limited to 300. const receiveBufferSize = 80 << 10 // 80KB. const maxReceiveBufferSize = receiveBufferSize * 10 - if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil { + if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}); err != nil { t.Fatalf("SetTransportProtocolOption failed: %s", err) } @@ -5935,7 +5941,7 @@ func TestDelayEnabled(t *testing.T) { checkDelayOption(t, c, false, false) // Delay is disabled by default. 
for _, v := range []struct { - delayEnabled tcp.DelayEnabled + delayEnabled tcpip.StackDelayEnabled wantDelayOption bool }{ {delayEnabled: false, wantDelayOption: false}, @@ -5950,10 +5956,10 @@ func TestDelayEnabled(t *testing.T) { } } -func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcp.DelayEnabled, wantDelayOption bool) { +func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcpip.StackDelayEnabled, wantDelayOption bool) { t.Helper() - var gotDelayEnabled tcp.DelayEnabled + var gotDelayEnabled tcpip.StackDelayEnabled if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &gotDelayEnabled); err != nil { t.Fatalf("TransportProtocolOption(tcp, &gotDelayEnabled) failed: %s", err) } diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index 9721f6caf..9e262c272 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -144,12 +144,12 @@ func New(t *testing.T, mtu uint32) *Context { }) // Allow minimum send/receive buffer sizes to be 1 during tests. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize, 10 * tcp.DefaultSendBufferSize}); err != nil { - t.Fatalf("SetTransportProtocolOption failed: %v", err) + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{Min: 1, Default: tcp.DefaultSendBufferSize, Max: 10 * tcp.DefaultSendBufferSize}); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %s", err) } - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize, 10 * tcp.DefaultReceiveBufferSize}); err != nil { - t.Fatalf("SetTransportProtocolOption failed: %v", err) + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: tcp.DefaultReceiveBufferSize, Max: 10 * tcp.DefaultReceiveBufferSize}); err != nil { + t.Fatalf("SetTransportProtocolOption failed: %s", err) } // Increase minimum RTO in tests to avoid test flakes due to early @@ -1091,7 +1091,7 @@ func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions // SACKEnabled returns true if the TCP Protocol option SACKEnabled is set to true // for the Stack in the context. func (c *Context) SACKEnabled() bool { - var v tcp.SACKEnabled + var v tcpip.StackSACKEnabled if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &v); err != nil { // Stack doesn't support SACK. So just return. return false diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index df5efbf6a..f51988047 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -15,6 +15,8 @@ package udp import ( + "fmt" + "gvisor.dev/gvisor/pkg/sleep" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" @@ -94,6 +96,7 @@ type endpoint struct { // The following fields are protected by the mu mutex. 
mu sync.RWMutex `state:"nosave"` sndBufSize int + sndBufSizeMax int state EndpointState route stack.Route `state:"manual"` dstPort uint16 @@ -159,7 +162,7 @@ type multicastMembership struct { } func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint { - return &endpoint{ + e := &endpoint{ stack: s, TransportEndpointInfo: stack.TransportEndpointInfo{ NetProto: netProto, @@ -181,10 +184,23 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue multicastTTL: 1, multicastLoop: true, rcvBufSizeMax: 32 * 1024, - sndBufSize: 32 * 1024, + sndBufSizeMax: 32 * 1024, state: StateInitial, uniqueID: s.UniqueID(), } + + // Override with stack defaults. + var ss tcpip.StackSendBufferSizeOption + if err := s.TransportProtocolOption(ProtocolNumber, &ss); err == nil { + e.sndBufSizeMax = ss.Default + } + + var rs tcpip.StackReceiveBufferSizeOption + if err := s.TransportProtocolOption(ProtocolNumber, &rs); err == nil { + e.rcvBufSizeMax = rs.Default + } + + return e } // UniqueID implements stack.TransportEndpoint.UniqueID. @@ -611,8 +627,43 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { e.mu.Unlock() case tcpip.ReceiveBufferSizeOption: + // Make sure the receive buffer size is within the min and max + // allowed. + var rs tcpip.StackReceiveBufferSizeOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil { + panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %s", ProtocolNumber, rs, err)) + } + + if v < rs.Min { + v = rs.Min + } + if v > rs.Max { + v = rs.Max + } + + e.mu.Lock() + e.rcvBufSizeMax = v + e.mu.Unlock() + return nil case tcpip.SendBufferSizeOption: + // Make sure the send buffer size is within the min and max + // allowed. + var ss tcpip.StackSendBufferSizeOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err != nil { + panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %s", ProtocolNumber, ss, err)) + } + + if v < ss.Min { + v = ss.Min + } + if v > ss.Max { + v = ss.Max + } + e.mu.Lock() + e.sndBufSizeMax = v + e.mu.Unlock() + return nil } return nil @@ -861,7 +912,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { case tcpip.SendBufferSizeOption: e.mu.Lock() - v := e.sndBufSize + v := e.sndBufSizeMax e.mu.Unlock() return v, nil diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go index 4218e7d03..fc93f93c0 100644 --- a/pkg/tcpip/transport/udp/protocol.go +++ b/pkg/tcpip/transport/udp/protocol.go @@ -21,6 +21,7 @@ package udp import ( + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -32,9 +33,27 @@ import ( const ( // ProtocolNumber is the udp protocol number. ProtocolNumber = header.UDPProtocolNumber + + // MinBufferSize is the smallest size of a receive or send buffer. + MinBufferSize = 4 << 10 // 4KiB bytes. + + // DefaultSendBufferSize is the default size of the send buffer for + // an endpoint. + DefaultSendBufferSize = 32 << 10 // 32KiB + + // DefaultReceiveBufferSize is the default size of the receive buffer + // for an endpoint. + DefaultReceiveBufferSize = 32 << 10 // 32KiB + + // MaxBufferSize is the largest size a receive/send buffer can grow to. 
+ MaxBufferSize = 4 << 20 // 4MiB ) -type protocol struct{} +type protocol struct { + mu sync.RWMutex + sendBufferSize tcpip.StackSendBufferSizeOption + recvBufferSize tcpip.StackReceiveBufferSizeOption +} // Number returns the udp protocol number. func (*protocol) Number() tcpip.TransportProtocolNumber { @@ -183,13 +202,49 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans } // SetOption implements stack.TransportProtocol.SetOption. -func (*protocol) SetOption(option interface{}) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (p *protocol) SetOption(option interface{}) *tcpip.Error { + switch v := option.(type) { + case tcpip.StackSendBufferSizeOption: + if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { + return tcpip.ErrInvalidOptionValue + } + p.mu.Lock() + p.sendBufferSize = v + p.mu.Unlock() + return nil + + case tcpip.StackReceiveBufferSizeOption: + if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max { + return tcpip.ErrInvalidOptionValue + } + p.mu.Lock() + p.recvBufferSize = v + p.mu.Unlock() + return nil + + default: + return tcpip.ErrUnknownProtocolOption + } } // Option implements stack.TransportProtocol.Option. -func (*protocol) Option(option interface{}) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption +func (p *protocol) Option(option interface{}) *tcpip.Error { + switch v := option.(type) { + case *tcpip.StackSendBufferSizeOption: + p.mu.RLock() + *v = p.sendBufferSize + p.mu.RUnlock() + return nil + + case *tcpip.StackReceiveBufferSizeOption: + p.mu.RLock() + *v = p.recvBufferSize + p.mu.RUnlock() + return nil + + default: + return tcpip.ErrUnknownProtocolOption + } } // Close implements stack.TransportProtocol.Close. @@ -212,5 +267,8 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool { // NewProtocol returns a UDP transport protocol. func NewProtocol() stack.TransportProtocol { - return &protocol{} + return &protocol{ + sendBufferSize: tcpip.StackSendBufferSizeOption{Min: MinBufferSize, Default: DefaultSendBufferSize, Max: MaxBufferSize}, + recvBufferSize: tcpip.StackReceiveBufferSizeOption{Min: MinBufferSize, Default: DefaultReceiveBufferSize, Max: MaxBufferSize}, + } } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index b05a8bd45..c6efcdc83 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -1058,8 +1058,8 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in })} // Enable SACK Recovery. - if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { - return nil, fmt.Errorf("failed to enable SACK: %v", err) + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(true)); err != nil { + return nil, fmt.Errorf("failed to enable SACK: %s", err) } // Set default TTLs as required by socket/netstack. @@ -1068,7 +1068,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in // Enable Receive Buffer Auto-Tuning. 
if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) + return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err) } s.FillIPTablesMetadata() diff --git a/test/syscalls/linux/raw_socket_ipv4.cc b/test/syscalls/linux/raw_socket_ipv4.cc index cde2f07c9..0116c3e94 100644 --- a/test/syscalls/linux/raw_socket_ipv4.cc +++ b/test/syscalls/linux/raw_socket_ipv4.cc @@ -357,10 +357,389 @@ TEST_P(RawSocketTest, BindConnectSendAndReceive) { EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), kBuf, sizeof(kBuf)), 0); } +// Check that setting SO_RCVBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum receive buf size by trying to set it to zero. + // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_RCVBUF above max is clamped to the maximum +// receive buffer size. +TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover max buf size by trying to set the largest possible buffer size. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. +TEST_P(RawSocketTest, SetSocketRecvBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover max buf size by trying to set a really large buffer size. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set a zero size receive buffer + // size. 
+ // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + ASSERT_EQ(quarter_sz, val); +} + +// Check that setting SO_SNDBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawSocketTest, SetSocketSendBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum buffer size by trying to set it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_SNDBUF above max is clamped to the maximum +// send buffer size. +TEST_P(RawSocketTest, SetSocketSendBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover maximum buffer size by trying to set it to a large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. +TEST_P(RawSocketTest, SetSocketSendBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover maximum buffer size by trying to set it to a large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set it to zero. 
+ constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack + // matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + + ASSERT_EQ(quarter_sz, val); +} + void RawSocketTest::SendBuf(const char* buf, int buf_len) { ASSERT_NO_FATAL_FAILURE(SendBufTo(s_, addr_, buf, buf_len)); } +// Test that receive buffer limits are not enforced when the recv buffer is +// empty. +TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast(&addr_), sizeof(addr_)), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), + SyscallSucceeds()); + + int min = 0; + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + { + // Send data of size min and verify that it's received. + std::vector buf(min); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + + // Receive the packet and make sure it's identical. + std::vector recv_buf(buf.size() + sizeof(struct iphdr)); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ( + memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), buf.size()), + 0); + } + + { + // Send data of size min + 1 and verify that its received. Both linux and + // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer + // is currently empty. + std::vector buf(min + 1); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + // Receive the packet and make sure it's identical. + std::vector recv_buf(buf.size() + sizeof(struct iphdr)); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ( + memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), buf.size()), + 0); + } +} + +TEST_P(RawSocketTest, RecvBufLimits) { + // TCP stack generates RSTs for unknown endpoints and it complicates the test + // as we have to deal with the RST packets as well. For testing the raw socket + // endpoints buffer limit enforcement we can just test for UDP. + // + // We don't use SKIP_IF here because root_test_runner explicitly fails if a + // test is skipped. + if (Protocol() == IPPROTO_TCP) { + return; + } + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast(&addr_), sizeof(addr_)), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), + SyscallSucceeds()); + + int min = 0; + { + // Discover minimum buffer size by trying to set it to zero. 
+ constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + // Now set the limit to min * 2. + int new_rcv_buf_sz = min * 4; + if (!IsRunningOnGvisor()) { + // Linux doubles the value specified so just set to min. + new_rcv_buf_sz = min * 2; + } + + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, + sizeof(new_rcv_buf_sz)), + SyscallSucceeds()); + int rcv_buf_sz = 0; + { + socklen_t rcv_buf_len = sizeof(rcv_buf_sz); + ASSERT_THAT( + getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len), + SyscallSucceeds()); + } + + // Set a receive timeout so that we don't block forever on reads if the test + // fails. + struct timeval tv { + .tv_sec = 1, .tv_usec = 0, + }; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + { + std::vector buf(min); + RandomizeBuffer(buf.data(), buf.size()); + + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + int sent = 4; + if (IsRunningOnGvisor()) { + // Linux seems to drop the 4th packet even though technically it should + // fit in the receive buffer. + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + sent++; + } + + // Verify that the expected number of packets are available to be read. + for (int i = 0; i < sent - 1; i++) { + // Receive the packet and make sure it's identical. + std::vector recv_buf(buf.size() + sizeof(struct iphdr)); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), + buf.size()), + 0); + } + + // Assert that the last packet is dropped because the receive buffer should + // be full after the first four packets. + std::vector recv_buf(buf.size() + sizeof(struct iphdr)); + struct iovec iov = {}; + iov.iov_base = static_cast(const_cast(recv_buf.data())); + iov.iov_len = buf.size(); + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + } +} + void RawSocketTest::SendBufTo(int sock, const struct sockaddr_in& addr, const char* buf, int buf_len) { // It's safe to use const_cast here because sendmsg won't modify the iovec or diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index 4db9553e1..de0f5f01b 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -2236,5 +2237,220 @@ TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) { EXPECT_EQ(received_pktinfo.ipi_spec_dst.s_addr, htonl(INADDR_LOOPBACK)); EXPECT_EQ(received_pktinfo.ipi_addr.s_addr, htonl(INADDR_LOOPBACK)); } + +// Check that setting SO_RCVBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufBelowMin) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Discover minimum buffer size by setting it to zero. 
+ constexpr int kRcvBufSz = 0; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &below_min, + sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_RCVBUF above max is clamped to the maximum +// receive buffer size. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufAboveMax) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Discover maxmimum buffer size by setting to a really large value. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &above_max, + sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_RCVBUF min <= rcvBufSz <= max is honored. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBuf) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int max = 0; + int min = 0; + { + // Discover maxmimum buffer size by setting to a really large value. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by setting it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &quarter_sz, + sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + ASSERT_EQ(quarter_sz, val); +} + +// Check that setting SO_SNDBUF below min is clamped to the minimum +// send buffer size. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufBelowMin) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Discover minimum buffer size by setting it to zero. 
+ constexpr int kSndBufSz = 0; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &below_min, + sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_SNDBUF above max is clamped to the maximum +// send buffer size. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufAboveMax) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Discover maxmimum buffer size by setting to a really large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &above_max, + sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. +TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBuf) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int max = 0; + int min = 0; + { + // Discover maxmimum buffer size by setting to a really large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by setting it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, + sizeof(kSndBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &quarter_sz, + sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. 
+ if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + + ASSERT_EQ(quarter_sz, val); +} } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 42521efef..cc1db3de8 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1543,5 +1543,120 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos)); EXPECT_EQ(received_tos, sent_tos); } + +TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) { + // Discover minimum buffer size by setting it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Bind s_ to loopback. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + { + // Send data of size min and verify that it's received. + std::vector buf(min); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(buf.size())); + std::vector received(buf.size()); + EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); + } + + { + // Send data of size min + 1 and verify that its received. Both linux and + // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer + // is currently empty. + std::vector buf(min + 1); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(buf.size())); + + std::vector received(buf.size()); + EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); + } +} + +// Test that receive buffer limits are enforced. +TEST_P(UdpSocketTest, RecvBufLimits) { + // Bind s_ to loopback. + ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + + int min = 0; + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + // Now set the limit to min * 4. + int new_rcv_buf_sz = min * 4; + if (!IsRunningOnGvisor() || IsRunningWithHostinet()) { + // Linux doubles the value specified so just set to min * 2. 
+ new_rcv_buf_sz = min * 2; + } + + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, + sizeof(new_rcv_buf_sz)), + SyscallSucceeds()); + int rcv_buf_sz = 0; + { + socklen_t rcv_buf_len = sizeof(rcv_buf_sz); + ASSERT_THAT( + getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len), + SyscallSucceeds()); + } + + { + std::vector buf(min); + RandomizeBuffer(buf.data(), buf.size()); + + ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(buf.size())); + int sent = 4; + if (IsRunningOnGvisor() && !IsRunningWithHostinet()) { + // Linux seems to drop the 4th packet even though technically it should + // fit in the receive buffer. + ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), + SyscallSucceedsWithValue(buf.size())); + sent++; + } + + for (int i = 0; i < sent - 1; i++) { + // Receive the data. + std::vector received(buf.size()); + EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); + EXPECT_EQ(memcmp(buf.data(), received.data(), buf.size()), 0); + } + + // The last receive should fail with EAGAIN as the last packet should have + // been dropped due to lack of space in the receive buffer. + std::vector received(buf.size()); + EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + } +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 3970c127434817304f67a2ad192cbe8094ad3353 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 18 Jun 2020 09:02:14 -0700 Subject: Remove various uses of 'whitelist' Updates #2972 PiperOrigin-RevId: 317113059 --- pkg/bpf/interpreter_test.go | 2 +- pkg/cpuid/cpuid_arm64.go | 5 +++-- pkg/cpuid/cpuid_x86.go | 7 +++---- pkg/seccomp/seccomp_rules.go | 4 ++-- pkg/sentry/fs/filesystems.go | 14 -------------- pkg/sentry/fsimpl/host/host.go | 5 +++-- pkg/sentry/socket/hostinet/socket.go | 8 ++++---- pkg/sentry/vfs/README.md | 2 -- test/syscalls/linux/xattr.cc | 5 +++-- tools/nogo/matchers.go | 27 ++++++++++++++++----------- 10 files changed, 35 insertions(+), 44 deletions(-) (limited to 'test') diff --git a/pkg/bpf/interpreter_test.go b/pkg/bpf/interpreter_test.go index 547921d0a..c85d786b9 100644 --- a/pkg/bpf/interpreter_test.go +++ b/pkg/bpf/interpreter_test.go @@ -767,7 +767,7 @@ func TestSimpleFilter(t *testing.T) { expectedRet: 0, }, { - desc: "Whitelisted syscall is allowed", + desc: "Allowed syscall is indeed allowed", seccompData: seccompData{nr: 231 /* __NR_exit_group */, arch: 0xc000003e}, expectedRet: 0x7fff0000, }, diff --git a/pkg/cpuid/cpuid_arm64.go b/pkg/cpuid/cpuid_arm64.go index 08381c1c0..ac7bb6774 100644 --- a/pkg/cpuid/cpuid_arm64.go +++ b/pkg/cpuid/cpuid_arm64.go @@ -312,8 +312,9 @@ func HostFeatureSet() *FeatureSet { } } -// Reads bogomips from host /proc/cpuinfo. Must run before whitelisting. -// This value is used to create the fake /proc/cpuinfo from a FeatureSet. +// Reads bogomips from host /proc/cpuinfo. Must run before syscall filter +// installation. This value is used to create the fake /proc/cpuinfo from a +// FeatureSet. 
func initCPUInfo() { cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo") if err != nil { diff --git a/pkg/cpuid/cpuid_x86.go b/pkg/cpuid/cpuid_x86.go index 562f8f405..17a89c00d 100644 --- a/pkg/cpuid/cpuid_x86.go +++ b/pkg/cpuid/cpuid_x86.go @@ -1057,9 +1057,9 @@ func HostFeatureSet() *FeatureSet { } } -// Reads max cpu frequency from host /proc/cpuinfo. Must run before -// whitelisting. This value is used to create the fake /proc/cpuinfo from a -// FeatureSet. +// Reads max cpu frequency from host /proc/cpuinfo. Must run before syscall +// filter installation. This value is used to create the fake /proc/cpuinfo +// from a FeatureSet. func initCPUFreq() { cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo") if err != nil { @@ -1106,7 +1106,6 @@ func initFeaturesFromString() { } func init() { - // initCpuFreq must be run before whitelists are enabled. initCPUFreq() initFeaturesFromString() } diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go index 06308cd29..a52dc1b4e 100644 --- a/pkg/seccomp/seccomp_rules.go +++ b/pkg/seccomp/seccomp_rules.go @@ -56,7 +56,7 @@ func (a AllowValue) String() (s string) { return fmt.Sprintf("%#x ", uintptr(a)) } -// Rule stores the whitelist of syscall arguments. +// Rule stores the allowed syscall arguments. // // For example: // rule := Rule { @@ -82,7 +82,7 @@ func (r Rule) String() (s string) { return } -// SyscallRules stores a map of OR'ed whitelist rules indexed by the syscall number. +// SyscallRules stores a map of OR'ed argument rules indexed by the syscall number. // If the 'Rules' is empty, we treat it as any argument is allowed. // // For example: diff --git a/pkg/sentry/fs/filesystems.go b/pkg/sentry/fs/filesystems.go index 084da2a8d..d41f30bbb 100644 --- a/pkg/sentry/fs/filesystems.go +++ b/pkg/sentry/fs/filesystems.go @@ -87,20 +87,6 @@ func RegisterFilesystem(f Filesystem) { filesystems.registered[f.Name()] = f } -// UnregisterFilesystem removes a file system from the global set. To keep the -// file system set compatible with save/restore, UnregisterFilesystem must be -// called before save/restore methods. -// -// For instance, packages may unregister their file system after it is mounted. -// This makes sense for pseudo file systems that should not be visible or -// mountable. See whitelistfs in fs/host/fs.go for one example. -func UnregisterFilesystem(name string) { - filesystems.mu.Lock() - defer filesystems.mu.Unlock() - - delete(filesystems.registered, name) -} - // FindFilesystem returns a Filesystem registered at name or (nil, false) if name // is not a file system type that can be found in /proc/filesystems. func FindFilesystem(name string) (Filesystem, bool) { diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index a3a312edb..43a173bc9 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -476,8 +476,9 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d, &i.locks) } - // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that - // we don't allow importing arbitrary file types without proper support. + // TODO(gvisor.dev/issue/1672): Allow only specific file types here, so + // that we don't allow importing arbitrary file types without proper + // support. 
if i.isTTY { fd := &TTYFileDescription{ fileDescription: fileDescription{inode: i}, diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index c11e82c10..a92aed2c9 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -324,7 +324,7 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr return nil, syserr.ErrInvalidArgument } - // Whitelist options and constrain option length. + // Only allow known and safe options. optlen := getSockOptLen(t, level, name) switch level { case linux.SOL_IP: @@ -369,7 +369,7 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr // SetSockOpt implements socket.Socket.SetSockOpt. func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error { - // Whitelist options and constrain option length. + // Only allow known and safe options. optlen := setSockOptLen(t, level, name) switch level { case linux.SOL_IP: @@ -415,7 +415,7 @@ func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt [] // RecvMsg implements socket.Socket.RecvMsg. func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { - // Whitelist flags. + // Only allow known and safe flags. // // FIXME(jamieliu): We can't support MSG_ERRQUEUE because it uses ancillary // messages that gvisor/pkg/tcpip/transport/unix doesn't understand. Kill the @@ -537,7 +537,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags // SendMsg implements socket.Socket.SendMsg. func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { - // Whitelist flags. + // Only allow known and safe flags. if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 { return 0, syserr.ErrInvalidArgument } diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md index 66f3105bd..4b9faf2ea 100644 --- a/pkg/sentry/vfs/README.md +++ b/pkg/sentry/vfs/README.md @@ -169,8 +169,6 @@ This construction, which is essentially a type-safe analogue to Linux's - binder, which is similarly far too incomplete to use. - - whitelistfs, which we are already actively attempting to remove. - - Save/restore. For instance, it is unclear if the current implementation of the `state` package supports the inheritance pattern described above. diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 3231732ec..cbcf08451 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -73,9 +73,10 @@ TEST_F(XattrTest, XattrLargeName) { std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); - // An xattr should be whitelisted before it can be accessed--do not allow - // arbitrary xattrs to be read/written in gVisor. if (!IsRunningOnGvisor()) { + // In gVisor, access to xattrs is controlled with an explicit list of + // allowed names. This name isn't going to be configured to allow access, so + // don't test it. 
EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), SyscallSucceeds()); EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), diff --git a/tools/nogo/matchers.go b/tools/nogo/matchers.go index 32f099925..57a250501 100644 --- a/tools/nogo/matchers.go +++ b/tools/nogo/matchers.go @@ -27,10 +27,15 @@ type matcher interface { ShouldReport(d analysis.Diagnostic, fs *token.FileSet) bool } -// pathRegexps excludes explicit paths. +// pathRegexps filters explicit paths. type pathRegexps struct { - expr []*regexp.Regexp - whitelist bool + expr []*regexp.Regexp + + // include, if true, indicates that paths matching any regexp in expr + // match. + // + // If false, paths matching no regexps in expr match. + include bool } // buildRegexps builds a list of regular expressions. @@ -49,33 +54,33 @@ func (p *pathRegexps) ShouldReport(d analysis.Diagnostic, fs *token.FileSet) boo fullPos := fs.Position(d.Pos).String() for _, path := range p.expr { if path.MatchString(fullPos) { - return p.whitelist + return p.include } } - return !p.whitelist + return !p.include } // internalExcluded excludes specific internal paths. func internalExcluded(paths ...string) *pathRegexps { return &pathRegexps{ - expr: buildRegexps(internalPrefix, paths...), - whitelist: false, + expr: buildRegexps(internalPrefix, paths...), + include: false, } } // excludedExcluded excludes specific external paths. func externalExcluded(paths ...string) *pathRegexps { return &pathRegexps{ - expr: buildRegexps(externalPrefix, paths...), - whitelist: false, + expr: buildRegexps(externalPrefix, paths...), + include: false, } } // internalMatches returns a path matcher for internal packages. func internalMatches() *pathRegexps { return &pathRegexps{ - expr: buildRegexps(internalPrefix, ".*"), - whitelist: true, + expr: buildRegexps(internalPrefix, ".*"), + include: true, } } -- cgit v1.2.3 From 878050b5cf924b1f314965e5bfe21248a55616c4 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 18 Jun 2020 14:51:07 -0700 Subject: Enable more VFS2 syscall tests Updates #2923 PiperOrigin-RevId: 317185798 --- test/syscalls/BUILD | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'test') diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 6d80572e7..1638a11c7 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -33,6 +33,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:aio_test", + vfs2 = "True", ) syscall_test( @@ -106,6 +107,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:chroot_test", + vfs2 = "True", ) syscall_test( @@ -215,6 +217,7 @@ syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:flock_test", + vfs2 = "True", ) syscall_test( @@ -350,6 +353,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:mount_test", + vfs2 = "True", ) syscall_test( @@ -900,6 +904,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:stat_test", + vfs2 = "True", ) syscall_test( @@ -916,6 +921,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:symlink_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From 408f3d2cd64cae6b2f76a940c76236e9841c095f Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 18 Jun 2020 22:00:56 -0700 Subject: Fix vfs2 tmpfs link permission checks. Updates #2923. 
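For readers unfamiliar with the may_linkat-style check this change introduces, the following is a minimal userspace sketch (not part of the patch) of the behavior it mirrors. On Linux the same refusal is only enforced when the fs.protected_hardlinks sysctl is enabled, and the source/destination paths below are placeholders.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main() {
  // Placeholder: assume /tmp/readonly-src is a 0400 regular file owned by a
  // different, non-root user, so it is not a "safe" hard-link source.
  if (linkat(AT_FDCWD, "/tmp/readonly-src", AT_FDCWD, "/tmp/newlink", 0) != 0) {
    perror("linkat");  // Expected: Operation not permitted (EPERM).
  } else {
    puts("hard link created");
  }
  return 0;
}
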
PiperOrigin-RevId: 317246916 --- pkg/sentry/fsimpl/tmpfs/filesystem.go | 16 ++++++++++------ pkg/sentry/vfs/permissions.go | 31 +++++++++++++++++++++++++++++++ test/syscalls/BUILD | 1 + test/syscalls/linux/link.cc | 15 +++++++++++++-- test/util/test_util.h | 1 + 5 files changed, 56 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 72399b321..ac359cf7b 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -237,18 +237,22 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EXDEV } d := vd.Dentry().Impl().(*dentry) - if d.inode.isDir() { + i := d.inode + if i.isDir() { return syserror.EPERM } - if d.inode.nlink == 0 { + if err := vfs.MayLink(auth.CredentialsFromContext(ctx), linux.FileMode(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil { + return err + } + if i.nlink == 0 { return syserror.ENOENT } - if d.inode.nlink == maxLinks { + if i.nlink == maxLinks { return syserror.EMLINK } - d.inode.incLinksLocked() - d.inode.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent) - parentDir.insertChildLocked(fs.newDentry(d.inode), name) + i.incLinksLocked() + i.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent) + parentDir.insertChildLocked(fs.newDentry(i), name) return nil }) } diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go index f9647f90e..afe2be8d7 100644 --- a/pkg/sentry/vfs/permissions.go +++ b/pkg/sentry/vfs/permissions.go @@ -94,6 +94,37 @@ func GenericCheckPermissions(creds *auth.Credentials, ats AccessTypes, mode linu return syserror.EACCES } +// MayLink determines whether creating a hard link to a file with the given +// mode, kuid, and kgid is permitted. +// +// This corresponds to Linux's fs/namei.c:may_linkat. +func MayLink(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error { + // Source inode owner can hardlink all they like; otherwise, it must be a + // safe source. + if CanActAsOwner(creds, kuid) { + return nil + } + + // Only regular files can be hard linked. + if mode.FileType() != linux.S_IFREG { + return syserror.EPERM + } + + // Setuid files should not get pinned to the filesystem. + if mode&linux.S_ISUID != 0 { + return syserror.EPERM + } + + // Executable setgid files should not get pinned to the filesystem, but we + // don't support S_IXGRP anyway. + + // Hardlinking to unreadable or unwritable sources is dangerous. + if err := GenericCheckPermissions(creds, MayRead|MayWrite, mode, kuid, kgid); err != nil { + return syserror.EPERM + } + return nil +} + // AccessTypesForOpenFlags returns the access types required to open a file // with the given OpenOptions.Flags. 
Note that this is NOT the same thing as // the set of accesses permitted for the opened file: diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 1638a11c7..65a6a7f37 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -305,6 +305,7 @@ syscall_test( add_overlay = True, test = "//test/syscalls/linux:link_test", use_tmpfs = True, # gofer needs CAP_DAC_READ_SEARCH to use AT_EMPTY_PATH with linkat(2) + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/link.cc b/test/syscalls/linux/link.cc index e74fa2ed5..544681168 100644 --- a/test/syscalls/linux/link.cc +++ b/test/syscalls/linux/link.cc @@ -79,8 +79,13 @@ TEST(LinkTest, PermissionDenied) { // Make the file "unsafe" to link by making it only readable, but not // writable. - const auto oldfile = + const auto unwriteable_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0400)); + const std::string special_path = NewTempAbsPath(); + ASSERT_THAT(mkfifo(special_path.c_str(), 0666), SyscallSucceeds()); + const auto setuid_file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileMode(0666 | S_ISUID)); + const std::string newname = NewTempAbsPath(); // Do setuid in a separate thread so that after finishing this test, the @@ -97,8 +102,14 @@ TEST(LinkTest, PermissionDenied) { EXPECT_THAT(syscall(SYS_setuid, absl::GetFlag(FLAGS_scratch_uid)), SyscallSucceeds()); - EXPECT_THAT(link(oldfile.path().c_str(), newname.c_str()), + EXPECT_THAT(link(unwriteable_file.path().c_str(), newname.c_str()), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(link(special_path.c_str(), newname.c_str()), SyscallFailsWithErrno(EPERM)); + if (!IsRunningWithVFS1()) { + EXPECT_THAT(link(setuid_file.path().c_str(), newname.c_str()), + SyscallFailsWithErrno(EPERM)); + } }); } diff --git a/test/util/test_util.h b/test/util/test_util.h index 8e3245b27..e635827e6 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -220,6 +220,7 @@ constexpr char kKVM[] = "kvm"; bool IsRunningOnGvisor(); const std::string GvisorPlatform(); bool IsRunningWithHostinet(); +// TODO(gvisor.dev/issue/1624): Delete once VFS1 is gone. bool IsRunningWithVFS1(); #ifdef __linux__ -- cgit v1.2.3 From 46957ed24f21396683cf9aff13fa0cd3086ea466 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 19 Jun 2020 05:55:35 -0700 Subject: Fix synthetic file bugs in gofer fs. Always check if a synthetic file already exists at a location before creating a file there, and do not try to delete synthetic gofer files from the remote fs. This fixes runsc_ptrace socket tests that create/unlink synthetic, named socket files. Updates #2923. PiperOrigin-RevId: 317293648 --- pkg/sentry/fsimpl/gofer/filesystem.go | 19 +++++++++++-------- test/syscalls/BUILD | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 3c467e313..21eb976cb 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -374,13 +374,16 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir return nil } if fs.opts.interop == InteropModeShared { - // The existence of a dentry at name would be inconclusive because the - // file it represents may have been deleted from the remote filesystem, - // so we would need to make an RPC to revalidate the dentry. Just - // attempt the file creation RPC instead. If a file does exist, the RPC - // will fail with EEXIST like we would have. 
If the RPC succeeds, and a - // stale dentry exists, the dentry will fail revalidation next time - // it's used. + if child := parent.children[name]; child != nil && child.isSynthetic() { + return syserror.EEXIST + } + // The existence of a non-synthetic dentry at name would be inconclusive + // because the file it represents may have been deleted from the remote + // filesystem, so we would need to make an RPC to revalidate the dentry. + // Just attempt the file creation RPC instead. If a file does exist, the + // RPC will fail with EEXIST like we would have. If the RPC succeeds, and a + // stale dentry exists, the dentry will fail revalidation next time it's + // used. return createInRemoteDir(parent, name) } if child := parent.children[name]; child != nil { @@ -518,7 +521,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b if child == nil { return syserror.ENOENT } - } else { + } else if child == nil || !child.isSynthetic() { err = parent.file.unlinkAt(ctx, name, flags) if err != nil { if child != nil { diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 65a6a7f37..7b0248a39 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -57,6 +57,7 @@ syscall_test( size = "large", add_overlay = True, test = "//test/syscalls/linux:bind_test", + vfs2 = "True", ) syscall_test( @@ -700,6 +701,7 @@ syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:socket_filesystem_non_blocking_test", + vfs2 = "True", ) syscall_test( @@ -707,12 +709,14 @@ syscall_test( add_overlay = True, shard_count = 50, test = "//test/syscalls/linux:socket_filesystem_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_inet_loopback_test", + vfs2 = "True", ) syscall_test( @@ -721,6 +725,7 @@ syscall_test( # Takes too long for TSAN. Creates a lot of TCP sockets. 
tags = ["nogotsan"], test = "//test/syscalls/linux:socket_inet_loopback_nogotsan_test", + vfs2 = "True", ) syscall_test( @@ -796,6 +801,7 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:socket_blocking_local_test", + vfs2 = "True", ) syscall_test( @@ -805,6 +811,7 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:socket_non_stream_blocking_local_test", + vfs2 = "True", ) syscall_test( @@ -815,6 +822,7 @@ syscall_test( syscall_test( size = "large", test = "//test/syscalls/linux:socket_stream_blocking_local_test", + vfs2 = "True", ) syscall_test( @@ -826,11 +834,13 @@ syscall_test( syscall_test( size = "medium", test = "//test/syscalls/linux:socket_stream_local_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_stream_nonblock_local_test", + vfs2 = "True", ) syscall_test( @@ -838,6 +848,7 @@ syscall_test( size = "enormous", shard_count = 5, test = "//test/syscalls/linux:socket_unix_dgram_local_test", + vfs2 = "True", ) syscall_test( @@ -859,11 +870,13 @@ syscall_test( size = "enormous", shard_count = 5, test = "//test/syscalls/linux:socket_unix_seqpacket_local_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_stream_test", + vfs2 = "True", ) syscall_test( @@ -881,6 +894,7 @@ syscall_test( syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_unbound_filesystem_test", + vfs2 = "True", ) syscall_test( @@ -894,6 +908,7 @@ syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_unix_unbound_stream_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From f40d023ad6f8c19898ca105842a88961b3c2994c Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 19 Jun 2020 08:44:26 -0700 Subject: Don't adjust parent link count if we replace a child dir with another. Updates #2923. PiperOrigin-RevId: 317314460 --- pkg/sentry/fsimpl/gofer/filesystem.go | 3 ++- test/syscalls/BUILD | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 21eb976cb..5501781ac 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -1196,7 +1196,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if newParent.cachedMetadataAuthoritative() { newParent.dirents = nil newParent.touchCMtime() - if renamed.isDir() { + if renamed.isDir() && (replaced == nil || !replaced.isDir()) { + // Increase the link count if we did not replace another directory. newParent.incLinks() } } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 7b0248a39..131f09ab4 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -572,6 +572,7 @@ syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:rename_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From a609fff9d1516a095341c2016ec36f952550a46f Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 19 Jun 2020 10:18:35 -0700 Subject: Fix vfs2 handling of preadv2/pwritev2 flags. Check for unsupported flags, and silently support RWF_HIPRI by doing nothing. From pkg/abi/linux/file.go: "gVisor does not implement the RWF_HIPRI feature, but the flag is accepted as a valid flag argument for preadv2/pwritev2." Updates #2923. 
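As a rough illustration of the flag handling described above (a sketch, not part of the patch), the program below issues a preadv2() carrying RWF_HIPRI; the flag is accepted as a hint and otherwise ignored. It assumes a glibc new enough to provide the preadv2() wrapper, and the file path is an arbitrary placeholder for any readable file.

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main() {
  int fd = open("/etc/hostname", O_RDONLY);
  if (fd < 0) {
    perror("open");
    return 1;
  }
  char buf[128];
  struct iovec iov;
  iov.iov_base = buf;
  iov.iov_len = sizeof(buf);
  // The high-priority hint is accepted but has no effect; the read itself
  // behaves like an ordinary positioned read at offset 0.
  ssize_t n = preadv2(fd, &iov, 1, /*offset=*/0, RWF_HIPRI);
  if (n < 0) {
    perror("preadv2");
  } else {
    printf("read %zd bytes with RWF_HIPRI\n", n);
  }
  close(fd);
  return 0;
}
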
PiperOrigin-RevId: 317330631 --- pkg/sentry/fsimpl/gofer/regular_file.go | 9 +++++++-- pkg/sentry/fsimpl/gofer/special_file.go | 8 ++++++-- pkg/sentry/fsimpl/tmpfs/regular_file.go | 12 ++++++++++++ test/syscalls/BUILD | 1 + 4 files changed, 26 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index f7a15a2ed..404a452c4 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -72,7 +72,9 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs if offset < 0 { return 0, syserror.EINVAL } - if opts.Flags != 0 { + + // Check that flags are supported. Silently ignore RWF_HIPRI. + if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } @@ -123,9 +125,12 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off if offset < 0 { return 0, syserror.EINVAL } - if opts.Flags != 0 { + + // Check that flags are supported. Silently ignore RWF_HIPRI. + if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } + limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes()) if err != nil { return 0, err diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index a92008208..a016cbae1 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -129,7 +129,9 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs if fd.seekable && offset < 0 { return 0, syserror.EINVAL } - if opts.Flags != 0 { + + // Check that flags are supported. Silently ignore RWF_HIPRI. + if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } @@ -173,7 +175,9 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off if fd.seekable && offset < 0 { return 0, syserror.EINVAL } - if opts.Flags != 0 { + + // Check that flags are supported. Silently ignore RWF_HIPRI. + if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index bfd9c5995..b805aadd0 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -279,6 +279,12 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs if offset < 0 { return 0, syserror.EINVAL } + + // Check that flags are supported. Silently ignore RWF_HIPRI. + if opts.Flags&^linux.RWF_HIPRI != 0 { + return 0, syserror.EOPNOTSUPP + } + if dst.NumBytes() == 0 { return 0, nil } @@ -304,6 +310,12 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off if offset < 0 { return 0, syserror.EINVAL } + + // Check that flags are supported. Silently ignore RWF_HIPRI. + if opts.Flags&^linux.RWF_HIPRI != 0 { + return 0, syserror.EOPNOTSUPP + } + srclen := src.NumBytes() if srclen == 0 { return 0, nil diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 131f09ab4..9555dc308 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -463,6 +463,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:preadv2_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From 7db196c4dbb33562dbfaa28a9476c590f356b2b5 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 19 Jun 2020 11:48:24 -0700 Subject: Port fadvise64 to vfs2. Like vfs1, we have a trivial implementation that ignores all valid advice. Updates #2923. 
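A minimal sketch (not part of the patch) of what "accepts but ignores the advice" means from userspace: valid advice succeeds with no observable effect, while an out-of-range advice value is still rejected. The file path is a placeholder and the bogus advice value is chosen arbitrarily.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main() {
  int fd = open("/etc/hostname", O_RDONLY);
  if (fd < 0) {
    perror("open");
    return 1;
  }
  // posix_fadvise() returns the error number directly instead of setting
  // errno; 0 means the advice was accepted (and, here, ignored).
  int err = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
  printf("POSIX_FADV_SEQUENTIAL -> %d\n", err);
  // Advice outside the known set is still rejected.
  err = posix_fadvise(fd, 0, 0, 1000);
  printf("bogus advice -> %d (EINVAL == %d)\n", err, EINVAL);
  close(fd);
  return 0;
}
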
PiperOrigin-RevId: 317349505 --- pkg/abi/linux/BUILD | 1 + pkg/abi/linux/fadvise.go | 24 +++++++++++++++++++++ pkg/sentry/syscalls/linux/sys_file.go | 25 ++++++---------------- pkg/sentry/syscalls/linux/vfs2/fd.go | 38 ++++++++++++++++++++++++++++++++++ pkg/sentry/syscalls/linux/vfs2/vfs2.go | 2 +- test/syscalls/BUILD | 1 + 6 files changed, 71 insertions(+), 20 deletions(-) create mode 100644 pkg/abi/linux/fadvise.go (limited to 'test') diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index 114b516e2..2b789c4ec 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -23,6 +23,7 @@ go_library( "errors.go", "eventfd.go", "exec.go", + "fadvise.go", "fcntl.go", "file.go", "file_amd64.go", diff --git a/pkg/abi/linux/fadvise.go b/pkg/abi/linux/fadvise.go new file mode 100644 index 000000000..b06ff9964 --- /dev/null +++ b/pkg/abi/linux/fadvise.go @@ -0,0 +1,24 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +const ( + POSIX_FADV_NORMAL = 0 + POSIX_FADV_RANDOM = 1 + POSIX_FADV_SEQUENTIAL = 2 + POSIX_FADV_WILLNEED = 3 + POSIX_FADV_DONTNEED = 4 + POSIX_FADV_NOREUSE = 5 +) diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 696e1c8d3..35eba20c5 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -1111,17 +1111,6 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } } -// LINT.ThenChange(vfs2/fd.go) - -const ( - _FADV_NORMAL = 0 - _FADV_RANDOM = 1 - _FADV_SEQUENTIAL = 2 - _FADV_WILLNEED = 3 - _FADV_DONTNEED = 4 - _FADV_NOREUSE = 5 -) - // Fadvise64 implements linux syscall fadvise64(2). // This implementation currently ignores the provided advice. 
func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { @@ -1146,12 +1135,12 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys } switch advice { - case _FADV_NORMAL: - case _FADV_RANDOM: - case _FADV_SEQUENTIAL: - case _FADV_WILLNEED: - case _FADV_DONTNEED: - case _FADV_NOREUSE: + case linux.POSIX_FADV_NORMAL: + case linux.POSIX_FADV_RANDOM: + case linux.POSIX_FADV_SEQUENTIAL: + case linux.POSIX_FADV_WILLNEED: + case linux.POSIX_FADV_DONTNEED: + case linux.POSIX_FADV_NOREUSE: default: return 0, nil, syserror.EINVAL } @@ -1160,8 +1149,6 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, nil } -// LINT.IfChange - func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index f5eaa076b..e68b20bed 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -210,3 +210,41 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip return syserror.EINVAL } } + +// Fadvise64 implements fadvise64(2). +// This implementation currently ignores the provided advice. +func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + length := args[2].Int64() + advice := args[3].Int() + + // Note: offset is allowed to be negative. + if length < 0 { + return 0, nil, syserror.EINVAL + } + + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // If the FD refers to a pipe or FIFO, return error. + if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe { + return 0, nil, syserror.ESPIPE + } + + switch advice { + case linux.POSIX_FADV_NORMAL: + case linux.POSIX_FADV_RANDOM: + case linux.POSIX_FADV_SEQUENTIAL: + case linux.POSIX_FADV_WILLNEED: + case linux.POSIX_FADV_DONTNEED: + case linux.POSIX_FADV_NOREUSE: + default: + return 0, nil, syserror.EINVAL + } + + // Sure, whatever. + return 0, nil, nil +} diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index caa6a98ff..b463edf2a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -108,7 +108,7 @@ func Override() { s.Table[209] = syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}) s.Table[213] = syscalls.Supported("epoll_create", EpollCreate) s.Table[217] = syscalls.Supported("getdents64", Getdents64) - delete(s.Table, 221) // fdavise64 + s.Table[221] = syscalls.PartiallySupported("fadvise64", Fadvise64, "The syscall is 'supported', but ignores all provided advice.", nil) s.Table[232] = syscalls.Supported("epoll_wait", EpollWait) s.Table[233] = syscalls.Supported("epoll_ctl", EpollCtl) s.Table[235] = syscalls.Supported("utimes", Utimes) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 9555dc308..3479358b6 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -191,6 +191,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:fadvise64_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From f0feada89c3a2c01870e5ab6d18caae8b6a8848a Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Fri, 19 Jun 2020 14:12:33 -0700 Subject: Use internal tmpfs in test runner, even when running with overlay. PiperOrigin-RevId: 317377571 --- test/runner/defs.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 5a83f8060..921e499be 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -195,7 +195,7 @@ def syscall_test( shard_count = shard_count, size = size, platform = default_platform, - use_tmpfs = False, # overlay is adding a writable tmpfs on top of root. + use_tmpfs = use_tmpfs, add_uds_tree = add_uds_tree, tags = platforms[default_platform] + tags, overlay = True, -- cgit v1.2.3 From f46f4a2af98a2a5cf5dd54e71a1a2dc999d4b4b1 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 19 Jun 2020 18:26:04 -0700 Subject: Enable passing vfs2 tests. I forgot to update getdents earlier. Several thousand runs of the fsync and proc_net_unix tests all passed as well. Updates #2923. PiperOrigin-RevId: 317415488 --- test/syscalls/BUILD | 3 +++ 1 file changed, 3 insertions(+) (limited to 'test') diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 3479358b6..46a98a4cd 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -240,6 +240,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:fsync_test", + vfs2 = "True", ) syscall_test( @@ -262,6 +263,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:getdents_test", + vfs2 = "True", ) syscall_test( @@ -1091,6 +1093,7 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:proc_net_unix_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From a480b4faf4befb029bf905fdb604996c8312a6a2 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Mon, 22 Jun 2020 09:52:51 -0700 Subject: Allow readdir(/proc/[tid]/net) to return EINVAL on a zombie task. Despite what the man page says, linux will return EINVAL when calling getdents() an a /proc/[tid]/net file corresponding to a zombie task. This causes readdir() to return a null pointer AND errno=EINVAL. See fs/proc/proc_net.c:proc_tgid_net_readdir() for where this occurs. We have tests that recursively read /proc, and are likely to hit this when running natively, so we must catch and handle this case. 
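The sketch below (not part of the patch) shows the readdir() idiom a recursive /proc walker needs in order to tell this EINVAL apart from a normal end of directory: clear errno before each call and inspect it only when NULL comes back. The directory path is just an example.

#include <dirent.h>
#include <errno.h>
#include <stdio.h>

int main() {
  DIR* dir = opendir("/proc/self/net");
  if (dir == nullptr) {
    perror("opendir");
    return 1;
  }
  for (;;) {
    errno = 0;
    struct dirent* dp = readdir(dir);
    if (dp == nullptr) {
      if (errno != 0) {
        perror("readdir");  // e.g. EINVAL for a zombie task's net directory.
      }
      break;  // errno == 0 means we simply reached the end.
    }
    printf("%s\n", dp->d_name);
  }
  closedir(dir);
  return 0;
}
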
PiperOrigin-RevId: 317674168 --- test/syscalls/linux/proc.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index d73d392d5..923699ed3 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1967,6 +1967,17 @@ void CheckDuplicatesRecursively(std::string path) { errno = 0; struct dirent* dp = readdir(dir); if (dp == nullptr) { + // Linux will return EINVAL when calling getdents on a /proc/tid/net + // file corresponding to a zombie task. + // See fs/proc/proc_net.c:proc_tgid_net_readdir(). + // + // We just ignore the directory in this case. + if (errno == EINVAL && absl::StartsWith(path, "/proc/") && + absl::EndsWith(path, "/net")) { + break; + } + + // Otherwise, no errors are allowed. ASSERT_EQ(errno, 0) << path; break; // We're done. } -- cgit v1.2.3 From 4573e7d863d59d59c6a4f72f396f72b0f6458cb2 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 22 Jun 2020 11:38:25 -0700 Subject: Check for invalid trailing / when traversing path in gofer OpenAt. Updates #2923. PiperOrigin-RevId: 317700049 --- pkg/sentry/fsimpl/gofer/filesystem.go | 8 +++++--- test/syscalls/BUILD | 1 + test/syscalls/linux/open.cc | 6 ++++++ 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 5501781ac..f065c4bad 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -767,15 +767,17 @@ afterTrailingSymlink: parent.dirMu.Unlock() return fd, err } + parent.dirMu.Unlock() if err != nil { - parent.dirMu.Unlock() return nil, err } - // Open existing child or follow symlink. - parent.dirMu.Unlock() if mustCreate { return nil, syserror.EEXIST } + if !child.isDir() && rp.MustBeDir() { + return nil, syserror.ENOTDIR + } + // Open existing child or follow symlink. if child.isSymlink() && rp.ShouldFollowSymlink() { target, err := child.readlink(ctx, rp.Mount()) if err != nil { diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 46a98a4cd..f94c383ae 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -181,6 +181,7 @@ syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:exec_binary_test", + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index 670c0284b..bb7d108e8 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -439,6 +439,12 @@ TEST_F(OpenTest, CanTruncateWithStrangePermissions) { EXPECT_THAT(close(fd), SyscallSucceeds()); } +TEST_F(OpenTest, OpenNonDirectoryWithTrailingSlash) { + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string bad_path = file.path() + "/"; + EXPECT_THAT(open(bad_path.c_str(), O_RDONLY), SyscallFailsWithErrno(ENOTDIR)); +} + } // namespace } // namespace testing -- cgit v1.2.3 From ca1bc46f1540cdef2743bc2cae0eac89d0e61c69 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 23 Jun 2020 14:40:44 -0700 Subject: Internal change. PiperOrigin-RevId: 317941748 --- test/perf/linux/futex_benchmark.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/perf/linux/futex_benchmark.cc b/test/perf/linux/futex_benchmark.cc index 241f39896..e686041c9 100644 --- a/test/perf/linux/futex_benchmark.cc +++ b/test/perf/linux/futex_benchmark.cc @@ -84,9 +84,9 @@ BENCHMARK(BM_FutexWaitNop)->MinTime(5); // changes, such that it always times out. 
Timeout overhead can be estimated by // timer overruns for short timeouts. void BM_FutexWaitMonotonicTimeout(benchmark::State& state) { - const int timeout_ns = state.range(0); + const absl::Duration timeout = absl::Nanoseconds(state.range(0)); std::atomic v(0); - auto ts = absl::ToTimespec(absl::Nanoseconds(timeout_ns)); + auto ts = absl::ToTimespec(timeout); for (auto _ : state) { TEST_PCHECK(FutexWaitMonotonicTimeout(&v, 0, &ts) == -1 && -- cgit v1.2.3 From edea9a8d02d9877a05bb14046a26ebd9633f34f6 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Tue, 23 Jun 2020 14:51:43 -0700 Subject: Port readahead to VFS2. It preserves the same functionality (almost none) as in VFS1. Updates #2923 #1035 PiperOrigin-RevId: 317943522 --- pkg/sentry/syscalls/linux/vfs2/read_write.go | 33 ++++++++++++++++++++++++++++ pkg/sentry/syscalls/linux/vfs2/vfs2.go | 2 +- test/syscalls/BUILD | 1 + 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index 7f9debd4a..cd25597a7 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -606,3 +606,36 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall newoff, err := file.Seek(t, offset, whence) return uintptr(newoff), nil, err } + +// Readahead implements readahead(2). +func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + offset := args[1].Int64() + size := args[2].SizeT() + + file := t.GetFileVFS2(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + // Check that the file is readable. + if !file.IsReadable() { + return 0, nil, syserror.EBADF + } + + // Check that the size is valid. + if int(size) < 0 { + return 0, nil, syserror.EINVAL + } + + // Check that the offset is legitimate and does not overflow. + if offset < 0 || offset+int64(size) < 0 { + return 0, nil, syserror.EINVAL + } + + // Return EINVAL; if the underlying file type does not support readahead, + // then Linux will return EINVAL to indicate as much. In the future, we + // may extend this function to actually support readahead hints. + return 0, nil, syserror.EINVAL +} diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index b463edf2a..9e60c4a1c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -92,7 +92,7 @@ func Override() { s.Table[162] = syscalls.Supported("sync", Sync) s.Table[165] = syscalls.Supported("mount", Mount) s.Table[166] = syscalls.Supported("umount2", Umount2) - delete(s.Table, 187) // readahead + s.Table[187] = syscalls.Supported("readahead", Readahead) s.Table[188] = syscalls.Supported("setxattr", Setxattr) s.Table[189] = syscalls.Supported("lsetxattr", Lsetxattr) s.Table[190] = syscalls.Supported("fsetxattr", Fsetxattr) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index f94c383ae..8ce5d24f4 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -557,6 +557,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:readahead_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From 793edf4cb4597751b7f2b7b913a5ab7fa3d50373 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Tue, 23 Jun 2020 16:05:39 -0700 Subject: Deflake proc test: Don't fail on DT_UNKNOWN. Per manual page: "All applications must properly handle a return of DT_UNKNOWN." 
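For illustration only (not part of the patch), here is the portable fallback a directory walker needs once DT_UNKNOWN is tolerated: when d_type carries no information, ask the filesystem directly with fstatat(). The directory path is an arbitrary example.

#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main() {
  DIR* dir = opendir("/proc");
  if (dir == nullptr) {
    perror("opendir");
    return 1;
  }
  struct dirent* dp;
  while ((dp = readdir(dir)) != nullptr) {
    bool is_dir = (dp->d_type == DT_DIR);
    if (dp->d_type == DT_UNKNOWN) {
      // The entry type is not provided; stat the entry to find out.
      struct stat st;
      if (fstatat(dirfd(dir), dp->d_name, &st, AT_SYMLINK_NOFOLLOW) == 0) {
        is_dir = S_ISDIR(st.st_mode);
      }
    }
    printf("%-20s %s\n", dp->d_name, is_dir ? "dir" : "other");
  }
  closedir(dir);
  return 0;
}
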
PiperOrigin-RevId: 317957013 --- test/syscalls/linux/proc.cc | 2 -- 1 file changed, 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 923699ed3..da8f30f32 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1998,8 +1998,6 @@ void CheckDuplicatesRecursively(std::string path) { } children.insert(std::string(dp->d_name)); - ASSERT_NE(dp->d_type, DT_UNKNOWN); - if (dp->d_type == DT_DIR) { child_dirs.push_back(std::string(dp->d_name)); } -- cgit v1.2.3 From 0c628c3152a727fff287a98897d83ee45ad990e5 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 23 Jun 2020 16:11:31 -0700 Subject: Support inotify in vfs2 gofer fs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because there is no inode structure stored in the sandbox, inotify watches must be held on the dentry. This would be an issue in the presence of hard links, where multiple dentries would need to share the same set of watches, but in VFS2, we do not support the internal creation of hard links on gofer fs. As a result, we make the assumption that every dentry corresponds to a unique inode. Furthermore, dentries can be cached and then evicted, even if the underlying file has not be deleted. We must prevent this from occurring if there are any watches that would be lost. Note that if the dentry was deleted or invalidated (d.vfsd.IsDead()), we should still destroy it along with its watches. Additionally, when a dentry’s last watch is removed, we cache it if it also has zero references. This way, the dentry can eventually be evicted from memory if it is no longer needed. This is accomplished with a new dentry method, OnZeroWatches(), which is called by Inotify.RmWatch and Inotify.Release. Note that it must be called after all inotify locks are released to avoid violating lock order. Stress tests are added to make sure that inotify operations don't deadlock with gofer.OnZeroWatches. Updates #1479. PiperOrigin-RevId: 317958034 --- pkg/sentry/fsimpl/ext/dentry.go | 11 +- pkg/sentry/fsimpl/gofer/directory.go | 1 + pkg/sentry/fsimpl/gofer/filesystem.go | 59 ++++++- pkg/sentry/fsimpl/gofer/gofer.go | 73 +++++++-- pkg/sentry/fsimpl/kernfs/kernfs.go | 7 +- pkg/sentry/fsimpl/overlay/overlay.go | 5 + pkg/sentry/fsimpl/tmpfs/directory.go | 4 +- pkg/sentry/fsimpl/tmpfs/filesystem.go | 12 +- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 19 ++- pkg/sentry/syscalls/linux/vfs2/inotify.go | 7 +- pkg/sentry/vfs/anonfs.go | 7 +- pkg/sentry/vfs/dentry.go | 32 +++- pkg/sentry/vfs/inotify.go | 108 +++++++++---- pkg/sentry/vfs/vfs.go | 3 + test/syscalls/linux/BUILD | 1 + test/syscalls/linux/inotify.cc | 251 +++++++++++++++++++++++++++++- 16 files changed, 523 insertions(+), 77 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go index 6bd1a9fc6..55902322a 100644 --- a/pkg/sentry/fsimpl/ext/dentry.go +++ b/pkg/sentry/fsimpl/ext/dentry.go @@ -63,12 +63,17 @@ func (d *dentry) DecRef() { // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. // -// TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} +// TODO(b/134676337): Implement inotify. +func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) {} // Watches implements vfs.DentryImpl.Watches. // -// TODO(gvisor.dev/issue/1479): Implement inotify. +// TODO(b/134676337): Implement inotify. 
func (d *dentry) Watches() *vfs.Watches { return nil } + +// OnZeroWatches implements vfs.Dentry.OnZeroWatches. +// +// TODO(b/134676337): Implement inotify. +func (d *dentry) OnZeroWatches() {} diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index 480510b2a..5d83fe363 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -138,6 +138,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba fd.dirents = ds } + d.InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) if d.cachedMetadataAuthoritative() { d.touchAtime(fd.vfsfd.Mount()) } diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index f065c4bad..0321fd384 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -371,6 +371,11 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir } parent.touchCMtime() parent.dirents = nil + ev := linux.IN_CREATE + if dir { + ev |= linux.IN_ISDIR + } + parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent) return nil } if fs.opts.interop == InteropModeShared { @@ -400,6 +405,11 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir } parent.touchCMtime() parent.dirents = nil + ev := linux.IN_CREATE + if dir { + ev |= linux.IN_ISDIR + } + parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent) return nil } @@ -530,6 +540,18 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b return err } } + + // Generate inotify events for rmdir or unlink. + if dir { + parent.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent) + } else { + var cw *vfs.Watches + if child != nil { + cw = &child.watches + } + vfs.InotifyRemoveChild(cw, &parent.watches, name) + } + if child != nil { vfsObj.CommitDeleteDentry(&child.vfsd) child.setDeleted() @@ -1018,6 +1040,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } childVFSFD = &fd.vfsfd } + d.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent) return childVFSFD, nil } @@ -1203,6 +1226,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa newParent.incLinks() } } + vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir()) return nil } @@ -1215,12 +1239,21 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { + fs.renameMuRUnlockAndCheckCaching(&ds) + return err + } + if err := d.setStat(ctx, rp.Credentials(), &opts.Stat, rp.Mount()); err != nil { + fs.renameMuRUnlockAndCheckCaching(&ds) return err } - return d.setStat(ctx, rp.Credentials(), &opts.Stat, rp.Mount()) + fs.renameMuRUnlockAndCheckCaching(&ds) + + if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { + d.InotifyWithParent(ev, 0, vfs.InodeEvent) + } + return nil } // StatAt implements vfs.FilesystemImpl.StatAt. 
@@ -1344,24 +1377,38 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { + fs.renameMuRUnlockAndCheckCaching(&ds) return err } - return d.setxattr(ctx, rp.Credentials(), &opts) + if err := d.setxattr(ctx, rp.Credentials(), &opts); err != nil { + fs.renameMuRUnlockAndCheckCaching(&ds) + return err + } + fs.renameMuRUnlockAndCheckCaching(&ds) + + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + return nil } // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { + fs.renameMuRUnlockAndCheckCaching(&ds) + return err + } + if err := d.removexattr(ctx, rp.Credentials(), name); err != nil { + fs.renameMuRUnlockAndCheckCaching(&ds) return err } - return d.removexattr(ctx, rp.Credentials(), name) + fs.renameMuRUnlockAndCheckCaching(&ds) + + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + return nil } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 43c8153a4..c6c284ff3 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -665,6 +665,9 @@ type dentry struct { pipe *pipe.VFSPipe locks vfs.FileLocks + + // Inotify watches for this dentry. + watches vfs.Watches } // dentryAttrMask returns a p9.AttrMask enabling all attributes used by the @@ -947,6 +950,8 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin } else { atomic.StoreInt64(&d.atime, dentryTimestampFromStatx(stat.Atime)) } + // Restore mask bits that we cleared earlier. + stat.Mask |= linux.STATX_ATIME } if setLocalMtime { if stat.Mtime.Nsec == linux.UTIME_NOW { @@ -954,6 +959,8 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin } else { atomic.StoreInt64(&d.mtime, dentryTimestampFromStatx(stat.Mtime)) } + // Restore mask bits that we cleared earlier. + stat.Mask |= linux.STATX_MTIME } atomic.StoreInt64(&d.ctime, now) if stat.Mask&linux.STATX_SIZE != 0 { @@ -1051,15 +1058,34 @@ func (d *dentry) decRefLocked() { } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. -// -// TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} +func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) { + if d.isDir() { + events |= linux.IN_ISDIR + } + + d.fs.renameMu.RLock() + // The ordering below is important, Linux always notifies the parent first. + if d.parent != nil { + d.parent.watches.NotifyWithExclusions(d.name, events, cookie, et, d.isDeleted()) + } + d.watches.Notify("", events, cookie, et) + d.fs.renameMu.RUnlock() +} // Watches implements vfs.DentryImpl.Watches. -// -// TODO(gvisor.dev/issue/1479): Implement inotify. func (d *dentry) Watches() *vfs.Watches { - return nil + return &d.watches +} + +// OnZeroWatches implements vfs.DentryImpl.OnZeroWatches. +// +// If no watches are left on this dentry and it has no references, cache it. 
+func (d *dentry) OnZeroWatches() { + if atomic.LoadInt64(&d.refs) == 0 { + d.fs.renameMu.Lock() + d.checkCachingLocked() + d.fs.renameMu.Unlock() + } } // checkCachingLocked should be called after d's reference count becomes 0 or it @@ -1093,6 +1119,9 @@ func (d *dentry) checkCachingLocked() { // Deleted and invalidated dentries with zero references are no longer // reachable by path resolution and should be dropped immediately. if d.vfsd.IsDead() { + if d.isDeleted() { + d.watches.HandleDeletion() + } if d.cached { d.fs.cachedDentries.Remove(d) d.fs.cachedDentriesLen-- @@ -1101,6 +1130,14 @@ func (d *dentry) checkCachingLocked() { d.destroyLocked() return } + // If d still has inotify watches and it is not deleted or invalidated, we + // cannot cache it and allow it to be evicted. Otherwise, we will lose its + // watches, even if a new dentry is created for the same file in the future. + // Note that the size of d.watches cannot concurrently transition from zero + // to non-zero, because adding a watch requires holding a reference on d. + if d.watches.Size() > 0 { + return + } // If d is already cached, just move it to the front of the LRU. if d.cached { d.fs.cachedDentries.Remove(d) @@ -1277,7 +1314,7 @@ func (d *dentry) userXattrSupported() bool { return filetype == linux.S_IFREG || filetype == linux.S_IFDIR } -// Preconditions: !d.isSynthetic(). d.isRegularFile() || d.isDirectory(). +// Preconditions: !d.isSynthetic(). d.isRegularFile() || d.isDir(). func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error { // O_TRUNC unconditionally requires us to obtain a new handle (opened with // O_TRUNC). @@ -1422,7 +1459,13 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu // SetStat implements vfs.FileDescriptionImpl.SetStat. func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - return fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, fd.vfsfd.Mount()) + if err := fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, fd.vfsfd.Mount()); err != nil { + return err + } + if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { + fd.dentry().InotifyWithParent(ev, 0, vfs.InodeEvent) + } + return nil } // Listxattr implements vfs.FileDescriptionImpl.Listxattr. @@ -1437,12 +1480,22 @@ func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOption // Setxattr implements vfs.FileDescriptionImpl.Setxattr. func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { - return fd.dentry().setxattr(ctx, auth.CredentialsFromContext(ctx), &opts) + d := fd.dentry() + if err := d.setxattr(ctx, auth.CredentialsFromContext(ctx), &opts); err != nil { + return err + } + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + return nil } // Removexattr implements vfs.FileDescriptionImpl.Removexattr. func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { - return fd.dentry().removexattr(ctx, auth.CredentialsFromContext(ctx), name) + d := fd.dentry() + if err := d.removexattr(ctx, auth.CredentialsFromContext(ctx), name); err != nil { + return err + } + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + return nil } // LockBSD implements vfs.FileDescriptionImpl.LockBSD. 
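The gofer changes above pin a dentry in memory while it still has inotify watches: checkCachingLocked refuses to make a watched dentry evictable, and OnZeroWatches re-runs the caching decision once the last watch is gone. Below is a rough standalone sketch of that lifecycle; the cache and entry types are hypothetical and far simpler than the real dentry cache.

package main

import "fmt"

type entry struct {
	name    string
	refs    int
	watches int
	deleted bool
}

type cache struct {
	lru []*entry // Evictable entries, oldest first.
}

// checkCaching mirrors the shape of checkCachingLocked: deleted entries are
// dropped, watched entries are pinned, everything else becomes evictable.
func (c *cache) checkCaching(e *entry) {
	if e.refs > 0 {
		return // Still in use.
	}
	if e.deleted {
		fmt.Println("destroying", e.name)
		return
	}
	if e.watches > 0 {
		// Cannot evict: doing so would silently drop the watches.
		fmt.Println("pinning", e.name, "while watched")
		return
	}
	c.lru = append(c.lru, e)
	fmt.Println("cached", e.name)
}

// onZeroWatches is the analogue of OnZeroWatches: once the last watch is
// removed, the entry may become cacheable, so re-run the check.
func (c *cache) onZeroWatches(e *entry) {
	if e.refs == 0 {
		c.checkCaching(e)
	}
}

func main() {
	c := &cache{}
	e := &entry{name: "foo", watches: 1}
	c.checkCaching(e) // Pinned while watched.
	e.watches = 0
	c.onZeroWatches(e) // Now cached.
}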
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 07a9dc830..55349f2a3 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -228,7 +228,7 @@ func (d *Dentry) destroy() { // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. // // TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *Dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} +func (d *Dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) {} // Watches implements vfs.DentryImpl.Watches. // @@ -237,6 +237,11 @@ func (d *Dentry) Watches() *vfs.Watches { return nil } +// OnZeroWatches implements vfs.Dentry.OnZeroWatches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *Dentry) OnZeroWatches() {} + // InsertChild inserts child into the vfs dentry cache with the given name under // this dentry. This does not update the directory inode, so calling this on // its own isn't sufficient to insert a child into a directory. InsertChild diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index e11a3ff19..e720d4825 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -528,6 +528,11 @@ func (d *dentry) Watches() *vfs.Watches { return nil } +// OnZeroWatches implements vfs.DentryImpl.OnZeroWatches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *dentry) OnZeroWatches() {} + // iterLayers invokes yield on each layer comprising d, from top to bottom. If // any call to yield returns false, iterLayer stops iteration. func (d *dentry) iterLayers(yield func(vd vfs.VirtualDentry, isUpper bool) bool) { diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index 913b8a6c5..b172abc15 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -79,7 +79,6 @@ func (dir *directory) removeChildLocked(child *dentry) { dir.iterMu.Lock() dir.childList.Remove(child) dir.iterMu.Unlock() - child.unlinked = true } type directoryFD struct { @@ -107,13 +106,14 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba fs := fd.filesystem() dir := fd.inode().impl.(*directory) + defer fd.dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + // fs.mu is required to read d.parent and dentry.name. fs.mu.RLock() defer fs.mu.RUnlock() dir.iterMu.Lock() defer dir.iterMu.Unlock() - fd.dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) fd.inode().touchAtime(fd.vfsfd.Mount()) if fd.off == 0 { diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 637d84e04..85d8e37b2 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -638,14 +638,16 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error // SetStatAt implements vfs.FilesystemImpl.SetStatAt. 
func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { fs.mu.RLock() - defer fs.mu.RUnlock() d, err := resolveLocked(rp) if err != nil { + fs.mu.RUnlock() return err } if err := d.inode.setStat(ctx, rp.Credentials(), &opts.Stat); err != nil { + fs.mu.RUnlock() return err } + fs.mu.RUnlock() if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { d.InotifyWithParent(ev, 0, vfs.InodeEvent) @@ -788,14 +790,16 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { fs.mu.RLock() - defer fs.mu.RUnlock() d, err := resolveLocked(rp) if err != nil { + fs.mu.RUnlock() return err } if err := d.inode.setxattr(rp.Credentials(), &opts); err != nil { + fs.mu.RUnlock() return err } + fs.mu.RUnlock() d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil @@ -804,14 +808,16 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { fs.mu.RLock() - defer fs.mu.RUnlock() d, err := resolveLocked(rp) if err != nil { + fs.mu.RUnlock() return err } if err := d.inode.removexattr(rp.Credentials(), name); err != nil { + fs.mu.RUnlock() return err } + fs.mu.RUnlock() d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index a94333ee0..a85bfc968 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -215,11 +215,6 @@ type dentry struct { // filesystem.mu. name string - // unlinked indicates whether this dentry has been unlinked from its parent. - // It is only set to true on an unlink operation, and never set from true to - // false. unlinked is protected by filesystem.mu. - unlinked bool - // dentryEntry (ugh) links dentries into their parent directory.childList. dentryEntry @@ -259,18 +254,20 @@ func (d *dentry) DecRef() { } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. -func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) { +func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) { if d.inode.isDir() { events |= linux.IN_ISDIR } + d.inode.fs.mu.RLock() // The ordering below is important, Linux always notifies the parent first. if d.parent != nil { - // Note that d.parent or d.name may be stale if there is a concurrent - // rename operation. Inotify does not provide consistency guarantees. - d.parent.inode.watches.NotifyWithExclusions(d.name, events, cookie, et, d.unlinked) + // tmpfs never calls VFS.InvalidateDentry(), so d.vfsd.IsDead() indicates + // that d was deleted. + d.parent.inode.watches.NotifyWithExclusions(d.name, events, cookie, et, d.vfsd.IsDead()) } d.inode.watches.Notify("", events, cookie, et) + d.inode.fs.mu.RUnlock() } // Watches implements vfs.DentryImpl.Watches. @@ -278,6 +275,9 @@ func (d *dentry) Watches() *vfs.Watches { return &d.inode.watches } +// OnZeroWatches implements vfs.Dentry.OnZeroWatches. +func (d *dentry) OnZeroWatches() {} + // inode represents a filesystem object. type inode struct { // fs is the owning filesystem. fs is immutable. 
@@ -336,7 +336,6 @@ func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth i.ctime = now i.mtime = now // i.nlink initialized by caller - i.watches = vfs.Watches{} i.impl = impl } diff --git a/pkg/sentry/syscalls/linux/vfs2/inotify.go b/pkg/sentry/syscalls/linux/vfs2/inotify.go index 7d50b6a16..74e5287b7 100644 --- a/pkg/sentry/syscalls/linux/vfs2/inotify.go +++ b/pkg/sentry/syscalls/linux/vfs2/inotify.go @@ -116,8 +116,11 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern } defer d.DecRef() - fd = ino.AddWatch(d.Dentry(), mask) - return uintptr(fd), nil, err + fd, err = ino.AddWatch(d.Dentry(), mask) + if err != nil { + return 0, nil, err + } + return uintptr(fd), nil, nil } // InotifyRmWatch implements the inotify_rm_watch() syscall. diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index b7c6b60b8..746af03ec 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -301,7 +301,7 @@ func (d *anonDentry) DecRef() { // InotifyWithParent implements DentryImpl.InotifyWithParent. // // TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *anonDentry) InotifyWithParent(events uint32, cookie uint32, et EventType) {} +func (d *anonDentry) InotifyWithParent(events, cookie uint32, et EventType) {} // Watches implements DentryImpl.Watches. // @@ -309,3 +309,8 @@ func (d *anonDentry) InotifyWithParent(events uint32, cookie uint32, et EventTyp func (d *anonDentry) Watches() *Watches { return nil } + +// OnZeroWatches implements Dentry.OnZeroWatches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *anonDentry) OnZeroWatches() {} diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go index 24af13eb1..cea3e6955 100644 --- a/pkg/sentry/vfs/dentry.go +++ b/pkg/sentry/vfs/dentry.go @@ -113,12 +113,29 @@ type DentryImpl interface { // // Note that the events may not actually propagate up to the user, depending // on the event masks. - InotifyWithParent(events uint32, cookie uint32, et EventType) + InotifyWithParent(events, cookie uint32, et EventType) // Watches returns the set of inotify watches for the file corresponding to // the Dentry. Dentries that are hard links to the same underlying file // share the same watches. + // + // Watches may return nil if the dentry belongs to a FilesystemImpl that + // does not support inotify. If an implementation returns a non-nil watch + // set, it must always return a non-nil watch set. Likewise, if an + // implementation returns a nil watch set, it must always return a nil watch + // set. + // + // The caller does not need to hold a reference on the dentry. Watches() *Watches + + // OnZeroWatches is called whenever the number of watches on a dentry drops + // to zero. This is needed by some FilesystemImpls (e.g. gofer) to manage + // dentry lifetime. + // + // The caller does not need to hold a reference on the dentry. OnZeroWatches + // may acquire inotify locks, so to prevent deadlock, no inotify locks should + // be held by the caller. + OnZeroWatches() } // IncRef increments d's reference count. @@ -149,17 +166,26 @@ func (d *Dentry) isMounted() bool { return atomic.LoadUint32(&d.mounts) != 0 } -// InotifyWithParent notifies all watches on the inodes for this dentry and +// InotifyWithParent notifies all watches on the targets represented by d and // its parent of events. 
-func (d *Dentry) InotifyWithParent(events uint32, cookie uint32, et EventType) { +func (d *Dentry) InotifyWithParent(events, cookie uint32, et EventType) { d.impl.InotifyWithParent(events, cookie, et) } // Watches returns the set of inotify watches associated with d. +// +// Watches will return nil if d belongs to a FilesystemImpl that does not +// support inotify. func (d *Dentry) Watches() *Watches { return d.impl.Watches() } +// OnZeroWatches performs cleanup tasks whenever the number of watches on a +// dentry drops to zero. +func (d *Dentry) OnZeroWatches() { + d.impl.OnZeroWatches() +} + // The following functions are exported so that filesystem implementations can // use them. The vfs package, and users of VFS, should not call these // functions. diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 7fa7d2d0c..36e7a3767 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -49,9 +49,6 @@ const ( // Inotify represents an inotify instance created by inotify_init(2) or // inotify_init1(2). Inotify implements FileDescriptionImpl. // -// Lock ordering: -// Inotify.mu -> Watches.mu -> Inotify.evMu -// // +stateify savable type Inotify struct { vfsfd FileDescription @@ -122,17 +119,31 @@ func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32) // Release implements FileDescriptionImpl.Release. Release removes all // watches and frees all resources for an inotify instance. func (i *Inotify) Release() { + var ds []*Dentry // We need to hold i.mu to avoid a race with concurrent calls to // Inotify.handleDeletion from Watches. There's no risk of Watches // accessing this Inotify after the destructor ends, because we remove all // references to it below. i.mu.Lock() - defer i.mu.Unlock() for _, w := range i.watches { // Remove references to the watch from the watches set on the target. We // don't need to worry about the references from i.watches, since this // file description is about to be destroyed. - w.set.Remove(i.id) + d := w.target + ws := d.Watches() + // Watchable dentries should never return a nil watch set. + if ws == nil { + panic("Cannot remove watch from an unwatchable dentry") + } + ws.Remove(i.id) + if ws.Size() == 0 { + ds = append(ds, d) + } + } + i.mu.Unlock() + + for _, d := range ds { + d.OnZeroWatches() } } @@ -272,20 +283,19 @@ func (i *Inotify) queueEvent(ev *Event) { // newWatchLocked creates and adds a new watch to target. // -// Precondition: i.mu must be locked. -func (i *Inotify) newWatchLocked(target *Dentry, mask uint32) *Watch { - targetWatches := target.Watches() +// Precondition: i.mu must be locked. ws must be the watch set for target d. +func (i *Inotify) newWatchLocked(d *Dentry, ws *Watches, mask uint32) *Watch { w := &Watch{ - owner: i, - wd: i.nextWatchIDLocked(), - set: targetWatches, - mask: mask, + owner: i, + wd: i.nextWatchIDLocked(), + target: d, + mask: mask, } // Hold the watch in this inotify instance as well as the watch set on the // target. i.watches[w.wd] = w - targetWatches.Add(w) + ws.Add(w) return w } @@ -312,7 +322,9 @@ func (i *Inotify) handleDeletion(w *Watch) { // AddWatch constructs a new inotify watch and adds it to the target. It // returns the watch descriptor returned by inotify_add_watch(2). -func (i *Inotify) AddWatch(target *Dentry, mask uint32) int32 { +// +// The caller must hold a reference on target. +func (i *Inotify) AddWatch(target *Dentry, mask uint32) (int32, error) { // Note: Locking this inotify instance protects the result returned by // Lookup() below. 
With the lock held, we know for sure the lookup result // won't become stale because it's impossible for *this* instance to @@ -320,8 +332,14 @@ func (i *Inotify) AddWatch(target *Dentry, mask uint32) int32 { i.mu.Lock() defer i.mu.Unlock() + ws := target.Watches() + if ws == nil { + // While Linux supports inotify watches on all filesystem types, watches on + // filesystems like kernfs are not generally useful, so we do not. + return 0, syserror.EPERM + } // Does the target already have a watch from this inotify instance? - if existing := target.Watches().Lookup(i.id); existing != nil { + if existing := ws.Lookup(i.id); existing != nil { newmask := mask if mask&linux.IN_MASK_ADD != 0 { // "Add (OR) events to watch mask for this pathname if it already @@ -329,12 +347,12 @@ func (i *Inotify) AddWatch(target *Dentry, mask uint32) int32 { newmask |= atomic.LoadUint32(&existing.mask) } atomic.StoreUint32(&existing.mask, newmask) - return existing.wd + return existing.wd, nil } // No existing watch, create a new watch. - w := i.newWatchLocked(target, mask) - return w.wd + w := i.newWatchLocked(target, ws, mask) + return w.wd, nil } // RmWatch looks up an inotify watch for the given 'wd' and configures the @@ -353,9 +371,19 @@ func (i *Inotify) RmWatch(wd int32) error { delete(i.watches, wd) // Remove the watch from the watch target. - w.set.Remove(w.OwnerID()) + ws := w.target.Watches() + // AddWatch ensures that w.target has a non-nil watch set. + if ws == nil { + panic("Watched dentry cannot have nil watch set") + } + ws.Remove(w.OwnerID()) + remaining := ws.Size() i.mu.Unlock() + if remaining == 0 { + w.target.OnZeroWatches() + } + // Generate the event for the removal. i.queueEvent(newEvent(wd, "", linux.IN_IGNORED, 0)) @@ -374,6 +402,13 @@ type Watches struct { ws map[uint64]*Watch } +// Size returns the number of watches held by w. +func (w *Watches) Size() int { + w.mu.Lock() + defer w.mu.Unlock() + return len(w.ws) +} + // Lookup returns the watch owned by an inotify instance with the given id. // Returns nil if no such watch exists. // @@ -459,10 +494,6 @@ func (w *Watches) NotifyWithExclusions(name string, events, cookie uint32, et Ev func (w *Watches) HandleDeletion() { w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent) - // TODO(gvisor.dev/issue/1479): This doesn't work because maps are not copied - // by value. Ideally, we wouldn't have this circular locking so we can just - // notify of IN_DELETE_SELF in the same loop below. - // // We can't hold w.mu while calling watch.handleDeletion to preserve lock // ordering w.r.t to the owner inotify instances. Instead, atomically move // the watches map into a local variable so we can iterate over it safely. @@ -495,9 +526,8 @@ type Watch struct { // Descriptor for this watch. This is unique across an inotify instance. wd int32 - // set is the watch set containing this watch. It belongs to the target file - // of this watch. - set *Watches + // target is a dentry representing the watch target. Its watch set contains this watch. + target *Dentry // Events being monitored via this watch. Must be accessed with atomic // memory operations. 
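Release and RmWatch above follow the same discipline: decide which targets dropped to zero watches while holding Inotify.mu, then call OnZeroWatches only after that lock is released, since the callback may take filesystem locks of its own. A compact sketch of the idea, with placeholder types (registry, target) that are not gVisor's:

package main

import (
	"fmt"
	"sync"
)

type target struct{ name string }

func (t *target) onZeroWatches() { fmt.Println("last watch removed from", t.name) }

type registry struct {
	mu      sync.Mutex
	watches map[*target]int // Watch count per target.
}

// removeAll drops every watch under the lock, then notifies the affected
// targets with the lock released.
func (r *registry) removeAll() {
	var zeroed []*target
	r.mu.Lock()
	for t := range r.watches {
		delete(r.watches, t)
		zeroed = append(zeroed, t)
	}
	r.mu.Unlock()

	for _, t := range zeroed {
		t.onZeroWatches() // The registry lock is no longer held here.
	}
}

func main() {
	t := &target{name: "file.txt"}
	r := &registry{watches: map[*target]int{t: 1}}
	r.removeAll()
}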
@@ -606,7 +636,7 @@ func (e *Event) setName(name string) { func (e *Event) sizeOf() int { s := inotifyEventBaseSize + int(e.len) if s < inotifyEventBaseSize { - panic("overflow") + panic("Overflowed event size") } return s } @@ -676,11 +706,15 @@ func InotifyEventFromStatMask(mask uint32) uint32 { } // InotifyRemoveChild sends the appriopriate notifications to the watch sets of -// the child being removed and its parent. +// the child being removed and its parent. Note that unlike most pairs of +// parent/child notifications, the child is notified first in this case. func InotifyRemoveChild(self, parent *Watches, name string) { - self.Notify("", linux.IN_ATTRIB, 0, InodeEvent) - parent.Notify(name, linux.IN_DELETE, 0, InodeEvent) - // TODO(gvisor.dev/issue/1479): implement IN_EXCL_UNLINK. + if self != nil { + self.Notify("", linux.IN_ATTRIB, 0, InodeEvent) + } + if parent != nil { + parent.Notify(name, linux.IN_DELETE, 0, InodeEvent) + } } // InotifyRename sends the appriopriate notifications to the watch sets of the @@ -691,8 +725,14 @@ func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches, dirEv = linux.IN_ISDIR } cookie := uniqueid.InotifyCookie(ctx) - oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent) - newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent) + if oldParent != nil { + oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent) + } + if newParent != nil { + newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent) + } // Somewhat surprisingly, self move events do not have a cookie. - renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent) + if renamed != nil { + renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent) + } } diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 9acca8bc7..58c7ad778 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -24,6 +24,9 @@ // Locks acquired by FilesystemImpls between Prepare{Delete,Rename}Dentry and Commit{Delete,Rename*}Dentry // VirtualFilesystem.filesystemsMu // EpollInstance.mu +// Inotify.mu +// Watches.mu +// Inotify.evMu // VirtualFilesystem.fsTypesMu // // Locking Dentry.mu in multiple Dentries requires holding diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 078e4a284..7282d675e 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -974,6 +974,7 @@ cc_binary( "//test/util:thread_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", ], ) diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 1d1a7171d..13e096e9c 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -29,6 +29,7 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" +#include "absl/synchronization/mutex.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/util/epoll_util.h" @@ -1305,7 +1306,7 @@ TEST(Inotify, UtimesGeneratesAttribEvent) { const int wd = ASSERT_NO_ERRNO_AND_VALUE( InotifyAddWatch(fd.get(), root.path(), IN_ALL_EVENTS)); - struct timeval times[2] = {{1, 0}, {2, 0}}; + const struct timeval times[2] = {{1, 0}, {2, 0}}; EXPECT_THAT(futimes(file1_fd.get(), times), SyscallSucceeds()); const std::vector events = @@ -2018,7 +2019,7 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) { Event(IN_MODIFY, wd, Basename(file.path())), })); - struct timeval times[2] = 
{{1, 0}, {2, 0}}; + const struct timeval times[2] = {{1, 0}, {2, 0}}; ASSERT_THAT(futimes(fd.get(), times), SyscallSucceeds()); events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))})); @@ -2030,6 +2031,252 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) { EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))})); } +// This test helps verify that the lock order of filesystem and inotify locks +// is respected when inotify instances and watch targets are concurrently being +// destroyed. +TEST(InotifyTest, InotifyAndTargetDestructionDoNotDeadlock_NoRandomSave) { + const DisableSave ds; // Too many syscalls. + + // A file descriptor protected by a mutex. This ensures that while a + // descriptor is in use, it cannot be closed and reused for a different file + // description. + struct atomic_fd { + int fd; + absl::Mutex mu; + }; + + // Set up initial inotify instances. + constexpr int num_fds = 3; + std::vector fds(num_fds); + for (int i = 0; i < num_fds; i++) { + int fd; + ASSERT_THAT(fd = inotify_init1(IN_NONBLOCK), SyscallSucceeds()); + fds[i].fd = fd; + } + + // Set up initial watch targets. + std::vector paths; + for (int i = 0; i < 3; i++) { + paths.push_back(NewTempAbsPath()); + ASSERT_THAT(mknod(paths[i].c_str(), S_IFREG | 0600, 0), SyscallSucceeds()); + } + + constexpr absl::Duration runtime = absl::Seconds(4); + + // Constantly replace each inotify instance with a new one. + auto replace_fds = [&] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + for (auto& afd : fds) { + int new_fd; + ASSERT_THAT(new_fd = inotify_init1(IN_NONBLOCK), SyscallSucceeds()); + absl::MutexLock l(&afd.mu); + ASSERT_THAT(close(afd.fd), SyscallSucceeds()); + afd.fd = new_fd; + for (auto& p : paths) { + // inotify_add_watch may fail if the file at p was deleted. + ASSERT_THAT(inotify_add_watch(afd.fd, p.c_str(), IN_ALL_EVENTS), + AnyOf(SyscallSucceeds(), SyscallFailsWithErrno(ENOENT))); + } + } + sched_yield(); + } + }; + + std::list ts; + for (int i = 0; i < 3; i++) { + ts.emplace_back(replace_fds); + } + + // Constantly replace each watch target with a new one. + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + for (auto& p : paths) { + ASSERT_THAT(unlink(p.c_str()), SyscallSucceeds()); + ASSERT_THAT(mknod(p.c_str(), S_IFREG | 0600, 0), SyscallSucceeds()); + } + sched_yield(); + } +} + +// This test helps verify that the lock order of filesystem and inotify locks +// is respected when adding/removing watches occurs concurrently with the +// removal of their targets. +TEST(InotifyTest, AddRemoveUnlinkDoNotDeadlock_NoRandomSave) { + const DisableSave ds; // Too many syscalls. + + // Set up inotify instances. + constexpr int num_fds = 3; + std::vector fds(num_fds); + for (int i = 0; i < num_fds; i++) { + ASSERT_THAT(fds[i] = inotify_init1(IN_NONBLOCK), SyscallSucceeds()); + } + + // Set up initial watch targets. + std::vector paths; + for (int i = 0; i < 3; i++) { + paths.push_back(NewTempAbsPath()); + ASSERT_THAT(mknod(paths[i].c_str(), S_IFREG | 0600, 0), SyscallSucceeds()); + } + + constexpr absl::Duration runtime = absl::Seconds(1); + + // Constantly add/remove watches for each inotify instance/watch target pair. + auto add_remove_watches = [&] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + for (int fd : fds) { + for (auto& p : paths) { + // Do not assert on inotify_add_watch and inotify_rm_watch. 
They may + // fail if the file at p was deleted. inotify_add_watch may also fail + // if another thread beat us to adding a watch. + const int wd = inotify_add_watch(fd, p.c_str(), IN_ALL_EVENTS); + if (wd > 0) { + inotify_rm_watch(fd, wd); + } + } + } + sched_yield(); + } + }; + + std::list ts; + for (int i = 0; i < 15; i++) { + ts.emplace_back(add_remove_watches); + } + + // Constantly replace each watch target with a new one. + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + for (auto& p : paths) { + ASSERT_THAT(unlink(p.c_str()), SyscallSucceeds()); + ASSERT_THAT(mknod(p.c_str(), S_IFREG | 0600, 0), SyscallSucceeds()); + } + sched_yield(); + } +} + +// This test helps verify that the lock order of filesystem and inotify locks +// is respected when many inotify events and filesystem operations occur +// simultaneously. +TEST(InotifyTest, NotifyNoDeadlock_NoRandomSave) { + const DisableSave ds; // Too many syscalls. + + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string dir = parent.path(); + + // mu protects file, which will change on rename. + absl::Mutex mu; + std::string file = NewTempAbsPathInDir(dir); + ASSERT_THAT(mknod(file.c_str(), 0644 | S_IFREG, 0), SyscallSucceeds()); + + const absl::Duration runtime = absl::Milliseconds(300); + + // Add/remove watches on dir and file. + ScopedThread add_remove_watches([&] { + const FileDescriptor ifd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + int dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(ifd.get(), dir, IN_ALL_EVENTS)); + int file_wd; + { + absl::ReaderMutexLock l(&mu); + file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(ifd.get(), file, IN_ALL_EVENTS)); + } + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + ASSERT_THAT(inotify_rm_watch(ifd.get(), file_wd), SyscallSucceeds()); + ASSERT_THAT(inotify_rm_watch(ifd.get(), dir_wd), SyscallSucceeds()); + dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(ifd.get(), dir, IN_ALL_EVENTS)); + { + absl::ReaderMutexLock l(&mu); + file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(ifd.get(), file, IN_ALL_EVENTS)); + } + sched_yield(); + } + }); + + // Modify attributes on dir and file. + ScopedThread stats([&] { + int fd, dir_fd; + { + absl::ReaderMutexLock l(&mu); + ASSERT_THAT(fd = open(file.c_str(), O_RDONLY), SyscallSucceeds()); + } + ASSERT_THAT(dir_fd = open(dir.c_str(), O_RDONLY | O_DIRECTORY), + SyscallSucceeds()); + const struct timeval times[2] = {{1, 0}, {2, 0}}; + + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + { + absl::ReaderMutexLock l(&mu); + EXPECT_THAT(utimes(file.c_str(), times), SyscallSucceeds()); + } + EXPECT_THAT(futimes(fd, times), SyscallSucceeds()); + EXPECT_THAT(utimes(dir.c_str(), times), SyscallSucceeds()); + EXPECT_THAT(futimes(dir_fd, times), SyscallSucceeds()); + sched_yield(); + } + }); + + // Modify extended attributes on dir and file. + ScopedThread xattrs([&] { + // TODO(gvisor.dev/issue/1636): Support extended attributes in runsc gofer. 
+ if (!IsRunningOnGvisor()) { + int fd; + { + absl::ReaderMutexLock l(&mu); + ASSERT_THAT(fd = open(file.c_str(), O_RDONLY), SyscallSucceeds()); + } + + const char* name = "user.test"; + int val = 123; + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + { + absl::ReaderMutexLock l(&mu); + ASSERT_THAT( + setxattr(file.c_str(), name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + ASSERT_THAT(removexattr(file.c_str(), name), SyscallSucceeds()); + } + + ASSERT_THAT(fsetxattr(fd, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + ASSERT_THAT(fremovexattr(fd, name), SyscallSucceeds()); + sched_yield(); + } + } + }); + + // Read and write file's contents. Read and write dir's entries. + ScopedThread read_write([&] { + int fd; + { + absl::ReaderMutexLock l(&mu); + ASSERT_THAT(fd = open(file.c_str(), O_RDWR), SyscallSucceeds()); + } + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + int val = 123; + ASSERT_THAT(write(fd, &val, sizeof(val)), SyscallSucceeds()); + ASSERT_THAT(read(fd, &val, sizeof(val)), SyscallSucceeds()); + TempPath new_file = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir)); + ASSERT_NO_ERRNO(ListDir(dir, false)); + new_file.reset(); + sched_yield(); + } + }); + + // Rename file. + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + const std::string new_path = NewTempAbsPathInDir(dir); + { + absl::WriterMutexLock l(&mu); + ASSERT_THAT(rename(file.c_str(), new_path.c_str()), SyscallSucceeds()); + file = new_path; + } + sched_yield(); + } +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From acf519a77b480e8d974186568bd66eaa89bac024 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Tue, 23 Jun 2020 17:43:02 -0700 Subject: Nit fix: Create and use a std::string object for `const char*`. PiperOrigin-RevId: 317973144 --- test/syscalls/linux/proc.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index da8f30f32..e77e9820e 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1982,24 +1982,26 @@ void CheckDuplicatesRecursively(std::string path) { break; // We're done. } - if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) { + const std::string name = dp->d_name; + + if (name == "." || name == "..") { continue; } // Ignore a duplicate entry if it isn't the last attempt. if (i == max_attempts - 1) { - ASSERT_EQ(children.find(std::string(dp->d_name)), children.end()) - << absl::StrCat(path, "/", dp->d_name); - } else if (children.find(std::string(dp->d_name)) != children.end()) { + ASSERT_EQ(children.find(name), children.end()) + << absl::StrCat(path, "/", name); + } else if (children.find(name) != children.end()) { std::cerr << "Duplicate entry: " << i << ":" - << absl::StrCat(path, "/", dp->d_name) << std::endl; + << absl::StrCat(path, "/", name) << std::endl; success = false; break; } - children.insert(std::string(dp->d_name)); + children.insert(name); if (dp->d_type == DT_DIR) { - child_dirs.push_back(std::string(dp->d_name)); + child_dirs.push_back(name); } } if (success) { -- cgit v1.2.3 From 65a587dedf1a30b3614a66532d2b448026b9c540 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 23 Jun 2020 18:31:53 -0700 Subject: Complete inotify IN_EXCL_UNLINK implementation in VFS2. Events were only skipped on parent directories after their children were unlinked; events on the unlinked file itself need to be skipped as well. 
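As a userspace illustration of that rule (not part of this change; the paths and the use of golang.org/x/sys/unix are only for the example), a watcher that sets IN_EXCL_UNLINK on both a directory and a file should stop seeing fd-based events once the file is unlinked:

package main

import (
	"log"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.InotifyInit1(0)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)

	// Illustrative paths only.
	dir := "/tmp/watched"
	path := dir + "/file"
	if err := os.MkdirAll(dir, 0o755); err != nil {
		log.Fatal(err)
	}
	f, err := os.Create(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	if _, err := unix.InotifyAddWatch(fd, dir, unix.IN_ALL_EVENTS|unix.IN_EXCL_UNLINK); err != nil {
		log.Fatal(err)
	}
	if _, err := unix.InotifyAddWatch(fd, path, unix.IN_ALL_EVENTS|unix.IN_EXCL_UNLINK); err != nil {
		log.Fatal(err)
	}

	// With IN_EXCL_UNLINK, writes through the still-open fd after the unlink
	// should not produce IN_MODIFY on either watch; the unlink itself still
	// yields inode-level events such as IN_ATTRIB and IN_DELETE.
	if err := os.Remove(path); err != nil {
		log.Fatal(err)
	}
	if _, err := f.Write([]byte("x")); err != nil {
		log.Fatal(err)
	}
	log.Println("wrote to unlinked file; modify events are expected to be suppressed")
}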
As a result, all Watches.Notify() calls need to know whether the dentry where the call came from was unlinked. Updates #1479. PiperOrigin-RevId: 317979476 --- pkg/sentry/fsimpl/gofer/filesystem.go | 8 ++-- pkg/sentry/fsimpl/gofer/gofer.go | 4 +- pkg/sentry/fsimpl/tmpfs/filesystem.go | 8 ++-- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 10 +++-- pkg/sentry/vfs/inotify.go | 39 +++++++--------- test/syscalls/linux/inotify.cc | 84 ++++++++++++++++++++++++++--------- 6 files changed, 95 insertions(+), 58 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 0321fd384..d253c996c 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -375,7 +375,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir if dir { ev |= linux.IN_ISDIR } - parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent) + parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) return nil } if fs.opts.interop == InteropModeShared { @@ -409,7 +409,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir if dir { ev |= linux.IN_ISDIR } - parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent) + parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) return nil } @@ -543,7 +543,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b // Generate inotify events for rmdir or unlink. if dir { - parent.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent) + parent.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */) } else { var cw *vfs.Watches if child != nil { @@ -1040,7 +1040,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } childVFSFD = &fd.vfsfd } - d.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent) + d.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */) return childVFSFD, nil } diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index c6c284ff3..b0b6d8c64 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1066,9 +1066,9 @@ func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) { d.fs.renameMu.RLock() // The ordering below is important, Linux always notifies the parent first. if d.parent != nil { - d.parent.watches.NotifyWithExclusions(d.name, events, cookie, et, d.isDeleted()) + d.parent.watches.Notify(d.name, events, cookie, et, d.isDeleted()) } - d.watches.Notify("", events, cookie, et) + d.watches.Notify("", events, cookie, et, d.isDeleted()) d.fs.renameMu.RUnlock() } diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 85d8e37b2..f766b7ce2 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -182,7 +182,7 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa if dir { ev |= linux.IN_ISDIR } - parentDir.inode.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent) + parentDir.inode.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) parentDir.inode.touchCMtime() return nil } @@ -251,7 +251,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. 
return syserror.EMLINK } i.incLinksLocked() - i.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent) + i.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent, false /* unlinked */) parentDir.insertChildLocked(fs.newDentry(i), name) return nil }) @@ -368,7 +368,7 @@ afterTrailingSymlink: if err != nil { return nil, err } - parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent) + parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */) parentDir.inode.touchCMtime() return fd, nil } @@ -625,7 +625,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } parentDir.removeChildLocked(child) - parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent) + parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */) // Remove links for child, child/., and child/.. child.inode.decLinksLocked() child.inode.decLinksLocked() diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index a85bfc968..d7f4f0779 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -259,14 +259,16 @@ func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) { events |= linux.IN_ISDIR } + // tmpfs never calls VFS.InvalidateDentry(), so d.vfsd.IsDead() indicates + // that d was deleted. + deleted := d.vfsd.IsDead() + d.inode.fs.mu.RLock() // The ordering below is important, Linux always notifies the parent first. if d.parent != nil { - // tmpfs never calls VFS.InvalidateDentry(), so d.vfsd.IsDead() indicates - // that d was deleted. - d.parent.inode.watches.NotifyWithExclusions(d.name, events, cookie, et, d.vfsd.IsDead()) + d.parent.inode.watches.Notify(d.name, events, cookie, et, deleted) } - d.inode.watches.Notify("", events, cookie, et) + d.inode.watches.Notify("", events, cookie, et, deleted) d.inode.fs.mu.RUnlock() } diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 36e7a3767..6043a0566 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -467,21 +467,16 @@ func (w *Watches) Remove(id uint64) { delete(w.ws, id) } -// Notify queues a new event with all watches in this set. -func (w *Watches) Notify(name string, events, cookie uint32, et EventType) { - w.NotifyWithExclusions(name, events, cookie, et, false) -} - -// NotifyWithExclusions queues a new event with watches in this set. Watches -// with IN_EXCL_UNLINK are skipped if the event is coming from a child that -// has been unlinked. -func (w *Watches) NotifyWithExclusions(name string, events, cookie uint32, et EventType, unlinked bool) { +// Notify queues a new event with watches in this set. Watches with +// IN_EXCL_UNLINK are skipped if the event is coming from a child that has been +// unlinked. +func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlinked bool) { // N.B. We don't defer the unlocks because Notify is in the hot path of // all IO operations, and the defer costs too much for small IO // operations. w.mu.RLock() for _, watch := range w.ws { - if unlinked && watch.ExcludeUnlinkedChildren() && et == PathEvent { + if unlinked && watch.ExcludeUnlinked() && et == PathEvent { continue } watch.Notify(name, events, cookie) @@ -492,7 +487,7 @@ func (w *Watches) NotifyWithExclusions(name string, events, cookie uint32, et Ev // HandleDeletion is called when the watch target is destroyed to emit // the appropriate events. 
func (w *Watches) HandleDeletion() { - w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent) + w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */) // We can't hold w.mu while calling watch.handleDeletion to preserve lock // ordering w.r.t to the owner inotify instances. Instead, atomically move @@ -539,14 +534,12 @@ func (w *Watch) OwnerID() uint64 { return w.owner.id } -// ExcludeUnlinkedChildren indicates whether the watched object should continue -// to be notified of events of its children after they have been unlinked, e.g. -// for an open file descriptor. +// ExcludeUnlinked indicates whether the watched object should continue to be +// notified of events originating from a path that has been unlinked. // -// TODO(gvisor.dev/issue/1479): Implement IN_EXCL_UNLINK. -// We can do this by keeping track of the set of unlinked children in Watches -// to skip notification. -func (w *Watch) ExcludeUnlinkedChildren() bool { +// For example, if "foo/bar" is opened and then unlinked, operations on the +// open fd may be ignored by watches on "foo" and "foo/bar" with IN_EXCL_UNLINK. +func (w *Watch) ExcludeUnlinked() bool { return atomic.LoadUint32(&w.mask)&linux.IN_EXCL_UNLINK != 0 } @@ -710,10 +703,10 @@ func InotifyEventFromStatMask(mask uint32) uint32 { // parent/child notifications, the child is notified first in this case. func InotifyRemoveChild(self, parent *Watches, name string) { if self != nil { - self.Notify("", linux.IN_ATTRIB, 0, InodeEvent) + self.Notify("", linux.IN_ATTRIB, 0, InodeEvent, true /* unlinked */) } if parent != nil { - parent.Notify(name, linux.IN_DELETE, 0, InodeEvent) + parent.Notify(name, linux.IN_DELETE, 0, InodeEvent, true /* unlinked */) } } @@ -726,13 +719,13 @@ func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches, } cookie := uniqueid.InotifyCookie(ctx) if oldParent != nil { - oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent) + oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent, false /* unlinked */) } if newParent != nil { - newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent) + newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent, false /* unlinked */) } // Somewhat surprisingly, self move events do not have a cookie. 
if renamed != nil { - renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent) + renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent, false /* unlinked */) } } diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 13e096e9c..731c7046c 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -1840,9 +1840,7 @@ TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) { const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateFileWith(dir.path(), "123", TempPath::kDefaultFileMode)); - - const FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); const FileDescriptor inotify_fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); @@ -1855,7 +1853,7 @@ TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) { int val = 0; ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds()); ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds()); - const std::vector events = + std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); EXPECT_THAT(events, Are({ Event(IN_ATTRIB, file_wd), @@ -1865,6 +1863,15 @@ TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) { Event(IN_MODIFY, dir_wd, Basename(file.path())), Event(IN_MODIFY, file_wd), })); + + fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_CLOSE_WRITE, dir_wd, Basename(file.path())), + Event(IN_CLOSE_WRITE, file_wd), + Event(IN_DELETE_SELF, file_wd), + Event(IN_IGNORED, file_wd), + })); } // Watches created with IN_EXCL_UNLINK will stop emitting events on fds for @@ -1881,13 +1888,14 @@ TEST(Inotify, ExcludeUnlink_NoRandomSave) { const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path())); - const FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); const FileDescriptor inotify_fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); - const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), file.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); // Unlink the child, which should cause further operations on the open file // descriptor to be ignored. @@ -1895,14 +1903,28 @@ TEST(Inotify, ExcludeUnlink_NoRandomSave) { int val = 0; ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds()); ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds()); - const std::vector events = + std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); - EXPECT_THAT(events, Are({Event(IN_DELETE, wd, Basename(file.path()))})); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, file_wd), + Event(IN_DELETE, dir_wd, Basename(file.path())), + })); + + fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({ + Event(IN_DELETE_SELF, file_wd), + Event(IN_IGNORED, file_wd), + })); } // We need to disable S/R because there are filesystems where we cannot re-open // fds to an unlinked file across S/R, e.g. gofer-backed filesytems. TEST(Inotify, ExcludeUnlinkDirectory_NoRandomSave) { + // TODO(gvisor.dev/issue/1624): This test fails on VFS1. 
Remove once VFS1 is + // deleted. + SKIP_IF(IsRunningWithVFS1()); + const DisableSave ds; const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); @@ -1912,20 +1934,29 @@ TEST(Inotify, ExcludeUnlinkDirectory_NoRandomSave) { const FileDescriptor inotify_fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); - const FileDescriptor fd = + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dirPath.c_str(), O_RDONLY | O_DIRECTORY)); - const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + const int parent_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( inotify_fd.get(), parent.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + const int self_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); // Unlink the dir, and then close the open fd. ASSERT_THAT(rmdir(dirPath.c_str()), SyscallSucceeds()); dir.reset(); - const std::vector events = + std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); // No close event should appear. ASSERT_THAT(events, - Are({Event(IN_DELETE | IN_ISDIR, wd, Basename(dirPath))})); + Are({Event(IN_DELETE | IN_ISDIR, parent_wd, Basename(dirPath))})); + + fd.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({ + Event(IN_DELETE_SELF, self_wd), + Event(IN_IGNORED, self_wd), + })); } // If "dir/child" and "dir/child2" are links to the same file, and "dir/child" @@ -1989,10 +2020,6 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) { const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR)); - const FileDescriptor inotify_fd = - ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); - const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( - inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); // NOTE(b/157163751): Create another link before unlinking. This is needed for // the gofer filesystem in gVisor, where open fds will not work once the link @@ -2006,6 +2033,13 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) { ASSERT_THAT(rc, SyscallSucceeds()); } + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch( + inotify_fd.get(), file.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK)); + // Even after unlinking, inode-level operations will trigger events regardless // of IN_EXCL_UNLINK. 
ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds()); @@ -2015,20 +2049,28 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) { std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); EXPECT_THAT(events, Are({ - Event(IN_DELETE, wd, Basename(file.path())), - Event(IN_MODIFY, wd, Basename(file.path())), + Event(IN_ATTRIB, file_wd), + Event(IN_DELETE, dir_wd, Basename(file.path())), + Event(IN_MODIFY, dir_wd, Basename(file.path())), + Event(IN_MODIFY, file_wd), })); const struct timeval times[2] = {{1, 0}, {2, 0}}; ASSERT_THAT(futimes(fd.get(), times), SyscallSucceeds()); events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); - EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))})); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, dir_wd, Basename(file.path())), + Event(IN_ATTRIB, file_wd), + })); // S/R is disabled on this entire test due to behavior with unlink; it must // also be disabled after this point because of fchmod. ASSERT_THAT(fchmod(fd.get(), 0777), SyscallSucceeds()); events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); - EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))})); + EXPECT_THAT(events, Are({ + Event(IN_ATTRIB, dir_wd, Basename(file.path())), + Event(IN_ATTRIB, file_wd), + })); } // This test helps verify that the lock order of filesystem and inotify locks -- cgit v1.2.3 From 0f328beb0d830a6e3b88b9f2cc1c9cb48f55c752 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Tue, 23 Jun 2020 18:47:22 -0700 Subject: Port /dev/tty device to VFS2. Support is limited to the functionality that exists in VFS1. Updates #2923 #1035 PiperOrigin-RevId: 317981417 --- pkg/sentry/devices/ttydev/BUILD | 16 +++++++ pkg/sentry/devices/ttydev/ttydev.go | 91 +++++++++++++++++++++++++++++++++++++ runsc/boot/BUILD | 1 + runsc/boot/vfs.go | 7 +++ test/syscalls/BUILD | 1 + 5 files changed, 116 insertions(+) create mode 100644 pkg/sentry/devices/ttydev/BUILD create mode 100644 pkg/sentry/devices/ttydev/ttydev.go (limited to 'test') diff --git a/pkg/sentry/devices/ttydev/BUILD b/pkg/sentry/devices/ttydev/BUILD new file mode 100644 index 000000000..12e49b58a --- /dev/null +++ b/pkg/sentry/devices/ttydev/BUILD @@ -0,0 +1,16 @@ +load("//tools:defs.bzl", "go_library") + +licenses(["notice"]) + +go_library( + name = "ttydev", + srcs = ["ttydev.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/vfs", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/devices/ttydev/ttydev.go b/pkg/sentry/devices/ttydev/ttydev.go new file mode 100644 index 000000000..fbb7fd92c --- /dev/null +++ b/pkg/sentry/devices/ttydev/ttydev.go @@ -0,0 +1,91 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ttydev implements devices for /dev/tty and (eventually) +// /dev/console. +// +// TODO(b/159623826): Support /dev/console. 
+package ttydev + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/usermem" +) + +const ( + // See drivers/tty/tty_io.c:tty_init(). + ttyDevMinor = 0 + consoleDevMinor = 1 +) + +// ttyDevice implements vfs.Device for /dev/tty. +type ttyDevice struct{} + +// Open implements vfs.Device.Open. +func (ttyDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + fd := &ttyFD{} + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{ + UseDentryMetadata: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil +} + +// ttyFD implements vfs.FileDescriptionImpl for /dev/tty. +type ttyFD struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *ttyFD) Release() {} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *ttyFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + return 0, nil +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *ttyFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + return 0, nil +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *ttyFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + return src.NumBytes(), nil +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *ttyFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + return src.NumBytes(), nil +} + +// Register registers all devices implemented by this package in vfsObj. +func Register(vfsObj *vfs.VirtualFilesystem) error { + return vfsObj.RegisterDevice(vfs.CharDevice, linux.TTYAUX_MAJOR, ttyDevMinor, ttyDevice{}, &vfs.RegisterDeviceOptions{ + GroupName: "tty", + }) +} + +// CreateDevtmpfsFiles creates device special files in dev representing all +// devices implemented by this package. 
+func CreateDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor) error { + return dev.CreateDeviceFile(ctx, "tty", vfs.CharDevice, linux.TTYAUX_MAJOR, ttyDevMinor, 0666 /* mode */) +} diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 2b1e6b13e..b701c65d2 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -40,6 +40,7 @@ go_library( "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/control", "//pkg/sentry/devices/memdev", + "//pkg/sentry/devices/ttydev", "//pkg/sentry/fdimport", "//pkg/sentry/fs", "//pkg/sentry/fs/dev", diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index d1653b279..2fdddc719 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/memdev" + "gvisor.dev/gvisor/pkg/sentry/devices/ttydev" "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" @@ -75,6 +76,9 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre if err := memdev.Register(vfsObj); err != nil { return fmt.Errorf("registering memdev: %w", err) } + if err := ttydev.Register(vfsObj); err != nil { + return fmt.Errorf("registering ttydev: %w", err) + } a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name) if err != nil { return fmt.Errorf("creating devtmpfs accessor: %w", err) @@ -87,6 +91,9 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil { return fmt.Errorf("creating devtmpfs files: %w", err) } + if err := ttydev.CreateDevtmpfsFiles(ctx, a); err != nil { + return fmt.Errorf("creating devtmpfs files: %w", err) + } return nil } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 8ce5d24f4..315af9efe 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -147,6 +147,7 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:dev_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From 2141013dcef04de0591e36462ea997be1b6374d7 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Tue, 23 Jun 2020 19:14:05 -0700 Subject: Add support for SO_REUSEADDR to TCP sockets/endpoints. For TCP sockets, SO_REUSEADDR relaxes the rules for binding addresses. gVisor/netstack already supported a behavior similar to SO_REUSEADDR, but did not allow disabling it. This change brings the SO_REUSEADDR behavior closer to the behavior implemented by Linux and adds a new SO_REUSEADDR disabled behavior. Like Linux, SO_REUSEADDR is now disabled by default. 
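For reference, the option itself is the standard one applications already set before bind (shown below with golang.org/x/sys/unix; the port and address are arbitrary). What this change adjusts is how netstack evaluates bind conflicts when the option is, or is not, set.

package main

import (
	"log"

	"golang.org/x/sys/unix"
)

// listenerWithReuseAddr opens a TCP listener that opts in to SO_REUSEADDR.
// With the option left off (now the default in netstack, as on Linux),
// binding an address and port that is already bound fails with EADDRINUSE.
func listenerWithReuseAddr(port int) (int, error) {
	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0)
	if err != nil {
		return -1, err
	}
	// The option must be set before bind to affect the conflict check.
	if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEADDR, 1); err != nil {
		unix.Close(fd)
		return -1, err
	}
	sa := &unix.SockaddrInet4{Port: port, Addr: [4]byte{127, 0, 0, 1}}
	if err := unix.Bind(fd, sa); err != nil {
		unix.Close(fd)
		return -1, err
	}
	if err := unix.Listen(fd, 10); err != nil {
		unix.Close(fd)
		return -1, err
	}
	return fd, nil
}

func main() {
	fd, err := listenerWithReuseAddr(8080)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)
	log.Println("listening on 127.0.0.1:8080 with SO_REUSEADDR set")
}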
PiperOrigin-RevId: 317984380 --- pkg/tcpip/ports/ports.go | 206 ++++++++++++++++---- pkg/tcpip/ports/ports_test.go | 58 +++++- pkg/tcpip/stack/stack.go | 6 + pkg/tcpip/stack/transport_demuxer.go | 65 ++++++- pkg/tcpip/transport/tcp/accept.go | 140 ++++++++++---- pkg/tcpip/transport/tcp/endpoint.go | 100 +++++----- pkg/tcpip/transport/tcp/endpoint_state.go | 31 +-- pkg/tcpip/transport/tcp/tcp_test.go | 18 ++ pkg/tcpip/transport/udp/endpoint.go | 8 +- .../linux/socket_bind_to_device_sequence.cc | 12 -- test/syscalls/linux/socket_inet_loopback.cc | 210 +++++++++++++++++---- test/syscalls/linux/socket_ip_unbound.cc | 4 - 12 files changed, 633 insertions(+), 225 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index edc29ad27..f6d592eb5 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -52,6 +52,9 @@ type Flags struct { // // LoadBalanced takes precidence over MostRecent. LoadBalanced bool + + // TupleOnly represents TCP SO_REUSEADDR. + TupleOnly bool } // Bits converts the Flags to their bitset form. @@ -63,6 +66,9 @@ func (f Flags) Bits() BitFlags { if f.LoadBalanced { rf |= LoadBalancedFlag } + if f.TupleOnly { + rf |= TupleOnlyFlag + } return rf } @@ -98,6 +104,9 @@ const ( // LoadBalancedFlag represents Flags.LoadBalanced. LoadBalancedFlag + // TupleOnlyFlag represents Flags.TupleOnly. + TupleOnlyFlag + // nextFlag is the value that the next added flag will have. // // It is used to calculate FlagMask below. It is also the number of @@ -106,6 +115,10 @@ const ( // FlagMask is a bit mask for BitFlags. FlagMask = nextFlag - 1 + + // MultiBindFlagMask contains the flags that allow binding the same + // tuple multiple times. + MultiBindFlagMask = MostRecentFlag | LoadBalancedFlag ) // ToFlags converts the bitset into a Flags struct. @@ -113,6 +126,7 @@ func (f BitFlags) ToFlags() Flags { return Flags{ MostRecent: f&MostRecentFlag != 0, LoadBalanced: f&LoadBalancedFlag != 0, + TupleOnly: f&TupleOnlyFlag != 0, } } @@ -175,9 +189,54 @@ func (c FlagCounter) IntersectionRefs() BitFlags { return intersection } +type destination struct { + addr tcpip.Address + port uint16 +} + +func makeDestination(a tcpip.FullAddress) destination { + return destination{ + a.Addr, + a.Port, + } +} + +// portNode is never empty. When it has no elements, it is removed from the +// map that references it. +type portNode map[destination]FlagCounter + +// intersectionRefs calculates the intersection of flag bit values which affect +// the specified destination. +// +// If no destinations are present, all flag values are returned as there are no +// entries to limit possible flag values of a new entry. +// +// In addition to the intersection, the number of intersecting refs is +// returned. +func (p portNode) intersectionRefs(dst destination) (BitFlags, int) { + intersection := FlagMask + var count int + + for d, f := range p { + if d == dst { + intersection &= f.IntersectionRefs() + count++ + continue + } + // Wildcard destinations affect all destinations for TupleOnly. + if d.addr == anyIPAddress || dst.addr == anyIPAddress { + // Only bitwise and the TupleOnlyFlag. + intersection &= ((^TupleOnlyFlag) | f.IntersectionRefs()) + count++ + } + } + + return intersection, count +} + // deviceNode is never empty. When it has no elements, it is removed from the // map that references it. -type deviceNode map[tcpip.NICID]FlagCounter +type deviceNode map[tcpip.NICID]portNode // isAvailable checks whether binding is possible by device. 
If not binding to a // device, check against all FlagCounters. If binding to a specific device, check @@ -186,17 +245,15 @@ type deviceNode map[tcpip.NICID]FlagCounter // If either of the port reuse flags is enabled on any of the nodes, all nodes // sharing a port must share at least one reuse flag. This matches Linux's // behavior. -func (d deviceNode) isAvailable(flags Flags, bindToDevice tcpip.NICID) bool { +func (d deviceNode) isAvailable(flags Flags, bindToDevice tcpip.NICID, dst destination) bool { flagBits := flags.Bits() if bindToDevice == 0 { - // Trying to binding all devices. - if flagBits == 0 { - // Can't bind because the (addr,port) is already bound. - return false - } intersection := FlagMask for _, p := range d { - i := p.IntersectionRefs() + i, c := p.intersectionRefs(dst) + if c == 0 { + continue + } intersection &= i if intersection&flagBits == 0 { // Can't bind because the (addr,port) was @@ -210,16 +267,17 @@ func (d deviceNode) isAvailable(flags Flags, bindToDevice tcpip.NICID) bool { intersection := FlagMask if p, ok := d[0]; ok { - intersection = p.IntersectionRefs() - if intersection&flagBits == 0 { + var c int + intersection, c = p.intersectionRefs(dst) + if c > 0 && intersection&flagBits == 0 { return false } } if p, ok := d[bindToDevice]; ok { - i := p.IntersectionRefs() + i, c := p.intersectionRefs(dst) intersection &= i - if intersection&flagBits == 0 { + if c > 0 && intersection&flagBits == 0 { return false } } @@ -233,12 +291,12 @@ type bindAddresses map[tcpip.Address]deviceNode // isAvailable checks whether an IP address is available to bind to. If the // address is the "any" address, check all other addresses. Otherwise, just // check against the "any" address and the provided address. -func (b bindAddresses) isAvailable(addr tcpip.Address, flags Flags, bindToDevice tcpip.NICID) bool { +func (b bindAddresses) isAvailable(addr tcpip.Address, flags Flags, bindToDevice tcpip.NICID, dst destination) bool { if addr == anyIPAddress { // If binding to the "any" address then check that there are no conflicts // with all addresses. for _, d := range b { - if !d.isAvailable(flags, bindToDevice) { + if !d.isAvailable(flags, bindToDevice, dst) { return false } } @@ -247,14 +305,14 @@ func (b bindAddresses) isAvailable(addr tcpip.Address, flags Flags, bindToDevice // Check that there is no conflict with the "any" address. if d, ok := b[anyIPAddress]; ok { - if !d.isAvailable(flags, bindToDevice) { + if !d.isAvailable(flags, bindToDevice, dst) { return false } } // Check that this is no conflict with the provided address. if d, ok := b[addr]; ok { - if !d.isAvailable(flags, bindToDevice) { + if !d.isAvailable(flags, bindToDevice, dst) { return false } } @@ -320,17 +378,17 @@ func (s *PortManager) pickEphemeralPort(offset, count uint32, testPort func(p ui } // IsPortAvailable tests if the given port is available on all given protocols. 
-func (s *PortManager) IsPortAvailable(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) bool { +func (s *PortManager) IsPortAvailable(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) bool { s.mu.Lock() defer s.mu.Unlock() - return s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice) + return s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice, makeDestination(dest)) } -func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) bool { +func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dst destination) bool { for _, network := range networks { desc := portDescriptor{network, transport, port} if addrs, ok := s.allocatedPorts[desc]; ok { - if !addrs.isAvailable(addr, flags, bindToDevice) { + if !addrs.isAvailable(addr, flags, bindToDevice, dst) { return false } } @@ -342,14 +400,16 @@ func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumb // reserved by another endpoint. If port is zero, ReservePort will search for // an unreserved ephemeral port and reserve it, returning its value in the // "port" return value. -func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) (reservedPort uint16, err *tcpip.Error) { +func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) (reservedPort uint16, err *tcpip.Error) { s.mu.Lock() defer s.mu.Unlock() + dst := makeDestination(dest) + // If a port is specified, just try to reserve it for all network // protocols. if port != 0 { - if !s.reserveSpecificPort(networks, transport, addr, port, flags, bindToDevice) { + if !s.reserveSpecificPort(networks, transport, addr, port, flags, bindToDevice, dst) { return 0, tcpip.ErrPortInUse } return port, nil @@ -357,15 +417,16 @@ func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transp // A port wasn't specified, so try to find one. return s.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) { - return s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice), nil + return s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice, dst), nil }) } // reserveSpecificPort tries to reserve the given port on all given protocols. 
-func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) bool { - if !s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice) { +func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dst destination) bool { + if !s.isPortAvailableLocked(networks, transport, addr, port, flags, bindToDevice, dst) { return false } + flagBits := flags.Bits() // Reserve port on all network protocols. @@ -381,9 +442,65 @@ func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber d = make(deviceNode) m[addr] = d } - n := d[bindToDevice] + p := d[bindToDevice] + if p == nil { + p = make(portNode) + } + n := p[dst] n.AddRef(flagBits) - d[bindToDevice] = n + p[dst] = n + d[bindToDevice] = p + } + + return true +} + +// ReserveTuple adds a port reservation for the tuple on all given protocol. +func (s *PortManager) ReserveTuple(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) bool { + flagBits := flags.Bits() + dst := makeDestination(dest) + + s.mu.Lock() + defer s.mu.Unlock() + + // It is easier to undo the entire reservation, so if we find that the + // tuple can't be fully added, finish and undo the whole thing. + undo := false + + // Reserve port on all network protocols. + for _, network := range networks { + desc := portDescriptor{network, transport, port} + m, ok := s.allocatedPorts[desc] + if !ok { + m = make(bindAddresses) + s.allocatedPorts[desc] = m + } + d, ok := m[addr] + if !ok { + d = make(deviceNode) + m[addr] = d + } + p := d[bindToDevice] + if p == nil { + p = make(portNode) + } + + n := p[dst] + if n.TotalRefs() != 0 && n.IntersectionRefs()&flagBits == 0 { + // Tuple already exists. + undo = true + } + n.AddRef(flagBits) + p[dst] = n + d[bindToDevice] = p + } + + if undo { + // releasePortLocked decrements the counts (rather than setting + // them to zero), so it will undo the incorrect incrementing + // above. + s.releasePortLocked(networks, transport, addr, port, flagBits, bindToDevice, dst) + return false } return true @@ -391,12 +508,14 @@ func (s *PortManager) reserveSpecificPort(networks []tcpip.NetworkProtocolNumber // ReleasePort releases the reservation on a port/IP combination so that it can // be reserved by other endpoints. 
-func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID) { +func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) { s.mu.Lock() defer s.mu.Unlock() - flagBits := flags.Bits() + s.releasePortLocked(networks, transport, addr, port, flags.Bits(), bindToDevice, makeDestination(dest)) +} +func (s *PortManager) releasePortLocked(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags BitFlags, bindToDevice tcpip.NICID, dst destination) { for _, network := range networks { desc := portDescriptor{network, transport, port} if m, ok := s.allocatedPorts[desc]; ok { @@ -404,21 +523,32 @@ func (s *PortManager) ReleasePort(networks []tcpip.NetworkProtocolNumber, transp if !ok { continue } - n, ok := d[bindToDevice] + p, ok := d[bindToDevice] if !ok { continue } - n.refs[flagBits]-- - d[bindToDevice] = n - if n.TotalRefs() == 0 { - delete(d, bindToDevice) + n, ok := p[dst] + if !ok { + continue + } + n.DropRef(flags) + if n.TotalRefs() > 0 { + p[dst] = n + continue } - if len(d) == 0 { - delete(m, addr) + delete(p, dst) + if len(p) > 0 { + continue + } + delete(d, bindToDevice) + if len(d) > 0 { + continue } - if len(m) == 0 { - delete(s.allocatedPorts, desc) + delete(m, addr) + if len(m) > 0 { + continue } + delete(s.allocatedPorts, desc) } } } diff --git a/pkg/tcpip/ports/ports_test.go b/pkg/tcpip/ports/ports_test.go index d6969d050..58db5868c 100644 --- a/pkg/tcpip/ports/ports_test.go +++ b/pkg/tcpip/ports/ports_test.go @@ -36,6 +36,7 @@ type portReserveTestAction struct { flags Flags release bool device tcpip.NICID + dest tcpip.FullAddress } func TestPortReservation(t *testing.T) { @@ -272,6 +273,54 @@ func TestPortReservation(t *testing.T) { {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true, LoadBalanced: true}, want: nil}, {port: 24, ip: fakeIPAddress, flags: Flags{MostRecent: true}, want: tcpip.ErrPortInUse}, }, + }, { + tname: "bind tuple with reuseaddr, and then wildcard with reuseaddr", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{}, want: nil}, + }, + }, { + tname: "bind tuple with reuseaddr, and then wildcard", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: nil}, + {port: 24, ip: fakeIPAddress, want: tcpip.ErrPortInUse}, + }, + }, { + tname: "bind wildcard with reuseaddr, and then tuple with reuseaddr", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: nil}, + }, + }, { + tname: "bind tuple with reuseaddr, and then wildcard", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: tcpip.ErrPortInUse}, + }, + }, { + tname: "bind two tuples with reuseaddr", + actions: 
[]portReserveTestAction{ + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 25}, want: nil}, + }, + }, { + tname: "bind two tuples", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: nil}, + {port: 24, ip: fakeIPAddress, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 25}, want: nil}, + }, + }, { + tname: "bind wildcard, and then tuple with reuseaddr", + actions: []portReserveTestAction{ + {port: 24, ip: fakeIPAddress, dest: tcpip.FullAddress{}, want: nil}, + {port: 24, ip: fakeIPAddress, flags: Flags{TupleOnly: true}, dest: tcpip.FullAddress{Addr: fakeIPAddress, Port: 24}, want: tcpip.ErrPortInUse}, + }, + }, { + tname: "bind wildcard twice with reuseaddr", + actions: []portReserveTestAction{ + {port: 24, ip: anyIPAddress, flags: Flags{TupleOnly: true}, want: nil}, + {port: 24, ip: anyIPAddress, flags: Flags{TupleOnly: true}, want: nil}, + }, }, } { t.Run(test.tname, func(t *testing.T) { @@ -280,19 +329,18 @@ func TestPortReservation(t *testing.T) { for _, test := range test.actions { if test.release { - pm.ReleasePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device) + pm.ReleasePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest) continue } - gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device) + gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest) if err != test.want { - t.Fatalf("ReservePort(.., .., %s, %d, %+v, %d) = %v, want %v", test.ip, test.port, test.flags, test.device, err, test.want) + t.Fatalf("ReservePort(.., .., %s, %d, %+v, %d, %v) = %v, want %v", test.ip, test.port, test.flags, test.device, test.dest, err, test.want) } if test.port == 0 && (gotPort == 0 || gotPort < FirstEphemeral) { - t.Fatalf("ReservePort(.., .., .., 0) = %d, want port number >= %d to be picked", gotPort, FirstEphemeral) + t.Fatalf("ReservePort(.., .., .., 0, ..) = %d, want port number >= %d to be picked", gotPort, FirstEphemeral) } } }) - } } diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 51abe32a7..e92ec0c24 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -1408,6 +1408,12 @@ func (s *Stack) RegisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.N return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice) } +// CheckRegisterTransportEndpoint checks if an endpoint can be registered with +// the stack transport dispatcher. +func (s *Stack) CheckRegisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { + return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice) +} + // UnregisterTransportEndpoint removes the endpoint with the given id from the // stack transport dispatcher. 
func (s *Stack) UnregisterTransportEndpoint(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index 118b449d5..b902c6ca9 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -221,6 +221,18 @@ func (epsByNIC *endpointsByNIC) registerEndpoint(d *transportDemuxer, netProto t return multiPortEp.singleRegisterEndpoint(t, flags) } +func (epsByNIC *endpointsByNIC) checkEndpoint(d *transportDemuxer, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { + epsByNIC.mu.RLock() + defer epsByNIC.mu.RUnlock() + + multiPortEp, ok := epsByNIC.endpoints[bindToDevice] + if !ok { + return nil + } + + return multiPortEp.singleCheckEndpoint(flags) +} + // unregisterEndpoint returns true if endpointsByNIC has to be unregistered. func (epsByNIC *endpointsByNIC) unregisterEndpoint(bindToDevice tcpip.NICID, t TransportEndpoint, flags ports.Flags) bool { epsByNIC.mu.Lock() @@ -289,6 +301,17 @@ func (d *transportDemuxer) registerEndpoint(netProtos []tcpip.NetworkProtocolNum return nil } +// checkEndpoint checks if an endpoint can be registered with the dispatcher. +func (d *transportDemuxer) checkEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { + for _, n := range netProtos { + if err := d.singleCheckEndpoint(n, protocol, id, flags, bindToDevice); err != nil { + return err + } + } + + return nil +} + // multiPortEndpoint is a container for TransportEndpoints which are bound to // the same pair of address and port. endpointsArr always has at least one // element. @@ -380,7 +403,7 @@ func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, flags p ep.mu.Lock() defer ep.mu.Unlock() - bits := flags.Bits() + bits := flags.Bits() & ports.MultiBindFlagMask if len(ep.endpoints) != 0 { // If it was previously bound, we need to check if we can bind again. @@ -395,6 +418,22 @@ func (ep *multiPortEndpoint) singleRegisterEndpoint(t TransportEndpoint, flags p return nil } +func (ep *multiPortEndpoint) singleCheckEndpoint(flags ports.Flags) *tcpip.Error { + ep.mu.RLock() + defer ep.mu.RUnlock() + + bits := flags.Bits() & ports.MultiBindFlagMask + + if len(ep.endpoints) != 0 { + // If it was previously bound, we need to check if we can bind again. + if ep.flags.TotalRefs() > 0 && bits&ep.flags.IntersectionRefs() == 0 { + return tcpip.ErrPortInUse + } + } + + return nil +} + // unregisterEndpoint returns true if multiPortEndpoint has to be unregistered. 
func (ep *multiPortEndpoint) unregisterEndpoint(t TransportEndpoint, flags ports.Flags) bool { ep.mu.Lock() @@ -406,7 +445,7 @@ func (ep *multiPortEndpoint) unregisterEndpoint(t TransportEndpoint, flags ports ep.endpoints[len(ep.endpoints)-1] = nil ep.endpoints = ep.endpoints[:len(ep.endpoints)-1] - ep.flags.DropRef(flags.Bits()) + ep.flags.DropRef(flags.Bits() & ports.MultiBindFlagMask) break } } @@ -439,6 +478,28 @@ func (d *transportDemuxer) singleRegisterEndpoint(netProto tcpip.NetworkProtocol return epsByNIC.registerEndpoint(d, netProto, protocol, ep, flags, bindToDevice) } +func (d *transportDemuxer) singleCheckEndpoint(netProto tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) *tcpip.Error { + if id.RemotePort != 0 { + // SO_REUSEPORT only applies to bound/listening endpoints. + flags.LoadBalanced = false + } + + eps, ok := d.protocol[protocolIDs{netProto, protocol}] + if !ok { + return tcpip.ErrUnknownProtocol + } + + eps.mu.RLock() + defer eps.mu.RUnlock() + + epsByNIC, ok := eps.endpoints[id] + if !ok { + return nil + } + + return epsByNIC.checkEndpoint(d, netProto, protocol, flags, bindToDevice) +} + // unregisterEndpoint unregisters the endpoint with the given id such that it // won't receive any more packets. func (d *transportDemuxer) unregisterEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) { diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 7679fe169..6e00e5526 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/pkg/tcpip/ports" "gvisor.dev/gvisor/pkg/tcpip/seqnum" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/waiter" @@ -199,9 +198,8 @@ func (l *listenContext) isCookieValid(id stack.TransportEndpointID, cookie seqnu } // createConnectingEndpoint creates a new endpoint in a connecting state, with -// the connection parameters given by the arguments. The endpoint is returned -// with n.mu held. -func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, *tcpip.Error) { +// the connection parameters given by the arguments. +func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) *endpoint { // Create a new endpoint. netProto := l.netProto if netProto == 0 { @@ -227,22 +225,7 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i // window to grow to a really large value. n.rcvAutoParams.prevCopied = n.initialReceiveWindow() - // Lock the endpoint before registering to ensure that no out of - // band changes are possible due to incoming packets etc till - // the endpoint is done initializing. - n.mu.Lock() - - // Register new endpoint so that packets are routed to it. 
- if err := n.stack.RegisterTransportEndpoint(n.boundNICID, n.effectiveNetProtos, ProtocolNumber, n.ID, n, ports.Flags{LoadBalanced: n.reusePort}, n.boundBindToDevice); err != nil { - n.mu.Unlock() - n.Close() - return nil, err - } - - n.isRegistered = true - n.registeredReusePort = n.reusePort - - return n, nil + return n } // createEndpointAndPerformHandshake creates a new endpoint in connected state @@ -253,10 +236,12 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head // Create new endpoint. irs := s.sequenceNumber isn := generateSecureISN(s.id, l.stack.Seed()) - ep, err := l.createConnectingEndpoint(s, isn, irs, opts, queue) - if err != nil { - return nil, err - } + ep := l.createConnectingEndpoint(s, isn, irs, opts, queue) + + // Lock the endpoint before registering to ensure that no out of + // band changes are possible due to incoming packets etc till + // the endpoint is done initializing. + ep.mu.Lock() ep.owner = owner // listenEP is nil when listenContext is used by tcp.Forwarder. @@ -264,18 +249,13 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head if l.listenEP != nil { l.listenEP.mu.Lock() if l.listenEP.EndpointState() != StateListen { + l.listenEP.mu.Unlock() // Ensure we release any registrations done by the newly // created endpoint. ep.mu.Unlock() ep.Close() - // Wake up any waiters. This is strictly not required normally - // as a socket that was never accepted can't really have any - // registered waiters except when stack.Wait() is called which - // waits for all registered endpoints to stop and expects an - // EventHUp. - ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) return nil, tcpip.ErrConnectionAborted } l.addPendingEndpoint(ep) @@ -284,21 +264,44 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head // to the newly created endpoint. l.listenEP.propagateInheritableOptionsLocked(ep) + if !ep.reserveTupleLocked() { + ep.mu.Unlock() + ep.Close() + + if l.listenEP != nil { + l.removePendingEndpoint(ep) + l.listenEP.mu.Unlock() + } + + return nil, tcpip.ErrConnectionAborted + } + deferAccept = l.listenEP.deferAccept l.listenEP.mu.Unlock() } + // Register new endpoint so that packets are routed to it. + if err := ep.stack.RegisterTransportEndpoint(ep.boundNICID, ep.effectiveNetProtos, ProtocolNumber, ep.ID, ep, ep.boundPortFlags, ep.boundBindToDevice); err != nil { + ep.mu.Unlock() + ep.Close() + + if l.listenEP != nil { + l.removePendingEndpoint(ep) + } + + ep.drainClosingSegmentQueue() + + return nil, err + } + + ep.isRegistered = true + // Perform the 3-way handshake. h := newPassiveHandshake(ep, seqnum.Size(ep.initialReceiveWindow()), isn, irs, opts, deferAccept) if err := h.execute(); err != nil { ep.mu.Unlock() ep.Close() - // Wake up any waiters. This is strictly not required normally - // as a socket that was never accepted can't really have any - // registered waiters except when stack.Wait() is called which - // waits for all registered endpoints to stop and expects an - // EventHUp. - ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) + ep.notifyAborted() if l.listenEP != nil { l.removePendingEndpoint(ep) @@ -374,6 +377,43 @@ func (e *endpoint) deliverAccepted(n *endpoint) { // Precondition: e.mu and n.mu must be held. 
func (e *endpoint) propagateInheritableOptionsLocked(n *endpoint) { n.userTimeout = e.userTimeout + n.portFlags = e.portFlags + n.boundBindToDevice = e.boundBindToDevice + n.boundPortFlags = e.boundPortFlags +} + +// reserveTupleLocked reserves an accepted endpoint's tuple. +// +// Preconditions: +// * propagateInheritableOptionsLocked has been called. +// * e.mu is held. +func (e *endpoint) reserveTupleLocked() bool { + dest := tcpip.FullAddress{Addr: e.ID.RemoteAddress, Port: e.ID.RemotePort} + if !e.stack.ReserveTuple( + e.effectiveNetProtos, + ProtocolNumber, + e.ID.LocalAddress, + e.ID.LocalPort, + e.boundPortFlags, + e.boundBindToDevice, + dest, + ) { + return false + } + + e.isPortReserved = true + e.boundDest = dest + return true +} + +// notifyAborted wakes up any waiters on registered, but not accepted +// endpoints. +// +// This is strictly not required normally as a socket that was never accepted +// can't really have any registered waiters except when stack.Wait() is called +// which waits for all registered endpoints to stop and expects an EventHUp. +func (e *endpoint) notifyAborted() { + e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } // handleSynSegment is called in its own goroutine once the listening endpoint @@ -568,16 +608,34 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { rcvdSynOptions.TSEcr = s.parsedOptions.TSEcr } - n, err := ctx.createConnectingEndpoint(s, iss, irs, rcvdSynOptions, &waiter.Queue{}) - if err != nil { + n := ctx.createConnectingEndpoint(s, iss, irs, rcvdSynOptions, &waiter.Queue{}) + + n.mu.Lock() + + // Propagate any inheritable options from the listening endpoint + // to the newly created endpoint. + e.propagateInheritableOptionsLocked(n) + + if !n.reserveTupleLocked() { + n.mu.Unlock() + n.Close() + e.stack.Stats().TCP.FailedConnectionAttempts.Increment() e.stats.FailedConnectionAttempts.Increment() return } - // Propagate any inheritable options from the listening endpoint - // to the newly created endpoint. - e.propagateInheritableOptionsLocked(n) + // Register new endpoint so that packets are routed to it. + if err := n.stack.RegisterTransportEndpoint(n.boundNICID, n.effectiveNetProtos, ProtocolNumber, n.ID, n, n.boundPortFlags, n.boundBindToDevice); err != nil { + n.mu.Unlock() + n.Close() + + e.stack.Stats().TCP.FailedConnectionAttempts.Increment() + e.stats.FailedConnectionAttempts.Increment() + return + } + + n.isRegistered = true // clear the tsOffset for the newly created // endpoint as the Timestamp was already diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 10df2bcd5..1e4c2f507 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -396,7 +396,8 @@ type endpoint struct { mu sync.Mutex `state:"nosave"` ownedByUser uint32 - // state must be read/set using the EndpointState()/setEndpointState() methods. + // state must be read/set using the EndpointState()/setEndpointState() + // methods. 
state EndpointState `state:".(EndpointState)"` // origEndpointState is only used during a restore phase to save the @@ -405,8 +406,8 @@ type endpoint struct { origEndpointState EndpointState `state:"nosave"` isPortReserved bool `state:"manual"` - isRegistered bool - boundNICID tcpip.NICID `state:"manual"` + isRegistered bool `state:"manual"` + boundNICID tcpip.NICID route stack.Route `state:"manual"` ttl uint8 v6only bool @@ -415,10 +416,14 @@ type endpoint struct { // disabling SO_BROADCAST, albeit as a NOOP. broadcast bool + // portFlags stores the current values of port related flags. + portFlags ports.Flags + // Values used to reserve a port or register a transport endpoint // (which ever happens first). boundBindToDevice tcpip.NICID boundPortFlags ports.Flags + boundDest tcpip.FullAddress // effectiveNetProtos contains the network protocols actually in use. In // most cases it will only contain "netProto", but in cases like IPv6 @@ -426,7 +431,7 @@ type endpoint struct { // protocols (e.g., IPv6 and IPv4) or a single different protocol (e.g., // IPv4 when IPv6 endpoint is bound or connected to an IPv4 mapped // address). - effectiveNetProtos []tcpip.NetworkProtocolNumber `state:"manual"` + effectiveNetProtos []tcpip.NetworkProtocolNumber // workerRunning specifies if a worker goroutine is running. workerRunning bool @@ -462,13 +467,6 @@ type endpoint struct { // sack holds TCP SACK related information for this endpoint. sack SACKInfo - // reusePort is set to true if SO_REUSEPORT is enabled. - reusePort bool - - // registeredReusePort is set if the current endpoint registration was - // done with SO_REUSEPORT enabled. - registeredReusePort bool - // bindToDevice is set to the NIC on which to bind or disabled if 0. bindToDevice tcpip.NICID @@ -488,7 +486,6 @@ type endpoint struct { // The options below aren't implemented, but we remember the user // settings because applications expect to be able to set/query these // options. - reuseAddr bool // slowAck holds the negated state of quick ack. It is stubbed out and // does nothing. @@ -838,7 +835,6 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue rcvBufSize: DefaultReceiveBufferSize, sndBufSize: DefaultSendBufferSize, sndMTU: int(math.MaxInt32), - reuseAddr: true, keepalive: keepalive{ // Linux defaults. idle: 2 * time.Hour, @@ -1025,15 +1021,15 @@ func (e *endpoint) closeNoShutdownLocked() { // in Listen() when trying to register. if e.EndpointState() == StateListen && e.isPortReserved { if e.isRegistered { - e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, ports.Flags{LoadBalanced: e.registeredReusePort}, e.boundBindToDevice) + e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundPortFlags, e.boundBindToDevice) e.isRegistered = false - e.registeredReusePort = false } - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice, e.boundDest) e.isPortReserved = false e.boundBindToDevice = 0 e.boundPortFlags = ports.Flags{} + e.boundDest = tcpip.FullAddress{} } // Mark endpoint as closed. 
@@ -1091,17 +1087,17 @@ func (e *endpoint) cleanupLocked() { e.workerCleanup = false if e.isRegistered { - e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, ports.Flags{LoadBalanced: e.registeredReusePort}, e.boundBindToDevice) + e.stack.StartTransportEndpointCleanup(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundPortFlags, e.boundBindToDevice) e.isRegistered = false - e.registeredReusePort = false } if e.isPortReserved { - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice, e.boundDest) e.isPortReserved = false } e.boundBindToDevice = 0 e.boundPortFlags = ports.Flags{} + e.boundDest = tcpip.FullAddress{} e.route.Release() e.stack.CompleteTransportEndpointCleanup(e) @@ -1522,12 +1518,12 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { case tcpip.ReuseAddressOption: e.LockUser() - e.reuseAddr = v + e.portFlags.TupleOnly = v e.UnlockUser() case tcpip.ReusePortOption: e.LockUser() - e.reusePort = v + e.portFlags.LoadBalanced = v e.UnlockUser() case tcpip.V6OnlyOption: @@ -1831,14 +1827,14 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { case tcpip.ReuseAddressOption: e.LockUser() - v := e.reuseAddr + v := e.portFlags.TupleOnly e.UnlockUser() return v, nil case tcpip.ReusePortOption: e.LockUser() - v := e.reusePort + v := e.portFlags.LoadBalanced e.UnlockUser() return v, nil @@ -2091,8 +2087,6 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc } defer r.Release() - origID := e.ID - netProtos := []tcpip.NetworkProtocolNumber{netProto} e.ID.LocalAddress = r.LocalAddress e.ID.RemoteAddress = r.RemoteAddress @@ -2100,11 +2094,10 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc if e.ID.LocalPort != 0 { // The endpoint is bound to a port, attempt to register it. - err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, e.ID, e, ports.Flags{LoadBalanced: e.reusePort}, e.boundBindToDevice) + err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, e.ID, e, e.boundPortFlags, e.boundBindToDevice) if err != nil { return err } - e.registeredReusePort = e.reusePort } else { // The endpoint doesn't have a local port yet, so try to get // one. Make sure that it isn't one that will result in the same @@ -2128,40 +2121,33 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc if sameAddr && p == e.ID.RemotePort { return false, nil } - // reusePort is false below because connect cannot reuse a port even if - // reusePort was set. - if !e.stack.IsPortAvailable(netProtos, ProtocolNumber, e.ID.LocalAddress, p, ports.Flags{LoadBalanced: false}, e.bindToDevice) { + if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr); err != nil { return false, nil } id := e.ID id.LocalPort = p - switch e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, id, e, ports.Flags{LoadBalanced: e.reusePort}, e.bindToDevice) { - case nil: - // Port picking successful. Save the details of - // the selected port. 
- e.ID = id - e.boundBindToDevice = e.bindToDevice - e.registeredReusePort = e.reusePort - return true, nil - case tcpip.ErrPortInUse: - return false, nil - default: + if err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, id, e, e.portFlags, e.bindToDevice); err != nil { + e.stack.ReleasePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr) + if err == tcpip.ErrPortInUse { + return false, nil + } return false, err } + + // Port picking successful. Save the details of + // the selected port. + e.ID = id + e.isPortReserved = true + e.boundBindToDevice = e.bindToDevice + e.boundPortFlags = e.portFlags + e.boundDest = addr + return true, nil }); err != nil { return err } } - // Remove the port reservation. This can happen when Bind is called - // before Connect: in such a case we don't want to hold on to - // reservations anymore. - if e.isPortReserved { - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, origID.LocalAddress, origID.LocalPort, e.boundPortFlags, e.boundBindToDevice) - e.isPortReserved = false - } - e.isRegistered = true e.setEndpointState(StateConnecting) e.route = r.Clone() @@ -2340,13 +2326,12 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { } // Register the endpoint. - if err := e.stack.RegisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, ports.Flags{LoadBalanced: e.reusePort}, e.boundBindToDevice); err != nil { + if err := e.stack.RegisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundPortFlags, e.boundBindToDevice); err != nil { return err } e.isRegistered = true e.setEndpointState(StateListen) - e.registeredReusePort = e.reusePort // The channel may be non-nil when we're restoring the endpoint, and it // may be pre-populated with some previously accepted (but not Accepted) @@ -2433,16 +2418,13 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { } } - flags := ports.Flags{ - LoadBalanced: e.reusePort, - } - port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port, flags, e.bindToDevice) + port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port, e.portFlags, e.bindToDevice, tcpip.FullAddress{}) if err != nil { return err } e.boundBindToDevice = e.bindToDevice - e.boundPortFlags = flags + e.boundPortFlags = e.portFlags e.isPortReserved = true e.effectiveNetProtos = netProtos e.ID.LocalPort = port @@ -2450,7 +2432,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { // Any failures beyond this point must remove the port registration. defer func(portFlags ports.Flags, bindToDevice tcpip.NICID) { if err != nil { - e.stack.ReleasePort(netProtos, ProtocolNumber, addr.Addr, port, portFlags, bindToDevice) + e.stack.ReleasePort(netProtos, ProtocolNumber, addr.Addr, port, portFlags, bindToDevice, tcpip.FullAddress{}) e.isPortReserved = false e.effectiveNetProtos = nil e.ID.LocalPort = 0 @@ -2473,6 +2455,10 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { e.ID.LocalAddress = addr.Addr } + if err := e.stack.CheckRegisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e.boundPortFlags, e.boundBindToDevice); err != nil { + return err + } + // Mark endpoint as bound. 
e.setEndpointState(StateBound) diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index 0bebec2d1..8258c0ecc 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -93,10 +93,6 @@ func (e *endpoint) beforeSave() { if e.waiterQueue != nil && !e.waiterQueue.IsEmpty() { panic("endpoint still has waiters upon save") } - - if e.EndpointState() != StateClose && !((e.EndpointState() == StateBound || e.EndpointState() == StateListen) == e.isPortReserved) { - panic("endpoints which are not in the closed state must have a reserved port IFF they are in bound or listen state") - } } // saveAcceptedChan is invoked by stateify. @@ -198,14 +194,17 @@ func (e *endpoint) Resume(s *stack.Stack) { } bind := func() { - if len(e.BindAddr) == 0 { - e.BindAddr = e.ID.LocalAddress + addr, _, err := e.checkV4MappedLocked(tcpip.FullAddress{Addr: e.BindAddr, Port: e.ID.LocalPort}) + if err != nil { + panic("unable to parse BindAddr: " + err.String()) } - addr := e.BindAddr - port := e.ID.LocalPort - if err := e.Bind(tcpip.FullAddress{Addr: addr, Port: port}); err != nil { - panic(fmt.Sprintf("endpoint binding [%v]:%d failed: %v", addr, port, err)) + if ok := e.stack.ReserveTuple(e.effectiveNetProtos, ProtocolNumber, addr.Addr, addr.Port, e.boundPortFlags, e.boundBindToDevice, e.boundDest); !ok { + panic(fmt.Sprintf("unable to re-reserve tuple (%v, %q, %d, %+v, %d, %v)", e.effectiveNetProtos, addr.Addr, addr.Port, e.boundPortFlags, e.boundBindToDevice, e.boundDest)) } + e.isPortReserved = true + + // Mark endpoint as bound. + e.setEndpointState(StateBound) } switch { @@ -277,17 +276,7 @@ func (e *endpoint) Resume(s *stack.Stack) { tcpip.AsyncLoading.Done() }() case epState == StateClose: - if e.isPortReserved { - tcpip.AsyncLoading.Add(1) - go func() { - connectedLoading.Wait() - listenLoading.Wait() - connectingLoading.Wait() - bind() - e.setEndpointState(StateClose) - tcpip.AsyncLoading.Done() - }() - } + e.isPortReserved = false e.state = StateClose e.stack.CompleteTransportEndpointCleanup(e) tcpip.DeleteDanglingEndpoint(e) diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index aca6a7951..2632a3c67 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -3879,6 +3879,9 @@ func TestReusePort(t *testing.T) { if err != nil { t.Fatalf("NewEndpoint failed; %s", err) } + if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil { + t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err) + } if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) } @@ -3888,6 +3891,9 @@ func TestReusePort(t *testing.T) { if err != nil { t.Fatalf("NewEndpoint failed; %s", err) } + if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil { + t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err) + } if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) } @@ -3898,6 +3904,9 @@ func TestReusePort(t *testing.T) { if err != nil { t.Fatalf("NewEndpoint failed; %s", err) } + if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil { + t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err) + } if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) } @@ -3910,6 +3919,9 @@ func TestReusePort(t *testing.T) { if err != nil { 
t.Fatalf("NewEndpoint failed; %s", err) } + if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil { + t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err) + } if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) } @@ -3920,6 +3932,9 @@ func TestReusePort(t *testing.T) { if err != nil { t.Fatalf("NewEndpoint failed; %s", err) } + if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil { + t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err) + } if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) } @@ -3932,6 +3947,9 @@ func TestReusePort(t *testing.T) { if err != nil { t.Fatalf("NewEndpoint failed; %s", err) } + if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil { + t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err) + } if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { t.Fatalf("Bind failed: %s", err) } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 40d66ef09..6ea212093 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -231,7 +231,7 @@ func (e *endpoint) Close() { switch e.state { case StateBound, StateConnected: e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundPortFlags, e.boundBindToDevice) - e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice, tcpip.FullAddress{}) e.boundBindToDevice = 0 e.boundPortFlags = ports.Flags{} } @@ -1047,7 +1047,7 @@ func (e *endpoint) Disconnect() *tcpip.Error { } else { if e.ID.LocalPort != 0 { // Release the ephemeral port. 
- e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, boundPortFlags, e.boundBindToDevice) + e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, boundPortFlags, e.boundBindToDevice, tcpip.FullAddress{}) e.boundPortFlags = ports.Flags{} } e.state = StateInitial @@ -1198,7 +1198,7 @@ func (*endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.NICID, *tcpip.Error) { if e.ID.LocalPort == 0 { - port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.portFlags, e.bindToDevice) + port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.portFlags, e.bindToDevice, tcpip.FullAddress{}) if err != nil { return id, e.bindToDevice, err } @@ -1208,7 +1208,7 @@ func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.Networ err := e.stack.RegisterTransportEndpoint(nicID, netProtos, ProtocolNumber, id, e, e.boundPortFlags, e.bindToDevice) if err != nil { - e.stack.ReleasePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.boundPortFlags, e.bindToDevice) + e.stack.ReleasePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.boundPortFlags, e.bindToDevice, tcpip.FullAddress{}) e.boundPortFlags = ports.Flags{} } return id, e.bindToDevice, err diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc index 4b2cb0606..6557c8e8e 100644 --- a/test/syscalls/linux/socket_bind_to_device_sequence.cc +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -363,10 +363,6 @@ TEST_P(BindToDeviceSequenceTest, BindTwiceWithReuseOnce) { } TEST_P(BindToDeviceSequenceTest, BindWithReuseAddr) { - // FIXME(b/158253835): Support SO_REUSEADDR on TCP sockets. - SKIP_IF(IsRunningOnGvisor() && - (GetParam().type & SOCK_STREAM) == SOCK_STREAM); - ASSERT_NO_FATAL_FAILURE( BindSocket(/* reusePort */ false, /* reuse_addr */ true)); ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ false, @@ -406,10 +402,6 @@ TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReusePort) { } TEST_P(BindToDeviceSequenceTest, BindReuseAddrReusePortThenReuseAddr) { - // FIXME(b/158253835): Support SO_REUSEADDR on TCP sockets. - SKIP_IF(IsRunningOnGvisor() && - (GetParam().type & SOCK_STREAM) == SOCK_STREAM); - ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0)); @@ -436,10 +428,6 @@ TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReusePort) { } TEST_P(BindToDeviceSequenceTest, BindDoubleReuseAddrReusePortThenReuseAddr) { - // FIXME(b/158253835): Support SO_REUSEADDR on TCP sockets. 
- SKIP_IF(IsRunningOnGvisor() && - (GetParam().type & SOCK_STREAM) == SOCK_STREAM); - ASSERT_NO_FATAL_FAILURE(BindSocket( /* reuse_port */ true, /* reuse_addr */ true, /* bind_to_device */ 0)); ASSERT_NO_FATAL_FAILURE(BindSocket(/* reuse_port */ true, diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index a914d9a12..18b9e4b70 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -686,24 +687,9 @@ TEST_P(SocketInetLoopbackTest, TCPFinWait2Test_NoRandomSave) { // be restarted causing the final bind/connect to fail. DisableSave ds; - // TODO(gvisor.dev/issue/1030): Portmanager does not track all 5 tuple - // reservations which causes the bind() to succeed on gVisor but connect - // correctly fails. - if (IsRunningOnGvisor()) { - ASSERT_THAT( - bind(conn_fd2.get(), reinterpret_cast(&conn_bound_addr), - conn_addrlen), - SyscallSucceeds()); - ASSERT_THAT(RetryEINTR(connect)(conn_fd2.get(), - reinterpret_cast(&conn_addr), - conn_addrlen), - SyscallFailsWithErrno(EADDRINUSE)); - } else { - ASSERT_THAT( - bind(conn_fd2.get(), reinterpret_cast(&conn_bound_addr), - conn_addrlen), - SyscallFailsWithErrno(EADDRINUSE)); - } + ASSERT_THAT(bind(conn_fd2.get(), + reinterpret_cast(&conn_bound_addr), conn_addrlen), + SyscallFailsWithErrno(EADDRINUSE)); // Sleep for a little over the linger timeout to reduce flakiness in // save/restore tests. @@ -1737,6 +1723,171 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, DualStackV6AnyReservesEverything) { ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast(&addr_v4), test_addr_v4.addr_len), SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 any on the same port with a v4 socket + // fails. + TestAddress const& test_addr_v4_any = V4Any(); + sockaddr_storage addr_v4_any = test_addr_v4_any.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port)); + const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_any.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast(&addr_v4_any), + test_addr_v4_any.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, + DualStackV6AnyReuseAddrDoesNotReserveV4Any) { + auto const& param = GetParam(); + + // Bind the v6 any on a dual stack socket. + TestAddress const& test_addr_dual = V6Any(); + sockaddr_storage addr_dual = test_addr_dual.addr; + const FileDescriptor fd_dual = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0)); + ASSERT_THAT(setsockopt(fd_dual.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast(&addr_dual), + test_addr_dual.addr_len), + SyscallSucceeds()); + + // Get the port that we bound. + socklen_t addrlen = test_addr_dual.addr_len; + ASSERT_THAT(getsockname(fd_dual.get(), + reinterpret_cast(&addr_dual), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); + + // Verify that binding the v4 any on the same port with a v4 socket succeeds. 
+ TestAddress const& test_addr_v4_any = V4Any(); + sockaddr_storage addr_v4_any = test_addr_v4_any.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port)); + const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_any.family(), param.type, 0)); + ASSERT_THAT(setsockopt(fd_v4_any.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast(&addr_v4_any), + test_addr_v4_any.addr_len), + SyscallSucceeds()); +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, + DualStackV6AnyReuseAddrListenReservesV4Any) { + auto const& param = GetParam(); + + // Only TCP sockets are supported. + SKIP_IF((param.type & SOCK_STREAM) == 0); + + // Bind the v6 any on a dual stack socket. + TestAddress const& test_addr_dual = V6Any(); + sockaddr_storage addr_dual = test_addr_dual.addr; + const FileDescriptor fd_dual = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0)); + ASSERT_THAT(setsockopt(fd_dual.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast(&addr_dual), + test_addr_dual.addr_len), + SyscallSucceeds()); + + ASSERT_THAT(listen(fd_dual.get(), 5), SyscallSucceeds()); + + // Get the port that we bound. + socklen_t addrlen = test_addr_dual.addr_len; + ASSERT_THAT(getsockname(fd_dual.get(), + reinterpret_cast(&addr_dual), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); + + // Verify that binding the v4 any on the same port with a v4 socket succeeds. + TestAddress const& test_addr_v4_any = V4Any(); + sockaddr_storage addr_v4_any = test_addr_v4_any.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port)); + const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_any.family(), param.type, 0)); + ASSERT_THAT(setsockopt(fd_v4_any.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast(&addr_v4_any), + test_addr_v4_any.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); +} + +TEST_P(SocketMultiProtocolInetLoopbackTest, + DualStackV6AnyWithListenReservesEverything) { + auto const& param = GetParam(); + + // Only TCP sockets are supported. + SKIP_IF((param.type & SOCK_STREAM) == 0); + + // Bind the v6 any on a dual stack socket. + TestAddress const& test_addr_dual = V6Any(); + sockaddr_storage addr_dual = test_addr_dual.addr; + const FileDescriptor fd_dual = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_dual.family(), param.type, 0)); + ASSERT_THAT(bind(fd_dual.get(), reinterpret_cast(&addr_dual), + test_addr_dual.addr_len), + SyscallSucceeds()); + + ASSERT_THAT(listen(fd_dual.get(), 5), SyscallSucceeds()); + + // Get the port that we bound. + socklen_t addrlen = test_addr_dual.addr_len; + ASSERT_THAT(getsockname(fd_dual.get(), + reinterpret_cast(&addr_dual), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(test_addr_dual.family(), addr_dual)); + + // Verify that binding the v6 loopback with the same port fails. 
+ TestAddress const& test_addr_v6 = V6Loopback(); + sockaddr_storage addr_v6 = test_addr_v6.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v6.family(), &addr_v6, port)); + const FileDescriptor fd_v6 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v6.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v6.get(), reinterpret_cast(&addr_v6), + test_addr_v6.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 loopback on the same port with a v6 socket + // fails. + TestAddress const& test_addr_v4_mapped = V4MappedLoopback(); + sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr; + ASSERT_NO_ERRNO( + SetAddrPort(test_addr_v4_mapped.family(), &addr_v4_mapped, port)); + const FileDescriptor fd_v4_mapped = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_mapped.family(), param.type, 0)); + ASSERT_THAT( + bind(fd_v4_mapped.get(), reinterpret_cast(&addr_v4_mapped), + test_addr_v4_mapped.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 loopback on the same port with a v4 socket + // fails. + TestAddress const& test_addr_v4 = V4Loopback(); + sockaddr_storage addr_v4 = test_addr_v4.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4.family(), &addr_v4, port)); + const FileDescriptor fd_v4 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr_v4.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v4.get(), reinterpret_cast(&addr_v4), + test_addr_v4.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); + + // Verify that binding the v4 any on the same port with a v4 socket + // fails. + TestAddress const& test_addr_v4_any = V4Any(); + sockaddr_storage addr_v4_any = test_addr_v4_any.addr; + ASSERT_NO_ERRNO(SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, port)); + const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE( + Socket(test_addr_v4_any.family(), param.type, 0)); + ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast(&addr_v4_any), + test_addr_v4_any.addr_len), + SyscallFailsWithErrno(EADDRINUSE)); } TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) { @@ -1798,10 +1949,6 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6OnlyV6AnyReservesV6) { TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) { auto const& param = GetParam(); - // FIXME(b/76031995): Support disabling SO_REUSEADDR for TCP sockets and make - // it disabled by default. - SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); - for (int i = 0; true; i++) { // Bind the v6 loopback on a dual stack socket. TestAddress const& test_addr = V6Loopback(); @@ -1864,17 +2011,6 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReserved) { test_addr_v6.addr_len), SyscallFailsWithErrno(EADDRINUSE)); - // Verify that binding the v4 any with the same port fails. - TestAddress const& test_addr_v4_any = V4Any(); - sockaddr_storage addr_v4_any = test_addr_v4_any.addr; - ASSERT_NO_ERRNO( - SetAddrPort(test_addr_v4_any.family(), &addr_v4_any, ephemeral_port)); - const FileDescriptor fd_v4_any = ASSERT_NO_ERRNO_AND_VALUE( - Socket(test_addr_v4_any.family(), param.type, 0)); - ASSERT_THAT(bind(fd_v4_any.get(), reinterpret_cast(&addr_v4_any), - test_addr_v4_any.addr_len), - SyscallFailsWithErrno(EADDRINUSE)); - // Verify that we can still bind the v4 loopback on the same port. 
TestAddress const& test_addr_v4_mapped = V4MappedLoopback(); sockaddr_storage addr_v4_mapped = test_addr_v4_mapped.addr; @@ -1963,10 +2099,6 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V6EphemeralPortReservedReuseAddr) { TEST_P(SocketMultiProtocolInetLoopbackTest, V4MappedEphemeralPortReserved) { auto const& param = GetParam(); - // FIXME(b/76031995): Support disabling SO_REUSEADDR for TCP sockets and make - // it disabled by default. - SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); - for (int i = 0; true; i++) { // Bind the v4 loopback on a dual stack socket. TestAddress const& test_addr = V4MappedLoopback(); @@ -2152,10 +2284,6 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReserved) { auto const& param = GetParam(); - // FIXME(b/76031995): Support disabling SO_REUSEADDR for TCP sockets and make - // it disabled by default. - SKIP_IF(IsRunningOnGvisor() && param.type == SOCK_STREAM); - for (int i = 0; true; i++) { // Bind the v4 loopback on a v4 socket. TestAddress const& test_addr = V4Loopback(); diff --git a/test/syscalls/linux/socket_ip_unbound.cc b/test/syscalls/linux/socket_ip_unbound.cc index 796598eee..1c7b0cf90 100644 --- a/test/syscalls/linux/socket_ip_unbound.cc +++ b/test/syscalls/linux/socket_ip_unbound.cc @@ -425,10 +425,6 @@ TEST_P(IPUnboundSocketTest, InsufficientBufferTOS) { TEST_P(IPUnboundSocketTest, ReuseAddrDefault) { auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); - // FIXME(b/76031995): gVisor TCP sockets default to SO_REUSEADDR enabled. - SKIP_IF(IsRunningOnGvisor() && - (GetParam().type & SOCK_STREAM) == SOCK_STREAM); - int get = -1; socklen_t get_sz = sizeof(get); ASSERT_THAT( -- cgit v1.2.3 From 58880bf551f4fdaeecf0a355816f1af353ef81b6 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 24 Jun 2020 16:21:53 -0700 Subject: Port /dev/net/tun device to VFS2. Updates #2912 #1035 PiperOrigin-RevId: 318162565 --- pkg/sentry/devices/tundev/BUILD | 23 +++++ pkg/sentry/devices/tundev/tundev.go | 178 +++++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/devtmpfs/devtmpfs.go | 16 ++- runsc/boot/BUILD | 1 + runsc/boot/vfs.go | 11 +- test/syscalls/BUILD | 1 + 6 files changed, 227 insertions(+), 3 deletions(-) create mode 100644 pkg/sentry/devices/tundev/BUILD create mode 100644 pkg/sentry/devices/tundev/tundev.go (limited to 'test') diff --git a/pkg/sentry/devices/tundev/BUILD b/pkg/sentry/devices/tundev/BUILD new file mode 100644 index 000000000..71c59287c --- /dev/null +++ b/pkg/sentry/devices/tundev/BUILD @@ -0,0 +1,23 @@ +load("//tools:defs.bzl", "go_library") + +licenses(["notice"]) + +go_library( + name = "tundev", + srcs = ["tundev.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/sentry/arch", + "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/inet", + "//pkg/sentry/kernel", + "//pkg/sentry/socket/netstack", + "//pkg/sentry/vfs", + "//pkg/syserror", + "//pkg/tcpip/link/tun", + "//pkg/usermem", + "//pkg/waiter", + ], +) diff --git a/pkg/sentry/devices/tundev/tundev.go b/pkg/sentry/devices/tundev/tundev.go new file mode 100644 index 000000000..dfbd069af --- /dev/null +++ b/pkg/sentry/devices/tundev/tundev.go @@ -0,0 +1,178 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package tundev implements the /dev/net/tun device. +package tundev + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/netstack" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip/link/tun" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +const ( + netTunDevMajor = 10 + netTunDevMinor = 200 +) + +// tunDevice implements vfs.Device for /dev/net/tun. +type tunDevice struct{} + +// Open implements vfs.Device.Open. +func (tunDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + fd := &tunFD{} + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{ + UseDentryMetadata: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil +} + +// tunFD implements vfs.FileDescriptionImpl for /dev/net/tun. +type tunFD struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD + + device tun.Device +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + request := args[1].Uint() + data := args[2].Pointer() + + switch request { + case linux.TUNSETIFF: + t := kernel.TaskFromContext(ctx) + if t == nil { + panic("Ioctl should be called from a task context") + } + if !t.HasCapability(linux.CAP_NET_ADMIN) { + return 0, syserror.EPERM + } + stack, ok := t.NetworkContext().(*netstack.Stack) + if !ok { + return 0, syserror.EINVAL + } + + var req linux.IFReq + if _, err := usermem.CopyObjectIn(ctx, uio, data, &req, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + flags := usermem.ByteOrder.Uint16(req.Data[:]) + return 0, fd.device.SetIff(stack.Stack, req.Name(), flags) + + case linux.TUNGETIFF: + var req linux.IFReq + + copy(req.IFName[:], fd.device.Name()) + + // Linux adds IFF_NOFILTER (the same value as IFF_NO_PI unfortunately) when + // there is no sk_filter. See __tun_chr_ioctl() in net/drivers/tun.c. + flags := fd.device.Flags() | linux.IFF_NOFILTER + usermem.ByteOrder.PutUint16(req.Data[:], flags) + + _, err := usermem.CopyObjectOut(ctx, uio, data, &req, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + default: + return 0, syserror.ENOTTY + } +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *tunFD) Release() { + fd.device.Release() +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *tunFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + return fd.Read(ctx, dst, opts) +} + +// Read implements vfs.FileDescriptionImpl.Read. 
+func (fd *tunFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + data, err := fd.device.Read() + if err != nil { + return 0, err + } + n, err := dst.CopyOut(ctx, data) + if n > 0 && n < len(data) { + // Not an error for partial copying. Packet truncated. + err = nil + } + return int64(n), err +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *tunFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + return fd.Write(ctx, src, opts) +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *tunFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + data := make([]byte, src.NumBytes()) + if _, err := src.CopyIn(ctx, data); err != nil { + return 0, err + } + return fd.device.Write(data) +} + +// Readiness implements watier.Waitable.Readiness. +func (fd *tunFD) Readiness(mask waiter.EventMask) waiter.EventMask { + return fd.device.Readiness(mask) +} + +// EventRegister implements watier.Waitable.EventRegister. +func (fd *tunFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + fd.device.EventRegister(e, mask) +} + +// EventUnregister implements watier.Waitable.EventUnregister. +func (fd *tunFD) EventUnregister(e *waiter.Entry) { + fd.device.EventUnregister(e) +} + +// isNetTunSupported returns whether /dev/net/tun device is supported for s. +func isNetTunSupported(s inet.Stack) bool { + _, ok := s.(*netstack.Stack) + return ok +} + +// Register registers all devices implemented by this package in vfsObj. +func Register(vfsObj *vfs.VirtualFilesystem) error { + return vfsObj.RegisterDevice(vfs.CharDevice, netTunDevMajor, netTunDevMinor, tunDevice{}, &vfs.RegisterDeviceOptions{}) +} + +// CreateDevtmpfsFiles creates device special files in dev representing all +// devices implemented by this package. +func CreateDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor) error { + return dev.CreateDeviceFile(ctx, "net/tun", vfs.CharDevice, netTunDevMajor, netTunDevMinor, 0666 /* mode */) +} diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go index 142ee53b0..d0e06cdc0 100644 --- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go +++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go @@ -136,6 +136,8 @@ func (a *Accessor) pathOperationAt(pathname string) *vfs.PathOperation { // CreateDeviceFile creates a device special file at the given pathname in the // devtmpfs instance accessed by the Accessor. func (a *Accessor) CreateDeviceFile(ctx context.Context, pathname string, kind vfs.DeviceKind, major, minor uint32, perms uint16) error { + actx := a.wrapContext(ctx) + mode := (linux.FileMode)(perms) switch kind { case vfs.BlockDevice: @@ -145,12 +147,24 @@ func (a *Accessor) CreateDeviceFile(ctx context.Context, pathname string, kind v default: panic(fmt.Sprintf("invalid vfs.DeviceKind: %v", kind)) } + + // Create any parent directories. See + // devtmpfs.c:handle_create()=>path_create(). + for it := fspath.Parse(pathname).Begin; it.NextOk(); it = it.Next() { + pop := a.pathOperationAt(it.String()) + if err := a.vfsObj.MkdirAt(actx, a.creds, pop, &vfs.MkdirOptions{ + Mode: 0755, + }); err != nil { + return fmt.Errorf("failed to create directory %q: %v", it.String(), err) + } + } + // NOTE: Linux's devtmpfs refuses to automatically delete files it didn't // create, which it recognizes by storing a pointer to the kdevtmpfs struct // thread in struct inode::i_private. 
Accessor doesn't yet support deletion // of files at all, and probably won't as long as we don't need to support // kernel modules, so this is moot for now. - return a.vfsObj.MknodAt(a.wrapContext(ctx), a.creds, a.pathOperationAt(pathname), &vfs.MknodOptions{ + return a.vfsObj.MknodAt(actx, a.creds, a.pathOperationAt(pathname), &vfs.MknodOptions{ Mode: mode, DevMajor: major, DevMinor: minor, diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index b701c65d2..45d7f7d09 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -41,6 +41,7 @@ go_library( "//pkg/sentry/control", "//pkg/sentry/devices/memdev", "//pkg/sentry/devices/ttydev", + "//pkg/sentry/devices/tundev", "//pkg/sentry/fdimport", "//pkg/sentry/fs", "//pkg/sentry/fs/dev", diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 2fdddc719..9ba5e9cd9 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/memdev" "gvisor.dev/gvisor/pkg/sentry/devices/ttydev" + "gvisor.dev/gvisor/pkg/sentry/devices/tundev" "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" @@ -79,6 +80,9 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre if err := ttydev.Register(vfsObj); err != nil { return fmt.Errorf("registering ttydev: %w", err) } + if err := tundev.Register(vfsObj); err != nil { + return fmt.Errorf("registering tundev: %v", err) + } a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name) if err != nil { return fmt.Errorf("creating devtmpfs accessor: %w", err) @@ -89,10 +93,13 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre return fmt.Errorf("initializing userspace: %w", err) } if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil { - return fmt.Errorf("creating devtmpfs files: %w", err) + return fmt.Errorf("creating memdev devtmpfs files: %w", err) } if err := ttydev.CreateDevtmpfsFiles(ctx, a); err != nil { - return fmt.Errorf("creating devtmpfs files: %w", err) + return fmt.Errorf("creating ttydev devtmpfs files: %w", err) + } + if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil { + return fmt.Errorf("creating tundev devtmpfs files: %v", err) } return nil } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 315af9efe..39c1f79ad 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -1013,6 +1013,7 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:tuntap_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From ac6f7b600bb6fe2e0c9171d24c0da2cc29388397 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Wed, 24 Jun 2020 18:12:19 -0700 Subject: Internal change. PiperOrigin-RevId: 318180382 --- test/syscalls/linux/socket_bind_to_device_sequence.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_bind_to_device_sequence.cc b/test/syscalls/linux/socket_bind_to_device_sequence.cc index 6557c8e8e..d3cc71dbf 100644 --- a/test/syscalls/linux/socket_bind_to_device_sequence.cc +++ b/test/syscalls/linux/socket_bind_to_device_sequence.cc @@ -180,7 +180,7 @@ class BindToDeviceSequenceTest : public ::testing::TestWithParam { private: SocketKind socket_factory_; // devices maps from the device id in the test case to the name of the device. - std::unordered_map devices_; + absl::node_hash_map devices_; // These are the tunnels that were created for the test and will be destroyed // by the destructor. 
vector> tunnels_; -- cgit v1.2.3 From b5e814445a4db5df7f4f58027422a5dba97ea766 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 24 Jun 2020 19:20:58 -0700 Subject: Fix procfs bugs in vfs2. - Support writing on proc/[pid]/{uid,gid}map - Return EIO for writing to static files. Updates #2923. PiperOrigin-RevId: 318188503 --- pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 4 +- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 2 + pkg/sentry/fsimpl/proc/task_files.go | 60 ++++++++++++++++++++++- pkg/sentry/vfs/file_description_impl_util.go | 2 +- pkg/sentry/vfs/file_description_impl_util_test.go | 8 +-- test/syscalls/BUILD | 2 + test/syscalls/linux/proc.cc | 49 +++++++++++++++++- 7 files changed, 117 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index c1215b70a..6886b0876 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -101,12 +101,12 @@ func (fd *DynamicBytesFD) Seek(ctx context.Context, offset int64, whence int32) return fd.DynamicBytesFileDescriptionImpl.Seek(ctx, offset, whence) } -// Read implmenets vfs.FileDescriptionImpl.Read. +// Read implements vfs.FileDescriptionImpl.Read. func (fd *DynamicBytesFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { return fd.DynamicBytesFileDescriptionImpl.Read(ctx, dst, opts) } -// PRead implmenets vfs.FileDescriptionImpl.PRead. +// PRead implements vfs.FileDescriptionImpl.PRead. func (fd *DynamicBytesFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { return fd.DynamicBytesFileDescriptionImpl.PRead(ctx, dst, offset, opts) } diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 650bd7b88..53aec4918 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -293,6 +293,8 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut // inode numbers are immutable after node creation. // TODO(gvisor.dev/issue/1193): Implement other stat fields like timestamps. + // Also, STATX_SIZE will need some special handling, because read-only static + // files should return EIO for truncate operations. return nil } diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index ba4405026..286c23f01 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -35,6 +35,10 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// "There is an (arbitrary) limit on the number of lines in the file. As at +// Linux 3.18, the limit is five lines." - user_namespaces(7) +const maxIDMapLines = 5 + // mm gets the kernel task's MemoryManager. No additional reference is taken on // mm here. This is safe because MemoryManager.destroy is required to leave the // MemoryManager in a state where it's still usable as a DynamicBytesSource. @@ -283,7 +287,8 @@ func (d *commData) Generate(ctx context.Context, buf *bytes.Buffer) error { return nil } -// idMapData implements vfs.DynamicBytesSource for /proc/[pid]/{gid_map|uid_map}. +// idMapData implements vfs.WritableDynamicBytesSource for +// /proc/[pid]/{gid_map|uid_map}. 
// // +stateify savable type idMapData struct { @@ -309,6 +314,59 @@ func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error { return nil } +func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + // "In addition, the number of bytes written to the file must be less than + // the system page size, and the write must be performed at the start of + // the file ..." - user_namespaces(7) + srclen := src.NumBytes() + if srclen >= usermem.PageSize || offset != 0 { + return 0, syserror.EINVAL + } + b := make([]byte, srclen) + if _, err := src.CopyIn(ctx, b); err != nil { + return 0, err + } + + // Truncate from the first NULL byte. + var nul int64 + nul = int64(bytes.IndexByte(b, 0)) + if nul == -1 { + nul = srclen + } + b = b[:nul] + // Remove the last \n. + if nul >= 1 && b[nul-1] == '\n' { + b = b[:nul-1] + } + lines := bytes.SplitN(b, []byte("\n"), maxIDMapLines+1) + if len(lines) > maxIDMapLines { + return 0, syserror.EINVAL + } + + entries := make([]auth.IDMapEntry, len(lines)) + for i, l := range lines { + var e auth.IDMapEntry + _, err := fmt.Sscan(string(l), &e.FirstID, &e.FirstParentID, &e.Length) + if err != nil { + return 0, syserror.EINVAL + } + entries[i] = e + } + var err error + if d.gids { + err = d.task.UserNamespace().SetGIDMap(ctx, entries) + } else { + err = d.task.UserNamespace().SetUIDMap(ctx, entries) + } + if err != nil { + return 0, err + } + + // On success, Linux's kernel/user_namespace.c:map_write() always returns + // count, even if fewer bytes were used. + return int64(srclen), nil +} + // mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps. // // +stateify savable diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index 1e66997ce..3fec0d6d6 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -327,7 +327,7 @@ func (fd *DynamicBytesFileDescriptionImpl) pwriteLocked(ctx context.Context, src writable, ok := fd.data.(WritableDynamicBytesSource) if !ok { - return 0, syserror.EINVAL + return 0, syserror.EIO } n, err := writable.Write(ctx, src, offset) if err != nil { diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 5061f6ac9..3b7e1c273 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -155,11 +155,11 @@ func TestGenCountFD(t *testing.T) { } // Write and PWrite fails. 
- if _, err := fd.Write(ctx, ioseq, WriteOptions{}); err != syserror.EINVAL { - t.Errorf("Write: got err %v, wanted %v", err, syserror.EINVAL) + if _, err := fd.Write(ctx, ioseq, WriteOptions{}); err != syserror.EIO { + t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO) } - if _, err := fd.PWrite(ctx, ioseq, 0, WriteOptions{}); err != syserror.EINVAL { - t.Errorf("Write: got err %v, wanted %v", err, syserror.EINVAL) + if _, err := fd.PWrite(ctx, ioseq, 0, WriteOptions{}); err != syserror.EIO { + t.Errorf("Write: got err %v, wanted %v", err, syserror.EIO) } } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 39c1f79ad..23123006f 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -479,6 +479,7 @@ syscall_test( syscall_test( size = "medium", test = "//test/syscalls/linux:proc_test", + vfs2 = "True", ) syscall_test( @@ -498,6 +499,7 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:proc_pid_uid_gid_map_test", + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index e77e9820e..d6b875dbf 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -754,8 +754,53 @@ TEST(ProcCpuinfo, RequiredFieldsArePresent) { } } -TEST(ProcCpuinfo, DeniesWrite) { - EXPECT_THAT(open("/proc/cpuinfo", O_WRONLY), SyscallFailsWithErrno(EACCES)); +TEST(ProcCpuinfo, DeniesWriteNonRoot) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); + + // Do setuid in a separate thread so that after finishing this test, the + // process can still open files the test harness created before starting this + // test. Otherwise, the files are created by root (UID before the test), but + // cannot be opened by the `uid` set below after the test. After calling + // setuid(non-zero-UID), there is no way to get root privileges back. + ScopedThread([&] { + // Use syscall instead of glibc setuid wrapper because we want this setuid + // call to only apply to this task. POSIX threads, however, require that all + // threads have the same UIDs, so using the setuid wrapper sets all threads' + // real UID. + // Also drops capabilities. + constexpr int kNobody = 65534; + EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds()); + EXPECT_THAT(open("/proc/cpuinfo", O_WRONLY), SyscallFailsWithErrno(EACCES)); + // TODO(gvisor.dev/issue/1193): Properly support setting size attributes in + // kernfs. + if (!IsRunningOnGvisor() || IsRunningWithVFS1()) { + EXPECT_THAT(truncate("/proc/cpuinfo", 123), + SyscallFailsWithErrno(EACCES)); + } + }); +} + +// With root privileges, it is possible to open /proc/cpuinfo with write mode, +// but all write operations will return EIO. +TEST(ProcCpuinfo, DeniesWriteRoot) { + // VFS1 does not behave differently for root/non-root. + SKIP_IF(IsRunningWithVFS1()); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER))); + + int fd; + EXPECT_THAT(fd = open("/proc/cpuinfo", O_WRONLY), SyscallSucceeds()); + if (fd > 0) { + EXPECT_THAT(write(fd, "x", 1), SyscallFailsWithErrno(EIO)); + EXPECT_THAT(pwrite(fd, "x", 1, 123), SyscallFailsWithErrno(EIO)); + } + // TODO(gvisor.dev/issue/1193): Properly support setting size attributes in + // kernfs. + if (!IsRunningOnGvisor() || IsRunningWithVFS1()) { + if (fd > 0) { + EXPECT_THAT(ftruncate(fd, 123), SyscallFailsWithErrno(EIO)); + } + EXPECT_THAT(truncate("/proc/cpuinfo", 123), SyscallFailsWithErrno(EIO)); + } } // Sanity checks that uptime is present. 
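
The uid_map/gid_map write path added above accepts a single short write at offset 0 and parses each line as an "ID parent-ID length" triple, rejecting anything over five lines. The standalone Go sketch below applies those same checks outside the sentry; the idMapEntry type, the page-size constant, and the parseIDMap helper are illustrative stand-ins, not gVisor APIs.

package main

import (
	"bytes"
	"fmt"
)

const (
	pageSize      = 4096 // stand-in for the system page size
	maxIDMapLines = 5    // same limit described in user_namespaces(7)
)

// idMapEntry is a local stand-in for auth.IDMapEntry.
type idMapEntry struct {
	FirstID, FirstParentID, Length uint32
}

// parseIDMap mirrors the validation in idMapData.Write: one write, at
// offset 0, shorter than a page, at most five "ID parent-ID length" lines.
func parseIDMap(b []byte, offset int64) ([]idMapEntry, error) {
	if len(b) >= pageSize || offset != 0 {
		return nil, fmt.Errorf("EINVAL: must be one short write at offset 0")
	}
	// Truncate at the first NUL byte and drop a single trailing newline.
	if i := bytes.IndexByte(b, 0); i >= 0 {
		b = b[:i]
	}
	b = bytes.TrimSuffix(b, []byte("\n"))
	lines := bytes.SplitN(b, []byte("\n"), maxIDMapLines+1)
	if len(lines) > maxIDMapLines {
		return nil, fmt.Errorf("EINVAL: more than %d map lines", maxIDMapLines)
	}
	entries := make([]idMapEntry, 0, len(lines))
	for _, l := range lines {
		var e idMapEntry
		if _, err := fmt.Sscan(string(l), &e.FirstID, &e.FirstParentID, &e.Length); err != nil {
			return nil, fmt.Errorf("EINVAL: bad mapping line %q", l)
		}
		entries = append(entries, e)
	}
	return entries, nil
}

func main() {
	entries, err := parseIDMap([]byte("0 1000 1\n"), 0)
	fmt.Println(entries, err) // [{0 1000 1}] <nil>
}
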
-- cgit v1.2.3 From bd5f0e2dc42866f28437f07b1a24e19d1748c3ea Mon Sep 17 00:00:00 2001 From: Ridwan Sharif Date: Tue, 9 Jun 2020 12:35:39 -0400 Subject: Add FUSE character device This change adds a FUSE character device backed by devtmpfs. This device will be used to establish a connection between the FUSE server daemon and fusefs. The FileDescriptionImpl methods will be implemented as we flesh out fusefs some more. The tests assert that the device can be opened and used. --- pkg/abi/linux/dev.go | 4 ++ pkg/sentry/devices/miscdev/BUILD | 20 +++++++++ pkg/sentry/devices/miscdev/fuse.go | 78 +++++++++++++++++++++++++++++++++++ pkg/sentry/devices/miscdev/miscdev.go | 54 ++++++++++++++++++++++++ test/syscalls/linux/dev.cc | 20 +++++++++ 5 files changed, 176 insertions(+) create mode 100644 pkg/sentry/devices/miscdev/BUILD create mode 100644 pkg/sentry/devices/miscdev/fuse.go create mode 100644 pkg/sentry/devices/miscdev/miscdev.go (limited to 'test') diff --git a/pkg/abi/linux/dev.go b/pkg/abi/linux/dev.go index fa3ae5f18..192e2093b 100644 --- a/pkg/abi/linux/dev.go +++ b/pkg/abi/linux/dev.go @@ -46,6 +46,10 @@ const ( // TTYAUX_MAJOR is the major device number for alternate TTY devices. TTYAUX_MAJOR = 5 + // MISC_MAJOR is the major device number for non-serial mice, misc feature + // devices. + MISC_MAJOR = 10 + // UNIX98_PTY_MASTER_MAJOR is the initial major device number for // Unix98 PTY masters. UNIX98_PTY_MASTER_MAJOR = 128 diff --git a/pkg/sentry/devices/miscdev/BUILD b/pkg/sentry/devices/miscdev/BUILD new file mode 100644 index 000000000..aaa76c5d2 --- /dev/null +++ b/pkg/sentry/devices/miscdev/BUILD @@ -0,0 +1,20 @@ +load("//tools:defs.bzl", "go_library") + +licenses(["notice"]) + +go_library( + name = "miscdev", + srcs = [ + "fuse.go", + "miscdev.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/vfs", + "//pkg/syserror", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/devices/miscdev/fuse.go b/pkg/sentry/devices/miscdev/fuse.go new file mode 100644 index 000000000..d0a963191 --- /dev/null +++ b/pkg/sentry/devices/miscdev/fuse.go @@ -0,0 +1,78 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package miscdev + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +const fuseDevMinor = 229 + +// fuseDevice implements vfs.Device for /dev/fuse. +type fuseDevice struct{} + +// Open implements vfs.Device.Open. +func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + var fd FUSEDeviceFile + if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{ + UseDentryMetadata: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil +} + +// FUSEDeviceFile implements vfs.FileDescriptionImpl for /dev/fuse. 
+type FUSEDeviceFile struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD + + // TODO(gvisor.dev/issue/2987): Add all the data structures needed to enqueue + // and deque requests, control synchronization and establish communication + // between the FUSE kernel module and the /dev/fuse character device. +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *FUSEDeviceFile) Release() {} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *FUSEDeviceFile) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + return 0, syserror.ENOSYS +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *FUSEDeviceFile) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + return 0, syserror.ENOSYS +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *FUSEDeviceFile) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + return 0, syserror.ENOSYS +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *FUSEDeviceFile) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + return 0, syserror.ENOSYS +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *FUSEDeviceFile) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + return 0, syserror.ENOSYS +} diff --git a/pkg/sentry/devices/miscdev/miscdev.go b/pkg/sentry/devices/miscdev/miscdev.go new file mode 100644 index 000000000..500d92ed9 --- /dev/null +++ b/pkg/sentry/devices/miscdev/miscdev.go @@ -0,0 +1,54 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package miscdev implements "misc" character devices, as implemented in Linux +// by drivers/char/misc.c and fs/fuse/dev.c. +package miscdev + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// miscDevMajor is the major device number for devices defined in this package. +const miscDevMajor = linux.MISC_MAJOR + +// Register registers all devices implemented by this package in vfsObj. +func Register(vfsObj *vfs.VirtualFilesystem) error { + for minor, dev := range map[uint32]vfs.Device{ + fuseDevMinor: fuseDevice{}, + } { + if err := vfsObj.RegisterDevice(vfs.CharDevice, miscDevMajor, minor, dev, &vfs.RegisterDeviceOptions{ + GroupName: "misc", + }); err != nil { + return err + } + } + return nil +} + +// CreateDevtmpfsFiles creates device special files in dev representing all +// devices implemented by this package. 
+func CreateDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor) error { + for minor, name := range map[uint32]string{ + fuseDevMinor: "fuse", + } { + if err := dev.CreateDeviceFile(ctx, name, vfs.CharDevice, miscDevMajor, minor, 0666 /* mode */); err != nil { + return err + } + } + return nil +} diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 4dd302eed..82b115981 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -146,7 +146,27 @@ TEST(DevTest, WriteDevFull) { EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallFailsWithErrno(ENOSPC)); } +TEST(DevTest, ReadDevFuse) { + SKIP_IF(IsRunningWithVFS1()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY)); + std::vector buf(1); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), sizeof(buf)), SyscallFailsWithErrno(ENOSYS)); +} + +TEST(DevTest, WriteDevFuse) { + SKIP_IF(IsRunningWithVFS1()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_WRONLY)); + const char* testStr = "test"; + EXPECT_THAT(WriteFd(fd.get(), testStr, sizeof(testStr)), SyscallFailsWithErrno(ENOSYS)); +} + TEST(DevTest, TTYExists) { + SKIP_IF(!IsRunningWithVFS1()); + struct stat statbuf = {}; ASSERT_THAT(stat("/dev/tty", &statbuf), SyscallSucceeds()); // Check that it's a character device with rw-rw-rw- permissions. -- cgit v1.2.3 From a63db7d90303280de9431f369e5a9c8db351a9e8 Mon Sep 17 00:00:00 2001 From: Ridwan Sharif Date: Wed, 17 Jun 2020 16:23:27 -0400 Subject: Moved FUSE device under the fuse directory --- pkg/sentry/devices/miscdev/BUILD | 20 ------- pkg/sentry/devices/miscdev/fuse.go | 78 -------------------------- pkg/sentry/devices/miscdev/miscdev.go | 54 ------------------ pkg/sentry/fsimpl/fuse/BUILD | 19 +++++++ pkg/sentry/fsimpl/fuse/dev.go | 100 ++++++++++++++++++++++++++++++++++ runsc/boot/BUILD | 1 + runsc/boot/vfs.go | 7 +++ test/syscalls/linux/dev.cc | 3 +- 8 files changed, 129 insertions(+), 153 deletions(-) delete mode 100644 pkg/sentry/devices/miscdev/BUILD delete mode 100644 pkg/sentry/devices/miscdev/fuse.go delete mode 100644 pkg/sentry/devices/miscdev/miscdev.go create mode 100644 pkg/sentry/fsimpl/fuse/BUILD create mode 100644 pkg/sentry/fsimpl/fuse/dev.go (limited to 'test') diff --git a/pkg/sentry/devices/miscdev/BUILD b/pkg/sentry/devices/miscdev/BUILD deleted file mode 100644 index aaa76c5d2..000000000 --- a/pkg/sentry/devices/miscdev/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -licenses(["notice"]) - -go_library( - name = "miscdev", - srcs = [ - "fuse.go", - "miscdev.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/sentry/fsimpl/devtmpfs", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/devices/miscdev/fuse.go b/pkg/sentry/devices/miscdev/fuse.go deleted file mode 100644 index d0a963191..000000000 --- a/pkg/sentry/devices/miscdev/fuse.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package miscdev - -import ( - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -const fuseDevMinor = 229 - -// fuseDevice implements vfs.Device for /dev/fuse. -type fuseDevice struct{} - -// Open implements vfs.Device.Open. -func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - var fd FUSEDeviceFile - if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{ - UseDentryMetadata: true, - }); err != nil { - return nil, err - } - return &fd.vfsfd, nil -} - -// FUSEDeviceFile implements vfs.FileDescriptionImpl for /dev/fuse. -type FUSEDeviceFile struct { - vfsfd vfs.FileDescription - vfs.FileDescriptionDefaultImpl - vfs.DentryMetadataFileDescriptionImpl - vfs.NoLockFD - - // TODO(gvisor.dev/issue/2987): Add all the data structures needed to enqueue - // and deque requests, control synchronization and establish communication - // between the FUSE kernel module and the /dev/fuse character device. -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *FUSEDeviceFile) Release() {} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *FUSEDeviceFile) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.ENOSYS -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *FUSEDeviceFile) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - return 0, syserror.ENOSYS -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *FUSEDeviceFile) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.ENOSYS -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *FUSEDeviceFile) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.ENOSYS -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *FUSEDeviceFile) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - return 0, syserror.ENOSYS -} diff --git a/pkg/sentry/devices/miscdev/miscdev.go b/pkg/sentry/devices/miscdev/miscdev.go deleted file mode 100644 index 500d92ed9..000000000 --- a/pkg/sentry/devices/miscdev/miscdev.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package miscdev implements "misc" character devices, as implemented in Linux -// by drivers/char/misc.c and fs/fuse/dev.c. -package miscdev - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// miscDevMajor is the major device number for devices defined in this package. 
-const miscDevMajor = linux.MISC_MAJOR - -// Register registers all devices implemented by this package in vfsObj. -func Register(vfsObj *vfs.VirtualFilesystem) error { - for minor, dev := range map[uint32]vfs.Device{ - fuseDevMinor: fuseDevice{}, - } { - if err := vfsObj.RegisterDevice(vfs.CharDevice, miscDevMajor, minor, dev, &vfs.RegisterDeviceOptions{ - GroupName: "misc", - }); err != nil { - return err - } - } - return nil -} - -// CreateDevtmpfsFiles creates device special files in dev representing all -// devices implemented by this package. -func CreateDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor) error { - for minor, name := range map[uint32]string{ - fuseDevMinor: "fuse", - } { - if err := dev.CreateDeviceFile(ctx, name, vfs.CharDevice, miscDevMajor, minor, 0666 /* mode */); err != nil { - return err - } - } - return nil -} diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD new file mode 100644 index 000000000..41567967d --- /dev/null +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -0,0 +1,19 @@ +load("//tools:defs.bzl", "go_library") + +licenses(["notice"]) + +go_library( + name = "fuse", + srcs = [ + "dev.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/vfs", + "//pkg/syserror", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go new file mode 100644 index 000000000..f6a67d005 --- /dev/null +++ b/pkg/sentry/fsimpl/fuse/dev.go @@ -0,0 +1,100 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fuse + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +const fuseDevMinor = 229 + +// fuseDevice implements vfs.Device for /dev/fuse. +type fuseDevice struct{} + +// Open implements vfs.Device.Open. +func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + var fd DeviceFD + if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{ + UseDentryMetadata: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil +} + +// DeviceFD implements vfs.FileDescriptionImpl for /dev/fuse. +type DeviceFD struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD + + // TODO(gvisor.dev/issue/2987): Add all the data structures needed to enqueue + // and deque requests, control synchronization and establish communication + // between the FUSE kernel module and the /dev/fuse character device. +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *DeviceFD) Release() {} + +// PRead implements vfs.FileDescriptionImpl.PRead. 
+func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + return 0, syserror.ENOSYS +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + return 0, syserror.ENOSYS +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + return 0, syserror.ENOSYS +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + return 0, syserror.ENOSYS +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + return 0, syserror.ENOSYS +} + +// Register registers the FUSE device with vfsObj. +func Register(vfsObj *vfs.VirtualFilesystem) error { + if err := vfsObj.RegisterDevice(vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, fuseDevice{}, &vfs.RegisterDeviceOptions{ + GroupName: "misc", + }); err != nil { + return err + } + + return nil +} + +// CreateDevtmpfsFile creates a device special file in devtmpfs. +func CreateDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor) error { + if err := dev.CreateDeviceFile(ctx, "fuse", vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, 0666 /* mode */); err != nil { + return err + } + + return nil +} diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 45d7f7d09..aad2a41de 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -55,6 +55,7 @@ go_library( "//pkg/sentry/fs/user", "//pkg/sentry/fsimpl/devpts", "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/fsimpl/fuse", "//pkg/sentry/fsimpl/gofer", "//pkg/sentry/fsimpl/host", "//pkg/sentry/fsimpl/overlay", diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 9ba5e9cd9..0f5ad1e05 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -31,6 +31,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/fuse" "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" "gvisor.dev/gvisor/pkg/sentry/fsimpl/overlay" "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" @@ -79,6 +80,9 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre } if err := ttydev.Register(vfsObj); err != nil { return fmt.Errorf("registering ttydev: %w", err) + + if err := fuse.Register(vfsObj); err != nil { + return fmt.Errorf("registering /dev/fuse: %w", err) } if err := tundev.Register(vfsObj); err != nil { return fmt.Errorf("registering tundev: %v", err) @@ -101,6 +105,9 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil { return fmt.Errorf("creating tundev devtmpfs files: %v", err) } + if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil { + return fmt.Errorf("creating devtmpfs fuse device file: %w", err) + } return nil } diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 82b115981..6be173c14 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -165,7 +165,8 @@ TEST(DevTest, WriteDevFuse) { } TEST(DevTest, TTYExists) { - SKIP_IF(!IsRunningWithVFS1()); + // Run test if running on VFS1 or on Linux. 
+ SKIP_IF(!IsRunningWithVFS1() && IsRunningOnGvisor()); struct stat statbuf = {}; ASSERT_THAT(stat("/dev/tty", &statbuf), SyscallSucceeds()); -- cgit v1.2.3 From 2828806fb015bbbec0f4a48670d1eb048f21099a Mon Sep 17 00:00:00 2001 From: Ridwan Sharif Date: Tue, 23 Jun 2020 14:25:38 -0400 Subject: Test that the fuse device can be opened --- runsc/boot/vfs.go | 5 +++-- test/syscalls/linux/dev.cc | 29 ++++++++--------------------- 2 files changed, 11 insertions(+), 23 deletions(-) (limited to 'test') diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 0f5ad1e05..b68117867 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -80,9 +80,10 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre } if err := ttydev.Register(vfsObj); err != nil { return fmt.Errorf("registering ttydev: %w", err) + } if err := fuse.Register(vfsObj); err != nil { - return fmt.Errorf("registering /dev/fuse: %w", err) + return fmt.Errorf("registering fusedev: %w", err) } if err := tundev.Register(vfsObj); err != nil { return fmt.Errorf("registering tundev: %v", err) @@ -106,7 +107,7 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre return fmt.Errorf("creating tundev devtmpfs files: %v", err) } if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil { - return fmt.Errorf("creating devtmpfs fuse device file: %w", err) + return fmt.Errorf("creating fusedev devtmpfs files: %w", err) } return nil } diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 6be173c14..3c88c4cbd 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -146,34 +146,21 @@ TEST(DevTest, WriteDevFull) { EXPECT_THAT(WriteFd(fd.get(), "a", 1), SyscallFailsWithErrno(ENOSPC)); } -TEST(DevTest, ReadDevFuse) { - SKIP_IF(IsRunningWithVFS1()); - - const FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY)); - std::vector buf(1); - EXPECT_THAT(ReadFd(fd.get(), buf.data(), sizeof(buf)), SyscallFailsWithErrno(ENOSYS)); -} - -TEST(DevTest, WriteDevFuse) { - SKIP_IF(IsRunningWithVFS1()); - - const FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_WRONLY)); - const char* testStr = "test"; - EXPECT_THAT(WriteFd(fd.get(), testStr, sizeof(testStr)), SyscallFailsWithErrno(ENOSYS)); -} - TEST(DevTest, TTYExists) { - // Run test if running on VFS1 or on Linux. - SKIP_IF(!IsRunningWithVFS1() && IsRunningOnGvisor()); - struct stat statbuf = {}; ASSERT_THAT(stat("/dev/tty", &statbuf), SyscallSucceeds()); // Check that it's a character device with rw-rw-rw- permissions. EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); } +TEST(DevTest, OpenDevFuse) { + // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new + // device registration is complete. + SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor()); + + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY)); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 54a31e219ca9d6086a367213a92d2a72ce3af07b Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 26 Jun 2020 13:46:01 -0700 Subject: Support inotify IN_ONESHOT. Also, while we're here, make sure that gofer inotify events are generated when files are created in remote revalidating mode. Updates #1479. 
PiperOrigin-RevId: 318536354 --- pkg/sentry/fsimpl/gofer/filesystem.go | 10 +++- pkg/sentry/syscalls/linux/vfs2/inotify.go | 2 +- pkg/sentry/vfs/inotify.go | 84 +++++++++++++++++++++++-------- test/syscalls/linux/inotify.cc | 48 ++++++++++++++++-- 4 files changed, 115 insertions(+), 29 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index d253c996c..73bac738d 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -389,7 +389,15 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir // RPC will fail with EEXIST like we would have. If the RPC succeeds, and a // stale dentry exists, the dentry will fail revalidation next time it's // used. - return createInRemoteDir(parent, name) + if err := createInRemoteDir(parent, name); err != nil { + return err + } + ev := linux.IN_CREATE + if dir { + ev |= linux.IN_ISDIR + } + parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) + return nil } if child := parent.children[name]; child != nil { return syserror.EEXIST diff --git a/pkg/sentry/syscalls/linux/vfs2/inotify.go b/pkg/sentry/syscalls/linux/vfs2/inotify.go index 74e5287b7..5d98134a5 100644 --- a/pkg/sentry/syscalls/linux/vfs2/inotify.go +++ b/pkg/sentry/syscalls/linux/vfs2/inotify.go @@ -81,7 +81,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern // "EINVAL: The given event mask contains no valid events." // -- inotify_add_watch(2) - if validBits := mask & linux.ALL_INOTIFY_BITS; validBits == 0 { + if mask&linux.ALL_INOTIFY_BITS == 0 { return 0, nil, syserror.EINVAL } diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 35960394c..509034531 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -447,29 +447,51 @@ func (w *Watches) Remove(id uint64) { return } - if _, ok := w.ws[id]; !ok { - // While there's technically no problem with silently ignoring a missing - // watch, this is almost certainly a bug. - panic(fmt.Sprintf("Attempt to remove a watch, but no watch found with provided id %+v.", id)) + // It is possible for w.Remove() to be called for the same watch multiple + // times. See the treatment of one-shot watches in Watches.Notify(). + if _, ok := w.ws[id]; ok { + delete(w.ws, id) } - delete(w.ws, id) } // Notify queues a new event with watches in this set. Watches with // IN_EXCL_UNLINK are skipped if the event is coming from a child that has been // unlinked. func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlinked bool) { - // N.B. We don't defer the unlocks because Notify is in the hot path of - // all IO operations, and the defer costs too much for small IO - // operations. + var hasExpired bool w.mu.RLock() for _, watch := range w.ws { if unlinked && watch.ExcludeUnlinked() && et == PathEvent { continue } - watch.Notify(name, events, cookie) + if watch.Notify(name, events, cookie) { + hasExpired = true + } + } + w.mu.RUnlock() + + if hasExpired { + w.cleanupExpiredWatches() + } +} + +// This function is relatively expensive and should only be called where there +// are expired watches. +func (w *Watches) cleanupExpiredWatches() { + // Because of lock ordering, we cannot acquire Inotify.mu for each watch + // owner while holding w.mu. As a result, store expired watches locally + // before removing. 
+ var toRemove []*Watch + w.mu.RLock() + for _, watch := range w.ws { + if atomic.LoadInt32(&watch.expired) == 1 { + toRemove = append(toRemove, watch) + } } w.mu.RUnlock() + for _, watch := range toRemove { + watch.owner.RmWatch(watch.wd) + } } // HandleDeletion is called when the watch target is destroyed. Clear the @@ -478,16 +500,10 @@ func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlin func (w *Watches) HandleDeletion() { w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */) - // We can't hold w.mu while calling watch.handleDeletion to preserve lock - // ordering w.r.t to the owner inotify instances. Instead, atomically move - // the watches map into a local variable so we can iterate over it safely. - // - // Because of this however, it is possible for the watches' owners to reach - // this inode while the inode has no refs. This is still safe because the - // owners can only reach the inode until this function finishes calling - // watch.handleDeletion below and the inode is guaranteed to exist in the - // meantime. But we still have to be very careful not to rely on inode state - // that may have been already destroyed. + // As in Watches.Notify, we can't hold w.mu while acquiring Inotify.mu for + // the owner of each watch being deleted. Instead, atomically store the + // watches map in a local variable and set it to nil so we can iterate over + // it with the assurance that there will be no concurrent accesses. var ws map[uint64]*Watch w.mu.Lock() ws = w.ws @@ -519,17 +535,28 @@ func (w *Watches) HandleDeletion() { // +stateify savable type Watch struct { // Inotify instance which owns this watch. + // + // This field is immutable after creation. owner *Inotify // Descriptor for this watch. This is unique across an inotify instance. + // + // This field is immutable after creation. wd int32 // target is a dentry representing the watch target. Its watch set contains this watch. + // + // This field is immutable after creation. target *Dentry // Events being monitored via this watch. Must be accessed with atomic // memory operations. mask uint32 + + // expired is set to 1 to indicate that this watch is a one-shot that has + // already sent a notification and therefore can be removed. Must be accessed + // with atomic memory operations. + expired int32 } // OwnerID returns the id of the inotify instance that owns this watch. @@ -546,12 +573,20 @@ func (w *Watch) ExcludeUnlinked() bool { return atomic.LoadUint32(&w.mask)&linux.IN_EXCL_UNLINK != 0 } -// Notify queues a new event on this watch. -func (w *Watch) Notify(name string, events uint32, cookie uint32) { +// Notify queues a new event on this watch. Returns true if this is a one-shot +// watch that should be deleted, after this event was successfully queued. +func (w *Watch) Notify(name string, events uint32, cookie uint32) bool { + if atomic.LoadInt32(&w.expired) == 1 { + // This is a one-shot watch that is already in the process of being + // removed. This may happen if a second event reaches the watch target + // before this watch has been removed. + return false + } + mask := atomic.LoadUint32(&w.mask) if mask&events == 0 { // We weren't watching for this event. 
- return + return false } // Event mask should include bits matched from the watch plus all control @@ -560,6 +595,11 @@ func (w *Watch) Notify(name string, events uint32, cookie uint32) { effectiveMask := unmaskableBits | mask matchedEvents := effectiveMask & events w.owner.queueEvent(newEvent(w.wd, name, matchedEvents, cookie)) + if mask&linux.IN_ONESHOT != 0 { + atomic.StoreInt32(&w.expired, 1) + return true + } + return false } // Event represents a struct inotify_event from linux. diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 731c7046c..bdb645c35 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -1485,20 +1485,26 @@ TEST(Inotify, DuplicateWatchReturnsSameWatchDescriptor) { TEST(Inotify, UnmatchedEventsAreDiscarded) { const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - const TempPath file1 = + TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path())); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); - ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), file1.path(), IN_ACCESS)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ACCESS)); - const FileDescriptor file1_fd = + FileDescriptor file1_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY)); - const std::vector events = - ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); // We only asked for access events, the open event should be discarded. ASSERT_THAT(events, Are({})); + + // IN_IGNORED events are always generated, regardless of the mask. + file1_fd.reset(); + file1.reset(); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get())); + ASSERT_THAT(events, Are({Event(IN_IGNORED, wd)})); } TEST(Inotify, AddWatchWithInvalidEventMaskFails) { @@ -2073,6 +2079,38 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) { })); } +TEST(Inotify, OneShot) { + // TODO(gvisor.dev/issue/1624): IN_ONESHOT not supported in VFS1. + SKIP_IF(IsRunningWithVFS1()); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), file.path(), IN_MODIFY | IN_ONESHOT)); + + // Open an fd, write to it, and then close it. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY)); + ASSERT_THAT(write(fd.get(), "x", 1), SyscallSucceedsWithValue(1)); + fd.reset(); + + // We should get a single event followed by IN_IGNORED indicating removal + // of the one-shot watch. Prior activity (i.e. open) that is not in the mask + // should not trigger removal, and activity after removal (i.e. close) should + // not generate events. + std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({ + Event(IN_MODIFY, wd), + Event(IN_IGNORED, wd), + })); + + // The watch should already have been removed. + EXPECT_THAT(inotify_rm_watch(inotify_fd.get(), wd), + SyscallFailsWithErrno(EINVAL)); +} + // This test helps verify that the lock order of filesystem and inotify locks // is respected when inotify instances and watch targets are concurrently being // destroyed. 
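
The one-shot handling above deliberately avoids removing a watch while the watch-set lock is held, because removal has to re-enter the owning inotify instance; expired watches are only flagged under the read lock and reaped afterwards. The self-contained Go sketch below shows that same flag-then-reap pattern with hypothetical watch/watchSet types; it simplifies removal to a map delete instead of going through an owner, so it illustrates the lock-ordering idea rather than gVisor's actual types.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// watch is a hypothetical stand-in for vfs.Watch.
type watch struct {
	id      uint64
	oneShot bool
	expired int32 // accessed atomically, like Watch.expired
}

// notify reports whether this was a one-shot watch that has now fired.
func (w *watch) notify() bool {
	if atomic.LoadInt32(&w.expired) == 1 {
		return false // already fired; removal is in flight
	}
	fmt.Printf("delivered event to watch %d\n", w.id)
	if w.oneShot {
		atomic.StoreInt32(&w.expired, 1)
		return true
	}
	return false
}

// watchSet is a hypothetical stand-in for vfs.Watches.
type watchSet struct {
	mu sync.RWMutex
	ws map[uint64]*watch
}

func (s *watchSet) notifyAll() {
	var hasExpired bool
	s.mu.RLock()
	for _, w := range s.ws {
		if w.notify() {
			hasExpired = true
		}
	}
	s.mu.RUnlock()
	if !hasExpired {
		return
	}

	// Collect expired watches under the read lock, then remove them only
	// after it is released, mirroring cleanupExpiredWatches.
	var toRemove []uint64
	s.mu.RLock()
	for id, w := range s.ws {
		if atomic.LoadInt32(&w.expired) == 1 {
			toRemove = append(toRemove, id)
		}
	}
	s.mu.RUnlock()
	s.mu.Lock()
	for _, id := range toRemove {
		delete(s.ws, id)
	}
	s.mu.Unlock()
}

func main() {
	s := &watchSet{ws: map[uint64]*watch{
		1: {id: 1, oneShot: true},
		2: {id: 2},
	}}
	s.notifyAll() // both watches fire; watch 1 expires and is reaped
	s.notifyAll() // only watch 2 fires now
	fmt.Println("remaining watches:", len(s.ws))
}
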
-- cgit v1.2.3 From e6a90baef1e6921c560c939f4b591163004613b9 Mon Sep 17 00:00:00 2001 From: Zeling Feng Date: Fri, 26 Jun 2020 15:07:35 -0700 Subject: Support IPv6 extension headers in packetimpact tests. - IPv6 HopByHop Options Extension Header - IPv6 Destination Options Extension Header PiperOrigin-RevId: 318551425 --- test/packetimpact/testbench/layers.go | 164 ++++++++++++++++++++++++++--- test/packetimpact/testbench/layers_test.go | 111 +++++++++++++++++++ 2 files changed, 261 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 560c4111b..a8121b0da 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -477,6 +477,10 @@ func (l *IPv6) ToBytes() ([]byte, error) { fields.NextHeader = uint8(header.UDPProtocolNumber) case *ICMPv6: fields.NextHeader = uint8(header.ICMPv6ProtocolNumber) + case *IPv6HopByHopOptionsExtHdr: + fields.NextHeader = uint8(header.IPv6HopByHopOptionsExtHdrIdentifier) + case *IPv6DestinationOptionsExtHdr: + fields.NextHeader = uint8(header.IPv6DestinationOptionsExtHdrIdentifier) default: // TODO(b/150301488): Support more protocols as needed. return nil, fmt.Errorf("ToBytes can't deduce the IPv6 header's next protocol: %#v", n) @@ -495,6 +499,25 @@ func (l *IPv6) ToBytes() ([]byte, error) { return h, nil } +// nextIPv6PayloadParser finds the corresponding parser for nextHeader. +func nextIPv6PayloadParser(nextHeader uint8) layerParser { + switch tcpip.TransportProtocolNumber(nextHeader) { + case header.TCPProtocolNumber: + return parseTCP + case header.UDPProtocolNumber: + return parseUDP + case header.ICMPv6ProtocolNumber: + return parseICMPv6 + } + switch header.IPv6ExtensionHeaderIdentifier(nextHeader) { + case header.IPv6HopByHopOptionsExtHdrIdentifier: + return parseIPv6HopByHopOptionsExtHdr + case header.IPv6DestinationOptionsExtHdrIdentifier: + return parseIPv6DestinationOptionsExtHdr + } + return parsePayload +} + // parseIPv6 parses the bytes assuming that they start with an ipv6 header and // continues parsing further encapsulations. func parseIPv6(b []byte) (Layer, layerParser) { @@ -509,18 +532,7 @@ func parseIPv6(b []byte) (Layer, layerParser) { SrcAddr: Address(h.SourceAddress()), DstAddr: Address(h.DestinationAddress()), } - var nextParser layerParser - switch h.TransportProtocol() { - case header.TCPProtocolNumber: - nextParser = parseTCP - case header.UDPProtocolNumber: - nextParser = parseUDP - case header.ICMPv6ProtocolNumber: - nextParser = parseICMPv6 - default: - // Assume that the rest is a payload. - nextParser = parsePayload - } + nextParser := nextIPv6PayloadParser(h.NextHeader()) return &ipv6, nextParser } @@ -538,6 +550,123 @@ func (l *IPv6) merge(other Layer) error { return mergeLayer(l, other) } +// IPv6HopByHopOptionsExtHdr can construct and match an IPv6HopByHopOptions +// Extension Header. +type IPv6HopByHopOptionsExtHdr struct { + LayerBase + NextHeader *header.IPv6ExtensionHeaderIdentifier + Options []byte +} + +// IPv6DestinationOptionsExtHdr can construct and match an IPv6DestinationOptions +// Extension Header. +type IPv6DestinationOptionsExtHdr struct { + LayerBase + NextHeader *header.IPv6ExtensionHeaderIdentifier + Options []byte +} + +// ipv6OptionsExtHdrToBytes serializes an options extension header into bytes. 
+func ipv6OptionsExtHdrToBytes(nextHeader *header.IPv6ExtensionHeaderIdentifier, options []byte) []byte { + length := len(options) + 2 + bytes := make([]byte, length) + if nextHeader == nil { + bytes[0] = byte(header.IPv6NoNextHeaderIdentifier) + } else { + bytes[0] = byte(*nextHeader) + } + // ExtHdrLen field is the length of the extension header + // in 8-octet unit, ignoring the first 8 octets. + // https://tools.ietf.org/html/rfc2460#section-4.3 + // https://tools.ietf.org/html/rfc2460#section-4.6 + bytes[1] = uint8((length - 8) / 8) + copy(bytes[2:], options) + return bytes +} + +// IPv6ExtHdrIdent is a helper routine that allocates a new +// header.IPv6ExtensionHeaderIdentifier value to store v and returns a pointer +// to it. +func IPv6ExtHdrIdent(id header.IPv6ExtensionHeaderIdentifier) *header.IPv6ExtensionHeaderIdentifier { + return &id +} + +// ToBytes implements Layer.ToBytes +func (l *IPv6HopByHopOptionsExtHdr) ToBytes() ([]byte, error) { + return ipv6OptionsExtHdrToBytes(l.NextHeader, l.Options), nil +} + +// ToBytes implements Layer.ToBytes +func (l *IPv6DestinationOptionsExtHdr) ToBytes() ([]byte, error) { + return ipv6OptionsExtHdrToBytes(l.NextHeader, l.Options), nil +} + +// parseIPv6ExtHdr parses an IPv6 extension header and returns the NextHeader +// field, the rest of the payload and a parser function for the corresponding +// next extension header. +func parseIPv6ExtHdr(b []byte) (header.IPv6ExtensionHeaderIdentifier, []byte, layerParser) { + nextHeader := b[0] + // For HopByHop and Destination options extension headers, + // This field is the length of the extension header in + // 8-octet units, not including the first 8 octets. + // https://tools.ietf.org/html/rfc2460#section-4.3 + // https://tools.ietf.org/html/rfc2460#section-4.6 + length := b[1]*8 + 8 + data := b[2:length] + nextParser := nextIPv6PayloadParser(nextHeader) + return header.IPv6ExtensionHeaderIdentifier(nextHeader), data, nextParser +} + +// parseIPv6HopByHopOptionsExtHdr parses the bytes assuming that they start +// with an IPv6 HopByHop Options Extension Header. +func parseIPv6HopByHopOptionsExtHdr(b []byte) (Layer, layerParser) { + nextHeader, options, nextParser := parseIPv6ExtHdr(b) + return &IPv6HopByHopOptionsExtHdr{NextHeader: &nextHeader, Options: options}, nextParser +} + +// parseIPv6DestinationOptionsExtHdr parses the bytes assuming that they start +// with an IPv6 Destination Options Extension Header. +func parseIPv6DestinationOptionsExtHdr(b []byte) (Layer, layerParser) { + nextHeader, options, nextParser := parseIPv6ExtHdr(b) + return &IPv6DestinationOptionsExtHdr{NextHeader: &nextHeader, Options: options}, nextParser +} + +func (l *IPv6HopByHopOptionsExtHdr) length() int { + return len(l.Options) + 2 +} + +func (l *IPv6HopByHopOptionsExtHdr) match(other Layer) bool { + return equalLayer(l, other) +} + +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *IPv6HopByHopOptionsExtHdr) merge(other Layer) error { + return mergeLayer(l, other) +} + +func (l *IPv6HopByHopOptionsExtHdr) String() string { + return stringLayer(l) +} + +func (l *IPv6DestinationOptionsExtHdr) length() int { + return len(l.Options) + 2 +} + +func (l *IPv6DestinationOptionsExtHdr) match(other Layer) bool { + return equalLayer(l, other) +} + +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. 
+func (l *IPv6DestinationOptionsExtHdr) merge(other Layer) error { + return mergeLayer(l, other) +} + +func (l *IPv6DestinationOptionsExtHdr) String() string { + return stringLayer(l) +} + // ICMPv6 can construct and match an ICMPv6 encapsulation. type ICMPv6 struct { LayerBase @@ -567,8 +696,15 @@ func (l *ICMPv6) ToBytes() ([]byte, error) { if l.Checksum != nil { h.SetChecksum(*l.Checksum) } else { - ipv6 := l.Prev().(*IPv6) - h.SetChecksum(header.ICMPv6Checksum(h, *ipv6.SrcAddr, *ipv6.DstAddr, buffer.VectorisedView{})) + // It is possible that the ICMPv6 header does not follow the IPv6 header + // immediately, there could be one or more extension headers in between. + // We need to search forward to find the IPv6 header. + for prev := l.Prev(); prev != nil; prev = prev.Prev() { + if ipv6, ok := prev.(*IPv6); ok { + h.SetChecksum(header.ICMPv6Checksum(h, *ipv6.SrcAddr, *ipv6.DstAddr, buffer.VectorisedView{})) + break + } + } } return h, nil } diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index c7f00e70d..382a983a1 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -505,3 +505,114 @@ func TestTCPOptions(t *testing.T) { }) } } + +func TestIPv6ExtHdrOptions(t *testing.T) { + for _, tt := range []struct { + description string + wantBytes []byte + wantLayers Layers + }{ + { + description: "IPv6/HopByHop", + wantBytes: []byte{ + // IPv6 Header + 0x60, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xad, 0xbe, 0xef, + // HopByHop Options + 0x3b, 0x00, 0x05, 0x02, 0x00, 0x00, 0x01, 0x00, + }, + wantLayers: []Layer{ + &IPv6{ + SrcAddr: Address(tcpip.Address(net.ParseIP("::1"))), + DstAddr: Address(tcpip.Address(net.ParseIP("fe80::dead:beef"))), + }, + &IPv6HopByHopOptionsExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6NoNextHeaderIdentifier), + Options: []byte{0x05, 0x02, 0x00, 0x00, 0x01, 0x00}, + }, + &Payload{ + Bytes: nil, + }, + }, + }, + { + description: "IPv6/HopByHop/Payload", + wantBytes: []byte{ + // IPv6 Header + 0x60, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xad, 0xbe, 0xef, + // HopByHop Options + 0x3b, 0x00, 0x05, 0x02, 0x00, 0x00, 0x01, 0x00, + // Sample Data + 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x20, 0x44, 0x61, 0x74, 0x61, + }, + wantLayers: []Layer{ + &IPv6{ + SrcAddr: Address(tcpip.Address(net.ParseIP("::1"))), + DstAddr: Address(tcpip.Address(net.ParseIP("fe80::dead:beef"))), + }, + &IPv6HopByHopOptionsExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6NoNextHeaderIdentifier), + Options: []byte{0x05, 0x02, 0x00, 0x00, 0x01, 0x00}, + }, + &Payload{ + Bytes: []byte("Sample Data"), + }, + }, + }, + { + description: "IPv6/HopByHop/Destination/ICMPv6", + wantBytes: []byte{ + // IPv6 Header + 0x60, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xad, 0xbe, 0xef, + // HopByHop Options + 0x3c, 0x00, 0x05, 0x02, 0x00, 0x00, 0x01, 0x00, + // Destination Options + 0x3a, 0x00, 0x05, 0x02, 0x00, 0x00, 0x01, 0x00, + // ICMPv6 Param Problem + 0x04, 0x00, 0x5f, 
0x98, 0x00, 0x00, 0x00, 0x06, + }, + wantLayers: []Layer{ + &IPv6{ + SrcAddr: Address(tcpip.Address(net.ParseIP("::1"))), + DstAddr: Address(tcpip.Address(net.ParseIP("fe80::dead:beef"))), + }, + &IPv6HopByHopOptionsExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6DestinationOptionsExtHdrIdentifier), + Options: []byte{0x05, 0x02, 0x00, 0x00, 0x01, 0x00}, + }, + &IPv6DestinationOptionsExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6ExtensionHeaderIdentifier(header.ICMPv6ProtocolNumber)), + Options: []byte{0x05, 0x02, 0x00, 0x00, 0x01, 0x00}, + }, + &ICMPv6{ + Type: ICMPv6Type(header.ICMPv6ParamProblem), + Code: Byte(0), + Checksum: Uint16(0x5f98), + NDPPayload: []byte{0x00, 0x00, 0x00, 0x06}, + }, + }, + }, + } { + t.Run(tt.description, func(t *testing.T) { + layers := parse(parseIPv6, tt.wantBytes) + if !layers.match(tt.wantLayers) { + t.Fatalf("match failed with diff: %s", layers.diff(tt.wantLayers)) + } + gotBytes, err := layers.ToBytes() + if err != nil { + t.Fatalf("ToBytes() failed on %s: %s", &layers, err) + } + if !bytes.Equal(tt.wantBytes, gotBytes) { + t.Fatalf("mismatching bytes, gotBytes: %x, wantBytes: %x", gotBytes, tt.wantBytes) + } + }) + } +} -- cgit v1.2.3 From 9cfc15497581824f1c6ba2b9f9ee653d0be0bc5a Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 26 Jun 2020 16:23:15 -0700 Subject: Require CAP_SYS_ADMIN in the root user namespace for TTY theft PiperOrigin-RevId: 318563543 --- pkg/sentry/kernel/thread_group.go | 3 ++- test/syscalls/linux/pty_root.cc | 22 ++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 52849f5b3..4dfd2c990 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -366,7 +366,8 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, arg int32) error { // terminal is stolen, and all processes that had it as controlling // terminal lose it." - tty_ioctl(4) if tty.tg != nil && tg.processGroup.session != tty.tg.processGroup.session { - if !auth.CredentialsFromContext(tg.leader).HasCapability(linux.CAP_SYS_ADMIN) || arg != 1 { + // Stealing requires CAP_SYS_ADMIN in the root user namespace. + if creds := auth.CredentialsFromContext(tg.leader); !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, creds.UserNamespace.Root()) || arg != 1 { return syserror.EPERM } // Steal the TTY away. Unlike TIOCNOTTY, don't send signals. diff --git a/test/syscalls/linux/pty_root.cc b/test/syscalls/linux/pty_root.cc index 14a4af980..1d7dbefdb 100644 --- a/test/syscalls/linux/pty_root.cc +++ b/test/syscalls/linux/pty_root.cc @@ -25,16 +25,26 @@ namespace gvisor { namespace testing { -// These tests should be run as root. namespace { +// StealTTY tests whether privileged processes can steal controlling terminals. +// If the stealing process has CAP_SYS_ADMIN in the root user namespace, the +// test ensures that stealing works. If it has non-root CAP_SYS_ADMIN, it +// ensures stealing fails. TEST(JobControlRootTest, StealTTY) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); - // Make this a session leader, which also drops the controlling terminal. - // In the gVisor test environment, this test will be run as the session - // leader already (as the sentry init process). + bool true_root = true; if (!IsRunningOnGvisor()) { + // If running in Linux, we may only have CAP_SYS_ADMIN in a non-root user + // namespace (i.e. we are not truly root). 
We use init_module as a proxy for + // whether we are true root, as it returns EPERM immediately. + ASSERT_THAT(syscall(SYS_init_module, nullptr, 0, nullptr), SyscallFails()); + true_root = errno != EPERM; + + // Make this a session leader, which also drops the controlling terminal. + // In the gVisor test environment, this test will be run as the session + // leader already (as the sentry init process). ASSERT_THAT(setsid(), SyscallSucceeds()); } @@ -53,8 +63,8 @@ TEST(JobControlRootTest, StealTTY) { ASSERT_THAT(setsid(), SyscallSucceeds()); // We shouldn't be able to steal the terminal with the wrong arg value. TEST_PCHECK(ioctl(slave.get(), TIOCSCTTY, 0)); - // We should be able to steal it here. - TEST_PCHECK(!ioctl(slave.get(), TIOCSCTTY, 1)); + // We should be able to steal it if we are true root. + TEST_PCHECK(true_root == !ioctl(slave.get(), TIOCSCTTY, 1)); _exit(0); } -- cgit v1.2.3 From aed71839769be3106896a534b315c128ca15e46b Mon Sep 17 00:00:00 2001 From: Zeling Feng Date: Fri, 26 Jun 2020 16:42:38 -0700 Subject: Packetimpact test for IPv6 unknown options action The Option Type identifiers are internally encoded such that their highest-order two bits specify the action that must be taken if the processing IPv6 node does not recognize the Option Type: 00 - skip over this option and continue processing the header. 01 - discard the packet. 10 - discard the packet and, regardless of whether or not the packet's Destination Address was a multicast address, send an ICMP Parameter Problem, Code 2, message to the packet's Source Address, pointing to the unrecognized Option Type. 11 - discard the packet and, only if the packet's Destination Address was not a multicast address, send an ICMP Parameter Problem, Code 2, message to the packet's Source Address, pointing to the unrecognized Option Type. PiperOrigin-RevId: 318566613 --- test/packetimpact/tests/BUILD | 13 ++ .../tests/ipv6_unknown_options_action_test.go | 187 +++++++++++++++++++++ 2 files changed, 200 insertions(+) create mode 100644 test/packetimpact/tests/ipv6_unknown_options_action_test.go (limited to 'test') diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 09e91b832..1c9f59df5 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -255,6 +255,19 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "ipv6_unknown_options_action", + srcs = ["ipv6_unknown_options_action_test.go"], + # TODO(b/159928940): Fix netstack then remove the line below. + expect_netstack_failure = True, + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "udp_send_recv_dgram", srcs = ["udp_send_recv_dgram_test.go"], diff --git a/test/packetimpact/tests/ipv6_unknown_options_action_test.go b/test/packetimpact/tests/ipv6_unknown_options_action_test.go new file mode 100644 index 000000000..d301d8829 --- /dev/null +++ b/test/packetimpact/tests/ipv6_unknown_options_action_test.go @@ -0,0 +1,187 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipv6_unknown_options_action_test + +import ( + "encoding/binary" + "flag" + "net" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + tb "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + tb.RegisterFlags(flag.CommandLine) +} + +func mkHopByHopOptionsExtHdr(optType byte) tb.Layer { + return &tb.IPv6HopByHopOptionsExtHdr{ + Options: []byte{optType, 0x04, 0x00, 0x00, 0x00, 0x00}, + } +} + +func mkDestinationOptionsExtHdr(optType byte) tb.Layer { + return &tb.IPv6DestinationOptionsExtHdr{ + Options: []byte{optType, 0x04, 0x00, 0x00, 0x00, 0x00}, + } +} + +func optionTypeFromAction(action header.IPv6OptionUnknownAction) byte { + return byte(action << 6) +} + +func TestIPv6UnknownOptionAction(t *testing.T) { + for _, tt := range []struct { + description string + mkExtHdr func(optType byte) tb.Layer + action header.IPv6OptionUnknownAction + multicastDst bool + wantICMPv6 bool + }{ + { + description: "0b00/hbh", + mkExtHdr: mkHopByHopOptionsExtHdr, + action: header.IPv6OptionUnknownActionSkip, + multicastDst: false, + wantICMPv6: false, + }, + { + description: "0b01/hbh", + mkExtHdr: mkHopByHopOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscard, + multicastDst: false, + wantICMPv6: false, + }, + { + description: "0b10/hbh/unicast", + mkExtHdr: mkHopByHopOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscardSendICMP, + multicastDst: false, + wantICMPv6: true, + }, + { + description: "0b10/hbh/multicast", + mkExtHdr: mkHopByHopOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscardSendICMP, + multicastDst: true, + wantICMPv6: true, + }, + { + description: "0b11/hbh/unicast", + mkExtHdr: mkHopByHopOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest, + multicastDst: false, + wantICMPv6: true, + }, + { + description: "0b11/hbh/multicast", + mkExtHdr: mkHopByHopOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest, + multicastDst: true, + wantICMPv6: false, + }, + { + description: "0b00/destination", + mkExtHdr: mkDestinationOptionsExtHdr, + action: header.IPv6OptionUnknownActionSkip, + multicastDst: false, + wantICMPv6: false, + }, + { + description: "0b01/destination", + mkExtHdr: mkDestinationOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscard, + multicastDst: false, + wantICMPv6: false, + }, + { + description: "0b10/destination/unicast", + mkExtHdr: mkDestinationOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscardSendICMP, + multicastDst: false, + wantICMPv6: true, + }, + { + description: "0b10/destination/multicast", + mkExtHdr: mkDestinationOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscardSendICMP, + multicastDst: true, + wantICMPv6: true, + }, + { + description: "0b11/destination/unicast", + mkExtHdr: mkDestinationOptionsExtHdr, + action: header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest, + multicastDst: false, + wantICMPv6: true, + }, + { + description: "0b11/destination/multicast", + mkExtHdr: mkDestinationOptionsExtHdr, + action: 
header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest, + multicastDst: true, + wantICMPv6: false, + }, + } { + t.Run(tt.description, func(t *testing.T) { + dut := tb.NewDUT(t) + defer dut.TearDown() + ipv6Conn := tb.NewIPv6Conn(t, tb.IPv6{}, tb.IPv6{}) + conn := (*tb.Connection)(&ipv6Conn) + defer ipv6Conn.Close() + + outgoingOverride := tb.Layers{} + if tt.multicastDst { + outgoingOverride = tb.Layers{&tb.IPv6{ + DstAddr: tb.Address(tcpip.Address(net.ParseIP("ff02::1"))), + }} + } + + outgoing := conn.CreateFrame(outgoingOverride, tt.mkExtHdr(optionTypeFromAction(tt.action))) + conn.SendFrame(outgoing) + ipv6Sent := outgoing[1:] + invokingPacket, err := ipv6Sent.ToBytes() + if err != nil { + t.Fatalf("failed to serialize the outgoing packet: %s", err) + } + icmpv6Payload := make([]byte, 4) + // The pointer in the ICMPv6 parameter problem message should point to + // the option type of the unknown option. In our test case, it is the + // first option in the extension header whose option type is 2 bytes + // after the IPv6 header (after NextHeader and ExtHdrLen). + binary.BigEndian.PutUint32(icmpv6Payload, header.IPv6MinimumSize+2) + icmpv6Payload = append(icmpv6Payload, invokingPacket...) + gotICMPv6, err := ipv6Conn.ExpectFrame(tb.Layers{ + &tb.Ether{}, + &tb.IPv6{}, + &tb.ICMPv6{ + Type: tb.ICMPv6Type(header.ICMPv6ParamProblem), + Code: tb.Byte(2), + NDPPayload: icmpv6Payload, + }, + }, time.Second) + if tt.wantICMPv6 && err != nil { + t.Fatalf("expected ICMPv6 Parameter Problem but got none: %s", err) + } + if !tt.wantICMPv6 && gotICMPv6 != nil { + t.Fatalf("expected no ICMPv6 Parameter Problem but got one: %s", gotICMPv6) + } + }) + } +} -- cgit v1.2.3 From 8dbeac53ce1b3c1cf4a5f2f0ccdd7196f4656fd8 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Fri, 26 Jun 2020 17:49:32 -0700 Subject: Implement SO_NO_CHECK socket option. SO_NO_CHECK is used to skip the UDP checksum generation on a TX socket (UDP checksum is optional on IPv4). 
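A minimal userspace sketch of the knob (hypothetical demo, not part of this change; assumes Linux headers that expose SO_NO_CHECK, and an IPv4 UDP socket, since the option has no effect on IPv6 where the checksum is mandatory):

    /* Hypothetical demo, not part of the change. */
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void) {
      int s = socket(AF_INET, SOCK_DGRAM, 0);
      if (s < 0) { perror("socket"); return 1; }

      int v = -1;
      socklen_t len = sizeof(v);
      if (getsockopt(s, SOL_SOCKET, SO_NO_CHECK, &v, &len) == 0)
        printf("default SO_NO_CHECK = %d\n", v);          /* expected: 0 (off) */

      int on = 1;
      if (setsockopt(s, SOL_SOCKET, SO_NO_CHECK, &on, sizeof(on)) < 0)
        perror("setsockopt(SO_NO_CHECK)");
      /* Subsequent sends on this socket carry a zero UDP checksum on IPv4;
       * on IPv6 the stack still generates the checksum. */

      close(s);
      return 0;
    }

The tests listed below assert exactly this default-off/get/set behavior.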
Test: - TestNoChecksum - SoNoCheckOffByDefault (UdpSocketTest) - SoNoCheck (UdpSocketTest) Fixes #3055 PiperOrigin-RevId: 318575215 --- pkg/sentry/socket/netstack/netstack.go | 19 +++++ pkg/sentry/socket/socket.go | 1 - pkg/tcpip/checker/checker.go | 16 +++++ pkg/tcpip/tcpip.go | 100 +++++++++++++++------------ pkg/tcpip/transport/udp/endpoint.go | 24 +++++-- pkg/tcpip/transport/udp/udp_test.go | 24 +++++++ test/syscalls/linux/udp_socket_test_cases.cc | 38 ++++++++++ 7 files changed, 173 insertions(+), 49 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index c0b63a803..e7d2c83d7 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1169,6 +1169,17 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam return int32(v), nil + case linux.SO_NO_CHECK: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptBool(tcpip.NoChecksumOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + return boolToInt32(v), nil + default: socket.GetSockOptEmitUnimplementedEvent(t, name) } @@ -1720,6 +1731,14 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.OutOfBandInlineOption(v))) + case linux.SO_NO_CHECK: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + v := usermem.ByteOrder.Uint32(optVal) + return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.NoChecksumOption, v != 0)) + case linux.SO_LINGER: if len(optVal) < linux.SizeOfLinger { return syserr.ErrInvalidArgument diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index 6580bd6e9..fcd7f9d7f 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -407,7 +407,6 @@ func emitUnimplementedEvent(t *kernel.Task, name int) { linux.SO_MARK, linux.SO_MAX_PACING_RATE, linux.SO_NOFCS, - linux.SO_NO_CHECK, linux.SO_OOBINLINE, linux.SO_PASSCRED, linux.SO_PASSSEC, diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index c1745ba6a..ee264b726 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -320,6 +320,22 @@ func DstPort(port uint16) TransportChecker { } } +// NoChecksum creates a checker that checks if the checksum is zero. +func NoChecksum(noChecksum bool) TransportChecker { + return func(t *testing.T, h header.Transport) { + t.Helper() + + udp, ok := h.(header.UDP) + if !ok { + return + } + + if b := udp.Checksum() == 0; b != noChecksum { + t.Errorf("bad checksum state, got %t, want %t", b, noChecksum) + } + } +} + // SeqNum creates a checker that checks the sequence number. func SeqNum(seq uint32) TransportChecker { return func(t *testing.T, h header.Transport) { diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 4d45dcc42..2be1c107a 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -585,59 +585,68 @@ type WriteOptions struct { type SockOptBool int const ( - // BroadcastOption is used by SetSockOpt/GetSockOpt to specify whether - // datagram sockets are allowed to send packets to a broadcast address. + // BroadcastOption is used by SetSockOptBool/GetSockOptBool to specify + // whether datagram sockets are allowed to send packets to a broadcast + // address. 
BroadcastOption SockOptBool = iota - // CorkOption is used by SetSockOpt/GetSockOpt to specify if data should be - // held until segments are full by the TCP transport protocol. + // CorkOption is used by SetSockOptBool/GetSockOptBool to specify if + // data should be held until segments are full by the TCP transport + // protocol. CorkOption - // DelayOption is used by SetSockOpt/GetSockOpt to specify if data - // should be sent out immediately by the transport protocol. For TCP, - // it determines if the Nagle algorithm is on or off. + // DelayOption is used by SetSockOptBool/GetSockOptBool to specify if + // data should be sent out immediately by the transport protocol. For + // TCP, it determines if the Nagle algorithm is on or off. DelayOption - // KeepaliveEnabledOption is used by SetSockOpt/GetSockOpt to specify whether - // TCP keepalive is enabled for this socket. + // KeepaliveEnabledOption is used by SetSockOptBool/GetSockOptBool to + // specify whether TCP keepalive is enabled for this socket. KeepaliveEnabledOption - // MulticastLoopOption is used by SetSockOpt/GetSockOpt to specify whether - // multicast packets sent over a non-loopback interface will be looped back. + // MulticastLoopOption is used by SetSockOptBool/GetSockOptBool to + // specify whether multicast packets sent over a non-loopback interface + // will be looped back. MulticastLoopOption - // PasscredOption is used by SetSockOpt/GetSockOpt to specify whether - // SCM_CREDENTIALS socket control messages are enabled. + // NoChecksumOption is used by SetSockOptBool/GetSockOptBool to specify + // whether UDP checksum is disabled for this socket. + NoChecksumOption + + // PasscredOption is used by SetSockOptBool/GetSockOptBool to specify + // whether SCM_CREDENTIALS socket control messages are enabled. // // Only supported on Unix sockets. PasscredOption - // QuickAckOption is stubbed out in SetSockOpt/GetSockOpt. + // QuickAckOption is stubbed out in SetSockOptBool/GetSockOptBool. QuickAckOption - // ReceiveTClassOption is used by SetSockOpt/GetSockOpt to specify if the - // IPV6_TCLASS ancillary message is passed with incoming packets. + // ReceiveTClassOption is used by SetSockOptBool/GetSockOptBool to + // specify if the IPV6_TCLASS ancillary message is passed with incoming + // packets. ReceiveTClassOption - // ReceiveTOSOption is used by SetSockOpt/GetSockOpt to specify if the TOS - // ancillary message is passed with incoming packets. + // ReceiveTOSOption is used by SetSockOptBool/GetSockOptBool to specify + // if the TOS ancillary message is passed with incoming packets. ReceiveTOSOption - // ReceiveIPPacketInfoOption is used by {G,S}etSockOptBool to specify - // if more inforamtion is provided with incoming packets such - // as interface index and address. + // ReceiveIPPacketInfoOption is used by SetSockOptBool/GetSockOptBool to + // specify if more inforamtion is provided with incoming packets such as + // interface index and address. ReceiveIPPacketInfoOption - // ReuseAddressOption is used by SetSockOpt/GetSockOpt to specify whether Bind() - // should allow reuse of local address. + // ReuseAddressOption is used by SetSockOptBool/GetSockOptBool to + // specify whether Bind() should allow reuse of local address. ReuseAddressOption - // ReusePortOption is used by SetSockOpt/GetSockOpt to permit multiple sockets - // to be bound to an identical socket address. + // ReusePortOption is used by SetSockOptBool/GetSockOptBool to permit + // multiple sockets to be bound to an identical socket address. 
ReusePortOption - // V6OnlyOption is used by {G,S}etSockOptBool to specify whether an IPv6 - // socket is to be restricted to sending and receiving IPv6 packets only. + // V6OnlyOption is used by SetSockOptBool/GetSockOptBool to specify + // whether an IPv6 socket is to be restricted to sending and receiving + // IPv6 packets only. V6OnlyOption ) @@ -645,25 +654,27 @@ const ( type SockOptInt int const ( - // KeepaliveCountOption is used by SetSockOpt/GetSockOpt to specify the number - // of un-ACKed TCP keepalives that will be sent before the connection is - // closed. + // KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to + // specify the number of un-ACKed TCP keepalives that will be sent + // before the connection is closed. KeepaliveCountOption SockOptInt = iota - // IPv4TOSOption is used by SetSockOpt/GetSockOpt to specify TOS + // IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS // for all subsequent outgoing IPv4 packets from the endpoint. IPv4TOSOption - // IPv6TrafficClassOption is used by SetSockOpt/GetSockOpt to specify TOS - // for all subsequent outgoing IPv6 packets from the endpoint. + // IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to + // specify TOS for all subsequent outgoing IPv6 packets from the + // endpoint. IPv6TrafficClassOption - // MaxSegOption is used by SetSockOpt/GetSockOpt to set/get the current - // Maximum Segment Size(MSS) value as specified using the TCP_MAXSEG option. + // MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the + // current Maximum Segment Size(MSS) value as specified using the + // TCP_MAXSEG option. MaxSegOption - // MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default - // TTL value for multicast messages. The default is 1. + // MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control + // the default TTL value for multicast messages. The default is 1. MulticastTTLOption // ReceiveQueueSizeOption is used in GetSockOptInt to specify that the @@ -682,21 +693,22 @@ const ( // number of unread bytes in the output buffer should be returned. SendQueueSizeOption - // TTLOption is used by SetSockOpt/GetSockOpt to control the default TTL/hop - // limit value for unicast messages. The default is protocol specific. + // TTLOption is used by SetSockOptInt/GetSockOptInt to control the + // default TTL/hop limit value for unicast messages. The default is + // protocol specific. // // A zero value indicates the default. TTLOption - // TCPSynCountOption is used by SetSockOpt/GetSockOpt to specify the number of - // SYN retransmits that TCP should send before aborting the attempt to - // connect. It cannot exceed 255. + // TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify + // the number of SYN retransmits that TCP should send before aborting + // the attempt to connect. It cannot exceed 255. // // NOTE: This option is currently only stubbed out and is no-op. TCPSynCountOption - // TCPWindowClampOption is used by SetSockOpt/GetSockOpt to bound the size - // of the advertised window to this value. + // TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound + // the size of the advertised window to this value. 
// // NOTE: This option is currently only stubed out and is a no-op TCPWindowClampOption diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 8bdc1ee1f..cae29fbff 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -109,6 +109,7 @@ type endpoint struct { portFlags ports.Flags bindToDevice tcpip.NICID broadcast bool + noChecksum bool lastErrorMu sync.Mutex `state:"nosave"` lastError *tcpip.Error `state:".(string)"` @@ -529,7 +530,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c useDefaultTTL = false } - if err := sendUDP(route, buffer.View(v).ToVectorisedView(), e.ID.LocalPort, dstPort, ttl, useDefaultTTL, e.sendTOS, e.owner); err != nil { + if err := sendUDP(route, buffer.View(v).ToVectorisedView(), e.ID.LocalPort, dstPort, ttl, useDefaultTTL, e.sendTOS, e.owner, e.noChecksum); err != nil { return 0, nil, err } return int64(len(v)), nil, nil @@ -553,6 +554,11 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { e.multicastLoop = v e.mu.Unlock() + case tcpip.NoChecksumOption: + e.mu.Lock() + e.noChecksum = v + e.mu.Unlock() + case tcpip.ReceiveTOSOption: e.mu.Lock() e.receiveTOS = v @@ -825,6 +831,12 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { e.mu.RUnlock() return v, nil + case tcpip.NoChecksumOption: + e.mu.RLock() + v := e.noChecksum + e.mu.RUnlock() + return v, nil + case tcpip.ReceiveTOSOption: e.mu.RLock() v := e.receiveTOS @@ -959,7 +971,7 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { // sendUDP sends a UDP segment via the provided network endpoint and under the // provided identity. -func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner) *tcpip.Error { +func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner, noChecksum bool) *tcpip.Error { // Allocate a buffer for the UDP header. hdr := buffer.NewPrependable(header.UDPMinimumSize + int(r.MaxHeaderLength())) @@ -973,8 +985,12 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u Length: length, }) - // Only calculate the checksum if offloading isn't supported. - if r.Capabilities()&stack.CapabilityTXChecksumOffload == 0 { + // Set the checksum field unless TX checksum offload is enabled. + // On IPv4, UDP checksum is optional, and a zero value indicates the + // transmitter skipped the checksum generation (RFC768). + // On IPv6, UDP checksum is not optional (RFC2460 Section 8.1). + if r.Capabilities()&stack.CapabilityTXChecksumOffload == 0 && + (!noChecksum || r.NetProto == header.IPv6ProtocolNumber) { xsum := r.PseudoHeaderChecksum(ProtocolNumber, length) for _, v := range data.Views() { xsum = header.Checksum(v, xsum) diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index ff9f60cf9..db59eb5a0 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -1251,6 +1251,30 @@ func TestWriteIncrementsPacketsSent(t *testing.T) { } } +func TestNoChecksum(t *testing.T) { + for _, flow := range []testFlow{unicastV4, unicastV6} { + t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { + c := newDualTestContext(t, defaultMTU) + defer c.cleanup() + + c.createEndpointForFlow(flow) + + // Disable the checksum generation. 
+ if err := c.ep.SetSockOptBool(tcpip.NoChecksumOption, true); err != nil { + t.Fatalf("SetSockOptBool failed: %s", err) + } + // This option is effective on IPv4 only. + testWrite(c, flow, checker.UDP(checker.NoChecksum(flow.isV4()))) + + // Enable the checksum generation. + if err := c.ep.SetSockOptBool(tcpip.NoChecksumOption, false); err != nil { + t.Fatalf("SetSockOptBool failed: %s", err) + } + testWrite(c, flow, checker.UDP(checker.NoChecksum(false))) + }) + } +} + func TestTTL(t *testing.T) { for _, flow := range []testFlow{unicastV4, unicastV4in6, unicastV6, unicastV6Only, multicastV4, multicastV4in6, multicastV6, broadcast, broadcastIn6} { t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) { diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index cc1db3de8..1d13432ca 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -1239,6 +1239,44 @@ TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) { EXPECT_EQ(n, 0); } +TEST_P(UdpSocketTest, SoNoCheckOffByDefault) { + // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by + // hostinet. + SKIP_IF(IsRunningWithHostinet()); + + int v = -1; + socklen_t optlen = sizeof(v); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + SyscallSucceeds()); + ASSERT_EQ(v, kSockOptOff); + ASSERT_EQ(optlen, sizeof(v)); +} + +TEST_P(UdpSocketTest, SoNoCheck) { + // TODO(gvisor.dev/issue/1202): SO_NO_CHECK socket option not supported by + // hostinet. + SKIP_IF(IsRunningWithHostinet()); + + int v = kSockOptOn; + socklen_t optlen = sizeof(v); + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, optlen), + SyscallSucceeds()); + v = -1; + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + SyscallSucceeds()); + ASSERT_EQ(v, kSockOptOn); + ASSERT_EQ(optlen, sizeof(v)); + + v = kSockOptOff; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, optlen), + SyscallSucceeds()); + v = -1; + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + SyscallSucceeds()); + ASSERT_EQ(v, kSockOptOff); + ASSERT_EQ(optlen, sizeof(v)); +} + TEST_P(UdpSocketTest, SoTimestampOffByDefault) { // TODO(gvisor.dev/issue/1202): SO_TIMESTAMP socket option not supported by // hostinet. -- cgit v1.2.3 From 66d1665441461a5226ba0c884e22888d58f393b6 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 26 Jun 2020 19:04:59 -0700 Subject: IPv6 raw sockets. Needed for ip6tables. IPv6 raw sockets never include the IPv6 header. 
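A rough userspace sketch of the asymmetry this change preserves (hypothetical demo, not part of the change; needs CAP_NET_RAW, and the ICMPv6 echo details are only illustrative): reads on an AF_INET raw socket begin with the IPv4 header, while reads on an AF_INET6 raw socket begin directly at the transport payload.

    /* Hypothetical demo, not part of the change. */
    #include <netinet/icmp6.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void) {
      int s6 = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);   /* needs CAP_NET_RAW */
      if (s6 < 0) { perror("socket(AF_INET6, SOCK_RAW)"); return 1; }

      struct sockaddr_in6 lo = {
        .sin6_family = AF_INET6,
        .sin6_addr = IN6ADDR_LOOPBACK_INIT,
      };
      if (connect(s6, (struct sockaddr *)&lo, sizeof(lo)) < 0) perror("connect");

      /* Linux fills in the mandatory ICMPv6 checksum for IPPROTO_ICMPV6 sockets. */
      struct icmp6_hdr echo = { .icmp6_type = ICMP6_ECHO_REQUEST };
      if (send(s6, &echo, sizeof(echo), 0) < 0) perror("send");

      char buf[1500];
      ssize_t n = recv(s6, buf, sizeof(buf), 0);
      if (n >= 0)
        printf("got %zd bytes; buf[0] is an ICMPv6 type, not an IPv6 header\n", n);

      close(s6);
      return 0;
    }

The new raw_socket.cc fixture captures the same point with HdrLen(): sizeof(struct iphdr) for AF_INET, 0 for AF_INET6.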
PiperOrigin-RevId: 318582989 --- pkg/tcpip/transport/raw/endpoint.go | 61 ++- test/syscalls/BUILD | 2 +- test/syscalls/linux/BUILD | 4 +- test/syscalls/linux/raw_socket.cc | 819 +++++++++++++++++++++++++++++++++ test/syscalls/linux/raw_socket_ipv4.cc | 771 ------------------------------- 5 files changed, 848 insertions(+), 809 deletions(-) create mode 100644 test/syscalls/linux/raw_socket.cc delete mode 100644 test/syscalls/linux/raw_socket_ipv4.cc (limited to 'test') diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index dd514d397..766c7648e 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -94,7 +94,7 @@ func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, trans } func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) { - if netProto != header.IPv4ProtocolNumber { + if netProto != header.IPv4ProtocolNumber && netProto != header.IPv6ProtocolNumber { return nil, tcpip.ErrUnknownProtocol } @@ -215,6 +215,11 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess // Write implements tcpip.Endpoint.Write. func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) { + // We can create, but not write to, unassociated IPv6 endpoints. + if !e.associated && e.TransportEndpointInfo.NetProto == header.IPv6ProtocolNumber { + return 0, nil, tcpip.ErrInvalidOptionValue + } + n, ch, err := e.write(p, opts) switch err { case nil: @@ -319,12 +324,6 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c return 0, nil, tcpip.ErrNoRoute } - // We don't support IPv6 yet, so this has to be an IPv4 address. - if len(opts.To.Addr) != header.IPv4AddressSize { - e.mu.RUnlock() - return 0, nil, tcpip.ErrInvalidEndpointState - } - // Find the route to the destination. If BindAddress is 0, // FindRoute will choose an appropriate source address. route, err := e.stack.FindRoute(nic, e.BindAddr, opts.To.Addr, e.NetProto, false) @@ -354,17 +353,13 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, } } - switch e.NetProto { - case header.IPv4ProtocolNumber: - if !e.associated { - if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{ - Data: buffer.View(payloadBytes).ToVectorisedView(), - }); err != nil { - return 0, nil, err - } - break + if !e.associated { + if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{ + Data: buffer.View(payloadBytes).ToVectorisedView(), + }); err != nil { + return 0, nil, err } - + } else { hdr := buffer.NewPrependable(len(payloadBytes) + int(route.MaxHeaderLength())) if err := route.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: e.TransProto, TTL: route.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{ Header: hdr, @@ -373,9 +368,6 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, }); err != nil { return 0, nil, err } - - default: - return 0, nil, tcpip.ErrUnknownProtocol } return int64(len(payloadBytes)), nil, nil @@ -400,11 +392,6 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { return tcpip.ErrInvalidEndpointState } - // We don't support IPv6 yet. 
- if len(addr.Addr) != header.IPv4AddressSize { - return tcpip.ErrInvalidEndpointState - } - nic := addr.NIC if e.bound { if e.BindNICID == 0 { @@ -470,14 +457,8 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { e.mu.Lock() defer e.mu.Unlock() - // Callers must provide an IPv4 address or no network address (for - // binding to a NIC, but not an address). - if len(addr.Addr) != 0 && len(addr.Addr) != 4 { - return tcpip.ErrInvalidEndpointState - } - // If a local address was specified, verify that it's valid. - if len(addr.Addr) == header.IPv4AddressSize && e.stack.CheckLocalAddress(addr.NIC, e.NetProto, addr.Addr) == 0 { + if e.stack.CheckLocalAddress(addr.NIC, e.NetProto, addr.Addr) == 0 { return tcpip.ErrBadLocalAddress } @@ -680,9 +661,19 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) { }, } - headers := append(buffer.View(nil), pkt.NetworkHeader...) - headers = append(headers, pkt.TransportHeader...) - combinedVV := headers.ToVectorisedView() + // Raw IPv4 endpoints return the IP header, but IPv6 endpoints do not. + // We copy headers' underlying bytes because pkt.*Header may point to + // the middle of a slice, and another struct may point to the "outer" + // slice. Save/restore doesn't support overlapping slices and will fail. + var combinedVV buffer.VectorisedView + if e.TransportEndpointInfo.NetProto == header.IPv4ProtocolNumber { + headers := make(buffer.View, 0, len(pkt.NetworkHeader)+len(pkt.TransportHeader)) + headers = append(headers, pkt.NetworkHeader...) + headers = append(headers, pkt.TransportHeader...) + combinedVV = headers.ToVectorisedView() + } else { + combinedVV = append(buffer.View(nil), pkt.TransportHeader...).ToVectorisedView() + } combinedVV.Append(pkt.Data) packet.data = combinedVV packet.timestampNS = e.stack.NowNanoseconds() diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 23123006f..c4fff0ac8 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -547,7 +547,7 @@ syscall_test( ) syscall_test( - test = "//test/syscalls/linux:raw_socket_ipv4_test", + test = "//test/syscalls/linux:raw_socket_test", vfs2 = "True", ) diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 7282d675e..270b9e4c4 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1800,9 +1800,9 @@ cc_binary( ) cc_binary( - name = "raw_socket_ipv4_test", + name = "raw_socket_test", testonly = 1, - srcs = ["raw_socket_ipv4.cc"], + srcs = ["raw_socket.cc"], linkstatic = 1, deps = [ ":socket_test_util", diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc new file mode 100644 index 000000000..05c4ed03f --- /dev/null +++ b/test/syscalls/linux/raw_socket.cc @@ -0,0 +1,819 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/test_util.h" + +// Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will +// need to be configured to let the superuser create ping sockets (see icmp(7)). + +namespace gvisor { +namespace testing { + +namespace { + +// Fixture for tests parameterized by protocol. +class RawSocketTest : public ::testing::TestWithParam> { + protected: + // Creates a socket to be used in tests. + void SetUp() override; + + // Closes the socket created by SetUp(). + void TearDown() override; + + // Sends buf via s_. + void SendBuf(const char* buf, int buf_len); + + // Reads from s_ into recv_buf. + void ReceiveBuf(char* recv_buf, size_t recv_buf_len); + + void ReceiveBufFrom(int sock, char* recv_buf, size_t recv_buf_len); + + int Protocol() { return std::get<0>(GetParam()); } + + int Family() { return std::get<1>(GetParam()); } + + socklen_t AddrLen() { + if (Family() == AF_INET) { + return sizeof(sockaddr_in); + } + return sizeof(sockaddr_in6); + } + + int HdrLen() { + if (Family() == AF_INET) { + return sizeof(struct iphdr); + } + // IPv6 raw sockets don't include the header. + return 0; + } + + // The socket used for both reading and writing. + int s_; + + // The loopback address. + struct sockaddr_storage addr_; +}; + +void RawSocketTest::SetUp() { + if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()), + SyscallFailsWithErrno(EPERM)); + GTEST_SKIP(); + } + + ASSERT_THAT(s_ = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); + + addr_ = {}; + + // We don't set ports because raw sockets don't have a notion of ports. + if (Family() == AF_INET) { + struct sockaddr_in* sin = reinterpret_cast(&addr_); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + } else { + struct sockaddr_in6* sin6 = reinterpret_cast(&addr_); + sin6->sin6_family = AF_INET6; + sin6->sin6_addr = in6addr_loopback; + } +} + +void RawSocketTest::TearDown() { + // TearDown will be run even if we skip the test. + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { + EXPECT_THAT(close(s_), SyscallSucceeds()); + } +} + +// We should be able to create multiple raw sockets for the same protocol. +// BasicRawSocket::Setup creates the first one, so we only have to create one +// more here. +TEST_P(RawSocketTest, MultipleCreation) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int s2; + ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); + + ASSERT_THAT(close(s2), SyscallSucceeds()); +} + +// Test that shutting down an unconnected socket fails. +TEST_P(RawSocketTest, FailShutdownWithoutConnect) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); + ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); +} + +// Shutdown is a no-op for raw sockets (and datagram sockets in general). 
+TEST_P(RawSocketTest, ShutdownWriteNoop) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "noop"; + ASSERT_THAT(RetryEINTR(write)(s_, kBuf, sizeof(kBuf)), + SyscallSucceedsWithValue(sizeof(kBuf))); +} + +// Shutdown is a no-op for raw sockets (and datagram sockets in general). +TEST_P(RawSocketTest, ShutdownReadNoop) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "gdg"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + std::vector c(sizeof(kBuf) + HdrLen()); + ASSERT_THAT(read(s_, c.data(), c.size()), SyscallSucceedsWithValue(c.size())); +} + +// Test that listen() fails. +TEST_P(RawSocketTest, FailListen) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP)); +} + +// Test that accept() fails. +TEST_P(RawSocketTest, FailAccept) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + struct sockaddr saddr; + socklen_t addrlen; + ASSERT_THAT(accept(s_, &saddr, &addrlen), SyscallFailsWithErrno(ENOTSUP)); +} + +// Test that getpeername() returns nothing before connect(). +TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + struct sockaddr saddr; + socklen_t addrlen = sizeof(saddr); + ASSERT_THAT(getpeername(s_, &saddr, &addrlen), + SyscallFailsWithErrno(ENOTCONN)); +} + +// Test that getpeername() returns something after connect(). +TEST_P(RawSocketTest, GetPeerName) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + struct sockaddr saddr; + socklen_t addrlen = sizeof(saddr); + ASSERT_THAT(getpeername(s_, &saddr, &addrlen), + SyscallFailsWithErrno(ENOTCONN)); + ASSERT_GT(addrlen, 0); +} + +// Test that the socket is writable immediately. +TEST_P(RawSocketTest, PollWritableImmediately) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + struct pollfd pfd = {}; + pfd.fd = s_; + pfd.events = POLLOUT; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); +} + +// Test that the socket isn't readable before receiving anything. +TEST_P(RawSocketTest, PollNotReadableInitially) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Try to receive data with MSG_DONTWAIT, which returns immediately if there's + // nothing to be read. + char buf[117]; + ASSERT_THAT(RetryEINTR(recv)(s_, buf, sizeof(buf), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); +} + +// Test that the socket becomes readable once something is written to it. +TEST_P(RawSocketTest, PollTriggeredOnWrite) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Write something so that there's data to be read. + // Arbitrary. + constexpr char kBuf[] = "JP5"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + struct pollfd pfd = {}; + pfd.fd = s_; + pfd.events = POLLIN; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); +} + +// Test that we can connect() to a valid IP (loopback). 
+TEST_P(RawSocketTest, ConnectToLoopback) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); +} + +// Test that calling send() without connect() fails. +TEST_P(RawSocketTest, SendWithoutConnectFails) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Arbitrary. + constexpr char kBuf[] = "Endgame was good"; + ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), + SyscallFailsWithErrno(EDESTADDRREQ)); +} + +// Bind to localhost. +TEST_P(RawSocketTest, BindToLocalhost) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); +} + +// Bind to a different address. +TEST_P(RawSocketTest, BindToInvalid) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + struct sockaddr_storage bind_addr = addr_; + if (Family() == AF_INET) { + struct sockaddr_in* sin = reinterpret_cast(&bind_addr); + sin->sin_addr = {1}; // 1.0.0.0 - An address that we can't bind to. + } else { + struct sockaddr_in6* sin6 = + reinterpret_cast(&bind_addr); + memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr)); + sin6->sin6_addr.s6_addr[0] = 1; // 1: - An address that we can't bind to. + } + ASSERT_THAT(bind(s_, reinterpret_cast(&bind_addr), + AddrLen()), SyscallFailsWithErrno(EADDRNOTAVAIL)); +} + +// Send and receive an packet. +TEST_P(RawSocketTest, SendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Arbitrary. + constexpr char kBuf[] = "TB12"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + // Receive the packet and make sure it's identical. + std::vector recv_buf(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); +} + +// We should be able to create multiple raw sockets for the same protocol and +// receive the same packet on both. +TEST_P(RawSocketTest, MultipleSocketReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int s2; + ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "TB10"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + // Receive it on socket 1. + std::vector recv_buf1(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf1.data(), recv_buf1.size())); + + // Receive it on socket 2. + std::vector recv_buf2(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s2, recv_buf2.data(), + recv_buf2.size())); + + EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(), + recv_buf2.data() + HdrLen(), sizeof(kBuf)), + 0); + + ASSERT_THAT(close(s2), SyscallSucceeds()); +} + +// Test that connect sends packets to the right place. +TEST_P(RawSocketTest, SendAndReceiveViaConnect) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "JH4"; + ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), + SyscallSucceedsWithValue(sizeof(kBuf))); + + // Receive the packet and make sure it's identical. 
+ std::vector recv_buf(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); +} + +// Bind to localhost, then send and receive packets. +TEST_P(RawSocketTest, BindSendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "DR16"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + // Receive the packet and make sure it's identical. + std::vector recv_buf(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); +} + +// Bind and connect to localhost and send/receive packets. +TEST_P(RawSocketTest, BindConnectSendAndReceive) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + + // Arbitrary. + constexpr char kBuf[] = "DG88"; + ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); + + // Receive the packet and make sure it's identical. + std::vector recv_buf(sizeof(kBuf) + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); +} + +// Check that setting SO_RCVBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum receive buf size by trying to set it to zero. + // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_RCVBUF above max is clamped to the maximum +// receive buffer size. +TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover max buf size by trying to set the largest possible buffer size. 
+ constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. +TEST_P(RawSocketTest, SetSocketRecvBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover max buf size by trying to set a really large buffer size. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set a zero size receive buffer + // size. + // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + ASSERT_EQ(quarter_sz, val); +} + +// Check that setting SO_SNDBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawSocketTest, SetSocketSendBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum buffer size by trying to set it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_SNDBUF above max is clamped to the maximum +// send buffer size. +TEST_P(RawSocketTest, SetSocketSendBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover maximum buffer size by trying to set it to a large value. 
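+  // As with SO_RCVBUF, the kernel clamps the request (net.core.wmem_max on
+  // Linux), so the value read back is the effective send-buffer maximum.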
+ constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. +TEST_P(RawSocketTest, SetSocketSendBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover maximum buffer size by trying to set it to a large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack + // matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + + ASSERT_EQ(quarter_sz, val); +} + +// Test that receive buffer limits are not enforced when the recv buffer is +// empty. +TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + + int min = 0; + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + { + // Send data of size min and verify that it's received. + std::vector buf(min); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + + // Receive the packet and make sure it's identical. + std::vector recv_buf(buf.size() + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ( + memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), + 0); + } + + { + // Send data of size min + 1 and verify that its received. Both linux and + // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer + // is currently empty. 
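+    // Enforcement only applies once data is already queued; the RecvBufLimits
+    // test below exercises that case.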
+ std::vector buf(min + 1); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + // Receive the packet and make sure it's identical. + std::vector recv_buf(buf.size() + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ( + memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), + 0); + } +} + +TEST_P(RawSocketTest, RecvBufLimits) { + // TCP stack generates RSTs for unknown endpoints and it complicates the test + // as we have to deal with the RST packets as well. For testing the raw socket + // endpoints buffer limit enforcement we can just test for UDP. + // + // We don't use SKIP_IF here because root_test_runner explicitly fails if a + // test is skipped. + if (Protocol() == IPPROTO_TCP) { + return; + } + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + ASSERT_THAT( + bind(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + ASSERT_THAT( + connect(s_, reinterpret_cast(&addr_), AddrLen()), + SyscallSucceeds()); + + int min = 0; + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + // Now set the limit to min * 2. + int new_rcv_buf_sz = min * 4; + if (!IsRunningOnGvisor()) { + // Linux doubles the value specified so just set to min. + new_rcv_buf_sz = min * 2; + } + + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, + sizeof(new_rcv_buf_sz)), + SyscallSucceeds()); + int rcv_buf_sz = 0; + { + socklen_t rcv_buf_len = sizeof(rcv_buf_sz); + ASSERT_THAT( + getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len), + SyscallSucceeds()); + } + + // Set a receive timeout so that we don't block forever on reads if the test + // fails. + struct timeval tv { + .tv_sec = 1, .tv_usec = 0, + }; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), + SyscallSucceeds()); + + { + std::vector buf(min); + RandomizeBuffer(buf.data(), buf.size()); + + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + int sent = 4; + if (IsRunningOnGvisor()) { + // Linux seems to drop the 4th packet even though technically it should + // fit in the receive buffer. + ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); + sent++; + } + + // Verify that the expected number of packets are available to be read. + for (int i = 0; i < sent - 1; i++) { + // Receive the packet and make sure it's identical. + std::vector recv_buf(buf.size() + HdrLen()); + ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); + EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), + buf.size()), + 0); + } + + // Assert that the last packet is dropped because the receive buffer should + // be full after the first four packets. 
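+    // MSG_DONTWAIT makes the recvmsg() below fail immediately with EAGAIN
+    // rather than blocking until the SO_RCVTIMEO set above expires.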
+ std::vector recv_buf(buf.size() + HdrLen()); + struct iovec iov = {}; + iov.iov_base = static_cast(const_cast(recv_buf.data())); + iov.iov_len = buf.size(); + struct msghdr msg = {}; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); + } +} + +void RawSocketTest::SendBuf(const char* buf, int buf_len) { + // It's safe to use const_cast here because sendmsg won't modify the iovec or + // address. + struct iovec iov = {}; + iov.iov_base = static_cast(const_cast(buf)); + iov.iov_len = static_cast(buf_len); + struct msghdr msg = {}; + msg.msg_name = static_cast(&addr_); + msg.msg_namelen = AddrLen(); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + ASSERT_THAT(sendmsg(s_, &msg, 0), SyscallSucceedsWithValue(buf_len)); +} + +void RawSocketTest::ReceiveBuf(char* recv_buf, size_t recv_buf_len) { + ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s_, recv_buf, recv_buf_len)); +} + +void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf, + size_t recv_buf_len) { + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len)); +} + +INSTANTIATE_TEST_SUITE_P(AllInetTests, RawSocketTest, + ::testing::Combine( + ::testing::Values(IPPROTO_TCP, IPPROTO_UDP), + ::testing::Values(AF_INET, AF_INET6))); + +// AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to. +TEST(RawSocketTest, IPv6ProtoRaw) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW), + SyscallSucceeds()); + + // Verify that writing yields EINVAL. + char buf[] = "This is such a weird little edge case"; + struct sockaddr_in6 sin6 = {}; + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = in6addr_loopback; + ASSERT_THAT(sendto(sock, buf, sizeof(buf), 0 /* flags */, + reinterpret_cast(&sin6), sizeof(sin6)), + SyscallFailsWithErrno(EINVAL)); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/raw_socket_ipv4.cc b/test/syscalls/linux/raw_socket_ipv4.cc deleted file mode 100644 index 0116c3e94..000000000 --- a/test/syscalls/linux/raw_socket_ipv4.cc +++ /dev/null @@ -1,771 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "gtest/gtest.h" -#include "test/syscalls/linux/socket_test_util.h" -#include "test/syscalls/linux/unix_domain_socket_test_util.h" -#include "test/util/capability_util.h" -#include "test/util/file_descriptor.h" -#include "test/util/test_util.h" - -// Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will -// need to be configured to let the superuser create ping sockets (see icmp(7)). 
- -namespace gvisor { -namespace testing { - -namespace { - -// Fixture for tests parameterized by protocol. -class RawSocketTest : public ::testing::TestWithParam { - protected: - // Creates a socket to be used in tests. - void SetUp() override; - - // Closes the socket created by SetUp(). - void TearDown() override; - - // Sends buf via s_. - void SendBuf(const char* buf, int buf_len); - - // Sends buf to the provided address via the provided socket. - void SendBufTo(int sock, const struct sockaddr_in& addr, const char* buf, - int buf_len); - - // Reads from s_ into recv_buf. - void ReceiveBuf(char* recv_buf, size_t recv_buf_len); - - int Protocol() { return GetParam(); } - - // The socket used for both reading and writing. - int s_; - - // The loopback address. - struct sockaddr_in addr_; -}; - -void RawSocketTest::SetUp() { - if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { - ASSERT_THAT(socket(AF_INET, SOCK_RAW, Protocol()), - SyscallFailsWithErrno(EPERM)); - GTEST_SKIP(); - } - - ASSERT_THAT(s_ = socket(AF_INET, SOCK_RAW, Protocol()), SyscallSucceeds()); - - addr_ = {}; - - // We don't set ports because raw sockets don't have a notion of ports. - addr_.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - addr_.sin_family = AF_INET; -} - -void RawSocketTest::TearDown() { - // TearDown will be run even if we skip the test. - if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { - EXPECT_THAT(close(s_), SyscallSucceeds()); - } -} - -// We should be able to create multiple raw sockets for the same protocol. -// BasicRawSocket::Setup creates the first one, so we only have to create one -// more here. -TEST_P(RawSocketTest, MultipleCreation) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - int s2; - ASSERT_THAT(s2 = socket(AF_INET, SOCK_RAW, Protocol()), SyscallSucceeds()); - - ASSERT_THAT(close(s2), SyscallSucceeds()); -} - -// Test that shutting down an unconnected socket fails. -TEST_P(RawSocketTest, FailShutdownWithoutConnect) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); - ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); -} - -// Shutdown is a no-op for raw sockets (and datagram sockets in general). -TEST_P(RawSocketTest, ShutdownWriteNoop) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); - - // Arbitrary. - constexpr char kBuf[] = "noop"; - ASSERT_THAT(RetryEINTR(write)(s_, kBuf, sizeof(kBuf)), - SyscallSucceedsWithValue(sizeof(kBuf))); -} - -// Shutdown is a no-op for raw sockets (and datagram sockets in general). -TEST_P(RawSocketTest, ShutdownReadNoop) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); - - // Arbitrary. - constexpr char kBuf[] = "gdg"; - ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); - - constexpr size_t kReadSize = sizeof(kBuf) + sizeof(struct iphdr); - char c[kReadSize]; - ASSERT_THAT(read(s_, &c, sizeof(c)), SyscallSucceedsWithValue(kReadSize)); -} - -// Test that listen() fails. 
-TEST_P(RawSocketTest, FailListen) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP)); -} - -// Test that accept() fails. -TEST_P(RawSocketTest, FailAccept) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - struct sockaddr saddr; - socklen_t addrlen; - ASSERT_THAT(accept(s_, &saddr, &addrlen), SyscallFailsWithErrno(ENOTSUP)); -} - -// Test that getpeername() returns nothing before connect(). -TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - struct sockaddr saddr; - socklen_t addrlen = sizeof(saddr); - ASSERT_THAT(getpeername(s_, &saddr, &addrlen), - SyscallFailsWithErrno(ENOTCONN)); -} - -// Test that getpeername() returns something after connect(). -TEST_P(RawSocketTest, GetPeerName) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - struct sockaddr saddr; - socklen_t addrlen = sizeof(saddr); - ASSERT_THAT(getpeername(s_, &saddr, &addrlen), - SyscallFailsWithErrno(ENOTCONN)); - ASSERT_GT(addrlen, 0); -} - -// Test that the socket is writable immediately. -TEST_P(RawSocketTest, PollWritableImmediately) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - struct pollfd pfd = {}; - pfd.fd = s_; - pfd.events = POLLOUT; - ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); -} - -// Test that the socket isn't readable before receiving anything. -TEST_P(RawSocketTest, PollNotReadableInitially) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - // Try to receive data with MSG_DONTWAIT, which returns immediately if there's - // nothing to be read. - char buf[117]; - ASSERT_THAT(RetryEINTR(recv)(s_, buf, sizeof(buf), MSG_DONTWAIT), - SyscallFailsWithErrno(EAGAIN)); -} - -// Test that the socket becomes readable once something is written to it. -TEST_P(RawSocketTest, PollTriggeredOnWrite) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - // Write something so that there's data to be read. - // Arbitrary. - constexpr char kBuf[] = "JP5"; - ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); - - struct pollfd pfd = {}; - pfd.fd = s_; - pfd.events = POLLIN; - ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); -} - -// Test that we can connect() to a valid IP (loopback). -TEST_P(RawSocketTest, ConnectToLoopback) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); -} - -// Test that calling send() without connect() fails. -TEST_P(RawSocketTest, SendWithoutConnectFails) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - // Arbitrary. - constexpr char kBuf[] = "Endgame was good"; - ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), - SyscallFailsWithErrno(EDESTADDRREQ)); -} - -// Bind to localhost. -TEST_P(RawSocketTest, BindToLocalhost) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - bind(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); -} - -// Bind to a different address. 
-TEST_P(RawSocketTest, BindToInvalid) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - struct sockaddr_in bind_addr = {}; - bind_addr.sin_family = AF_INET; - bind_addr.sin_addr = {1}; // 1.0.0.0 - An address that we can't bind to. - ASSERT_THAT(bind(s_, reinterpret_cast(&bind_addr), - sizeof(bind_addr)), - SyscallFailsWithErrno(EADDRNOTAVAIL)); -} - -// Send and receive an packet. -TEST_P(RawSocketTest, SendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - // Arbitrary. - constexpr char kBuf[] = "TB12"; - ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); - - // Receive the packet and make sure it's identical. - char recv_buf[sizeof(kBuf) + sizeof(struct iphdr)]; - ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf, sizeof(recv_buf))); - EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), kBuf, sizeof(kBuf)), 0); -} - -// We should be able to create multiple raw sockets for the same protocol and -// receive the same packet on both. -TEST_P(RawSocketTest, MultipleSocketReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - int s2; - ASSERT_THAT(s2 = socket(AF_INET, SOCK_RAW, Protocol()), SyscallSucceeds()); - - // Arbitrary. - constexpr char kBuf[] = "TB10"; - ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); - - // Receive it on socket 1. - char recv_buf1[sizeof(kBuf) + sizeof(struct iphdr)]; - ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf1, sizeof(recv_buf1))); - - // Receive it on socket 2. - char recv_buf2[sizeof(kBuf) + sizeof(struct iphdr)]; - ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s2, recv_buf2, sizeof(recv_buf2))); - - EXPECT_EQ(memcmp(recv_buf1 + sizeof(struct iphdr), - recv_buf2 + sizeof(struct iphdr), sizeof(kBuf)), - 0); - - ASSERT_THAT(close(s2), SyscallSucceeds()); -} - -// Test that connect sends packets to the right place. -TEST_P(RawSocketTest, SendAndReceiveViaConnect) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - - // Arbitrary. - constexpr char kBuf[] = "JH4"; - ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), - SyscallSucceedsWithValue(sizeof(kBuf))); - - // Receive the packet and make sure it's identical. - char recv_buf[sizeof(kBuf) + sizeof(struct iphdr)]; - ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf, sizeof(recv_buf))); - EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), kBuf, sizeof(kBuf)), 0); -} - -// Bind to localhost, then send and receive packets. -TEST_P(RawSocketTest, BindSendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - bind(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - - // Arbitrary. - constexpr char kBuf[] = "DR16"; - ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); - - // Receive the packet and make sure it's identical. - char recv_buf[sizeof(kBuf) + sizeof(struct iphdr)]; - ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf, sizeof(recv_buf))); - EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), kBuf, sizeof(kBuf)), 0); -} - -// Bind and connect to localhost and send/receive packets. -TEST_P(RawSocketTest, BindConnectSendAndReceive) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - bind(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - ASSERT_THAT( - connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - - // Arbitrary. 
- constexpr char kBuf[] = "DG88"; - ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); - - // Receive the packet and make sure it's identical. - char recv_buf[sizeof(kBuf) + sizeof(struct iphdr)]; - ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf, sizeof(recv_buf))); - EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), kBuf, sizeof(kBuf)), 0); -} - -// Check that setting SO_RCVBUF below min is clamped to the minimum -// receive buffer size. -TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - // Discover minimum receive buf size by trying to set it to zero. - // See: - // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 - constexpr int kRcvBufSz = 0; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), - SyscallSucceeds()); - - int min = 0; - socklen_t min_len = sizeof(min); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), - SyscallSucceeds()); - - // Linux doubles the value so let's use a value that when doubled will still - // be smaller than min. - int below_min = min / 2 - 1; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)), - SyscallSucceeds()); - - int val = 0; - socklen_t val_len = sizeof(val); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), - SyscallSucceeds()); - - ASSERT_EQ(min, val); -} - -// Check that setting SO_RCVBUF above max is clamped to the maximum -// receive buffer size. -TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - // Discover max buf size by trying to set the largest possible buffer size. - constexpr int kRcvBufSz = 0xffffffff; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), - SyscallSucceeds()); - - int max = 0; - socklen_t max_len = sizeof(max); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), - SyscallSucceeds()); - - int above_max = max + 1; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)), - SyscallSucceeds()); - - int val = 0; - socklen_t val_len = sizeof(val); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), - SyscallSucceeds()); - ASSERT_EQ(max, val); -} - -// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. -TEST_P(RawSocketTest, SetSocketRecvBuf) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - int max = 0; - int min = 0; - { - // Discover max buf size by trying to set a really large buffer size. - constexpr int kRcvBufSz = 0xffffffff; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), - SyscallSucceeds()); - - max = 0; - socklen_t max_len = sizeof(max); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), - SyscallSucceeds()); - } - - { - // Discover minimum buffer size by trying to set a zero size receive buffer - // size. 
- // See: - // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 - constexpr int kRcvBufSz = 0; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), - SyscallSucceeds()); - - socklen_t min_len = sizeof(min); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), - SyscallSucceeds()); - } - - int quarter_sz = min + (max - min) / 4; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)), - SyscallSucceeds()); - - int val = 0; - socklen_t val_len = sizeof(val); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), - SyscallSucceeds()); - - // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. - // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior. - if (!IsRunningOnGvisor()) { - quarter_sz *= 2; - } - ASSERT_EQ(quarter_sz, val); -} - -// Check that setting SO_SNDBUF below min is clamped to the minimum -// receive buffer size. -TEST_P(RawSocketTest, SetSocketSendBufBelowMin) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - // Discover minimum buffer size by trying to set it to zero. - constexpr int kSndBufSz = 0; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), - SyscallSucceeds()); - - int min = 0; - socklen_t min_len = sizeof(min); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), - SyscallSucceeds()); - - // Linux doubles the value so let's use a value that when doubled will still - // be smaller than min. - int below_min = min / 2 - 1; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)), - SyscallSucceeds()); - - int val = 0; - socklen_t val_len = sizeof(val); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), - SyscallSucceeds()); - - ASSERT_EQ(min, val); -} - -// Check that setting SO_SNDBUF above max is clamped to the maximum -// send buffer size. -TEST_P(RawSocketTest, SetSocketSendBufAboveMax) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - // Discover maximum buffer size by trying to set it to a large value. - constexpr int kSndBufSz = 0xffffffff; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), - SyscallSucceeds()); - - int max = 0; - socklen_t max_len = sizeof(max); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), - SyscallSucceeds()); - - int above_max = max + 1; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)), - SyscallSucceeds()); - - int val = 0; - socklen_t val_len = sizeof(val); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), - SyscallSucceeds()); - ASSERT_EQ(max, val); -} - -// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. -TEST_P(RawSocketTest, SetSocketSendBuf) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - int max = 0; - int min = 0; - { - // Discover maximum buffer size by trying to set it to a large value. - constexpr int kSndBufSz = 0xffffffff; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), - SyscallSucceeds()); - - max = 0; - socklen_t max_len = sizeof(max); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), - SyscallSucceeds()); - } - - { - // Discover minimum buffer size by trying to set it to zero. 
- constexpr int kSndBufSz = 0; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), - SyscallSucceeds()); - - socklen_t min_len = sizeof(min); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), - SyscallSucceeds()); - } - - int quarter_sz = min + (max - min) / 4; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)), - SyscallSucceeds()); - - int val = 0; - socklen_t val_len = sizeof(val); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), - SyscallSucceeds()); - - // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. - // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack - // matches linux behavior. - if (!IsRunningOnGvisor()) { - quarter_sz *= 2; - } - - ASSERT_EQ(quarter_sz, val); -} - -void RawSocketTest::SendBuf(const char* buf, int buf_len) { - ASSERT_NO_FATAL_FAILURE(SendBufTo(s_, addr_, buf, buf_len)); -} - -// Test that receive buffer limits are not enforced when the recv buffer is -// empty. -TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - bind(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - ASSERT_THAT( - connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - - int min = 0; - { - // Discover minimum buffer size by trying to set it to zero. - constexpr int kRcvBufSz = 0; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), - SyscallSucceeds()); - - socklen_t min_len = sizeof(min); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), - SyscallSucceeds()); - } - - { - // Send data of size min and verify that it's received. - std::vector buf(min); - RandomizeBuffer(buf.data(), buf.size()); - ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); - - // Receive the packet and make sure it's identical. - std::vector recv_buf(buf.size() + sizeof(struct iphdr)); - ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); - EXPECT_EQ( - memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), buf.size()), - 0); - } - - { - // Send data of size min + 1 and verify that its received. Both linux and - // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer - // is currently empty. - std::vector buf(min + 1); - RandomizeBuffer(buf.data(), buf.size()); - ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); - // Receive the packet and make sure it's identical. - std::vector recv_buf(buf.size() + sizeof(struct iphdr)); - ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); - EXPECT_EQ( - memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), buf.size()), - 0); - } -} - -TEST_P(RawSocketTest, RecvBufLimits) { - // TCP stack generates RSTs for unknown endpoints and it complicates the test - // as we have to deal with the RST packets as well. For testing the raw socket - // endpoints buffer limit enforcement we can just test for UDP. - // - // We don't use SKIP_IF here because root_test_runner explicitly fails if a - // test is skipped. - if (Protocol() == IPPROTO_TCP) { - return; - } - SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); - - ASSERT_THAT( - bind(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - ASSERT_THAT( - connect(s_, reinterpret_cast(&addr_), sizeof(addr_)), - SyscallSucceeds()); - - int min = 0; - { - // Discover minimum buffer size by trying to set it to zero. 
- constexpr int kRcvBufSz = 0; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), - SyscallSucceeds()); - - socklen_t min_len = sizeof(min); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), - SyscallSucceeds()); - } - - // Now set the limit to min * 2. - int new_rcv_buf_sz = min * 4; - if (!IsRunningOnGvisor()) { - // Linux doubles the value specified so just set to min. - new_rcv_buf_sz = min * 2; - } - - ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, - sizeof(new_rcv_buf_sz)), - SyscallSucceeds()); - int rcv_buf_sz = 0; - { - socklen_t rcv_buf_len = sizeof(rcv_buf_sz); - ASSERT_THAT( - getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len), - SyscallSucceeds()); - } - - // Set a receive timeout so that we don't block forever on reads if the test - // fails. - struct timeval tv { - .tv_sec = 1, .tv_usec = 0, - }; - ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), - SyscallSucceeds()); - - { - std::vector buf(min); - RandomizeBuffer(buf.data(), buf.size()); - - ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); - ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); - ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); - ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); - int sent = 4; - if (IsRunningOnGvisor()) { - // Linux seems to drop the 4th packet even though technically it should - // fit in the receive buffer. - ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); - sent++; - } - - // Verify that the expected number of packets are available to be read. - for (int i = 0; i < sent - 1; i++) { - // Receive the packet and make sure it's identical. - std::vector recv_buf(buf.size() + sizeof(struct iphdr)); - ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); - EXPECT_EQ(memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), - buf.size()), - 0); - } - - // Assert that the last packet is dropped because the receive buffer should - // be full after the first four packets. - std::vector recv_buf(buf.size() + sizeof(struct iphdr)); - struct iovec iov = {}; - iov.iov_base = static_cast(const_cast(recv_buf.data())); - iov.iov_len = buf.size(); - struct msghdr msg = {}; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT), - SyscallFailsWithErrno(EAGAIN)); - } -} - -void RawSocketTest::SendBufTo(int sock, const struct sockaddr_in& addr, - const char* buf, int buf_len) { - // It's safe to use const_cast here because sendmsg won't modify the iovec or - // address. 
- struct iovec iov = {}; - iov.iov_base = static_cast(const_cast(buf)); - iov.iov_len = static_cast(buf_len); - struct msghdr msg = {}; - msg.msg_name = static_cast(const_cast(&addr)); - msg.msg_namelen = sizeof(addr); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - ASSERT_THAT(sendmsg(sock, &msg, 0), SyscallSucceedsWithValue(buf_len)); -} - -void RawSocketTest::ReceiveBuf(char* recv_buf, size_t recv_buf_len) { - ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, recv_buf_len)); -} - -INSTANTIATE_TEST_SUITE_P(AllInetTests, RawSocketTest, - ::testing::Values(IPPROTO_TCP, IPPROTO_UDP)); - -} // namespace - -} // namespace testing -} // namespace gvisor -- cgit v1.2.3 From 85be13d9a3faae27d0991b55cfb7944c9f1e1284 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 26 Jun 2020 19:40:52 -0700 Subject: Add tests for eventfd/timerfd/inotify operations that should return ESPIPE. PiperOrigin-RevId: 318585377 --- test/syscalls/linux/eventfd.cc | 23 ++++++++++++----------- test/syscalls/linux/inotify.cc | 22 ++++++++++++++++++++-- test/syscalls/linux/timerfd.cc | 29 +++++++++++++++++++++++------ 3 files changed, 55 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc index 548b05a64..dc794415e 100644 --- a/test/syscalls/linux/eventfd.cc +++ b/test/syscalls/linux/eventfd.cc @@ -100,20 +100,21 @@ TEST(EventfdTest, SmallRead) { ASSERT_THAT(read(efd.get(), &l, 4), SyscallFailsWithErrno(EINVAL)); } -TEST(EventfdTest, PreadIllegalSeek) { - FileDescriptor efd = - ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); - - uint64_t l = 0; - ASSERT_THAT(pread(efd.get(), &l, 4, 0), SyscallFailsWithErrno(ESPIPE)); +TEST(EventfdTest, IllegalSeek) { + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + EXPECT_THAT(lseek(efd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); } -TEST(EventfdTest, PwriteIllegalSeek) { - FileDescriptor efd = - ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); +TEST(EventfdTest, IllegalPread) { + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + int l; + EXPECT_THAT(pread(efd.get(), &l, sizeof(l), 0), + SyscallFailsWithErrno(ESPIPE)); +} - uint64_t l = 0; - ASSERT_THAT(pwrite(efd.get(), &l, 4, 0), SyscallFailsWithErrno(ESPIPE)); +TEST(EventfdTest, IllegalPwrite) { + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, 0)); + EXPECT_THAT(pwrite(efd.get(), "x", 1, 0), SyscallFailsWithErrno(ESPIPE)); } TEST(EventfdTest, BigWrite) { diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index bdb645c35..220874aeb 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -333,9 +333,27 @@ PosixErrorOr InotifyAddWatch(int fd, const std::string& path, return wd; } -TEST(Inotify, InotifyFdNotWritable) { +TEST(Inotify, IllegalSeek) { const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); - EXPECT_THAT(write(fd.get(), "x", 1), SyscallFailsWithErrno(EBADF)); + EXPECT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); +} + +TEST(Inotify, IllegalPread) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + int val; + EXPECT_THAT(pread(fd.get(), &val, sizeof(val), 0), + SyscallFailsWithErrno(ESPIPE)); +} + +TEST(Inotify, IllegalPwrite) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + EXPECT_THAT(pwrite(fd.get(), "x", 1, 0), 
SyscallFailsWithErrno(ESPIPE)); +} + +TEST(Inotify, IllegalWrite) { + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(0)); + int val = 0; + EXPECT_THAT(write(fd.get(), &val, sizeof(val)), SyscallFailsWithErrno(EBADF)); } TEST(Inotify, InitFlags) { diff --git a/test/syscalls/linux/timerfd.cc b/test/syscalls/linux/timerfd.cc index 86ed87b7c..c4f8fdd7a 100644 --- a/test/syscalls/linux/timerfd.cc +++ b/test/syscalls/linux/timerfd.cc @@ -204,16 +204,33 @@ TEST_P(TimerfdTest, SetAbsoluteTime) { EXPECT_EQ(1, val); } -TEST_P(TimerfdTest, IllegalReadWrite) { +TEST_P(TimerfdTest, IllegalSeek) { + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + if (!IsRunningWithVFS1()) { + EXPECT_THAT(lseek(tfd.get(), 0, SEEK_SET), SyscallFailsWithErrno(ESPIPE)); + } +} + +TEST_P(TimerfdTest, IllegalPread) { + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + int val; + EXPECT_THAT(pread(tfd.get(), &val, sizeof(val), 0), + SyscallFailsWithErrno(ESPIPE)); +} + +TEST_P(TimerfdTest, IllegalPwrite) { + auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), 0)); + EXPECT_THAT(pwrite(tfd.get(), "x", 1, 0), SyscallFailsWithErrno(ESPIPE)); + if (!IsRunningWithVFS1()) { + } +} + +TEST_P(TimerfdTest, IllegalWrite) { auto const tfd = ASSERT_NO_ERRNO_AND_VALUE(TimerfdCreate(GetParam(), TFD_NONBLOCK)); uint64_t val = 0; - EXPECT_THAT(PreadFd(tfd.get(), &val, sizeof(val), 0), - SyscallFailsWithErrno(ESPIPE)); - EXPECT_THAT(WriteFd(tfd.get(), &val, sizeof(val)), + EXPECT_THAT(write(tfd.get(), &val, sizeof(val)), SyscallFailsWithErrno(EINVAL)); - EXPECT_THAT(PwriteFd(tfd.get(), &val, sizeof(val), 0), - SyscallFailsWithErrno(ESPIPE)); } std::string PrintClockId(::testing::TestParamInfo info) { -- cgit v1.2.3 From 02d552d07c4415978d2ce418fb16baf238d0ff78 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Sat, 27 Jun 2020 14:38:20 -0700 Subject: Support sticky bit in vfs2. Updates #2923. PiperOrigin-RevId: 318648128 --- pkg/sentry/fsimpl/gofer/filesystem.go | 65 ++++++++++++++++++++++++----- pkg/sentry/fsimpl/gofer/gofer.go | 4 ++ pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 4 ++ pkg/sentry/fsimpl/tmpfs/directory.go | 4 ++ pkg/sentry/fsimpl/tmpfs/filesystem.go | 9 ++++ pkg/sentry/vfs/permissions.go | 14 +++++++ test/syscalls/BUILD | 1 + test/syscalls/linux/sticky.cc | 57 +++++++++++++++++-------- 8 files changed, 131 insertions(+), 27 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 73bac738d..51082359d 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -16,6 +16,7 @@ package gofer import ( "sync" + "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -464,21 +465,61 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b defer mntns.DecRef() parent.dirMu.Lock() defer parent.dirMu.Unlock() + child, ok := parent.children[name] if ok && child == nil { return syserror.ENOENT } - // We only need a dentry representing the file at name if it can be a mount - // point. If child is nil, then it can't be a mount point. If child is - // non-nil but stale, the actual file can't be a mount point either; we - // detect this case by just speculatively calling PrepareDeleteDentry and - // only revalidating the dentry if that fails (indicating that the existing - // dentry is a mount point). 
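+	// The sticky-bit check needs the victim's owner, so the child dentry must
+	// be present with up-to-date metadata before parent.mayDelete() is called.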
+ + sticky := atomic.LoadUint32(&parent.mode)&linux.ModeSticky != 0 + if sticky { + if !ok { + // If the sticky bit is set, we need to retrieve the child to determine + // whether removing it is allowed. + child, err = fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds) + if err != nil { + return err + } + } else if child != nil && !child.cachedMetadataAuthoritative() { + // Make sure the dentry representing the file at name is up to date + // before examining its metadata. + child, err = fs.revalidateChildLocked(ctx, vfsObj, parent, name, child, &ds) + if err != nil { + return err + } + } + if err := parent.mayDelete(rp.Credentials(), child); err != nil { + return err + } + } + + // If a child dentry exists, prepare to delete it. This should fail if it is + // a mount point. We detect mount points by speculatively calling + // PrepareDeleteDentry, which fails if child is a mount point. However, we + // may need to revalidate the file in this case to make sure that it has not + // been deleted or replaced on the remote fs, in which case the mount point + // will have disappeared. If calling PrepareDeleteDentry fails again on the + // up-to-date dentry, we can be sure that it is a mount point. + // + // Also note that if child is nil, then it can't be a mount point. if child != nil { + // Hold child.dirMu so we can check child.children and + // child.syntheticChildren. We don't access these fields until a bit later, + // but locking child.dirMu after calling vfs.PrepareDeleteDentry() would + // create an inconsistent lock ordering between dentry.dirMu and + // vfs.Dentry.mu (in the VFS lock order, it would make dentry.dirMu both "a + // FilesystemImpl lock" and "a lock acquired by a FilesystemImpl between + // PrepareDeleteDentry and CommitDeleteDentry). To avoid this, lock + // child.dirMu before calling PrepareDeleteDentry. 
child.dirMu.Lock() defer child.dirMu.Unlock() if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { - if parent.cachedMetadataAuthoritative() { + // We can skip revalidation in several cases: + // - We are not in InteropModeShared + // - The parent directory is synthetic, in which case the child must also + // be synthetic + // - We already updated the child during the sticky bit check above + if parent.cachedMetadataAuthoritative() || sticky { return err } child, err = fs.revalidateChildLocked(ctx, vfsObj, parent, name, child, &ds) @@ -1100,7 +1141,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return err } } - if err := oldParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { + creds := rp.Credentials() + if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil { return err } vfsObj := rp.VirtualFilesystem() @@ -1115,12 +1157,15 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if renamed == nil { return syserror.ENOENT } + if err := oldParent.mayDelete(creds, renamed); err != nil { + return err + } if renamed.isDir() { if renamed == newParent || genericIsAncestorDentry(renamed, newParent) { return syserror.EINVAL } if oldParent != newParent { - if err := renamed.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { + if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil { return err } } @@ -1131,7 +1176,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } if oldParent != newParent { - if err := newParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { + if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil { return err } newParent.dirMu.Lock() diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index b0b6d8c64..71c8d3ae1 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1003,6 +1003,10 @@ func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))) } +func (d *dentry) mayDelete(creds *auth.Credentials, child *dentry) error { + return vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&child.uid))) +} + func dentryUIDFromP9UID(uid p9.UID) uint32 { if !uid.Ok() { return uint32(auth.OverflowUID) diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 53aec4918..4cb885d87 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -471,6 +471,8 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *vfs.De if err := o.checkExistingLocked(name, child); err != nil { return err } + + // TODO(gvisor.dev/issue/3027): Check sticky bit before removing. o.removeLocked(name) return nil } @@ -518,6 +520,8 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c if err := o.checkExistingLocked(oldname, child); err != nil { return nil, err } + + // TODO(gvisor.dev/issue/3027): Check sticky bit before removing. 
replaced := dst.replaceChildLocked(newname, child) return replaced, nil } diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index b172abc15..0a1ad4765 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -81,6 +81,10 @@ func (dir *directory) removeChildLocked(child *dentry) { dir.iterMu.Unlock() } +func (dir *directory) mayDelete(creds *auth.Credentials, child *dentry) error { + return vfs.CheckDeleteSticky(creds, linux.FileMode(atomic.LoadUint32(&dir.inode.mode)), auth.KUID(atomic.LoadUint32(&child.inode.uid))) +} + type directoryFD struct { fileDescription vfs.DirectoryFileDescriptionDefaultImpl diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index f766b7ce2..71ac7b8e6 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -492,6 +492,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if !ok { return syserror.ENOENT } + if err := oldParentDir.mayDelete(rp.Credentials(), renamed); err != nil { + return err + } // Note that we don't need to call rp.CheckMount(), since if renamed is a // mount point then we want to rename the mount point, not anything in the // mounted filesystem. @@ -606,6 +609,9 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error if !ok { return syserror.ENOENT } + if err := parentDir.mayDelete(rp.Credentials(), child); err != nil { + return err + } childDir, ok := child.inode.impl.(*directory) if !ok { return syserror.ENOTDIR @@ -716,6 +722,9 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error if !ok { return syserror.ENOENT } + if err := parentDir.mayDelete(rp.Credentials(), child); err != nil { + return err + } if child.inode.isDir() { return syserror.EISDIR } diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go index afe2be8d7..9cb050597 100644 --- a/pkg/sentry/vfs/permissions.go +++ b/pkg/sentry/vfs/permissions.go @@ -230,6 +230,20 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, stat *linux.Stat return nil } +// CheckDeleteSticky checks whether the sticky bit is set on a directory with +// the given file mode, and if so, checks whether creds has permission to +// remove a file owned by childKUID from a directory with the given mode. +// CheckDeleteSticky is consistent with fs/linux.h:check_sticky(). +func CheckDeleteSticky(creds *auth.Credentials, parentMode linux.FileMode, childKUID auth.KUID) error { + if parentMode&linux.ModeSticky == 0 { + return nil + } + if CanActAsOwner(creds, childKUID) { + return nil + } + return syserror.EPERM +} + // CanActAsOwner returns true if creds can act as the owner of a file with the // given owning UID, consistent with Linux's // fs/inode.c:inode_owner_or_capable(). 
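A minimal, self-contained sketch of the rule that the new CheckDeleteSticky helper and its callers enforce, for readers following the sticky-bit changes above. This is illustrative only and not part of the patch: it uses plain integer UIDs instead of gVisor's auth types and ignores the CAP_FOWNER escape hatch that CanActAsOwner provides.

package main

import "fmt"

const modeSticky = 0o1000 // S_ISVTX, the bit linux.ModeSticky names.

// canDelete reports whether a caller (without CAP_FOWNER) may remove a file
// owned by fileUID from a directory whose mode is dirMode.
func canDelete(callerUID, fileUID, dirMode uint32) bool {
	if dirMode&modeSticky == 0 {
		return true // Only the usual write+exec checks on the directory apply.
	}
	return callerUID == fileUID // Sticky directory: the caller must own the victim.
}

func main() {
	fmt.Println(canDelete(1000, 2000, 0o0777)) // true: directory is not sticky
	fmt.Println(canDelete(1000, 2000, 0o1777)) // false: sticky and not the owner
	fmt.Println(canDelete(1000, 1000, 0o1777)) // true: sticky but caller owns the file
}

The gofer and tmpfs changes above feed the real helper with the parent's mode and the child's owner loaded atomically; kernfs defers the same check behind the TODO noted earlier.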
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index c4fff0ac8..36c178e4a 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -942,6 +942,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:sticky_test", + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc index 92eec0449..39f4fb801 100644 --- a/test/syscalls/linux/sticky.cc +++ b/test/syscalls/linux/sticky.cc @@ -40,11 +40,14 @@ namespace { TEST(StickyTest, StickyBitPermDenied) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); - auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - const FileDescriptor dirfd = - ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY)); - ASSERT_THAT(mkdirat(dirfd.get(), "NewDir", 0755), SyscallSucceeds()); + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(parent.path(), "some content", 0755)); + const TempPath dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); + const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(parent.path(), file.path())); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. @@ -62,18 +65,26 @@ TEST(StickyTest, StickyBitPermDenied) { syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1), SyscallSucceeds()); - EXPECT_THAT(unlinkat(dirfd.get(), "NewDir", AT_REMOVEDIR), + std::string new_path = NewTempAbsPath(); + EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlink(file.path().c_str()), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlink(link.path().c_str()), SyscallFailsWithErrno(EPERM)); }); } TEST(StickyTest, StickyBitSameUID) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); - auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - std::string path = JoinPath(dir.path(), "NewDir"); - ASSERT_THAT(mkdir(path.c_str(), 0755), SyscallSucceeds()); + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(parent.path(), "some content", 0755)); + const TempPath dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); + const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(parent.path(), file.path())); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. @@ -89,18 +100,26 @@ TEST(StickyTest, StickyBitSameUID) { SyscallSucceeds()); // We still have the same EUID. 
- EXPECT_THAT(rmdir(path.c_str()), SyscallSucceeds()); + std::string new_path = NewTempAbsPath(); + EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), + SyscallSucceeds()); + EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds()); + EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds()); }); } TEST(StickyTest, StickyBitCapFOWNER) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID))); - auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); - EXPECT_THAT(chmod(dir.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - const FileDescriptor dirfd = - ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_DIRECTORY)); - ASSERT_THAT(mkdirat(dirfd.get(), "NewDir", 0755), SyscallSucceeds()); + const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(parent.path(), "some content", 0755)); + const TempPath dir = + ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); + const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(parent.path(), file.path())); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. @@ -117,8 +136,12 @@ TEST(StickyTest, StickyBitCapFOWNER) { SyscallSucceeds()); EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true)); - EXPECT_THAT(unlinkat(dirfd.get(), "NewDir", AT_REMOVEDIR), + std::string new_path = NewTempAbsPath(); + EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds()); + EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds()); }); } } // namespace -- cgit v1.2.3 From e8f1a5c1f652ba7abb8c4bd842d6afdcab03865a Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Sat, 27 Jun 2020 21:32:16 -0700 Subject: Port GETOWN, SETOWN fcntls to vfs2. Also make some fixes to vfs1's F_SETOWN. The fcntl test now entirely passes on vfs2. Fixes #2920. PiperOrigin-RevId: 318669529 --- pkg/abi/linux/fcntl.go | 2 +- pkg/sentry/kernel/fasync/BUILD | 1 + pkg/sentry/kernel/fasync/fasync.go | 8 +- pkg/sentry/syscalls/linux/sys_file.go | 15 ++-- pkg/sentry/syscalls/linux/vfs2/BUILD | 1 + pkg/sentry/syscalls/linux/vfs2/fd.go | 93 ++++++++++++++++++++++ pkg/sentry/vfs/file_description.go | 74 ++++++++++++++++-- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/fcntl.cc | 140 ++++++++++++++++++++++++++++++---- 9 files changed, 308 insertions(+), 27 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/fcntl.go b/pkg/abi/linux/fcntl.go index 6663a199c..9242e80a5 100644 --- a/pkg/abi/linux/fcntl.go +++ b/pkg/abi/linux/fcntl.go @@ -55,7 +55,7 @@ type Flock struct { _ [4]byte } -// Flags for F_SETOWN_EX and F_GETOWN_EX. +// Owner types for F_SETOWN_EX and F_GETOWN_EX. 
const ( F_OWNER_TID = 0 F_OWNER_PID = 1 diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD index b9126e946..2b3955598 100644 --- a/pkg/sentry/kernel/fasync/BUILD +++ b/pkg/sentry/kernel/fasync/BUILD @@ -11,6 +11,7 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/vfs", "//pkg/sync", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go index d32c3e90a..323f1dfa5 100644 --- a/pkg/sentry/kernel/fasync/fasync.go +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -20,15 +20,21 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) -// New creates a new FileAsync. +// New creates a new fs.FileAsync. func New() fs.FileAsync { return &FileAsync{} } +// NewVFS2 creates a new vfs.FileAsync. +func NewVFS2() vfs.FileAsync { + return &FileAsync{} +} + // FileAsync sends signals when the registered file is ready for IO. // // +stateify savable diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 35eba20c5..2797c6a72 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -900,14 +900,20 @@ func fGetOwn(t *kernel.Task, file *fs.File) int32 { // // If who is positive, it represents a PID. If negative, it represents a PGID. // If the PID or PGID is invalid, the owner is silently unset. -func fSetOwn(t *kernel.Task, file *fs.File, who int32) { +func fSetOwn(t *kernel.Task, file *fs.File, who int32) error { a := file.Async(fasync.New).(*fasync.FileAsync) if who < 0 { + // Check for overflow before flipping the sign. + if who-1 > who { + return syserror.EINVAL + } pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(-who)) a.SetOwnerProcessGroup(t, pg) + } else { + tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(who)) + a.SetOwnerThreadGroup(t, tg) } - tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(who)) - a.SetOwnerThreadGroup(t, tg) + return nil } // Fcntl implements linux syscall fcntl(2). 
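The overflow guard in fSetOwn above exists because a negative "who" names a process group and must be negated, yet the most negative 32-bit value has no positive counterpart. A standalone C++ rendering of that parsing step, for reference only (the helper name is made up; the Go code expresses the same check as "who-1 > who", which is valid there because Go integer overflow wraps):

#include <cerrno>
#include <cstdint>

// Hypothetical sketch: negative "who" selects a process group, so flip its
// sign; reject INT32_MIN first because -INT32_MIN is not representable.
int ParseSetOwnArg(int32_t who, bool* is_pgrp, int32_t* id) {
  *is_pgrp = false;
  if (who < 0) {
    if (who == INT32_MIN) return EINVAL;
    *is_pgrp = true;
    who = -who;
  }
  *id = who;
  return 0;
}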
@@ -1042,8 +1048,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.F_GETOWN: return uintptr(fGetOwn(t, file)), nil, nil case linux.F_SETOWN: - fSetOwn(t, file, args[2].Int()) - return 0, nil, nil + return 0, nil, fSetOwn(t, file, args[2].Int()) case linux.F_GETOWN_EX: addr := args[2].Pointer() owner := fGetOwnEx(t, file) diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index c301a0991..0c740335b 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -54,6 +54,7 @@ go_library( "//pkg/sentry/fsimpl/tmpfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/fasync", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/limits", diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index e68b20bed..7e4c6a56e 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/fasync" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -154,6 +155,47 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } return uintptr(n), nil, nil + case linux.F_GETOWN: + a := file.AsyncHandler() + if a == nil { + return 0, nil, nil + } + owner := getAsyncOwner(t, a.(*fasync.FileAsync)) + if owner.Type == linux.F_OWNER_PGRP { + return uintptr(-owner.PID), nil, nil + } + return uintptr(owner.PID), nil, nil + case linux.F_SETOWN: + who := args[2].Int() + ownerType := int32(linux.F_OWNER_PID) + if who < 0 { + // Check for overflow before flipping the sign. 
+ if who-1 > who { + return 0, nil, syserror.EINVAL + } + ownerType = linux.F_OWNER_PGRP + who = -who + } + a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync) + return 0, nil, setAsyncOwner(t, a, ownerType, who) + case linux.F_GETOWN_EX: + a := file.AsyncHandler() + if a == nil { + return 0, nil, nil + } + addr := args[2].Pointer() + owner := getAsyncOwner(t, a.(*fasync.FileAsync)) + _, err := t.CopyOut(addr, &owner) + return 0, nil, err + case linux.F_SETOWN_EX: + addr := args[2].Pointer() + var owner linux.FOwnerEx + n, err := t.CopyIn(addr, &owner) + if err != nil { + return 0, nil, err + } + a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync) + return uintptr(n), nil, setAsyncOwner(t, a, owner.Type, owner.PID) case linux.F_GETPIPE_SZ: pipefile, ok := file.Impl().(*pipe.VFSPipeFD) if !ok { @@ -177,6 +219,57 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } } +func getAsyncOwner(t *kernel.Task, a *fasync.FileAsync) linux.FOwnerEx { + ot, otg, opg := a.Owner() + switch { + case ot != nil: + return linux.FOwnerEx{ + Type: linux.F_OWNER_TID, + PID: int32(t.PIDNamespace().IDOfTask(ot)), + } + case otg != nil: + return linux.FOwnerEx{ + Type: linux.F_OWNER_PID, + PID: int32(t.PIDNamespace().IDOfThreadGroup(otg)), + } + case opg != nil: + return linux.FOwnerEx{ + Type: linux.F_OWNER_PGRP, + PID: int32(t.PIDNamespace().IDOfProcessGroup(opg)), + } + default: + return linux.FOwnerEx{} + } +} + +func setAsyncOwner(t *kernel.Task, a *fasync.FileAsync, ownerType, pid int32) error { + switch ownerType { + case linux.F_OWNER_TID: + task := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) + if task == nil { + return syserror.ESRCH + } + a.SetOwnerTask(t, task) + return nil + case linux.F_OWNER_PID: + tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(pid)) + if tg == nil { + return syserror.ESRCH + } + a.SetOwnerThreadGroup(t, tg) + return nil + case linux.F_OWNER_PGRP: + pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(pid)) + if pg == nil { + return syserror.ESRCH + } + a.SetOwnerProcessGroup(t, pg) + return nil + default: + return syserror.EINVAL + } +} + func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, cmd int32) error { // Copy in the lock request. flockAddr := args[2].Pointer() diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index e0538ea53..cd1db14ac 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -42,11 +42,20 @@ type FileDescription struct { // operations. refs int64 + // flagsMu protects statusFlags and asyncHandler below. + flagsMu sync.Mutex + // statusFlags contains status flags, "initialized by open(2) and possibly - // modified by fcntl()" - fcntl(2). statusFlags is accessed using atomic - // memory operations. + // modified by fcntl()" - fcntl(2). statusFlags can be read using atomic + // memory operations when it does not need to be synchronized with an + // access to asyncHandler. statusFlags uint32 + // asyncHandler handles O_ASYNC signal generation. It is set with the + // F_SETOWN or F_SETOWN_EX fcntls. For asyncHandler to be used, O_ASYNC must + // also be set by fcntl(2). + asyncHandler FileAsync + // epolls is the set of epollInterests registered for this FileDescription. // epolls is protected by epollMu. 
epollMu sync.Mutex @@ -193,6 +202,13 @@ func (fd *FileDescription) DecRef() { fd.vd.mount.EndWrite() } fd.vd.DecRef() + fd.flagsMu.Lock() + // TODO(gvisor.dev/issue/1663): We may need to unregister during save, as we do in VFS1. + if fd.statusFlags&linux.O_ASYNC != 0 && fd.asyncHandler != nil { + fd.asyncHandler.Unregister(fd) + } + fd.asyncHandler = nil + fd.flagsMu.Unlock() } else if refs < 0 { panic("FileDescription.DecRef() called without holding a reference") } @@ -276,7 +292,18 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede } // TODO(jamieliu): FileDescriptionImpl.SetOAsync()? const settableFlags = linux.O_APPEND | linux.O_ASYNC | linux.O_DIRECT | linux.O_NOATIME | linux.O_NONBLOCK - atomic.StoreUint32(&fd.statusFlags, (oldFlags&^settableFlags)|(flags&settableFlags)) + fd.flagsMu.Lock() + if fd.asyncHandler != nil { + // Use fd.statusFlags instead of oldFlags, which may have become outdated, + // to avoid double registering/unregistering. + if fd.statusFlags&linux.O_ASYNC == 0 && flags&linux.O_ASYNC != 0 { + fd.asyncHandler.Register(fd) + } else if fd.statusFlags&linux.O_ASYNC != 0 && flags&linux.O_ASYNC == 0 { + fd.asyncHandler.Unregister(fd) + } + } + fd.statusFlags = (oldFlags &^ settableFlags) | (flags & settableFlags) + fd.flagsMu.Unlock() return nil } @@ -533,17 +560,23 @@ func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { return fd.impl.StatFS(ctx) } -// Readiness returns fd's I/O readiness. +// Readiness implements waiter.Waitable.Readiness. +// +// It returns fd's I/O readiness. func (fd *FileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { return fd.impl.Readiness(mask) } -// EventRegister registers e for I/O readiness events in mask. +// EventRegister implements waiter.Waitable.EventRegister. +// +// It registers e for I/O readiness events in mask. func (fd *FileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) { fd.impl.EventRegister(e, mask) } -// EventUnregister unregisters e for I/O readiness events. +// EventUnregister implements waiter.Waitable.EventUnregister. +// +// It unregisters e for I/O readiness events. func (fd *FileDescription) EventUnregister(e *waiter.Entry) { fd.impl.EventUnregister(e) } @@ -770,3 +803,32 @@ func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, t l func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, start, end uint64, whence int16) error { return fd.impl.UnlockPOSIX(ctx, uid, start, end, whence) } + +// A FileAsync sends signals to its owner when w is ready for IO. This is only +// implemented by pkg/sentry/fasync:FileAsync, but we unfortunately need this +// interface to avoid circular dependencies. +type FileAsync interface { + Register(w waiter.Waitable) + Unregister(w waiter.Waitable) +} + +// AsyncHandler returns the FileAsync for fd. +func (fd *FileDescription) AsyncHandler() FileAsync { + fd.flagsMu.Lock() + defer fd.flagsMu.Unlock() + return fd.asyncHandler +} + +// SetAsyncHandler sets fd.asyncHandler if it has not been set before and +// returns it. 
+func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsync { + fd.flagsMu.Lock() + defer fd.flagsMu.Unlock() + if fd.asyncHandler == nil { + fd.asyncHandler = newHandler() + if fd.statusFlags&linux.O_ASYNC != 0 { + fd.asyncHandler.Register(fd) + } + } + return fd.asyncHandler +} diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 270b9e4c4..e141a86bb 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -805,6 +805,7 @@ cc_binary( "//test/util:save_util", "//test/util:temp_path", "//test/util:test_util", + "//test/util:thread_util", "//test/util:timer_util", ], ) diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 25bef2522..9130618fa 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -37,6 +37,7 @@ #include "test/util/save_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" +#include "test/util/thread_util.h" #include "test/util/timer_util.h" ABSL_FLAG(std::string, child_setlock_on, "", @@ -953,15 +954,18 @@ TEST(FcntlTest, DupAfterO_ASYNC) { EXPECT_EQ(after & O_ASYNC, O_ASYNC); } -TEST(FcntlTest, GetOwn) { +TEST(FcntlTest, GetOwnNone) { FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); - EXPECT_EQ(syscall(__NR_fcntl, s.get(), F_GETOWN), 0); + // Use the raw syscall because the glibc wrapper may convert F_{GET,SET}OWN + // into F_{GET,SET}OWN_EX. + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); MaybeSave(); } -TEST(FcntlTest, GetOwnEx) { +TEST(FcntlTest, GetOwnExNone) { FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); @@ -970,6 +974,70 @@ TEST(FcntlTest, GetOwnEx) { SyscallSucceedsWithValue(0)); } +TEST(FcntlTest, SetOwnInvalidPid) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 12345678), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnInvalidPgrp) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -12345678), + SyscallFailsWithErrno(ESRCH)); +} + +TEST(FcntlTest, SetOwnPid) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + pid_t pid; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(pid)); + MaybeSave(); +} + +TEST(FcntlTest, SetOwnPgrp) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + pid_t pgid; + EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds()); + + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds()); + + // Verify with F_GETOWN_EX; using F_GETOWN on Linux may incorrectly treat the + // negative return value as an error, converting the return value to -1 and + // setting errno accordingly. 
+ f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, F_OWNER_PGRP); + EXPECT_EQ(got_owner.pid, pgid); + MaybeSave(); +} + +// F_SETOWN flips the sign of negative values, an operation that is guarded +// against overflow. +TEST(FcntlTest, SetOwnOverflow) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, INT_MIN), + SyscallFailsWithErrno(EINVAL)); +} + TEST(FcntlTest, SetOwnExInvalidType) { FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); @@ -1027,7 +1095,8 @@ TEST(FcntlTest, SetOwnExTid) { ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), SyscallSucceeds()); - EXPECT_EQ(syscall(__NR_fcntl, s.get(), F_GETOWN), owner.pid); + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(owner.pid)); MaybeSave(); } @@ -1042,7 +1111,8 @@ TEST(FcntlTest, SetOwnExPid) { ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), SyscallSucceeds()); - EXPECT_EQ(syscall(__NR_fcntl, s.get(), F_GETOWN), owner.pid); + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(owner.pid)); MaybeSave(); } @@ -1050,18 +1120,21 @@ TEST(FcntlTest, SetOwnExPgrp) { FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); - f_owner_ex owner = {}; - owner.type = F_OWNER_PGRP; - EXPECT_THAT(owner.pid = getpgrp(), SyscallSucceeds()); + f_owner_ex set_owner = {}; + set_owner.type = F_OWNER_PGRP; + EXPECT_THAT(set_owner.pid = getpgrp(), SyscallSucceeds()); - ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), SyscallSucceeds()); - // NOTE(igudger): I don't understand why, but this is flaky on Linux. - // GetOwnExPgrp (below) does not have this issue. - SKIP_IF(!IsRunningOnGvisor()); - - EXPECT_EQ(syscall(__NR_fcntl, s.get(), F_GETOWN), -owner.pid); + // Verify with F_GETOWN_EX; using F_GETOWN on Linux may incorrectly treat the + // negative return value as an error, converting the return value to -1 and + // setting errno accordingly. + f_owner_ex got_owner = {}; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(got_owner.type, set_owner.type); + EXPECT_EQ(got_owner.pid, set_owner.pid); MaybeSave(); } @@ -1119,6 +1192,45 @@ TEST(FcntlTest, GetOwnExPgrp) { EXPECT_EQ(got_owner.pid, set_owner.pid); } +// Make sure that making multiple concurrent changes to async signal generation +// does not cause any race issues. 
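The owner set with F_SETOWN/F_SETOWN_EX only has an effect once O_ASYNC is also enabled on the descriptor, which is the interplay the vfs2 asyncHandler code above implements. A minimal, illustrative userland pairing of the two fcntls (the helper name is made up; the calls themselves are standard Linux API):

#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void on_sigio(int) { /* do only async-signal-safe work here */ }

// Hypothetical helper: route SIGIO for fd to this process and enable O_ASYNC.
int EnableAsyncIO(int fd) {
  struct sigaction sa = {};
  sa.sa_handler = on_sigio;
  if (sigaction(SIGIO, &sa, nullptr) == -1) return -1;
  if (fcntl(fd, F_SETOWN, getpid()) == -1) return -1;  // who receives SIGIO.
  int flags = fcntl(fd, F_GETFL);
  if (flags == -1) return -1;
  return fcntl(fd, F_SETFL, flags | O_ASYNC);          // start generating it.
}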
+TEST(FcntlTest, SetFlSetOwnDoNotRace) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + pid_t pid; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); + + constexpr absl::Duration runtime = absl::Milliseconds(300); + auto setAsync = [&s, &runtime] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETFL, O_ASYNC), + SyscallSucceeds()); + sched_yield(); + } + }; + auto resetAsync = [&s, &runtime] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETFL, 0), SyscallSucceeds()); + sched_yield(); + } + }; + auto setOwn = [&s, &pid, &runtime] { + for (auto start = absl::Now(); absl::Now() - start < runtime;) { + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), + SyscallSucceeds()); + sched_yield(); + } + }; + + std::list threads; + for (int i = 0; i < 10; i++) { + threads.emplace_back(setAsync); + threads.emplace_back(resetAsync); + threads.emplace_back(setOwn); + } +} + } // namespace } // namespace testing -- cgit v1.2.3 From 5b0d8ff6919a071effb9cf69ad715487f55ca2f3 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Mon, 29 Jun 2020 15:44:46 -0700 Subject: Refactor udp_socket_test Bring udp_socket_test into complianc by: - Eliminating IsRunningOnGvisor() invocations. - Wrapping sockets in RAII FileDescriptor objects. - Creating a Bind() method so that the first bind happens on port 0. PiperOrigin-RevId: 318909396 --- test/syscalls/linux/BUILD | 4 + .../linux/udp_socket_errqueue_test_case.cc | 2 +- test/syscalls/linux/udp_socket_test_cases.cc | 1341 ++++++++++---------- test/syscalls/linux/udp_socket_test_cases.h | 60 +- 4 files changed, 723 insertions(+), 684 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index e141a86bb..8c7d54b21 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3548,11 +3548,15 @@ cc_library( hdrs = ["udp_socket_test_cases.h"], defines = select_system(), deps = [ + ":ip_socket_test_util", ":socket_test_util", ":unix_domain_socket_test_util", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", gtest, + "//test/util:file_descriptor", + "//test/util:posix_error", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", diff --git a/test/syscalls/linux/udp_socket_errqueue_test_case.cc b/test/syscalls/linux/udp_socket_errqueue_test_case.cc index fcdba7279..54a0594f7 100644 --- a/test/syscalls/linux/udp_socket_errqueue_test_case.cc +++ b/test/syscalls/linux/udp_socket_errqueue_test_case.cc @@ -47,7 +47,7 @@ TEST_P(UdpSocketTest, ErrorQueue) { msg.msg_controllen = sizeof(cmsgbuf); // recv*(MSG_ERRQUEUE) never blocks, even without MSG_DONTWAIT. 
- EXPECT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_ERRQUEUE), + EXPECT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, MSG_ERRQUEUE), SyscallFailsWithErrno(EAGAIN)); } diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 1d13432ca..9cc6be4fb 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -22,6 +22,7 @@ #include #include +#include "absl/strings/str_format.h" #ifndef SIOCGSTAMP #include #endif @@ -30,8 +31,11 @@ #include "absl/base/macros.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "test/syscalls/linux/ip_socket_test_util.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -54,517 +58,470 @@ uint16_t* Port(struct sockaddr_storage* addr) { return nullptr; } +// Sets addr port to "port". +void SetPort(struct sockaddr_storage* addr, uint16_t port) { + switch (addr->ss_family) { + case AF_INET: { + auto sin = reinterpret_cast(addr); + sin->sin_port = port; + break; + } + case AF_INET6: { + auto sin6 = reinterpret_cast(addr); + sin6->sin6_port = port; + break; + } + } +} + void UdpSocketTest::SetUp() { - int type; + addrlen_ = GetAddrLength(); + + bind_ = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP)); + memset(&bind_addr_storage_, 0, sizeof(bind_addr_storage_)); + bind_addr_ = reinterpret_cast(&bind_addr_storage_); + + sock_ = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP)); +} + +int UdpSocketTest::GetFamily() { if (GetParam() == AddressFamily::kIpv4) { - type = AF_INET; - auto sin = reinterpret_cast(&anyaddr_storage_); - addrlen_ = sizeof(*sin); - sin->sin_addr.s_addr = htonl(INADDR_ANY); - } else { - type = AF_INET6; - auto sin6 = reinterpret_cast(&anyaddr_storage_); - addrlen_ = sizeof(*sin6); - if (GetParam() == AddressFamily::kIpv6) { - sin6->sin6_addr = IN6ADDR_ANY_INIT; - } else { - TestAddress const& v4_mapped_any = V4MappedAny(); - sin6->sin6_addr = - reinterpret_cast(&v4_mapped_any.addr) - ->sin6_addr; - } + return AF_INET; } - ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); + return AF_INET6; +} - ASSERT_THAT(t_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); +PosixError UdpSocketTest::BindLoopback() { + bind_addr_storage_ = InetLoopbackAddr(); + struct sockaddr* bind_addr_ = + reinterpret_cast(&bind_addr_storage_); + return BindSocket(bind_.get(), bind_addr_); +} - memset(&anyaddr_storage_, 0, sizeof(anyaddr_storage_)); - anyaddr_ = reinterpret_cast(&anyaddr_storage_); - anyaddr_->sa_family = type; +PosixError UdpSocketTest::BindAny() { + bind_addr_storage_ = InetAnyAddr(); + struct sockaddr* bind_addr_ = + reinterpret_cast(&bind_addr_storage_); + return BindSocket(bind_.get(), bind_addr_); +} - if (gvisor::testing::IsRunningOnGvisor()) { - for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { - ports_[i] = TestPort + i; - } - } else { - // When not under gvisor, use utility function to pick port. Assert that - // all ports are different. - std::string error; - for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { - // Find an unused port, we specify port 0 to allow the kernel to provide - // the port. 
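The refactored fixture replaces hand-picked TestPort constants with kernel-assigned ports. The underlying idiom, sketched standalone for reference (the helper name is hypothetical): bind to port 0 so the kernel chooses a free port, then read the chosen address back with getsockname().

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <cstring>

// Hypothetical helper: bind fd to loopback with port 0 and report the
// kernel-assigned address through *out.
int BindEphemeral(int fd, sockaddr_in* out) {
  sockaddr_in addr;
  std::memset(&addr, 0, sizeof(addr));
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  addr.sin_port = htons(0);  // 0 == "pick any free port".
  if (bind(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) == -1)
    return -1;
  socklen_t len = sizeof(*out);
  return getsockname(fd, reinterpret_cast<sockaddr*>(out), &len);
}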
- bool unique = true; - do { - ports_[i] = ASSERT_NO_ERRNO_AND_VALUE(PortAvailable( - 0, AddressFamily::kDualStack, SocketType::kUdp, false)); - ASSERT_GT(ports_[i], 0); - for (size_t j = 0; j < i; ++j) { - if (ports_[j] == ports_[i]) { - unique = false; - break; - } - } - } while (!unique); - } +PosixError UdpSocketTest::BindSocket(int socket, struct sockaddr* addr) { + socklen_t len = sizeof(bind_addr_storage_); + + // Bind, then check that we get the right address. + RETURN_ERROR_IF_SYSCALL_FAIL(bind(socket, addr, addrlen_)); + + RETURN_ERROR_IF_SYSCALL_FAIL(getsockname(socket, addr, &len)); + + if (addrlen_ != len) { + return PosixError( + EINVAL, + absl::StrFormat("getsockname len: %u expected: %u", len, addrlen_)); } + return PosixError(0); +} - // Initialize the sockaddrs. - for (size_t i = 0; i < ABSL_ARRAYSIZE(addr_); ++i) { - memset(&addr_storage_[i], 0, sizeof(addr_storage_[i])); - - addr_[i] = reinterpret_cast(&addr_storage_[i]); - addr_[i]->sa_family = type; - - switch (type) { - case AF_INET: { - auto sin = reinterpret_cast(addr_[i]); - sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - sin->sin_port = htons(ports_[i]); - break; - } - case AF_INET6: { - auto sin6 = reinterpret_cast(addr_[i]); - sin6->sin6_addr = in6addr_loopback; - sin6->sin6_port = htons(ports_[i]); - break; - } - } +socklen_t UdpSocketTest::GetAddrLength() { + struct sockaddr_storage addr; + if (GetFamily() == AF_INET) { + auto sin = reinterpret_cast(&addr); + return sizeof(*sin); } + + auto sin6 = reinterpret_cast(&addr); + return sizeof(*sin6); } -TEST_P(UdpSocketTest, Creation) { - int type = AF_INET6; - if (GetParam() == AddressFamily::kIpv4) { - type = AF_INET; +sockaddr_storage UdpSocketTest::InetAnyAddr() { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + reinterpret_cast(&addr)->sa_family = GetFamily(); + + if (GetFamily() == AF_INET) { + auto sin = reinterpret_cast(&addr); + sin->sin_addr.s_addr = htonl(INADDR_ANY); + sin->sin_port = htons(0); + return addr; } - int s_; + auto sin6 = reinterpret_cast(&addr); + sin6->sin6_addr = IN6ADDR_ANY_INIT; + sin6->sin6_port = htons(0); + return addr; +} - ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, IPPROTO_UDP), SyscallSucceeds()); - EXPECT_THAT(close(s_), SyscallSucceeds()); +sockaddr_storage UdpSocketTest::InetLoopbackAddr() { + struct sockaddr_storage addr; + memset(&addr, 0, sizeof(addr)); + reinterpret_cast(&addr)->sa_family = GetFamily(); + + if (GetFamily() == AF_INET) { + auto sin = reinterpret_cast(&addr); + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + sin->sin_port = htons(0); + return addr; + } + auto sin6 = reinterpret_cast(&addr); + sin6->sin6_addr = in6addr_loopback; + sin6->sin6_port = htons(0); + return addr; +} + +void UdpSocketTest::Disconnect(int sockfd) { + sockaddr_storage addr_storage = InetAnyAddr(); + sockaddr* addr = reinterpret_cast(&addr_storage); + socklen_t addrlen = sizeof(addr_storage); + + addr->sa_family = AF_UNSPEC; + ASSERT_THAT(connect(sockfd, addr, addrlen), SyscallSucceeds()); + + // Check that after disconnect the socket is bound to the ANY address. 
+ EXPECT_THAT(getsockname(sockfd, addr, &addrlen), SyscallSucceeds()); + if (GetParam() == AddressFamily::kIpv4) { + auto addr_out = reinterpret_cast(addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY)); + } else { + auto addr_out = reinterpret_cast(addr); + EXPECT_EQ(addrlen, sizeof(*addr_out)); + struct in6_addr loopback = IN6ADDR_ANY_INIT; + + EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); + } +} + +TEST_P(UdpSocketTest, Creation) { + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP)); + EXPECT_THAT(close(sock.release()), SyscallSucceeds()); - ASSERT_THAT(s_ = socket(type, SOCK_DGRAM, 0), SyscallSucceeds()); - EXPECT_THAT(close(s_), SyscallSucceeds()); + sock = ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, 0)); + EXPECT_THAT(close(sock.release()), SyscallSucceeds()); - ASSERT_THAT(s_ = socket(type, SOCK_STREAM, IPPROTO_UDP), SyscallFails()); + ASSERT_THAT(socket(GetFamily(), SOCK_STREAM, IPPROTO_UDP), SyscallFails()); } TEST_P(UdpSocketTest, Getsockname) { // Check that we're not bound. struct sockaddr_storage addr; socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + EXPECT_THAT( + getsockname(bind_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, anyaddr_, addrlen_), 0); + struct sockaddr_storage any = InetAnyAddr(); + EXPECT_EQ(memcmp(&addr, reinterpret_cast(&any), addrlen_), + 0); - // Bind, then check that we get the right address. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + + EXPECT_THAT( + getsockname(bind_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); - addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); + EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0); } TEST_P(UdpSocketTest, Getpeername) { + ASSERT_NO_ERRNO(BindLoopback()); + // Check that we're not connected. struct sockaddr_storage addr; socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); // Connect, then check that we get the right address. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); addrlen = sizeof(addr); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); + EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0); } TEST_P(UdpSocketTest, SendNotConnected) { + ASSERT_NO_ERRNO(BindLoopback()); + // Do send & write, they must fail. char buf[512]; - EXPECT_THAT(send(s_, buf, sizeof(buf), 0), + EXPECT_THAT(send(sock_.get(), buf, sizeof(buf), 0), SyscallFailsWithErrno(EDESTADDRREQ)); - EXPECT_THAT(write(s_, buf, sizeof(buf)), SyscallFailsWithErrno(EDESTADDRREQ)); + EXPECT_THAT(write(sock_.get(), buf, sizeof(buf)), + SyscallFailsWithErrno(EDESTADDRREQ)); // Use sendto. 
- ASSERT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[0], addrlen_), + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); // Check that we're bound now. struct sockaddr_storage addr; socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); EXPECT_NE(*Port(&addr), 0); } TEST_P(UdpSocketTest, ConnectBinds) { + ASSERT_NO_ERRNO(BindLoopback()); + // Connect the socket. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); // Check that we're bound now. struct sockaddr_storage addr; socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); EXPECT_NE(*Port(&addr), 0); } TEST_P(UdpSocketTest, ReceiveNotBound) { char buf[512]; - EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); } TEST_P(UdpSocketTest, Bind) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); // Try to bind again. - EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(bind(bind_.get(), bind_addr_, addrlen_), + SyscallFailsWithErrno(EINVAL)); // Check that we're still bound to the original address. struct sockaddr_storage addr; socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + EXPECT_THAT( + getsockname(bind_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[0], addrlen_), 0); + EXPECT_EQ(memcmp(&addr, bind_addr_, addrlen_), 0); } TEST_P(UdpSocketTest, BindInUse) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); // Try to bind again. - EXPECT_THAT(bind(t_, addr_[0], addrlen_), SyscallFailsWithErrno(EADDRINUSE)); + EXPECT_THAT(bind(sock_.get(), bind_addr_, addrlen_), + SyscallFailsWithErrno(EADDRINUSE)); } TEST_P(UdpSocketTest, ReceiveAfterConnect) { - // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); - // Get the address s_ was bound to during connect. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); - - // Send from t_ to s_. + // Send from sock_ to bind_ char buf[512]; RandomizeBuffer(buf, sizeof(buf)); - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, - reinterpret_cast(&addr), addrlen), + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); // Receive the data. 
char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); } TEST_P(UdpSocketTest, ReceiveAfterDisconnect) { - // Connect s_ to loopback:TestPort, and bind t_ to loopback:TestPort. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(bind(t_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds()); - - // Get the address s_ was bound to during connect. - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - EXPECT_EQ(addrlen, addrlen_); + ASSERT_NO_ERRNO(BindLoopback()); for (int i = 0; i < 2; i++) { - // Send from t_ to s_. + // Connet sock_ to bound address. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); + EXPECT_EQ(addrlen, addrlen_); + + // Send from sock to bind_. char buf[512]; RandomizeBuffer(buf, sizeof(buf)); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, + + ASSERT_THAT(sendto(bind_.get(), buf, sizeof(buf), 0, reinterpret_cast(&addr), addrlen), SyscallSucceedsWithValue(sizeof(buf))); // Receive the data. char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); - // Disconnect s_. - struct sockaddr addr = {}; - addr.sa_family = AF_UNSPEC; - ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), SyscallSucceeds()); - // Connect s_ loopback:TestPort. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + // Disconnect sock_. + struct sockaddr unspec = {}; + unspec.sa_family = AF_UNSPEC; + ASSERT_THAT(connect(sock_.get(), &unspec, sizeof(unspec.sa_family)), + SyscallSucceeds()); } } TEST_P(UdpSocketTest, Connect) { - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); // Check that we're connected to the right peer. struct sockaddr_storage peer; socklen_t peerlen = sizeof(peer); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), - SyscallSucceeds()); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&peer), &peerlen), + SyscallSucceeds()); EXPECT_EQ(peerlen, addrlen_); - EXPECT_EQ(memcmp(&peer, addr_[0], addrlen_), 0); + EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0); // Try to bind after connect. - EXPECT_THAT(bind(s_, addr_[1], addrlen_), SyscallFailsWithErrno(EINVAL)); + struct sockaddr_storage any = InetAnyAddr(); + EXPECT_THAT( + bind(sock_.get(), reinterpret_cast(&any), addrlen_), + SyscallFailsWithErrno(EINVAL)); + + struct sockaddr_storage bind2_storage = InetLoopbackAddr(); + struct sockaddr* bind2_addr = + reinterpret_cast(&bind2_storage); + FileDescriptor bind2 = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetFamily(), SOCK_DGRAM, IPPROTO_UDP)); + ASSERT_NO_ERRNO(BindSocket(bind2.get(), bind2_addr)); // Try to connect again. 
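A datagram socket accepts repeated connect() calls; each call simply replaces the remembered peer, which getpeername() then reports. A standalone sketch of that behavior (the helper name is hypothetical, and it assumes AF_INET addresses):

#include <netinet/in.h>
#include <sys/socket.h>

// Hypothetical helper: connect a UDP socket twice and confirm getpeername()
// reflects the most recent destination.
bool ReconnectUdp(int fd, const sockaddr_in& first, const sockaddr_in& second) {
  if (connect(fd, reinterpret_cast<const sockaddr*>(&first), sizeof(first)) != 0)
    return false;
  if (connect(fd, reinterpret_cast<const sockaddr*>(&second), sizeof(second)) != 0)
    return false;
  sockaddr_in peer = {};
  socklen_t len = sizeof(peer);
  if (getpeername(fd, reinterpret_cast<sockaddr*>(&peer), &len) != 0)
    return false;
  return peer.sin_port == second.sin_port;
}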
- EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + EXPECT_THAT(connect(sock_.get(), bind2_addr, addrlen_), SyscallSucceeds()); // Check that peer name changed. peerlen = sizeof(peer); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), - SyscallSucceeds()); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&peer), &peerlen), + SyscallSucceeds()); EXPECT_EQ(peerlen, addrlen_); - EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); + EXPECT_EQ(memcmp(&peer, bind2_addr, addrlen_), 0); } -void ConnectAny(AddressFamily family, int sockfd, uint16_t port) { - struct sockaddr_storage addr = {}; - - // Precondition check. - { - socklen_t addrlen = sizeof(addr); - EXPECT_THAT( - getsockname(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - - if (family == AddressFamily::kIpv4) { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY)); - } else { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - struct in6_addr any = IN6ADDR_ANY_INIT; - EXPECT_EQ(memcmp(&addr_out->sin6_addr, &any, sizeof(in6_addr)), 0); - } - - { - socklen_t addrlen = sizeof(addr); - EXPECT_THAT( - getpeername(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); - } - - struct sockaddr_storage baddr = {}; - if (family == AddressFamily::kIpv4) { - auto addr_in = reinterpret_cast(&baddr); - addrlen = sizeof(*addr_in); - addr_in->sin_family = AF_INET; - addr_in->sin_addr.s_addr = htonl(INADDR_ANY); - addr_in->sin_port = port; - } else { - auto addr_in = reinterpret_cast(&baddr); - addrlen = sizeof(*addr_in); - addr_in->sin6_family = AF_INET6; - addr_in->sin6_port = port; - if (family == AddressFamily::kIpv6) { - addr_in->sin6_addr = IN6ADDR_ANY_INIT; - } else { - TestAddress const& v4_mapped_any = V4MappedAny(); - addr_in->sin6_addr = - reinterpret_cast(&v4_mapped_any.addr) - ->sin6_addr; - } - } - - // TODO(b/138658473): gVisor doesn't allow connecting to the zero port. - if (port == 0) { - SKIP_IF(IsRunningOnGvisor()); - } - - ASSERT_THAT(connect(sockfd, reinterpret_cast(&baddr), addrlen), - SyscallSucceeds()); - } - - // Postcondition check. - { - socklen_t addrlen = sizeof(addr); - EXPECT_THAT( - getsockname(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); +TEST_P(UdpSocketTest, ConnectAnyZero) { + // TODO(138658473): Enable when we can connect to port 0 with gVisor. 
+ SKIP_IF(IsRunningOnGvisor()); - if (family == AddressFamily::kIpv4) { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK)); - } else { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - struct in6_addr loopback; - if (family == AddressFamily::kIpv6) { - loopback = IN6ADDR_LOOPBACK_INIT; - } else { - TestAddress const& v4_mapped_loopback = V4MappedLoopback(); - loopback = reinterpret_cast( - &v4_mapped_loopback.addr) - ->sin6_addr; - } - - EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); - } + struct sockaddr_storage any = InetAnyAddr(); + EXPECT_THAT( + connect(sock_.get(), reinterpret_cast(&any), addrlen_), + SyscallSucceeds()); - addrlen = sizeof(addr); - if (port == 0) { - EXPECT_THAT( - getpeername(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); - } else { - EXPECT_THAT( - getpeername(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - } - } + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); } -TEST_P(UdpSocketTest, ConnectAny) { ConnectAny(GetParam(), s_, 0); } - TEST_P(UdpSocketTest, ConnectAnyWithPort) { - auto port = *Port(reinterpret_cast(addr_[1])); - ConnectAny(GetParam(), s_, port); + ASSERT_NO_ERRNO(BindAny()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); } -void DisconnectAfterConnectAny(AddressFamily family, int sockfd, int port) { - struct sockaddr_storage addr = {}; +TEST_P(UdpSocketTest, DisconnectAfterConnectAny) { + // TODO(138658473): Enable when we can connect to port 0 with gVisor. + SKIP_IF(IsRunningOnGvisor()); + struct sockaddr_storage any = InetAnyAddr(); + EXPECT_THAT( + connect(sock_.get(), reinterpret_cast(&any), addrlen_), + SyscallSucceeds()); + struct sockaddr_storage addr; socklen_t addrlen = sizeof(addr); - struct sockaddr_storage baddr = {}; - if (family == AddressFamily::kIpv4) { - auto addr_in = reinterpret_cast(&baddr); - addrlen = sizeof(*addr_in); - addr_in->sin_family = AF_INET; - addr_in->sin_addr.s_addr = htonl(INADDR_ANY); - addr_in->sin_port = port; - } else { - auto addr_in = reinterpret_cast(&baddr); - addrlen = sizeof(*addr_in); - addr_in->sin6_family = AF_INET6; - addr_in->sin6_port = port; - if (family == AddressFamily::kIpv6) { - addr_in->sin6_addr = IN6ADDR_ANY_INIT; - } else { - TestAddress const& v4_mapped_any = V4MappedAny(); - addr_in->sin6_addr = - reinterpret_cast(&v4_mapped_any.addr) - ->sin6_addr; - } - } + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); - // TODO(b/138658473): gVisor doesn't allow connecting to the zero port. - if (port == 0) { - SKIP_IF(IsRunningOnGvisor()); - } + Disconnect(sock_.get()); +} - ASSERT_THAT(connect(sockfd, reinterpret_cast(&baddr), addrlen), - SyscallSucceeds()); - // Now the socket is bound to the loopback address. 
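The disconnect tests rely on the AF_UNSPEC idiom: connecting a UDP socket to an address whose family is AF_UNSPEC clears the remembered peer, and the fixture's Disconnect() helper additionally checks that the local address then reads back as the wildcard. A minimal standalone sketch (the helper name is hypothetical):

#include <sys/socket.h>
#include <cstring>

// Hypothetical helper: "disconnect" a datagram socket by connecting it to an
// address with sa_family == AF_UNSPEC.
int DisconnectUdp(int fd) {
  sockaddr addr;
  std::memset(&addr, 0, sizeof(addr));
  addr.sa_family = AF_UNSPEC;
  return connect(fd, &addr, sizeof(addr));
}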
+TEST_P(UdpSocketTest, DisconnectAfterConnectAnyWithPort) { + ASSERT_NO_ERRNO(BindAny()); + EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); - // Disconnect - addrlen = sizeof(addr); - addr.ss_family = AF_UNSPEC; - ASSERT_THAT(connect(sockfd, reinterpret_cast(&addr), addrlen), - SyscallSucceeds()); + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); - // Check that after disconnect the socket is bound to the ANY address. - EXPECT_THAT(getsockname(sockfd, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); - if (family == AddressFamily::kIpv4) { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_ANY)); - } else { - auto addr_out = reinterpret_cast(&addr); - EXPECT_EQ(addrlen, sizeof(*addr_out)); - struct in6_addr loopback = IN6ADDR_ANY_INIT; + EXPECT_EQ(addrlen, addrlen_); + EXPECT_EQ(*Port(&bind_addr_storage_), *Port(&addr)); - EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); - } + Disconnect(sock_.get()); } -TEST_P(UdpSocketTest, DisconnectAfterConnectAny) { - DisconnectAfterConnectAny(GetParam(), s_, 0); -} +TEST_P(UdpSocketTest, DisconnectAfterBind) { + ASSERT_NO_ERRNO(BindLoopback()); -TEST_P(UdpSocketTest, DisconnectAfterConnectAnyWithPort) { - auto port = *Port(reinterpret_cast(addr_[1])); - DisconnectAfterConnectAny(GetParam(), s_, port); -} + // Bind to the next port above bind_. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_NO_ERRNO(BindSocket(sock_.get(), addr)); -TEST_P(UdpSocketTest, DisconnectAfterBind) { - ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); // Connect the socket. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); - struct sockaddr_storage addr = {}; - addr.ss_family = AF_UNSPEC; - EXPECT_THAT( - connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), - SyscallSucceeds()); + struct sockaddr_storage unspec = {}; + unspec.ss_family = AF_UNSPEC; + EXPECT_THAT(connect(sock_.get(), reinterpret_cast(&unspec), + sizeof(unspec.ss_family)), + SyscallSucceeds()); // Check that we're still bound. 
- socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + socklen_t addrlen = sizeof(unspec); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast(&unspec), &addrlen), + SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0); + EXPECT_EQ(memcmp(addr, &unspec, addrlen_), 0); addrlen = sizeof(addr); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), + EXPECT_THAT(getpeername(sock_.get(), addr, &addrlen), SyscallFailsWithErrno(ENOTCONN)); } TEST_P(UdpSocketTest, BindToAnyConnnectToLocalhost) { - struct sockaddr_storage baddr = {}; - auto port = *Port(reinterpret_cast(addr_[1])); - if (GetParam() == AddressFamily::kIpv4) { - auto addr_in = reinterpret_cast(&baddr); - addr_in->sin_family = AF_INET; - addr_in->sin_port = port; - addr_in->sin_addr.s_addr = htonl(INADDR_ANY); - } else { - auto addr_in = reinterpret_cast(&baddr); - addr_in->sin6_family = AF_INET6; - addr_in->sin6_port = port; - addr_in->sin6_scope_id = 0; - addr_in->sin6_addr = IN6ADDR_ANY_INIT; - } - ASSERT_THAT(bind(s_, reinterpret_cast(&baddr), addrlen_), - SyscallSucceeds()); - // Connect the socket. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindAny()); - struct sockaddr_storage addr = {}; + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + + // Connect the socket. + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); + + EXPECT_THAT(getsockname(bind_.get(), addr, &addrlen), SyscallSucceeds()); // If the socket is bound to ANY and connected to a loopback address, // getsockname() has to return the loopback address. if (GetParam() == AddressFamily::kIpv4) { - auto addr_out = reinterpret_cast(&addr); + auto addr_out = reinterpret_cast(addr); EXPECT_EQ(addrlen, sizeof(*addr_out)); EXPECT_EQ(addr_out->sin_addr.s_addr, htonl(INADDR_LOOPBACK)); } else { - auto addr_out = reinterpret_cast(&addr); + auto addr_out = reinterpret_cast(addr); struct in6_addr loopback = IN6ADDR_LOOPBACK_INIT; EXPECT_EQ(addrlen, sizeof(*addr_out)); EXPECT_EQ(memcmp(&addr_out->sin6_addr, &loopback, sizeof(in6_addr)), 0); @@ -572,91 +529,97 @@ TEST_P(UdpSocketTest, BindToAnyConnnectToLocalhost) { } TEST_P(UdpSocketTest, DisconnectAfterBindToAny) { - struct sockaddr_storage baddr = {}; - socklen_t addrlen; - auto port = *Port(reinterpret_cast(addr_[1])); - if (GetParam() == AddressFamily::kIpv4) { - auto addr_in = reinterpret_cast(&baddr); - addr_in->sin_family = AF_INET; - addr_in->sin_port = port; - addr_in->sin_addr.s_addr = htonl(INADDR_ANY); - } else { - auto addr_in = reinterpret_cast(&baddr); - addr_in->sin6_family = AF_INET6; - addr_in->sin6_port = port; - addr_in->sin6_scope_id = 0; - addr_in->sin6_addr = IN6ADDR_ANY_INIT; - } - ASSERT_THAT(bind(s_, reinterpret_cast(&baddr), addrlen_), - SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + + struct sockaddr_storage any_storage = InetAnyAddr(); + struct sockaddr* any = reinterpret_cast(&any_storage); + SetPort(&any_storage, *Port(&bind_addr_storage_) + 1); + + ASSERT_NO_ERRNO(BindSocket(sock_.get(), any)); + // Connect the socket. 
- ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); - struct sockaddr_storage addr = {}; - addr.ss_family = AF_UNSPEC; - EXPECT_THAT( - connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), - SyscallSucceeds()); + Disconnect(sock_.get()); // Check that we're still bound. - addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, &baddr, addrlen), 0); + EXPECT_EQ(memcmp(&addr, any, addrlen), 0); addrlen = sizeof(addr); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&addr), &addrlen), - SyscallFailsWithErrno(ENOTCONN)); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallFailsWithErrno(ENOTCONN)); } TEST_P(UdpSocketTest, Disconnect) { + ASSERT_NO_ERRNO(BindLoopback()); + + struct sockaddr_storage any_storage = InetAnyAddr(); + struct sockaddr* any = reinterpret_cast(&any_storage); + SetPort(&any_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_NO_ERRNO(BindSocket(sock_.get(), any)); + for (int i = 0; i < 2; i++) { // Try to connect again. - EXPECT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + EXPECT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); // Check that we're connected to the right peer. struct sockaddr_storage peer; socklen_t peerlen = sizeof(peer); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), - SyscallSucceeds()); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&peer), &peerlen), + SyscallSucceeds()); EXPECT_EQ(peerlen, addrlen_); - EXPECT_EQ(memcmp(&peer, addr_[2], addrlen_), 0); + EXPECT_EQ(memcmp(&peer, bind_addr_, addrlen_), 0); // Try to disconnect. struct sockaddr_storage addr = {}; addr.ss_family = AF_UNSPEC; - EXPECT_THAT( - connect(s_, reinterpret_cast(&addr), sizeof(addr.ss_family)), - SyscallSucceeds()); + EXPECT_THAT(connect(sock_.get(), reinterpret_cast(&addr), + sizeof(addr.ss_family)), + SyscallSucceeds()); peerlen = sizeof(peer); - EXPECT_THAT(getpeername(s_, reinterpret_cast(&peer), &peerlen), - SyscallFailsWithErrno(ENOTCONN)); + EXPECT_THAT( + getpeername(sock_.get(), reinterpret_cast(&peer), &peerlen), + SyscallFailsWithErrno(ENOTCONN)); // Check that we're still bound. 
socklen_t addrlen = sizeof(addr); - EXPECT_THAT(getsockname(s_, reinterpret_cast(&addr), &addrlen), - SyscallSucceeds()); + EXPECT_THAT( + getsockname(sock_.get(), reinterpret_cast(&addr), &addrlen), + SyscallSucceeds()); EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(*Port(&addr), 0); + EXPECT_EQ(*Port(&addr), *Port(&any_storage)); } } TEST_P(UdpSocketTest, ConnectBadAddress) { struct sockaddr addr = {}; - addr.sa_family = addr_[0]->sa_family; - ASSERT_THAT(connect(s_, &addr, sizeof(addr.sa_family)), + addr.sa_family = GetFamily(); + ASSERT_THAT(connect(sock_.get(), &addr, sizeof(addr.sa_family)), SyscallFailsWithErrno(EINVAL)); } TEST_P(UdpSocketTest, SendToAddressOtherThanConnected) { - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + + struct sockaddr_storage addr_storage = InetAnyAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); // Send to a different destination than we're connected to. char buf[512]; - EXPECT_THAT(sendto(s_, buf, sizeof(buf), 0, addr_[1], addrlen_), + EXPECT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, addr, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); } @@ -664,24 +627,27 @@ TEST_P(UdpSocketTest, ZerolengthWriteAllowed) { // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes. SKIP_IF(IsRunningWithHostinet()); - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + // Connect to loopback:bind_addr_+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); - // Bind t_ to loopback:TestPort+1. - ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + // Bind sock to loopback:bind_addr_+1. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); char buf[3]; - // Send zero length packet from s_ to t_. - ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); + // Send zero length packet from bind_ to sock_. + ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0)); - struct pollfd pfd = {t_, POLLIN, 0}; - ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), + struct pollfd pfd = {sock_.get(), POLLIN, 0}; + ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout*/ 1000), SyscallSucceedsWithValue(1)); // Receive the packet. char received[3]; - EXPECT_THAT(read(t_, received, sizeof(received)), + EXPECT_THAT(read(sock_.get(), received, sizeof(received)), SyscallSucceedsWithValue(0)); } @@ -689,216 +655,252 @@ TEST_P(UdpSocketTest, ZerolengthWriteAllowedNonBlockRead) { // TODO(gvisor.dev/issue/1202): Hostinet does not support zero length writes. SKIP_IF(IsRunningWithHostinet()); - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); - // Bind t_ to loopback:TestPort+1. - ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + // Connect to loopback:bind_addr_port+1. 
+ struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); - // Set t_ to non-blocking. + // Bind sock to loopback:bind_addr_port+1. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); + + // Set sock to non-blocking. int opts = 0; - ASSERT_THAT(opts = fcntl(t_, F_GETFL), SyscallSucceeds()); - ASSERT_THAT(fcntl(t_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds()); + ASSERT_THAT(opts = fcntl(sock_.get(), F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(sock_.get(), F_SETFL, opts | O_NONBLOCK), + SyscallSucceeds()); char buf[3]; - // Send zero length packet from s_ to t_. - ASSERT_THAT(write(s_, buf, 0), SyscallSucceedsWithValue(0)); + // Send zero length packet from bind_ to sock_. + ASSERT_THAT(write(bind_.get(), buf, 0), SyscallSucceedsWithValue(0)); - struct pollfd pfd = {t_, POLLIN, 0}; + struct pollfd pfd = {sock_.get(), POLLIN, 0}; ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), SyscallSucceedsWithValue(1)); // Receive the packet. char received[3]; - EXPECT_THAT(read(t_, received, sizeof(received)), + EXPECT_THAT(read(sock_.get(), received, sizeof(received)), SyscallSucceedsWithValue(0)); - EXPECT_THAT(read(t_, received, sizeof(received)), + EXPECT_THAT(read(sock_.get(), received, sizeof(received)), SyscallFailsWithErrno(EAGAIN)); } TEST_P(UdpSocketTest, SendAndReceiveNotConnected) { - // Bind s_ to loopback. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); - // Send some data to s_. + // Send some data to bind_. char buf[512]; RandomizeBuffer(buf, sizeof(buf)); - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); // Receive the data. char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); } TEST_P(UdpSocketTest, SendAndReceiveConnected) { - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); - // Bind t_ to loopback:TestPort+1. - ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + // Bind sock to loopback:TestPort+1. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); - // Send some data from t_ to s_. + // Send some data from sock to bind_. char buf[512]; RandomizeBuffer(buf, sizeof(buf)); - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); // Receive the data. 
char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); } TEST_P(UdpSocketTest, ReceiveFromNotConnected) { - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); - // Bind t_ to loopback:TestPort+2. - ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); - // Send some data from t_ to s_. + // Bind sock to loopback:bind_addr_port+2. + struct sockaddr_storage addr2_storage = InetLoopbackAddr(); + struct sockaddr* addr2 = reinterpret_cast(&addr2_storage); + SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2); + ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds()); + + // Send some data from sock to bind_. char buf[512]; - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); - // Check that the data isn't_ received because it was sent from a different + // Check that the data isn't received because it was sent from a different // address than we're connected. - EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + EXPECT_THAT(recv(sock_.get(), buf, sizeof(buf), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); } TEST_P(UdpSocketTest, ReceiveBeforeConnect) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); - // Bind t_ to loopback:TestPort+2. - ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); + // Bind sock to loopback:bind_addr_port+2. + struct sockaddr_storage addr2_storage = InetLoopbackAddr(); + struct sockaddr* addr2 = reinterpret_cast(&addr2_storage); + SetPort(&addr2_storage, *Port(&bind_addr_storage_) + 2); + ASSERT_THAT(bind(sock_.get(), addr2, addrlen_), SyscallSucceeds()); - // Send some data from t_ to s_. + // Send some data from sock to bind_. char buf[512]; RandomizeBuffer(buf, sizeof(buf)); - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); // Connect to loopback:TestPort+1. - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); // Receive the data. It works because it was sent before the connect. char received[sizeof(buf)]; - EXPECT_THAT(recv(s_, received, sizeof(received), 0), + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); // Send again. This time it should not be received. 
- ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); - EXPECT_THAT(recv(s_, buf, sizeof(buf), MSG_DONTWAIT), + EXPECT_THAT(recv(bind_.get(), buf, sizeof(buf), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); } TEST_P(UdpSocketTest, ReceiveFrom) { - // Bind s_ to loopback:TestPort, and connect to loopback:TestPort+1. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[1], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); - // Bind t_ to loopback:TestPort+1. - ASSERT_THAT(bind(t_, addr_[1], addrlen_), SyscallSucceeds()); + // Bind sock to loopback:TestPort+1. + ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); - // Send some data from t_ to s_. + // Send some data from sock to bind_. char buf[512]; RandomizeBuffer(buf, sizeof(buf)); - ASSERT_THAT(sendto(t_, buf, sizeof(buf), 0, addr_[0], addrlen_), + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, bind_addr_, addrlen_), SyscallSucceedsWithValue(sizeof(buf))); // Receive the data and sender address. char received[sizeof(buf)]; - struct sockaddr_storage addr; - socklen_t addrlen = sizeof(addr); - EXPECT_THAT(recvfrom(s_, received, sizeof(received), 0, - reinterpret_cast(&addr), &addrlen), + struct sockaddr_storage addr2; + socklen_t addr2len = sizeof(addr2); + EXPECT_THAT(recvfrom(bind_.get(), received, sizeof(received), 0, + reinterpret_cast(&addr2), &addr2len), SyscallSucceedsWithValue(sizeof(received))); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); - EXPECT_EQ(addrlen, addrlen_); - EXPECT_EQ(memcmp(&addr, addr_[1], addrlen_), 0); + EXPECT_EQ(addr2len, addrlen_); + EXPECT_EQ(memcmp(addr, &addr2, addrlen_), 0); } TEST_P(UdpSocketTest, Listen) { - ASSERT_THAT(listen(s_, SOMAXCONN), SyscallFailsWithErrno(EOPNOTSUPP)); + ASSERT_THAT(listen(sock_.get(), SOMAXCONN), + SyscallFailsWithErrno(EOPNOTSUPP)); } TEST_P(UdpSocketTest, Accept) { - ASSERT_THAT(accept(s_, nullptr, nullptr), SyscallFailsWithErrno(EOPNOTSUPP)); + ASSERT_THAT(accept(sock_.get(), nullptr, nullptr), + SyscallFailsWithErrno(EOPNOTSUPP)); } // This test validates that a read shutdown with pending data allows the read // to proceed with the data before returning EAGAIN. TEST_P(UdpSocketTest, ReadShutdownNonblockPendingData) { - char received[512]; + ASSERT_NO_ERRNO(BindLoopback()); - // Bind t_ to loopback:TestPort+2. - ASSERT_THAT(bind(t_, addr_[2], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[1], addrlen_), SyscallSucceeds()); + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); - // Connect the socket, then try to shutdown again. - ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + // Bind to loopback:bind_addr_port+1 and connect to bind_addr_. 
+ ASSERT_THAT(bind(sock_.get(), addr, addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); // Verify that we get EWOULDBLOCK when there is nothing to read. - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + char received[512]; + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); const char* buf = "abc"; - EXPECT_THAT(write(t_, buf, 3), SyscallSucceedsWithValue(3)); + EXPECT_THAT(write(sock_.get(), buf, 3), SyscallSucceedsWithValue(3)); int opts = 0; - ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); - ASSERT_THAT(fcntl(s_, F_SETFL, opts | O_NONBLOCK), SyscallSucceeds()); - ASSERT_THAT(opts = fcntl(s_, F_GETFL), SyscallSucceeds()); + ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds()); + ASSERT_THAT(fcntl(bind_.get(), F_SETFL, opts | O_NONBLOCK), + SyscallSucceeds()); + ASSERT_THAT(opts = fcntl(bind_.get(), F_GETFL), SyscallSucceeds()); ASSERT_NE(opts & O_NONBLOCK, 0); - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds()); - struct pollfd pfd = {s_, POLLIN, 0}; + struct pollfd pfd = {bind_.get(), POLLIN, 0}; ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), SyscallSucceedsWithValue(1)); // We should get the data even though read has been shutdown. - EXPECT_THAT(recv(s_, received, 2, 0), SyscallSucceedsWithValue(2)); + EXPECT_THAT(recv(bind_.get(), received, 2, 0), SyscallSucceedsWithValue(2)); // Because we read less than the entire packet length, since it's a packet // based socket any subsequent reads should return EWOULDBLOCK. - EXPECT_THAT(recv(s_, received, 1, 0), SyscallFailsWithErrno(EWOULDBLOCK)); + EXPECT_THAT(recv(bind_.get(), received, 1, 0), + SyscallFailsWithErrno(EWOULDBLOCK)); } // This test is validating that even after a socket is shutdown if it's // reconnected it will reset the shutdown state. TEST_P(UdpSocketTest, ReadShutdownSameSocketResetsShutdownState) { char received[512]; - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); + EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); // Connect the socket, then try to shutdown again. - ASSERT_THAT(bind(s_, addr_[1], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(s_, addr_[2], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + + // Connect to loopback:bind_addr_port+1. + struct sockaddr_storage addr_storage = InetLoopbackAddr(); + struct sockaddr* addr = reinterpret_cast(&addr_storage); + SetPort(&addr_storage, *Port(&bind_addr_storage_) + 1); + ASSERT_THAT(connect(bind_.get(), addr, addrlen_), SyscallSucceeds()); - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + EXPECT_THAT(recv(bind_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); } @@ -907,24 +909,26 @@ TEST_P(UdpSocketTest, ReadShutdown) { // MSG_DONTWAIT blocks indefinitely. 
SKIP_IF(IsRunningWithHostinet()); + ASSERT_NO_ERRNO(BindLoopback()); + char received[512]; - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); + EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); // Connect the socket, then try to shutdown again. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds()); - EXPECT_THAT(recv(s_, received, sizeof(received), 0), + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(0)); } @@ -932,93 +936,94 @@ TEST_P(UdpSocketTest, ReadShutdownDifferentThread) { // TODO(gvisor.dev/issue/1202): Calling recv() after shutdown without // MSG_DONTWAIT blocks indefinitely. SKIP_IF(IsRunningWithHostinet()); + ASSERT_NO_ERRNO(BindLoopback()); char received[512]; - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); // Connect the socket, then shutdown from another thread. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); - EXPECT_THAT(recv(s_, received, sizeof(received), MSG_DONTWAIT), + EXPECT_THAT(recv(sock_.get(), received, sizeof(received), MSG_DONTWAIT), SyscallFailsWithErrno(EWOULDBLOCK)); ScopedThread t([&] { absl::SleepFor(absl::Milliseconds(200)); - EXPECT_THAT(shutdown(this->s_, SHUT_RD), SyscallSucceeds()); + EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds()); }); - EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(0)); t.Join(); - EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + EXPECT_THAT(RetryEINTR(recv)(sock_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(0)); } TEST_P(UdpSocketTest, WriteShutdown) { - EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); - EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); + EXPECT_THAT(shutdown(sock_.get(), SHUT_WR), SyscallSucceeds()); } TEST_P(UdpSocketTest, SynchronousReceive) { - // Bind s_ to loopback. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); - // Send some data to s_ from another thread. + // Send some data to bind_ from another thread. char buf[512]; RandomizeBuffer(buf, sizeof(buf)); // Receive the data prior to actually starting the other thread. 
char received[512]; - EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), MSG_DONTWAIT), - SyscallFailsWithErrno(EWOULDBLOCK)); + EXPECT_THAT( + RetryEINTR(recv)(bind_.get(), received, sizeof(received), MSG_DONTWAIT), + SyscallFailsWithErrno(EWOULDBLOCK)); // Start the thread. ScopedThread t([&] { absl::SleepFor(absl::Milliseconds(200)); - ASSERT_THAT( - sendto(this->t_, buf, sizeof(buf), 0, this->addr_[0], this->addrlen_), - SyscallSucceedsWithValue(sizeof(buf))); + ASSERT_THAT(sendto(sock_.get(), buf, sizeof(buf), 0, this->bind_addr_, + this->addrlen_), + SyscallSucceedsWithValue(sizeof(buf))); }); - EXPECT_THAT(RetryEINTR(recv)(s_, received, sizeof(received), 0), + EXPECT_THAT(RetryEINTR(recv)(bind_.get(), received, sizeof(received), 0), SyscallSucceedsWithValue(512)); EXPECT_EQ(memcmp(buf, received, sizeof(buf)), 0); } TEST_P(UdpSocketTest, BoundaryPreserved_SendRecv) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); - // Send 3 packets from t_ to s_. + // Send 3 packets from sock to bind_. constexpr int psize = 100; char buf[3 * psize]; RandomizeBuffer(buf, sizeof(buf)); for (int i = 0; i < 3; ++i) { - ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(psize)); + ASSERT_THAT( + sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(psize)); } // Receive the data as 3 separate packets. char received[6 * psize]; for (int i = 0; i < 3; ++i) { - EXPECT_THAT(recv(s_, received + i * psize, 3 * psize, 0), + EXPECT_THAT(recv(bind_.get(), received + i * psize, 3 * psize, 0), SyscallSucceedsWithValue(psize)); } EXPECT_EQ(memcmp(buf, received, 3 * psize), 0); } TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); - // Direct writes from t_ to s_. - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + // Direct writes from sock to bind_. + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); - // Send 2 packets from t_ to s_, where each packet's data consists of 2 - // discontiguous iovecs. + // Send 2 packets from sock to bind_, where each packet's data consists of + // 2 discontiguous iovecs. constexpr size_t kPieceSize = 100; char buf[4 * kPieceSize]; RandomizeBuffer(buf, sizeof(buf)); @@ -1030,7 +1035,8 @@ TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) { reinterpret_cast(buf) + (i + 2 * j) * kPieceSize); iov[j].iov_len = kPieceSize; } - ASSERT_THAT(writev(t_, iov, 2), SyscallSucceedsWithValue(2 * kPieceSize)); + ASSERT_THAT(writev(sock_.get(), iov, 2), + SyscallSucceedsWithValue(2 * kPieceSize)); } // Receive the data as 2 separate packets. @@ -1042,17 +1048,17 @@ TEST_P(UdpSocketTest, BoundaryPreserved_WritevReadv) { reinterpret_cast(received) + (i + 2 * j) * kPieceSize); iov[j].iov_len = kPieceSize; } - ASSERT_THAT(readv(s_, iov, 3), SyscallSucceedsWithValue(2 * kPieceSize)); + ASSERT_THAT(readv(bind_.get(), iov, 3), + SyscallSucceedsWithValue(2 * kPieceSize)); } EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); } TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); - // Send 2 packets from t_ to s_, where each packet's data consists of 2 - // discontiguous iovecs. 
+ // Send 2 packets from sock to bind_, where each packet's data consists of + // 2 discontiguous iovecs. constexpr size_t kPieceSize = 100; char buf[4 * kPieceSize]; RandomizeBuffer(buf, sizeof(buf)); @@ -1065,11 +1071,12 @@ TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) { iov[j].iov_len = kPieceSize; } struct msghdr msg = {}; - msg.msg_name = addr_[0]; + msg.msg_name = bind_addr_; msg.msg_namelen = addrlen_; msg.msg_iov = iov; msg.msg_iovlen = 2; - ASSERT_THAT(sendmsg(t_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize)); + ASSERT_THAT(sendmsg(sock_.get(), &msg, 0), + SyscallSucceedsWithValue(2 * kPieceSize)); } // Receive the data as 2 separate packets. @@ -1084,85 +1091,87 @@ TEST_P(UdpSocketTest, BoundaryPreserved_SendMsgRecvMsg) { struct msghdr msg = {}; msg.msg_iov = iov; msg.msg_iovlen = 3; - ASSERT_THAT(recvmsg(s_, &msg, 0), SyscallSucceedsWithValue(2 * kPieceSize)); + ASSERT_THAT(recvmsg(bind_.get(), &msg, 0), + SyscallSucceedsWithValue(2 * kPieceSize)); } EXPECT_EQ(memcmp(buf, received, 4 * kPieceSize), 0); } TEST_P(UdpSocketTest, FIONREADShutdown) { + ASSERT_NO_ERRNO(BindLoopback()); + int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); // A UDP socket must be connected before it can be shutdown. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + EXPECT_THAT(shutdown(sock_.get(), SHUT_RD), SyscallSucceeds()); n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(sock_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); } TEST_P(UdpSocketTest, FIONREADWriteShutdown) { int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); // A UDP socket must be connected before it can be shutdown. 
- ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds()); n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); const char str[] = "abc"; - ASSERT_THAT(send(s_, str, sizeof(str), 0), + ASSERT_THAT(send(bind_.get(), str, sizeof(str), 0), SyscallSucceedsWithValue(sizeof(str))); - struct pollfd pfd = {s_, POLLIN, 0}; + struct pollfd pfd = {bind_.get(), POLLIN, 0}; ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), SyscallSucceedsWithValue(1)); n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, sizeof(str)); - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds()); n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, sizeof(str)); } // NOTE: Do not use `FIONREAD` as test name because it will be replaced by the // corresponding macro and become `0x541B`. TEST_P(UdpSocketTest, Fionread) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); // Check that the bound socket with an empty buffer reports an empty first // packet. int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); - // Send 3 packets from t_ to s_. + // Send 3 packets from sock to bind_. constexpr int psize = 100; char buf[3 * psize]; RandomizeBuffer(buf, sizeof(buf)); - struct pollfd pfd = {s_, POLLIN, 0}; + struct pollfd pfd = {bind_.get(), POLLIN, 0}; for (int i = 0; i < 3; ++i) { - ASSERT_THAT(sendto(t_, buf + i * psize, psize, 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(psize)); + ASSERT_THAT( + sendto(sock_.get(), buf + i * psize, psize, 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(psize)); ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), SyscallSucceedsWithValue(1)); @@ -1170,30 +1179,30 @@ TEST_P(UdpSocketTest, Fionread) { // Check that regardless of how many packets are in the queue, the size // reported is that of a single packet. n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, psize); } } TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) { - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); // Check that the bound socket with an empty buffer reports an empty first // packet. int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); - // Send 3 packets from t_ to s_. + // Send 3 packets from sock to bind_. 
constexpr int psize = 100; char buf[3 * psize]; RandomizeBuffer(buf, sizeof(buf)); - struct pollfd pfd = {s_, POLLIN, 0}; + struct pollfd pfd = {bind_.get(), POLLIN, 0}; for (int i = 0; i < 3; ++i) { - ASSERT_THAT(sendto(t_, buf + i * psize, 0, 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(0)); + ASSERT_THAT( + sendto(sock_.get(), buf + i * psize, 0, 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(0)); // TODO(gvisor.dev/issue/2726): sending a zero-length message to a hostinet // socket does not cause a poll event to be triggered. @@ -1205,37 +1214,36 @@ TEST_P(UdpSocketTest, FIONREADZeroLengthPacket) { // Check that regardless of how many packets are in the queue, the size // reported is that of a single packet. n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); } } TEST_P(UdpSocketTest, FIONREADZeroLengthWriteShutdown) { int n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); - // Bind s_ to loopback:TestPort. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); // A UDP socket must be connected before it can be shutdown. - ASSERT_THAT(connect(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_THAT(connect(bind_.get(), bind_addr_, addrlen_), SyscallSucceeds()); n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); const char str[] = "abc"; - ASSERT_THAT(send(s_, str, 0, 0), SyscallSucceedsWithValue(0)); + ASSERT_THAT(send(bind_.get(), str, 0, 0), SyscallSucceedsWithValue(0)); n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); - EXPECT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); + EXPECT_THAT(shutdown(bind_.get(), SHUT_RD), SyscallSucceeds()); n = -1; - EXPECT_THAT(ioctl(s_, FIONREAD, &n), SyscallSucceedsWithValue(0)); + EXPECT_THAT(ioctl(bind_.get(), FIONREAD, &n), SyscallSucceedsWithValue(0)); EXPECT_EQ(n, 0); } @@ -1246,7 +1254,7 @@ TEST_P(UdpSocketTest, SoNoCheckOffByDefault) { int v = -1; socklen_t optlen = sizeof(v); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen), SyscallSucceeds()); ASSERT_EQ(v, kSockOptOff); ASSERT_EQ(optlen, sizeof(v)); @@ -1259,19 +1267,19 @@ TEST_P(UdpSocketTest, SoNoCheck) { int v = kSockOptOn; socklen_t optlen = sizeof(v); - ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, optlen), + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen), SyscallSucceeds()); v = -1; - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen), SyscallSucceeds()); ASSERT_EQ(v, kSockOptOn); ASSERT_EQ(optlen, sizeof(v)); v = kSockOptOff; - ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, optlen), + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, optlen), SyscallSucceeds()); v = -1; - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_NO_CHECK, &v, &optlen), + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_NO_CHECK, &v, &optlen), SyscallSucceeds()); ASSERT_EQ(v, kSockOptOff); ASSERT_EQ(optlen, sizeof(v)); @@ -1284,7 +1292,7 @@ 
TEST_P(UdpSocketTest, SoTimestampOffByDefault) { int v = -1; socklen_t optlen = sizeof(v); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, &optlen), + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, &optlen), SyscallSucceeds()); ASSERT_EQ(v, kSockOptOff); ASSERT_EQ(optlen, sizeof(v)); @@ -1295,18 +1303,19 @@ TEST_P(UdpSocketTest, SoTimestamp) { // supported by hostinet. SKIP_IF(IsRunningWithHostinet()); - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); int v = 1; - ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), SyscallSucceeds()); char buf[3]; - // Send zero length packet from t_ to s_. - ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + // Send zero length packet from sock to bind_. + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0), + SyscallSucceedsWithValue(0)); - struct pollfd pfd = {s_, POLLIN, 0}; + struct pollfd pfd = {bind_.get(), POLLIN, 0}; ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), SyscallSucceedsWithValue(1)); @@ -1320,7 +1329,8 @@ TEST_P(UdpSocketTest, SoTimestamp) { msg.msg_control = cmsgbuf; msg.msg_controllen = sizeof(cmsgbuf); - ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0)); + ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0), + SyscallSucceedsWithValue(0)); struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); ASSERT_NE(cmsg, nullptr); @@ -1334,36 +1344,37 @@ TEST_P(UdpSocketTest, SoTimestamp) { ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); // There should be nothing to get via ioctl. - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallFailsWithErrno(ENOENT)); + ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), + SyscallFailsWithErrno(ENOENT)); } TEST_P(UdpSocketTest, WriteShutdownNotConnected) { - EXPECT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); + EXPECT_THAT(shutdown(bind_.get(), SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); } TEST_P(UdpSocketTest, TimestampIoctl) { // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. SKIP_IF(IsRunningWithHostinet()); - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); char buf[3]; - // Send packet from t_ to s_. - ASSERT_THAT(RetryEINTR(write)(t_, buf, sizeof(buf)), + // Send packet from sock to bind_. + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); - struct pollfd pfd = {s_, POLLIN, 0}; + struct pollfd pfd = {bind_.get(), POLLIN, 0}; ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), SyscallSucceedsWithValue(1)); // There should be no control messages. char recv_buf[sizeof(buf)]; - ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, sizeof(recv_buf))); + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf))); // A nonzero timeval should be available via ioctl. 
struct timeval tv = {}; - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallSucceeds()); + ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds()); ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); } @@ -1371,11 +1382,12 @@ TEST_P(UdpSocketTest, TimestampIoctlNothingRead) { // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. SKIP_IF(IsRunningWithHostinet()); - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); struct timeval tv = {}; - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallFailsWithErrno(ENOENT)); + ASSERT_THAT(ioctl(sock_.get(), SIOCGSTAMP, &tv), + SyscallFailsWithErrno(ENOENT)); } // Test that the timestamp accessed via SIOCGSTAMP is still accessible after @@ -1385,33 +1397,35 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { // supported by hostinet. SKIP_IF(IsRunningWithHostinet()); - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); char buf[3]; - // Send packet from t_ to s_. - ASSERT_THAT(RetryEINTR(write)(t_, buf, sizeof(buf)), + // Send packet from sock to bind_. + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); - ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0), + SyscallSucceedsWithValue(0)); - struct pollfd pfd = {s_, POLLIN, 0}; + struct pollfd pfd = {bind_.get(), POLLIN, 0}; ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), SyscallSucceedsWithValue(1)); // There should be no control messages. char recv_buf[sizeof(buf)]; - ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(s_, recv_buf, sizeof(recv_buf))); + ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(bind_.get(), recv_buf, sizeof(recv_buf))); // A nonzero timeval should be available via ioctl. struct timeval tv = {}; - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv), SyscallSucceeds()); + ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv), SyscallSucceeds()); ASSERT_TRUE(tv.tv_sec != 0 || tv.tv_usec != 0); // Enable SO_TIMESTAMP and send a message. int v = 1; - EXPECT_THAT(setsockopt(s_, SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), + EXPECT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_TIMESTAMP, &v, sizeof(v)), SyscallSucceeds()); - ASSERT_THAT(RetryEINTR(write)(t_, buf, 0), SyscallSucceedsWithValue(0)); + ASSERT_THAT(RetryEINTR(write)(sock_.get(), buf, 0), + SyscallSucceedsWithValue(0)); ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, /*timeout=*/1000), SyscallSucceedsWithValue(1)); @@ -1424,13 +1438,14 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { msg.msg_iovlen = 1; msg.msg_control = cmsgbuf; msg.msg_controllen = sizeof(cmsgbuf); - ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, 0), SyscallSucceedsWithValue(0)); + ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &msg, 0), + SyscallSucceedsWithValue(0)); struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); ASSERT_NE(cmsg, nullptr); // The ioctl should return the exact same values as before. 
struct timeval tv2 = {}; - ASSERT_THAT(ioctl(s_, SIOCGSTAMP, &tv2), SyscallSucceeds()); + ASSERT_THAT(ioctl(bind_.get(), SIOCGSTAMP, &tv2), SyscallSucceeds()); ASSERT_EQ(tv.tv_sec, tv2.tv_sec); ASSERT_EQ(tv.tv_usec, tv2.tv_usec); } @@ -1439,8 +1454,8 @@ TEST_P(UdpSocketTest, TimestampIoctlPersistence) { // outgoing packets, and that a receiving socket with IP_RECVTOS or // IPV6_RECVTCLASS will create the corresponding control message. TEST_P(UdpSocketTest, SetAndReceiveTOS) { - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); // Allow socket to receive control message. int recv_level = SOL_IP; @@ -1449,9 +1464,9 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { recv_level = SOL_IPV6; recv_type = IPV6_RECVTCLASS; } - ASSERT_THAT( - setsockopt(s_, recv_level, recv_type, &kSockOptOn, sizeof(kSockOptOn)), - SyscallSucceeds()); + ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); // Set socket TOS. int sent_level = recv_level; @@ -1460,9 +1475,9 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { sent_type = IPV6_TCLASS; } int sent_tos = IPTOS_LOWDELAY; // Choose some TOS value. - ASSERT_THAT( - setsockopt(t_, sent_level, sent_type, &sent_tos, sizeof(sent_tos)), - SyscallSucceeds()); + ASSERT_THAT(setsockopt(sock_.get(), sent_level, sent_type, &sent_tos, + sizeof(sent_tos)), + SyscallSucceeds()); // Prepare message to send. constexpr size_t kDataLength = 1024; @@ -1474,7 +1489,7 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { sent_msg.msg_iov = &sent_iov; sent_msg.msg_iovlen = 1; - ASSERT_THAT(RetryEINTR(sendmsg)(t_, &sent_msg, 0), + ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0), SyscallSucceedsWithValue(kDataLength)); // Receive message. @@ -1492,7 +1507,7 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { std::vector received_cmsgbuf(CMSG_SPACE(cmsg_data_len)); received_msg.msg_control = &received_cmsgbuf[0]; received_msg.msg_controllen = received_cmsgbuf.size(); - ASSERT_THAT(RetryEINTR(recvmsg)(s_, &received_msg, 0), + ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0), SyscallSucceedsWithValue(kDataLength)); struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg); @@ -1511,8 +1526,9 @@ TEST_P(UdpSocketTest, SetAndReceiveTOS) { TEST_P(UdpSocketTest, SendAndReceiveTOS) { // TODO(b/146661005): Setting TOS via cmsg not supported for netstack. SKIP_IF(IsRunningOnGvisor() && !IsRunningWithHostinet()); - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); - ASSERT_THAT(connect(t_, addr_[0], addrlen_), SyscallSucceeds()); + + ASSERT_NO_ERRNO(BindLoopback()); + ASSERT_THAT(connect(sock_.get(), bind_addr_, addrlen_), SyscallSucceeds()); // Allow socket to receive control message. int recv_level = SOL_IP; @@ -1522,9 +1538,9 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { recv_type = IPV6_RECVTCLASS; } int recv_opt = kSockOptOn; - ASSERT_THAT( - setsockopt(s_, recv_level, recv_type, &recv_opt, sizeof(recv_opt)), - SyscallSucceeds()); + ASSERT_THAT(setsockopt(bind_.get(), recv_level, recv_type, &recv_opt, + sizeof(recv_opt)), + SyscallSucceeds()); // Prepare message to send. 
constexpr size_t kDataLength = 1024; @@ -1555,7 +1571,7 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { sent_cmsg->cmsg_type = sent_type; *(int8_t*)CMSG_DATA(sent_cmsg) = sent_tos; - ASSERT_THAT(RetryEINTR(sendmsg)(t_, &sent_msg, 0), + ASSERT_THAT(RetryEINTR(sendmsg)(sock_.get(), &sent_msg, 0), SyscallSucceedsWithValue(kDataLength)); // Receive message. @@ -1569,7 +1585,7 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { std::vector received_cmsgbuf(CMSG_SPACE(cmsg_data_len)); received_msg.msg_control = &received_cmsgbuf[0]; received_msg.msg_controllen = CMSG_LEN(cmsg_data_len); - ASSERT_THAT(RetryEINTR(recvmsg)(s_, &received_msg, 0), + ASSERT_THAT(RetryEINTR(recvmsg)(bind_.get(), &received_msg, 0), SyscallSucceedsWithValue(kDataLength)); struct cmsghdr* cmsg = CMSG_FIRSTHDR(&received_msg); @@ -1585,27 +1601,29 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) { TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) { // Discover minimum buffer size by setting it to zero. constexpr int kRcvBufSz = 0; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), - SyscallSucceeds()); + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); int min = 0; socklen_t min_len = sizeof(min); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len), SyscallSucceeds()); - // Bind s_ to loopback. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + // Bind bind_ to loopback. + ASSERT_NO_ERRNO(BindLoopback()); { // Send data of size min and verify that it's received. std::vector buf(min); RandomizeBuffer(buf.data(), buf.size()); - ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); std::vector received(buf.size()); - EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT), - SyscallSucceedsWithValue(received.size())); + EXPECT_THAT( + recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); } { @@ -1614,30 +1632,32 @@ TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) { // is currently empty. std::vector buf(min + 1); RandomizeBuffer(buf.data(), buf.size()); - ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); std::vector received(buf.size()); - EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT), - SyscallSucceedsWithValue(received.size())); + EXPECT_THAT( + recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); } } // Test that receive buffer limits are enforced. TEST_P(UdpSocketTest, RecvBufLimits) { // Bind s_ to loopback. - ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds()); + ASSERT_NO_ERRNO(BindLoopback()); int min = 0; { // Discover minimum buffer size by trying to set it to zero. 
constexpr int kRcvBufSz = 0; - ASSERT_THAT( - setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), - SyscallSucceeds()); + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, + sizeof(kRcvBufSz)), + SyscallSucceeds()); socklen_t min_len = sizeof(min); - ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len), SyscallSucceeds()); } @@ -1648,51 +1668,58 @@ TEST_P(UdpSocketTest, RecvBufLimits) { new_rcv_buf_sz = min * 2; } - ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, + ASSERT_THAT(setsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, sizeof(new_rcv_buf_sz)), SyscallSucceeds()); int rcv_buf_sz = 0; { socklen_t rcv_buf_len = sizeof(rcv_buf_sz); - ASSERT_THAT( - getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len), - SyscallSucceeds()); + ASSERT_THAT(getsockopt(bind_.get(), SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, + &rcv_buf_len), + SyscallSucceeds()); } { std::vector buf(min); RandomizeBuffer(buf.data(), buf.size()); - ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(buf.size())); - ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(buf.size())); - ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(buf.size())); - ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); int sent = 4; if (IsRunningOnGvisor() && !IsRunningWithHostinet()) { // Linux seems to drop the 4th packet even though technically it should // fit in the receive buffer. - ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_), - SyscallSucceedsWithValue(buf.size())); + ASSERT_THAT( + sendto(sock_.get(), buf.data(), buf.size(), 0, bind_addr_, addrlen_), + SyscallSucceedsWithValue(buf.size())); sent++; } for (int i = 0; i < sent - 1; i++) { // Receive the data. std::vector received(buf.size()); - EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT), - SyscallSucceedsWithValue(received.size())); + EXPECT_THAT( + recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT), + SyscallSucceedsWithValue(received.size())); EXPECT_EQ(memcmp(buf.data(), received.data(), buf.size()), 0); } // The last receive should fail with EAGAIN as the last packet should have // been dropped due to lack of space in the receive buffer. 
std::vector received(buf.size()); - EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT), - SyscallFailsWithErrno(EAGAIN)); + EXPECT_THAT( + recv(bind_.get(), received.data(), received.size(), MSG_DONTWAIT), + SyscallFailsWithErrno(EAGAIN)); } } diff --git a/test/syscalls/linux/udp_socket_test_cases.h b/test/syscalls/linux/udp_socket_test_cases.h index 2fd79d99e..f7e25c805 100644 --- a/test/syscalls/linux/udp_socket_test_cases.h +++ b/test/syscalls/linux/udp_socket_test_cases.h @@ -15,8 +15,12 @@ #ifndef THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ #define THIRD_PARTY_GOLANG_GVISOR_TEST_SYSCALLS_LINUX_SOCKET_IPV4_UDP_UNBOUND_H_ +#include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" namespace gvisor { namespace testing { @@ -32,42 +36,46 @@ class UdpSocketTest // Creates two sockets that will be used by test cases. void SetUp() override; - // Closes the sockets created by SetUp(). - void TearDown() override { - EXPECT_THAT(close(s_), SyscallSucceeds()); - EXPECT_THAT(close(t_), SyscallSucceeds()); + // Binds the socket bind_ to the loopback and updates bind_addr_. + PosixError BindLoopback(); - for (size_t i = 0; i < ABSL_ARRAYSIZE(ports_); ++i) { - ASSERT_NO_ERRNO(FreeAvailablePort(ports_[i])); - } - } + // Binds the socket bind_ to Any and updates bind_addr_. + PosixError BindAny(); - // First UDP socket. - int s_; + // Binds given socket to address addr and updates. + PosixError BindSocket(int socket, struct sockaddr* addr); - // Second UDP socket. - int t_; + // Return initialized Any address to port 0. + struct sockaddr_storage InetAnyAddr(); - // The length of the socket address. - socklen_t addrlen_; + // Return initialized Loopback address to port 0. + struct sockaddr_storage InetLoopbackAddr(); - // Initialized address pointing to loopback and port TestPort+i. - struct sockaddr* addr_[3]; + // Disconnects socket sockfd. + void Disconnect(int sockfd); - // Initialize "any" address. - struct sockaddr* anyaddr_; + // Get family for the test. + int GetFamily(); - // Used ports. - int ports_[3]; + // Socket used by Bind methods + FileDescriptor bind_; - private: - // Storage for the loopback addresses. - struct sockaddr_storage addr_storage_[3]; + // Second socket used for tests. + FileDescriptor sock_; - // Storage for the "any" address. - struct sockaddr_storage anyaddr_storage_; -}; + // Address for bind_ socket. + struct sockaddr* bind_addr_; + // Initialized to the length based on GetFamily(). + socklen_t addrlen_; + + // Storage for bind_addr_. + struct sockaddr_storage bind_addr_storage_; + + private: + // Helper to initialize addrlen_ for the test case. + socklen_t GetAddrLength(); +}; } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 09b7791e89d9d487d03cf03cf2ba2d0fb1c9386a Mon Sep 17 00:00:00 2001 From: Craig Chi Date: Tue, 30 Jun 2020 09:03:14 -0700 Subject: Add missing ICRNL flag in master termios test Closes #2768 --- test/syscalls/linux/pty.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index aabfa6955..f9392b9e0 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -634,6 +634,11 @@ TEST_F(PtyTest, TermiosAffectsSlave) { // Verify this by setting ICRNL (which rewrites input \r to \n) and verify that // it has no effect on the master. 
TEST_F(PtyTest, MasterTermiosUnchangable) { + struct kernel_termios master_termios = {}; + EXPECT_THAT(ioctl(master_.get(), TCGETS, &master_termios), SyscallSucceeds()); + master_termios.c_lflag |= ICRNL; + EXPECT_THAT(ioctl(master_.get(), TCSETS, &master_termios), SyscallSucceeds()); + char c = '\r'; ASSERT_THAT(WriteFd(slave_.get(), &c, 1), SyscallSucceedsWithValue(1)); -- cgit v1.2.3 From dce2dfae046cdc29b2ac3b949a76d6b0ee2348a5 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 30 Jun 2020 11:24:10 -0700 Subject: Add build target for the provisional style guide. This includes the provisional style guide in the website and fixes the broken link from CONTRIBUTING.md. The style guide will be located under the "Community" category as it's related to contributing to the project. Also, add missing includes that were causing some presubmits to fail. PiperOrigin-RevId: 319061410 --- g3doc/BUILD | 9 +++++++++ test/syscalls/linux/fcntl.cc | 3 +++ website/BUILD | 1 + 3 files changed, 13 insertions(+) (limited to 'test') diff --git a/g3doc/BUILD b/g3doc/BUILD index dbbf96204..c315d38be 100644 --- a/g3doc/BUILD +++ b/g3doc/BUILD @@ -33,3 +33,12 @@ doc( subcategory = "Community", weight = "95", ) + +doc( + name = "style", + src = "style.md", + category = "Project", + permalink = "/community/style_guide/", + subcategory = "Community", + weight = "10", +) diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 9130618fa..4b9b4db99 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -18,7 +18,10 @@ #include #include +#include +#include #include +#include #include "gtest/gtest.h" #include "absl/base/macros.h" diff --git a/website/BUILD b/website/BUILD index c97b2560b..4488cb543 100644 --- a/website/BUILD +++ b/website/BUILD @@ -138,6 +138,7 @@ docs( "//g3doc:community", "//g3doc:index", "//g3doc:roadmap", + "//g3doc:style", "//g3doc/architecture_guide:performance", "//g3doc/architecture_guide:platforms", "//g3doc/architecture_guide:resources", -- cgit v1.2.3 From 44071cc7fae8a7143b0cd386b402375aafecd979 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 30 Jun 2020 15:28:06 -0700 Subject: Remove struct packing to fix compiler warning. -Waddress-of-packed-member warns on inet_aton() being used with a packed struct member. This was added in cl/291990716. PiperOrigin-RevId: 319111253 --- test/syscalls/linux/socket_netlink_route.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index fbe61c5a0..e6647a1c3 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -595,7 +595,7 @@ TEST(NetlinkRouteTest, GetRouteRequest) { ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); - struct __attribute__((__packed__)) request { + struct request { struct nlmsghdr hdr; struct rtmsg rtm; struct nlattr nla; -- cgit v1.2.3 From c9446f05347b865d75bfdacd6769fce265171b9b Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Tue, 30 Jun 2020 23:55:16 -0700 Subject: Fix two bugs in TCP sender. a) When GSO is in use we should not cap the segment to maxPayloadSize in sender.maybeSendSegment as the GSO logic will cap the segment to the correct size. Without this the host GSO is not used as we end up breaking up large segments into small MSS sized segments before writing the packets to the host. 
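A minimal C++ sketch of the capping rule described in (a), using hypothetical helper and parameter names rather than the shipped Go code (the real change is in the Go sender, pkg/tcpip/transport/tcp/snd.go, in the diff below): the send size is still clamped to the congestion/receive window limit, but the extra clamp to a single MSS is skipped whenever a GSO layer will segment the payload later.

    #include <stddef.h>

    // Sketch only; the caller supplies the window/cwnd limit, the MSS
    // (max_payload_size), and whether a GSO implementation is in use.
    size_t CapSendSize(size_t available, size_t limit, size_t max_payload_size,
                       bool gso_in_use) {
      if (available > limit) {
        available = limit;  // Respect the congestion/receive window limit.
      }
      if (!gso_in_use && available > max_payload_size) {
        available = max_payload_size;  // Cap to MSS only when GSO won't split.
      }
      return available;
    }
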
b) The check to not split a segment due to it not fitting in the receiver window when there are pending segments is incorrect as segments in writeList can be really large as we just take the write call's buffer size and create a single large segment. So a write of say 128KB will just be 1 segment in the writeList. The linux code checks if 1 MSS sized segments fits in the receiver's window and if not then does not split the current segment. gVisor's check was incorrect that it was checking if the whole segment which could be >>> 1 MSS would fit in the receiver's window. This was causing us to prematurely stop sending and falling back to retransmit timer/probe from the other end to send data. This was seen when running HTTPD benchmarks where @ HEAD when sending large files the benchmark was taking forever to run. The tcp_splitseg_mss_test.go is being deleted as the test as written doesn't test what is intended correctly. This is because GSO is enabled by default and the reason the MSS+1 sized segment is sent is because GSO is in use. A proper test will require disabling GSO on linux and netstack which is going to take a bit of work in packetimpact to do it correctly. Separately a new test probably should be written that verifies that a segment > availableWindow is not split if the availableWindow is < 1 MSS. Fixes #3107 PiperOrigin-RevId: 319172089 --- pkg/tcpip/transport/tcp/snd.go | 63 +++++++++++++-------- test/packetimpact/runner/packetimpact_test.go | 8 ++- test/packetimpact/tests/BUILD | 10 ---- test/packetimpact/tests/tcp_splitseg_mss_test.go | 71 ------------------------ 4 files changed, 46 insertions(+), 106 deletions(-) delete mode 100644 test/packetimpact/tests/tcp_splitseg_mss_test.go (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index acacb42e4..5862c32f2 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -833,25 +833,6 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se panic("Netstack queues FIN segments without data.") } - segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size())) - // If the entire segment cannot be accomodated in the receiver - // advertized window, skip splitting and sending of the segment. - // ref: net/ipv4/tcp_output.c::tcp_snd_wnd_test() - // - // Linux checks this for all segment transmits not triggered - // by a probe timer. On this condition, it defers the segment - // split and transmit to a short probe timer. - // ref: include/net/tcp.h::tcp_check_probe_timer() - // ref: net/ipv4/tcp_output.c::tcp_write_wakeup() - // - // Instead of defining a new transmit timer, we attempt to split the - // segment right here if there are no pending segments. - // If there are pending segments, segment transmits are deferred - // to the retransmit timer handler. - if s.sndUna != s.sndNxt && !segEnd.LessThan(end) { - return false - } - if !seg.sequenceNumber.LessThan(end) { return false } @@ -861,14 +842,48 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se return false } - // The segment size limit is computed as a function of sender congestion - // window and MSS. When sender congestion window is > 1, this limit can - // be larger than MSS. Ensure that the currently available send space - // is not greater than minimum of this limit and MSS. + // If the whole segment or at least 1MSS sized segment cannot + // be accomodated in the receiver advertized window, skip + // splitting and sending of the segment. 
ref: + // net/ipv4/tcp_output.c::tcp_snd_wnd_test() + // + // Linux checks this for all segment transmits not triggered by + // a probe timer. On this condition, it defers the segment split + // and transmit to a short probe timer. + // + // ref: include/net/tcp.h::tcp_check_probe_timer() + // ref: net/ipv4/tcp_output.c::tcp_write_wakeup() + // + // Instead of defining a new transmit timer, we attempt to split + // the segment right here if there are no pending segments. If + // there are pending segments, segment transmits are deferred to + // the retransmit timer handler. + if s.sndUna != s.sndNxt { + switch { + case available >= seg.data.Size(): + // OK to send, the whole segments fits in the + // receiver's advertised window. + case available >= s.maxPayloadSize: + // OK to send, at least 1 MSS sized segment fits + // in the receiver's advertised window. + default: + return false + } + } + + // The segment size limit is computed as a function of sender + // congestion window and MSS. When sender congestion window is > + // 1, this limit can be larger than MSS. Ensure that the + // currently available send space is not greater than minimum of + // this limit and MSS. if available > limit { available = limit } - if available > s.maxPayloadSize { + + // If GSO is not in use then cap available to + // maxPayloadSize. When GSO is in use the gVisor GSO logic or + // the host GSO logic will cap the segment to the correct size. + if s.ep.gso == nil && available > s.maxPayloadSize { available = s.maxPayloadSize } diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 3cac5915f..c0a2620de 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -242,7 +242,13 @@ func TestOne(t *testing.T) { } // Kill so that it will flush output. - defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + defer func() { + // Wait 1 second before killing tcpdump to give it time to flush + // any packets. On linux tests killing it immediately can + // sometimes result in partial pcaps. + time.Sleep(1 * time.Second) + testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + }() if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil { t.Fatalf("sniffer on %s never listened: %s", dut.Name, err) diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 1c9f59df5..4711c1ae6 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -212,16 +212,6 @@ packetimpact_go_test( ], ) -packetimpact_go_test( - name = "tcp_splitseg_mss", - srcs = ["tcp_splitseg_mss_test.go"], - deps = [ - "//pkg/tcpip/header", - "//test/packetimpact/testbench", - "@org_golang_x_sys//unix:go_default_library", - ], -) - packetimpact_go_test( name = "tcp_cork_mss", srcs = ["tcp_cork_mss_test.go"], diff --git a/test/packetimpact/tests/tcp_splitseg_mss_test.go b/test/packetimpact/tests/tcp_splitseg_mss_test.go deleted file mode 100644 index 9350d0988..000000000 --- a/test/packetimpact/tests/tcp_splitseg_mss_test.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tcp_splitseg_mss_test - -import ( - "flag" - "testing" - "time" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/test/packetimpact/testbench" -) - -func init() { - testbench.RegisterFlags(flag.CommandLine) -} - -// TestTCPSplitSegMSS lets the dut try to send segments larger than MSS. -// It tests if the transmitted segments are capped at MSS and are split. -func TestTCPSplitSegMSS(t *testing.T) { - dut := testbench.NewDUT(t) - defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) - conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() - - const mss = uint32(header.TCPDefaultMSS) - options := make([]byte, header.TCPOptionMSSLength) - header.EncodeMSSOption(mss, options) - conn.ConnectWithOptions(options) - - acceptFD, _ := dut.Accept(listenFD) - defer dut.Close(acceptFD) - - // Let the dut send a segment larger than MSS. - largeData := make([]byte, mss+1) - for i := 0; i < 2; i++ { - dut.Send(acceptFD, largeData, 0) - if i == 0 { - // On Linux, the initial segment goes out beyond MSS and the segment - // split occurs on retransmission. Call ExpectData to wait to - // receive the split segment. - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - } else { - if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: largeData[mss:]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - } -} -- cgit v1.2.3 From cda2979b63fad37a33706f8aa430664a9c4d0b3b Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 1 Jul 2020 08:40:31 -0700 Subject: Complete async signal delivery support in vfs2. - Support FIOASYNC, FIO{SET,GET}OWN, SIOC{G,S}PGRP (refactor getting/setting owner in the process). - Unset signal recipient when setting owner with pid == 0 and valid owner type. Updates #2923. 
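For readers unfamiliar with these interfaces, a minimal userspace sketch of what the change wires up (illustrative only, not taken from the patch; error handling elided): the SIGIO recipient is set either with fcntl(F_SETOWN) or the equivalent FIOSETOWN ioctl, asynchronous notification is toggled with FIOASYNC (the same effect as setting O_ASYNC through F_SETFL), and passing a pid of 0 clears the recipient again.

    #include <sys/ioctl.h>
    #include <unistd.h>

    // Illustrative helper: direct SIGIO for fd to the calling process and
    // enable asynchronous I/O notification on it.
    void EnableAsyncIO(int fd) {
      int pid = getpid();
      ioctl(fd, FIOSETOWN, &pid);  // Same effect as fcntl(fd, F_SETOWN, pid).
      int on = 1;
      ioctl(fd, FIOASYNC, &on);    // Same effect as O_ASYNC via fcntl(F_SETFL).
    }

    // Passing 0 unsets the signal recipient, matching the behavior described
    // above.
    void ClearAsyncOwner(int fd) {
      int none = 0;
      ioctl(fd, FIOSETOWN, &none);
    }
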
PiperOrigin-RevId: 319231420 --- pkg/sentry/kernel/fasync/fasync.go | 10 ++++++ pkg/sentry/syscalls/linux/vfs2/fd.go | 54 +++++++++++++++++++------------ pkg/sentry/syscalls/linux/vfs2/ioctl.go | 43 +++++++++++++++++++++++++ test/syscalls/BUILD | 1 + test/syscalls/linux/fcntl.cc | 57 +++++++++++++++++++++++++++++++++ 5 files changed, 144 insertions(+), 21 deletions(-) (limited to 'test') diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go index 323f1dfa5..153d2cd9b 100644 --- a/pkg/sentry/kernel/fasync/fasync.go +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -176,3 +176,13 @@ func (a *FileAsync) SetOwnerProcessGroup(requester *kernel.Task, recipient *kern a.recipientTG = nil a.recipientPG = recipient } + +// ClearOwner unsets the current signal recipient. +func (a *FileAsync) ClearOwner() { + a.mu.Lock() + defer a.mu.Unlock() + a.requester = nil + a.recipientT = nil + a.recipientTG = nil + a.recipientPG = nil +} diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 7e4c6a56e..517394ba9 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -156,11 +156,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } return uintptr(n), nil, nil case linux.F_GETOWN: - a := file.AsyncHandler() - if a == nil { + owner, hasOwner := getAsyncOwner(t, file) + if !hasOwner { return 0, nil, nil } - owner := getAsyncOwner(t, a.(*fasync.FileAsync)) if owner.Type == linux.F_OWNER_PGRP { return uintptr(-owner.PID), nil, nil } @@ -176,26 +175,21 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall ownerType = linux.F_OWNER_PGRP who = -who } - a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync) - return 0, nil, setAsyncOwner(t, a, ownerType, who) + return 0, nil, setAsyncOwner(t, file, ownerType, who) case linux.F_GETOWN_EX: - a := file.AsyncHandler() - if a == nil { + owner, hasOwner := getAsyncOwner(t, file) + if !hasOwner { return 0, nil, nil } - addr := args[2].Pointer() - owner := getAsyncOwner(t, a.(*fasync.FileAsync)) - _, err := t.CopyOut(addr, &owner) + _, err := t.CopyOut(args[2].Pointer(), &owner) return 0, nil, err case linux.F_SETOWN_EX: - addr := args[2].Pointer() var owner linux.FOwnerEx - n, err := t.CopyIn(addr, &owner) + n, err := t.CopyIn(args[2].Pointer(), &owner) if err != nil { return 0, nil, err } - a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync) - return uintptr(n), nil, setAsyncOwner(t, a, owner.Type, owner.PID) + return uintptr(n), nil, setAsyncOwner(t, file, owner.Type, owner.PID) case linux.F_GETPIPE_SZ: pipefile, ok := file.Impl().(*pipe.VFSPipeFD) if !ok { @@ -219,30 +213,48 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } } -func getAsyncOwner(t *kernel.Task, a *fasync.FileAsync) linux.FOwnerEx { - ot, otg, opg := a.Owner() +func getAsyncOwner(t *kernel.Task, fd *vfs.FileDescription) (ownerEx linux.FOwnerEx, hasOwner bool) { + a := fd.AsyncHandler() + if a == nil { + return linux.FOwnerEx{}, false + } + + ot, otg, opg := a.(*fasync.FileAsync).Owner() switch { case ot != nil: return linux.FOwnerEx{ Type: linux.F_OWNER_TID, PID: int32(t.PIDNamespace().IDOfTask(ot)), - } + }, true case otg != nil: return linux.FOwnerEx{ Type: linux.F_OWNER_PID, PID: int32(t.PIDNamespace().IDOfThreadGroup(otg)), - } + }, true case opg != nil: return linux.FOwnerEx{ Type: linux.F_OWNER_PGRP, PID: int32(t.PIDNamespace().IDOfProcessGroup(opg)), - } + }, true default: - 
return linux.FOwnerEx{} + return linux.FOwnerEx{}, true } } -func setAsyncOwner(t *kernel.Task, a *fasync.FileAsync, ownerType, pid int32) error { +func setAsyncOwner(t *kernel.Task, fd *vfs.FileDescription, ownerType, pid int32) error { + switch ownerType { + case linux.F_OWNER_TID, linux.F_OWNER_PID, linux.F_OWNER_PGRP: + // Acceptable type. + default: + return syserror.EINVAL + } + + a := fd.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync) + if pid == 0 { + a.ClearOwner() + return nil + } + switch ownerType { case linux.F_OWNER_TID: task := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go index 0399c0db4..fd6ab94b2 100644 --- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go +++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go @@ -57,6 +57,49 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall flags &^= linux.O_NONBLOCK } return 0, nil, file.SetStatusFlags(t, t.Credentials(), flags) + + case linux.FIOASYNC: + var set int32 + if _, err := t.CopyIn(args[2].Pointer(), &set); err != nil { + return 0, nil, err + } + flags := file.StatusFlags() + if set != 0 { + flags |= linux.O_ASYNC + } else { + flags &^= linux.O_ASYNC + } + file.SetStatusFlags(t, t.Credentials(), flags) + return 0, nil, nil + + case linux.FIOGETOWN, linux.SIOCGPGRP: + var who int32 + owner, hasOwner := getAsyncOwner(t, file) + if hasOwner { + if owner.Type == linux.F_OWNER_PGRP { + who = -owner.PID + } else { + who = owner.PID + } + } + _, err := t.CopyOut(args[2].Pointer(), &who) + return 0, nil, err + + case linux.FIOSETOWN, linux.SIOCSPGRP: + var who int32 + if _, err := t.CopyIn(args[2].Pointer(), &who); err != nil { + return 0, nil, err + } + ownerType := int32(linux.F_OWNER_PID) + if who < 0 { + // Check for overflow before flipping the sign. + if who-1 > who { + return 0, nil, syserror.EINVAL + } + ownerType = linux.F_OWNER_PGRP + who = -who + } + return 0, nil, setAsyncOwner(t, file, ownerType, who) } ret, err := file.Ioctl(t, t.MemoryManager(), args) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 36c178e4a..88ed36b69 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -288,6 +288,7 @@ syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:ioctl_test", + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 4b9b4db99..5467fa2c8 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -1031,6 +1031,30 @@ TEST(FcntlTest, SetOwnPgrp) { MaybeSave(); } +TEST(FcntlTest, SetOwnUnset) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + // Set and unset pid. + pid_t pid; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + + // Set and unset pgid. + pid_t pgid; + EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + MaybeSave(); +} + // F_SETOWN flips the sign of negative values, an operation that is guarded // against overflow. 
TEST(FcntlTest, SetOwnOverflow) { @@ -1141,6 +1165,39 @@ TEST(FcntlTest, SetOwnExPgrp) { MaybeSave(); } +TEST(FcntlTest, SetOwnExUnset) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + // Set and unset pid. + f_owner_ex owner = {}; + owner.type = F_OWNER_PID; + EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + owner.pid = 0; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + + // Set and unset pgid. + owner.type = F_OWNER_PGRP; + EXPECT_THAT(owner.pid = getpgrp(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + owner.pid = 0; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + MaybeSave(); +} + TEST(FcntlTest, GetOwnExTid) { FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); -- cgit v1.2.3 From 68e1c870d3ef5db71226927aee703f2fba61cab7 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Wed, 1 Jul 2020 09:28:20 -0700 Subject: Add test env variable "fuchsia" ... so that Fuchsia gets the same special cases applied to gVisor in tests when this envrionment variable is set. PiperOrigin-RevId: 319239064 --- test/util/test_util.cc | 11 +++++------ test/util/test_util.h | 3 +++ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/test/util/test_util.cc b/test/util/test_util.cc index b20758626..8a037f45f 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -40,15 +40,14 @@ namespace gvisor { namespace testing { -#define TEST_ON_GVISOR "TEST_ON_GVISOR" -#define GVISOR_NETWORK "GVISOR_NETWORK" -#define GVISOR_VFS "GVISOR_VFS" +constexpr char kGvisorNetwork[] = "GVISOR_NETWORK"; +constexpr char kGvisorVfs[] = "GVISOR_VFS"; bool IsRunningOnGvisor() { return GvisorPlatform() != Platform::kNative; } const std::string GvisorPlatform() { // Set by runner.go. - const char* env = getenv(TEST_ON_GVISOR); + const char* env = getenv(kTestOnGvisor); if (!env) { return Platform::kNative; } @@ -56,12 +55,12 @@ const std::string GvisorPlatform() { } bool IsRunningWithHostinet() { - const char* env = getenv(GVISOR_NETWORK); + const char* env = getenv(kGvisorNetwork); return env && strcmp(env, "host") == 0; } bool IsRunningWithVFS1() { - const char* env = getenv(GVISOR_VFS); + const char* env = getenv(kGvisorVfs); if (env == nullptr) { // If not set, it's running on Linux. return false; diff --git a/test/util/test_util.h b/test/util/test_util.h index e635827e6..109078fc7 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -195,6 +195,8 @@ namespace gvisor { namespace testing { +constexpr char kTestOnGvisor[] = "TEST_ON_GVISOR"; + // TestInit must be called prior to RUN_ALL_TESTS. // // This parses all arguments and adjusts argc and argv appropriately. 
@@ -215,6 +217,7 @@ namespace Platform { constexpr char kNative[] = "native"; constexpr char kPtrace[] = "ptrace"; constexpr char kKVM[] = "kvm"; +constexpr char kFuchsia[] = "fuchsia"; } // namespace Platform bool IsRunningOnGvisor(); -- cgit v1.2.3 From 6a90c88b97481a6d81b05f09d5c8ed7158225dd5 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Wed, 1 Jul 2020 13:13:04 -0700 Subject: Port fallocate to VFS2. PiperOrigin-RevId: 319283715 --- pkg/p9/p9.go | 13 ++++++++ pkg/sentry/fsimpl/gofer/regular_file.go | 29 ++++++++++++++++ pkg/sentry/fsimpl/host/host.go | 10 ++++++ pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 5 +++ pkg/sentry/fsimpl/tmpfs/regular_file.go | 15 +++++++++ pkg/sentry/kernel/pipe/vfs.go | 5 +++ pkg/sentry/socket/hostinet/socket_vfs2.go | 7 +++- pkg/sentry/syscalls/linux/vfs2/filesystem.go | 50 ++++++++++++++++++++++++++++ pkg/sentry/syscalls/linux/vfs2/vfs2.go | 2 +- pkg/sentry/vfs/file_description.go | 4 +++ pkg/sentry/vfs/file_description_impl_util.go | 11 ++++++ pkg/sentry/vfs/inotify.go | 5 +++ test/syscalls/BUILD | 1 + test/syscalls/linux/BUILD | 5 +++ test/syscalls/linux/fallocate.cc | 45 +++++++++++++++++++++++++ 15 files changed, 205 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go index 28d851ff5..122c457d2 100644 --- a/pkg/p9/p9.go +++ b/pkg/p9/p9.go @@ -1091,6 +1091,19 @@ type AllocateMode struct { Unshare bool } +// ToAllocateMode returns an AllocateMode from a fallocate(2) mode. +func ToAllocateMode(mode uint64) AllocateMode { + return AllocateMode{ + KeepSize: mode&unix.FALLOC_FL_KEEP_SIZE != 0, + PunchHole: mode&unix.FALLOC_FL_PUNCH_HOLE != 0, + NoHideStale: mode&unix.FALLOC_FL_NO_HIDE_STALE != 0, + CollapseRange: mode&unix.FALLOC_FL_COLLAPSE_RANGE != 0, + ZeroRange: mode&unix.FALLOC_FL_ZERO_RANGE != 0, + InsertRange: mode&unix.FALLOC_FL_INSERT_RANGE != 0, + Unshare: mode&unix.FALLOC_FL_UNSHARE_RANGE != 0, + } +} + // ToLinux converts to a value compatible with fallocate(2)'s mode. func (a *AllocateMode) ToLinux() uint32 { rv := uint32(0) diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 404a452c4..faba73af2 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -67,6 +68,34 @@ func (fd *regularFileFD) OnClose(ctx context.Context) error { return d.handle.file.flush(ctx) } +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + + d := fd.dentry() + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + + size := offset + length + + // Allocating a smaller size is a noop. + if size <= d.size { + return nil + } + + d.handleMu.Lock() + defer d.handleMu.Unlock() + + err := d.handle.file.allocate(ctx, p9.ToAllocateMode(mode), offset, length) + if err != nil { + return err + } + d.size = size + if !d.cachedMetadataAuthoritative() { + d.touchCMtimeLocked() + } + return nil +} + // PRead implements vfs.FileDescriptionImpl.PRead. 
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { if offset < 0 { diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index d5e73ddae..007b3332e 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -543,6 +543,16 @@ func (f *fileDescription) Release() { // noop } +// Allocate implements vfs.FileDescriptionImpl. +func (f *fileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error { + if !f.inode.seekable { + return syserror.ESPIPE + } + + // TODO(gvisor.dev/issue/2923): Implement Allocate for non-pipe hostfds. + return syserror.EOPNOTSUPP +} + // PRead implements FileDescriptionImpl. func (f *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { i := f.inode diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 5f7853a2a..ca8b8c63b 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -236,6 +236,11 @@ func (fd *GenericDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptio return inode.SetStat(ctx, fd.filesystem(), creds, opts) } +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *GenericDirectoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + return fd.DirectoryFileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length) +} + // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. func (fd *GenericDirectoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index b805aadd0..6691add96 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -274,6 +274,21 @@ func (fd *regularFileFD) Release() { // noop } +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + f := fd.inode().impl.(*regularFile) + + f.inode.mu.Lock() + defer f.inode.mu.Unlock() + oldSize := f.size + size := offset + length + if oldSize >= size { + return nil + } + _, err := f.truncateLocked(size) + return err +} + // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { if offset < 0 { diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index a4519363f..45d4c5fc1 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -200,6 +200,11 @@ func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask { } } +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.ESPIPE +} + // EventRegister implements waiter.Waitable.EventRegister. 
func (fd *VFSPipeFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { fd.pipe.EventRegister(e, mask) diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go index ad5f64799..8f192c62f 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -96,7 +96,12 @@ func (s *socketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal return ioctl(ctx, s.fd, uio, args) } -// PRead implements vfs.FileDescriptionImpl. +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (s *socketVFS2) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.ENODEV +} + +// PRead implements vfs.FileDescriptionImpl.PRead. func (s *socketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { return 0, syserror.ESPIPE } diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go index 5dac77e4d..b12b5967b 100644 --- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go +++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go @@ -18,6 +18,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -239,6 +240,55 @@ func renameat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd }) } +// Fallocate implements linux system call fallocate(2). +func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + mode := args[1].Uint64() + offset := args[2].Int64() + length := args[3].Int64() + + file := t.GetFileVFS2(fd) + + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + if !file.IsWritable() { + return 0, nil, syserror.EBADF + } + + if mode != 0 { + return 0, nil, syserror.ENOTSUP + } + + if offset < 0 || length <= 0 { + return 0, nil, syserror.EINVAL + } + + size := offset + length + + if size < 0 { + return 0, nil, syserror.EFBIG + } + + limit := limits.FromContext(t).Get(limits.FileSize).Cur + + if uint64(size) >= limit { + t.SendSignal(&arch.SignalInfo{ + Signo: int32(linux.SIGXFSZ), + Code: arch.SignalInfoUser, + }) + return 0, nil, syserror.EFBIG + } + + return 0, nil, file.Impl().Allocate(t, mode, uint64(offset), uint64(length)) + + // File length modified, generate notification. + // TODO(gvisor.dev/issue/1479): Reenable when Inotify is ported. + // file.Dirent.InotifyEvent(linux.IN_MODIFY, 0) +} + // Rmdir implements Linux syscall rmdir(2). 
func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { pathAddr := args[0].Pointer() diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index 9e60c4a1c..8f497ecc7 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -138,7 +138,7 @@ func Override() { s.Table[282] = syscalls.Supported("signalfd", Signalfd) s.Table[283] = syscalls.Supported("timerfd_create", TimerfdCreate) s.Table[284] = syscalls.Supported("eventfd", Eventfd) - delete(s.Table, 285) // fallocate + s.Table[285] = syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil) s.Table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime) s.Table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime) s.Table[288] = syscalls.Supported("accept4", Accept4) diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index eb5dfd7e2..0c42574db 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -354,6 +354,10 @@ type FileDescriptionImpl interface { // represented by the FileDescription. StatFS(ctx context.Context) (linux.Statfs, error) + // Allocate grows file represented by FileDescription to offset + length bytes. + // Only mode == 0 is supported currently. + Allocate(ctx context.Context, mode, offset, length uint64) error + // waiter.Waitable methods may be used to poll for I/O events. waiter.Waitable diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index 3fec0d6d6..6b8b4ad49 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -56,6 +56,12 @@ func (FileDescriptionDefaultImpl) StatFS(ctx context.Context) (linux.Statfs, err return linux.Statfs{}, syserror.ENOSYS } +// Allocate implements FileDescriptionImpl.Allocate analogously to +// fallocate called on regular file, directory or FIFO in Linux. +func (FileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.ENODEV +} + // Readiness implements waiter.Waitable.Readiness analogously to // file_operations::poll == NULL in Linux. func (FileDescriptionDefaultImpl) Readiness(mask waiter.EventMask) waiter.EventMask { @@ -158,6 +164,11 @@ func (FileDescriptionDefaultImpl) Removexattr(ctx context.Context, name string) // implementations of non-directory I/O methods that return EISDIR. type DirectoryFileDescriptionDefaultImpl struct{} +// Allocate implements DirectoryFileDescriptionDefaultImpl.Allocate. +func (DirectoryFileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.EISDIR +} + // PRead implements FileDescriptionImpl.PRead. func (DirectoryFileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { return 0, syserror.EISDIR diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 509034531..c2e21ac5f 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -148,6 +148,11 @@ func (i *Inotify) Release() { } } +// Allocate implements FileDescription.Allocate. +func (i *Inotify) Allocate(ctx context.Context, mode, offset, length uint64) error { + panic("Allocate should not be called on read-only inotify fds") +} + // EventRegister implements waiter.Waitable. 
func (i *Inotify) EventRegister(e *waiter.Entry, mask waiter.EventMask) { i.queue.EventRegister(e, mask) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 88ed36b69..67645cc83 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -199,6 +199,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:fallocate_test", + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 8c7d54b21..9e097c888 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -748,9 +748,14 @@ cc_binary( linkstatic = 1, deps = [ ":file_base", + ":socket_test_util", "//test/util:cleanup", + "//test/util:eventfd_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", gtest, + "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc index 7819f4ac3..cabc2b751 100644 --- a/test/syscalls/linux/fallocate.cc +++ b/test/syscalls/linux/fallocate.cc @@ -15,16 +15,27 @@ #include #include #include +#include #include +#include +#include #include +#include #include #include #include +#include + #include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/time/time.h" #include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/socket_test_util.h" #include "test/util/cleanup.h" +#include "test/util/eventfd_util.h" #include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -70,6 +81,12 @@ TEST_F(AllocateTest, Fallocate) { ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds()); ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); EXPECT_EQ(buf.st_size, 40); + + // Given length 0 should fail with EINVAL. 
+ ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 50, 0), + SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 40); } TEST_F(AllocateTest, FallocateInvalid) { @@ -136,6 +153,34 @@ TEST_F(AllocateTest, FallocateRlimit) { ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds()); } +TEST_F(AllocateTest, FallocateOtherFDs) { + int fd; + ASSERT_THAT(fd = timerfd_create(CLOCK_MONOTONIC, 0), SyscallSucceeds()); + auto timer_fd = FileDescriptor(fd); + EXPECT_THAT(fallocate(timer_fd.get(), 0, 0, 10), + SyscallFailsWithErrno(ENODEV)); + + sigset_t mask; + sigemptyset(&mask); + ASSERT_THAT(fd = signalfd(-1, &mask, 0), SyscallSucceeds()); + auto sfd = FileDescriptor(fd); + EXPECT_THAT(fallocate(sfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + auto efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + EXPECT_THAT(fallocate(efd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + auto sockfd = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + EXPECT_THAT(fallocate(sockfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + int socks[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks), + SyscallSucceeds()); + auto sock0 = FileDescriptor(socks[0]); + auto sock1 = FileDescriptor(socks[1]); + EXPECT_THAT(fallocate(sock0.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From e4b2087602a9217098559347d82d316a8cef8140 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 1 Jul 2020 15:17:39 -0700 Subject: Use directory fds in sticky test to avoid permission issues. After we change credentials, it is possible that we no longer have access to the sticky directory where we are trying to delete files. Use an open fd so this is not an issue. PiperOrigin-RevId: 319306255 --- test/syscalls/linux/sticky.cc | 77 ++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 33 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc index 39f4fb801..4afed6d08 100644 --- a/test/syscalls/linux/sticky.cc +++ b/test/syscalls/linux/sticky.cc @@ -42,12 +42,15 @@ TEST(StickyTest, StickyBitPermDenied) { const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateFileWith(parent.path(), "some content", 0755)); - const TempPath dir = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); - const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo(parent.path(), file.path())); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. + const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. 
@@ -65,12 +68,14 @@ TEST(StickyTest, StickyBitPermDenied) { syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1), SyscallSucceeds()); - std::string new_path = NewTempAbsPath(); - EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), + EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "file", 0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(unlink(file.path().c_str()), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(unlink(link.path().c_str()), SyscallFailsWithErrno(EPERM)); }); } @@ -79,12 +84,15 @@ TEST(StickyTest, StickyBitSameUID) { const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateFileWith(parent.path(), "some content", 0755)); - const TempPath dir = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); - const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo(parent.path(), file.path())); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. + const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. @@ -100,12 +108,12 @@ TEST(StickyTest, StickyBitSameUID) { SyscallSucceeds()); // We still have the same EUID. - std::string new_path = NewTempAbsPath(); - EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), + EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), SyscallSucceeds()); - EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds()); - EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds()); - EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds()); }); } @@ -114,12 +122,15 @@ TEST(StickyTest, StickyBitCapFOWNER) { const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateFileWith(parent.path(), "some content", 0755)); - const TempPath dir = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); - const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo(parent.path(), file.path())); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. 
+ const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. @@ -136,12 +147,12 @@ TEST(StickyTest, StickyBitCapFOWNER) { SyscallSucceeds()); EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true)); - std::string new_path = NewTempAbsPath(); - EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), + EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), SyscallSucceeds()); - EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds()); - EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds()); - EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds()); }); } } // namespace -- cgit v1.2.3 From 31b27adf9b63dcefd0a753908bf984aa1f78b394 Mon Sep 17 00:00:00 2001 From: Mithun Iyer Date: Wed, 1 Jul 2020 15:46:02 -0700 Subject: TCP receive should block when in SYN-SENT state. The application can choose to initiate a non-blocking connect and later block on a read, when the endpoint is still in SYN-SENT state. PiperOrigin-RevId: 319311016 --- pkg/tcpip/transport/tcp/endpoint.go | 12 +- test/packetimpact/tests/BUILD | 2 - .../tests/tcp_queue_receive_in_syn_sent_test.go | 131 ++++++++++++++------- 3 files changed, 98 insertions(+), 47 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 99a691815..bd3ec5a8d 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1212,6 +1212,16 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { // Read reads data from the endpoint. func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { e.LockUser() + defer e.UnlockUser() + + // When in SYN-SENT state, let the caller block on the receive. + // An application can initiate a non-blocking connect and then block + // on a receive. It can expect to read any data after the handshake + // is complete. RFC793, section 3.9, p58. + if e.EndpointState() == StateSynSent { + return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrWouldBlock + } + // The endpoint can be read if it's connected, or if it's already closed // but has some pending unread data. 
Also note that a RST being received // would cause the state to become StateError so we should allow the @@ -1221,7 +1231,6 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, if s := e.EndpointState(); !s.connected() && s != StateClose && bufUsed == 0 { e.rcvListMu.Unlock() he := e.HardError - e.UnlockUser() if s == StateError { return buffer.View{}, tcpip.ControlMessages{}, he } @@ -1231,7 +1240,6 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, v, err := e.readLocked() e.rcvListMu.Unlock() - e.UnlockUser() if err == tcpip.ErrClosedForReceive { e.stats.ReadErrors.ReadClosed.Increment() diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 4711c1ae6..85749c559 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -183,8 +183,6 @@ packetimpact_go_test( packetimpact_go_test( name = "tcp_queue_receive_in_syn_sent", srcs = ["tcp_queue_receive_in_syn_sent_test.go"], - # TODO(b/157658105): Fix netstack then remove the line below. - expect_netstack_failure = True, deps = [ "//pkg/tcpip/header", "//test/packetimpact/testbench", diff --git a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go index b640d8673..8fbec893b 100644 --- a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go +++ b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go @@ -35,53 +35,98 @@ func init() { testbench.RegisterFlags(flag.CommandLine) } +// TestQueueReceiveInSynSent tests receive behavior when the TCP state +// is SYN-SENT. +// It tests for 2 variants where the receive is blocked and: +// (1) we complete handshake and send sample data. +// (2) we send a TCP RST. func TestQueueReceiveInSynSent(t *testing.T) { - dut := testbench.NewDUT(t) - defer dut.TearDown() + for _, tt := range []struct { + description string + reset bool + }{ + {description: "Send DATA", reset: false}, + {description: "Send RST", reset: true}, + } { + t.Run(tt.description, func(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() - socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) - conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + defer conn.Close() - sampleData := []byte("Sample Data") + sampleData := []byte("Sample Data") - dut.SetNonBlocking(socket, true) - if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) { - t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err) - } - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { - t.Fatalf("expected a SYN from DUT, but got none: %s", err) - } + dut.SetNonBlocking(socket, true) + if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) { + t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err) + } + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { + t.Fatalf("expected a SYN from DUT, but got none: %s", err) + } - // Issue 
RECEIVE call in SYN-SENT, this should be queued for process until the connection - // is established. - dut.SetNonBlocking(socket, false) - var wg sync.WaitGroup - defer wg.Wait() - wg.Add(1) - go func() { - defer wg.Done() - ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) - defer cancel() - n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0) - if n == -1 { - t.Fatalf("failed to recv on DUT: %s", err) - } - if got := buff[:n]; !bytes.Equal(got, sampleData) { - t.Fatalf("received data don't match, got:\n%s, want:\n%s", hex.Dump(got), hex.Dump(sampleData)) - } - }() - - // The following sleep is used to prevent the connection from being established while the - // RPC is in flight. - time.Sleep(time.Second) - - // Bring the connection to Established. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { - t.Fatalf("expected an ACK from DUT, but got none: %s", err) - } + if _, _, err := dut.RecvWithErrno(context.Background(), socket, int32(len(sampleData)), 0); err != syscall.Errno(unix.EWOULDBLOCK) { + t.Fatalf("expected error %s, got %s", syscall.Errno(unix.EWOULDBLOCK), err) + } + + // Test blocking read. + dut.SetNonBlocking(socket, false) + + var wg sync.WaitGroup + defer wg.Wait() + wg.Add(1) + var block sync.WaitGroup + block.Add(1) + go func() { + defer wg.Done() + ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) + defer cancel() - // Send sample data to DUT. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData}) + block.Done() + // Issue RECEIVE call in SYN-SENT, this should be queued for + // process until the connection is established. + n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0) + if tt.reset { + if err != syscall.Errno(unix.ECONNREFUSED) { + t.Errorf("expected error %s, got %s", syscall.Errno(unix.ECONNREFUSED), err) + } + if n != -1 { + t.Errorf("expected return value %d, got %d", -1, n) + } + return + } + if n == -1 { + t.Errorf("failed to recv on DUT: %s", err) + } + if got := buff[:n]; !bytes.Equal(got, sampleData) { + t.Errorf("received data doesn't match, got:\n%s, want:\n%s", hex.Dump(got), hex.Dump(sampleData)) + } + }() + + // Wait for the goroutine to be scheduled and before it + // blocks on endpoint receive. + block.Wait() + // The following sleep is used to prevent the connection + // from being established before we are blocked on Recv. + time.Sleep(100 * time.Millisecond) + + if tt.reset { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}) + return + } + + // Bring the connection to Established. + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + t.Fatalf("expected an ACK from DUT, but got none: %s", err) + } + + // Send sample payload and expect an ACK. 
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData}) + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + t.Fatalf("expected an ACK from DUT, but got none: %s", err) + } + }) + } } -- cgit v1.2.3 From 65d99855583a21b6ea511ea74aa52318d0a1e5b2 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 1 Jul 2020 17:09:26 -0700 Subject: Port vfs1 implementation of sync_file_range to vfs2. Currently, we always perform a full-file sync which could be extremely expensive for some applications. Although vfs1 did not fully support sync_file_range, there were some optimizations that allowed us skip some unnecessary write-outs. Updates #2923, #1897. PiperOrigin-RevId: 319324213 --- pkg/sentry/fsimpl/gofer/special_file.go | 5 +--- pkg/sentry/syscalls/linux/vfs2/sync.go | 42 +++++++++++++++++++++++++++------ test/syscalls/BUILD | 1 + 3 files changed, 37 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index a016cbae1..2b381af05 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -235,8 +235,5 @@ func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) ( // Sync implements vfs.FileDescriptionImpl.Sync. func (fd *specialFileFD) Sync(ctx context.Context) error { - if !fd.vfsfd.IsWritable() { - return nil - } - return fd.handle.sync(ctx) + return fd.dentry().syncSharedHandle(ctx) } diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go index 365250b0b..0d0ebf46a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/sync.go +++ b/pkg/sentry/syscalls/linux/vfs2/sync.go @@ -65,10 +65,8 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel nbytes := args[2].Int64() flags := args[3].Uint() - if offset < 0 { - return 0, nil, syserror.EINVAL - } - if nbytes < 0 { + // Check for negative values and overflow. + if offset < 0 || offset+nbytes < 0 { return 0, nil, syserror.EINVAL } if flags&^(linux.SYNC_FILE_RANGE_WAIT_BEFORE|linux.SYNC_FILE_RANGE_WRITE|linux.SYNC_FILE_RANGE_WAIT_AFTER) != 0 { @@ -81,7 +79,37 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel } defer file.DecRef() - // TODO(gvisor.dev/issue/1897): Avoid writeback of data ranges outside of - // [offset, offset+nbytes). - return 0, nil, file.Sync(t) + // TODO(gvisor.dev/issue/1897): Currently, the only file syncing we support + // is a full-file sync, i.e. fsync(2). As a result, there are severe + // limitations on how much we support sync_file_range: + // - In Linux, sync_file_range(2) doesn't write out the file's metadata, even + // if the file size is changed. We do. + // - We always sync the entire file instead of [offset, offset+nbytes). + // - We do not support the use of WAIT_BEFORE without WAIT_AFTER. For + // correctness, we would have to perform a write-out every time WAIT_BEFORE + // was used, but this would be much more expensive than expected if there + // were no write-out operations in progress. + // - Whenever WAIT_AFTER is used, we sync the file. + // - Ignore WRITE. If this flag is used with WAIT_AFTER, then the file will + // be synced anyway. 
If this flag is used without WAIT_AFTER, then it is + // safe (and less expensive) to do nothing, because the syscall will not + // wait for the write-out to complete--we only need to make sure that the + // next time WAIT_BEFORE or WAIT_AFTER are used, the write-out completes. + // - According to fs/sync.c, WAIT_BEFORE|WAIT_AFTER "will detect any I/O + // errors or ENOSPC conditions and will return those to the caller, after + // clearing the EIO and ENOSPC flags in the address_space." We don't do + // this. + + if flags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 && + flags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 { + t.Kernel().EmitUnimplementedEvent(t) + return 0, nil, syserror.ENOSYS + } + + if flags&linux.SYNC_FILE_RANGE_WAIT_AFTER != 0 { + if err := file.Sync(t); err != nil { + return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS) + } + } + return 0, nil, nil } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 67645cc83..790190273 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -962,6 +962,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:sync_file_range_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From 514955c1a8f3927c928a57935d511ffbbf9c6f01 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Wed, 1 Jul 2020 21:04:43 -0700 Subject: [vfs2][gofer] Fix mmap syscall test. We were not invalidating mappings when the file size changed in shared mode. Enabled the syscall test for vfs2. Updates #2923 PiperOrigin-RevId: 319346569 --- pkg/sentry/fsimpl/gofer/gofer.go | 9 ++++++--- test/syscalls/BUILD | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 709b5e496..8e74e60a5 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -900,6 +900,12 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin } d.metadataMu.Lock() defer d.metadataMu.Unlock() + if stat.Mask&linux.STATX_SIZE != 0 { + // The size needs to be changed even when + // !d.cachedMetadataAuthoritative() because d.mappings has to be + // updated. + d.updateFileSizeLocked(stat.Size) + } if !d.isSynthetic() { if stat.Mask != 0 { if err := d.file.setAttr(ctx, p9.SetAttrMask{ @@ -961,9 +967,6 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin stat.Mask |= linux.STATX_MTIME } atomic.StoreInt64(&d.ctime, now) - if stat.Mask&linux.STATX_SIZE != 0 { - d.updateFileSizeLocked(stat.Size) - } return nil } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 790190273..6fe397af9 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -357,6 +357,7 @@ syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:mmap_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From 6c099d830091b3153e0dc39cf500dba441f45707 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 1 Jul 2020 22:03:14 -0700 Subject: Update preadv2/pwritev2 flag handling in vfs2. We do not support RWF_SYNC/RWF_DSYNC and probably shouldn't silently accept them, since the user may incorrectly believe that we are synchronizing I/O. Remove the pwritev2 test verifying that we support these flags. gvisor.dev/issue/2601 is the tracking bug for deciding which RWF_.* flags we need and supporting them. Updates #2923, #2601. 
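As a rough illustration of the user-visible effect of this change (not part of the patch; it assumes a glibc new enough to provide the pwritev2(2) wrapper, and the file path is arbitrary): RWF_HIPRI continues to be accepted and silently ignored, while RWF_DSYNC/RWF_SYNC are no longer swallowed and may now fail with EOPNOTSUPP on filesystems that do not implement them:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void) {
      int fd = open("/tmp/pwritev2-demo", O_CREAT | O_RDWR | O_TRUNC, 0644);
      if (fd < 0) {
        perror("open");
        return 1;
      }
      char buf[] = "hello";
      struct iovec iov;
      iov.iov_base = buf;
      iov.iov_len = strlen(buf);

      // RWF_HIPRI remains accepted (and ignored).
      if (pwritev2(fd, &iov, 1, 0, RWF_HIPRI) < 0) perror("RWF_HIPRI");

      // RWF_DSYNC/RWF_SYNC may fail with EOPNOTSUPP rather than being
      // silently ignored, depending on the underlying filesystem.
      if (pwritev2(fd, &iov, 1, 0, RWF_DSYNC) < 0) perror("RWF_DSYNC");

      close(fd);
      return 0;
    }
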
PiperOrigin-RevId: 319351286 --- pkg/abi/linux/file.go | 5 +-- pkg/sentry/fsimpl/gofer/regular_file.go | 8 +++-- pkg/sentry/fsimpl/gofer/special_file.go | 8 +++-- pkg/sentry/fsimpl/host/host.go | 4 ++- pkg/sentry/fsimpl/tmpfs/regular_file.go | 14 +++++--- test/syscalls/BUILD | 1 + test/syscalls/linux/pwritev2.cc | 59 ++++++--------------------------- 7 files changed, 39 insertions(+), 60 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index 055ac1d7c..e11ca2d62 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -191,8 +191,9 @@ var DirentType = abi.ValueSet{ // Values for preadv2/pwritev2. const ( - // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is - // accepted as a valid flag argument for preadv2/pwritev2. + // NOTE(b/120162627): gVisor does not implement the RWF_HIPRI feature, but + // the flag is accepted as a valid flag argument for preadv2/pwritev2 and + // silently ignored. RWF_HIPRI = 0x00000001 RWF_DSYNC = 0x00000002 RWF_SYNC = 0x00000004 diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index faba73af2..3d2d3530a 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -102,7 +102,9 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. + // Check that flags are supported. + // + // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } @@ -155,7 +157,9 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. + // Check that flags are supported. + // + // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 2b381af05..3c4e7e2e4 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -130,7 +130,9 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. + // Check that flags are supported. + // + // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } @@ -176,7 +178,9 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. + // Check that flags are supported. + // + // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 007b3332e..1cd2982cb 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -588,8 +588,10 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts } func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags uint32) (int64, error) { + // Check that flags are supported. + // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. 
- if flags != 0 { + if flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } reader := hostfd.GetReadWriterAt(int32(hostFD), offset, flags) diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 6691add96..1cdb46e6f 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -295,8 +295,11 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. - if opts.Flags&^linux.RWF_HIPRI != 0 { + // Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since + // all state is in-memory. + // + // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. + if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 { return 0, syserror.EOPNOTSUPP } @@ -326,8 +329,11 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. - if opts.Flags&^linux.RWF_HIPRI != 0 { + // Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since + // all state is in-memory. + // + // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. + if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 { return 0, syserror.EOPNOTSUPP } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 6fe397af9..7c4cd8192 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -531,6 +531,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:pwritev2_test", + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/pwritev2.cc b/test/syscalls/linux/pwritev2.cc index 3fe5a600f..63b686c62 100644 --- a/test/syscalls/linux/pwritev2.cc +++ b/test/syscalls/linux/pwritev2.cc @@ -69,7 +69,7 @@ ssize_t pwritev2(unsigned long fd, const struct iovec* iov, } // This test is the base case where we call pwritev (no offset, no flags). -TEST(Writev2Test, TestBaseCall) { +TEST(Writev2Test, BaseCall) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( @@ -97,7 +97,7 @@ TEST(Writev2Test, TestBaseCall) { } // This test is where we call pwritev2 with a positive offset and no flags. -TEST(Pwritev2Test, TestValidPositiveOffset) { +TEST(Pwritev2Test, ValidPositiveOffset) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); std::string prefix(kBufSize, '0'); @@ -129,7 +129,7 @@ TEST(Pwritev2Test, TestValidPositiveOffset) { // This test is the base case where we call writev by using -1 as the offset. // The write should use the file offset, so the test increments the file offset // prior to call pwritev2. -TEST(Pwritev2Test, TestNegativeOneOffset) { +TEST(Pwritev2Test, NegativeOneOffset) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const std::string prefix = "00"; @@ -164,7 +164,7 @@ TEST(Pwritev2Test, TestNegativeOneOffset) { // pwritev2 requires if the RWF_HIPRI flag is passed, the fd must be opened with // O_DIRECT. This test implements a correct call with the RWF_HIPRI flag. 
-TEST(Pwritev2Test, TestCallWithRWF_HIPRI) { +TEST(Pwritev2Test, CallWithRWF_HIPRI) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( @@ -189,47 +189,8 @@ TEST(Pwritev2Test, TestCallWithRWF_HIPRI) { EXPECT_EQ(buf, content); } -// This test checks that pwritev2 can be called with valid flags -TEST(Pwritev2Test, TestCallWithValidFlags) { - SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); - - const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( - GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); - const FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); - - std::vector content(kBufSize, '0'); - struct iovec iov; - iov.iov_base = content.data(); - iov.iov_len = content.size(); - - EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, - /*offset=*/0, /*flags=*/RWF_DSYNC), - SyscallSucceedsWithValue(kBufSize)); - - std::vector buf(content.size()); - EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), - SyscallSucceedsWithValue(buf.size())); - - EXPECT_EQ(buf, content); - - SetContent(content); - - EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, - /*offset=*/0, /*flags=*/0x4), - SyscallSucceedsWithValue(kBufSize)); - - ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), - SyscallSucceedsWithValue(content.size())); - - EXPECT_THAT(pread(fd.get(), buf.data(), buf.size(), /*offset=*/0), - SyscallSucceedsWithValue(buf.size())); - - EXPECT_EQ(buf, content); -} - // This test calls pwritev2 with a bad file descriptor. -TEST(Writev2Test, TestBadFile) { +TEST(Writev2Test, BadFile) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); ASSERT_THAT(pwritev2(/*fd=*/-1, /*iov=*/nullptr, /*iovcnt=*/0, /*offset=*/0, /*flags=*/0), @@ -237,7 +198,7 @@ TEST(Writev2Test, TestBadFile) { } // This test calls pwrite2 with an invalid offset. -TEST(Pwritev2Test, TestInvalidOffset) { +TEST(Pwritev2Test, InvalidOffset) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( @@ -255,7 +216,7 @@ TEST(Pwritev2Test, TestInvalidOffset) { SyscallFailsWithErrno(EINVAL)); } -TEST(Pwritev2Test, TestUnseekableFileValid) { +TEST(Pwritev2Test, UnseekableFileValid) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); int pipe_fds[2]; @@ -285,7 +246,7 @@ TEST(Pwritev2Test, TestUnseekableFileValid) { // Calling pwritev2 with a non-negative offset calls pwritev. Calling pwritev // with an unseekable file is not allowed. A pipe is used for an unseekable // file. -TEST(Pwritev2Test, TestUnseekableFileInValid) { +TEST(Pwritev2Test, UnseekableFileInvalid) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); int pipe_fds[2]; @@ -304,7 +265,7 @@ TEST(Pwritev2Test, TestUnseekableFileInValid) { EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); } -TEST(Pwritev2Test, TestReadOnlyFile) { +TEST(Pwritev2Test, ReadOnlyFile) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( @@ -323,7 +284,7 @@ TEST(Pwritev2Test, TestReadOnlyFile) { } // This test calls pwritev2 with an invalid flag. 
-TEST(Pwritev2Test, TestInvalidFlag) { +TEST(Pwritev2Test, InvalidFlag) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( -- cgit v1.2.3 From 418db67e2fa1564d64bce9a8fba5264186822af6 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 3 Jul 2020 17:41:31 -0700 Subject: Update build rule to appease deprecation Before this change, running packetimpact tests produces: parameter 'direct' must contain a list of elements, and may no longer accept a depset. The deprecated behavior may be temporarily re-enabled by setting --incompatible_disable_depset_inputs=false The positional parameter to depset has been changed to mean `direct` rather than its previous meaning of `items`. The documentation[0] explains: A positional parameter distinct from other parameters for legacy support. If --incompatible_disable_depset_items is false, this parameter serves as the value of items. If --incompatible_disable_depset_items is true, this parameter serves as the value of direct. See the documentation for these parameters for more details. [0] https://docs.bazel.build/versions/master/skylark/lib/globals.html PiperOrigin-RevId: 319555138 --- test/packetimpact/runner/defs.bzl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl index ea66b9756..77cdfea12 100644 --- a/test/packetimpact/runner/defs.bzl +++ b/test/packetimpact/runner/defs.bzl @@ -20,12 +20,12 @@ def _packetimpact_test_impl(ctx): ]) ctx.actions.write(bench, bench_content, is_executable = True) - transitive_files = depset() + transitive_files = [] if hasattr(ctx.attr._test_runner, "data_runfiles"): - transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + transitive_files.append(ctx.attr._test_runner.data_runfiles.files) runfiles = ctx.runfiles( files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, - transitive_files = transitive_files, + transitive_files = depset(transitive = transitive_files), collect_default = True, collect_data = True, ) -- cgit v1.2.3 From 5ac34386a73bb37e5d99201327e0b3eed7abdb46 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 3 Jul 2020 23:18:46 -0700 Subject: Improve failure message Currently this test produces an error resembling tcp_zero_window_probe_retransmit_test.go:92: zero probe came sooner interval 3200179405 probe 4 which is approximately useless. PiperOrigin-RevId: 319572263 --- test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go index 5ab193181..8c89d57c9 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go @@ -88,8 +88,8 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { continue } // Check if the probes came at exponentially increasing intervals. - if p := time.Since(start); p < current-startProbeDuration { - t.Fatalf("zero probe came sooner interval %d probe %d\n", p, i) + if got, want := time.Since(start), current-startProbeDuration; got < want { + t.Errorf("got zero probe %d after %s, want >= %s", i, got, want) } // Acknowledge the zero-window probes from the dut. 
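The rewritten assertion above switches to the common Go "got ..., want ..." style with printable durations, which is what makes the new failure message readable. A tiny runnable illustration of the difference, using made-up values rather than anything from the real test, is:

package main

import (
	"fmt"
	"time"
)

func main() {
	elapsed := 3200179405 * time.Nanosecond // hypothetical observed probe interval
	want := 4 * time.Second                 // hypothetical expected minimum interval

	// Before: raw nanosecond counts and no stated expectation.
	fmt.Printf("zero probe came sooner interval %d probe %d\n", elapsed, 4)

	// After: states what was observed and what was expected, in readable units.
	fmt.Printf("got zero probe %d after %s, want >= %s\n", 4, elapsed, want)
}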
conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) -- cgit v1.2.3 From b0f656184ef249182882dc323babf9e7c8ac3fc0 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Mon, 6 Jul 2020 17:21:03 -0700 Subject: Add support for SO_RCVBUF/SO_SNDBUF for AF_PACKET sockets. Updates #2746 PiperOrigin-RevId: 319887810 --- pkg/tcpip/transport/packet/endpoint.go | 92 ++++++++++- test/syscalls/linux/packet_socket_raw.cc | 273 +++++++++++++++++++++++++++++-- 2 files changed, 345 insertions(+), 20 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index baf08eda6..a8f8454dd 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -25,6 +25,8 @@ package packet import ( + "fmt" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -71,11 +73,12 @@ type endpoint struct { rcvClosed bool // The following fields are protected by mu. - mu sync.RWMutex `state:"nosave"` - sndBufSize int - closed bool - stats tcpip.TransportEndpointStats `state:"nosave"` - bound bool + mu sync.RWMutex `state:"nosave"` + sndBufSize int + sndBufSizeMax int + closed bool + stats tcpip.TransportEndpointStats `state:"nosave"` + bound bool } // NewEndpoint returns a new packet endpoint. @@ -92,6 +95,17 @@ func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumb sndBufSize: 32 * 1024, } + // Override with stack defaults. + var ss stack.SendBufferSizeOption + if err := s.Option(&ss); err == nil { + ep.sndBufSizeMax = ss.Default + } + + var rs stack.ReceiveBufferSizeOption + if err := s.Option(&rs); err == nil { + ep.rcvBufSizeMax = rs.Default + } + if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil { return nil, err } @@ -274,7 +288,46 @@ func (ep *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption + switch opt { + case tcpip.SendBufferSizeOption: + // Make sure the send buffer size is within the min and max + // allowed. + var ss stack.SendBufferSizeOption + if err := ep.stack.Option(&ss); err != nil { + panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err)) + } + if v > ss.Max { + v = ss.Max + } + if v < ss.Min { + v = ss.Min + } + ep.mu.Lock() + ep.sndBufSizeMax = v + ep.mu.Unlock() + return nil + + case tcpip.ReceiveBufferSizeOption: + // Make sure the receive buffer size is within the min and max + // allowed. + var rs stack.ReceiveBufferSizeOption + if err := ep.stack.Option(&rs); err != nil { + panic(fmt.Sprintf("s.Option(%#v) = %s", rs, err)) + } + if v > rs.Max { + v = rs.Max + } + if v < rs.Min { + v = rs.Min + } + ep.rcvMu.Lock() + ep.rcvBufSizeMax = v + ep.rcvMu.Unlock() + return nil + + default: + return tcpip.ErrUnknownProtocolOption + } } // GetSockOpt implements tcpip.Endpoint.GetSockOpt. @@ -289,7 +342,32 @@ func (ep *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. 
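The SetSockOptInt paths added above share one idea: a requested buffer size is never rejected, it is clamped into the [Min, Max] range advertised by the stack before being stored. A self-contained sketch of just that clamping step, with a made-up struct standing in for stack.SendBufferSizeOption / stack.ReceiveBufferSizeOption, is:

package main

import "fmt"

// bufferSizeRange stands in for the stack's {Min, Default, Max} buffer size
// options; in the patch above the real values come from ep.stack.Option.
type bufferSizeRange struct {
	Min, Default, Max int
}

// clamp forces v into the [r.Min, r.Max] range, mirroring how the packet
// endpoint stores SO_SNDBUF/SO_RCVBUF requests.
func clamp(v int, r bufferSizeRange) int {
	if v > r.Max {
		return r.Max
	}
	if v < r.Min {
		return r.Min
	}
	return v
}

func main() {
	r := bufferSizeRange{Min: 4096, Default: 32 * 1024, Max: 4 << 20}
	fmt.Println(clamp(0, r))       // 4096: too small, clamped up to Min
	fmt.Println(clamp(64*1024, r)) // 65536: in range, kept as requested
	fmt.Println(clamp(1<<30, r))   // 4194304: too large, clamped down to Max
}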
func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { - return 0, tcpip.ErrNotSupported + switch opt { + case tcpip.ReceiveQueueSizeOption: + v := 0 + ep.rcvMu.Lock() + if !ep.rcvList.Empty() { + p := ep.rcvList.Front() + v = p.data.Size() + } + ep.rcvMu.Unlock() + return v, nil + + case tcpip.SendBufferSizeOption: + ep.mu.Lock() + v := ep.sndBufSizeMax + ep.mu.Unlock() + return v, nil + + case tcpip.ReceiveBufferSizeOption: + ep.rcvMu.Lock() + v := ep.rcvBufSizeMax + ep.rcvMu.Unlock() + return v, nil + + default: + return -1, tcpip.ErrUnknownProtocolOption + } } // HandlePacket implements stack.PacketEndpoint.HandlePacket. diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc index d258d353c..4093ac813 100644 --- a/test/syscalls/linux/packet_socket_raw.cc +++ b/test/syscalls/linux/packet_socket_raw.cc @@ -97,7 +97,7 @@ class RawPacketTest : public ::testing::TestWithParam { int GetLoopbackIndex(); // The socket used for both reading and writing. - int socket_; + int s_; }; void RawPacketTest::SetUp() { @@ -108,34 +108,58 @@ void RawPacketTest::SetUp() { } if (!IsRunningOnGvisor()) { + // Ensure that looped back packets aren't rejected by the kernel. FileDescriptor acceptLocal = ASSERT_NO_ERRNO_AND_VALUE( - Open("/proc/sys/net/ipv4/conf/lo/accept_local", O_RDONLY)); + Open("/proc/sys/net/ipv4/conf/lo/accept_local", O_RDWR)); FileDescriptor routeLocalnet = ASSERT_NO_ERRNO_AND_VALUE( - Open("/proc/sys/net/ipv4/conf/lo/route_localnet", O_RDONLY)); + Open("/proc/sys/net/ipv4/conf/lo/route_localnet", O_RDWR)); char enabled; ASSERT_THAT(read(acceptLocal.get(), &enabled, 1), SyscallSucceedsWithValue(1)); - ASSERT_EQ(enabled, '1'); + if (enabled != '1') { + enabled = '1'; + ASSERT_THAT(lseek(acceptLocal.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(write(acceptLocal.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_THAT(lseek(acceptLocal.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(acceptLocal.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_EQ(enabled, '1'); + } + ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1), SyscallSucceedsWithValue(1)); - ASSERT_EQ(enabled, '1'); + if (enabled != '1') { + enabled = '1'; + ASSERT_THAT(lseek(routeLocalnet.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(write(routeLocalnet.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_THAT(lseek(routeLocalnet.get(), 0, SEEK_SET), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1), + SyscallSucceedsWithValue(1)); + ASSERT_EQ(enabled, '1'); + } } - ASSERT_THAT(socket_ = socket(AF_PACKET, SOCK_RAW, htons(GetParam())), + ASSERT_THAT(s_ = socket(AF_PACKET, SOCK_RAW, htons(GetParam())), SyscallSucceeds()); } void RawPacketTest::TearDown() { // TearDown will be run even if we skip the test. if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { - EXPECT_THAT(close(socket_), SyscallSucceeds()); + EXPECT_THAT(close(s_), SyscallSucceeds()); } } int RawPacketTest::GetLoopbackIndex() { struct ifreq ifr; snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); - EXPECT_THAT(ioctl(socket_, SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_THAT(ioctl(s_, SIOCGIFINDEX, &ifr), SyscallSucceeds()); EXPECT_NE(ifr.ifr_ifindex, 0); return ifr.ifr_ifindex; } @@ -149,7 +173,7 @@ TEST_P(RawPacketTest, Receive) { // Wait for the socket to become readable. 
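The SetUp change above now flips the two loopback sysctls to '1' when they are not already set, instead of failing the test outright. Outside of the C++ test harness, the same idea can be written as a short Go helper; the /proc paths are the ones used above, error handling is simplified, and root privileges are assumed:

package main

import (
	"bytes"
	"fmt"
	"os"
)

// enableSysctl writes "1" to a /proc/sys entry if it does not already
// contain "1". Like the test above, this needs root.
func enableSysctl(path string) error {
	cur, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if bytes.HasPrefix(cur, []byte("1")) {
		return nil // already enabled
	}
	return os.WriteFile(path, []byte("1"), 0644)
}

func main() {
	for _, p := range []string{
		"/proc/sys/net/ipv4/conf/lo/accept_local",
		"/proc/sys/net/ipv4/conf/lo/route_localnet",
	} {
		if err := enableSysctl(p); err != nil {
			fmt.Fprintf(os.Stderr, "%s: %v\n", p, err)
		}
	}
}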
struct pollfd pfd = {}; - pfd.fd = socket_; + pfd.fd = s_; pfd.events = POLLIN; EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 2000), SyscallSucceedsWithValue(1)); @@ -159,7 +183,7 @@ TEST_P(RawPacketTest, Receive) { char buf[64]; struct sockaddr_ll src = {}; socklen_t src_len = sizeof(src); - ASSERT_THAT(recvfrom(socket_, buf, sizeof(buf), 0, + ASSERT_THAT(recvfrom(s_, buf, sizeof(buf), 0, reinterpret_cast(&src), &src_len), SyscallSucceedsWithValue(packet_size)); // sockaddr_ll ends with an 8 byte physical address field, but ethernet @@ -277,7 +301,7 @@ TEST_P(RawPacketTest, Send) { sizeof(kMessage)); // Send it. - ASSERT_THAT(sendto(socket_, send_buf, sizeof(send_buf), 0, + ASSERT_THAT(sendto(s_, send_buf, sizeof(send_buf), 0, reinterpret_cast(&dest), sizeof(dest)), SyscallSucceedsWithValue(sizeof(send_buf))); @@ -286,13 +310,13 @@ TEST_P(RawPacketTest, Send) { pfd.fd = udp_sock.get(); pfd.events = POLLIN; ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1)); - pfd.fd = socket_; + pfd.fd = s_; pfd.events = POLLIN; ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1)); // Receive on the packet socket. char recv_buf[sizeof(send_buf)]; - ASSERT_THAT(recv(socket_, recv_buf, sizeof(recv_buf), 0), + ASSERT_THAT(recv(s_, recv_buf, sizeof(recv_buf), 0), SyscallSucceedsWithValue(sizeof(recv_buf))); ASSERT_EQ(memcmp(recv_buf, send_buf, sizeof(send_buf)), 0); @@ -309,6 +333,229 @@ TEST_P(RawPacketTest, Send) { EXPECT_EQ(src.sin_addr.s_addr, htonl(INADDR_LOOPBACK)); } +// Check that setting SO_RCVBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawPacketTest, SetSocketRecvBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum receive buf size by trying to set it to zero. + // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_RCVBUF above max is clamped to the maximum +// receive buffer size. +TEST_P(RawPacketTest, SetSocketRecvBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover max buf size by trying to set the largest possible buffer size. 
+ constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. +TEST_P(RawPacketTest, SetSocketRecvBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover max buf size by trying to set a really large buffer size. + constexpr int kRcvBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set a zero size receive buffer + // size. + // See: + // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 + constexpr int kRcvBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior. + if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + ASSERT_EQ(quarter_sz, val); +} + +// Check that setting SO_SNDBUF below min is clamped to the minimum +// receive buffer size. +TEST_P(RawPacketTest, SetSocketSendBufBelowMin) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover minimum buffer size by trying to set it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int min = 0; + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + + // Linux doubles the value so let's use a value that when doubled will still + // be smaller than min. + int below_min = min / 2 - 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + ASSERT_EQ(min, val); +} + +// Check that setting SO_SNDBUF above max is clamped to the maximum +// send buffer size. +TEST_P(RawPacketTest, SetSocketSendBufAboveMax) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + // Discover maximum buffer size by trying to set it to a large value. 
+ constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + int max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + + int above_max = max + 1; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(max, val); +} + +// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. +TEST_P(RawPacketTest, SetSocketSendBuf) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int max = 0; + int min = 0; + { + // Discover maximum buffer size by trying to set it to a large value. + constexpr int kSndBufSz = 0xffffffff; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + max = 0; + socklen_t max_len = sizeof(max); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), + SyscallSucceeds()); + } + + { + // Discover minimum buffer size by trying to set it to zero. + constexpr int kSndBufSz = 0; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), + SyscallSucceeds()); + + socklen_t min_len = sizeof(min); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), + SyscallSucceeds()); + } + + int quarter_sz = min + (max - min) / 4; + ASSERT_THAT( + setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)), + SyscallSucceeds()); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), + SyscallSucceeds()); + + // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. + // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack + // matches linux behavior. 
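The quarter_sz doubling in these tests reflects documented Linux behavior: the kernel doubles the value passed to setsockopt for SO_SNDBUF/SO_RCVBUF to leave room for bookkeeping, and getsockopt returns the doubled value (see socket(7)). A quick Go probe of that behavior on an ordinary UDP socket, separate from the packet-socket tests above, could look like:

package main

import (
	"fmt"
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)

	const requested = 64 * 1024
	if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, requested); err != nil {
		log.Fatal(err)
	}
	got, err := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF)
	if err != nil {
		log.Fatal(err)
	}
	// On Linux this typically reports 131072, i.e. double the requested size.
	fmt.Printf("requested %d, kernel reports %d\n", requested, got)
}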
+ if (!IsRunningOnGvisor()) { + quarter_sz *= 2; + } + + ASSERT_EQ(quarter_sz, val); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, RawPacketTest, ::testing::Values(ETH_P_IP, ETH_P_ALL)); -- cgit v1.2.3 From 10930189c3cd938e7526c55188ba2d814a7b8a43 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Tue, 7 Jul 2020 15:33:32 -0700 Subject: Fix mknod and inotify syscall test This change fixes a few things: - creating sockets using mknod(2) is supported via vfs2 - fsgofer can create regular files via mknod(2) - mode = 0 for mknod(2) will be interpreted as regular file in vfs2 as well Updates #2923 PiperOrigin-RevId: 320074267 --- pkg/sentry/fsimpl/tmpfs/filesystem.go | 2 +- pkg/sentry/syscalls/linux/vfs2/filesystem.go | 13 ++++-- runsc/fsgofer/filter/config.go | 1 + runsc/fsgofer/fsgofer.go | 60 ++++++++++++++++++++++------ test/syscalls/BUILD | 2 + test/syscalls/linux/mknod.cc | 26 +++++++++++- 6 files changed, 84 insertions(+), 20 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index ed40f6b52..a0f20c2d4 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -277,7 +277,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v creds := rp.Credentials() var childInode *inode switch opts.Mode.FileType() { - case 0, linux.S_IFREG: + case linux.S_IFREG: childInode = fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode) case linux.S_IFIFO: childInode = fs.newNamedPipe(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode) diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go index b12b5967b..6b14c2bef 100644 --- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go +++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go @@ -107,7 +107,7 @@ func Mknod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall addr := args[0].Pointer() mode := args[1].ModeT() dev := args[2].Uint() - return 0, nil, mknodat(t, linux.AT_FDCWD, addr, mode, dev) + return 0, nil, mknodat(t, linux.AT_FDCWD, addr, linux.FileMode(mode), dev) } // Mknodat implements Linux syscall mknodat(2). @@ -116,10 +116,10 @@ func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca addr := args[1].Pointer() mode := args[2].ModeT() dev := args[3].Uint() - return 0, nil, mknodat(t, dirfd, addr, mode, dev) + return 0, nil, mknodat(t, dirfd, addr, linux.FileMode(mode), dev) } -func mknodat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode uint, dev uint32) error { +func mknodat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode linux.FileMode, dev uint32) error { path, err := copyInPath(t, addr) if err != nil { return err @@ -129,9 +129,14 @@ func mknodat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode uint, dev uint return err } defer tpop.Release() + + // "Zero file type is equivalent to type S_IFREG." 
- mknod(2) + if mode.FileType() == 0 { + mode |= linux.ModeRegular + } major, minor := linux.DecodeDeviceID(dev) return t.Kernel().VFS().MknodAt(t, t.Credentials(), &tpop.pop, &vfs.MknodOptions{ - Mode: linux.FileMode(mode &^ t.FSContext().Umask()), + Mode: mode &^ linux.FileMode(t.FSContext().Umask()), DevMajor: uint32(major), DevMinor: minor, }) diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index 1dce36965..88814b83c 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -128,6 +128,7 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_MADVISE: {}, unix.SYS_MEMFD_CREATE: {}, /// Used by flipcall.PacketWindowAllocator.Init(). syscall.SYS_MKDIRAT: {}, + syscall.SYS_MKNODAT: {}, // Used by the Go runtime as a temporarily workaround for a Linux // 5.2-5.4 bug. // diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 74977c313..b7521bda7 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -139,7 +139,7 @@ func (a *attachPoint) Attach() (p9.File, error) { return nil, fmt.Errorf("unable to open %q: %v", a.prefix, err) } - stat, err := stat(f.FD()) + stat, err := fstat(f.FD()) if err != nil { return nil, fmt.Errorf("unable to stat %q: %v", a.prefix, err) } @@ -352,7 +352,7 @@ func newFDMaybe(file *fd.FD) *fd.FD { return dup } -func stat(fd int) (syscall.Stat_t, error) { +func fstat(fd int) (syscall.Stat_t, error) { var stat syscall.Stat_t if err := syscall.Fstat(fd, &stat); err != nil { return syscall.Stat_t{}, err @@ -360,6 +360,14 @@ func stat(fd int) (syscall.Stat_t, error) { return stat, nil } +func stat(path string) (syscall.Stat_t, error) { + var stat syscall.Stat_t + if err := syscall.Stat(path, &stat); err != nil { + return syscall.Stat_t{}, err + } + return stat, nil +} + func fchown(fd int, uid p9.UID, gid p9.GID) error { return syscall.Fchownat(fd, "", int(uid), int(gid), linux.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW) } @@ -388,7 +396,7 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { } } - stat, err := stat(newFile.FD()) + stat, err := fstat(newFile.FD()) if err != nil { if newFile != l.file { newFile.Close() @@ -449,7 +457,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid if err := fchown(child.FD(), uid, gid); err != nil { return nil, nil, p9.QID{}, 0, extractErrno(err) } - stat, err := stat(child.FD()) + stat, err := fstat(child.FD()) if err != nil { return nil, nil, p9.QID{}, 0, extractErrno(err) } @@ -497,7 +505,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) if err := fchown(f.FD(), uid, gid); err != nil { return p9.QID{}, extractErrno(err) } - stat, err := stat(f.FD()) + stat, err := fstat(f.FD()) if err != nil { return p9.QID{}, extractErrno(err) } @@ -517,7 +525,7 @@ func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) { return nil, nil, extractErrno(err) } - stat, err := stat(newFile.FD()) + stat, err := fstat(newFile.FD()) if err != nil { newFile.Close() return nil, nil, extractErrno(err) @@ -542,7 +550,7 @@ func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) { if err != nil { return nil, nil, extractErrno(err) } - stat, err := stat(f.FD()) + stat, err := fstat(f.FD()) if err != nil { f.Close() return nil, nil, extractErrno(err) @@ -592,7 +600,7 @@ func (l *localFile) FSync() error { // GetAttr implements p9.File. 
func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) { - stat, err := stat(l.file.FD()) + stat, err := fstat(l.file.FD()) if err != nil { return p9.QID{}, p9.AttrMask{}, p9.Attr{}, extractErrno(err) } @@ -880,7 +888,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9. if err := fchown(f.FD(), uid, gid); err != nil { return p9.QID{}, extractErrno(err) } - stat, err := stat(f.FD()) + stat, err := fstat(f.FD()) if err != nil { return p9.QID{}, extractErrno(err) } @@ -907,13 +915,39 @@ func (l *localFile) Link(target p9.File, newName string) error { } // Mknod implements p9.File. -// -// Not implemented. -func (*localFile) Mknod(_ string, _ p9.FileMode, _ uint32, _ uint32, _ p9.UID, _ p9.GID) (p9.QID, error) { +func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, uid p9.UID, gid p9.GID) (p9.QID, error) { + conf := l.attachPoint.conf + if conf.ROMount { + if conf.PanicOnWrite { + panic("attempt to write to RO mount") + } + return p9.QID{}, syscall.EROFS + } + + hostPath := path.Join(l.hostPath, name) + + // Return EEXIST if the file already exists. + if _, err := stat(hostPath); err == nil { + return p9.QID{}, syscall.EEXIST + } + // From mknod(2) man page: // "EPERM: [...] if the filesystem containing pathname does not support // the type of node requested." - return p9.QID{}, syscall.EPERM + if mode.FileType() != p9.ModeRegular { + return p9.QID{}, syscall.EPERM + } + + // Allow Mknod to create regular files. + if err := syscall.Mknod(hostPath, uint32(mode), 0); err != nil { + return p9.QID{}, err + } + + stat, err := stat(hostPath) + if err != nil { + return p9.QID{}, extractErrno(err) + } + return l.attachPoint.makeQID(stat), nil } // UnlinkAt implements p9.File. diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 7c4cd8192..28ef55945 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -283,6 +283,7 @@ syscall_test( size = "medium", add_overlay = False, # TODO(gvisor.dev/issue/317): enable when fixed. test = "//test/syscalls/linux:inotify_test", + vfs2 = "True", ) syscall_test( @@ -351,6 +352,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:mknod_test", + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc index 4c45766c7..05dfb375a 100644 --- a/test/syscalls/linux/mknod.cc +++ b/test/syscalls/linux/mknod.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -39,7 +40,28 @@ TEST(MknodTest, RegularFile) { EXPECT_THAT(mknod(node1.c_str(), 0, 0), SyscallSucceeds()); } -TEST(MknodTest, MknodAtRegularFile) { +TEST(MknodTest, RegularFilePermissions) { + const std::string node = NewTempAbsPath(); + mode_t newUmask = 0077; + umask(newUmask); + + // Attempt to open file with mode 0777. Not specifying file type should create + // a regualar file. + mode_t perms = S_IRWXU | S_IRWXG | S_IRWXO; + EXPECT_THAT(mknod(node.c_str(), perms, 0), SyscallSucceeds()); + + // In the absence of a default ACL, the permissions of the created node are + // (mode & ~umask). -- mknod(2) + mode_t wantPerms = perms & ~newUmask; + struct stat st; + ASSERT_THAT(stat(node.c_str(), &st), SyscallSucceeds()); + ASSERT_EQ(st.st_mode & 0777, wantPerms); + + // "Zero file type is equivalent to type S_IFREG." 
- mknod(2) + ASSERT_EQ(st.st_mode & S_IFMT, S_IFREG); +} + +TEST(MknodTest, MknodAtFIFO) { const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const std::string fifo_relpath = NewTempRelPath(); const std::string fifo = JoinPath(dir.path(), fifo_relpath); @@ -72,7 +94,7 @@ TEST(MknodTest, MknodOnExistingPathFails) { TEST(MknodTest, UnimplementedTypesReturnError) { const std::string path = NewTempAbsPath(); - if (IsRunningOnGvisor()) { + if (IsRunningWithVFS1()) { ASSERT_THAT(mknod(path.c_str(), S_IFSOCK, 0), SyscallFailsWithErrno(EOPNOTSUPP)); } -- cgit v1.2.3 From 76c7bc51b7b02c4ba83c0a064c3629bb5ee91340 Mon Sep 17 00:00:00 2001 From: Tony Gong Date: Tue, 7 Jul 2020 16:13:21 -0700 Subject: Set IPv4 ID on all non-atomic datagrams RFC 6864 imposes various restrictions on the uniqueness of the IPv4 Identification field for non-atomic datagrams, defined as an IP datagram that either can be fragmented (DF=0) or is already a fragment (MF=1 or positive fragment offset). In order to be compliant, the ID field is assigned for all non-atomic datagrams. Add a TCP unit test that induces retransmissions and checks that the IPv4 ID field is unique every time. Add basic handling of the IP_MTU_DISCOVER socket option so that the option can be used to disable PMTU discovery, effectively setting DF=0. Attempting to set the sockopt to anything other than disabled will fail because PMTU discovery is currently not implemented, and the default behavior matches that of disabled. PiperOrigin-RevId: 320081842 --- pkg/tcpip/network/ipv4/ipv4.go | 21 +++++------- pkg/tcpip/tcpip.go | 25 ++++++++++++++ pkg/tcpip/transport/tcp/endpoint.go | 12 +++++++ pkg/tcpip/transport/tcp/tcp_test.go | 57 +++++++++++++++++++++++++++++++ pkg/tcpip/transport/udp/endpoint.go | 11 ++++++ test/packetimpact/tests/BUILD | 2 -- test/syscalls/linux/raw_socket_hdrincl.cc | 51 ++------------------------- 7 files changed, 117 insertions(+), 62 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go index 7e9f16c90..b1776e5ee 100644 --- a/pkg/tcpip/network/ipv4/ipv4.go +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -225,12 +225,10 @@ func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int, func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadSize int, params stack.NetworkHeaderParams) header.IPv4 { ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize)) length := uint16(hdr.UsedLength() + payloadSize) - id := uint32(0) - if length > header.IPv4MaximumHeaderSize+8 { - // Packets of 68 bytes or less are required by RFC 791 to not be - // fragmented, so we only assign ids to larger packets. - id = atomic.AddUint32(&e.protocol.ids[hashRoute(r, params.Protocol, e.protocol.hashIV)%buckets], 1) - } + // RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic + // datagrams. Since the DF bit is never being set here, all datagrams + // are non-atomic and need an ID. + id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, params.Protocol, e.protocol.hashIV)%buckets], 1) ip.Encode(&header.IPv4Fields{ IHL: header.IPv4MinimumSize, TotalLength: length, @@ -376,13 +374,12 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu // Set the packet ID when zero. if ip.ID() == 0 { - id := uint32(0) - if pkt.Data.Size() > header.IPv4MaximumHeaderSize+8 { - // Packets of 68 bytes or less are required by RFC 791 to not be - // fragmented, so we only assign ids to larger packets. 
- id = atomic.AddUint32(&e.protocol.ids[hashRoute(r, 0 /* protocol */, e.protocol.hashIV)%buckets], 1) + // RFC 6864 section 4.3 mandates uniqueness of ID values for + // non-atomic datagrams, so assign an ID to all such datagrams + // according to the definition given in RFC 6864 section 4. + if ip.Flags()&header.IPv4FlagDontFragment == 0 || ip.Flags()&header.IPv4FlagMoreFragments != 0 || ip.FragmentOffset() > 0 { + ip.SetID(uint16(atomic.AddUint32(&e.protocol.ids[hashRoute(r, 0 /* protocol */, e.protocol.hashIV)%buckets], 1))) } - ip.SetID(uint16(id)) } // Always set the checksum. diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 2be1c107a..2f9872dc6 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -673,6 +673,13 @@ const ( // TCP_MAXSEG option. MaxSegOption + // MTUDiscoverOption is used to set/get the path MTU discovery setting. + // + // NOTE: Setting this option to any other value than PMTUDiscoveryDont + // is not supported and will fail as such, and getting this option will + // always return PMTUDiscoveryDont. + MTUDiscoverOption + // MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control // the default TTL value for multicast messages. The default is 1. MulticastTTLOption @@ -714,6 +721,24 @@ const ( TCPWindowClampOption ) +const ( + // PMTUDiscoveryWant is a setting of the MTUDiscoverOption to use + // per-route settings. + PMTUDiscoveryWant int = iota + + // PMTUDiscoveryDont is a setting of the MTUDiscoverOption to disable + // path MTU discovery. + PMTUDiscoveryDont + + // PMTUDiscoveryDo is a setting of the MTUDiscoverOption to always do + // path MTU discovery. + PMTUDiscoveryDo + + // PMTUDiscoveryProbe is a setting of the MTUDiscoverOption to set DF + // but ignore path MTU. + PMTUDiscoveryProbe +) + // ErrorOption is used in GetSockOpt to specify that the last error reported by // the endpoint should be cleared and returned. type ErrorOption struct{} diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index bd3ec5a8d..caac6ef57 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1589,6 +1589,13 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { e.UnlockUser() e.notifyProtocolGoroutine(notifyMSSChanged) + case tcpip.MTUDiscoverOption: + // Return not supported if attempting to set this option to + // anything other than path MTU discovery disabled. + if v != tcpip.PMTUDiscoveryDont { + return tcpip.ErrNotSupported + } + case tcpip.ReceiveBufferSizeOption: // Make sure the receive buffer size is within the min and max // allowed. @@ -1896,6 +1903,11 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { v := header.TCPDefaultMSS return v, nil + case tcpip.MTUDiscoverOption: + // Always return the path MTU discovery disabled setting since + // it's the only one supported. + return tcpip.PMTUDiscoveryDont, nil + case tcpip.ReceiveQueueSizeOption: return e.readyReceiveSize() diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 169adb16b..e67ec42b1 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -3095,6 +3095,63 @@ func TestMaxRTO(t *testing.T) { } } +// TestRetransmitIPv4IDUniqueness tests that the IPv4 Identification field is +// unique on retransmits. 
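The condition added above encodes RFC 6864's definition of a non-atomic datagram: DF is clear, or MF is set, or the fragment offset is nonzero; only such datagrams must carry a unique IPv4 ID. Pulled out of the endpoint code, with local constants standing in for header.IPv4FlagDontFragment and header.IPv4FlagMoreFragments, the predicate is just:

package main

import "fmt"

// IPv4 flag bits, standing in for the header package's constants.
const (
	flagMoreFragments = 1 << 0
	flagDontFragment  = 1 << 1
)

// nonAtomic reports whether a datagram with the given flags and fragment
// offset may be fragmented or already is a fragment (RFC 6864 section 4),
// and therefore needs a unique Identification value.
func nonAtomic(flags uint8, fragmentOffset uint16) bool {
	return flags&flagDontFragment == 0 ||
		flags&flagMoreFragments != 0 ||
		fragmentOffset > 0
}

func main() {
	fmt.Println(nonAtomic(flagDontFragment, 0))                   // false: atomic, ID may stay 0
	fmt.Println(nonAtomic(0, 0))                                  // true: DF=0, may be fragmented
	fmt.Println(nonAtomic(flagDontFragment|flagMoreFragments, 0)) // true: already a fragment
}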
+func TestRetransmitIPv4IDUniqueness(t *testing.T) { + for _, tc := range []struct { + name string + size int + }{ + {"1Byte", 1}, + {"512Bytes", 512}, + } { + t.Run(tc.name, func(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + c.CreateConnected(789 /* iss */, 30000 /* rcvWnd */, -1 /* epRcvBuf */) + + // Disabling PMTU discovery causes all packets sent from this socket to + // have DF=0. This needs to be done because the IPv4 ID uniqueness + // applies only to non-atomic IPv4 datagrams as defined in RFC 6864 + // Section 4, and datagrams with DF=0 are non-atomic. + if err := c.EP.SetSockOptInt(tcpip.MTUDiscoverOption, tcpip.PMTUDiscoveryDont); err != nil { + t.Fatalf("disabling PMTU discovery via sockopt to force DF=0 failed: %s", err) + } + + if _, _, err := c.EP.Write(tcpip.SlicePayload(buffer.NewView(tc.size)), tcpip.WriteOptions{}); err != nil { + t.Fatalf("Write failed: %s", err) + } + pkt := c.GetPacket() + checker.IPv4(t, pkt, + checker.FragmentFlags(0), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)), + ), + ) + idSet := map[uint16]struct{}{header.IPv4(pkt).ID(): struct{}{}} + // Expect two retransmitted packets, and that all packets received have + // unique IPv4 ID values. + for i := 0; i <= 2; i++ { + pkt := c.GetPacket() + checker.IPv4(t, pkt, + checker.FragmentFlags(0), + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)), + ), + ) + id := header.IPv4(pkt).ID() + if _, exists := idSet[id]; exists { + t.Fatalf("duplicate IPv4 ID=%d found in retransmitted packet", id) + } + idSet[id] = struct{}{} + } + }) + } +} + func TestFinImmediately(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index cae29fbff..0584ec8dc 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -612,6 +612,13 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { switch opt { + case tcpip.MTUDiscoverOption: + // Return not supported if the value is not disabling path + // MTU discovery. + if v != tcpip.PMTUDiscoveryDont { + return tcpip.ErrNotSupported + } + case tcpip.MulticastTTLOption: e.mu.Lock() e.multicastTTL = uint8(v) @@ -906,6 +913,10 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { e.mu.RUnlock() return v, nil + case tcpip.MTUDiscoverOption: + // The only supported setting is path MTU discovery disabled. + return tcpip.PMTUDiscoveryDont, nil + case tcpip.MulticastTTLOption: e.mu.Lock() v := int(e.multicastTTL) diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 85749c559..3ecbe83eb 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -18,8 +18,6 @@ packetimpact_go_test( packetimpact_go_test( name = "ipv4_id_uniqueness", srcs = ["ipv4_id_uniqueness_test.go"], - # TODO(b/157506701) Fix netstack then remove the line below. 
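The MTUDiscoverOption plumbing above is what lets a test (or an application) force DF=0 by disabling path MTU discovery. Against the plain Linux socket API, the equivalent knob is the IP_MTU_DISCOVER option set to IP_PMTUDISC_DONT; a minimal sketch of using it is:

package main

import (
	"fmt"
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_STREAM, 0)
	if err != nil {
		log.Fatal(err)
	}
	defer unix.Close(fd)

	// Disable path MTU discovery: outgoing packets are sent with DF=0,
	// making them non-atomic in the RFC 6864 sense.
	if err := unix.SetsockoptInt(fd, unix.IPPROTO_IP, unix.IP_MTU_DISCOVER, unix.IP_PMTUDISC_DONT); err != nil {
		log.Fatal(err)
	}

	v, err := unix.GetsockoptInt(fd, unix.IPPROTO_IP, unix.IP_MTU_DISCOVER)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("IP_MTU_DISCOVER =", v, "(IP_PMTUDISC_DONT =", unix.IP_PMTUDISC_DONT, ")")
}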
- expect_netstack_failure = True, deps = [ "//pkg/abi/linux", "//pkg/tcpip/header", diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc index 0a27506aa..16cfc1d75 100644 --- a/test/syscalls/linux/raw_socket_hdrincl.cc +++ b/test/syscalls/linux/raw_socket_hdrincl.cc @@ -273,52 +273,7 @@ TEST_F(RawHDRINCL, SendAndReceive) { // The network stack should have set the source address. EXPECT_EQ(src.sin_family, AF_INET); EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK); - // The packet ID should be 0, as the packet is less than 68 bytes. - struct iphdr iphdr = {}; - memcpy(&iphdr, recv_buf, sizeof(iphdr)); - EXPECT_EQ(iphdr.id, 0); -} - -// Send and receive a packet with nonzero IP ID. -TEST_F(RawHDRINCL, SendAndReceiveNonzeroID) { - int port = 40000; - if (!IsRunningOnGvisor()) { - port = static_cast(ASSERT_NO_ERRNO_AND_VALUE( - PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false))); - } - - // IPPROTO_RAW sockets are write-only. We'll have to open another socket to - // read what we write. - FileDescriptor udp_sock = - ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); - - // Construct a packet with an IP header, UDP header, and payload. Make the - // payload large enough to force an IP ID to be assigned. - constexpr char kPayload[128] = {}; - char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)]; - ASSERT_TRUE( - FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload))); - - socklen_t addrlen = sizeof(addr_); - ASSERT_NO_FATAL_FAILURE(sendto(socket_, &packet, sizeof(packet), 0, - reinterpret_cast(&addr_), - addrlen)); - - // Receive the payload. - char recv_buf[sizeof(packet)]; - struct sockaddr_in src; - socklen_t src_size = sizeof(src); - ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), 0, - reinterpret_cast(&src), &src_size), - SyscallSucceedsWithValue(sizeof(packet))); - EXPECT_EQ( - memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr), - sizeof(kPayload)), - 0); - // The network stack should have set the source address. - EXPECT_EQ(src.sin_family, AF_INET); - EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK); - // The packet ID should not be 0, as the packet was more than 68 bytes. + // The packet ID should not be 0, as the packet has DF=0. struct iphdr* iphdr = reinterpret_cast(recv_buf); EXPECT_NE(iphdr->id, 0); } @@ -368,10 +323,10 @@ TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) { // The network stack should have set the source address. EXPECT_EQ(src.sin_family, AF_INET); EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK); - // The packet ID should be 0, as the packet is less than 68 bytes. + // The packet ID should not be 0, as the packet has DF=0. struct iphdr recv_iphdr = {}; memcpy(&recv_iphdr, recv_buf, sizeof(recv_iphdr)); - EXPECT_EQ(recv_iphdr.id, 0); + EXPECT_NE(recv_iphdr.id, 0); // The destination address should be localhost, not the bad IP we set // initially. EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK); -- cgit v1.2.3 From 5e05950c1c520724e2e03963850868befb95efeb Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Tue, 7 Jul 2020 19:44:03 -0700 Subject: Deflake exec test. - Only use MAXSYMLINKS/2+1 symlinks for each of the interpreter and script paths in SymlinkLimitRefreshedForInterpreter to tolerate cases where the original paths (/tmp, /bin, or /bin/echo) themselves contain symlinks. 
- Ensure that UnshareFiles performs execve immediately after clone(CLONE_VFORK) (no heap allocation for ExecveArray/RunfilesPath). - Use lstat() rather than stat() for the existence check in fs_util's Exists; the latter will fail if the symlink target does not exist, even if the symlink does. PiperOrigin-RevId: 320110156 --- test/syscalls/linux/exec.cc | 23 +++++++++++++---------- test/util/fs_util.cc | 4 ++-- test/util/fs_util.h | 7 ++++++- test/util/temp_path.cc | 2 +- 4 files changed, 22 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index e09afafe9..c5acfc794 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -553,7 +553,12 @@ TEST(ExecTest, SymlinkLimitRefreshedForInterpreter) { // Hold onto TempPath objects so they are not destructed prematurely. std::vector interpreter_symlinks; std::vector script_symlinks; - for (int i = 0; i < kLinuxMaxSymlinks; i++) { + // Replace both the interpreter and script paths with symlink chains of just + // over half the symlink limit each; this is the minimum required to test that + // the symlink limit applies separately to each traversal, while tolerating + // some symlinks in the resolution of (the original) interpreter_path and + // script_path. + for (int i = 0; i < (kLinuxMaxSymlinks / 2) + 1; i++) { interpreter_symlinks.push_back(ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(tmp_dir, interpreter_path))); interpreter_path = interpreter_symlinks[i].path(); @@ -679,18 +684,16 @@ TEST(ExecveatTest, UnshareFiles) { const FileDescriptor fd_closed_on_exec = ASSERT_NO_ERRNO_AND_VALUE(Open(tempFile.path(), O_RDONLY | O_CLOEXEC)); - pid_t child; - EXPECT_THAT(child = syscall(__NR_clone, SIGCHLD | CLONE_VFORK | CLONE_FILES, - 0, 0, 0, 0), - SyscallSucceeds()); + ExecveArray argv = {"test"}; + ExecveArray envp; + std::string child_path = RunfilePath(kBasicWorkload); + pid_t child = + syscall(__NR_clone, SIGCHLD | CLONE_VFORK | CLONE_FILES, 0, 0, 0, 0); if (child == 0) { - ExecveArray argv = {"test"}; - ExecveArray envp; - ASSERT_THAT( - execve(RunfilePath(kBasicWorkload).c_str(), argv.get(), envp.get()), - SyscallSucceeds()); + execve(child_path.c_str(), argv.get(), envp.get()); _exit(1); } + ASSERT_THAT(child, SyscallSucceeds()); int status; ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), SyscallSucceeds()); diff --git a/test/util/fs_util.cc b/test/util/fs_util.cc index 052781445..5418948fe 100644 --- a/test/util/fs_util.cc +++ b/test/util/fs_util.cc @@ -125,12 +125,12 @@ PosixErrorOr Fstat(int fd) { PosixErrorOr Exists(absl::string_view path) { struct stat stat_buf; - int res = stat(std::string(path).c_str(), &stat_buf); + int res = lstat(std::string(path).c_str(), &stat_buf); if (res < 0) { if (errno == ENOENT) { return false; } - return PosixError(errno, absl::StrCat("stat ", path)); + return PosixError(errno, absl::StrCat("lstat ", path)); } return true; } diff --git a/test/util/fs_util.h b/test/util/fs_util.h index caf19b24d..8cdac23a1 100644 --- a/test/util/fs_util.h +++ b/test/util/fs_util.h @@ -44,9 +44,14 @@ PosixErrorOr GetCWD(); // can't be determined. PosixErrorOr Exists(absl::string_view path); -// Returns a stat structure for the given path or an error. +// Returns a stat structure for the given path or an error. If the path +// represents a symlink, it will be traversed. PosixErrorOr Stat(absl::string_view path); +// Returns a stat structure for the given path or an error. 
If the path +// represents a symlink, it will not be traversed. +PosixErrorOr Lstat(absl::string_view path); + // Returns a stat struct for the given fd. PosixErrorOr Fstat(int fd); diff --git a/test/util/temp_path.cc b/test/util/temp_path.cc index 9c10b6674..e1bdee7fd 100644 --- a/test/util/temp_path.cc +++ b/test/util/temp_path.cc @@ -56,7 +56,7 @@ void TryDeleteRecursively(std::string const& path) { if (undeleted_dirs || undeleted_files || !status.ok()) { std::cerr << path << ": failed to delete " << undeleted_dirs << " directories and " << undeleted_files - << " files: " << status; + << " files: " << status << std::endl; } } } -- cgit v1.2.3 From e3db9bda60580df127ea445fc1f862864c5451f9 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Wed, 8 Jul 2020 12:03:28 -0700 Subject: Enable shards in runtime test runner. Fixed an issue with the runtime test runner which enables us to run tests in shards. We had to touch the status file as indicated by an env var. PiperOrigin-RevId: 320236205 --- pkg/test/testutil/testutil.go | 15 +++++++++++++++ test/runtimes/runner/main.go | 5 +++++ 2 files changed, 20 insertions(+) (limited to 'test') diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go index f21d6769a..64c292698 100644 --- a/pkg/test/testutil/testutil.go +++ b/pkg/test/testutil/testutil.go @@ -482,6 +482,21 @@ func IsStatic(filename string) (bool, error) { return true, nil } +// TouchShardStatusFile indicates to Bazel that the test runner supports +// sharding by creating or updating the last modified date of the file +// specified by TEST_SHARD_STATUS_FILE. +// +// See https://docs.bazel.build/versions/master/test-encyclopedia.html#role-of-the-test-runner. +func TouchShardStatusFile() error { + if statusFile := os.Getenv("TEST_SHARD_STATUS_FILE"); statusFile != "" { + cmd := exec.Command("touch", statusFile) + if b, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("touch %q failed:\n output: %s\n error: %s", statusFile, string(b), err.Error()) + } + } + return nil +} + // TestIndicesForShard returns indices for this test shard based on the // TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars. // diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go index 54d1169ef..a00c64d27 100644 --- a/test/runtimes/runner/main.go +++ b/test/runtimes/runner/main.go @@ -63,6 +63,11 @@ func runTests() int { d := dockerutil.MakeDocker(testutil.DefaultLogger(*lang)) defer d.CleanUp() + if err := testutil.TouchShardStatusFile(); err != nil { + fmt.Fprintf(os.Stderr, "error touching status shard file: %v\n", err) + return 1 + } + // Get a slice of tests to run. This will also start a single Docker // container that will be used to run each test. The final test will // stop the Docker container. -- cgit v1.2.3 From f3fa43cf237109c1e1cb0cf7ba63bb793cc2ff1f Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Wed, 8 Jul 2020 13:24:58 -0700 Subject: Move all tests to new docker API. 
Moves following to new dockerutil API: - //test/e2e:integration_test - //test/image:image_test - //test/iptables:iptables_test - //test/root:root_test - //test/packetimpact:packetimpact_test PiperOrigin-RevId: 320253118 --- WORKSPACE | 107 +++++ pkg/test/dockerutil/BUILD | 15 +- pkg/test/dockerutil/container.go | 501 +++++++++++++++++++++ pkg/test/dockerutil/dockerutil.go | 600 -------------------------- pkg/test/dockerutil/exec.go | 194 +++++++++ pkg/test/dockerutil/network.go | 113 +++++ test/e2e/BUILD | 1 + test/e2e/exec_test.go | 136 +++--- test/e2e/integration_test.go | 167 ++++--- test/e2e/regression_test.go | 8 +- test/image/image_test.go | 77 ++-- test/iptables/iptables_test.go | 12 +- test/packetimpact/runner/BUILD | 1 + test/packetimpact/runner/packetimpact_test.go | 117 +++-- test/root/cgroup_test.go | 193 +++++---- test/root/chroot_test.go | 21 +- test/runtimes/runner/main.go | 16 +- 17 files changed, 1336 insertions(+), 943 deletions(-) create mode 100644 pkg/test/dockerutil/container.go create mode 100644 pkg/test/dockerutil/exec.go create mode 100644 pkg/test/dockerutil/network.go (limited to 'test') diff --git a/WORKSPACE b/WORKSPACE index e1873e5c0..417ec6100 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -370,6 +370,112 @@ go_repository( version = "v1.5.0", ) +# Docker API dependencies. +go_repository( + name = "com_github_docker_docker", + importpath = "github.com/docker/docker", + sum = "h1:iWPIG7pWIsCwT6ZtHnTUpoVMnete7O/pzd9HFE3+tn8=", + version = "v17.12.0-ce-rc1.0.20200618181300-9dc6525e6118+incompatible", +) + +go_repository( + name = "com_github_docker_go_connections", + importpath = "github.com/docker/go-connections", + sum = "h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=", + version = "v0.4.0", +) + +go_repository( + name = "com_github_pkg_errors", + importpath = "github.com/pkg/errors", + sum = "h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=", + version = "v0.9.1", +) + +go_repository( + name = "com_github_docker_go_units", + importpath = "github.com/docker/go-units", + sum = "h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=", + version = "v0.4.0", +) + +go_repository( + name = "com_github_opencontainers_go_digest", + importpath = "github.com/opencontainers/go-digest", + sum = "h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=", + version = "v1.0.0", +) + +go_repository( + name = "com_github_docker_distribution", + importpath = "github.com/docker/distribution", + sum = "h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug=", + version = "v2.7.1+incompatible", +) + +go_repository( + name = "com_github_davecgh_go_spew", + importpath = "github.com/davecgh/go-spew", + sum = "h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=", + version = "v1.1.1", +) + +go_repository( + name = "com_github_konsorten_go_windows_terminal_sequences", + importpath = "github.com/konsorten/go-windows-terminal-sequences", + sum = "h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=", + version = "v2.7.1+incompatible", +) + +go_repository( + name = "com_github_pmezard_go_difflib", + importpath = "github.com/pmezard/go-difflib", + sum = "h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=", + version = "v1.0.0", +) + +go_repository( + name = "com_github_sirupsen_logrus", + importpath = "github.com/sirupsen/logrus", + sum = "h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=", + version = "v1.6.0", +) + +go_repository( + name = "com_github_stretchr_testify", + importpath = "github.com/stretchr/testify", + sum = "h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=", + version = "v1.2.2", +) + +go_repository( + name = 
"com_github_opencontainers_image_spec", + importpath = "github.com/opencontainers/image-spec", + sum = "h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI=", + version = "v1.0.1", +) + +go_repository( + name = "com_github_containerd_containerd", + importpath = "github.com/containerd/containerd", + sum = "h1:3o0smo5SKY7H6AJCmJhsnCjR2/V2T8VmiHt7seN2/kI=", + version = "v1.3.4", +) + +go_repository( + name = "com_github_microsoft_go_winio", + importpath = "github.com/Microsoft/go-winio", + sum = "h1:+hMXMk01us9KgxGb7ftKQt2Xpf5hH/yky+TDA+qxleU=", + version = "v0.4.14", +) + +go_repository( + name = "com_github_stretchr_objx", + importpath = "github.com/stretchr/objx", + sum = "h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A=", + version = "v0.1.1", +) + go_repository( name = "org_golang_google_api", importpath = "google.golang.org/api", @@ -450,3 +556,4 @@ http_archive( "https://github.com/google/benchmark/archive/v1.5.0.tar.gz", ], ) + diff --git a/pkg/test/dockerutil/BUILD b/pkg/test/dockerutil/BUILD index 7c8758e35..83b80c8bc 100644 --- a/pkg/test/dockerutil/BUILD +++ b/pkg/test/dockerutil/BUILD @@ -5,10 +5,21 @@ package(licenses = ["notice"]) go_library( name = "dockerutil", testonly = 1, - srcs = ["dockerutil.go"], + srcs = [ + "container.go", + "dockerutil.go", + "exec.go", + "network.go", + ], visibility = ["//:sandbox"], deps = [ "//pkg/test/testutil", - "@com_github_kr_pty//:go_default_library", + "@com_github_docker_docker//api/types:go_default_library", + "@com_github_docker_docker//api/types/container:go_default_library", + "@com_github_docker_docker//api/types/mount:go_default_library", + "@com_github_docker_docker//api/types/network:go_default_library", + "@com_github_docker_docker//client:go_default_library", + "@com_github_docker_docker//pkg/stdcopy:go_default_library", + "@com_github_docker_go_connections//nat:go_default_library", ], ) diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go new file mode 100644 index 000000000..17acdaf6f --- /dev/null +++ b/pkg/test/dockerutil/container.go @@ -0,0 +1,501 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dockerutil + +import ( + "bytes" + "context" + "fmt" + "io/ioutil" + "net" + "os" + "path" + "regexp" + "strconv" + "strings" + "time" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/mount" + "github.com/docker/docker/api/types/network" + "github.com/docker/docker/client" + "github.com/docker/docker/pkg/stdcopy" + "github.com/docker/go-connections/nat" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +// Container represents a Docker Container allowing +// user to configure and control as one would with the 'docker' +// client. Container is backed by the offical golang docker API. +// See: https://pkg.go.dev/github.com/docker/docker. 
+type Container struct { + Name string + Runtime string + + logger testutil.Logger + client *client.Client + id string + mounts []mount.Mount + links []string + cleanups []func() + copyErr error + + // Stores streams attached to the container. Used by WaitForOutputSubmatch. + streams types.HijackedResponse + + // stores previously read data from the attached streams. + streamBuf bytes.Buffer +} + +// RunOpts are options for running a container. +type RunOpts struct { + // Image is the image relative to images/. This will be mangled + // appropriately, to ensure that only first-party images are used. + Image string + + // Memory is the memory limit in bytes. + Memory int + + // Cpus in which to allow execution. ("0", "1", "0-2"). + CpusetCpus string + + // Ports are the ports to be allocated. + Ports []int + + // WorkDir sets the working directory. + WorkDir string + + // ReadOnly sets the read-only flag. + ReadOnly bool + + // Env are additional environment variables. + Env []string + + // User is the user to use. + User string + + // Privileged enables privileged mode. + Privileged bool + + // CapAdd are the extra set of capabilities to add. + CapAdd []string + + // CapDrop are the extra set of capabilities to drop. + CapDrop []string + + // Mounts is the list of directories/files to be mounted inside the container. + Mounts []mount.Mount + + // Links is the list of containers to be connected to the container. + Links []string +} + +// MakeContainer sets up the struct for a Docker container. +// +// Names of containers will be unique. +func MakeContainer(ctx context.Context, logger testutil.Logger) *Container { + // Slashes are not allowed in container names. + name := testutil.RandomID(logger.Name()) + name = strings.ReplaceAll(name, "/", "-") + client, err := client.NewClientWithOpts(client.FromEnv) + if err != nil { + return nil + } + + client.NegotiateAPIVersion(ctx) + + return &Container{ + logger: logger, + Name: name, + Runtime: *runtime, + client: client, + } +} + +// Spawn is analogous to 'docker run -d'. +func (c *Container) Spawn(ctx context.Context, r RunOpts, args ...string) error { + if err := c.create(ctx, r, args); err != nil { + return err + } + return c.Start(ctx) +} + +// SpawnProcess is analogous to 'docker run -it'. It returns a process +// which represents the root process. +func (c *Container) SpawnProcess(ctx context.Context, r RunOpts, args ...string) (Process, error) { + config, hostconf, netconf := c.ConfigsFrom(r, args...) + config.Tty = true + config.OpenStdin = true + + if err := c.CreateFrom(ctx, config, hostconf, netconf); err != nil { + return Process{}, err + } + + if err := c.Start(ctx); err != nil { + return Process{}, err + } + + return Process{container: c, conn: c.streams}, nil +} + +// Run is analogous to 'docker run'. +func (c *Container) Run(ctx context.Context, r RunOpts, args ...string) (string, error) { + if err := c.create(ctx, r, args); err != nil { + return "", err + } + + if err := c.Start(ctx); err != nil { + return "", err + } + + if err := c.Wait(ctx); err != nil { + return "", err + } + + return c.Logs(ctx) +} + +// ConfigsFrom returns container configs from RunOpts and args. The caller should call 'CreateFrom' +// and Start. +func (c *Container) ConfigsFrom(r RunOpts, args ...string) (*container.Config, *container.HostConfig, *network.NetworkingConfig) { + return c.config(r, args), c.hostConfig(r), &network.NetworkingConfig{} +} + +// MakeLink formats a link to add to a RunOpts. 
+func (c *Container) MakeLink(target string) string { + return fmt.Sprintf("%s:%s", c.Name, target) +} + +// CreateFrom creates a container from the given configs. +func (c *Container) CreateFrom(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error { + cont, err := c.client.ContainerCreate(ctx, conf, hostconf, netconf, c.Name) + if err != nil { + return err + } + c.id = cont.ID + return nil +} + +// Create is analogous to 'docker create'. +func (c *Container) Create(ctx context.Context, r RunOpts, args ...string) error { + return c.create(ctx, r, args) +} + +func (c *Container) create(ctx context.Context, r RunOpts, args []string) error { + conf := c.config(r, args) + hostconf := c.hostConfig(r) + cont, err := c.client.ContainerCreate(ctx, conf, hostconf, nil, c.Name) + if err != nil { + return err + } + c.id = cont.ID + return nil +} + +func (c *Container) config(r RunOpts, args []string) *container.Config { + ports := nat.PortSet{} + for _, p := range r.Ports { + port := nat.Port(fmt.Sprintf("%d", p)) + ports[port] = struct{}{} + } + env := append(r.Env, fmt.Sprintf("RUNSC_TEST_NAME=%s", c.Name)) + + return &container.Config{ + Image: testutil.ImageByName(r.Image), + Cmd: args, + ExposedPorts: ports, + Env: env, + WorkingDir: r.WorkDir, + User: r.User, + } +} + +func (c *Container) hostConfig(r RunOpts) *container.HostConfig { + c.mounts = append(c.mounts, r.Mounts...) + + return &container.HostConfig{ + Runtime: c.Runtime, + Mounts: c.mounts, + PublishAllPorts: true, + Links: r.Links, + CapAdd: r.CapAdd, + CapDrop: r.CapDrop, + Privileged: r.Privileged, + ReadonlyRootfs: r.ReadOnly, + Resources: container.Resources{ + Memory: int64(r.Memory), // In bytes. + CpusetCpus: r.CpusetCpus, + }, + } +} + +// Start is analogous to 'docker start'. +func (c *Container) Start(ctx context.Context) error { + + // Open a connection to the container for parsing logs and for TTY. + streams, err := c.client.ContainerAttach(ctx, c.id, + types.ContainerAttachOptions{ + Stream: true, + Stdin: true, + Stdout: true, + Stderr: true, + }) + if err != nil { + return fmt.Errorf("failed to connect to container: %v", err) + } + + c.streams = streams + c.cleanups = append(c.cleanups, func() { + c.streams.Close() + }) + + return c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{}) +} + +// Stop is analogous to 'docker stop'. +func (c *Container) Stop(ctx context.Context) error { + return c.client.ContainerStop(ctx, c.id, nil) +} + +// Pause is analogous to'docker pause'. +func (c *Container) Pause(ctx context.Context) error { + return c.client.ContainerPause(ctx, c.id) +} + +// Unpause is analogous to 'docker unpause'. +func (c *Container) Unpause(ctx context.Context) error { + return c.client.ContainerUnpause(ctx, c.id) +} + +// Checkpoint is analogous to 'docker checkpoint'. +func (c *Container) Checkpoint(ctx context.Context, name string) error { + return c.client.CheckpointCreate(ctx, c.Name, types.CheckpointCreateOptions{CheckpointID: name, Exit: true}) +} + +// Restore is analogous to 'docker start --checkname [name]'. +func (c *Container) Restore(ctx context.Context, name string) error { + return c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{CheckpointID: name}) +} + +// Logs is analogous 'docker logs'. 
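Run and Spawn above compose the explicit Create/Start/Stop calls; a sketch of driving that lifecycle directly, under the same hypothetical package and imports as the previous sketch:

    func TestLifecycleSketch(t *testing.T) {
        ctx := context.Background()
        d := dockerutil.MakeContainer(ctx, t)
        defer d.CleanUp(ctx)

        // The explicit path that Run/Spawn wrap: create, start, then stop.
        opts := dockerutil.RunOpts{Image: "basic/nginx", Ports: []int{80}}
        if err := d.Create(ctx, opts); err != nil {
            t.Fatalf("create failed: %v", err)
        }
        if err := d.Start(ctx); err != nil {
            t.Fatalf("start failed: %v", err)
        }
        // ... exercise the container here ...
        if err := d.Stop(ctx); err != nil {
            t.Fatalf("stop failed: %v", err)
        }
    }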
+func (c *Container) Logs(ctx context.Context) (string, error) { + var out bytes.Buffer + err := c.logs(ctx, &out, &out) + return out.String(), err +} + +func (c *Container) logs(ctx context.Context, stdout, stderr *bytes.Buffer) error { + opts := types.ContainerLogsOptions{ShowStdout: true, ShowStderr: true} + writer, err := c.client.ContainerLogs(ctx, c.id, opts) + if err != nil { + return err + } + defer writer.Close() + _, err = stdcopy.StdCopy(stdout, stderr, writer) + + return err +} + +// ID returns the container id. +func (c *Container) ID() string { + return c.id +} + +// SandboxPid returns the container's pid. +func (c *Container) SandboxPid(ctx context.Context) (int, error) { + resp, err := c.client.ContainerInspect(ctx, c.id) + if err != nil { + return -1, err + } + return resp.ContainerJSONBase.State.Pid, nil +} + +// FindIP returns the IP address of the container. +func (c *Container) FindIP(ctx context.Context) (net.IP, error) { + resp, err := c.client.ContainerInspect(ctx, c.id) + if err != nil { + return nil, err + } + + ip := net.ParseIP(resp.NetworkSettings.DefaultNetworkSettings.IPAddress) + if ip == nil { + return net.IP{}, fmt.Errorf("invalid IP: %q", ip) + } + return ip, nil +} + +// FindPort returns the host port that is mapped to 'sandboxPort'. +func (c *Container) FindPort(ctx context.Context, sandboxPort int) (int, error) { + desc, err := c.client.ContainerInspect(ctx, c.id) + if err != nil { + return -1, fmt.Errorf("error retrieving port: %v", err) + } + + format := fmt.Sprintf("%d/tcp", sandboxPort) + ports, ok := desc.NetworkSettings.Ports[nat.Port(format)] + if !ok { + return -1, fmt.Errorf("error retrieving port: %v", err) + + } + + port, err := strconv.Atoi(ports[0].HostPort) + if err != nil { + return -1, fmt.Errorf("error parsing port %q: %v", port, err) + } + return port, nil +} + +// CopyFiles copies in and mounts the given files. They are always ReadOnly. +func (c *Container) CopyFiles(opts *RunOpts, target string, sources ...string) { + dir, err := ioutil.TempDir("", c.Name) + if err != nil { + c.copyErr = fmt.Errorf("ioutil.TempDir failed: %v", err) + return + } + c.cleanups = append(c.cleanups, func() { os.RemoveAll(dir) }) + if err := os.Chmod(dir, 0755); err != nil { + c.copyErr = fmt.Errorf("os.Chmod(%q, 0755) failed: %v", dir, err) + return + } + for _, name := range sources { + src, err := testutil.FindFile(name) + if err != nil { + c.copyErr = fmt.Errorf("testutil.FindFile(%q) failed: %v", name, err) + return + } + dst := path.Join(dir, path.Base(name)) + if err := testutil.Copy(src, dst); err != nil { + c.copyErr = fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err) + return + } + c.logger.Logf("copy: %s -> %s", src, dst) + } + opts.Mounts = append(opts.Mounts, mount.Mount{ + Type: mount.TypeBind, + Source: dir, + Target: target, + ReadOnly: false, + }) +} + +// Status inspects the container returns its status. +func (c *Container) Status(ctx context.Context) (types.ContainerState, error) { + resp, err := c.client.ContainerInspect(ctx, c.id) + if err != nil { + return types.ContainerState{}, err + } + return *resp.State, err +} + +// Wait waits for the container to exit. +func (c *Container) Wait(ctx context.Context) error { + statusChan, errChan := c.client.ContainerWait(ctx, c.id, container.WaitConditionNotRunning) + select { + case err := <-errChan: + return err + case <-statusChan: + return nil + } +} + +// WaitTimeout waits for the container to exit with a timeout. 
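Because hostConfig enables PublishAllPorts, exposed container ports land on arbitrary free host ports, and FindPort is how callers recover the mapping. A sketch with the same hypothetical imports:

    func TestFindPortSketch(t *testing.T) {
        ctx := context.Background()
        d := dockerutil.MakeContainer(ctx, t)
        defer d.CleanUp(ctx)

        // Expose port 80; PublishAllPorts maps it to a free host port.
        if err := d.Spawn(ctx, dockerutil.RunOpts{
            Image: "basic/nginx",
            Ports: []int{80},
        }); err != nil {
            t.Fatalf("spawn failed: %v", err)
        }
        port, err := d.FindPort(ctx, 80)
        if err != nil {
            t.Fatalf("FindPort(80) failed: %v", err)
        }
        t.Logf("container port 80 is mapped to host port %d", port)
    }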
+func (c *Container) WaitTimeout(ctx context.Context, timeout time.Duration) error { + timeoutChan := time.After(timeout) + statusChan, errChan := c.client.ContainerWait(ctx, c.id, container.WaitConditionNotRunning) + select { + case err := <-errChan: + return err + case <-statusChan: + return nil + case <-timeoutChan: + return fmt.Errorf("container %s timed out after %v seconds", c.Name, timeout.Seconds()) + } +} + +// WaitForOutput searches container logs for pattern and returns or timesout. +func (c *Container) WaitForOutput(ctx context.Context, pattern string, timeout time.Duration) (string, error) { + matches, err := c.WaitForOutputSubmatch(ctx, pattern, timeout) + if err != nil { + return "", err + } + if len(matches) == 0 { + return "", fmt.Errorf("didn't find pattern %s logs", pattern) + } + return matches[0], nil +} + +// WaitForOutputSubmatch searches container logs for the given +// pattern or times out. It returns any regexp submatches as well. +func (c *Container) WaitForOutputSubmatch(ctx context.Context, pattern string, timeout time.Duration) ([]string, error) { + re := regexp.MustCompile(pattern) + if matches := re.FindStringSubmatch(c.streamBuf.String()); matches != nil { + return matches, nil + } + + for exp := time.Now().Add(timeout); time.Now().Before(exp); { + c.streams.Conn.SetDeadline(time.Now().Add(50 * time.Millisecond)) + _, err := stdcopy.StdCopy(&c.streamBuf, &c.streamBuf, c.streams.Reader) + + if err != nil { + // check that it wasn't a timeout + if nerr, ok := err.(net.Error); !ok || !nerr.Timeout() { + return nil, err + } + } + + if matches := re.FindStringSubmatch(c.streamBuf.String()); matches != nil { + return matches, nil + } + } + + return nil, fmt.Errorf("timeout waiting for output %q: out: %s", re.String(), c.streamBuf.String()) +} + +// Kill kills the container. +func (c *Container) Kill(ctx context.Context) error { + return c.client.ContainerKill(ctx, c.id, "") +} + +// Remove is analogous to 'docker rm'. +func (c *Container) Remove(ctx context.Context) error { + // Remove the image. + remove := types.ContainerRemoveOptions{ + RemoveVolumes: c.mounts != nil, + RemoveLinks: c.links != nil, + Force: true, + } + return c.client.ContainerRemove(ctx, c.Name, remove) +} + +// CleanUp kills and deletes the container (best effort). +func (c *Container) CleanUp(ctx context.Context) { + // Kill the container. + if err := c.Kill(ctx); err != nil && !strings.Contains(err.Error(), "is not running") { + // Just log; can't do anything here. + c.logger.Logf("error killing container %q: %v", c.Name, err) + } + // Remove the image. + if err := c.Remove(ctx); err != nil { + c.logger.Logf("error removing container %q: %v", c.Name, err) + } + // Forget all mounts. + c.mounts = nil + // Execute all cleanups. + for _, c := range c.cleanups { + c() + } + c.cleanups = nil +} diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 819dd0a59..f95ae3cd1 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -22,17 +22,10 @@ import ( "io" "io/ioutil" "log" - "net" - "os" "os/exec" - "path" "regexp" "strconv" - "strings" - "syscall" - "time" - "github.com/kr/pty" "gvisor.dev/gvisor/pkg/test/testutil" ) @@ -126,596 +119,3 @@ func Save(logger testutil.Logger, image string, w io.Writer) error { cmd.Stdout = w // Send directly to the writer. return cmd.Run() } - -// MountMode describes if the mount should be ro or rw. -type MountMode int - -const ( - // ReadOnly is what the name says. 
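WaitForOutput and WaitForOutputSubmatch above poll the hijacked attach stream instead of shelling out to `docker logs`. A sketch of scraping a value from container output, assuming the same hypothetical imports plus "time":

    func TestWaitForOutputSketch(t *testing.T) {
        ctx := context.Background()
        d := dockerutil.MakeContainer(ctx, t)
        defer d.CleanUp(ctx)

        if err := d.Spawn(ctx, dockerutil.RunOpts{
            Image: "basic/alpine",
        }, "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil {
            t.Fatalf("spawn failed: %v", err)
        }
        // Block until the pattern shows up on the attached stream, or time out.
        matches, err := d.WaitForOutputSubmatch(ctx, "CapEff:\t([0-9a-f]+)\n", 5*time.Second)
        if err != nil {
            t.Fatalf("WaitForOutputSubmatch failed: %v", err)
        }
        t.Logf("effective capabilities: %s", matches[1])
    }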
- ReadOnly MountMode = iota - // ReadWrite is what the name says. - ReadWrite -) - -// String returns the mount mode argument for this MountMode. -func (m MountMode) String() string { - switch m { - case ReadOnly: - return "ro" - case ReadWrite: - return "rw" - } - panic(fmt.Sprintf("invalid mode: %d", m)) -} - -// DockerNetwork contains the name of a docker network. -type DockerNetwork struct { - logger testutil.Logger - Name string - Subnet *net.IPNet - containers []*Docker -} - -// NewDockerNetwork sets up the struct for a Docker network. Names of networks -// will be unique. -func NewDockerNetwork(logger testutil.Logger) *DockerNetwork { - return &DockerNetwork{ - logger: logger, - Name: testutil.RandomID(logger.Name()), - } -} - -// Create calls 'docker network create'. -func (n *DockerNetwork) Create(args ...string) error { - a := []string{"docker", "network", "create"} - if n.Subnet != nil { - a = append(a, fmt.Sprintf("--subnet=%s", n.Subnet)) - } - a = append(a, args...) - a = append(a, n.Name) - return testutil.Command(n.logger, a...).Run() -} - -// Connect calls 'docker network connect' with the arguments provided. -func (n *DockerNetwork) Connect(container *Docker, args ...string) error { - a := []string{"docker", "network", "connect"} - a = append(a, args...) - a = append(a, n.Name, container.Name) - if err := testutil.Command(n.logger, a...).Run(); err != nil { - return err - } - n.containers = append(n.containers, container) - return nil -} - -// Cleanup cleans up the docker network and all the containers attached to it. -func (n *DockerNetwork) Cleanup() error { - for _, c := range n.containers { - // Don't propagate the error, it might be that the container - // was already cleaned up. - if err := c.Kill(); err != nil { - n.logger.Logf("unable to kill container during cleanup: %s", err) - } - } - - if err := testutil.Command(n.logger, "docker", "network", "rm", n.Name).Run(); err != nil { - return err - } - return nil -} - -// Docker contains the name and the runtime of a docker container. -type Docker struct { - logger testutil.Logger - Runtime string - Name string - copyErr error - cleanups []func() -} - -// MakeDocker sets up the struct for a Docker container. -// -// Names of containers will be unique. -func MakeDocker(logger testutil.Logger) *Docker { - // Slashes are not allowed in container names. - name := testutil.RandomID(logger.Name()) - name = strings.ReplaceAll(name, "/", "-") - - return &Docker{ - logger: logger, - Name: name, - Runtime: *runtime, - } -} - -// CopyFiles copies in and mounts the given files. They are always ReadOnly. 
-func (d *Docker) CopyFiles(opts *RunOpts, targetDir string, sources ...string) { - dir, err := ioutil.TempDir("", d.Name) - if err != nil { - d.copyErr = fmt.Errorf("ioutil.TempDir failed: %v", err) - return - } - d.cleanups = append(d.cleanups, func() { os.RemoveAll(dir) }) - if err := os.Chmod(dir, 0755); err != nil { - d.copyErr = fmt.Errorf("os.Chmod(%q, 0755) failed: %v", dir, err) - return - } - for _, name := range sources { - src, err := testutil.FindFile(name) - if err != nil { - d.copyErr = fmt.Errorf("testutil.FindFile(%q) failed: %v", name, err) - return - } - dst := path.Join(dir, path.Base(name)) - if err := testutil.Copy(src, dst); err != nil { - d.copyErr = fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err) - return - } - d.logger.Logf("copy: %s -> %s", src, dst) - } - opts.Mounts = append(opts.Mounts, Mount{ - Source: dir, - Target: targetDir, - Mode: ReadOnly, - }) -} - -// Mount describes a mount point inside the container. -type Mount struct { - // Source is the path outside the container. - Source string - - // Target is the path inside the container. - Target string - - // Mode tells whether the mount inside the container should be readonly. - Mode MountMode -} - -// Link informs dockers that a given container needs to be made accessible from -// the container being configured. -type Link struct { - // Source is the container to connect to. - Source *Docker - - // Target is the alias for the container. - Target string -} - -// RunOpts are options for running a container. -type RunOpts struct { - // Image is the image relative to images/. This will be mangled - // appropriately, to ensure that only first-party images are used. - Image string - - // Memory is the memory limit in kB. - Memory int - - // Ports are the ports to be allocated. - Ports []int - - // WorkDir sets the working directory. - WorkDir string - - // ReadOnly sets the read-only flag. - ReadOnly bool - - // Env are additional environment variables. - Env []string - - // User is the user to use. - User string - - // Privileged enables privileged mode. - Privileged bool - - // CapAdd are the extra set of capabilities to add. - CapAdd []string - - // CapDrop are the extra set of capabilities to drop. - CapDrop []string - - // Pty indicates that a pty will be allocated. If this is non-nil, then - // this will run after start-up with the *exec.Command and Pty file - // passed in to the function. - Pty func(*exec.Cmd, *os.File) - - // Foreground indicates that the container should be run in the - // foreground. If this is true, then the output will be available as a - // return value from the Run function. - Foreground bool - - // Mounts is the list of directories/files to be mounted inside the container. - Mounts []Mount - - // Links is the list of containers to be connected to the container. - Links []Link - - // Extra are extra arguments that may be passed. - Extra []string -} - -// args returns common arguments. -// -// Note that this does not define the complete behavior. 
-func (d *Docker) argsFor(r *RunOpts, command string, p []string) (rv []string) { - isExec := command == "exec" - isRun := command == "run" - - if isRun || isExec { - rv = append(rv, "-i") - } - if r.Pty != nil { - rv = append(rv, "-t") - } - if r.User != "" { - rv = append(rv, fmt.Sprintf("--user=%s", r.User)) - } - if r.Privileged { - rv = append(rv, "--privileged") - } - for _, c := range r.CapAdd { - rv = append(rv, fmt.Sprintf("--cap-add=%s", c)) - } - for _, c := range r.CapDrop { - rv = append(rv, fmt.Sprintf("--cap-drop=%s", c)) - } - for _, e := range r.Env { - rv = append(rv, fmt.Sprintf("--env=%s", e)) - } - if r.WorkDir != "" { - rv = append(rv, fmt.Sprintf("--workdir=%s", r.WorkDir)) - } - if !isExec { - if r.Memory != 0 { - rv = append(rv, fmt.Sprintf("--memory=%dk", r.Memory)) - } - for _, p := range r.Ports { - rv = append(rv, fmt.Sprintf("--publish=%d", p)) - } - if r.ReadOnly { - rv = append(rv, fmt.Sprintf("--read-only")) - } - if len(p) > 0 { - rv = append(rv, "--entrypoint=") - } - } - - // Always attach the test environment & Extra. - rv = append(rv, fmt.Sprintf("--env=RUNSC_TEST_NAME=%s", d.Name)) - rv = append(rv, r.Extra...) - - // Attach necessary bits. - if isExec { - rv = append(rv, d.Name) - } else { - for _, m := range r.Mounts { - rv = append(rv, fmt.Sprintf("-v=%s:%s:%v", m.Source, m.Target, m.Mode)) - } - for _, l := range r.Links { - rv = append(rv, fmt.Sprintf("--link=%s:%s", l.Source.Name, l.Target)) - } - - if len(d.Runtime) > 0 { - rv = append(rv, fmt.Sprintf("--runtime=%s", d.Runtime)) - } - rv = append(rv, fmt.Sprintf("--name=%s", d.Name)) - rv = append(rv, testutil.ImageByName(r.Image)) - } - - // Attach other arguments. - rv = append(rv, p...) - return rv -} - -// run runs a complete command. -func (d *Docker) run(r RunOpts, command string, p ...string) (string, error) { - if d.copyErr != nil { - return "", d.copyErr - } - basicArgs := []string{"docker"} - if command == "spawn" { - command = "run" - basicArgs = append(basicArgs, command) - basicArgs = append(basicArgs, "-d") - } else { - basicArgs = append(basicArgs, command) - } - customArgs := d.argsFor(&r, command, p) - cmd := testutil.Command(d.logger, append(basicArgs, customArgs...)...) - if r.Pty != nil { - // If allocating a terminal, then we just ignore the output - // from the command. - ptmx, err := pty.Start(cmd.Cmd) - if err != nil { - return "", err - } - defer cmd.Wait() // Best effort. - r.Pty(cmd.Cmd, ptmx) - } else { - // Can't support PTY or streaming. - out, err := cmd.CombinedOutput() - return string(out), err - } - return "", nil -} - -// Create calls 'docker create' with the arguments provided. -func (d *Docker) Create(r RunOpts, args ...string) error { - out, err := d.run(r, "create", args...) - if strings.Contains(out, "Unable to find image") { - return fmt.Errorf("unable to find image, did you remember to `make load-%s`: %w", r.Image, err) - } - return err -} - -// Start calls 'docker start'. -func (d *Docker) Start() error { - return testutil.Command(d.logger, "docker", "start", d.Name).Run() -} - -// Stop calls 'docker stop'. -func (d *Docker) Stop() error { - return testutil.Command(d.logger, "docker", "stop", d.Name).Run() -} - -// Run calls 'docker run' with the arguments provided. -func (d *Docker) Run(r RunOpts, args ...string) (string, error) { - return d.run(r, "run", args...) -} - -// Spawn starts the container and detaches. -func (d *Docker) Spawn(r RunOpts, args ...string) error { - _, err := d.run(r, "spawn", args...) 
- return err -} - -// Logs calls 'docker logs'. -func (d *Docker) Logs() (string, error) { - // Don't capture the output; since it will swamp the logs. - out, err := exec.Command("docker", "logs", d.Name).CombinedOutput() - return string(out), err -} - -// Exec calls 'docker exec' with the arguments provided. -func (d *Docker) Exec(r RunOpts, args ...string) (string, error) { - return d.run(r, "exec", args...) -} - -// Pause calls 'docker pause'. -func (d *Docker) Pause() error { - return testutil.Command(d.logger, "docker", "pause", d.Name).Run() -} - -// Unpause calls 'docker pause'. -func (d *Docker) Unpause() error { - return testutil.Command(d.logger, "docker", "unpause", d.Name).Run() -} - -// Checkpoint calls 'docker checkpoint'. -func (d *Docker) Checkpoint(name string) error { - return testutil.Command(d.logger, "docker", "checkpoint", "create", d.Name, name).Run() -} - -// Restore calls 'docker start --checkname [name]'. -func (d *Docker) Restore(name string) error { - return testutil.Command(d.logger, "docker", "start", fmt.Sprintf("--checkpoint=%s", name), d.Name).Run() -} - -// Kill calls 'docker kill'. -func (d *Docker) Kill() error { - // Skip logging this command, it will likely be an error. - out, err := exec.Command("docker", "kill", d.Name).CombinedOutput() - if err != nil && !strings.Contains(string(out), "is not running") { - return err - } - return nil -} - -// Remove calls 'docker rm'. -func (d *Docker) Remove() error { - return testutil.Command(d.logger, "docker", "rm", d.Name).Run() -} - -// CleanUp kills and deletes the container (best effort). -func (d *Docker) CleanUp() { - // Kill the container. - if err := d.Kill(); err != nil { - // Just log; can't do anything here. - d.logger.Logf("error killing container %q: %v", d.Name, err) - } - // Remove the image. - if err := d.Remove(); err != nil { - d.logger.Logf("error removing container %q: %v", d.Name, err) - } - // Execute all cleanups. - for _, c := range d.cleanups { - c() - } - d.cleanups = nil -} - -// FindPort returns the host port that is mapped to 'sandboxPort'. This calls -// docker to allocate a free port in the host and prevent conflicts. -func (d *Docker) FindPort(sandboxPort int) (int, error) { - format := fmt.Sprintf(`{{ (index (index .NetworkSettings.Ports "%d/tcp") 0).HostPort }}`, sandboxPort) - out, err := testutil.Command(d.logger, "docker", "inspect", "-f", format, d.Name).CombinedOutput() - if err != nil { - return -1, fmt.Errorf("error retrieving port: %v", err) - } - port, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - return -1, fmt.Errorf("error parsing port %q: %v", out, err) - } - return port, nil -} - -// FindIP returns the IP address of the container. -func (d *Docker) FindIP() (net.IP, error) { - const format = `{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}` - out, err := testutil.Command(d.logger, "docker", "inspect", "-f", format, d.Name).CombinedOutput() - if err != nil { - return net.IP{}, fmt.Errorf("error retrieving IP: %v", err) - } - ip := net.ParseIP(strings.TrimSpace(string(out))) - if ip == nil { - return net.IP{}, fmt.Errorf("invalid IP: %q", string(out)) - } - return ip, nil -} - -// A NetworkInterface is container's network interface information. -type NetworkInterface struct { - IPv4 net.IP - MAC net.HardwareAddr -} - -// ListNetworks returns the network interfaces of the container, keyed by -// Docker network name. 
-func (d *Docker) ListNetworks() (map[string]NetworkInterface, error) { - const format = `{{json .NetworkSettings.Networks}}` - out, err := testutil.Command(d.logger, "docker", "inspect", "-f", format, d.Name).CombinedOutput() - if err != nil { - return nil, fmt.Errorf("error network interfaces: %q: %w", string(out), err) - } - - networks := map[string]map[string]string{} - if err := json.Unmarshal(out, &networks); err != nil { - return nil, fmt.Errorf("error decoding network interfaces: %w", err) - } - - interfaces := map[string]NetworkInterface{} - for name, iface := range networks { - var netface NetworkInterface - - rawIP := strings.TrimSpace(iface["IPAddress"]) - if rawIP != "" { - ip := net.ParseIP(rawIP) - if ip == nil { - return nil, fmt.Errorf("invalid IP: %q", rawIP) - } - // Docker's IPAddress field is IPv4. The IPv6 address - // is stored in the GlobalIPv6Address field. - netface.IPv4 = ip - } - - rawMAC := strings.TrimSpace(iface["MacAddress"]) - if rawMAC != "" { - mac, err := net.ParseMAC(rawMAC) - if err != nil { - return nil, fmt.Errorf("invalid MAC: %q: %w", rawMAC, err) - } - netface.MAC = mac - } - - interfaces[name] = netface - } - - return interfaces, nil -} - -// SandboxPid returns the PID to the sandbox process. -func (d *Docker) SandboxPid() (int, error) { - out, err := testutil.Command(d.logger, "docker", "inspect", "-f={{.State.Pid}}", d.Name).CombinedOutput() - if err != nil { - return -1, fmt.Errorf("error retrieving pid: %v", err) - } - pid, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - return -1, fmt.Errorf("error parsing pid %q: %v", out, err) - } - return pid, nil -} - -// ID returns the container ID. -func (d *Docker) ID() (string, error) { - out, err := testutil.Command(d.logger, "docker", "inspect", "-f={{.Id}}", d.Name).CombinedOutput() - if err != nil { - return "", fmt.Errorf("error retrieving ID: %v", err) - } - return strings.TrimSpace(string(out)), nil -} - -// Wait waits for container to exit, up to the given timeout. Returns error if -// wait fails or timeout is hit. Returns the application return code otherwise. -// Note that the application may have failed even if err == nil, always check -// the exit code. -func (d *Docker) Wait(timeout time.Duration) (syscall.WaitStatus, error) { - timeoutChan := time.After(timeout) - waitChan := make(chan (syscall.WaitStatus)) - errChan := make(chan (error)) - - go func() { - out, err := testutil.Command(d.logger, "docker", "wait", d.Name).CombinedOutput() - if err != nil { - errChan <- fmt.Errorf("error waiting for container %q: %v", d.Name, err) - } - exit, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n")) - if err != nil { - errChan <- fmt.Errorf("error parsing exit code %q: %v", out, err) - } - waitChan <- syscall.WaitStatus(uint32(exit)) - }() - - select { - case ws := <-waitChan: - return ws, nil - case err := <-errChan: - return syscall.WaitStatus(1), err - case <-timeoutChan: - return syscall.WaitStatus(1), fmt.Errorf("timeout waiting for container %q", d.Name) - } -} - -// WaitForOutput calls 'docker logs' to retrieve containers output and searches -// for the given pattern. -func (d *Docker) WaitForOutput(pattern string, timeout time.Duration) (string, error) { - matches, err := d.WaitForOutputSubmatch(pattern, timeout) - if err != nil { - return "", err - } - if len(matches) == 0 { - return "", nil - } - return matches[0], nil -} - -// WaitForOutputSubmatch calls 'docker logs' to retrieve containers output and -// searches for the given pattern. 
It returns any regexp submatches as well. -func (d *Docker) WaitForOutputSubmatch(pattern string, timeout time.Duration) ([]string, error) { - re := regexp.MustCompile(pattern) - var ( - lastOut string - stopped bool - ) - for exp := time.Now().Add(timeout); time.Now().Before(exp); { - out, err := d.Logs() - if err != nil { - return nil, err - } - if out != lastOut { - if lastOut == "" { - d.logger.Logf("output (start): %s", out) - } else if strings.HasPrefix(out, lastOut) { - d.logger.Logf("output (contn): %s", out[len(lastOut):]) - } else { - d.logger.Logf("output (trunc): %s", out) - } - lastOut = out // Save for future. - if matches := re.FindStringSubmatch(lastOut); matches != nil { - return matches, nil // Success! - } - } else if stopped { - // The sandbox stopped and we looked at the - // logs at least once since determining that. - return nil, fmt.Errorf("no longer running: %v", err) - } else if pid, err := d.SandboxPid(); pid == 0 || err != nil { - // The sandbox may have stopped, but it's - // possible that it has emitted the terminal - // line between the last call to Logs and here. - stopped = true - } - time.Sleep(100 * time.Millisecond) - } - return nil, fmt.Errorf("timeout waiting for output %q: %s", re.String(), lastOut) -} diff --git a/pkg/test/dockerutil/exec.go b/pkg/test/dockerutil/exec.go new file mode 100644 index 000000000..921d1da9e --- /dev/null +++ b/pkg/test/dockerutil/exec.go @@ -0,0 +1,194 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dockerutil + +import ( + "bytes" + "context" + "fmt" + "time" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/pkg/stdcopy" +) + +// ExecOpts holds arguments for Exec calls. +type ExecOpts struct { + // Env are additional environment variables. + Env []string + + // Privileged enables privileged mode. + Privileged bool + + // User is the user to use. + User string + + // Enables Tty and stdin for the created process. + UseTTY bool + + // WorkDir is the working directory of the process. + WorkDir string +} + +// Exec creates a process inside the container. +func (c *Container) Exec(ctx context.Context, opts ExecOpts, args ...string) (string, error) { + p, err := c.doExec(ctx, opts, args) + if err != nil { + return "", err + } + + if exitStatus, err := p.WaitExitStatus(ctx); err != nil { + return "", err + } else if exitStatus != 0 { + out, _ := p.Logs() + return out, fmt.Errorf("process terminated with status: %d", exitStatus) + } + + return p.Logs() +} + +// ExecProcess creates a process inside the container and returns a process struct +// for the caller to use. 
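Exec above wraps ContainerExecCreate/Attach/Start, blocks until the process exits, and returns its combined output. A sketch, same hypothetical imports as the earlier sketches:

    func TestExecSketch(t *testing.T) {
        ctx := context.Background()
        d := dockerutil.MakeContainer(ctx, t)
        defer d.CleanUp(ctx)

        if err := d.Spawn(ctx, dockerutil.RunOpts{
            Image: "basic/alpine",
        }, "sleep", "1000"); err != nil {
            t.Fatalf("spawn failed: %v", err)
        }
        // One-shot exec: waits for the process and returns its combined output.
        out, err := d.Exec(ctx, dockerutil.ExecOpts{User: "root"}, "id", "-u")
        if err != nil {
            t.Fatalf("exec failed: %v", err)
        }
        t.Logf("uid: %s", out)
    }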
+func (c *Container) ExecProcess(ctx context.Context, opts ExecOpts, args ...string) (Process, error) { + return c.doExec(ctx, opts, args) +} + +func (c *Container) doExec(ctx context.Context, r ExecOpts, args []string) (Process, error) { + config := c.execConfig(r, args) + resp, err := c.client.ContainerExecCreate(ctx, c.id, config) + if err != nil { + return Process{}, fmt.Errorf("exec create failed with err: %v", err) + } + + hijack, err := c.client.ContainerExecAttach(ctx, resp.ID, types.ExecStartCheck{}) + if err != nil { + return Process{}, fmt.Errorf("exec attach failed with err: %v", err) + } + + if err := c.client.ContainerExecStart(ctx, resp.ID, types.ExecStartCheck{}); err != nil { + hijack.Close() + return Process{}, fmt.Errorf("exec start failed with err: %v", err) + } + + return Process{ + container: c, + execid: resp.ID, + conn: hijack, + }, nil + +} + +func (c *Container) execConfig(r ExecOpts, cmd []string) types.ExecConfig { + env := append(r.Env, fmt.Sprintf("RUNSC_TEST_NAME=%s", c.Name)) + return types.ExecConfig{ + AttachStdin: r.UseTTY, + AttachStderr: true, + AttachStdout: true, + Cmd: cmd, + Privileged: r.Privileged, + WorkingDir: r.WorkDir, + Env: env, + Tty: r.UseTTY, + User: r.User, + } + +} + +// Process represents a containerized process. +type Process struct { + container *Container + execid string + conn types.HijackedResponse +} + +// Write writes buf to the process's stdin. +func (p *Process) Write(timeout time.Duration, buf []byte) (int, error) { + p.conn.Conn.SetDeadline(time.Now().Add(timeout)) + return p.conn.Conn.Write(buf) +} + +// Read returns process's stdout and stderr. +func (p *Process) Read() (string, string, error) { + var stdout, stderr bytes.Buffer + if err := p.read(&stdout, &stderr); err != nil { + return "", "", err + } + return stdout.String(), stderr.String(), nil +} + +// Logs returns combined stdout/stderr from the process. +func (p *Process) Logs() (string, error) { + var out bytes.Buffer + if err := p.read(&out, &out); err != nil { + return "", err + } + return out.String(), nil +} + +func (p *Process) read(stdout, stderr *bytes.Buffer) error { + _, err := stdcopy.StdCopy(stdout, stderr, p.conn.Reader) + return err +} + +// ExitCode returns the process's exit code. +func (p *Process) ExitCode(ctx context.Context) (int, error) { + _, exitCode, err := p.runningExitCode(ctx) + return exitCode, err +} + +// IsRunning checks if the process is running. +func (p *Process) IsRunning(ctx context.Context) (bool, error) { + running, _, err := p.runningExitCode(ctx) + return running, err +} + +// WaitExitStatus until process completes and returns exit status. +func (p *Process) WaitExitStatus(ctx context.Context) (int, error) { + waitChan := make(chan (int)) + errChan := make(chan (error)) + + go func() { + for { + running, exitcode, err := p.runningExitCode(ctx) + if err != nil { + errChan <- fmt.Errorf("error waiting process %s: container %v", p.execid, p.container.Name) + } + if !running { + waitChan <- exitcode + } + time.Sleep(time.Millisecond * 500) + } + }() + + select { + case ws := <-waitChan: + return ws, nil + case err := <-errChan: + return -1, err + } +} + +// runningExitCode collects if the process is running and the exit code. +// The exit code is only valid if the process has exited. +func (p *Process) runningExitCode(ctx context.Context) (bool, int, error) { + // If execid is not empty, this is a execed process. 
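ExecProcess plus the Process type cover the interactive case that the old pty-based Exec handled. A sketch of driving a shell over a TTY and checking its exit status (same hypothetical imports plus "time"; the exit code 7 is arbitrary):

    func TestExecTTYSketch(t *testing.T) {
        ctx := context.Background()
        d := dockerutil.MakeContainer(ctx, t)
        defer d.CleanUp(ctx)

        if err := d.Spawn(ctx, dockerutil.RunOpts{
            Image: "basic/alpine",
        }, "sleep", "1000"); err != nil {
            t.Fatalf("spawn failed: %v", err)
        }
        // Interactive exec: drive stdin over the hijacked connection.
        p, err := d.ExecProcess(ctx, dockerutil.ExecOpts{UseTTY: true}, "/bin/sh")
        if err != nil {
            t.Fatalf("exec failed: %v", err)
        }
        if _, err := p.Write(time.Second, []byte("exit 7\n")); err != nil {
            t.Fatalf("write failed: %v", err)
        }
        if got, err := p.WaitExitStatus(ctx); err != nil || got != 7 {
            t.Fatalf("exit status: got (%d, %v), want (7, nil)", got, err)
        }
    }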
+ if p.execid != "" { + status, err := p.container.client.ContainerExecInspect(ctx, p.execid) + return status.Running, status.ExitCode, err + } + // else this is the root process. + status, err := p.container.Status(ctx) + return status.Running, status.ExitCode, err +} diff --git a/pkg/test/dockerutil/network.go b/pkg/test/dockerutil/network.go new file mode 100644 index 000000000..047091e75 --- /dev/null +++ b/pkg/test/dockerutil/network.go @@ -0,0 +1,113 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dockerutil + +import ( + "context" + "net" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/network" + "github.com/docker/docker/client" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +// Network is a docker network. +type Network struct { + client *client.Client + id string + logger testutil.Logger + Name string + containers []*Container + Subnet *net.IPNet +} + +// NewNetwork sets up the struct for a Docker network. Names of networks +// will be unique. +func NewNetwork(ctx context.Context, logger testutil.Logger) *Network { + client, err := client.NewClientWithOpts(client.FromEnv) + if err != nil { + logger.Logf("create client failed with: %v", err) + return nil + } + client.NegotiateAPIVersion(ctx) + + return &Network{ + logger: logger, + Name: testutil.RandomID(logger.Name()), + client: client, + } +} + +func (n *Network) networkCreate() types.NetworkCreate { + + var subnet string + if n.Subnet != nil { + subnet = n.Subnet.String() + } + + ipam := network.IPAM{ + Config: []network.IPAMConfig{{ + Subnet: subnet, + }}, + } + + return types.NetworkCreate{ + CheckDuplicate: true, + IPAM: &ipam, + } +} + +// Create is analogous to 'docker network create'. +func (n *Network) Create(ctx context.Context) error { + + opts := n.networkCreate() + resp, err := n.client.NetworkCreate(ctx, n.Name, opts) + if err != nil { + return err + } + n.id = resp.ID + return nil +} + +// Connect is analogous to 'docker network connect' with the arguments provided. +func (n *Network) Connect(ctx context.Context, container *Container, ipv4, ipv6 string) error { + settings := network.EndpointSettings{ + IPAMConfig: &network.EndpointIPAMConfig{ + IPv4Address: ipv4, + IPv6Address: ipv6, + }, + } + err := n.client.NetworkConnect(ctx, n.id, container.id, &settings) + if err == nil { + n.containers = append(n.containers, container) + } + return err +} + +// Inspect returns this network's info. +func (n *Network) Inspect(ctx context.Context) (types.NetworkResource, error) { + return n.client.NetworkInspect(ctx, n.id, types.NetworkInspectOptions{Verbose: true}) +} + +// Cleanup cleans up the docker network and all the containers attached to it. 
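Network mirrors the old DockerNetwork helper on top of the client API. A sketch of creating a network, attaching a container, and tearing both down (same hypothetical imports; empty addresses let Docker's IPAM assign them):

    func TestNetworkSketch(t *testing.T) {
        ctx := context.Background()
        n := dockerutil.NewNetwork(ctx, t)
        if err := n.Create(ctx); err != nil {
            t.Fatalf("network create failed: %v", err)
        }
        // Cleanup also tears down any containers connected below.
        defer n.Cleanup(ctx)

        d := dockerutil.MakeContainer(ctx, t)
        if err := d.Create(ctx, dockerutil.RunOpts{Image: "basic/alpine"}, "sleep", "1000"); err != nil {
            t.Fatalf("container create failed: %v", err)
        }
        // Empty addresses let Docker's IPAM pick them from the network's subnet.
        if err := n.Connect(ctx, d, "", ""); err != nil {
            t.Fatalf("network connect failed: %v", err)
        }
        if err := d.Start(ctx); err != nil {
            t.Fatalf("start failed: %v", err)
        }
    }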
+func (n *Network) Cleanup(ctx context.Context) error { + for _, c := range n.containers { + c.CleanUp(ctx) + } + n.containers = nil + + return n.client.NetworkRemove(ctx, n.id) +} diff --git a/test/e2e/BUILD b/test/e2e/BUILD index 44cce0e3b..29a84f184 100644 --- a/test/e2e/BUILD +++ b/test/e2e/BUILD @@ -23,6 +23,7 @@ go_test( "//pkg/test/dockerutil", "//pkg/test/testutil", "//runsc/specutils", + "@com_github_docker_docker//api/types/mount:go_default_library", ], ) diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go index 6a63b1232..b47df447c 100644 --- a/test/e2e/exec_test.go +++ b/test/e2e/exec_test.go @@ -22,12 +22,10 @@ package integration import ( + "context" "fmt" - "os" - "os/exec" "strconv" "strings" - "syscall" "testing" "time" @@ -39,18 +37,19 @@ import ( // Test that exec uses the exact same capability set as the container. func TestExecCapabilities(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container. - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", }, "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil { t.Fatalf("docker run failed: %v", err) } // Check that capability. - matches, err := d.WaitForOutputSubmatch("CapEff:\t([0-9a-f]+)\n", 5*time.Second) + matches, err := d.WaitForOutputSubmatch(ctx, "CapEff:\t([0-9a-f]+)\n", 5*time.Second) if err != nil { t.Fatalf("WaitForOutputSubmatch() timeout: %v", err) } @@ -61,7 +60,7 @@ func TestExecCapabilities(t *testing.T) { t.Log("Root capabilities:", want) // Now check that exec'd process capabilities match the root. - got, err := d.Exec(dockerutil.RunOpts{}, "grep", "CapEff:", "/proc/self/status") + got, err := d.Exec(ctx, dockerutil.ExecOpts{}, "grep", "CapEff:", "/proc/self/status") if err != nil { t.Fatalf("docker exec failed: %v", err) } @@ -74,11 +73,12 @@ func TestExecCapabilities(t *testing.T) { // Test that 'exec --privileged' adds all capabilities, except for CAP_NET_RAW // which is removed from the container when --net-raw=false. func TestExecPrivileged(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container with all capabilities dropped. - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", CapDrop: []string{"all"}, }, "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil { @@ -86,7 +86,7 @@ func TestExecPrivileged(t *testing.T) { } // Check that all capabilities where dropped from container. - matches, err := d.WaitForOutputSubmatch("CapEff:\t([0-9a-f]+)\n", 5*time.Second) + matches, err := d.WaitForOutputSubmatch(ctx, "CapEff:\t([0-9a-f]+)\n", 5*time.Second) if err != nil { t.Fatalf("WaitForOutputSubmatch() timeout: %v", err) } @@ -104,7 +104,7 @@ func TestExecPrivileged(t *testing.T) { // Check that 'exec --privileged' adds all capabilities, except for // CAP_NET_RAW. - got, err := d.Exec(dockerutil.RunOpts{ + got, err := d.Exec(ctx, dockerutil.ExecOpts{ Privileged: true, }, "grep", "CapEff:", "/proc/self/status") if err != nil { @@ -118,76 +118,59 @@ func TestExecPrivileged(t *testing.T) { } func TestExecJobControl(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container. 
- if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", }, "sleep", "1000"); err != nil { t.Fatalf("docker run failed: %v", err) } - // Exec 'sh' with an attached pty. - if _, err := d.Exec(dockerutil.RunOpts{ - Pty: func(cmd *exec.Cmd, ptmx *os.File) { - // Call "sleep 100 | cat" in the shell. We pipe to cat - // so that there will be two processes in the - // foreground process group. - if _, err := ptmx.Write([]byte("sleep 100 | cat\n")); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // Give shell a few seconds to start executing the sleep. - time.Sleep(2 * time.Second) - - // Send a ^C to the pty, which should kill sleep and - // cat, but not the shell. \x03 is ASCII "end of - // text", which is the same as ^C. - if _, err := ptmx.Write([]byte{'\x03'}); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // The shell should still be alive at this point. Sleep - // should have exited with code 2+128=130. We'll exit - // with 10 plus that number, so that we can be sure - // that the shell did not get signalled. - if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // Exec process should exit with code 10+130=140. - ps, err := cmd.Process.Wait() - if err != nil { - t.Fatalf("error waiting for exec process: %v", err) - } - ws := ps.Sys().(syscall.WaitStatus) - if !ws.Exited() { - t.Errorf("ws.Exited got false, want true") - } - if got, want := ws.ExitStatus(), 140; got != want { - t.Errorf("ws.ExitedStatus got %d, want %d", got, want) - } - }, - }, "sh"); err != nil { + p, err := d.ExecProcess(ctx, dockerutil.ExecOpts{UseTTY: true}, "/bin/sh") + if err != nil { t.Fatalf("docker exec failed: %v", err) } + + if _, err = p.Write(time.Second, []byte("sleep 100 | cat\n")); err != nil { + t.Fatalf("error exit: %v", err) + } + time.Sleep(time.Second) + + if _, err = p.Write(time.Second, []byte{0x03}); err != nil { + t.Fatalf("error exit: %v", err) + } + + if _, err = p.Write(time.Second, []byte("exit $(expr $? + 10)\n")); err != nil { + t.Fatalf("error exit: %v", err) + } + + want := 140 + got, err := p.WaitExitStatus(ctx) + if err != nil { + t.Fatalf("wait for exit failed with: %v", err) + } else if got != want { + t.Fatalf("wait for exit returned: %d want: %d", got, want) + } } // Test that failure to exec returns proper error message. func TestExecError(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container. - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", }, "sleep", "1000"); err != nil { t.Fatalf("docker run failed: %v", err) } // Attempt to exec a binary that doesn't exist. - out, err := d.Exec(dockerutil.RunOpts{}, "no_can_find") + out, err := d.Exec(ctx, dockerutil.ExecOpts{}, "no_can_find") if err == nil { t.Fatalf("docker exec didn't fail") } @@ -198,11 +181,12 @@ func TestExecError(t *testing.T) { // Test that exec inherits environment from run. func TestExecEnv(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container with env FOO=BAR. 
- if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", Env: []string{"FOO=BAR"}, }, "sleep", "1000"); err != nil { @@ -210,7 +194,7 @@ func TestExecEnv(t *testing.T) { } // Exec "echo $FOO". - got, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", "echo $FOO") + got, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", "echo $FOO") if err != nil { t.Fatalf("docker exec failed: %v", err) } @@ -222,11 +206,12 @@ func TestExecEnv(t *testing.T) { // TestRunEnvHasHome tests that run always has HOME environment set. func TestRunEnvHasHome(t *testing.T) { // Base alpine image does not have any environment variables set. - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Exec "echo $HOME". The 'bin' user's home dir is '/bin'. - got, err := d.Run(dockerutil.RunOpts{ + got, err := d.Run(ctx, dockerutil.RunOpts{ Image: "basic/alpine", User: "bin", }, "/bin/sh", "-c", "echo $HOME") @@ -243,17 +228,18 @@ func TestRunEnvHasHome(t *testing.T) { // Test that exec always has HOME environment set, even when not set in run. func TestExecEnvHasHome(t *testing.T) { // Base alpine image does not have any environment variables set. - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", }, "sleep", "1000"); err != nil { t.Fatalf("docker run failed: %v", err) } // Exec "echo $HOME", and expect to see "/root". - got, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", "echo $HOME") + got, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", "echo $HOME") if err != nil { t.Fatalf("docker exec failed: %v", err) } @@ -265,12 +251,12 @@ func TestExecEnvHasHome(t *testing.T) { newUID := 1234 newHome := "/foo/bar" cmd := fmt.Sprintf("mkdir -p -m 777 %q && adduser foo -D -u %d -h %q", newHome, newUID, newHome) - if _, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", cmd); err != nil { + if _, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", cmd); err != nil { t.Fatalf("docker exec failed: %v", err) } // Execute the same as the new user and expect newHome. - got, err = d.Exec(dockerutil.RunOpts{ + got, err = d.Exec(ctx, dockerutil.ExecOpts{ User: strconv.Itoa(newUID), }, "/bin/sh", "-c", "echo $HOME") if err != nil { diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 60e739c6a..5a9455b33 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -22,20 +22,20 @@ package integration import ( + "context" "flag" "fmt" "io/ioutil" "net" "net/http" "os" - "os/exec" "path/filepath" "strconv" "strings" - "syscall" "testing" "time" + "github.com/docker/docker/api/types/mount" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/pkg/test/testutil" ) @@ -56,22 +56,23 @@ func httpRequestSucceeds(client http.Client, server string, port int) error { // TestLifeCycle tests a basic Create/Start/Stop docker container life cycle. func TestLifeCycle(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container. 
- if err := d.Create(dockerutil.RunOpts{ + if err := d.Create(ctx, dockerutil.RunOpts{ Image: "basic/nginx", Ports: []int{80}, }); err != nil { t.Fatalf("docker create failed: %v", err) } - if err := d.Start(); err != nil { + if err := d.Start(ctx); err != nil { t.Fatalf("docker start failed: %v", err) } // Test that container is working. - port, err := d.FindPort(80) + port, err := d.FindPort(ctx, 80) if err != nil { t.Fatalf("docker.FindPort(80) failed: %v", err) } @@ -83,10 +84,10 @@ func TestLifeCycle(t *testing.T) { t.Errorf("http request failed: %v", err) } - if err := d.Stop(); err != nil { + if err := d.Stop(ctx); err != nil { t.Fatalf("docker stop failed: %v", err) } - if err := d.Remove(); err != nil { + if err := d.Remove(ctx); err != nil { t.Fatalf("docker rm failed: %v", err) } } @@ -96,11 +97,12 @@ func TestPauseResume(t *testing.T) { t.Skip("Checkpoint is not supported.") } - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container. - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/python", Ports: []int{8080}, // See Dockerfile. }); err != nil { @@ -108,7 +110,7 @@ func TestPauseResume(t *testing.T) { } // Find where port 8080 is mapped to. - port, err := d.FindPort(8080) + port, err := d.FindPort(ctx, 8080) if err != nil { t.Fatalf("docker.FindPort(8080) failed: %v", err) } @@ -124,7 +126,7 @@ func TestPauseResume(t *testing.T) { t.Error("http request failed:", err) } - if err := d.Pause(); err != nil { + if err := d.Pause(ctx); err != nil { t.Fatalf("docker pause failed: %v", err) } @@ -140,7 +142,7 @@ func TestPauseResume(t *testing.T) { t.Errorf("http req got unexpected error %v", v) } - if err := d.Unpause(); err != nil { + if err := d.Unpause(ctx); err != nil { t.Fatalf("docker unpause failed: %v", err) } @@ -160,11 +162,12 @@ func TestCheckpointRestore(t *testing.T) { t.Skip("Pause/resume is not supported.") } - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container. - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/python", Ports: []int{8080}, // See Dockerfile. }); err != nil { @@ -172,20 +175,20 @@ func TestCheckpointRestore(t *testing.T) { } // Create a snapshot. - if err := d.Checkpoint("test"); err != nil { + if err := d.Checkpoint(ctx, "test"); err != nil { t.Fatalf("docker checkpoint failed: %v", err) } - if _, err := d.Wait(30 * time.Second); err != nil { + if err := d.WaitTimeout(ctx, 30*time.Second); err != nil { t.Fatalf("wait failed: %v", err) } // TODO(b/143498576): Remove Poll after github.com/moby/moby/issues/38963 is fixed. - if err := testutil.Poll(func() error { return d.Restore("test") }, 15*time.Second); err != nil { + if err := testutil.Poll(func() error { return d.Restore(ctx, "test") }, 15*time.Second); err != nil { t.Fatalf("docker restore failed: %v", err) } // Find where port 8080 is mapped to. - port, err := d.FindPort(8080) + port, err := d.FindPort(ctx, 8080) if err != nil { t.Fatalf("docker.FindPort(8080) failed: %v", err) } @@ -204,26 +207,27 @@ func TestCheckpointRestore(t *testing.T) { // Create client and server that talk to each other using the local IP. 
func TestConnectToSelf(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Creates server that replies "server" and exists. Sleeps at the end because // 'docker exec' gets killed if the init process exists before it can finish. - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/ubuntu", }, "/bin/sh", "-c", "echo server | nc -l -p 8080 && sleep 1"); err != nil { t.Fatalf("docker run failed: %v", err) } // Finds IP address for host. - ip, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", "cat /etc/hosts | grep ${HOSTNAME} | awk '{print $1}'") + ip, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", "cat /etc/hosts | grep ${HOSTNAME} | awk '{print $1}'") if err != nil { t.Fatalf("docker exec failed: %v", err) } ip = strings.TrimRight(ip, "\n") // Runs client that sends "client" to the server and exits. - reply, err := d.Exec(dockerutil.RunOpts{}, "/bin/sh", "-c", fmt.Sprintf("echo client | nc %s 8080", ip)) + reply, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/bin/sh", "-c", fmt.Sprintf("echo client | nc %s 8080", ip)) if err != nil { t.Fatalf("docker exec failed: %v", err) } @@ -232,21 +236,22 @@ func TestConnectToSelf(t *testing.T) { if want := "server\n"; reply != want { t.Errorf("Error on server, want: %q, got: %q", want, reply) } - if _, err := d.WaitForOutput("^client\n$", 1*time.Second); err != nil { + if _, err := d.WaitForOutput(ctx, "^client\n$", 1*time.Second); err != nil { t.Fatalf("docker.WaitForOutput(client) timeout: %v", err) } } func TestMemLimit(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // N.B. Because the size of the memory file may grow in large chunks, // there is a minimum threshold of 1GB for the MemTotal figure. - allocMemory := 1024 * 1024 - out, err := d.Run(dockerutil.RunOpts{ + allocMemory := 1024 * 1024 // In kb. + out, err := d.Run(ctx, dockerutil.RunOpts{ Image: "basic/alpine", - Memory: allocMemory, // In kB. + Memory: allocMemory * 1024, // In bytes. }, "sh", "-c", "cat /proc/meminfo | grep MemTotal: | awk '{print $2}'") if err != nil { t.Fatalf("docker run failed: %v", err) @@ -272,13 +277,14 @@ func TestMemLimit(t *testing.T) { } func TestNumCPU(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Read how many cores are in the container. - out, err := d.Run(dockerutil.RunOpts{ - Image: "basic/alpine", - Extra: []string{"--cpuset-cpus=0"}, + out, err := d.Run(ctx, dockerutil.RunOpts{ + Image: "basic/alpine", + CpusetCpus: "0", }, "sh", "-c", "cat /proc/cpuinfo | grep 'processor.*:' | wc -l") if err != nil { t.Fatalf("docker run failed: %v", err) @@ -296,48 +302,34 @@ func TestNumCPU(t *testing.T) { // TestJobControl tests that job control characters are handled properly. func TestJobControl(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container with an attached PTY. - if _, err := d.Run(dockerutil.RunOpts{ + p, err := d.SpawnProcess(ctx, dockerutil.RunOpts{ Image: "basic/alpine", - Pty: func(_ *exec.Cmd, ptmx *os.File) { - // Call "sleep 100" in the shell. 
- if _, err := ptmx.Write([]byte("sleep 100\n")); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - - // Give shell a few seconds to start executing the sleep. - time.Sleep(2 * time.Second) + }, "sh", "-c", "sleep 100 | cat") + if err != nil { + t.Fatalf("docker run failed: %v", err) + } + // Give shell a few seconds to start executing the sleep. + time.Sleep(2 * time.Second) - // Send a ^C to the pty, which should kill sleep, but - // not the shell. \x03 is ASCII "end of text", which - // is the same as ^C. - if _, err := ptmx.Write([]byte{'\x03'}); err != nil { - t.Fatalf("error writing to pty: %v", err) - } + if _, err := p.Write(time.Second, []byte{0x03}); err != nil { + t.Fatalf("error exit: %v", err) + } - // The shell should still be alive at this point. Sleep - // should have exited with code 2+128=130. We'll exit - // with 10 plus that number, so that we can be sure - // that the shell did not get signalled. - if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil { - t.Fatalf("error writing to pty: %v", err) - } - }, - }, "sh"); err != nil { - t.Fatalf("docker run failed: %v", err) + if err := d.WaitTimeout(ctx, 3*time.Second); err != nil { + t.Fatalf("WaitTimeout failed: %v", err) } - // Wait for the container to exit. - got, err := d.Wait(5 * time.Second) + want := 130 + got, err := p.WaitExitStatus(ctx) if err != nil { - t.Fatalf("error getting exit code: %v", err) - } - // Container should exit with code 10+130=140. - if want := syscall.WaitStatus(140); got != want { - t.Errorf("container exited with code %d want %d", got, want) + t.Fatalf("wait for exit failed with: %v", err) + } else if got != want { + t.Fatalf("got: %d want: %d", got, want) } } @@ -356,15 +348,16 @@ func TestWorkingDirCreation(t *testing.T) { name += "-readonly" } t.Run(name, func(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) opts := dockerutil.RunOpts{ Image: "basic/alpine", WorkDir: tc.workingDir, ReadOnly: readonly, } - got, err := d.Run(opts, "sh", "-c", "echo ${PWD}") + got, err := d.Run(ctx, opts, "sh", "-c", "echo ${PWD}") if err != nil { t.Fatalf("docker run failed: %v", err) } @@ -378,11 +371,12 @@ func TestWorkingDirCreation(t *testing.T) { // TestTmpFile checks that files inside '/tmp' are not overridden. func TestTmpFile(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) opts := dockerutil.RunOpts{Image: "tmpfile"} - got, err := d.Run(opts, "cat", "/tmp/foo/file.txt") + got, err := d.Run(ctx, opts, "cat", "/tmp/foo/file.txt") if err != nil { t.Fatalf("docker run failed: %v", err) } @@ -393,6 +387,7 @@ func TestTmpFile(t *testing.T) { // TestTmpMount checks that mounts inside '/tmp' are not overridden. 
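RunOpts.Mounts now takes the Docker API's mount.Mount directly instead of the old local Mount type, as TestTmpMount below shows. A sketch of a read-only bind mount, assuming the same hypothetical imports plus github.com/docker/docker/api/types/mount; the host path is made up:

    func TestBindMountSketch(t *testing.T) {
        ctx := context.Background()
        d := dockerutil.MakeContainer(ctx, t)
        defer d.CleanUp(ctx)

        out, err := d.Run(ctx, dockerutil.RunOpts{
            Image: "basic/alpine",
            Mounts: []mount.Mount{{
                Type:     mount.TypeBind,
                Source:   "/tmp/host-dir", // hypothetical host path
                Target:   "/tmp/foo",
                ReadOnly: true,
            }},
        }, "cat", "/tmp/foo/file.txt")
        if err != nil {
            t.Fatalf("run failed: %v", err)
        }
        t.Logf("file contents: %s", out)
    }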
func TestTmpMount(t *testing.T) { + ctx := context.Background() dir, err := ioutil.TempDir(testutil.TmpDir(), "tmp-mount") if err != nil { t.Fatalf("TempDir(): %v", err) @@ -401,19 +396,20 @@ func TestTmpMount(t *testing.T) { if err := ioutil.WriteFile(filepath.Join(dir, "file.txt"), []byte("123"), 0666); err != nil { t.Fatalf("WriteFile(): %v", err) } - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) opts := dockerutil.RunOpts{ Image: "basic/alpine", - Mounts: []dockerutil.Mount{ + Mounts: []mount.Mount{ { + Type: mount.TypeBind, Source: dir, Target: "/tmp/foo", }, }, } - got, err := d.Run(opts, "cat", "/tmp/foo/file.txt") + got, err := d.Run(ctx, opts, "cat", "/tmp/foo/file.txt") if err != nil { t.Fatalf("docker run failed: %v", err) } @@ -426,10 +422,11 @@ func TestTmpMount(t *testing.T) { // runsc to hide the incoherence of FDs opened before and after overlayfs // copy-up on the host. func TestHostOverlayfsCopyUp(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) - if _, err := d.Run(dockerutil.RunOpts{ + if _, err := d.Run(ctx, dockerutil.RunOpts{ Image: "hostoverlaytest", WorkDir: "/root", }, "./test"); err != nil { diff --git a/test/e2e/regression_test.go b/test/e2e/regression_test.go index 327a2174c..70bbe5121 100644 --- a/test/e2e/regression_test.go +++ b/test/e2e/regression_test.go @@ -15,6 +15,7 @@ package integration import ( + "context" "strings" "testing" @@ -27,11 +28,12 @@ import ( // Prerequisite: the directory where the socket file is created must not have // been open for write before bind(2) is called. func TestBindOverlay(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Run the container. - got, err := d.Run(dockerutil.RunOpts{ + got, err := d.Run(ctx, dockerutil.RunOpts{ Image: "basic/ubuntu", }, "bash", "-c", "nc -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -U /var/run/sock && wait $p") if err != nil { diff --git a/test/image/image_test.go b/test/image/image_test.go index 3e4321480..8aa78035f 100644 --- a/test/image/image_test.go +++ b/test/image/image_test.go @@ -22,6 +22,7 @@ package image import ( + "context" "flag" "fmt" "io/ioutil" @@ -37,11 +38,12 @@ import ( ) func TestHelloWorld(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Run the basic container. - out, err := d.Run(dockerutil.RunOpts{ + out, err := d.Run(ctx, dockerutil.RunOpts{ Image: "basic/alpine", }, "echo", "Hello world!") if err != nil { @@ -107,8 +109,9 @@ func testHTTPServer(t *testing.T, port int) { } func TestHttpd(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container. opts := dockerutil.RunOpts{ @@ -116,12 +119,12 @@ func TestHttpd(t *testing.T) { Ports: []int{80}, } d.CopyFiles(&opts, "/usr/local/apache2/htdocs", "test/image/latin10k.txt") - if err := d.Spawn(opts); err != nil { + if err := d.Spawn(ctx, opts); err != nil { t.Fatalf("docker run failed: %v", err) } // Find where port 80 is mapped to. 
- port, err := d.FindPort(80) + port, err := d.FindPort(ctx, 80) if err != nil { t.Fatalf("FindPort(80) failed: %v", err) } @@ -135,8 +138,9 @@ func TestHttpd(t *testing.T) { } func TestNginx(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the container. opts := dockerutil.RunOpts{ @@ -144,12 +148,12 @@ func TestNginx(t *testing.T) { Ports: []int{80}, } d.CopyFiles(&opts, "/usr/share/nginx/html", "test/image/latin10k.txt") - if err := d.Spawn(opts); err != nil { + if err := d.Spawn(ctx, opts); err != nil { t.Fatalf("docker run failed: %v", err) } // Find where port 80 is mapped to. - port, err := d.FindPort(80) + port, err := d.FindPort(ctx, 80) if err != nil { t.Fatalf("FindPort(80) failed: %v", err) } @@ -163,11 +167,12 @@ func TestNginx(t *testing.T) { } func TestMysql(t *testing.T) { - server := dockerutil.MakeDocker(t) - defer server.CleanUp() + ctx := context.Background() + server := dockerutil.MakeContainer(ctx, t) + defer server.CleanUp(ctx) // Start the container. - if err := server.Spawn(dockerutil.RunOpts{ + if err := server.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/mysql", Env: []string{"MYSQL_ROOT_PASSWORD=foobar123"}, }); err != nil { @@ -175,42 +180,38 @@ func TestMysql(t *testing.T) { } // Wait until it's up and running. - if _, err := server.WaitForOutput("port: 3306 MySQL Community Server", 3*time.Minute); err != nil { + if _, err := server.WaitForOutput(ctx, "port: 3306 MySQL Community Server", 3*time.Minute); err != nil { t.Fatalf("WaitForOutput() timeout: %v", err) } // Generate the client and copy in the SQL payload. - client := dockerutil.MakeDocker(t) - defer client.CleanUp() + client := dockerutil.MakeContainer(ctx, t) + defer client.CleanUp(ctx) // Tell mysql client to connect to the server and execute the file in // verbose mode to verify the output. opts := dockerutil.RunOpts{ Image: "basic/mysql", - Links: []dockerutil.Link{ - { - Source: server, - Target: "mysql", - }, - }, + Links: []string{server.MakeLink("mysql")}, } client.CopyFiles(&opts, "/sql", "test/image/mysql.sql") - if _, err := client.Run(opts, "mysql", "-hmysql", "-uroot", "-pfoobar123", "-v", "-e", "source /sql/mysql.sql"); err != nil { + if _, err := client.Run(ctx, opts, "mysql", "-hmysql", "-uroot", "-pfoobar123", "-v", "-e", "source /sql/mysql.sql"); err != nil { t.Fatalf("docker run failed: %v", err) } // Ensure file executed to the end and shutdown mysql. - if _, err := server.WaitForOutput("mysqld: Shutdown complete", 30*time.Second); err != nil { + if _, err := server.WaitForOutput(ctx, "mysqld: Shutdown complete", 30*time.Second); err != nil { t.Fatalf("WaitForOutput() timeout: %v", err) } } func TestTomcat(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start the server. - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/tomcat", Ports: []int{8080}, }); err != nil { @@ -218,7 +219,7 @@ func TestTomcat(t *testing.T) { } // Find where port 8080 is mapped to. 
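Container links change shape here as well: RunOpts.Links is now a []string built with MakeLink rather than a slice of structs. A hedged sketch of the client/server wiring, condensed from the TestMysql hunk above (the SQL statement is an arbitrary example):

package integration_sketch // illustrative

import (
	"context"
	"testing"
	"time"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

func TestLinkSketch(t *testing.T) {
	ctx := context.Background()

	server := dockerutil.MakeContainer(ctx, t)
	defer server.CleanUp(ctx)
	if err := server.Spawn(ctx, dockerutil.RunOpts{
		Image: "basic/mysql",
		Env:   []string{"MYSQL_ROOT_PASSWORD=foobar123"},
	}); err != nil {
		t.Fatalf("docker run failed: %v", err)
	}
	if _, err := server.WaitForOutput(ctx, "port: 3306 MySQL Community Server", 3*time.Minute); err != nil {
		t.Fatalf("WaitForOutput() timeout: %v", err)
	}

	client := dockerutil.MakeContainer(ctx, t)
	defer client.CleanUp(ctx)
	opts := dockerutil.RunOpts{
		Image: "basic/mysql",
		// MakeLink builds the link string for the running server under the
		// alias "mysql".
		Links: []string{server.MakeLink("mysql")},
	}
	if _, err := client.Run(ctx, opts, "mysql", "-hmysql", "-uroot", "-pfoobar123", "-e", "SELECT 1;"); err != nil {
		t.Fatalf("docker run failed: %v", err)
	}
}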
- port, err := d.FindPort(8080) + port, err := d.FindPort(ctx, 8080) if err != nil { t.Fatalf("FindPort(8080) failed: %v", err) } @@ -240,8 +241,9 @@ func TestTomcat(t *testing.T) { } func TestRuby(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Execute the ruby workload. opts := dockerutil.RunOpts{ @@ -249,12 +251,12 @@ func TestRuby(t *testing.T) { Ports: []int{8080}, } d.CopyFiles(&opts, "/src", "test/image/ruby.rb", "test/image/ruby.sh") - if err := d.Spawn(opts, "/src/ruby.sh"); err != nil { + if err := d.Spawn(ctx, opts, "/src/ruby.sh"); err != nil { t.Fatalf("docker run failed: %v", err) } // Find where port 8080 is mapped to. - port, err := d.FindPort(8080) + port, err := d.FindPort(ctx, 8080) if err != nil { t.Fatalf("FindPort(8080) failed: %v", err) } @@ -283,20 +285,21 @@ func TestRuby(t *testing.T) { } func TestStdio(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) wantStdout := "hello stdout" wantStderr := "bonjour stderr" cmd := fmt.Sprintf("echo %q; echo %q 1>&2;", wantStdout, wantStderr) - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", }, "/bin/sh", "-c", cmd); err != nil { t.Fatalf("docker run failed: %v", err) } for _, want := range []string{wantStdout, wantStderr} { - if _, err := d.WaitForOutput(want, 5*time.Second); err != nil { + if _, err := d.WaitForOutput(ctx, want, 5*time.Second); err != nil { t.Fatalf("docker didn't get output %q : %v", want, err) } } diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 340f9426e..9dc64f655 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -15,6 +15,7 @@ package iptables import ( + "context" "fmt" "net" "testing" @@ -37,8 +38,9 @@ func singleTest(t *testing.T, test TestCase) { t.Fatalf("no test found with name %q. Has it been registered?", test.Name()) } - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Create and start the container. opts := dockerutil.RunOpts{ @@ -46,12 +48,12 @@ func singleTest(t *testing.T, test TestCase) { CapAdd: []string{"NET_ADMIN"}, } d.CopyFiles(&opts, "/runner", "test/iptables/runner/runner") - if err := d.Spawn(opts, "/runner/runner", "-name", test.Name()); err != nil { + if err := d.Spawn(ctx, opts, "/runner/runner", "-name", test.Name()); err != nil { t.Fatalf("docker run failed: %v", err) } // Get the container IP. - ip, err := d.FindIP() + ip, err := d.FindIP(ctx) if err != nil { t.Fatalf("failed to get container IP: %v", err) } @@ -69,7 +71,7 @@ func singleTest(t *testing.T, test TestCase) { // Wait for the final statement. This structure has the side effect // that all container logs will appear within the individual test // context. 
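For long-running containers the pattern is Spawn plus WaitForOutput/FindIP, all of which now thread the same context, as in the iptables runner above. A minimal hypothetical example:

package integration_sketch // illustrative

import (
	"context"
	"testing"
	"time"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

func TestSpawnAndWaitSketch(t *testing.T) {
	ctx := context.Background()
	d := dockerutil.MakeContainer(ctx, t)
	defer d.CleanUp(ctx)

	// Spawn starts the container and returns without waiting for it to exit.
	if err := d.Spawn(ctx, dockerutil.RunOpts{
		Image: "basic/alpine",
	}, "sh", "-c", "echo ready && sleep 1000"); err != nil {
		t.Fatalf("docker run failed: %v", err)
	}

	ip, err := d.FindIP(ctx)
	if err != nil {
		t.Fatalf("failed to get container IP: %v", err)
	}
	t.Logf("container IP: %v", ip)

	if _, err := d.WaitForOutput(ctx, "ready", 5*time.Second); err != nil {
		t.Fatalf("container never printed 'ready': %v", err)
	}
}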
- if _, err := d.WaitForOutput(TerminalStatement, TestTimeout); err != nil { + if _, err := d.WaitForOutput(ctx, TerminalStatement, TestTimeout); err != nil { t.Fatalf("test failed: %v", err) } } diff --git a/test/packetimpact/runner/BUILD b/test/packetimpact/runner/BUILD index 0b68a760a..bad4f0183 100644 --- a/test/packetimpact/runner/BUILD +++ b/test/packetimpact/runner/BUILD @@ -16,5 +16,6 @@ go_test( deps = [ "//pkg/test/dockerutil", "//test/packetimpact/netdevs", + "@com_github_docker_docker//api/types/mount:go_default_library", ], ) diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index c0a2620de..9290d5112 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -16,6 +16,7 @@ package packetimpact_test import ( + "context" "flag" "fmt" "io/ioutil" @@ -29,6 +30,7 @@ import ( "testing" "time" + "github.com/docker/docker/api/types/mount" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/test/packetimpact/netdevs" ) @@ -94,15 +96,16 @@ func TestOne(t *testing.T) { } } dockerutil.EnsureSupportedDockerVersion() + ctx := context.Background() // Create the networks needed for the test. One control network is needed for // the gRPC control packets and one test network on which to transmit the test // packets. - ctrlNet := dockerutil.NewDockerNetwork(logger("ctrlNet")) - testNet := dockerutil.NewDockerNetwork(logger("testNet")) - for _, dn := range []*dockerutil.DockerNetwork{ctrlNet, testNet} { + ctrlNet := dockerutil.NewNetwork(ctx, logger("ctrlNet")) + testNet := dockerutil.NewNetwork(ctx, logger("testNet")) + for _, dn := range []*dockerutil.Network{ctrlNet, testNet} { for { - if err := createDockerNetwork(dn); err != nil { + if err := createDockerNetwork(ctx, dn); err != nil { t.Log("creating docker network:", err) const wait = 100 * time.Millisecond t.Logf("sleeping %s and will try creating docker network again", wait) @@ -113,11 +116,19 @@ func TestOne(t *testing.T) { } break } - defer func(dn *dockerutil.DockerNetwork) { - if err := dn.Cleanup(); err != nil { + defer func(dn *dockerutil.Network) { + if err := dn.Cleanup(ctx); err != nil { t.Errorf("unable to cleanup container %s: %s", dn.Name, err) } }(dn) + // Sanity check. + inspect, err := dn.Inspect(ctx) + if err != nil { + t.Fatalf("failed to inspect network %s: %v", dn.Name, err) + } else if inspect.Name != dn.Name { + t.Fatalf("name mismatch for network want: %s got: %s", dn.Name, inspect.Name) + } + } tmpDir, err := ioutil.TempDir("", "container-output") @@ -128,42 +139,51 @@ func TestOne(t *testing.T) { const testOutputDir = "/tmp/testoutput" - runOpts := dockerutil.RunOpts{ - Image: "packetimpact", - CapAdd: []string{"NET_ADMIN"}, - Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm", "-v", tmpDir + ":" + testOutputDir}, - Foreground: true, - } - // Create the Docker container for the DUT. 
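The network helpers are renamed the same way (DockerNetwork becomes Network) and gain an Inspect method, used above as a post-create sanity check. A hedged sketch of that lifecycle, assuming NewNetwork accepts a *testing.T the way MakeContainer does; the subnet is an arbitrary documentation value:

package packetimpact_sketch // illustrative

import (
	"context"
	"net"
	"testing"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

func TestNetworkLifecycleSketch(t *testing.T) {
	ctx := context.Background()

	n := dockerutil.NewNetwork(ctx, t)
	// The runner above picks a random /24; use a fixed test range here.
	ip := net.IPv4(192, 0, 2, 0)
	n.Subnet = &net.IPNet{IP: ip, Mask: ip.DefaultMask()}

	if err := n.Create(ctx); err != nil {
		t.Fatalf("creating docker network: %v", err)
	}
	defer func() {
		if err := n.Cleanup(ctx); err != nil {
			t.Errorf("unable to cleanup network %s: %v", n.Name, err)
		}
	}()

	// Inspect round-trips the network through the Docker API.
	inspect, err := n.Inspect(ctx)
	if err != nil {
		t.Fatalf("failed to inspect network %s: %v", n.Name, err)
	}
	if inspect.Name != n.Name {
		t.Fatalf("name mismatch for network want: %s got: %s", n.Name, inspect.Name)
	}
}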
- dut := dockerutil.MakeDocker(logger("dut")) + dut := dockerutil.MakeContainer(ctx, logger("dut")) if *dutPlatform == "linux" { dut.Runtime = "" } + runOpts := dockerutil.RunOpts{ + Image: "packetimpact", + CapAdd: []string{"NET_ADMIN"}, + Mounts: []mount.Mount{mount.Mount{ + Type: mount.TypeBind, + Source: tmpDir, + Target: testOutputDir, + ReadOnly: false, + }}, + } + const containerPosixServerBinary = "/packetimpact/posix_server" dut.CopyFiles(&runOpts, "/packetimpact", "/test/packetimpact/dut/posix_server") - if err := dut.Create(runOpts, containerPosixServerBinary, "--ip=0.0.0.0", "--port="+ctrlPort); err != nil { - t.Fatalf("unable to create container %s: %s", dut.Name, err) + conf, hostconf, _ := dut.ConfigsFrom(runOpts, containerPosixServerBinary, "--ip=0.0.0.0", "--port="+ctrlPort) + hostconf.AutoRemove = true + hostconf.Sysctls = map[string]string{"net.ipv6.conf.all.disable_ipv6": "0"} + + if err := dut.CreateFrom(ctx, conf, hostconf, nil); err != nil { + t.Fatalf("unable to create container %s: %v", dut.Name, err) } - defer dut.CleanUp() + + defer dut.CleanUp(ctx) // Add ctrlNet as eth1 and testNet as eth2. const testNetDev = "eth2" - if err := addNetworks(dut, dutAddr, []*dockerutil.DockerNetwork{ctrlNet, testNet}); err != nil { + if err := addNetworks(ctx, dut, dutAddr, []*dockerutil.Network{ctrlNet, testNet}); err != nil { t.Fatal(err) } - if err := dut.Start(); err != nil { + if err := dut.Start(ctx); err != nil { t.Fatalf("unable to start container %s: %s", dut.Name, err) } - if _, err := dut.WaitForOutput("Server listening.*\n", 60*time.Second); err != nil { + if _, err := dut.WaitForOutput(ctx, "Server listening.*\n", 60*time.Second); err != nil { t.Fatalf("%s on container %s never listened: %s", containerPosixServerBinary, dut.Name, err) } - dutTestDevice, dutDeviceInfo, err := deviceByIP(dut, addressInSubnet(dutAddr, *testNet.Subnet)) + dutTestDevice, dutDeviceInfo, err := deviceByIP(ctx, dut, addressInSubnet(dutAddr, *testNet.Subnet)) if err != nil { t.Fatal(err) } @@ -173,11 +193,11 @@ func TestOne(t *testing.T) { // Netstack as DUT doesn't assign IPv6 addresses automatically so do it if // needed. if remoteIPv6 == nil { - if _, err := dut.Exec(dockerutil.RunOpts{}, "ip", "addr", "add", netdevs.MACToIP(remoteMAC).String(), "scope", "link", "dev", dutTestDevice); err != nil { + if _, err := dut.Exec(ctx, dockerutil.ExecOpts{}, "ip", "addr", "add", netdevs.MACToIP(remoteMAC).String(), "scope", "link", "dev", dutTestDevice); err != nil { t.Fatalf("unable to ip addr add on container %s: %s", dut.Name, err) } // Now try again, to make sure that it worked. - _, dutDeviceInfo, err = deviceByIP(dut, addressInSubnet(dutAddr, *testNet.Subnet)) + _, dutDeviceInfo, err = deviceByIP(ctx, dut, addressInSubnet(dutAddr, *testNet.Subnet)) if err != nil { t.Fatal(err) } @@ -188,16 +208,20 @@ func TestOne(t *testing.T) { } // Create the Docker container for the testbench. - testbench := dockerutil.MakeDocker(logger("testbench")) + testbench := dockerutil.MakeContainer(ctx, logger("testbench")) testbench.Runtime = "" // The testbench always runs on Linux. 
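Where the old code stuffed raw docker CLI flags into RunOpts.Extra, the new flow above builds explicit configs: ConfigsFrom produces the container and host configs, the test sets HostConfig fields such as AutoRemove and Sysctls, and CreateFrom/Start bring the container up. A condensed, hypothetical version of that sequence:

package packetimpact_sketch // illustrative

import (
	"context"
	"testing"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

func TestCreateFromSketch(t *testing.T) {
	ctx := context.Background()
	d := dockerutil.MakeContainer(ctx, t)
	defer d.CleanUp(ctx)

	conf, hostconf, _ := d.ConfigsFrom(dockerutil.RunOpts{
		Image: "basic/alpine",
	}, "sleep", "1000")

	// Formerly "--rm" and "--sysctl net.ipv6.conf.all.disable_ipv6=0" in Extra.
	hostconf.AutoRemove = true
	hostconf.Sysctls = map[string]string{"net.ipv6.conf.all.disable_ipv6": "0"}

	if err := d.CreateFrom(ctx, conf, hostconf, nil); err != nil {
		t.Fatalf("unable to create container %s: %v", d.Name, err)
	}
	if err := d.Start(ctx); err != nil {
		t.Fatalf("unable to start container %s: %v", d.Name, err)
	}
}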
tbb := path.Base(*testbenchBinary) containerTestbenchBinary := "/packetimpact/" + tbb runOpts = dockerutil.RunOpts{ - Image: "packetimpact", - CapAdd: []string{"NET_ADMIN"}, - Extra: []string{"--sysctl", "net.ipv6.conf.all.disable_ipv6=0", "--rm", "-v", tmpDir + ":" + testOutputDir}, - Foreground: true, + Image: "packetimpact", + CapAdd: []string{"NET_ADMIN"}, + Mounts: []mount.Mount{mount.Mount{ + Type: mount.TypeBind, + Source: tmpDir, + Target: testOutputDir, + ReadOnly: false, + }}, } testbench.CopyFiles(&runOpts, "/packetimpact", "/test/packetimpact/tests/"+tbb) @@ -227,30 +251,31 @@ func TestOne(t *testing.T) { } }() - if err := testbench.Create(runOpts, snifferArgs...); err != nil { + conf, hostconf, _ = testbench.ConfigsFrom(runOpts, snifferArgs...) + hostconf.AutoRemove = true + hostconf.Sysctls = map[string]string{"net.ipv6.conf.all.disable_ipv6": "0"} + + if err := testbench.CreateFrom(ctx, conf, hostconf, nil); err != nil { t.Fatalf("unable to create container %s: %s", testbench.Name, err) } - defer testbench.CleanUp() + defer testbench.CleanUp(ctx) // Add ctrlNet as eth1 and testNet as eth2. - if err := addNetworks(testbench, testbenchAddr, []*dockerutil.DockerNetwork{ctrlNet, testNet}); err != nil { + if err := addNetworks(ctx, testbench, testbenchAddr, []*dockerutil.Network{ctrlNet, testNet}); err != nil { t.Fatal(err) } - if err := testbench.Start(); err != nil { + if err := testbench.Start(ctx); err != nil { t.Fatalf("unable to start container %s: %s", testbench.Name, err) } // Kill so that it will flush output. defer func() { - // Wait 1 second before killing tcpdump to give it time to flush - // any packets. On linux tests killing it immediately can - // sometimes result in partial pcaps. time.Sleep(1 * time.Second) - testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + testbench.Exec(ctx, dockerutil.ExecOpts{}, "killall", snifferArgs[0]) }() - if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil { + if _, err := testbench.WaitForOutput(ctx, snifferRegex, 60*time.Second); err != nil { t.Fatalf("sniffer on %s never listened: %s", dut.Name, err) } @@ -258,7 +283,7 @@ func TestOne(t *testing.T) { // will issue a RST. To prevent this IPtables can be used to filter out all // incoming packets. The raw socket that packetimpact tests use will still see // everything. - if _, err := testbench.Exec(dockerutil.RunOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil { + if _, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil { t.Fatalf("unable to Exec iptables on container %s: %s", testbench.Name, err) } @@ -282,7 +307,7 @@ func TestOne(t *testing.T) { "--device", testNetDev, "--dut_type", *dutPlatform, ) - _, err = testbench.Exec(dockerutil.RunOpts{}, testArgs...) + _, err = testbench.Exec(ctx, dockerutil.ExecOpts{}, testArgs...) if !*expectFailure && err != nil { t.Fatal("test failed:", err) } @@ -291,11 +316,11 @@ func TestOne(t *testing.T) { } } -func addNetworks(d *dockerutil.Docker, addr net.IP, networks []*dockerutil.DockerNetwork) error { +func addNetworks(ctx context.Context, d *dockerutil.Container, addr net.IP, networks []*dockerutil.Network) error { for _, dn := range networks { ip := addressInSubnet(addr, *dn.Subnet) // Connect to the network with the specified IP address. 
- if err := dn.Connect(d, "--ip", ip.String()); err != nil { + if err := dn.Connect(ctx, d, ip.String(), ""); err != nil { return fmt.Errorf("unable to connect container %s to network %s: %w", d.Name, dn.Name, err) } } @@ -313,9 +338,9 @@ func addressInSubnet(addr net.IP, subnet net.IPNet) net.IP { return net.IP(octets) } -// makeDockerNetwork makes a randomly-named network that will start with the +// createDockerNetwork makes a randomly-named network that will start with the // namePrefix. The network will be a random /24 subnet. -func createDockerNetwork(n *dockerutil.DockerNetwork) error { +func createDockerNetwork(ctx context.Context, n *dockerutil.Network) error { randSource := rand.NewSource(time.Now().UnixNano()) r1 := rand.New(randSource) // Class C, 192.0.0.0 to 223.255.255.255, transitionally has mask 24. @@ -324,12 +349,12 @@ func createDockerNetwork(n *dockerutil.DockerNetwork) error { IP: ip, Mask: ip.DefaultMask(), } - return n.Create() + return n.Create(ctx) } // deviceByIP finds a deviceInfo and device name from an IP address. -func deviceByIP(d *dockerutil.Docker, ip net.IP) (string, netdevs.DeviceInfo, error) { - out, err := d.Exec(dockerutil.RunOpts{}, "ip", "addr", "show") +func deviceByIP(ctx context.Context, d *dockerutil.Container, ip net.IP) (string, netdevs.DeviceInfo, error) { + out, err := d.Exec(ctx, dockerutil.ExecOpts{}, "ip", "addr", "show") if err != nil { return "", netdevs.DeviceInfo{}, fmt.Errorf("listing devices on %s container: %w", d.Name, err) } diff --git a/test/root/cgroup_test.go b/test/root/cgroup_test.go index d0634b5c3..a26b83081 100644 --- a/test/root/cgroup_test.go +++ b/test/root/cgroup_test.go @@ -16,6 +16,7 @@ package root import ( "bufio" + "context" "fmt" "io/ioutil" "os" @@ -56,25 +57,24 @@ func verifyPid(pid int, path string) error { return fmt.Errorf("got: %v, want: %d", gots, pid) } -func TestMemCGroup(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() +func TestMemCgroup(t *testing.T) { + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Start a new container and allocate the specified about of memory. allocMemSize := 128 << 20 allocMemLimit := 2 * allocMemSize - if err := d.Spawn(dockerutil.RunOpts{ - Image: "basic/python", - Memory: allocMemLimit / 1024, // Must be in Kb. - }, "python", "-c", fmt.Sprintf("import time; s = 'a' * %d; time.sleep(100)", allocMemSize)); err != nil { + + if err := d.Spawn(ctx, dockerutil.RunOpts{ + Image: "basic/ubuntu", + Memory: allocMemLimit, // Must be in bytes. + }, "python3", "-c", fmt.Sprintf("import time; s = 'a' * %d; time.sleep(100)", allocMemSize)); err != nil { t.Fatalf("docker run failed: %v", err) } // Extract the ID to lookup the cgroup. - gid, err := d.ID() - if err != nil { - t.Fatalf("Docker.ID() failed: %v", err) - } + gid := d.ID() t.Logf("cgroup ID: %s", gid) // Wait when the container will allocate memory. @@ -127,8 +127,9 @@ func TestMemCGroup(t *testing.T) { // TestCgroup sets cgroup options and checks that cgroup was properly configured. func TestCgroup(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // This is not a comprehensive list of attributes. // @@ -137,94 +138,133 @@ func TestCgroup(t *testing.T) { // are often run on a single core virtual machine, and there is only a single // CPU available in our current set, and every container's set. 
attrs := []struct { - arg string + field string + value int64 ctrl string file string want string skipIfNotFound bool }{ { - arg: "--cpu-shares=1000", - ctrl: "cpu", - file: "cpu.shares", - want: "1000", + field: "cpu-shares", + value: 1000, + ctrl: "cpu", + file: "cpu.shares", + want: "1000", }, { - arg: "--cpu-period=2000", - ctrl: "cpu", - file: "cpu.cfs_period_us", - want: "2000", + field: "cpu-period", + value: 2000, + ctrl: "cpu", + file: "cpu.cfs_period_us", + want: "2000", }, { - arg: "--cpu-quota=3000", - ctrl: "cpu", - file: "cpu.cfs_quota_us", - want: "3000", + field: "cpu-quota", + value: 3000, + ctrl: "cpu", + file: "cpu.cfs_quota_us", + want: "3000", }, { - arg: "--kernel-memory=100MB", - ctrl: "memory", - file: "memory.kmem.limit_in_bytes", - want: "104857600", + field: "kernel-memory", + value: 100 << 20, + ctrl: "memory", + file: "memory.kmem.limit_in_bytes", + want: "104857600", }, { - arg: "--memory=1GB", - ctrl: "memory", - file: "memory.limit_in_bytes", - want: "1073741824", + field: "memory", + value: 1 << 30, + ctrl: "memory", + file: "memory.limit_in_bytes", + want: "1073741824", }, { - arg: "--memory-reservation=500MB", - ctrl: "memory", - file: "memory.soft_limit_in_bytes", - want: "524288000", + field: "memory-reservation", + value: 500 << 20, + ctrl: "memory", + file: "memory.soft_limit_in_bytes", + want: "524288000", }, { - arg: "--memory-swap=2GB", + field: "memory-swap", + value: 2 << 30, ctrl: "memory", file: "memory.memsw.limit_in_bytes", want: "2147483648", skipIfNotFound: true, // swap may be disabled on the machine. }, { - arg: "--memory-swappiness=5", - ctrl: "memory", - file: "memory.swappiness", - want: "5", + field: "memory-swappiness", + value: 5, + ctrl: "memory", + file: "memory.swappiness", + want: "5", }, { - arg: "--blkio-weight=750", + field: "blkio-weight", + value: 750, ctrl: "blkio", file: "blkio.weight", want: "750", skipIfNotFound: true, // blkio groups may not be available. }, { - arg: "--pids-limit=1000", - ctrl: "pids", - file: "pids.max", - want: "1000", + field: "pids-limit", + value: 1000, + ctrl: "pids", + file: "pids.max", + want: "1000", }, } - args := make([]string, 0, len(attrs)) + // Make configs. + conf, hostconf, _ := d.ConfigsFrom(dockerutil.RunOpts{ + Image: "basic/alpine", + }, "sleep", "10000") + + // Add Cgroup arguments to configs. for _, attr := range attrs { - args = append(args, attr.arg) + switch attr.field { + case "cpu-shares": + hostconf.Resources.CPUShares = attr.value + case "cpu-period": + hostconf.Resources.CPUPeriod = attr.value + case "cpu-quota": + hostconf.Resources.CPUQuota = attr.value + case "kernel-memory": + hostconf.Resources.KernelMemory = attr.value + case "memory": + hostconf.Resources.Memory = attr.value + case "memory-reservation": + hostconf.Resources.MemoryReservation = attr.value + case "memory-swap": + hostconf.Resources.MemorySwap = attr.value + case "memory-swappiness": + val := attr.value + hostconf.Resources.MemorySwappiness = &val + case "blkio-weight": + hostconf.Resources.BlkioWeight = uint16(attr.value) + case "pids-limit": + val := attr.value + hostconf.Resources.PidsLimit = &val + + } } - // Start the container. - if err := d.Spawn(dockerutil.RunOpts{ - Image: "basic/alpine", - Extra: args, // Cgroup arguments. - }, "sleep", "10000"); err != nil { - t.Fatalf("docker run failed: %v", err) + // Create container. + if err := d.CreateFrom(ctx, conf, hostconf, nil); err != nil { + t.Fatalf("create failed with: %v", err) } - // Lookup the relevant cgroup ID. 
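The same ConfigsFrom/CreateFrom flow replaces the per-flag strings for cgroup settings: each docker CLI flag in the old attrs table maps onto a field of HostConfig.Resources. A short hedged sketch with a few representative fields, with values mirroring the table above:

package root_sketch // illustrative

import (
	"context"
	"testing"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

func TestCgroupResourcesSketch(t *testing.T) {
	ctx := context.Background()
	d := dockerutil.MakeContainer(ctx, t)
	defer d.CleanUp(ctx)

	conf, hostconf, _ := d.ConfigsFrom(dockerutil.RunOpts{
		Image: "basic/alpine",
	}, "sleep", "10000")

	hostconf.Resources.CPUShares = 1000 // was --cpu-shares=1000
	hostconf.Resources.Memory = 1 << 30 // was --memory=1GB
	swappiness := int64(5)              // was --memory-swappiness=5
	hostconf.Resources.MemorySwappiness = &swappiness

	if err := d.CreateFrom(ctx, conf, hostconf, nil); err != nil {
		t.Fatalf("create failed with: %v", err)
	}
	if err := d.Start(ctx); err != nil {
		t.Fatalf("start failed with: %v", err)
	}

	// ID is now a plain accessor (no error) used to locate the cgroup.
	t.Logf("cgroup ID: %s", d.ID())
}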
- gid, err := d.ID() - if err != nil { - t.Fatalf("Docker.ID() failed: %v", err) + // Start container. + if err := d.Start(ctx); err != nil { + t.Fatalf("start failed with: %v", err) } + + // Lookup the relevant cgroup ID. + gid := d.ID() t.Logf("cgroup ID: %s", gid) // Check list of attributes defined above. @@ -239,7 +279,7 @@ func TestCgroup(t *testing.T) { t.Fatalf("failed to read %q: %v", path, err) } if got := strings.TrimSpace(string(out)); got != attr.want { - t.Errorf("arg: %q, cgroup attribute %s/%s, got: %q, want: %q", attr.arg, attr.ctrl, attr.file, got, attr.want) + t.Errorf("field: %q, cgroup attribute %s/%s, got: %q, want: %q", attr.field, attr.ctrl, attr.file, got, attr.want) } } @@ -257,7 +297,7 @@ func TestCgroup(t *testing.T) { "pids", "systemd", } - pid, err := d.SandboxPid() + pid, err := d.SandboxPid(ctx) if err != nil { t.Fatalf("SandboxPid: %v", err) } @@ -269,29 +309,34 @@ func TestCgroup(t *testing.T) { } } -// TestCgroup sets cgroup options and checks that cgroup was properly configured. +// TestCgroupParent sets the "CgroupParent" option and checks that the child and parent's +// cgroups are created correctly relative to each other. func TestCgroupParent(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) // Construct a known cgroup name. parent := testutil.RandomID("runsc-") - if err := d.Spawn(dockerutil.RunOpts{ + conf, hostconf, _ := d.ConfigsFrom(dockerutil.RunOpts{ Image: "basic/alpine", - Extra: []string{fmt.Sprintf("--cgroup-parent=%s", parent)}, - }, "sleep", "10000"); err != nil { - t.Fatalf("docker run failed: %v", err) + }, "sleep", "10000") + hostconf.Resources.CgroupParent = parent + + if err := d.CreateFrom(ctx, conf, hostconf, nil); err != nil { + t.Fatalf("create failed with: %v", err) } - // Extract the ID to look up the cgroup. - gid, err := d.ID() - if err != nil { - t.Fatalf("Docker.ID() failed: %v", err) + if err := d.Start(ctx); err != nil { + t.Fatalf("start failed with: %v", err) } + + // Extract the ID to look up the cgroup. + gid := d.ID() t.Logf("cgroup ID: %s", gid) // Check that sandbox is inside cgroup. - pid, err := d.SandboxPid() + pid, err := d.SandboxPid(ctx) if err != nil { t.Fatalf("SandboxPid: %v", err) } diff --git a/test/root/chroot_test.go b/test/root/chroot_test.go index a306132a4..58fcd6f08 100644 --- a/test/root/chroot_test.go +++ b/test/root/chroot_test.go @@ -16,6 +16,7 @@ package root import ( + "context" "fmt" "io/ioutil" "os/exec" @@ -30,16 +31,17 @@ import ( // TestChroot verifies that the sandbox is chroot'd and that mounts are cleaned // up after the sandbox is destroyed. 
func TestChroot(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", }, "sleep", "10000"); err != nil { t.Fatalf("docker run failed: %v", err) } - pid, err := d.SandboxPid() + pid, err := d.SandboxPid(ctx) if err != nil { t.Fatalf("Docker.SandboxPid(): %v", err) } @@ -75,14 +77,15 @@ func TestChroot(t *testing.T) { t.Errorf("chroot got children %v, want %v", fi[0].Name(), "proc") } - d.CleanUp() + d.CleanUp(ctx) } func TestChrootGofer(t *testing.T) { - d := dockerutil.MakeDocker(t) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) - if err := d.Spawn(dockerutil.RunOpts{ + if err := d.Spawn(ctx, dockerutil.RunOpts{ Image: "basic/alpine", }, "sleep", "10000"); err != nil { t.Fatalf("docker run failed: %v", err) @@ -91,7 +94,7 @@ func TestChrootGofer(t *testing.T) { // It's tricky to find gofers. Get sandbox PID first, then find parent. From // parent get all immediate children, remove the sandbox, and everything else // are gofers. - sandPID, err := d.SandboxPid() + sandPID, err := d.SandboxPid(ctx) if err != nil { t.Fatalf("Docker.SandboxPid(): %v", err) } diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go index a00c64d27..2a0f62c73 100644 --- a/test/runtimes/runner/main.go +++ b/test/runtimes/runner/main.go @@ -16,6 +16,7 @@ package main import ( + "context" "encoding/csv" "flag" "fmt" @@ -60,8 +61,9 @@ func runTests() int { } // Construct the shared docker instance. - d := dockerutil.MakeDocker(testutil.DefaultLogger(*lang)) - defer d.CleanUp() + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, testutil.DefaultLogger(*lang)) + defer d.CleanUp(ctx) if err := testutil.TouchShardStatusFile(); err != nil { fmt.Fprintf(os.Stderr, "error touching status shard file: %v\n", err) @@ -71,7 +73,7 @@ func runTests() int { // Get a slice of tests to run. This will also start a single Docker // container that will be used to run each test. The final test will // stop the Docker container. - tests, err := getTests(d, excludes) + tests, err := getTests(ctx, d, excludes) if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err.Error()) return 1 @@ -82,18 +84,18 @@ func runTests() int { } // getTests executes all tests as table tests. -func getTests(d *dockerutil.Docker, excludes map[string]struct{}) ([]testing.InternalTest, error) { +func getTests(ctx context.Context, d *dockerutil.Container, excludes map[string]struct{}) ([]testing.InternalTest, error) { // Start the container. opts := dockerutil.RunOpts{ Image: fmt.Sprintf("runtimes/%s", *image), } d.CopyFiles(&opts, "/proctor", "test/runtimes/proctor/proctor") - if err := d.Spawn(opts, "/proctor/proctor", "--pause"); err != nil { + if err := d.Spawn(ctx, opts, "/proctor/proctor", "--pause"); err != nil { return nil, fmt.Errorf("docker run failed: %v", err) } // Get a list of all tests in the image. 
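Exec gets the same treatment: it takes a context and a dedicated ExecOpts type instead of reusing RunOpts, as in the proctor invocations below. A minimal hypothetical example:

package runner_sketch // illustrative

import (
	"context"
	"testing"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

func TestExecSketch(t *testing.T) {
	ctx := context.Background()
	d := dockerutil.MakeContainer(ctx, t)
	defer d.CleanUp(ctx)

	if err := d.Spawn(ctx, dockerutil.RunOpts{
		Image: "basic/alpine",
	}, "sleep", "1000"); err != nil {
		t.Fatalf("docker run failed: %v", err)
	}

	out, err := d.Exec(ctx, dockerutil.ExecOpts{}, "echo", "hello from exec")
	if err != nil {
		t.Fatalf("docker exec failed: %v", err)
	}
	t.Logf("exec output: %q", out)
}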
- list, err := d.Exec(dockerutil.RunOpts{}, "/proctor/proctor", "--runtime", *lang, "--list") + list, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--list") if err != nil { return nil, fmt.Errorf("docker exec failed: %v", err) } @@ -128,7 +130,7 @@ func getTests(d *dockerutil.Docker, excludes map[string]struct{}) ([]testing.Int go func() { fmt.Printf("RUNNING %s...\n", tc) - output, err = d.Exec(dockerutil.RunOpts{}, "/proctor/proctor", "--runtime", *lang, "--test", tc) + output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--test", tc) close(done) }() -- cgit v1.2.3 From 14ff2ea9bfc83fb37afe8a5e17e8b8173f85eb68 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Wed, 8 Jul 2020 10:54:23 -0700 Subject: ip6tables: handle both IPv4 and v6 addresses Enabling IPv6 in Docker caused IPv4 tests to fail because localAddrs didn't distinguish between address types. Example failure: https://source.cloud.google.com/results/invocations/203b2401-3333-4bec-9a56-72cc53d68ddd/log --- test/iptables/filter_input.go | 2 +- test/iptables/iptables_test.go | 26 ++++++++++++++++++++++++++ test/iptables/iptables_util.go | 21 ++++++++++++++++++--- test/iptables/nat.go | 2 +- 4 files changed, 46 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 872021358..068f228bd 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -618,7 +618,7 @@ func (FilterInputDestination) Name() string { // ContainerAction implements TestCase.ContainerAction. func (FilterInputDestination) ContainerAction(ip net.IP) error { - addrs, err := localAddrs() + addrs, err := localAddrs(false) if err != nil { return err } diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 340f9426e..12825e5d2 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -17,6 +17,7 @@ package iptables import ( "fmt" "net" + "reflect" "testing" "gvisor.dev/gvisor/pkg/test/dockerutil" @@ -315,3 +316,28 @@ func TestInputSource(t *testing.T) { func TestInputInvertSource(t *testing.T) { singleTest(t, FilterInputInvertSource{}) } + +func TestFilterAddrs(t *testing.T) { + tcs := []struct { + ipv6 bool + addrs []string + want []string + }{ + { + ipv6: false, + addrs: []string{"192.168.0.1", "192.168.0.2/24", "::1", "::2/128"}, + want: []string{"192.168.0.1", "192.168.0.2"}, + }, + { + ipv6: true, + addrs: []string{"192.168.0.1", "192.168.0.2/24", "::1", "::2/128"}, + want: []string{"::1", "::2"}, + }, + } + + for _, tc := range tcs { + if got := filterAddrs(tc.addrs, tc.ipv6); !reflect.DeepEqual(got, tc.want) { + t.Errorf("%v with IPv6 %t: got %v, but wanted %v", tc.addrs, tc.ipv6, got, tc.want) + } + } +} diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 7146edbb9..d4bc55b24 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -18,6 +18,7 @@ import ( "fmt" "net" "os/exec" + "strings" "time" "gvisor.dev/gvisor/pkg/test/testutil" @@ -157,8 +158,10 @@ func connectTCP(ip net.IP, port int, timeout time.Duration) error { return nil } -// localAddrs returns a list of local network interface addresses. -func localAddrs() ([]string, error) { +// localAddrs returns a list of local network interface addresses. When ipv6 is +// true, only IPv6 addresses are returned. Otherwise only IPv4 addresses are +// returned. 
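The classification that filterAddrs relies on reduces to one check: strip any /prefix and treat addresses with no 4-byte form as IPv6. A tiny standalone sketch of just that predicate; the function and program here are illustrative, not part of the change:

package main

import (
	"fmt"
	"net"
	"strings"
)

// isIPv6 mirrors the test used by filterAddrs: net.ParseIP(...).To4() is
// nil for IPv6 addresses and non-nil for IPv4 ones.
func isIPv6(addr string) bool {
	host := strings.Split(addr, "/")[0] // drop a trailing CIDR prefix, if any.
	return net.ParseIP(host).To4() == nil
}

func main() {
	for _, addr := range []string{"192.168.0.2/24", "::2/128"} {
		fmt.Printf("%s -> IPv6=%t\n", addr, isIPv6(addr))
	}
}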
+func localAddrs(ipv6 bool) ([]string, error) { addrs, err := net.InterfaceAddrs() if err != nil { return nil, err @@ -167,7 +170,19 @@ func localAddrs() ([]string, error) { for _, addr := range addrs { addrStrs = append(addrStrs, addr.String()) } - return addrStrs, nil + return filterAddrs(addrStrs, ipv6), nil +} + +func filterAddrs(addrs []string, ipv6 bool) []string { + addrStrs := make([]string, 0, len(addrs)) + for _, addr := range addrs { + // Add only IPv4 or only IPv6 addresses. + parts := strings.Split(addr, "/") + if isIPv6 := net.ParseIP(parts[0]).To4() == nil; isIPv6 == ipv6 { + addrStrs = append(addrStrs, parts[0]) + } + } + return addrStrs } // getInterfaceName returns the name of the interface other than loopback. diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 5e54a3963..8562b0820 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -241,7 +241,7 @@ func (NATPreRedirectIP) Name() string { // ContainerAction implements TestCase.ContainerAction. func (NATPreRedirectIP) ContainerAction(ip net.IP) error { - addrs, err := localAddrs() + addrs, err := localAddrs(false) if err != nil { return err } -- cgit v1.2.3 From abffebde7be2dcdb4564e45f845d7c150ced0ccb Mon Sep 17 00:00:00 2001 From: Ridwan Sharif Date: Tue, 7 Jul 2020 21:48:25 -0400 Subject: Gate FUSE behind a runsc flag This change gates all FUSE commands (by gating /dev/fuse) behind a runsc flag. In order to use FUSE commands, use the --fuse flag with the --vfs2 flag. Check if FUSE is enabled by running dmesg in the sandbox. --- pkg/sentry/fsimpl/fuse/BUILD | 1 + pkg/sentry/fsimpl/fuse/dev.go | 5 +++++ pkg/sentry/kernel/kernel.go | 4 ++++ pkg/sentry/kernel/syslog.go | 9 +++++++++ runsc/boot/config.go | 7 +++++++ runsc/boot/loader.go | 4 ++++ runsc/boot/vfs.go | 14 ++++++++++---- runsc/main.go | 2 ++ test/runner/defs.bzl | 20 +++++++++++++++++++- test/runner/runner.go | 10 ++++++++++ test/syscalls/BUILD | 1 + test/syscalls/linux/dev.cc | 2 +- test/util/test_util.cc | 6 ++++++ test/util/test_util.h | 1 + 14 files changed, 80 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD index 41567967d..3e00c2abb 100644 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -12,6 +12,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/kernel", "//pkg/sentry/vfs", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go index f6a67d005..dc33268af 100644 --- a/pkg/sentry/fsimpl/fuse/dev.go +++ b/pkg/sentry/fsimpl/fuse/dev.go @@ -18,6 +18,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -30,6 +31,10 @@ type fuseDevice struct{} // Open implements vfs.Device.Open. func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + if !kernel.FUSEEnabled { + return nil, syserror.ENOENT + } + var fd DeviceFD if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{ UseDentryMetadata: true, diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 2177b785a..240cd6fe0 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -81,6 +81,10 @@ import ( // easy access everywhere. 
To be removed once VFS2 becomes the default. var VFS2Enabled = false +// FUSEEnabled is set to true when FUSE is enabled. Added as a global for allow +// easy access everywhere. To be removed once FUSE is completed. +var FUSEEnabled = false + // Kernel represents an emulated Linux kernel. It must be initialized by calling // Init() or LoadFrom(). // diff --git a/pkg/sentry/kernel/syslog.go b/pkg/sentry/kernel/syslog.go index 4607cde2f..a83ce219c 100644 --- a/pkg/sentry/kernel/syslog.go +++ b/pkg/sentry/kernel/syslog.go @@ -98,6 +98,15 @@ func (s *syslog) Log() []byte { s.msg = append(s.msg, []byte(fmt.Sprintf(format, time, selectMessage()))...) } + if VFS2Enabled { + time += rand.Float64() / 2 + s.msg = append(s.msg, []byte(fmt.Sprintf(format, time, "Setting up VFS2..."))...) + if FUSEEnabled { + time += rand.Float64() / 2 + s.msg = append(s.msg, []byte(fmt.Sprintf(format, time, "Setting up FUSE..."))...) + } + } + time += rand.Float64() / 2 s.msg = append(s.msg, []byte(fmt.Sprintf(format, time, "Ready!"))...) diff --git a/runsc/boot/config.go b/runsc/boot/config.go index bb01b8fb5..80da8b3e6 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -274,6 +274,9 @@ type Config struct { // Enables VFS2 (not plumbled through yet). VFS2 bool + + // Enables FUSE usage (not plumbled through yet). + FUSE bool } // ToFlags returns a slice of flags that correspond to the given Config. @@ -325,5 +328,9 @@ func (c *Config) ToFlags() []string { f = append(f, "--vfs2=true") } + if c.FUSE { + f = append(f, "--fuse=true") + } + return f } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 0c0423ab2..93ac7ec41 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -205,6 +205,10 @@ func New(args Args) (*Loader, error) { // Is this a VFSv2 kernel? if args.Conf.VFS2 { kernel.VFS2Enabled = true + if args.Conf.FUSE { + kernel.FUSEEnabled = true + } + vfs2.Override() } diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 6ee6fae04..56f4ba15d 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -86,9 +86,12 @@ func registerFilesystems(k *kernel.Kernel) error { return fmt.Errorf("registering ttydev: %w", err) } - if err := fuse.Register(vfsObj); err != nil { - return fmt.Errorf("registering fusedev: %w", err) + if kernel.FUSEEnabled { + if err := fuse.Register(vfsObj); err != nil { + return fmt.Errorf("registering fusedev: %w", err) + } } + if err := tundev.Register(vfsObj); err != nil { return fmt.Errorf("registering tundev: %v", err) } @@ -110,8 +113,11 @@ func registerFilesystems(k *kernel.Kernel) error { if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil { return fmt.Errorf("creating tundev devtmpfs files: %v", err) } - if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil { - return fmt.Errorf("creating fusedev devtmpfs files: %w", err) + + if kernel.FUSEEnabled { + if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil { + return fmt.Errorf("creating fusedev devtmpfs files: %w", err) + } } return nil } diff --git a/runsc/main.go b/runsc/main.go index c9f47c579..69cb505fa 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -88,6 +88,7 @@ var ( referenceLeakMode = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.") cpuNumFromQuota = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)") vfs2Enabled = flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. 
This uses the new experimental VFS layer.") + fuseEnabled = flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.") // Test flags, not to be used outside tests, ever. testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.") @@ -242,6 +243,7 @@ func main() { OverlayfsStaleRead: *overlayfsStaleRead, CPUNumFromQuota: *cpuNumFromQuota, VFS2: *vfs2Enabled, + FUSE: *fuseEnabled, QDisc: queueingDiscipline, TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot, TestOnlyTestNameEnv: *testOnlyTestNameEnv, diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 921e499be..600cb5192 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -61,7 +61,8 @@ def _syscall_test( file_access = "exclusive", overlay = False, add_uds_tree = False, - vfs2 = False): + vfs2 = False, + fuse = False): # Prepend "runsc" to non-native platform names. full_platform = platform if platform == "native" else "runsc_" + platform @@ -73,6 +74,8 @@ def _syscall_test( name += "_overlay" if vfs2: name += "_vfs2" + if fuse: + name += "_fuse" if network != "none": name += "_" + network + "net" @@ -107,6 +110,7 @@ def _syscall_test( "--overlay=" + str(overlay), "--add-uds-tree=" + str(add_uds_tree), "--vfs2=" + str(vfs2), + "--fuse=" + str(fuse), ] # Call the rule above. @@ -129,6 +133,7 @@ def syscall_test( add_uds_tree = False, add_hostinet = False, vfs2 = False, + fuse = False, tags = None): """syscall_test is a macro that will create targets for all platforms. @@ -188,6 +193,19 @@ def syscall_test( vfs2 = True, ) + if vfs2 and fuse: + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = default_platform, + use_tmpfs = use_tmpfs, + add_uds_tree = add_uds_tree, + tags = platforms[default_platform] + vfs2_tags, + vfs2 = True, + fuse = True, + ) + # TODO(gvisor.dev/issue/1487): Enable VFS2 overlay tests. 
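The flag plumbing above only honors --fuse when --vfs2 is also set (loader.go checks args.Conf.FUSE inside the VFS2 branch). A stand-alone hedged sketch of that gating, reduced to plain flags; this is not runsc code, just the shape of the logic:

package main // illustrative sketch, not part of this change

import (
	"flag"
	"fmt"
)

var (
	vfs2Flag = flag.Bool("vfs2", false, "enable the experimental VFS2 layer")
	fuseFlag = flag.Bool("fuse", false, "enable the experimental FUSE filesystem (only honored with --vfs2)")
)

func main() {
	flag.Parse()

	// Mirrors loader.go: FUSE is only enabled when VFS2 is enabled, so
	// passing --fuse alone has no effect.
	vfs2Enabled, fuseEnabled := false, false
	if *vfs2Flag {
		vfs2Enabled = true
		if *fuseFlag {
			fuseEnabled = true
		}
	}
	fmt.Printf("vfs2=%t fuse=%t\n", vfs2Enabled, fuseEnabled)
}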
if add_overlay: _syscall_test( diff --git a/test/runner/runner.go b/test/runner/runner.go index 5456e46a6..2296f3a46 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -47,6 +47,7 @@ var ( fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay") vfs2 = flag.Bool("vfs2", false, "enable VFS2") + fuse = flag.Bool("fuse", false, "enable FUSE") parallel = flag.Bool("parallel", false, "run tests in parallel") runscPath = flag.String("runsc", "", "path to runsc binary") @@ -149,6 +150,9 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { } if *vfs2 { args = append(args, "-vfs2") + if *fuse { + args = append(args, "-fuse") + } } if *debug { args = append(args, "-debug", "-log-packets=true") @@ -358,6 +362,12 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { vfsVar := "GVISOR_VFS" if *vfs2 { env = append(env, vfsVar+"=VFS2") + fuseVar := "FUSE_ENABLED" + if *fuse { + env = append(env, fuseVar+"=TRUE") + } else { + env = append(env, fuseVar+"=FALSE") + } } else { env = append(env, vfsVar+"=VFS1") } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 28ef55945..c06a75ada 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -146,6 +146,7 @@ syscall_test( ) syscall_test( + fuse = "True", test = "//test/syscalls/linux:dev_test", vfs2 = "True", ) diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 3c88c4cbd..6fa16208e 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -156,7 +156,7 @@ TEST(DevTest, TTYExists) { TEST(DevTest, OpenDevFuse) { // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new // device registration is complete. - SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor()); + SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor() || !IsFUSEEnabled()); ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY)); } diff --git a/test/util/test_util.cc b/test/util/test_util.cc index 8a037f45f..d0c1d6426 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -42,6 +42,7 @@ namespace testing { constexpr char kGvisorNetwork[] = "GVISOR_NETWORK"; constexpr char kGvisorVfs[] = "GVISOR_VFS"; +constexpr char kFuseEnabled[] = "FUSE_ENABLED"; bool IsRunningOnGvisor() { return GvisorPlatform() != Platform::kNative; } @@ -68,6 +69,11 @@ bool IsRunningWithVFS1() { return strcmp(env, "VFS1") == 0; } +bool IsFUSEEnabled() { + const char* env = getenv(kFuseEnabled); + return env && strcmp(env, "TRUE") == 0; +} + // Inline cpuid instruction. Preserve %ebx/%rbx register. In PIC compilations // %ebx contains the address of the global offset table. %rbx is occasionally // used to address stack variables in presence of dynamic allocas. diff --git a/test/util/test_util.h b/test/util/test_util.h index 109078fc7..89ac575bd 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -225,6 +225,7 @@ const std::string GvisorPlatform(); bool IsRunningWithHostinet(); // TODO(gvisor.dev/issue/1624): Delete once VFS1 is gone. bool IsRunningWithVFS1(); +bool IsFUSEEnabled(); #ifdef __linux__ void SetupGvisorDeathTest(); -- cgit v1.2.3 From 5946f111827fa4e342a2e6e9c043c198d2e5cb03 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 9 Jul 2020 16:24:43 -0700 Subject: Add support for IP_HDRINCL IP option for raw sockets. 
Updates #2746 Fixes #3158 PiperOrigin-RevId: 320497190 --- pkg/sentry/socket/netstack/netstack.go | 11 ++++- pkg/tcpip/tcpip.go | 5 +++ pkg/tcpip/transport/raw/endpoint.go | 34 +++++++++++---- test/syscalls/linux/raw_socket_hdrincl.cc | 70 ++++++++++++++++++++++++++++++- 4 files changed, 110 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index e7d2c83d7..3b248a953 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -2112,13 +2112,22 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s } return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, v != 0)) + case linux.IP_HDRINCL: + if len(optVal) == 0 { + return nil + } + v, err := parseIntOrChar(optVal) + if err != nil { + return err + } + return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.IPHdrIncludedOption, v != 0)) + case linux.IP_ADD_SOURCE_MEMBERSHIP, linux.IP_BIND_ADDRESS_NO_PORT, linux.IP_BLOCK_SOURCE, linux.IP_CHECKSUM, linux.IP_DROP_SOURCE_MEMBERSHIP, linux.IP_FREEBIND, - linux.IP_HDRINCL, linux.IP_IPSEC_POLICY, linux.IP_MINTTL, linux.IP_MSFILTER, diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 2f9872dc6..25534a10d 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -648,6 +648,11 @@ const ( // whether an IPv6 socket is to be restricted to sending and receiving // IPv6 packets only. V6OnlyOption + + // IPHdrIncludedOption is used by SetSockOpt to indicate for a raw + // endpoint that all packets being written have an IP header and the + // endpoint should not attach an IP header. + IPHdrIncludedOption ) // SockOptInt represents socket options which values have the int type. diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index 766c7648e..5b6e7d102 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -63,6 +63,7 @@ type endpoint struct { stack *stack.Stack `state:"manual"` waiterQueue *waiter.Queue associated bool + hdrIncluded bool // The following fields are used to manage the receive queue and are // protected by rcvMu. @@ -108,6 +109,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt rcvBufSizeMax: 32 * 1024, sndBufSizeMax: 32 * 1024, associated: associated, + hdrIncluded: !associated, } // Override with stack defaults. @@ -182,10 +184,6 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { // Read implements tcpip.Endpoint.Read. func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { - if !e.associated { - return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidOptionValue - } - e.rcvMu.Lock() // If there's no data to read, return that read would block or that the @@ -263,7 +261,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c // If this is an unassociated socket and callee provided a nonzero // destination address, route using that address. 
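For context, this is what the new option looks like from the application side. A hypothetical user-level sketch using golang.org/x/sys/unix (not part of this diff); once IP_HDRINCL is set, every write on the socket must carry a complete IPv4 header, and opening the raw socket requires CAP_NET_RAW:

package main

import (
	"fmt"
	"log"

	"golang.org/x/sys/unix"
)

func main() {
	// Raw sockets require CAP_NET_RAW (typically root).
	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_RAW, unix.IPPROTO_UDP)
	if err != nil {
		log.Fatalf("socket: %v", err)
	}
	defer unix.Close(fd)

	// Equivalent of setsockopt(fd, SOL_IP, IP_HDRINCL, &one, sizeof(one)).
	if err := unix.SetsockoptInt(fd, unix.SOL_IP, unix.IP_HDRINCL, 1); err != nil {
		log.Fatalf("setsockopt(IP_HDRINCL): %v", err)
	}
	fmt.Println("IP_HDRINCL set: subsequent writes must include the IP header")
}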
- if !e.associated { + if e.hdrIncluded { ip := header.IPv4(payloadBytes) if !ip.IsValid(len(payloadBytes)) { e.mu.RUnlock() @@ -353,7 +351,7 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, } } - if !e.associated { + if e.hdrIncluded { if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{ Data: buffer.View(payloadBytes).ToVectorisedView(), }); err != nil { @@ -513,6 +511,13 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { // SetSockOptBool implements tcpip.Endpoint.SetSockOptBool. func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error { + switch opt { + case tcpip.IPHdrIncludedOption: + e.mu.Lock() + e.hdrIncluded = v + e.mu.Unlock() + return nil + } return tcpip.ErrUnknownProtocolOption } @@ -577,6 +582,12 @@ func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) { case tcpip.KeepaliveEnabledOption: return false, nil + case tcpip.IPHdrIncludedOption: + e.mu.Lock() + v := e.hdrIncluded + e.mu.Unlock() + return v, nil + default: return false, tcpip.ErrUnknownProtocolOption } @@ -616,8 +627,15 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) { e.rcvMu.Lock() - // Drop the packet if our buffer is currently full. - if e.rcvClosed { + // Drop the packet if our buffer is currently full or if this is an unassociated + // endpoint (i.e endpoint created w/ IPPROTO_RAW). Such endpoints are send only + // See: https://man7.org/linux/man-pages/man7/raw.7.html + // + // An IPPROTO_RAW socket is send only. If you really want to receive + // all IP packets, use a packet(7) socket with the ETH_P_IP protocol. + // Note that packet sockets don't reassemble IP fragments, unlike raw + // sockets. + if e.rcvClosed || !e.associated { e.rcvMu.Unlock() e.stack.Stats().DroppedPackets.Increment() e.stats.ReceiveErrors.ClosedReceiver.Increment() diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc index 16cfc1d75..5bb14d57c 100644 --- a/test/syscalls/linux/raw_socket_hdrincl.cc +++ b/test/syscalls/linux/raw_socket_hdrincl.cc @@ -167,7 +167,7 @@ TEST_F(RawHDRINCL, NotReadable) { // nothing to be read. char buf[117]; ASSERT_THAT(RetryEINTR(recv)(socket_, buf, sizeof(buf), MSG_DONTWAIT), - SyscallFailsWithErrno(EINVAL)); + SyscallFailsWithErrno(EAGAIN)); } // Test that we can connect() to a valid IP (loopback). @@ -332,6 +332,74 @@ TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) { EXPECT_EQ(absl::gbswap_32(recv_iphdr.daddr), INADDR_LOOPBACK); } +// Send and receive a packet w/ the IP_HDRINCL option set. +TEST_F(RawHDRINCL, SendAndReceiveIPHdrIncl) { + int port = 40000; + if (!IsRunningOnGvisor()) { + port = static_cast(ASSERT_NO_ERRNO_AND_VALUE( + PortAvailable(0, AddressFamily::kIpv4, SocketType::kUdp, false))); + } + + FileDescriptor recv_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); + + FileDescriptor send_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); + + // Enable IP_HDRINCL option so that we can build and send w/ an IP + // header. + constexpr int kSockOptOn = 1; + ASSERT_THAT(setsockopt(send_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + // This is not strictly required but we do it to make sure that setting + // IP_HDRINCL on a non IPPROTO_RAW socket does not prevent it from receiving + // packets. 
+ ASSERT_THAT(setsockopt(recv_sock.get(), SOL_IP, IP_HDRINCL, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + + // Construct a packet with an IP header, UDP header, and payload. + constexpr char kPayload[] = "toto"; + char packet[sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kPayload)]; + ASSERT_TRUE( + FillPacket(packet, sizeof(packet), port, kPayload, sizeof(kPayload))); + + socklen_t addrlen = sizeof(addr_); + ASSERT_NO_FATAL_FAILURE(sendto(send_sock.get(), &packet, sizeof(packet), 0, + reinterpret_cast(&addr_), + addrlen)); + + // Receive the payload. + char recv_buf[sizeof(packet)]; + struct sockaddr_in src; + socklen_t src_size = sizeof(src); + ASSERT_THAT(recvfrom(recv_sock.get(), recv_buf, sizeof(recv_buf), 0, + reinterpret_cast(&src), &src_size), + SyscallSucceedsWithValue(sizeof(packet))); + EXPECT_EQ( + memcmp(kPayload, recv_buf + sizeof(struct iphdr) + sizeof(struct udphdr), + sizeof(kPayload)), + 0); + // The network stack should have set the source address. + EXPECT_EQ(src.sin_family, AF_INET); + EXPECT_EQ(absl::gbswap_32(src.sin_addr.s_addr), INADDR_LOOPBACK); + struct iphdr iphdr = {}; + memcpy(&iphdr, recv_buf, sizeof(iphdr)); + EXPECT_NE(iphdr.id, 0); + + // Also verify that the packet we just sent was not delivered to the + // IPPROTO_RAW socket. + { + char recv_buf[sizeof(packet)]; + struct sockaddr_in src; + socklen_t src_size = sizeof(src); + ASSERT_THAT(recvfrom(socket_, recv_buf, sizeof(recv_buf), MSG_DONTWAIT, + reinterpret_cast(&src), &src_size), + SyscallFailsWithErrno(EAGAIN)); + } +} + } // namespace } // namespace testing -- cgit v1.2.3 From 2afff44403e046078301de39f0252bb57fc018c7 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 5 May 2020 22:00:14 -0700 Subject: Update shim to build using bazel. The go.mod dependency tree for the shim was somehow contradictory. After resolving these issues (e.g. explicitly imported k8s 1.14, pulling a specific dbus version), and adding all dependencies, the shim can now be build as part of the regular bazel tree. As part of this process, minor cleanup was done in all the source files: headers were standardized (and include "The gVisor Authors" in addition to the "The containerd Authors" if originally derived from containerd sources), and comments were cleaned up to meet coding standards. This change makes the containerd installation dynamic, so that multiple versions can be tested, and drops the static installer for the VM image itself. This change also updates test/root/crictl_test.go and related utilities, so that the containerd tests can be run on any version (and in cases where it applies, they can be run on both v1 and v2 as parameterized tests). 
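For orientation, the v1/v2 parameterization mentioned here boils down to a table-driven test over the two shim binaries built by this change. The skeleton below only illustrates that structure; the real logic lives in test/root/crictl_test.go and the helpers in pkg/test/criutil:

package root_sketch // illustrative

import "testing"

func TestCrictlAcrossShims(t *testing.T) {
	shims := []struct {
		name string
		bin  string
	}{
		{name: "v1", bin: "gvisor-containerd-shim"},   // legacy shim
		{name: "v2", bin: "containerd-shim-runsc-v1"}, // shim v2 API
	}
	for _, tc := range shims {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			// A real subtest would point containerd at tc.bin and drive the
			// crictl workflows against it.
			t.Logf("would exercise crictl with %s", tc.bin)
		})
	}
}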
--- BUILD | 3 + Makefile | 23 +- WORKSPACE | 597 +++++++++++++++++++++++--- g3doc/user_guide/BUILD | 9 + g3doc/user_guide/runtimeclass.md | 44 ++ go.mod | 22 +- go.sum | 123 +++++- pkg/shim/runsc/BUILD | 16 + pkg/shim/runsc/runsc.go | 49 ++- pkg/shim/v1/proc/BUILD | 35 ++ pkg/shim/v1/proc/deleted_state.go | 12 +- pkg/shim/v1/proc/exec.go | 25 +- pkg/shim/v1/proc/exec_state.go | 16 +- pkg/shim/v1/proc/init.go | 80 ++-- pkg/shim/v1/proc/init_state.go | 18 +- pkg/shim/v1/proc/io.go | 5 +- pkg/shim/v1/proc/process.go | 6 +- pkg/shim/v1/proc/types.go | 10 +- pkg/shim/v1/proc/utils.go | 2 - pkg/shim/v1/shim/BUILD | 38 ++ pkg/shim/v1/shim/platform.go | 10 +- pkg/shim/v1/shim/service.go | 68 ++- pkg/shim/v1/utils/BUILD | 26 ++ pkg/shim/v1/utils/volumes.go | 41 +- pkg/shim/v2/BUILD | 41 ++ pkg/shim/v2/epoll.go | 6 +- pkg/shim/v2/options/BUILD | 11 + pkg/shim/v2/service.go | 115 ++--- pkg/shim/v2/service_linux.go | 10 +- pkg/test/criutil/criutil.go | 66 ++- runsc/BUILD | 12 +- runsc/debian/postinst.sh | 9 +- shim/BUILD | 15 + shim/README.md | 21 +- shim/configure-containerd-shim-runsc-v1.md | 72 ---- shim/configure-gvisor-containerd-shim.md | 42 -- shim/runsc.toml | 6 + shim/runtime-handler-quickstart.md | 251 ----------- shim/runtime-handler-shim-v2-quickstart.md | 232 ---------- shim/untrusted-workload-quickstart.md | 212 --------- shim/v1/BUILD | 43 ++ shim/v1/README.md | 50 +++ shim/v1/config.go | 40 ++ shim/v1/main.go | 247 ++++++++++- shim/v2/BUILD | 32 ++ shim/v2/README.md | 90 ++++ shim/v2/config.go | 40 -- shim/v2/main.go | 298 +------------ shim/v2/runtime-handler-shim-v2-quickstart.md | 232 ++++++++++ test/root/BUILD | 5 +- test/root/crictl_test.go | 452 ++++++++++++------- test/shim/containerd-install.sh | 44 -- test/shim/crictl-install.sh | 17 - test/shim/run-container.sh | 30 -- test/shim/runsc-install.sh | 8 - test/shim/runtime-handler-shim-v2/install.sh | 21 - test/shim/runtime-handler-shim-v2/test.sh | 34 -- test/shim/runtime-handler-shim-v2/validate.sh | 7 - test/shim/runtime-handler/install.sh | 24 -- test/shim/runtime-handler/test.sh | 33 -- test/shim/runtime-handler/usage.sh | 30 -- test/shim/runtimeclass-install.sh | 33 -- test/shim/shim-install.sh | 28 -- test/shim/untrusted-workload/install.sh | 27 -- test/shim/untrusted-workload/test.sh | 33 -- test/shim/untrusted-workload/usage.sh | 33 -- test/shim/validate.sh | 17 - tools/bazel.mk | 2 +- tools/installers/BUILD | 10 +- tools/installers/containerd.sh | 114 +++++ tools/installers/head.sh | 8 +- tools/vm/ubuntu1604/25_docker.sh | 65 --- tools/vm/ubuntu1604/30_containerd.sh | 86 ---- tools/vm/ubuntu1604/30_docker.sh | 65 +++ website/BUILD | 3 + 75 files changed, 2503 insertions(+), 2197 deletions(-) create mode 100644 g3doc/user_guide/runtimeclass.md create mode 100644 pkg/shim/runsc/BUILD create mode 100644 pkg/shim/v1/proc/BUILD create mode 100644 pkg/shim/v1/shim/BUILD create mode 100644 pkg/shim/v1/utils/BUILD create mode 100644 pkg/shim/v2/BUILD create mode 100644 pkg/shim/v2/options/BUILD create mode 100644 shim/BUILD delete mode 100644 shim/configure-containerd-shim-runsc-v1.md delete mode 100644 shim/configure-gvisor-containerd-shim.md create mode 100644 shim/runsc.toml delete mode 100644 shim/runtime-handler-quickstart.md delete mode 100644 shim/runtime-handler-shim-v2-quickstart.md delete mode 100644 shim/untrusted-workload-quickstart.md create mode 100644 shim/v1/BUILD create mode 100644 shim/v1/README.md create mode 100644 shim/v1/config.go create mode 100644 shim/v2/BUILD create mode 100644 shim/v2/README.md delete 
mode 100644 shim/v2/config.go create mode 100644 shim/v2/runtime-handler-shim-v2-quickstart.md delete mode 100755 test/shim/containerd-install.sh delete mode 100755 test/shim/crictl-install.sh delete mode 100755 test/shim/run-container.sh delete mode 100755 test/shim/runsc-install.sh delete mode 100755 test/shim/runtime-handler-shim-v2/install.sh delete mode 100755 test/shim/runtime-handler-shim-v2/test.sh delete mode 100755 test/shim/runtime-handler-shim-v2/validate.sh delete mode 100755 test/shim/runtime-handler/install.sh delete mode 100755 test/shim/runtime-handler/test.sh delete mode 100755 test/shim/runtime-handler/usage.sh delete mode 100755 test/shim/runtimeclass-install.sh delete mode 100755 test/shim/shim-install.sh delete mode 100755 test/shim/untrusted-workload/install.sh delete mode 100755 test/shim/untrusted-workload/test.sh delete mode 100755 test/shim/untrusted-workload/usage.sh delete mode 100755 test/shim/validate.sh create mode 100755 tools/installers/containerd.sh delete mode 100755 tools/vm/ubuntu1604/25_docker.sh delete mode 100755 tools/vm/ubuntu1604/30_containerd.sh create mode 100755 tools/vm/ubuntu1604/30_docker.sh (limited to 'test') diff --git a/BUILD b/BUILD index 962d54821..5d0dbde4c 100644 --- a/BUILD +++ b/BUILD @@ -69,7 +69,10 @@ go_path( name = "gopath", mode = "link", deps = [ + # Main binary. "//runsc", + "//shim/v1:gvisor-containerd-shim", + "//shim/v2:containerd-shim-runsc-v1", # Packages that are not dependencies of //runsc. "//pkg/sentry/kernel/memevent", diff --git a/Makefile b/Makefile index 85818ebea..58648f5d2 100644 --- a/Makefile +++ b/Makefile @@ -121,6 +121,22 @@ tests: ## Runs all local ptrace system call tests. @$(MAKE) test OPTIONS="--test_tag_filters runsc_ptrace test/syscalls/..." .PHONY: tests +containerd-test-%: ## Runs a local containerd test. +containerd-test-%: load-basic_alpine load-basic_python load-basic_busybox load-basic_resolv load-basic_httpd +containerd-test-%: install-test-runtime + @CONTAINERD_VERSION=$* $(MAKE) sudo TARGETS="tools/installers:containerd" + @$(MAKE) sudo TARGETS="test/root:root_test" ARGS="-test.v" + +# Note that we can't run containerd-test-1.1.8 tests here. +# +# Containerd 1.1.8 should work, but because of a bug in loading images locally +# (https://github.com/kubernetes-sigs/cri-tools/issues/421), we are unable to +# actually drive the tests. The v1 API is tested exclusively through 1.2.13. +containerd-tests: ## Runs all supported containerd version tests. +containerd-tests: containerd-test-1.2.13 +containerd-tests: containerd-test-1.3.4 +containerd-tests: containerd-test-1.4.0-beta.0 + ## ## Website & documentation helpers. ## @@ -233,11 +249,14 @@ dev: ## Installs a set of local runtimes. Requires sudo. refresh: ## Refreshes the runtime binary (for development only). Must have called 'dev' or 'test-install' first. @mkdir -p "$(RUNTIME_DIR)" - @$(MAKE) copy TARGETS=runsc DESTINATION="$(RUNTIME_BIN)" && chmod 0755 "$(RUNTIME_BIN)" + @$(MAKE) copy TARGETS=runsc DESTINATION="$(RUNTIME_BIN)" + @$(MAKE) copy TARGETS=shim/v1:gvisor-containerd-shim DESTINATION="$(RUNTIME_DIR)" + @$(MAKE) copy TARGETS=shim/v2:containerd-shim-runsc-v1 DESTINATION="$(RUNTIME_DIR)" .PHONY: install -test-install: ## Installs the runtime for testing. Requires sudo. +install-test-runtime: ## Installs the runtime for testing. Requires sudo. 
@$(MAKE) refresh ARGS="--net-raw --TESTONLY-test-name-env=RUNSC_TEST_NAME --debug --strace --log-packets $(ARGS)" + @$(MAKE) configure RUNTIME=runsc @$(MAKE) configure @sudo systemctl restart docker .PHONY: install-test diff --git a/WORKSPACE b/WORKSPACE index 417ec6100..b3e97b0cc 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -4,11 +4,11 @@ load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") # Bazel/starlark utilities. http_archive( name = "bazel_skylib", + sha256 = "97e70364e9249702246c0e9444bccdc4b847bed1eb03c5a3ece4f83dfe6abc44", urls = [ "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.0.2/bazel-skylib-1.0.2.tar.gz", "https://github.com/bazelbuild/bazel-skylib/releases/download/1.0.2/bazel-skylib-1.0.2.tar.gz", ], - sha256 = "97e70364e9249702246c0e9444bccdc4b847bed1eb03c5a3ece4f83dfe6abc44", ) load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace") @@ -159,7 +159,38 @@ load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps") grpc_extra_deps() -# External repositories, in sorted order. +# System Call test dependencies. +http_archive( + name = "com_google_absl", + sha256 = "56775f1283a59e6274c28d99981a9717ff4e0b1161e9129fdb2fcf22531d8d93", + strip_prefix = "abseil-cpp-a0d1e098c2f99694fa399b175a7ccf920762030e", + urls = [ + "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/a0d1e098c2f99694fa399b175a7ccf920762030e.tar.gz", + "https://github.com/abseil/abseil-cpp/archive/a0d1e098c2f99694fa399b175a7ccf920762030e.tar.gz", + ], +) + +http_archive( + name = "com_google_googletest", + sha256 = "0a10bea96d8670e5eef948d79d824162b1577bb7889539e49ec786bfc3e48912", + strip_prefix = "googletest-565f1b848215b77c3732bca345fe76a0431d8b34", + urls = [ + "https://mirror.bazel.build/github.com/google/googletest/archive/565f1b848215b77c3732bca345fe76a0431d8b34.tar.gz", + "https://github.com/google/googletest/archive/565f1b848215b77c3732bca345fe76a0431d8b34.tar.gz", + ], +) + +http_archive( + name = "com_google_benchmark", + sha256 = "3c6a165b6ecc948967a1ead710d4a181d7b0fbcaa183ef7ea84604994966221a", + strip_prefix = "benchmark-1.5.0", + urls = [ + "https://mirror.bazel.build/github.com/google/benchmark/archive/v1.5.0.tar.gz", + "https://github.com/google/benchmark/archive/v1.5.0.tar.gz", + ], +) + +# Go repositories, in gazelle order. 
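As a usage sketch for the new `containerd-test-%`, `containerd-tests`, and `install-test-runtime` targets added to the Makefile above: the commands below assume a disposable test VM with Bazel, Docker, and sudo available, since the installers reconfigure containerd and Docker system-wide. The target names are taken from the hunk; the ordering and the choice of running a single version first are illustrative.

```shell
# Illustrative invocation only; run on a throwaway VM, not a workstation.
make install-test-runtime         # builds runsc plus both shims and registers the test runtime
make containerd-test-1.2.13       # the v1 shim API is exercised through containerd 1.2.13
make containerd-tests             # runs the full matrix: 1.2.13, 1.3.4, 1.4.0-beta.0
```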
go_repository( name = "com_github_cenkalti_backoff", importpath = "github.com/cenkalti/backoff", @@ -177,15 +208,15 @@ go_repository( go_repository( name = "com_github_golang_mock", importpath = "github.com/golang/mock", - sum = "h1:qGJ6qTW+x6xX/my+8YUVl4WNpX9B7+/l2tRsHGZ7f2s=", - version = "v1.3.1", + sum = "h1:G5FRp8JnTd7RQH5kemVNlMeyXQAztQ3mOWV95KxsXH8=", + version = "v1.1.1", ) go_repository( name = "com_github_google_go-cmp", importpath = "github.com/google/go-cmp", - sum = "h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=", - version = "v0.2.0", + sum = "h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=", + version = "v0.3.1", ) go_repository( @@ -232,8 +263,8 @@ go_repository( go_repository( name = "com_github_opencontainers_runtime-spec", importpath = "github.com/opencontainers/runtime-spec", - sum = "h1:d9F+LNYwMyi3BDN4GzZdaSiq4otb8duVEWyZjeUtOQI=", - version = "v0.1.2-0.20171211145439-b2d941ef6a78", + sum = "h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0=", + version = "v1.0.2", ) go_repository( @@ -261,8 +292,8 @@ go_repository( name = "org_golang_google_grpc", build_file_proto_mode = "disable", importpath = "google.golang.org/grpc", - sum = "h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk=", - version = "v1.27.1", + sum = "h1:Mm8atZtkT+P6R43n/dqNDWkPPu5BwRVu/1rJnJCeZH8=", + version = "v1.12.0", ) go_repository( @@ -289,8 +320,8 @@ go_repository( go_repository( name = "org_golang_x_net", importpath = "golang.org/x/net", - sum = "h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=", - version = "v0.0.0-20190620200207-3b0461eec859", + sum = "h1:FCqk7JXVeupwwnGVopQCC0a0xRK0Rj7SL5AyjjWo4pk=", + version = "v0.0.0-20170716174642-b3756b4b77d7", ) go_repository( @@ -303,8 +334,8 @@ go_repository( go_repository( name = "org_golang_x_text", importpath = "golang.org/x/text", - sum = "h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=", - version = "v0.3.0", + sum = "h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=", + version = "v0.3.2", ) go_repository( @@ -317,8 +348,8 @@ go_repository( go_repository( name = "org_golang_x_tools", importpath = "golang.org/x/tools", - sum = "h1:Uglradbb4KfUWaYasZhlsDsGRwHHvRsHoNAEONef0W8=", - version = "v0.0.0-20200131233409-575de47986ce", + sum = "h1:NIou6eNFigscvKJmsbyez16S2cIS6idossORlFtSt2E=", + version = "v0.0.0-20181030221726-6c7e314b6563", ) go_repository( @@ -352,8 +383,8 @@ go_repository( go_repository( name = "org_golang_x_oauth2", importpath = "golang.org/x/oauth2", - sum = "h1:pE8b58s1HRDMi8RDc79m0HISf9D4TzseP40cEA6IGfs=", - version = "v0.0.0-20191202225959-858c2ad4c8b6", + sum = "h1:vEDujvNQGv4jgYKudGeI/+DAX4Jffq6hpD55MmoEvKs=", + version = "v0.0.0-20180821212333-d2e6202438be", ) go_repository( @@ -497,12 +528,11 @@ go_repository( version = "v1.5.0", ) -# BigQuery Dependencies for Benchmarks go_repository( name = "com_google_cloud_go", importpath = "cloud.google.com/go", - sum = "h1:eoz/lYxKSL4CNAiaUJ0ZfD1J3bfMYbU5B3rwM1C1EIU=", - version = "v0.55.0", + sum = "h1:e0WKqKTd5BnrG8aKH3J3h+QvEIQtSUcf2n5UZ5ZgLtQ=", + version = "v0.26.0", ) go_repository( @@ -515,8 +545,8 @@ go_repository( go_repository( name = "io_opencensus_go", importpath = "go.opencensus.io", - sum = "h1:8sGtKOrtQqkN1bp2AtX+misvLIlOmsEsNd+9NIcPEm8=", - version = "v0.22.3", + sum = "h1:C9hSCOW830chIVkdja34wa6Ky+IzWllkUinR+BtRZd4=", + version = "v0.22.0", ) go_repository( @@ -526,34 +556,505 @@ go_repository( version = "v0.0.0-20200121045136-8c9f03a8e57e", ) -# System Call test dependencies. 
-http_archive( - name = "com_google_absl", - sha256 = "56775f1283a59e6274c28d99981a9717ff4e0b1161e9129fdb2fcf22531d8d93", - strip_prefix = "abseil-cpp-a0d1e098c2f99694fa399b175a7ccf920762030e", - urls = [ - "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/a0d1e098c2f99694fa399b175a7ccf920762030e.tar.gz", - "https://github.com/abseil/abseil-cpp/archive/a0d1e098c2f99694fa399b175a7ccf920762030e.tar.gz", - ], +go_repository( + name = "com_github_census_instrumentation_opencensus_proto", + importpath = "github.com/census-instrumentation/opencensus-proto", + sum = "h1:glEXhBS5PSLLv4IXzLA5yPRVX4bilULVyxxbrfOtDAk=", + version = "v0.2.1", ) -http_archive( - name = "com_google_googletest", - sha256 = "0a10bea96d8670e5eef948d79d824162b1577bb7889539e49ec786bfc3e48912", - strip_prefix = "googletest-565f1b848215b77c3732bca345fe76a0431d8b34", - urls = [ - "https://mirror.bazel.build/github.com/google/googletest/archive/565f1b848215b77c3732bca345fe76a0431d8b34.tar.gz", - "https://github.com/google/googletest/archive/565f1b848215b77c3732bca345fe76a0431d8b34.tar.gz", - ], +go_repository( + name = "com_github_client9_misspell", + importpath = "github.com/client9/misspell", + sum = "h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI=", + version = "v0.3.4", ) -http_archive( - name = "com_google_benchmark", - sha256 = "3c6a165b6ecc948967a1ead710d4a181d7b0fbcaa183ef7ea84604994966221a", - strip_prefix = "benchmark-1.5.0", - urls = [ - "https://mirror.bazel.build/github.com/google/benchmark/archive/v1.5.0.tar.gz", - "https://github.com/google/benchmark/archive/v1.5.0.tar.gz", - ], +go_repository( + name = "com_github_cncf_udpa_go", + importpath = "github.com/cncf/udpa/go", + sum = "h1:WBZRG4aNOuI15bLRrCgN8fCq8E5Xuty6jGbmSNEvSsU=", + version = "v0.0.0-20191209042840-269d4d468f6f", +) + +# K8s must be pinned to 1.14, prior to APIs being split into separate +# repositories. The containerd versions below assume the existence of the cri +# API within the kubelet package. 
+go_repository( + name = "io_k8s_kubernetes", + importpath = "k8s.io/kubernetes", + sum = "h1:6T2iAEoOYQnzQb3WvPlUkcczEEXZ7+YPlAO8olwujRw=", + version = "v1.14.0", +) + +go_repository( + name = "com_github_containerd_cgroups", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/cgroups", + sum = "h1:5/YbYIVboEqSpFSC/iMO9FOIP/q8RIuKfYS2TA1M8WE=", + version = "v0.0.0-20200520162414-666f4a009ffb", +) + +go_repository( + name = "com_github_containerd_console", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/console", + sum = "h1:uict5mhHFTzKLUCufdSLym7z/J0CbBJT59lYbP9wtbg=", + version = "v0.0.0-20180822173158-c12b1e7919c1", +) + +go_repository( + name = "com_github_containerd_containerd", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/containerd", + sum = "h1:BmZa1bGjKctYrIbyjbhZJlGvHceJASpdW5pIDSQcw1E=", + version = "v0.0.0-20190510190154-d0319ec44af6", +) + +go_repository( + name = "com_github_containerd_continuity", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/continuity", + sum = "h1:tN9D97v5A5QuKdcKHKt+UMKrkQ5YXUnD8iM7IAAjEfI=", + version = "v0.0.0-20190815185530-f2a389ac0a02", +) + +go_repository( + name = "com_github_containerd_cri", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/cri", + sum = "h1:+bW7GQb2q32/Liy0ZiR6pkpRXdDHShUXRoWg8OGVWZs=", + version = "v0.0.0-20190308093238-8a0bd84b9a4c", +) + +go_repository( + name = "com_github_containerd_fifo", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/fifo", + sum = "h1:XGyg7oTtD0DoRFhbpV6x1WfV0flKC4UxXU7ab1zC08U=", + version = "v0.0.0-20180307165137-3d5202aec260", +) + +go_repository( + name = "com_github_containerd_go_runc", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/go-runc", + sum = "h1:esQOJREg8nw8aXj6uCN5dfW5cKUBiEJ/+nni1Q/D/sw=", + version = "v0.0.0-20180907222934-5a6d9f37cfa3", +) + +go_repository( + name = "com_github_containerd_ttrpc", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/ttrpc", + sum = "h1:SKDlsIhYxNE1LO0xwuOR+3QWj3zRibVQu5jWIMQmOfU=", + version = "v0.0.0-20190411181408-699c4e40d1e7", +) + +go_repository( + name = "com_github_containerd_typeurl", + build_file_proto_mode = "disable", + importpath = "github.com/containerd/typeurl", + sum = "h1:JNn81o/xG+8NEo3bC/vx9pbi/g2WI8mtP2/nXzu297Y=", + version = "v0.0.0-20180627222232-a93fcdb778cd", +) + +go_repository( + name = "com_github_coreos_go_systemd", + importpath = "github.com/coreos/go-systemd", + sum = "h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=", + version = "v0.0.0-20191104093116-d3cd4ed1dbcf", +) + +go_repository( + name = "com_github_davecgh_go_spew", + importpath = "github.com/davecgh/go-spew", + sum = "h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=", + version = "v1.1.1", +) + +go_repository( + name = "com_github_docker_distribution", + importpath = "github.com/docker/distribution", + sum = "h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug=", + version = "v2.7.1+incompatible", +) + +go_repository( + name = "com_github_docker_go_events", + importpath = "github.com/docker/go-events", + sum = "h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8=", + version = "v0.0.0-20190806004212-e31b211e4f1c", +) + +go_repository( + name = "com_github_docker_go_units", + importpath = "github.com/docker/go-units", + sum = "h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=", + version = "v0.4.0", +) + +go_repository( + name = "com_github_dustin_go_humanize", + 
importpath = "github.com/dustin/go-humanize", + sum = "h1:qk/FSDDxo05wdJH28W+p5yivv7LuLYLRXPPD8KQCtZs=", + version = "v0.0.0-20171111073723-bb3d318650d4", +) + +go_repository( + name = "com_github_envoyproxy_go_control_plane", + importpath = "github.com/envoyproxy/go-control-plane", + sum = "h1:rEvIZUSZ3fx39WIi3JkQqQBitGwpELBIYWeBVh6wn+E=", + version = "v0.9.4", +) + +go_repository( + name = "com_github_envoyproxy_protoc_gen_validate", + importpath = "github.com/envoyproxy/protoc-gen-validate", + sum = "h1:EQciDnbrYxy13PgWoY8AqoxGiPrpgBZ1R8UNe3ddc+A=", + version = "v0.1.0", +) + +go_repository( + name = "com_github_fsnotify_fsnotify", + importpath = "github.com/fsnotify/fsnotify", + sum = "h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=", + version = "v1.4.7", +) + +go_repository( + name = "com_github_godbus_dbus", + importpath = "github.com/godbus/dbus", + sum = "h1:ZpnhV/YsD2/4cESfV5+Hoeu/iUR3ruzNvZ+yQfO03a0=", + version = "v0.0.0-20190726142602-4481cbc300e2", +) + +go_repository( + name = "com_github_gogo_googleapis", + importpath = "github.com/gogo/googleapis", + sum = "h1:zgVt4UpGxcqVOw97aRGxT4svlcmdK35fynLNctY32zI=", + version = "v1.4.0", +) + +go_repository( + name = "com_github_gogo_protobuf", + importpath = "github.com/gogo/protobuf", + sum = "h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=", + version = "v1.3.1", +) + +go_repository( + name = "com_github_golang_glog", + importpath = "github.com/golang/glog", + sum = "h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=", + version = "v0.0.0-20160126235308-23def4e6c14b", +) + +go_repository( + name = "com_github_hashicorp_golang_lru", + importpath = "github.com/hashicorp/golang-lru", + sum = "h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU=", + version = "v0.5.1", +) + +go_repository( + name = "com_github_hpcloud_tail", + importpath = "github.com/hpcloud/tail", + sum = "h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=", + version = "v1.0.0", +) + +go_repository( + name = "com_github_inconshreveable_mousetrap", + importpath = "github.com/inconshreveable/mousetrap", + sum = "h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=", + version = "v1.0.0", +) + +go_repository( + name = "com_github_kisielk_errcheck", + importpath = "github.com/kisielk/errcheck", + sum = "h1:reN85Pxc5larApoH1keMBiu2GWtPqXQ1nc9gx+jOU+E=", + version = "v1.2.0", +) + +go_repository( + name = "com_github_kisielk_gotool", + importpath = "github.com/kisielk/gotool", + sum = "h1:AV2c/EiW3KqPNT9ZKl07ehoAGi4C5/01Cfbblndcapg=", + version = "v1.0.0", +) + +go_repository( + name = "com_github_konsorten_go_windows_terminal_sequences", + importpath = "github.com/konsorten/go-windows-terminal-sequences", + sum = "h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=", + version = "v1.0.2", +) + +go_repository( + name = "com_github_microsoft_go_winio", + importpath = "github.com/Microsoft/go-winio", + sum = "h1:+hMXMk01us9KgxGb7ftKQt2Xpf5hH/yky+TDA+qxleU=", + version = "v0.4.14", +) + +go_repository( + name = "com_github_microsoft_hcsshim", + importpath = "github.com/Microsoft/hcsshim", + sum = "h1:ZfF0+zZeYdzMIVMZHKtDKJvLHj76XCuVae/jNkjj0IA=", + version = "v0.8.6", +) + +go_repository( + name = "com_github_onsi_ginkgo", + importpath = "github.com/onsi/ginkgo", + sum = "h1:q/mM8GF/n0shIN8SaAZ0V+jnLPzen6WIVZdiwrRlMlo=", + version = "v1.10.1", +) + +go_repository( + name = "com_github_onsi_gomega", + importpath = "github.com/onsi/gomega", + sum = "h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME=", + version = "v1.7.0", +) + +go_repository( + name = "com_github_opencontainers_go_digest", + 
importpath = "github.com/opencontainers/go-digest", + sum = "h1:QhPf3A2AZW3tTGvHPg0TA+CR3oHbVLlXUhlghqISp1I=", + version = "v0.0.0-20180430190053-c9281466c8b2", +) + +go_repository( + name = "com_github_opencontainers_image_spec", + importpath = "github.com/opencontainers/image-spec", + sum = "h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI=", + version = "v1.0.1", +) + +go_repository( + name = "com_github_opencontainers_runc", + importpath = "github.com/opencontainers/runc", + sum = "h1:dDCFes8Hj1r/i5qnypONo5jdOme/8HWZC/aNDyhECt0=", + version = "v1.0.0-rc8", +) + +go_repository( + name = "com_github_pkg_errors", + importpath = "github.com/pkg/errors", + sum = "h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=", + version = "v0.9.1", +) + +go_repository( + name = "com_github_pmezard_go_difflib", + importpath = "github.com/pmezard/go-difflib", + sum = "h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=", + version = "v1.0.0", +) + +go_repository( + name = "com_github_prometheus_client_model", + importpath = "github.com/prometheus/client_model", + sum = "h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM=", + version = "v0.0.0-20190812154241-14fe0d1b01d4", +) + +go_repository( + name = "com_github_prometheus_procfs", + importpath = "github.com/prometheus/procfs", + sum = "h1:hhvfGDVThBnd4kYisSFmYuHYeUhglxcwag7FhVPH9zM=", + version = "v0.0.0-20180125133057-cb4147076ac7", +) + +go_repository( + name = "com_github_sirupsen_logrus", + importpath = "github.com/sirupsen/logrus", + sum = "h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=", + version = "v1.4.2", +) + +go_repository( + name = "com_github_spf13_cobra", + importpath = "github.com/spf13/cobra", + sum = "h1:GQkkv3XSnxhAMjdq2wLfEnptEVr+2BNvmHizILHn+d4=", + version = "v0.0.2-0.20171109065643-2da4a54c5cee", +) + +go_repository( + name = "com_github_spf13_pflag", + importpath = "github.com/spf13/pflag", + sum = "h1:j8jxLbQ0+T1DFggy6XoGvyUnrJWPR/JybflPvu5rwS4=", + version = "v1.0.1-0.20171106142849-4c012f6dcd95", +) + +go_repository( + name = "com_github_stretchr_objx", + importpath = "github.com/stretchr/objx", + sum = "h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A=", + version = "v0.1.1", +) + +go_repository( + name = "com_github_stretchr_testify", + importpath = "github.com/stretchr/testify", + sum = "h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=", + version = "v1.2.2", +) + +go_repository( + name = "com_github_urfave_cli", + importpath = "github.com/urfave/cli", + sum = "h1:gsqYFH8bb9ekPA12kRo0hfjngWQjkJPlN9R0N78BoUo=", + version = "v1.22.2", +) + +go_repository( + name = "in_gopkg_airbrake_gobrake_v2", + importpath = "gopkg.in/airbrake/gobrake.v2", + sum = "h1:7z2uVWwn7oVeeugY1DtlPAy5H+KYgB1KeKTnqjNatLo=", + version = "v2.0.9", +) + +go_repository( + name = "in_gopkg_fsnotify_v1", + importpath = "gopkg.in/fsnotify.v1", + sum = "h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=", + version = "v1.4.7", +) + +go_repository( + name = "in_gopkg_gemnasium_logrus_airbrake_hook_v2", + importpath = "gopkg.in/gemnasium/logrus-airbrake-hook.v2", + sum = "h1:OAj3g0cR6Dx/R07QgQe8wkA9RNjB2u4i700xBkIT4e0=", + version = "v2.1.2", +) + +go_repository( + name = "in_gopkg_tomb_v1", + importpath = "gopkg.in/tomb.v1", + sum = "h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=", + version = "v1.0.0-20141024135613-dd632973f1e7", +) + +go_repository( + name = "in_gopkg_yaml_v2", + importpath = "gopkg.in/yaml.v2", + sum = "h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=", + version = "v2.2.2", +) + +go_repository( + name = "org_bazil_fuse", + importpath = "bazil.org/fuse", + 
sum = "h1:SC+c6A1qTFstO9qmB86mPV2IpYme/2ZoEQ0hrP+wo+Q=", + version = "v0.0.0-20160811212531-371fbbdaa898", +) + +go_repository( + name = "org_golang_google_appengine", + importpath = "google.golang.org/appengine", + sum = "h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=", + version = "v1.4.0", +) + +go_repository( + name = "org_golang_google_genproto", + importpath = "google.golang.org/genproto", + sum = "h1:wVJP1pATLVPNxCz4R2mTO6HUJgfGE0PmIu2E10RuhCw=", + version = "v0.0.0-20170523043604-d80a6e20e776", +) + +go_repository( + name = "org_golang_x_exp", + importpath = "golang.org/x/exp", + sum = "h1:c2HOrn5iMezYjSlGPncknSEr/8x5LELb/ilJbXi9DEA=", + version = "v0.0.0-20190121172915-509febef88a4", +) + +go_repository( + name = "org_golang_x_lint", + importpath = "golang.org/x/lint", + sum = "h1:XQyxROzUlZH+WIQwySDgnISgOivlhjIEwaQaJEJrrN0=", + version = "v0.0.0-20190313153728-d0100b6bd8b3", +) + +go_repository( + name = "tools_gotest", + importpath = "gotest.tools", + sum = "h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=", + version = "v2.2.0+incompatible", +) + +go_repository( + name = "co_honnef_go_tools", + importpath = "honnef.co/go/tools", + sum = "h1:/hemPrYIhOhy8zYrNj+069zDB68us2sMGsfkFJO0iZs=", + version = "v0.0.0-20190523083050-ea95bdfd59fc", +) + +go_repository( + name = "com_github_burntsushi_toml", + importpath = "github.com/BurntSushi/toml", + sum = "h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=", + version = "v0.3.1", +) + +go_repository( + name = "io_k8s_cri_api", + importpath = "k8s.io/cri-api", + sum = "h1:bykYbClh5Bnjo2EMjlYbYQ3ksxHjjLcbriKPm831hVk=", + version = "v0.18.2", +) + +go_repository( + name = "com_github_docker_docker", + importpath = "github.com/docker/docker", + sum = "h1:Bh3QS4GYuVi8QeNskrV3ivn8p0bupmk0PfY4xmVulo4=", + version = "v17.12.0-ce-rc1.0.20200514230353-811a247d06e8+incompatible", +) + +go_repository( + name = "com_github_cilium_ebpf", + importpath = "github.com/cilium/ebpf", + sum = "h1:i8+1fuPLjSgAYXUyBlHNhFwjcfAsP4ufiuH1+PWkyDU=", + version = "v0.0.0-20200110133405-4032b1d8aae3", +) + +go_repository( + name = "com_github_coreos_go_systemd_v22", + importpath = "github.com/coreos/go-systemd/v22", + sum = "h1:XJIw/+VlJ+87J+doOxznsAWIdmWuViOVhkQamW5YV28=", + version = "v22.0.0", +) + +go_repository( + name = "com_github_cpuguy83_go_md2man_v2", + importpath = "github.com/cpuguy83/go-md2man/v2", + sum = "h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM=", + version = "v2.0.0", +) + +go_repository( + name = "com_github_godbus_dbus_v5", + importpath = "github.com/godbus/dbus/v5", + sum = "h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME=", + version = "v5.0.3", +) + +go_repository( + name = "com_github_russross_blackfriday_v2", + importpath = "github.com/russross/blackfriday/v2", + sum = "h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=", + version = "v2.0.1", +) + +go_repository( + name = "com_github_shurcool_sanitized_anchor_name", + importpath = "github.com/shurcooL/sanitized_anchor_name", + sum = "h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=", + version = "v1.0.0", ) diff --git a/g3doc/user_guide/BUILD b/g3doc/user_guide/BUILD index b69aee12c..355dd49b3 100644 --- a/g3doc/user_guide/BUILD +++ b/g3doc/user_guide/BUILD @@ -68,3 +68,12 @@ doc( permalink = "/docs/user_guide/platforms/", weight = "30", ) + +doc( + name = "runtimeclass", + src = "runtimeclass.md", + category = "User Guide", + permalink = "/docs/user_guide/runtimeclass/", + subcategory = "Advanced", + weight = "91", +) diff --git a/g3doc/user_guide/runtimeclass.md 
b/g3doc/user_guide/runtimeclass.md new file mode 100644 index 000000000..9f2d794c3 --- /dev/null +++ b/g3doc/user_guide/runtimeclass.md @@ -0,0 +1,44 @@ +# RuntimeClass + +First, follow the appropriate installation instructions for your version of +containerd. + +* For 1.1 or lower, use `gvisor-containerd-shim`. +* For 1.2 or higher, use `containerd-shim-runsc-v1`. + +# Set up the Kubernetes RuntimeClass + +Creating the RuntimeClass in kubernetes is simple once the runtime is available +for containerd: + +```shell +cat <: -// +// runOrError will run the provided command. +// +// If an error is encountered and neither Stdout or Stderr was set the error +// will be returned in the format of : . func (r *Runsc) runOrError(cmd *exec.Cmd) error { if cmd.Stdout != nil || cmd.Stderr != nil { ec, err := Monitor.Start(cmd) diff --git a/pkg/shim/v1/proc/BUILD b/pkg/shim/v1/proc/BUILD new file mode 100644 index 000000000..a59bf198d --- /dev/null +++ b/pkg/shim/v1/proc/BUILD @@ -0,0 +1,35 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "proc", + srcs = [ + "deleted_state.go", + "exec.go", + "exec_state.go", + "init.go", + "init_state.go", + "io.go", + "process.go", + "types.go", + "utils.go", + ], + visibility = [ + "//pkg/shim:__subpackages__", + "//shim:__subpackages__", + ], + deps = [ + "//pkg/shim/runsc", + "@com_github_containerd_console//:go_default_library", + "@com_github_containerd_containerd//errdefs:go_default_library", + "@com_github_containerd_containerd//log:go_default_library", + "@com_github_containerd_containerd//mount:go_default_library", + "@com_github_containerd_containerd//runtime/proc:go_default_library", + "@com_github_containerd_fifo//:go_default_library", + "@com_github_containerd_go_runc//:go_default_library", + "@com_github_gogo_protobuf//types:go_default_library", + "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/pkg/shim/v1/proc/deleted_state.go b/pkg/shim/v1/proc/deleted_state.go index 0196c96dd..52aad40ac 100644 --- a/pkg/shim/v1/proc/deleted_state.go +++ b/pkg/shim/v1/proc/deleted_state.go @@ -17,33 +17,33 @@ package proc import ( "context" + "fmt" "github.com/containerd/console" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/runtime/proc" - "github.com/pkg/errors" ) type deletedState struct{} func (*deletedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot resize a deleted process") + return fmt.Errorf("cannot resize a deleted process") } func (*deletedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a deleted process") + return fmt.Errorf("cannot start a deleted process") } func (*deletedState) Delete(ctx context.Context) error { - return errors.Wrap(errdefs.ErrNotFound, "cannot delete a deleted process") + return fmt.Errorf("cannot delete a deleted process: %w", errdefs.ErrNotFound) } func (*deletedState) Kill(ctx context.Context, sig uint32, all bool) error { - return errors.Wrap(errdefs.ErrNotFound, "cannot kill a deleted process") + return fmt.Errorf("cannot kill a deleted process: %w", errdefs.ErrNotFound) } func (*deletedState) SetExited(status int) {} func (*deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return nil, errors.Errorf("cannot exec in a deleted state") + return nil, fmt.Errorf("cannot exec in a deleted state") } diff --git a/pkg/shim/v1/proc/exec.go b/pkg/shim/v1/proc/exec.go 
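The runtimeclass.md page added above lost the bodies of its `kubectl apply` heredocs in this copy of the patch (the text after `cat <` was stripped during extraction), so the snippet below is a hedged reconstruction of what such a setup typically looks like rather than the original wording. The class name `gvisor`, the handler `runsc`, and the pod name are assumptions; the `node.k8s.io/v1beta1` RuntimeClass API and the `runtimeClassName` field are standard Kubernetes.

```shell
# Reconstruction sketch; names are assumed, not taken from the original doc.
cat <<EOF | kubectl apply -f -
apiVersion: node.k8s.io/v1beta1
kind: RuntimeClass
metadata:
  name: gvisor        # assumed class name
handler: runsc        # must match the runtime handler configured in containerd
EOF

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: nginx-gvisor  # assumed pod name
spec:
  runtimeClassName: gvisor
  containers:
  - name: nginx
    image: nginx
EOF
```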
index 6821e09cd..4ef9cf6cf 100644 --- a/pkg/shim/v1/proc/exec.go +++ b/pkg/shim/v1/proc/exec.go @@ -31,7 +31,6 @@ import ( "github.com/containerd/fifo" runc "github.com/containerd/go-runc" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/shim/runsc" @@ -151,9 +150,11 @@ func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{ Pid: internalPid, }); err != nil { - // If this returns error, consider the process has already stopped. + // If this returns error, consider the process has + // already stopped. + // // TODO: Fix after signal handling is fixed. - return errors.Wrapf(errdefs.ErrNotFound, err.Error()) + return fmt.Errorf("%s: %w", err.Error(), errdefs.ErrNotFound) } } return nil @@ -182,16 +183,16 @@ func (e *execProcess) start(ctx context.Context) (err error) { ) if e.stdio.Terminal { if socket, err = runc.NewTempConsoleSocket(); err != nil { - return errors.Wrap(err, "failed to create runc console socket") + return fmt.Errorf("failed to create runc console socket: %w", err) } defer socket.Close() } else if e.stdio.IsNull() { if e.io, err = runc.NewNullIO(); err != nil { - return errors.Wrap(err, "creating new NULL IO") + return fmt.Errorf("creating new NULL IO: %w", err) } } else { if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { - return errors.Wrap(err, "failed to create runc io pipes") + return fmt.Errorf("failed to create runc io pipes: %w", err) } } opts := &runsc.ExecOpts{ @@ -217,7 +218,7 @@ func (e *execProcess) start(ctx context.Context) (err error) { if e.stdio.Stdin != "" { sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) if err != nil { - return errors.Wrapf(err, "failed to open stdin fifo %s", e.stdio.Stdin) + return fmt.Errorf("failed to open stdin fifo %s: %w", e.stdio.Stdin, err) } e.closers = append(e.closers, sc) e.stdin = sc @@ -228,25 +229,25 @@ func (e *execProcess) start(ctx context.Context) (err error) { if socket != nil { console, err := socket.ReceiveMaster() if err != nil { - return errors.Wrap(err, "failed to retrieve console master") + return fmt.Errorf("failed to retrieve console master: %w", err) } if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start console copy") + return fmt.Errorf("failed to start console copy: %w", err) } } else if !e.stdio.IsNull() { if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start io pipe copy") + return fmt.Errorf("failed to start io pipe copy: %w", err) } } copyWaitGroup.Wait() pid, err := runc.ReadPidFile(opts.PidFile) if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime exec pid") + return fmt.Errorf("failed to retrieve OCI runtime exec pid: %w", err) } e.pid = pid internalPid, err := runc.ReadPidFile(opts.InternalPidFile) if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime exec internal pid") + return fmt.Errorf("failed to retrieve OCI runtime exec internal pid: %w", err) } e.internalPid = internalPid go func() { diff --git a/pkg/shim/v1/proc/exec_state.go b/pkg/shim/v1/proc/exec_state.go index 5416cb601..4dcda8b44 100644 --- a/pkg/shim/v1/proc/exec_state.go +++ 
b/pkg/shim/v1/proc/exec_state.go @@ -17,9 +17,9 @@ package proc import ( "context" + "fmt" "github.com/containerd/console" - "github.com/pkg/errors" ) type execState interface { @@ -43,7 +43,7 @@ func (s *execCreatedState) transition(name string) error { case "deleted": s.p.execState = &deletedState{} default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) } return nil } @@ -87,7 +87,7 @@ func (s *execRunningState) transition(name string) error { case "stopped": s.p.execState = &execStoppedState{p: s.p} default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) } return nil } @@ -97,11 +97,11 @@ func (s *execRunningState) Resize(ws console.WinSize) error { } func (s *execRunningState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a running process") + return fmt.Errorf("cannot start a running process") } func (s *execRunningState) Delete(ctx context.Context) error { - return errors.Errorf("cannot delete a running process") + return fmt.Errorf("cannot delete a running process") } func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error { @@ -125,17 +125,17 @@ func (s *execStoppedState) transition(name string) error { case "deleted": s.p.execState = &deletedState{} default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) } return nil } func (s *execStoppedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot resize a stopped container") + return fmt.Errorf("cannot resize a stopped container") } func (s *execStoppedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a stopped process") + return fmt.Errorf("cannot start a stopped process") } func (s *execStoppedState) Delete(ctx context.Context) error { diff --git a/pkg/shim/v1/proc/init.go b/pkg/shim/v1/proc/init.go index 0771540a9..b429cb94f 100644 --- a/pkg/shim/v1/proc/init.go +++ b/pkg/shim/v1/proc/init.go @@ -18,6 +18,7 @@ package proc import ( "context" "encoding/json" + "fmt" "io" "path/filepath" "strings" @@ -33,23 +34,22 @@ import ( "github.com/containerd/fifo" runc "github.com/containerd/go-runc" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" "gvisor.dev/gvisor/pkg/shim/runsc" ) -// InitPidFile name of the file that contains the init pid +// InitPidFile name of the file that contains the init pid. const InitPidFile = "init.pid" -// Init represents an initial process for a container +// Init represents an initial process for a container. type Init struct { wg sync.WaitGroup initState initState // mu is used to ensure that `Start()` and `Exited()` calls return in - // the right order when invoked in separate go routines. - // This is the case within the shim implementation as it makes use of - // the reaper interface. + // the right order when invoked in separate go routines. This is the + // case within the shim implementation as it makes use of the reaper + // interface. mu sync.Mutex waitBlock chan struct{} @@ -76,7 +76,7 @@ type Init struct { Monitor ProcessMonitor } -// NewRunsc returns a new runsc instance for a process +// NewRunsc returns a new runsc instance for a process. 
func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc { if root == "" { root = RunscRoot @@ -91,7 +91,7 @@ func NewRunsc(root, path, namespace, runtime string, config map[string]string) * } } -// New returns a new init process +// New returns a new init process. func New(id string, runtime *runsc.Runsc, stdio proc.Stdio) *Init { p := &Init{ id: id, @@ -104,21 +104,21 @@ func New(id string, runtime *runsc.Runsc, stdio proc.Stdio) *Init { return p } -// Create the process with the provided config +// Create the process with the provided config. func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { var socket *runc.Socket if r.Terminal { if socket, err = runc.NewTempConsoleSocket(); err != nil { - return errors.Wrap(err, "failed to create OCI runtime console socket") + return fmt.Errorf("failed to create OCI runtime console socket: %w", err) } defer socket.Close() } else if hasNoIO(r) { if p.io, err = runc.NewNullIO(); err != nil { - return errors.Wrap(err, "creating new NULL IO") + return fmt.Errorf("creating new NULL IO: %w", err) } } else { if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { - return errors.Wrap(err, "failed to create OCI runtime io pipes") + return fmt.Errorf("failed to create OCI runtime io pipes: %w", err) } } pidFile := filepath.Join(p.Bundle, InitPidFile) @@ -139,7 +139,7 @@ func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { if r.Stdin != "" { sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) if err != nil { - return errors.Wrapf(err, "failed to open stdin fifo %s", r.Stdin) + return fmt.Errorf("failed to open stdin fifo %s: %w", r.Stdin, err) } p.stdin = sc p.closers = append(p.closers, sc) @@ -150,58 +150,58 @@ func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { if socket != nil { console, err := socket.ReceiveMaster() if err != nil { - return errors.Wrap(err, "failed to retrieve console master") + return fmt.Errorf("failed to retrieve console master: %w", err) } console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup) if err != nil { - return errors.Wrap(err, "failed to start console copy") + return fmt.Errorf("failed to start console copy: %w", err) } p.console = console } else if !hasNoIO(r) { if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup); err != nil { - return errors.Wrap(err, "failed to start io pipe copy") + return fmt.Errorf("failed to start io pipe copy: %w", err) } } copyWaitGroup.Wait() pid, err := runc.ReadPidFile(pidFile) if err != nil { - return errors.Wrap(err, "failed to retrieve OCI runtime container pid") + return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err) } p.pid = pid return nil } -// Wait for the process to exit +// Wait waits for the process to exit. func (p *Init) Wait() { <-p.waitBlock } -// ID of the process +// ID returns the ID of the process. func (p *Init) ID() string { return p.id } -// Pid of the process +// Pid returns the PID of the process. func (p *Init) Pid() int { return p.pid } -// ExitStatus of the process +// ExitStatus returns the exit status of the process. func (p *Init) ExitStatus() int { p.mu.Lock() defer p.mu.Unlock() return p.status } -// ExitedAt at time when the process exited +// ExitedAt returns the time when the process exited. 
func (p *Init) ExitedAt() time.Time { p.mu.Lock() defer p.mu.Unlock() return p.exited } -// Status of the process +// Status returns the status of the process. func (p *Init) Status(ctx context.Context) (string, error) { p.mu.Lock() defer p.mu.Unlock() @@ -215,7 +215,7 @@ func (p *Init) Status(ctx context.Context) (string, error) { return p.convertStatus(c.Status), nil } -// Start the init process +// Start starts the init process. func (p *Init) Start(ctx context.Context) error { p.mu.Lock() defer p.mu.Unlock() @@ -250,7 +250,7 @@ func (p *Init) start(ctx context.Context) error { return nil } -// SetExited of the init process with the next status +// SetExited sets the exit status of the init process. func (p *Init) SetExited(status int) { p.mu.Lock() defer p.mu.Unlock() @@ -265,7 +265,7 @@ func (p *Init) setExited(status int) { close(p.waitBlock) } -// Delete the init process +// Delete deletes the init process. func (p *Init) Delete(ctx context.Context) error { p.mu.Lock() defer p.mu.Unlock() @@ -298,13 +298,13 @@ func (p *Init) delete(ctx context.Context) error { if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") if err == nil { - err = errors.Wrap(err2, "failed rootfs umount") + err = fmt.Errorf("failed rootfs umount: %w", err2) } } return err } -// Resize the init processes console +// Resize resizes the init processes console. func (p *Init) Resize(ws console.WinSize) error { p.mu.Lock() defer p.mu.Unlock() @@ -322,7 +322,7 @@ func (p *Init) resize(ws console.WinSize) error { return p.console.Resize(ws) } -// Kill the init process +// Kill kills the init process. func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { p.mu.Lock() defer p.mu.Unlock() @@ -340,7 +340,7 @@ func (p *Init) kill(context context.Context, signal uint32, all bool) error { c, err := p.runtime.State(context, p.id) if err != nil { if strings.Contains(err.Error(), "does not exist") { - return errors.Wrapf(errdefs.ErrNotFound, "no such process") + return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) } return p.runtimeError(err, "OCI runtime state failed") } @@ -348,7 +348,7 @@ func (p *Init) kill(context context.Context, signal uint32, all bool) error { // If the container is not in running state, directly return // "no such process" if p.convertStatus(c.Status) == "stopped" { - return errors.Wrapf(errdefs.ErrNotFound, "no such process") + return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) } killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{ All: all, @@ -362,7 +362,7 @@ func (p *Init) kill(context context.Context, signal uint32, all bool) error { return p.runtimeError(killErr, "kill timeout") } -// KillAll processes belonging to the init process +// KillAll kills all processes belonging to the init process. func (p *Init) KillAll(context context.Context) error { p.mu.Lock() defer p.mu.Unlock() @@ -380,17 +380,17 @@ func (p *Init) killAll(context context.Context) error { return nil } -// Stdin of the process +// Stdin returns the stdin of the process. func (p *Init) Stdin() io.Closer { return p.stdin } -// Runtime returns the OCI runtime configured for the init process +// Runtime returns the OCI runtime configured for the init process. func (p *Init) Runtime() *runsc.Runsc { return p.runtime } -// Exec returns a new child process +// Exec returns a new child process.
func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { p.mu.Lock() defer p.mu.Unlock() @@ -398,7 +398,7 @@ func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Proce return p.initState.Exec(ctx, path, r) } -// exec returns a new exec'd process +// exec returns a new exec'd process. func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { // process exec request var spec specs.Process @@ -424,7 +424,7 @@ func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Proce return e, nil } -// Stdio of the process +// Stdio returns the stdio of the process. func (p *Init) Stdio() proc.Stdio { return p.stdio } @@ -437,11 +437,11 @@ func (p *Init) runtimeError(rErr error, msg string) error { rMsg, err := getLastRuntimeError(p.runtime) switch { case err != nil: - return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error()) + return fmt.Errorf("%s: %w (unable to retrieve OCI runtime error: %v)", msg, rErr, err) case rMsg == "": - return errors.Wrap(rErr, msg) + return fmt.Errorf("%s: %w", msg, rErr) default: - return errors.Errorf("%s: %s", msg, rMsg) + return fmt.Errorf("%s: %s", msg, rMsg) } } diff --git a/pkg/shim/v1/proc/init_state.go b/pkg/shim/v1/proc/init_state.go index 868646b6c..509f27762 100644 --- a/pkg/shim/v1/proc/init_state.go +++ b/pkg/shim/v1/proc/init_state.go @@ -17,11 +17,11 @@ package proc import ( "context" + "fmt" "github.com/containerd/console" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/runtime/proc" - "github.com/pkg/errors" ) type initState interface { @@ -46,7 +46,7 @@ func (s *createdState) transition(name string) error { case "deleted": s.p.initState = &deletedState{} default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) } return nil } @@ -107,7 +107,7 @@ func (s *runningState) transition(name string) error { case "stopped": s.p.initState = &stoppedState{p: s.p} default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) } return nil } @@ -117,11 +117,11 @@ func (s *runningState) Resize(ws console.WinSize) error { } func (s *runningState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a running process") + return fmt.Errorf("cannot start a running process") } func (s *runningState) Delete(ctx context.Context) error { - return errors.Errorf("cannot delete a running process") + return fmt.Errorf("cannot delete a running process") } func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { @@ -149,17 +149,17 @@ func (s *stoppedState) transition(name string) error { case "deleted": s.p.initState = &deletedState{} default: - return errors.Errorf("invalid state transition %q to %q", stateName(s), name) + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) } return nil } func (s *stoppedState) Resize(ws console.WinSize) error { - return errors.Errorf("cannot resize a stopped container") + return fmt.Errorf("cannot resize a stopped container") } func (s *stoppedState) Start(ctx context.Context) error { - return errors.Errorf("cannot start a stopped process") + return fmt.Errorf("cannot start a stopped process") } func (s *stoppedState) Delete(ctx context.Context) error { @@ -178,5 +178,5 @@ func (s *stoppedState) 
SetExited(status int) { } func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { - return nil, errors.Errorf("cannot exec in a stopped state") + return nil, fmt.Errorf("cannot exec in a stopped state") } diff --git a/pkg/shim/v1/proc/io.go b/pkg/shim/v1/proc/io.go index 2677b4e54..5313c7a50 100644 --- a/pkg/shim/v1/proc/io.go +++ b/pkg/shim/v1/proc/io.go @@ -150,8 +150,9 @@ func (c *countingWriteCloser) Close() error { return c.WriteCloser.Close() } -// isFifo checks if a file is a fifo -// if the file does not exist then it returns false +// isFifo checks if a file is a fifo. +// +// If the file does not exist then it returns false. func isFifo(path string) (bool, error) { stat, err := os.Stat(path) if err != nil { diff --git a/pkg/shim/v1/proc/process.go b/pkg/shim/v1/proc/process.go index 1bfa99f4c..d462c3eef 100644 --- a/pkg/shim/v1/proc/process.go +++ b/pkg/shim/v1/proc/process.go @@ -16,10 +16,10 @@ package proc import ( - "github.com/pkg/errors" + "fmt" ) -// RunscRoot is the path to the root runsc state directory +// RunscRoot is the path to the root runsc state directory. const RunscRoot = "/run/containerd/runsc" func stateName(v interface{}) string { @@ -33,5 +33,5 @@ func stateName(v interface{}) string { case *stoppedState: return "stopped" } - panic(errors.Errorf("invalid state %v", v)) + panic(fmt.Errorf("invalid state %v", v)) } diff --git a/pkg/shim/v1/proc/types.go b/pkg/shim/v1/proc/types.go index dcd43bcca..5c215de5f 100644 --- a/pkg/shim/v1/proc/types.go +++ b/pkg/shim/v1/proc/types.go @@ -23,7 +23,7 @@ import ( runc "github.com/containerd/go-runc" ) -// Mount holds filesystem mount configuration +// Mount holds filesystem mount configuration. type Mount struct { Type string Source string @@ -31,7 +31,7 @@ type Mount struct { Options []string } -// CreateConfig hold task creation configuration +// CreateConfig hold task creation configuration. type CreateConfig struct { ID string Bundle string @@ -44,7 +44,7 @@ type CreateConfig struct { Options *google_protobuf.Any } -// ExecConfig holds exec creation configuration +// ExecConfig holds exec creation configuration. type ExecConfig struct { ID string Terminal bool @@ -54,14 +54,14 @@ type ExecConfig struct { Spec *google_protobuf.Any } -// Exit is the type of exit events +// Exit is the type of exit events. type Exit struct { Timestamp time.Time ID string Status int } -// ProcessMonitor monitors process exit changes +// ProcessMonitor monitors process exit changes. type ProcessMonitor interface { // Subscribe to process exit changes Subscribe() chan runc.Exit diff --git a/pkg/shim/v1/proc/utils.go b/pkg/shim/v1/proc/utils.go index 6e7dbdad7..716de2f59 100644 --- a/pkg/shim/v1/proc/utils.go +++ b/pkg/shim/v1/proc/utils.go @@ -34,8 +34,6 @@ const ( // inside the sandbox. var ExitCh = make(chan Exit, bufferSize) -// TODO(random-liu): This can be a utility. - // TODO(mlaventure): move to runc package? 
func getLastRuntimeError(r *runsc.Runsc) (string, error) { if r.Log == "" { diff --git a/pkg/shim/v1/shim/BUILD b/pkg/shim/v1/shim/BUILD new file mode 100644 index 000000000..02cffbb01 --- /dev/null +++ b/pkg/shim/v1/shim/BUILD @@ -0,0 +1,38 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "shim", + srcs = [ + "platform.go", + "service.go", + ], + visibility = [ + "//pkg/shim:__subpackages__", + "//shim:__subpackages__", + ], + deps = [ + "//pkg/shim/runsc", + "//pkg/shim/v1/proc", + "//pkg/shim/v1/utils", + "@com_github_containerd_console//:go_default_library", + "@com_github_containerd_containerd//api/events:go_default_library", + "@com_github_containerd_containerd//api/types/task:go_default_library", + "@com_github_containerd_containerd//errdefs:go_default_library", + "@com_github_containerd_containerd//events:go_default_library", + "@com_github_containerd_containerd//log:go_default_library", + "@com_github_containerd_containerd//mount:go_default_library", + "@com_github_containerd_containerd//namespaces:go_default_library", + "@com_github_containerd_containerd//runtime:go_default_library", + "@com_github_containerd_containerd//runtime/linux/runctypes:go_default_library", + "@com_github_containerd_containerd//runtime/proc:go_default_library", + "@com_github_containerd_containerd//runtime/v1/shim:go_default_library", + "@com_github_containerd_containerd//runtime/v1/shim/v1:go_default_library", + "@com_github_containerd_fifo//:go_default_library", + "@com_github_containerd_typeurl//:go_default_library", + "@com_github_gogo_protobuf//types:go_default_library", + "@org_golang_google_grpc//codes:go_default_library", + "@org_golang_google_grpc//status:go_default_library", + ], +) diff --git a/pkg/shim/v1/shim/platform.go b/pkg/shim/v1/shim/platform.go index 86252c3f5..f6795563c 100644 --- a/pkg/shim/v1/shim/platform.go +++ b/pkg/shim/v1/shim/platform.go @@ -17,13 +17,13 @@ package shim import ( "context" + "fmt" "io" "sync" "syscall" "github.com/containerd/console" "github.com/containerd/fifo" - "github.com/pkg/errors" ) type linuxPlatform struct { @@ -32,7 +32,7 @@ type linuxPlatform struct { func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { if p.epoller == nil { - return nil, errors.New("uninitialized epoller") + return nil, fmt.Errorf("uninitialized epoller") } epollConsole, err := p.epoller.Add(console) @@ -79,11 +79,11 @@ func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { if p.epoller == nil { - return errors.New("uninitialized epoller") + return fmt.Errorf("uninitialized epoller") } epollConsole, ok := cons.(*console.EpollConsole) if !ok { - return errors.Errorf("expected EpollConsole, got %#v", cons) + return fmt.Errorf("expected EpollConsole, got %#v", cons) } return epollConsole.Shutdown(p.epoller.CloseConsole) } @@ -100,7 +100,7 @@ func (s *Service) initPlatform() error { } epoller, err := console.NewEpoller() if err != nil { - return errors.Wrap(err, "failed to initialize epoller") + return fmt.Errorf("failed to initialize epoller: %w", err) } s.platform = &linuxPlatform{ epoller: epoller, diff --git a/pkg/shim/v1/shim/service.go b/pkg/shim/v1/shim/service.go index aac172801..7b0280ed2 100644 --- a/pkg/shim/v1/shim/service.go +++ b/pkg/shim/v1/shim/service.go @@ -37,8 +37,6 @@ import ( shimapi 
"github.com/containerd/containerd/runtime/v1/shim/v1" "github.com/containerd/typeurl" ptypes "github.com/gogo/protobuf/types" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -57,7 +55,7 @@ var ( } ) -// Config contains shim specific configuration +// Config contains shim specific configuration. type Config struct { Path string Namespace string @@ -66,17 +64,12 @@ type Config struct { RunscConfig map[string]string } -// NewService returns a new shim service that can be used via GRPC +// NewService returns a new shim service that can be used via GRPC. func NewService(config Config, publisher events.Publisher) (*Service, error) { if config.Namespace == "" { return nil, fmt.Errorf("shim namespace cannot be empty") } ctx := namespaces.WithNamespace(context.Background(), config.Namespace) - ctx = log.WithLogger(ctx, logrus.WithFields(logrus.Fields{ - "namespace": config.Namespace, - "path": config.Path, - "pid": os.Getpid(), - })) s := &Service{ config: config, context: ctx, @@ -86,13 +79,13 @@ func NewService(config Config, publisher events.Publisher) (*Service, error) { } go s.processExits() if err := s.initPlatform(); err != nil { - return nil, errors.Wrap(err, "failed to initialized platform behavior") + return nil, fmt.Errorf("failed to initialized platform behavior: %w", err) } go s.forward(publisher) return s, nil } -// Service is the shim implementation of a remote shim over GRPC +// Service is the shim implementation of a remote shim over GRPC. type Service struct { mu sync.Mutex @@ -108,7 +101,7 @@ type Service struct { bundle string } -// Create a new initial process and container with the underlying OCI runtime +// Create creates a new initial process and container with the underlying OCI runtime. func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ *shimapi.CreateTaskResponse, err error) { s.mu.Lock() defer s.mu.Unlock() @@ -153,7 +146,7 @@ func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ * Options: rm.Options, } if err := m.Mount(rootfs); err != nil { - return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) + return nil, fmt.Errorf("failed to mount rootfs component %v: %w", m, err) } } process, err := newInit( @@ -169,7 +162,8 @@ func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ * if err := process.Create(ctx, config); err != nil { return nil, errdefs.ToGRPC(err) } - // save the main task id and bundle to the shim for additional requests + // Save the main task id and bundle to the shim for additional + // requests. s.id = r.ID s.bundle = r.Bundle pid := process.Pid() @@ -179,7 +173,7 @@ func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ * }, nil } -// Start a process +// Start starts a process. func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.StartResponse, error) { p, err := s.getExecProcess(r.ID) if err != nil { @@ -194,7 +188,7 @@ func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi. }, nil } -// Delete the initial process and container +// Delete deletes the initial process and container. 
func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteResponse, error) { p, err := s.getInitProcess() if err != nil { @@ -214,7 +208,7 @@ func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteR }, nil } -// DeleteProcess deletes an exec'd process +// DeleteProcess deletes an exec'd process. func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) { if r.ID == s.id { return nil, status.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess") @@ -236,7 +230,7 @@ func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessReq }, nil } -// Exec an additional process inside the container +// Exec spawns an additional process inside the container. func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*ptypes.Empty, error) { s.mu.Lock() @@ -268,7 +262,7 @@ func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*pty return empty, nil } -// ResizePty of a process +// ResizePty resizes the terminal of a process. func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*ptypes.Empty, error) { if r.ID == "" { return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided") @@ -287,7 +281,7 @@ func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (* return empty, nil } -// State returns runtime state information for a process +// State returns runtime state information for a process. func (s *Service) State(ctx context.Context, r *shimapi.StateRequest) (*shimapi.StateResponse, error) { p, err := s.getExecProcess(r.ID) if err != nil { @@ -321,17 +315,17 @@ func (s *Service) State(ctx context.Context, r *shimapi.StateRequest) (*shimapi. }, nil } -// Pause the container +// Pause pauses the container. func (s *Service) Pause(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) } -// Resume the container +// Resume resumes the container. func (s *Service) Resume(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) { return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) } -// Kill a process with the provided signal +// Kill kills a process with the provided signal. func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Empty, error) { if r.ID == "" { p, err := s.getInitProcess() @@ -354,7 +348,7 @@ func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Emp return empty, nil } -// ListPids returns all pids inside the container +// ListPids returns all pids inside the container. func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*shimapi.ListPidsResponse, error) { pids, err := s.getContainerPids(ctx, r.ID) if err != nil { @@ -372,7 +366,7 @@ func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*sh } a, err := typeurl.MarshalAny(d) if err != nil { - return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) + return nil, fmt.Errorf("failed to marshal process %d info: %w", pid, err) } pInfo.Info = a break @@ -385,7 +379,7 @@ func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*sh }, nil } -// CloseIO of a process +// CloseIO closes the I/O context of a process.
func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*ptypes.Empty, error) { p, err := s.getExecProcess(r.ID) if err != nil { @@ -393,30 +387,30 @@ func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*ptyp } if stdin := p.Stdin(); stdin != nil { if err := stdin.Close(); err != nil { - return nil, errors.Wrap(err, "close stdin") + return nil, fmt.Errorf("close stdin: %w", err) } } return empty, nil } -// Checkpoint the container +// Checkpoint checkpoints the container. func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointTaskRequest) (*ptypes.Empty, error) { return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) } -// ShimInfo returns shim information such as the shim's pid +// ShimInfo returns shim information such as the shim's pid. func (s *Service) ShimInfo(ctx context.Context, r *ptypes.Empty) (*shimapi.ShimInfoResponse, error) { return &shimapi.ShimInfoResponse{ ShimPid: uint32(os.Getpid()), }, nil } -// Update a running container +// Update updates a running container. func (s *Service) Update(ctx context.Context, r *shimapi.UpdateTaskRequest) (*ptypes.Empty, error) { return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) } -// Wait for a process to exit +// Wait waits for a process to exit. func (s *Service) Wait(ctx context.Context, r *shimapi.WaitRequest) (*shimapi.WaitResponse, error) { p, err := s.getExecProcess(r.ID) if err != nil { @@ -451,7 +445,7 @@ func (s *Service) checkProcesses(e proc.Exit) { for _, p := range s.allProcesses() { if p.ID() == e.ID { if ip, ok := p.(*proc.Init); ok { - // Ensure all children are killed + // Ensure all children are killed. if err := ip.KillAll(s.context); err != nil { log.G(s.context).WithError(err).WithField("id", ip.ID()). Error("failed to kill init's children") @@ -495,7 +489,7 @@ func (s *Service) forward(publisher events.Publisher) { } } -// getInitProcess returns initial process +// getInitProcess returns the init process. func (s *Service) getInitProcess() (rproc.Process, error) { s.mu.Lock() defer s.mu.Unlock() @@ -506,7 +500,7 @@ func (s *Service) getInitProcess() (rproc.Process, error) { return p, nil } -// getExecProcess returns exec process +// getExecProcess returns the given exec process. 
func (s *Service) getExecProcess(id string) (rproc.Process, error) { s.mu.Lock() defer s.mu.Unlock() @@ -534,7 +528,7 @@ func getTopic(ctx context.Context, e interface{}) string { case *eventstypes.TaskExecStarted: return runtime.TaskExecStartedEventTopic default: - logrus.Warnf("no topic for type %#v", e) + log.L.Printf("no topic for type %#v", e) } return runtime.TaskUnknownTopic } @@ -551,10 +545,10 @@ func newInit(ctx context.Context, path, workDir, runtimeRoot, namespace string, spec, err := utils.ReadSpec(r.Bundle) if err != nil { - return nil, errors.Wrap(err, "read oci spec") + return nil, fmt.Errorf("read oci spec: %w", err) } if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { - return nil, errors.Wrap(err, "update volume annotations") + return nil, fmt.Errorf("update volume annotations: %w", err) } runsc.FormatLogPath(r.ID, config) diff --git a/pkg/shim/v1/utils/BUILD b/pkg/shim/v1/utils/BUILD new file mode 100644 index 000000000..9045781e1 --- /dev/null +++ b/pkg/shim/v1/utils/BUILD @@ -0,0 +1,26 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "utils", + srcs = [ + "utils.go", + "volumes.go", + ], + visibility = [ + "//pkg/shim:__subpackages__", + "//shim:__subpackages__", + ], + deps = [ + "@com_github_containerd_cri//pkg/annotations:go_default_library", + "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + ], +) + +go_test( + name = "utils_test", + size = "small", + srcs = ["volumes_test.go"], + library = ":utils", +) diff --git a/pkg/shim/v1/utils/volumes.go b/pkg/shim/v1/utils/volumes.go index 7323e7245..e4e9bf9b1 100644 --- a/pkg/shim/v1/utils/volumes.go +++ b/pkg/shim/v1/utils/volumes.go @@ -23,8 +23,6 @@ import ( "github.com/containerd/cri/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" ) const volumeKeyPrefix = "dev.gvisor.spec.mount." @@ -32,13 +30,13 @@ const volumeKeyPrefix = "dev.gvisor.spec.mount." var kubeletPodsDir = "/var/lib/kubelet/pods" // volumeName gets volume name from volume annotation key, example: -// dev.gvisor.spec.mount.NAME.share +// dev.gvisor.spec.mount.NAME.share func volumeName(k string) string { return strings.SplitN(strings.TrimPrefix(k, volumeKeyPrefix), ".", 2)[0] } // volumeFieldName gets volume field name from volume annotation key, example: -// `type` is the field of dev.gvisor.spec.mount.NAME.type +// `type` is the field of dev.gvisor.spec.mount.NAME.type func volumeFieldName(k string) string { parts := strings.Split(strings.TrimPrefix(k, volumeKeyPrefix), ".") return parts[len(parts)-1] @@ -48,16 +46,16 @@ func volumeFieldName(k string) string { func podUID(s *specs.Spec) (string, error) { sandboxLogDir := s.Annotations[annotations.SandboxLogDir] if sandboxLogDir == "" { - return "", errors.New("no sandbox log path annotation") + return "", fmt.Errorf("no sandbox log path annotation") } fields := strings.Split(filepath.Base(sandboxLogDir), "_") switch len(fields) { - case 1: // This is the old CRI logging path + case 1: // This is the old CRI logging path. return fields[0], nil - case 3: // This is the new CRI logging path + case 3: // This is the new CRI logging path. return fields[2], nil } - return "", errors.Errorf("unexpected sandbox log path %q", sandboxLogDir) + return "", fmt.Errorf("unexpected sandbox log path %q", sandboxLogDir) } // isVolumeKey checks whether an annotation key is for volume. 
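The `volumeName` and `volumeFieldName` helpers above split gVisor volume annotation keys of the form `dev.gvisor.spec.mount.NAME.FIELD`. A self-contained sketch of that parsing; the key and the volume name `logs` are illustrative only:

```go
package main

import (
	"fmt"
	"strings"
)

const volumeKeyPrefix = "dev.gvisor.spec.mount."

func main() {
	// Example annotation key of the shape described above.
	k := "dev.gvisor.spec.mount.logs.share"

	trimmed := strings.TrimPrefix(k, volumeKeyPrefix)

	// Volume name: everything before the first dot.
	name := strings.SplitN(trimmed, ".", 2)[0]

	// Field name: the last dot-separated component.
	parts := strings.Split(trimmed, ".")
	field := parts[len(parts)-1]

	fmt.Println(name, field) // logs share
}
```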
@@ -79,7 +77,7 @@ func volumePath(volume, uid string) (string, error) { return "", err } if len(dirs) != 1 { - return "", errors.Errorf("unexpected matched volume list %v", dirs) + return "", fmt.Errorf("unexpected matched volume list %v", dirs) } return dirs[0], nil } @@ -103,7 +101,7 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { if err != nil { // Skip if we can't get pod UID, because this doesn't work // for containerd 1.1. - logrus.WithError(err).Error("Can't get pod uid") + fmt.Errorf("Can't get pod uid: %w", err) return nil } } @@ -117,22 +115,25 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { } volume := volumeName(k) if uid != "" { - // This is a sandbox + // This is a sandbox. path, err := volumePath(volume, uid) if err != nil { - return errors.Wrapf(err, "get volume path for %q", volume) + return fmt.Errorf("get volume path for %q: %w", volume, err) } s.Annotations[volumeSourceKey(volume)] = path updated = true } else { - // This is a container + // This is a container. for i := range s.Mounts { - // An error is returned for sandbox if source annotation - // is not successfully applied, so it is guaranteed that - // the source annotation for sandbox has already been - // successfully applied at this point. - // The volume name is unique inside a pod, so matching without - // podUID is fine here. + // An error is returned for sandbox if source + // annotation is not successfully applied, so + // it is guaranteed that the source annotation + // for sandbox has already been successfully + // applied at this point. + // + // The volume name is unique inside a pod, so + // matching without podUID is fine here. + // // TODO: Pass podUID down to shim for containers to do // more accurate matching. if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { @@ -147,7 +148,7 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { if !updated { return nil } - // Update bundle + // Update bundle. 
b, err := json.Marshal(s) if err != nil { return err diff --git a/pkg/shim/v2/BUILD b/pkg/shim/v2/BUILD new file mode 100644 index 000000000..450f62979 --- /dev/null +++ b/pkg/shim/v2/BUILD @@ -0,0 +1,41 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "v2", + srcs = [ + "epoll.go", + "service.go", + "service_linux.go", + ], + visibility = ["//shim:__subpackages__"], + deps = [ + "//pkg/shim/runsc", + "//pkg/shim/v1/proc", + "//pkg/shim/v1/utils", + "//pkg/shim/v2/options", + "//runsc/specutils", + "@com_github_burntsushi_toml//:go_default_library", + "@com_github_containerd_cgroups//:go_default_library", + "@com_github_containerd_cgroups//stats/v1:go_default_library", + "@com_github_containerd_console//:go_default_library", + "@com_github_containerd_containerd//api/events:go_default_library", + "@com_github_containerd_containerd//api/types/task:go_default_library", + "@com_github_containerd_containerd//errdefs:go_default_library", + "@com_github_containerd_containerd//events:go_default_library", + "@com_github_containerd_containerd//log:go_default_library", + "@com_github_containerd_containerd//mount:go_default_library", + "@com_github_containerd_containerd//namespaces:go_default_library", + "@com_github_containerd_containerd//runtime:go_default_library", + "@com_github_containerd_containerd//runtime/linux/runctypes:go_default_library", + "@com_github_containerd_containerd//runtime/proc:go_default_library", + "@com_github_containerd_containerd//runtime/v2/shim:go_default_library", + "@com_github_containerd_containerd//runtime/v2/task:go_default_library", + "@com_github_containerd_cri//pkg/api/runtimeoptions/v1:go_default_library", + "@com_github_containerd_fifo//:go_default_library", + "@com_github_containerd_typeurl//:go_default_library", + "@com_github_gogo_protobuf//types:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/pkg/shim/v2/epoll.go b/pkg/shim/v2/epoll.go index 57a2c5452..45cc38c2a 100644 --- a/pkg/shim/v2/epoll.go +++ b/pkg/shim/v2/epoll.go @@ -19,13 +19,13 @@ package v2 import ( "context" + "fmt" "sync" "github.com/containerd/cgroups" eventstypes "github.com/containerd/containerd/api/events" "github.com/containerd/containerd/events" "github.com/containerd/containerd/runtime" - "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -71,7 +71,7 @@ func (e *epoller) run(ctx context.Context) { if err == unix.EINTR { continue } - logrus.WithError(err).Error("cgroups: epoll wait") + fmt.Errorf("cgroups: epoll wait: %w", err) } for i := 0; i < n; i++ { e.process(ctx, uintptr(events[i].Fd)) @@ -117,7 +117,7 @@ func (e *epoller) process(ctx context.Context, fd uintptr) { if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{ ContainerID: i.id, }); err != nil { - logrus.WithError(err).Error("publish OOM event") + fmt.Errorf("publish OOM event: %w", err) } } diff --git a/pkg/shim/v2/options/BUILD b/pkg/shim/v2/options/BUILD new file mode 100644 index 000000000..ca212e874 --- /dev/null +++ b/pkg/shim/v2/options/BUILD @@ -0,0 +1,11 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "options", + srcs = [ + "options.go", + ], + visibility = ["//:sandbox"], +) diff --git a/pkg/shim/v2/service.go b/pkg/shim/v2/service.go index 0cff82a89..c67b1beba 100644 --- a/pkg/shim/v2/service.go +++ b/pkg/shim/v2/service.go @@ -16,6 +16,7 @@ package v2 import ( "context" + "fmt" "io/ioutil" "os" "os/exec" @@ -26,6 +27,7 @@ import ( 
"github.com/BurntSushi/toml" "github.com/containerd/cgroups" + metrics "github.com/containerd/cgroups/stats/v1" "github.com/containerd/console" eventstypes "github.com/containerd/containerd/api/events" "github.com/containerd/containerd/api/types/task" @@ -42,14 +44,13 @@ import ( runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1" "github.com/containerd/typeurl" ptypes "github.com/gogo/protobuf/types" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/shim/runsc" "gvisor.dev/gvisor/pkg/shim/v1/proc" "gvisor.dev/gvisor/pkg/shim/v1/utils" "gvisor.dev/gvisor/pkg/shim/v2/options" + "gvisor.dev/gvisor/runsc/specutils" ) var ( @@ -68,7 +69,7 @@ var _ = (taskAPI.TaskService)(&service{}) // we assume that a config.toml should exist in the runtime root. const configFile = "config.toml" -// New returns a new shim service that can be used via GRPC +// New returns a new shim service that can be used via GRPC. func New(ctx context.Context, id string, publisher events.Publisher) (shim.Shim, error) { ep, err := newOOMEpoller(publisher) if err != nil { @@ -89,13 +90,13 @@ func New(ctx context.Context, id string, publisher events.Publisher) (shim.Shim, runsc.Monitor = shim.Default if err := s.initPlatform(); err != nil { cancel() - return nil, errors.Wrap(err, "failed to initialized platform behavior") + return nil, fmt.Errorf("failed to initialized platform behavior: %w", err) } go s.forward(publisher) return s, nil } -// service is the shim implementation of a remote shim over GRPC +// service is the shim implementation of a remote shim over GRPC. type service struct { mu sync.Mutex @@ -179,7 +180,7 @@ func (s *service) StartShim(ctx context.Context, id, containerdBinary, container return "", err } if err := shim.SetScore(cmd.Process.Pid); err != nil { - return "", errors.Wrap(err, "failed to set OOM Score on shim") + return "", fmt.Errorf("failed to set OOM Score on shim: %w", err) } return address, nil } @@ -201,10 +202,10 @@ func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{ Force: true, }); err != nil { - logrus.WithError(err).Warn("failed to remove runc container") + log.L.Printf("failed to remove runc container: %v", err) } if err := mount.UnmountAll(filepath.Join(path, "rootfs"), 0); err != nil { - logrus.WithError(err).Warn("failed to cleanup rootfs mount") + log.L.Printf("failed to cleanup rootfs mount: %v", err) } return &taskAPI.DeleteResponse{ ExitedAt: time.Now(), @@ -224,14 +225,15 @@ func (s *service) writeRuntime(path, runtime string) error { return ioutil.WriteFile(filepath.Join(path, "runtime"), []byte(runtime), 0600) } -// Create a new initial process and container with the underlying OCI runtime +// Create creates a new initial process and container with the underlying OCI +// runtime. func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) { s.mu.Lock() defer s.mu.Unlock() ns, err := namespaces.NamespaceRequired(ctx) if err != nil { - return nil, errors.Wrap(err, "create namespace") + return nil, fmt.Errorf("create namespace: %w", err) } // Read from root for now. 
@@ -258,7 +260,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * path = filepath.Join(root, configFile) if _, err := os.Stat(path); err != nil { if !os.IsNotExist(err) { - return nil, errors.Wrapf(err, "stat config file %q", path) + return nil, fmt.Errorf("stat config file %q: %w", path, err) } // A config file in runtime root is not required. path = "" @@ -268,15 +270,15 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * break } if o.TypeUrl != options.OptionType { - return nil, errors.Errorf("unsupported runtimeoptions %q", o.TypeUrl) + return nil, fmt.Errorf("unsupported runtimeoptions %q", o.TypeUrl) } path = o.ConfigPath default: - return nil, errors.Errorf("unsupported option type %q", r.Options.TypeUrl) + return nil, fmt.Errorf("unsupported option type %q", r.Options.TypeUrl) } if path != "" { if _, err = toml.DecodeFile(path, &opts); err != nil { - return nil, errors.Wrapf(err, "decode config file %q", path) + return nil, fmt.Errorf("decode config file %q: %w", path, err) } } } @@ -312,8 +314,8 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * } defer func() { if err != nil { - if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { - logrus.WithError(err2).Warn("failed to cleanup rootfs mount") + if err := mount.UnmountAll(rootfs, 0); err != nil { + log.L.Printf("failed to cleanup rootfs mount: %v", err) } } }() @@ -324,7 +326,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * Options: rm.Options, } if err := m.Mount(rootfs); err != nil { - return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) + return nil, fmt.Errorf("failed to mount rootfs component %v: %w", m, err) } } process, err := newInit( @@ -343,20 +345,21 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * if err := process.Create(ctx, config); err != nil { return nil, errdefs.ToGRPC(err) } - // save the main task id and bundle to the shim for additional requests + // Save the main task id and bundle to the shim for additional + // requests. s.id = r.ID s.bundle = r.Bundle - // Set up OOM notification on the sandbox's cgroup. This is done on sandbox - // create since the sandbox process will be created here. + // Set up OOM notification on the sandbox's cgroup. This is done on + // sandbox create since the sandbox process will be created here. pid := process.Pid() if pid > 0 { cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid)) if err != nil { - return nil, errors.Wrapf(err, "loading cgroup for %d", pid) + return nil, fmt.Errorf("loading cgroup for %d: %w", pid, err) } if err := s.oomPoller.add(s.id, cg); err != nil { - return nil, errors.Wrapf(err, "add cg to OOM monitor") + return nil, fmt.Errorf("add cg to OOM monitor: %w", err) } } s.task = process @@ -367,7 +370,7 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * } -// Start a process +// Start starts a process. func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) { p, err := s.getProcess(r.ExecID) if err != nil { @@ -383,7 +386,7 @@ func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI. }, nil } -// Delete the initial process and container +// Delete deletes the initial process and container. 
func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { p, err := s.getProcess(r.ExecID) if err != nil { @@ -411,7 +414,7 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAP }, nil } -// Exec an additional process inside the container +// Exec spawns an additional process inside the container. func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*ptypes.Empty, error) { s.mu.Lock() p := s.processes[r.ExecID] @@ -440,7 +443,7 @@ func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*pty return empty, nil } -// ResizePty of a process +// ResizePty resizes the terminal of a process. func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*ptypes.Empty, error) { p, err := s.getProcess(r.ExecID) if err != nil { @@ -456,7 +459,7 @@ func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (* return empty, nil } -// State returns runtime state information for a process +// State returns runtime state information for a process. func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) { p, err := s.getProcess(r.ExecID) if err != nil { @@ -490,17 +493,17 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI. }, nil } -// Pause the container +// Pause the container. func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*ptypes.Empty, error) { return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) } -// Resume the container +// Resume the container. func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*ptypes.Empty, error) { return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) } -// Kill a process with the provided signal +// Kill a process with the provided signal. func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*ptypes.Empty, error) { p, err := s.getProcess(r.ExecID) if err != nil { @@ -515,7 +518,7 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*ptypes.Emp return empty, nil } -// Pids returns all pids inside the container +// Pids returns all pids inside the container. func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) { pids, err := s.getContainerPids(ctx, r.ID) if err != nil { @@ -533,7 +536,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.Pi } a, err := typeurl.MarshalAny(d) if err != nil { - return nil, errors.Wrapf(err, "failed to marshal process %d info", pid) + return nil, fmt.Errorf("failed to marshal process %d info: %w", pid, err) } pInfo.Info = a break @@ -546,7 +549,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.Pi }, nil } -// CloseIO of a process +// CloseIO closes the I/O context of a process. func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*ptypes.Empty, error) { p, err := s.getProcess(r.ExecID) if err != nil { @@ -554,18 +557,18 @@ func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*ptyp } if stdin := p.Stdin(); stdin != nil { if err := stdin.Close(); err != nil { - return nil, errors.Wrap(err, "close stdin") + return nil, fmt.Errorf("close stdin: %w", err) } } return empty, nil } -// Checkpoint the container +// Checkpoint checkpoints the container. 
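Handlers the shim does not support, such as `Pause`, `Resume`, and `Checkpoint` above, return `errdefs.ErrNotImplemented` through `errdefs.ToGRPC`, which translates containerd error definitions into gRPC status codes for task API clients. A short standalone sketch of that translation (not project code):

```go
package main

import (
	"fmt"

	"github.com/containerd/containerd/errdefs"
	"google.golang.org/grpc/status"
)

func main() {
	// ToGRPC converts containerd error definitions into gRPC status errors,
	// so callers on the other side of the task API see a meaningful code.
	err := errdefs.ToGRPC(errdefs.ErrNotImplemented)
	fmt.Println(status.Code(err)) // Unimplemented

	// ToGRPCf wraps with extra context first, as ResizePty does above for a
	// missing id.
	err = errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided")
	fmt.Println(status.Code(err)) // InvalidArgument
}
```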
func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*ptypes.Empty, error) { return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) } -// Connect returns shim information such as the shim's pid +// Connect returns shim information such as the shim's pid. func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) { var pid int if s.task != nil { @@ -605,52 +608,52 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI. // gvisor currently (as of 2020-03-03) only returns the total memory // usage and current PID value[0]. However, we copy the common fields here // so that future updates will propagate correct information. We're - // using the cgroups.Metrics structure so we're returning the same type + // using the metrics.Metrics structure so we're returning the same type // as runc. // // [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81 - data, err := typeurl.MarshalAny(&cgroups.Metrics{ - CPU: &cgroups.CPUStat{ - Usage: &cgroups.CPUUsage{ + data, err := typeurl.MarshalAny(&metrics.Metrics{ + CPU: &metrics.CPUStat{ + Usage: &metrics.CPUUsage{ Total: stats.Cpu.Usage.Total, Kernel: stats.Cpu.Usage.Kernel, User: stats.Cpu.Usage.User, PerCPU: stats.Cpu.Usage.Percpu, }, - Throttling: &cgroups.Throttle{ + Throttling: &metrics.Throttle{ Periods: stats.Cpu.Throttling.Periods, ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods, ThrottledTime: stats.Cpu.Throttling.ThrottledTime, }, }, - Memory: &cgroups.MemoryStat{ + Memory: &metrics.MemoryStat{ Cache: stats.Memory.Cache, - Usage: &cgroups.MemoryEntry{ + Usage: &metrics.MemoryEntry{ Limit: stats.Memory.Usage.Limit, Usage: stats.Memory.Usage.Usage, Max: stats.Memory.Usage.Max, Failcnt: stats.Memory.Usage.Failcnt, }, - Swap: &cgroups.MemoryEntry{ + Swap: &metrics.MemoryEntry{ Limit: stats.Memory.Swap.Limit, Usage: stats.Memory.Swap.Usage, Max: stats.Memory.Swap.Max, Failcnt: stats.Memory.Swap.Failcnt, }, - Kernel: &cgroups.MemoryEntry{ + Kernel: &metrics.MemoryEntry{ Limit: stats.Memory.Kernel.Limit, Usage: stats.Memory.Kernel.Usage, Max: stats.Memory.Kernel.Max, Failcnt: stats.Memory.Kernel.Failcnt, }, - KernelTCP: &cgroups.MemoryEntry{ + KernelTCP: &metrics.MemoryEntry{ Limit: stats.Memory.KernelTCP.Limit, Usage: stats.Memory.KernelTCP.Usage, Max: stats.Memory.KernelTCP.Max, Failcnt: stats.Memory.KernelTCP.Failcnt, }, }, - Pids: &cgroups.PidsStat{ + Pids: &metrics.PidsStat{ Current: stats.Pids.Current, Limit: stats.Pids.Limit, }, @@ -663,12 +666,12 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI. }, nil } -// Update a running container +// Update updates a running container. func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*ptypes.Empty, error) { return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) } -// Wait for a process to exit +// Wait waits for a process to exit. func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) { p, err := s.getProcess(r.ExecID) if err != nil { @@ -697,7 +700,7 @@ func (s *service) checkProcesses(e proc.Exit) { for _, p := range s.allProcesses() { if p.ID() == e.ID { if ip, ok := p.(*proc.Init); ok { - // Ensure all children are killed + // Ensure all children are killed. if err := ip.KillAll(s.context); err != nil { log.G(s.context).WithError(err).WithField("id", ip.ID()). 
Error("failed to kill init's children") @@ -733,7 +736,7 @@ func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, er p := s.task s.mu.Unlock() if p == nil { - return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, fmt.Errorf("container must be created: %w", errdefs.ErrFailedPrecondition) } ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) if err != nil { @@ -752,7 +755,7 @@ func (s *service) forward(publisher events.Publisher) { err := publisher.Publish(ctx, getTopic(e), e) cancel() if err != nil { - logrus.WithError(err).Error("post event") + fmt.Errorf("post event: %w", err) } } } @@ -787,7 +790,7 @@ func getTopic(e interface{}) string { case *eventstypes.TaskExecStarted: return runtime.TaskExecStartedEventTopic default: - logrus.Warnf("no topic for type %#v", e) + log.L.Printf("no topic for type %#v", e) } return runtime.TaskUnknownTopic } @@ -795,10 +798,10 @@ func getTopic(e interface{}) string { func newInit(ctx context.Context, path, workDir, namespace string, platform rproc.Platform, r *proc.CreateConfig, options *options.Options, rootfs string) (*proc.Init, error) { spec, err := utils.ReadSpec(r.Bundle) if err != nil { - return nil, errors.Wrap(err, "read oci spec") + return nil, fmt.Errorf("read oci spec: %w", err) } if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { - return nil, errors.Wrap(err, "update volume annotations") + return nil, fmt.Errorf("update volume annotations: %w", err) } runsc.FormatLogPath(r.ID, options.RunscConfig) runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig) @@ -814,7 +817,7 @@ func newInit(ctx context.Context, path, workDir, namespace string, platform rpro p.WorkDir = workDir p.IoUID = int(options.IoUid) p.IoGID = int(options.IoGid) - p.Sandbox = utils.IsSandbox(spec) + p.Sandbox = specutils.SpecContainerType(spec) == specutils.ContainerTypeSandbox p.UserLog = utils.UserLogPath(spec) p.Monitor = shim.Default return p, nil diff --git a/pkg/shim/v2/service_linux.go b/pkg/shim/v2/service_linux.go index cd259cd44..257c58812 100644 --- a/pkg/shim/v2/service_linux.go +++ b/pkg/shim/v2/service_linux.go @@ -19,13 +19,13 @@ package v2 import ( "context" + "fmt" "io" "sync" "syscall" "github.com/containerd/console" "github.com/containerd/fifo" - "github.com/pkg/errors" ) type linuxPlatform struct { @@ -34,7 +34,7 @@ type linuxPlatform struct { func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) { if p.epoller == nil { - return nil, errors.New("uninitialized epoller") + return nil, fmt.Errorf("uninitialized epoller") } epollConsole, err := p.epoller.Add(console) @@ -81,11 +81,11 @@ func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { if p.epoller == nil { - return errors.New("uninitialized epoller") + return fmt.Errorf("uninitialized epoller") } epollConsole, ok := cons.(*console.EpollConsole) if !ok { - return errors.Errorf("expected EpollConsole, got %#v", cons) + return fmt.Errorf("expected EpollConsole, got %#v", cons) } return epollConsole.Shutdown(p.epoller.CloseConsole) } @@ -102,7 +102,7 @@ func (s *service) initPlatform() error { } epoller, err := console.NewEpoller() if err != nil { - return errors.Wrap(err, "failed to initialize epoller") + return fmt.Errorf("failed to initialize epoller: %w", err) 
} s.platform = &linuxPlatform{ epoller: epoller, diff --git a/pkg/test/criutil/criutil.go b/pkg/test/criutil/criutil.go index 8fed29ff5..4c63d669a 100644 --- a/pkg/test/criutil/criutil.go +++ b/pkg/test/criutil/criutil.go @@ -22,6 +22,9 @@ import ( "fmt" "os" "os/exec" + "path" + "regexp" + "strconv" "strings" "time" @@ -33,28 +36,44 @@ import ( type Crictl struct { logger testutil.Logger endpoint string + runpArgs []string cleanup []func() } // resolvePath attempts to find binary paths. It may set the path to invalid, // which will cause the execution to fail with a sensible error. func resolvePath(executable string) string { + runtime, err := dockerutil.RuntimePath() + if err == nil { + // Check first the directory of the runtime itself. + if dir := path.Dir(runtime); dir != "" && dir != "." { + guess := path.Join(dir, executable) + if fi, err := os.Stat(guess); err == nil && (fi.Mode()&0111) != 0 { + return guess + } + } + } + + // Try to find via the path. guess, err := exec.LookPath(executable) - if err != nil { - guess = fmt.Sprintf("/usr/local/bin/%s", executable) + if err == nil { + return guess } - return guess + + // Return a default path. + return fmt.Sprintf("/usr/local/bin/%s", executable) } // NewCrictl returns a Crictl configured with a timeout and an endpoint over // which it will talk to containerd. -func NewCrictl(logger testutil.Logger, endpoint string) *Crictl { +func NewCrictl(logger testutil.Logger, endpoint string, runpArgs []string) *Crictl { // Attempt to find the executable, but don't bother propagating the // error at this point. The first command executed will return with a // binary not found error. return &Crictl{ logger: logger, endpoint: endpoint, + runpArgs: runpArgs, } } @@ -67,8 +86,8 @@ func (cc *Crictl) CleanUp() { } // RunPod creates a sandbox. It corresponds to `crictl runp`. -func (cc *Crictl) RunPod(sbSpecFile string) (string, error) { - podID, err := cc.run("runp", sbSpecFile) +func (cc *Crictl) RunPod(runtime, sbSpecFile string) (string, error) { + podID, err := cc.run("runp", "--runtime", runtime, sbSpecFile) if err != nil { return "", fmt.Errorf("runp failed: %v", err) } @@ -79,10 +98,39 @@ func (cc *Crictl) RunPod(sbSpecFile string) (string, error) { // Create creates a container within a sandbox. It corresponds to `crictl // create`. func (cc *Crictl) Create(podID, contSpecFile, sbSpecFile string) (string, error) { - podID, err := cc.run("create", podID, contSpecFile, sbSpecFile) + // In version 1.16.0, crictl annoying starting attempting to pull the + // container, even if it was already available locally. We therefore + // need to parse the version and add an appropriate --no-pull argument + // since the image has already been loaded locally. + out, err := cc.run("-v") + r := regexp.MustCompile("crictl version ([0-9]+)\\.([0-9]+)\\.([0-9+])") + vs := r.FindStringSubmatch(out) + if len(vs) != 4 { + return "", fmt.Errorf("crictl -v had unexpected output: %s", out) + } + major, err := strconv.ParseUint(vs[1], 10, 64) + if err != nil { + return "", fmt.Errorf("crictl had invalid version: %v (%s)", err, out) + } + minor, err := strconv.ParseUint(vs[2], 10, 64) if err != nil { + return "", fmt.Errorf("crictl had invalid version: %v (%s)", err, out) + } + + args := []string{"create"} + if (major == 1 && minor >= 16) || major > 1 { + args = append(args, "--no-pull") + } + args = append(args, podID) + args = append(args, contSpecFile) + args = append(args, sbSpecFile) + + podID, err = cc.run(args...) 
+ if err != nil { + time.Sleep(10 * time.Minute) // XXX return "", fmt.Errorf("create failed: %v", err) } + // Strip the trailing newline from crictl output. return strings.TrimSpace(podID), nil } @@ -260,7 +308,7 @@ func (cc *Crictl) StopContainer(contID string) error { // StartPodAndContainer starts a sandbox and container in that sandbox. It // returns the pod ID and container ID. -func (cc *Crictl) StartPodAndContainer(image, sbSpec, contSpec string) (string, string, error) { +func (cc *Crictl) StartPodAndContainer(runtime, image, sbSpec, contSpec string) (string, string, error) { if err := cc.Import(image); err != nil { return "", "", err } @@ -277,7 +325,7 @@ func (cc *Crictl) StartPodAndContainer(image, sbSpec, contSpec string) (string, } cc.cleanup = append(cc.cleanup, cleanup) - podID, err := cc.RunPod(sbSpecFile) + podID, err := cc.RunPod(runtime, sbSpecFile) if err != nil { return "", "", err } diff --git a/runsc/BUILD b/runsc/BUILD index 757f6d44c..96f697a5f 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -62,18 +62,22 @@ go_binary( ) pkg_tar( - name = "runsc-bin", - srcs = [":runsc"], + name = "debian-bin", + srcs = [ + ":runsc", + "//shim/v1:gvisor-containerd-shim", + "//shim/v2:containerd-shim-runsc-v1", + ], mode = "0755", package_dir = "/usr/bin", - strip_prefix = "/runsc/linux_amd64_pure_stripped", ) pkg_tar( name = "debian-data", extension = "tar.gz", deps = [ - ":runsc-bin", + ":debian-bin", + "//shim:config", ], ) diff --git a/runsc/debian/postinst.sh b/runsc/debian/postinst.sh index dc7aeee87..d1e28e17b 100755 --- a/runsc/debian/postinst.sh +++ b/runsc/debian/postinst.sh @@ -18,7 +18,14 @@ if [ "$1" != configure ]; then exit 0 fi +# Update docker configuration. if [ -f /etc/docker/daemon.json ]; then runsc install - systemctl restart docker || echo "unable to restart docker; you must do so manually." >&2 + if systemctl status docker 2>/dev/null; then + systemctl restart docker || echo "unable to restart docker; you must do so manually." >&2 + fi fi + +# For containerd-based installers, we don't automatically update the +# configuration. If it uses a v2 shim, then it will find the package binaries +# automatically when provided the appropriate annotation. diff --git a/shim/BUILD b/shim/BUILD new file mode 100644 index 000000000..e581618b2 --- /dev/null +++ b/shim/BUILD @@ -0,0 +1,15 @@ +load("//tools:defs.bzl", "pkg_tar") + +package(licenses = ["notice"]) + +pkg_tar( + name = "config", + srcs = [ + "runsc.toml", + ], + mode = "0644", + package_dir = "/etc/containerd", + visibility = [ + "//runsc:__pkg__", + ], +) diff --git a/shim/README.md b/shim/README.md index e446ec970..c6824ebdc 100644 --- a/shim/README.md +++ b/shim/README.md @@ -1,16 +1,11 @@ -# gvisor-containerd-shim +# gVisor Containerd shims -gvisor-containerd-shim is a containerd shim. It implements the containerd v1 -shim API. It can be used as a drop-in replacement for -[containerd-shim][containerd-shim] -(though containerd-shim must still be installed). It allows the use of both -gVisor (runsc) and normal containers in the same containerd installation by -deferring to the runc shim if the desired runtime engine is not runsc. +There are various shims supported for differt versions of +[containerd][containerd]. 
-- [Untrusted Workload Quick Start (containerd >=1.1)](docs/untrusted-workload-quickstart.md) -- [Runtime Handler/RuntimeClass Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md) -- [Runtime Handler/RuntimeClass Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md) -- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](docs/configure-containerd-shim-runsc-v1.md) -- [Configure gvisor-containerd-shim (shim v1) (containerd <= 1.2)](docs/configure-gvisor-containerd-shim.md) +- [Configure gvisor-containerd-shim (shim v1) (containerd ≤ 1.2)](v1/configure-gvisor-containerd-shim.md) +- [Runtime Handler/RuntimeClass Quick Start (containerd >= 1.2)](v2/runtime-handler-quickstart.md) +- [Runtime Handler/RuntimeClass Quick Start (shim v2) (containerd >= 1.2)](v2/runtime-handler-shim-v2-quickstart.md) +- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](v2/configure-containerd-shim-runsc-v1.md) -[containerd-shim]: https://github.com/containerd/containerd/tree/master/cmd/containerd-shim +[containerd]: https://github.com/containerd/containerd diff --git a/shim/configure-containerd-shim-runsc-v1.md b/shim/configure-containerd-shim-runsc-v1.md deleted file mode 100644 index 977ceacbd..000000000 --- a/shim/configure-containerd-shim-runsc-v1.md +++ /dev/null @@ -1,72 +0,0 @@ -# Configure containerd-shim-runsc-v1 (Shim V2) - -This document describes how to configure runtime options for -`containerd-shim-runsc-v1`. This is follows on to the instructions of -[Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md) -and requires containerd 1.3 or later. - -### Update `/etc/containerd/config.toml` to point to a configuration file for `containerd-shim-runsc-v1`. - -`containerd-shim-runsc-v1` supports a few different configuration options based -on the version of containerd that is used. For versions >= 1.3, it supports a -configurable config path in the containerd runtime configuration. - -```shell -{ # Step 1: Update runtime options for runsc in containerd config.toml -cat < -[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(dev\)/ /^}/) -```shell -{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1 - make - sudo make install -} -``` - -### Configure containerd - -1. Update `/etc/containerd/config.toml`. Make sure `containerd-shim-runsc-v1` is - in `${PATH}`. - -[embedmd]:# (../test/e2e/runtime-handler-shim-v2/install.sh shell /{ # Step 1/ /^}/) -```shell -{ # Step 1: Create containerd config.toml -cat < Note: This shim version is supported only for containerd versions less than +> 1.2. If you are using a containerd version greater than or equal to 1.2, then +> please use `containerd-shim-runsc-v1` (Shim API v1). +> +> This containerd shim is supported only in a best-effort capacity. + +This document describes how to configure and use `gvisor-containerd-shim`. + +## Containerd Configuration + +To use this shim, you must configure `/etc/containerd/config.toml` as follows: + +``` +[plugins.linux] + shim = "/usr/bin/gvisor-containerd-shim" +[plugins.cri.containerd.runtimes.gvisor] + runtime_type = "io.containerd.runtime.v1.linux" + runtime_engine = "/usr/bin/runsc" + runtime_root = "/run/containerd/runsc" +``` + +In order to pick-up the new configuration, you may need to restart containerd: + +```shell +sudo systemctl restart containerd +``` + +## Shim Confguration + +The shim configuration is stored in `/etc/containerd/runsc.toml`. 
The +configuration file supports two values. + +* `runc_shim`: The path to the runc shim. This is used by + `gvisor-containerd-shim` to run standard containers. + +* `runsc_config`: This is a set of key/value pairs that are converted into + `runsc` command line flags. You can learn more about which flags are available + by running `runsc flags`. + +For example, a configuration might look as follows: + +``` +runc_shim = "/usr/local/bin/containerd-shim" +[runsc_config] +platform = "kvm" +debug = true +debug-log = /var/log/%ID%/gvisor.log +``` diff --git a/shim/v1/config.go b/shim/v1/config.go new file mode 100644 index 000000000..a72cc7754 --- /dev/null +++ b/shim/v1/config.go @@ -0,0 +1,40 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "github.com/BurntSushi/toml" + +// config is the configuration for gvisor containerd shim. +type config struct { + // RuncShim is the shim binary path for standard containerd-shim for runc. + // When the runtime is `runc`, gvisor containerd shim will exec current + // process to standard containerd-shim. This is a work around for containerd + // 1.1. In containerd 1.2, containerd will choose different containerd-shims + // based on runtime. + RuncShim string `toml:"runc_shim"` + // RunscConfig is configuration for runsc. The key value will be converted + // to runsc flags --key=value directly. + RunscConfig map[string]string `toml:"runsc_config"` +} + +// loadConfig load gvisor containerd shim config from config file. 
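A minimal sketch of decoding a `/etc/containerd/runsc.toml` like the README example above into this `config` struct with `BurntSushi/toml`, which is essentially what the `loadConfig` helper that follows does. The path and flag values here are illustrative; note that `runsc_config` values decode into a `map[string]string`, so they are written as TOML strings:

```go
package main

import (
	"fmt"
	"log"

	"github.com/BurntSushi/toml"
)

// config mirrors the struct above: runc_shim plus a runsc_config map.
type config struct {
	RuncShim    string            `toml:"runc_shim"`
	RunscConfig map[string]string `toml:"runsc_config"`
}

func main() {
	// Illustrative file contents; flag values are examples only.
	data := `
runc_shim = "/usr/local/bin/containerd-shim"
[runsc_config]
platform = "kvm"
debug = "true"
debug-log = "/var/log/%ID%/gvisor.log"
`
	var c config
	if _, err := toml.Decode(data, &c); err != nil {
		log.Fatalf("decode runsc.toml: %v", err)
	}
	fmt.Println(c.RuncShim, c.RunscConfig["platform"])
}
```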
+func loadConfig(path string) (*config, error) { + var c config + _, err := toml.DecodeFile(path, &c) + if err != nil { + return &c, err + } + return &c, nil +} diff --git a/shim/v1/main.go b/shim/v1/main.go index 41c77394a..43deee858 100644 --- a/shim/v1/main.go +++ b/shim/v1/main.go @@ -16,11 +16,252 @@ package main import ( - "github.com/containerd/containerd/runtime/v2/shim" + "bytes" + "context" + "flag" + "fmt" + "log" + "net" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "sync" + "syscall" - runsc "gvisor.dev/gvisor/pkg/shim/v2" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/runtime/v1/linux/proc" + containerdshim "github.com/containerd/containerd/runtime/v1/shim" + shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" + "github.com/containerd/ttrpc" + "github.com/containerd/typeurl" + ptypes "github.com/gogo/protobuf/types" + "github.com/opencontainers/runc/libcontainer/system" + "golang.org/x/sys/unix" + + "gvisor.dev/gvisor/pkg/shim/runsc" + "gvisor.dev/gvisor/pkg/shim/v1/shim" +) + +var ( + debugFlag bool + namespaceFlag string + socketFlag string + addressFlag string + workdirFlag string + runtimeRootFlag string + containerdBinaryFlag string + shimConfigFlag string ) +func init() { + flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs") + flag.StringVar(&namespaceFlag, "namespace", "", "namespace that owns the shim") + flag.StringVar(&socketFlag, "socket", "", "abstract socket path to serve") + flag.StringVar(&addressFlag, "address", "", "grpc address back to main containerd") + flag.StringVar(&workdirFlag, "workdir", "", "path used to storge large temporary data") + + // Containerd default to runc, unless another runtime is explicitly + // specified. We keep the same default to make the default behavior + // consistent. + flag.StringVar(&runtimeRootFlag, "runtime-root", proc.RuncRoot, "root directory for the runtime") + + // Currently, the `containerd publish` utility is embedded in the + // daemon binary. The daemon invokes `containerd-shim + // -containerd-binary ...` with its own os.Executable() path. + flag.StringVar(&containerdBinaryFlag, "containerd-binary", "containerd", "path to containerd binary (used for `containerd publish`)") + flag.StringVar(&shimConfigFlag, "config", "/etc/containerd/runsc.toml", "path to the shim configuration file") +} + func main() { - shim.Run("io.containerd.runsc.v1", runsc.New) + flag.Parse() + + // This is a hack. Exec current process to run standard containerd-shim + // if runtime root is not `runsc`. We don't need this for shim v2 api. + if filepath.Base(runtimeRootFlag) != "runsc" { + if err := executeRuncShim(); err != nil { + fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) + os.Exit(1) + } + } + + // Run regular shim if needed. + if err := executeShim(); err != nil { + fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) + os.Exit(1) + } +} + +// executeRuncShim execs current process to a containerd-shim process and +// retains all flags and envs. +func executeRuncShim() error { + c, err := loadConfig(shimConfigFlag) + if err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to load shim config: %w", err) + } + shimPath := c.RuncShim + if shimPath == "" { + shimPath, err = exec.LookPath("containerd-shim") + if err != nil { + return fmt.Errorf("lookup containerd-shim failed: %w", err) + } + } + + args := append([]string{shimPath}, os.Args[1:]...) 
+ if err := syscall.Exec(shimPath, args, os.Environ()); err != nil { + return fmt.Errorf("exec containerd-shim @ %q failed: %w", shimPath, err) + } + return nil +} + +func executeShim() error { + // start handling signals as soon as possible so that things are + // properly reaped or if runtime exits before we hit the handler. + signals, err := setupSignals() + if err != nil { + return err + } + path, err := os.Getwd() + if err != nil { + return err + } + server, err := ttrpc.NewServer(ttrpc.WithServerHandshaker(ttrpc.UnixSocketRequireSameUser())) + if err != nil { + return fmt.Errorf("failed creating server: %w", err) + } + c, err := loadConfig(shimConfigFlag) + if err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to load shim config: %w", err) + } + sv, err := shim.NewService( + shim.Config{ + Path: path, + Namespace: namespaceFlag, + WorkDir: workdirFlag, + RuntimeRoot: runtimeRootFlag, + RunscConfig: c.RunscConfig, + }, + &remoteEventsPublisher{address: addressFlag}, + ) + if err != nil { + return err + } + shimapi.RegisterShimService(server, sv) + if err := serve(server, socketFlag); err != nil { + return err + } + return handleSignals(signals, server, sv) +} + +// serve serves the ttrpc API over a unix socket at the provided path this +// function does not block. +func serve(server *ttrpc.Server, path string) error { + var ( + l net.Listener + err error + ) + if path == "" { + l, err = net.FileListener(os.NewFile(3, "socket")) + path = "[inherited from parent]" + } else { + if len(path) > 106 { + return fmt.Errorf("%q: unix socket path too long (> 106)", path) + } + l, err = net.Listen("unix", "\x00"+path) + } + if err != nil { + return err + } + go func() { + defer l.Close() + err := server.Serve(context.Background(), l) + if err != nil && !strings.Contains(err.Error(), "use of closed network connection") { + log.Fatalf("ttrpc server failure: %v", err) + } + }() + return nil +} + +// setupSignals creates a new signal handler for all signals and sets the shim +// as a sub-reaper so that the container processes are reparented. +func setupSignals() (chan os.Signal, error) { + signals := make(chan os.Signal, 32) + signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) + // make sure runc is setup to use the monitor for waiting on processes. + // TODO(random-liu): Move shim/reaper.go to a separate package. + runsc.Monitor = containerdshim.Default + // Set the shim as the subreaper for all orphaned processes created by + // the container. + if err := system.SetSubreaper(1); err != nil { + return nil, err + } + return signals, nil +} + +func handleSignals(signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error { + var ( + termOnce sync.Once + done = make(chan struct{}) + ) + + for { + select { + case <-done: + return nil + case s := <-signals: + switch s { + case unix.SIGCHLD: + if err := containerdshim.Reap(); err != nil { + log.Printf("reap exit status: %v") + } + case unix.SIGTERM, unix.SIGINT: + go termOnce.Do(func() { + ctx := context.TODO() + if err := server.Shutdown(ctx); err != nil { + log.Printf("failed to shutdown server: %v") + } + // Ensure our child is dead if any. 
+ sv.Kill(ctx, &shimapi.KillRequest{ + Signal: uint32(syscall.SIGKILL), + All: true, + }) + sv.Delete(context.Background(), &ptypes.Empty{}) + close(done) + }) + case unix.SIGPIPE: + } + } + } +} + +type remoteEventsPublisher struct { + address string +} + +func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { + ns, _ := namespaces.Namespace(ctx) + encoded, err := typeurl.MarshalAny(event) + if err != nil { + return err + } + data, err := encoded.Marshal() + if err != nil { + return err + } + cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) + cmd.Stdin = bytes.NewReader(data) + c, err := containerdshim.Default.Start(cmd) + if err != nil { + return err + } + status, err := containerdshim.Default.Wait(cmd, c) + if err != nil { + return fmt.Errorf("failed to publish event: %w", err) + } + if status != 0 { + return fmt.Errorf("failed to publish event: status %d", status) + } + return nil } diff --git a/shim/v2/BUILD b/shim/v2/BUILD new file mode 100644 index 000000000..1e1947dab --- /dev/null +++ b/shim/v2/BUILD @@ -0,0 +1,32 @@ +load("//tools:defs.bzl", "go_binary", "pkg_tar") +load("//website:defs.bzl", "doc") + +package(licenses = ["notice"]) + +go_binary( + name = "containerd-shim-runsc-v1", + srcs = [ + "main.go", + ], + pure = True, + visibility = [ + "//visibility:public", + ], + deps = [ + "//pkg/shim/runsc", + "//pkg/shim/v1/shim", + "//pkg/shim/v2", + "@com_github_burntsushi_toml//:go_default_library", + "@com_github_containerd_containerd//runtime/v2/shim:go_default_library", + ], +) + +doc( + name = "doc", + src = "README.md", + category = "User Guide", + permalink = "/docs/user_guide/containerd-shim-runsc-v1/", + subcategory = "Advanced", + visibility = ["//website:__pkg__"], + weight = "92", +) diff --git a/shim/v2/README.md b/shim/v2/README.md new file mode 100644 index 000000000..2fd625415 --- /dev/null +++ b/shim/v2/README.md @@ -0,0 +1,90 @@ +# containerd-shim-runsc-v1 + +> Note: This shim version is the recommended shim for containerd versions +> greater than or equal to 1.2. For older versions of containerd, use +> `gvisor-containerd-shim`. + +This document describes how to configure and use `containerd-shim-runsc-v1`. + +## Configuring Containerd 1.2 + +To configure containerd 1.2 to use this shim, add the runtime to +`/etc/containerd/config.toml` as follows: + +``` +[plugins.cri.containerd.runtimes.runsc] + runtime_type = "io.containerd.runsc.v1" + runtime_root = "/run/containerd/runsc" +[plugins.cri.containerd.runtimes.runsc.options] + TypeUrl = "io.containerd.runsc.v1.options" +``` + +The configuration will optionally loaded from a file named `config.toml` in the +`runtime_root` configured above. + +In order to pick up the new configuration, you may need to restart containerd: + +```shell +sudo systemctl restart containerd +``` + +## Configuring Containerd 1.3 and above + +To configure containerd 1.3 to use this shim, add the runtime to +`/etc/containerd/config.toml` as follows: + +``` +[plugins.cri.containerd.runtimes.runsc] + runtime_type = "io.containerd.runsc.v1" +[plugins.cri.containerd.runtimes.runsc.options] + TypeUrl = "io.containerd.runsc.v1.options" + ConfigPath = "/etc/containerd/runsc.toml" +``` + +The `ConfigPath` above will be used to provide a pointer to the configuration +file to be loaded. + +> Note that there will be configuration file loaded if `ConfigPath` is not set. 
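Besides CRI, the same shim can be selected directly by runtime name through the containerd Go client once containerd has been restarted (see the restart step below): containerd resolves `io.containerd.runsc.v1` to the `containerd-shim-runsc-v1` binary on `$PATH`. A hedged sketch of that flow; the image reference, container and snapshot IDs, and the default socket path are illustrative:

```go
package main

import (
	"context"
	"log"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/cio"
	"github.com/containerd/containerd/namespaces"
	"github.com/containerd/containerd/oci"
)

func main() {
	client, err := containerd.New("/run/containerd/containerd.sock")
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	ctx := namespaces.WithNamespace(context.Background(), "default")

	image, err := client.Pull(ctx, "docker.io/library/nginx:latest", containerd.WithPullUnpack)
	if err != nil {
		log.Fatal(err)
	}

	// Select the runsc shim by its runtime name.
	container, err := client.NewContainer(ctx, "nginx-runsc",
		containerd.WithRuntime("io.containerd.runsc.v1", nil),
		containerd.WithNewSnapshot("nginx-runsc-snapshot", image),
		containerd.WithNewSpec(oci.WithImageConfig(image)),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer container.Delete(ctx, containerd.WithSnapshotCleanup)

	task, err := container.NewTask(ctx, cio.NewCreator(cio.WithStdio))
	if err != nil {
		log.Fatal(err)
	}
	defer task.Delete(ctx)

	if err := task.Start(ctx); err != nil {
		log.Fatal(err)
	}
	log.Printf("started %s under runsc", container.ID())
}
```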
+ +In order to pick up the new configuration, you may need to restart containerd: + +```shell +sudo systemctl restart containerd +``` +## Shim Confguration + +The shim configuration may carry the following options: + +* `shim_cgroup`: The cgroup to use for the shim itself. +* `io_uid`: The UID to use for pipes. +* `ui_gid`: The GID to use for pipes. +* `binary_name`: The runtime binary name (defaults to `runsc`). +* `root`: The root directory for the runtime. +* `runsc_config`: A dictionary of key-value pairs that will be passed to the + runtime as arguments. + +### Example: Enable the KVM platform + +gVisor enables the use of a number of platforms. This example shows how to +configure `containerd-shim-runsc-v1` to use gVisor with the KVM platform: + +```shell +cat < 106 { - return errors.Errorf("%q: unix socket path too long (> 106)", path) - } - l, err = net.Listen("unix", "\x00"+path) - } - if err != nil { - return err - } - logrus.WithField("socket", path).Debug("serving api on unix socket") - go func() { - defer l.Close() - if err := server.Serve(context.Background(), l); err != nil && - !strings.Contains(err.Error(), "use of closed network connection") { - logrus.WithError(err).Fatal("gvisor-containerd-shim: ttrpc server failure") - } - }() - return nil -} - -// setupSignals creates a new signal handler for all signals and sets the shim as a -// sub-reaper so that the container processes are reparented -func setupSignals() (chan os.Signal, error) { - signals := make(chan os.Signal, 32) - signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) - // make sure runc is setup to use the monitor - // for waiting on processes - // TODO(random-liu): Move shim/reaper.go to a separate package. - runsc.Monitor = containerdshim.Default - // set the shim as the subreaper for all orphaned processes created by the container - if err := system.SetSubreaper(1); err != nil { - return nil, err - } - return signals, nil -} - -func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error { - var ( - termOnce sync.Once - done = make(chan struct{}) - ) - - for { - select { - case <-done: - return nil - case s := <-signals: - switch s { - case unix.SIGCHLD: - if err := containerdshim.Reap(); err != nil { - logger.WithError(err).Error("reap exit status") - } - case unix.SIGTERM, unix.SIGINT: - go termOnce.Do(func() { - ctx := context.TODO() - if err := server.Shutdown(ctx); err != nil { - logger.WithError(err).Error("failed to shutdown server") - } - // Ensure our child is dead if any - sv.Kill(ctx, &shimapi.KillRequest{ - Signal: uint32(syscall.SIGKILL), - All: true, - }) - sv.Delete(context.Background(), &ptypes.Empty{}) - close(done) - }) - case unix.SIGPIPE: - } - } - } -} - -func dumpStacks(logger *logrus.Entry) { - var ( - buf []byte - stackSize int - ) - bufferLen := 16384 - for stackSize == len(buf) { - buf = make([]byte, bufferLen) - stackSize = runtime.Stack(buf, true) - bufferLen *= 2 - } - buf = buf[:stackSize] - logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) -} - -type remoteEventsPublisher struct { - address string -} - -func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { - ns, _ := namespaces.Namespace(ctx) - encoded, err := typeurl.MarshalAny(event) - if err != nil { - return err - } - data, err := encoded.Marshal() - if err != nil { - return err - } - cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", 
l.address, "publish", "--topic", topic, "--namespace", ns) - cmd.Stdin = bytes.NewReader(data) - c, err := containerdshim.Default.Start(cmd) - if err != nil { - return err - } - status, err := containerdshim.Default.Wait(cmd, c) - if err != nil { - return err - } - if status != 0 { - return errors.New("failed to publish event") - } - return nil + shim.Run("io.containerd.runsc.v1", runsc.New) } diff --git a/shim/v2/runtime-handler-shim-v2-quickstart.md b/shim/v2/runtime-handler-shim-v2-quickstart.md new file mode 100644 index 000000000..ca8336089 --- /dev/null +++ b/shim/v2/runtime-handler-shim-v2-quickstart.md @@ -0,0 +1,232 @@ +# Runtime Handler Quickstart (Shim V2) + +This document describes how to install and run `containerd-shim-runsc-v1` using +the containerd runtime handler support. This requires containerd 1.2 or later. + +## Requirements + +- **runsc**: See the [gVisor documentation](https://github.com/google/gvisor) for information on how to install runsc. +- **containerd**: See the [containerd website](https://containerd.io/) for information on how to install containerd. + +## Install + +### Install containerd-shim-runsc-v1 + +1. Build and install `containerd-shim-runsc-v1`. + + +[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(dev\)/ /^}/) +```shell +{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1 + make + sudo make install +} +``` + +### Configure containerd + +1. Update `/etc/containerd/config.toml`. Make sure `containerd-shim-runsc-v1` is + in `${PATH}`. + +[embedmd]:# (../test/e2e/runtime-handler-shim-v2/install.sh shell /{ # Step 1/ /^}/) +```shell +{ # Step 1: Create containerd config.toml +cat < 0 { // Omit if empty. s["command"] = cmd @@ -95,25 +98,29 @@ var Httpd = SimpleSpec("httpd", "basic/httpd", nil, nil) // TestCrictlSanity refers to b/112433158. func TestCrictlSanity(t *testing.T) { - // Setup containerd and crictl. - crictl, cleanup, err := setup(t) - if err != nil { - t.Fatalf("failed to setup crictl: %v", err) - } - defer cleanup() - podID, contID, err := crictl.StartPodAndContainer("basic/httpd", Sandbox("default"), Httpd) - if err != nil { - t.Fatalf("start failed: %v", err) - } - - // Look for the httpd page. - if err = httpGet(crictl, podID, "index.html"); err != nil { - t.Fatalf("failed to get page: %v", err) - } - - // Stop everything. - if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatalf("stop failed: %v", err) + for _, version := range allVersions { + t.Run(version, func(t *testing.T) { + // Setup containerd and crictl. + crictl, cleanup, err := setup(t, version) + if err != nil { + t.Fatalf("failed to setup crictl: %v", err) + } + defer cleanup() + podID, contID, err := crictl.StartPodAndContainer(containerdRuntime, "basic/httpd", Sandbox("default"), Httpd) + if err != nil { + t.Fatalf("start failed: %v", err) + } + + // Look for the httpd page. + if err = httpGet(crictl, podID, "index.html"); err != nil { + t.Fatalf("failed to get page: %v", err) + } + + // Stop everything. + if err := crictl.StopPodAndContainer(podID, contID); err != nil { + t.Fatalf("stop failed: %v", err) + } + }) } } @@ -147,146 +154,179 @@ var HttpdMountPaths = SimpleSpec("httpd", "basic/httpd", nil, map[string]interfa // TestMountPaths refers to b/117635704. func TestMountPaths(t *testing.T) { - // Setup containerd and crictl. 
- crictl, cleanup, err := setup(t) - if err != nil { - t.Fatalf("failed to setup crictl: %v", err) - } - defer cleanup() - podID, contID, err := crictl.StartPodAndContainer("basic/httpd", Sandbox("default"), HttpdMountPaths) - if err != nil { - t.Fatalf("start failed: %v", err) - } - - // Look for the directory available at /test. - if err = httpGet(crictl, podID, "test"); err != nil { - t.Fatalf("failed to get page: %v", err) - } - - // Stop everything. - if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatalf("stop failed: %v", err) + for _, version := range allVersions { + t.Run(version, func(t *testing.T) { + // Setup containerd and crictl. + crictl, cleanup, err := setup(t, version) + if err != nil { + t.Fatalf("failed to setup crictl: %v", err) + } + defer cleanup() + podID, contID, err := crictl.StartPodAndContainer(containerdRuntime, "basic/httpd", Sandbox("default"), HttpdMountPaths) + if err != nil { + t.Fatalf("start failed: %v", err) + } + + // Look for the directory available at /test. + if err = httpGet(crictl, podID, "test"); err != nil { + t.Fatalf("failed to get page: %v", err) + } + + // Stop everything. + if err := crictl.StopPodAndContainer(podID, contID); err != nil { + t.Fatalf("stop failed: %v", err) + } + }) } } // TestMountPaths refers to b/118728671. func TestMountOverSymlinks(t *testing.T) { - // Setup containerd and crictl. - crictl, cleanup, err := setup(t) - if err != nil { - t.Fatalf("failed to setup crictl: %v", err) - } - defer cleanup() - - spec := SimpleSpec("busybox", "basic/resolv", []string{"sleep", "1000"}, nil) - podID, contID, err := crictl.StartPodAndContainer("basic/resolv", Sandbox("default"), spec) - if err != nil { - t.Fatalf("start failed: %v", err) - } - - out, err := crictl.Exec(contID, "readlink", "/etc/resolv.conf") - if err != nil { - t.Fatalf("readlink failed: %v, out: %s", err, out) - } - if want := "/tmp/resolv.conf"; !strings.Contains(string(out), want) { - t.Fatalf("/etc/resolv.conf is not pointing to %q: %q", want, string(out)) - } - - etc, err := crictl.Exec(contID, "cat", "/etc/resolv.conf") - if err != nil { - t.Fatalf("cat failed: %v, out: %s", err, etc) - } - tmp, err := crictl.Exec(contID, "cat", "/tmp/resolv.conf") - if err != nil { - t.Fatalf("cat failed: %v, out: %s", err, out) - } - if tmp != etc { - t.Fatalf("file content doesn't match:\n\t/etc/resolv.conf: %s\n\t/tmp/resolv.conf: %s", string(etc), string(tmp)) - } - - // Stop everything. - if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatalf("stop failed: %v", err) + for _, version := range allVersions { + t.Run(version, func(t *testing.T) { + // Setup containerd and crictl. 
+ crictl, cleanup, err := setup(t, version) + if err != nil { + t.Fatalf("failed to setup crictl: %v", err) + } + defer cleanup() + + spec := SimpleSpec("busybox", "basic/resolv", []string{"sleep", "1000"}, nil) + podID, contID, err := crictl.StartPodAndContainer(containerdRuntime, "basic/resolv", Sandbox("default"), spec) + if err != nil { + t.Fatalf("start failed: %v", err) + } + + out, err := crictl.Exec(contID, "readlink", "/etc/resolv.conf") + if err != nil { + t.Fatalf("readlink failed: %v, out: %s", err, out) + } + if want := "/tmp/resolv.conf"; !strings.Contains(string(out), want) { + t.Fatalf("/etc/resolv.conf is not pointing to %q: %q", want, string(out)) + } + + etc, err := crictl.Exec(contID, "cat", "/etc/resolv.conf") + if err != nil { + t.Fatalf("cat failed: %v, out: %s", err, etc) + } + tmp, err := crictl.Exec(contID, "cat", "/tmp/resolv.conf") + if err != nil { + t.Fatalf("cat failed: %v, out: %s", err, out) + } + if tmp != etc { + t.Fatalf("file content doesn't match:\n\t/etc/resolv.conf: %s\n\t/tmp/resolv.conf: %s", string(etc), string(tmp)) + } + + // Stop everything. + if err := crictl.StopPodAndContainer(podID, contID); err != nil { + t.Fatalf("stop failed: %v", err) + } + }) } } // TestHomeDir tests that the HOME environment variable is set for // Pod containers. func TestHomeDir(t *testing.T) { - // Setup containerd and crictl. - crictl, cleanup, err := setup(t) - if err != nil { - t.Fatalf("failed to setup crictl: %v", err) + for _, version := range allVersions { + t.Run(version, func(t *testing.T) { + // Setup containerd and crictl. + crictl, cleanup, err := setup(t, version) + if err != nil { + t.Fatalf("failed to setup crictl: %v", err) + } + defer cleanup() + + // Note that container ID returned here is a sub-container. All Pod + // containers are sub-containers. The root container of the sandbox is the + // pause container. + t.Run("sub-container", func(t *testing.T) { + contSpec := SimpleSpec("subcontainer", "basic/busybox", []string{"sh", "-c", "echo $HOME"}, nil) + podID, contID, err := crictl.StartPodAndContainer(containerdRuntime, "basic/busybox", Sandbox("subcont-sandbox"), contSpec) + if err != nil { + t.Fatalf("start failed: %v", err) + } + + out, err := crictl.Logs(contID) + if err != nil { + t.Fatalf("failed retrieving container logs: %v, out: %s", err, out) + } + if got, want := strings.TrimSpace(string(out)), "/root"; got != want { + t.Fatalf("Home directory invalid. Got %q, Want : %q", got, want) + } + + // Stop everything; note that the pod may have already stopped. + crictl.StopPodAndContainer(podID, contID) + }) + + // Tests that HOME is set for the exec process. + t.Run("exec", func(t *testing.T) { + contSpec := SimpleSpec("exec", "basic/busybox", []string{"sleep", "1000"}, nil) + podID, contID, err := crictl.StartPodAndContainer(containerdRuntime, "basic/busybox", Sandbox("exec-sandbox"), contSpec) + if err != nil { + t.Fatalf("start failed: %v", err) + } + + out, err := crictl.Exec(contID, "sh", "-c", "echo $HOME") + if err != nil { + t.Fatalf("failed retrieving container logs: %v, out: %s", err, out) + } + if got, want := strings.TrimSpace(string(out)), "/root"; got != want { + t.Fatalf("Home directory invalid. Got %q, Want : %q", got, want) + } + + // Stop everything. + if err := crictl.StopPodAndContainer(podID, contID); err != nil { + t.Fatalf("stop failed: %v", err) + } + }) + }) } - defer cleanup() - - // Note that container ID returned here is a sub-container. All Pod - // containers are sub-containers. 
The root container of the sandbox is the - // pause container. - t.Run("sub-container", func(t *testing.T) { - contSpec := SimpleSpec("subcontainer", "basic/busybox", []string{"sh", "-c", "echo $HOME"}, nil) - podID, contID, err := crictl.StartPodAndContainer("basic/busybox", Sandbox("subcont-sandbox"), contSpec) - if err != nil { - t.Fatalf("start failed: %v", err) - } - - out, err := crictl.Logs(contID) - if err != nil { - t.Fatalf("failed retrieving container logs: %v, out: %s", err, out) - } - if got, want := strings.TrimSpace(string(out)), "/root"; got != want { - t.Fatalf("Home directory invalid. Got %q, Want : %q", got, want) - } - - // Stop everything. - if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatalf("stop failed: %v", err) - } - }) - - // Tests that HOME is set for the exec process. - t.Run("exec", func(t *testing.T) { - contSpec := SimpleSpec("exec", "basic/busybox", []string{"sleep", "1000"}, nil) - podID, contID, err := crictl.StartPodAndContainer("basic/busybox", Sandbox("exec-sandbox"), contSpec) - if err != nil { - t.Fatalf("start failed: %v", err) - } - - out, err := crictl.Exec(contID, "sh", "-c", "echo $HOME") - if err != nil { - t.Fatalf("failed retrieving container logs: %v, out: %s", err, out) - } - if got, want := strings.TrimSpace(string(out)), "/root"; got != want { - t.Fatalf("Home directory invalid. Got %q, Want : %q", got, want) - } - - // Stop everything. - if err := crictl.StopPodAndContainer(podID, contID); err != nil { - t.Fatalf("stop failed: %v", err) - } - }) } -// containerdConfigTemplate is a .toml config for containerd. It contains a -// formatting verb so the runtime field can be set via fmt.Sprintf. -const containerdConfigTemplate = ` +const containerdRuntime = "runsc" + +const v1Template = ` disabled_plugins = ["restart"] +[plugins.cri] + disable_tcp_service = true [plugins.linux] - runtime = "%s" - runtime_root = "/tmp/test-containerd/runsc" - shim = "/usr/local/bin/gvisor-containerd-shim" + shim = "%s" shim_debug = true - -[plugins.cri.containerd.runtimes.runsc] +[plugins.cri.containerd.runtimes.` + containerdRuntime + `] runtime_type = "io.containerd.runtime.v1.linux" runtime_engine = "%s" + runtime_root = "%s/root/runsc" ` +const v2Template = ` +disabled_plugins = ["restart"] +[plugins.cri] + disable_tcp_service = true +[plugins.linux] + shim_debug = true +[plugins.cri.containerd.runtimes.` + containerdRuntime + `] + runtime_type = "io.containerd.` + containerdRuntime + `.v1" +[plugins.cri.containerd.runtimes.` + containerdRuntime + `.options] + TypeUrl = "io.containerd.` + containerdRuntime + `.v1.options" +` + +const ( + // v1 is the containerd API v1. + v1 string = "v1" + + // v1 is the containerd API v21. + v2 string = "v2" +) + +// allVersions is the set of known versions. +var allVersions = []string{v1, v2} + // setup sets up before a test. Specifically it: // * Creates directories and a socket for containerd to utilize. // * Runs containerd and waits for it to reach a "ready" state for testing. // * Returns a cleanup function that should be called at the end of the test. -func setup(t *testing.T) (*criutil.Crictl, func(), error) { +func setup(t *testing.T, version string) (*criutil.Crictl, func(), error) { // Create temporary containerd root and state directories, and a socket // via which crictl and containerd communicate. 
containerdRoot, err := ioutil.TempDir(testutil.TmpDir(), "containerd-root") @@ -295,13 +335,43 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) { } cu := cleanup.Make(func() { os.RemoveAll(containerdRoot) }) defer cu.Clean() + t.Logf("Using containerd root: %s", containerdRoot) containerdState, err := ioutil.TempDir(testutil.TmpDir(), "containerd-state") if err != nil { t.Fatalf("failed to create containerd state: %v", err) } cu.Add(func() { os.RemoveAll(containerdState) }) - sockAddr := filepath.Join(testutil.TmpDir(), "containerd-test.sock") + t.Logf("Using containerd state: %s", containerdState) + + sockDir, err := ioutil.TempDir(testutil.TmpDir(), "containerd-sock") + if err != nil { + t.Fatalf("failed to create containerd socket directory: %v", err) + } + cu.Add(func() { os.RemoveAll(sockDir) }) + sockAddr := path.Join(sockDir, "test.sock") + t.Logf("Using containerd socket: %s", sockAddr) + + // Extract the containerd version. + versionCmd := exec.Command(getContainerd(), "-v") + out, err := versionCmd.CombinedOutput() + if err != nil { + t.Fatalf("error extracting containerd version: %v (%s)", err, string(out)) + } + r := regexp.MustCompile(" v([0-9]+)\\.([0-9]+)\\.([0-9+])") + vs := r.FindStringSubmatch(string(out)) + if len(vs) != 4 { + t.Fatalf("error unexpected version string: %s", string(out)) + } + major, err := strconv.ParseUint(vs[1], 10, 64) + if err != nil { + t.Fatalf("error parsing containerd major version: %v (%s)", err, string(out)) + } + minor, err := strconv.ParseUint(vs[2], 10, 64) + if err != nil { + t.Fatalf("error parsing containerd minor version: %v (%s)", err, string(out)) + } + t.Logf("Using containerd version: %d.%d", major, minor) // We rewrite a configuration. This is based on the current docker // configuration for the runtime under test. @@ -309,28 +379,100 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) { if err != nil { t.Fatalf("error discovering runtime path: %v", err) } - config, configCleanup, err := testutil.WriteTmpFile("containerd-config", fmt.Sprintf(containerdConfigTemplate, runtime, runtime)) + t.Logf("Using runtime: %v", runtime) + + // Construct a PATH that includes the runtime directory. This is + // because the shims will be installed there, and containerd may infer + // the binary name and search the PATH. + runtimeDir := path.Dir(runtime) + modifiedPath := os.Getenv("PATH") + if modifiedPath != "" { + modifiedPath = ":" + modifiedPath // We prepend below. + } + modifiedPath = path.Dir(getContainerd()) + modifiedPath + modifiedPath = runtimeDir + ":" + modifiedPath + t.Logf("Using PATH: %v", modifiedPath) + + var ( + config string + runpArgs []string + ) + switch version { + case v1: + // This is only supported less than 1.3. + if major > 1 || (major == 1 && minor >= 3) { + t.Skipf("skipping unsupported containerd (want less than 1.3, got %d.%d)", major, minor) + } + + // We provide the shim, followed by the runtime, and then a + // temporary root directory. Note that we can safely assume + // that the shim has been installed in the same directory as + // the runtime (for test installs and for normal installs). + // Since this is v1, the binary name will be fixed. + config = fmt.Sprintf(v1Template, path.Join(runtimeDir, "gvisor-containerd-shim"), runtime, runtimeDir) + case v2: + // This is only supported past 1.2. + if major < 1 || (major == 1 && minor <= 1) { + t.Skipf("skipping incompatible containerd (want at least 1.2, got %d.%d)", major, minor) + } + + // The runtime is provided via parameter. 
Note that the v2 shim + // binary name is always containerd-shim-* so we don't actually + // care about the docker runtime name. + config = v2Template + default: + t.Fatalf("unknown version: %d", version) + } + t.Logf("Using config: %s", config) + + // Generate the configuration for the test. + configFile, configCleanup, err := testutil.WriteTmpFile("containerd-config", config) if err != nil { t.Fatalf("failed to write containerd config") } cu.Add(configCleanup) // Start containerd. - cmd := exec.Command(getContainerd(), - "--config", config, + args := []string{ + getContainerd(), + "--config", configFile, "--log-level", "debug", "--root", containerdRoot, "--state", containerdState, - "--address", sockAddr) + "--address", sockAddr, + } + t.Logf("Using args: %s", strings.Join(args, " ")) + cmd := exec.Command(args[0], args[1:]...) + cmd.Env = append(os.Environ(), "PATH="+modifiedPath) + + // Include output in logs. + stderrPipe, err := cmd.StderrPipe() + if err != nil { + t.Fatalf("failed to create stderr pipe: %v", err) + } + cu.Add(func() { stderrPipe.Close() }) + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + t.Fatalf("failed to create stdout pipe: %v", err) + } + cu.Add(func() { stdoutPipe.Close() }) + var ( + wg sync.WaitGroup + stderr bytes.Buffer + stdout bytes.Buffer + ) startupR, startupW := io.Pipe() - defer startupR.Close() - defer startupW.Close() - stderr := &bytes.Buffer{} - stdout := &bytes.Buffer{} - cmd.Stderr = io.MultiWriter(startupW, stderr) - cmd.Stdout = io.MultiWriter(startupW, stdout) + wg.Add(2) + go func() { + defer wg.Done() + io.Copy(io.MultiWriter(startupW, &stderr), stderrPipe) + }() + go func() { + defer wg.Done() + io.Copy(io.MultiWriter(startupW, &stdout), stdoutPipe) + }() cu.Add(func() { - // Log output in case of failure. + wg.Wait() t.Logf("containerd stdout: %s", stdout.String()) t.Logf("containerd stderr: %s", stderr.String()) }) @@ -345,13 +487,17 @@ func setup(t *testing.T) (*criutil.Crictl, func(), error) { t.Fatalf("failed to start containerd: %v", err) } + // Discard all subsequent data. + go io.Copy(ioutil.Discard, startupR) + + // Create the crictl interface. + cc := criutil.NewCrictl(t, sockAddr, runpArgs) + cu.Add(cc.CleanUp) + // Kill must be the last cleanup (as it will be executed first). - cc := criutil.NewCrictl(t, sockAddr) cu.Add(func() { - cc.CleanUp() // Remove tmp files, etc. - if err := testutil.KillCommand(cmd); err != nil { - log.Printf("error killing containerd: %v", err) - } + // Best effort: ignore errors. 
+ testutil.KillCommand(cmd) }) return cc, cu.Release(), nil diff --git a/test/shim/containerd-install.sh b/test/shim/containerd-install.sh deleted file mode 100755 index 400819245..000000000 --- a/test/shim/containerd-install.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# A script to install containerd and CNI plugins for e2e testing - -wget -q --https-only \ - https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz \ - https://github.com/containernetworking/plugins/releases/download/v0.7.0/cni-plugins-amd64-v0.7.0.tgz - -sudo mkdir -p /etc/containerd /etc/cni/net.d /opt/cni/bin -sudo tar -xvf cni-plugins-amd64-v0.7.0.tgz -C /opt/cni/bin/ -sudo tar -xvf containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz -C / - -cat </tmp/containerd-cri.log & diff --git a/test/shim/crictl-install.sh b/test/shim/crictl-install.sh deleted file mode 100755 index 1d63c889b..000000000 --- a/test/shim/crictl-install.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -# A sample script for installing crictl. - -set -ex - -{ # Step 1: Download crictl -wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz -tar xf crictl-v1.13.0-linux-amd64.tar.gz -sudo mv crictl /usr/local/bin -} - -{ # Step 2: Configure crictl -cat < /tmp/containerd-cri.log & -} diff --git a/test/shim/runtime-handler-shim-v2/test.sh b/test/shim/runtime-handler-shim-v2/test.sh deleted file mode 100755 index e33655ec1..000000000 --- a/test/shim/runtime-handler-shim-v2/test.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime -# handler. This should work on containerd 1.2+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/runtime-handler-shim-v2/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage (the same with runtime-handler) -. ./test/e2e/runtime-handler/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh -. ./test/e2e/runtime-handler-shim-v2/validate.sh diff --git a/test/shim/runtime-handler-shim-v2/validate.sh b/test/shim/runtime-handler-shim-v2/validate.sh deleted file mode 100755 index b74a059ef..000000000 --- a/test/shim/runtime-handler-shim-v2/validate.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -# A sample script to validating the running containerd-shim-runsc-v1. - -set -ex - -ps aux | grep [c]ontainerd-shim-runsc-v1 diff --git a/test/shim/runtime-handler/install.sh b/test/shim/runtime-handler/install.sh deleted file mode 100755 index ebe9d3580..000000000 --- a/test/shim/runtime-handler/install.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# A sample script for installing and configuring the gvisor-containerd-shim to -# use the containerd runtime handler. 
- -set -ex - -{ # Step 1: Create containerd config.toml -cat < /tmp/containerd-cri.log & -} diff --git a/test/shim/runtime-handler/test.sh b/test/shim/runtime-handler/test.sh deleted file mode 100755 index 99f3565b6..000000000 --- a/test/shim/runtime-handler/test.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime -# handler. This should work on containerd 1.2+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/runtime-handler/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage -. ./test/e2e/runtime-handler/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh diff --git a/test/shim/runtime-handler/usage.sh b/test/shim/runtime-handler/usage.sh deleted file mode 100755 index 350c720c2..000000000 --- a/test/shim/runtime-handler/usage.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# A sample script for testing the gvisor-containerd-shim -# using runtime handler. - -set -ex - -{ # Step 1: Pull the nginx image -sudo crictl pull nginx -} - -{ # Step 2: Create sandbox.json -cat </tmp/containerd-cri.log & -} diff --git a/test/shim/untrusted-workload/test.sh b/test/shim/untrusted-workload/test.sh deleted file mode 100755 index 6e312cf6d..000000000 --- a/test/shim/untrusted-workload/test.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Runs end-to-end tests for gvisor-containerd-shim to test using the -# untrusted workload extension. This should work on containerd 1.1+ - -# This is meant to be run in a VM as it makes a fairly invasive install of -# containerd. - -set -ex - -# Install containerd -. ./test/e2e/containerd-install.sh - -# Install gVisor -. ./test/e2e/runsc-install.sh - -# Install gvisor-containerd-shim -. ./test/e2e/shim-install.sh - -# Test installation/configuration -. ./test/e2e/untrusted-workload/install.sh - -# Install crictl -. ./test/e2e/crictl-install.sh - -# Test usage -. ./test/e2e/untrusted-workload/usage.sh - -# Run a container in the sandbox -. ./test/e2e/run-container.sh - -# Validate the pod and container -. ./test/e2e/validate.sh diff --git a/test/shim/untrusted-workload/usage.sh b/test/shim/untrusted-workload/usage.sh deleted file mode 100755 index db8206964..000000000 --- a/test/shim/untrusted-workload/usage.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# A sample script for testing the gvisor-containerd-shim # using untrusted -# workload extension. - -set -ex - -{ # Step 1: Pull the nginx image -sudo crictl pull nginx -} - -{ # Step 2: Create sandbox.json -cat < ${shim_config_path} <<-EOF + runc_shim = "/usr/bin/containerd-shim" + +[runsc_config] + debug = "true" + debug-log = "/tmp/runsc-logs/" + strace = "true" + file-access = "shared" +EOF +fi + +# Configure CNI. +(cd "${GOPATH}" && src/github.com/containerd/containerd/script/setup/install-cni) +cat </dev/null; then + service docker restart +fi diff --git a/tools/vm/ubuntu1604/25_docker.sh b/tools/vm/ubuntu1604/25_docker.sh deleted file mode 100755 index 53d8ca588..000000000 --- a/tools/vm/ubuntu1604/25_docker.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Add dependencies. -while true; do - if (apt-get update && apt-get install -y \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common); then - break - fi - result=$? - if [[ $result -ne 100 ]]; then - exit $result - fi -done - -# Install the key. -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - - -# Add the repository. -add-apt-repository \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ - stable" - -# Install docker. -while true; do - if (apt-get update && apt-get install -y \ - docker-ce \ - docker-ce-cli \ - containerd.io); then - break - fi - result=$? - if [[ $result -ne 100 ]]; then - exit $result - fi -done - -# Enable Docker IPv6. -cat > /etc/docker/daemon.json < ${shim_config_tmp_path} <<-EOF - runc_shim = "/usr/local/bin/containerd-shim" - -[runsc_config] - debug = "true" - debug-log = "/tmp/runsc-logs/" - strace = "true" - file-access = "shared" -EOF -mv ${shim_config_tmp_path} ${shim_config_path} - -# Configure CNI. -(cd "${GOPATH}" && GOPATH="${GOPATH}" \ - src/github.com/containerd/containerd/script/setup/install-cni) - -# Cleanup the above. -rm -rf "${GOPATH}" -rm -rf "${latest}" -rm -rf "${shim_path}" -rm -rf "${shim_config_tmp_path}" diff --git a/tools/vm/ubuntu1604/30_docker.sh b/tools/vm/ubuntu1604/30_docker.sh new file mode 100755 index 000000000..53d8ca588 --- /dev/null +++ b/tools/vm/ubuntu1604/30_docker.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Add dependencies. +while true; do + if (apt-get update && apt-get install -y \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg-agent \ + software-properties-common); then + break + fi + result=$? + if [[ $result -ne 100 ]]; then + exit $result + fi +done + +# Install the key. +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - + +# Add the repository. +add-apt-repository \ + "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) \ + stable" + +# Install docker. +while true; do + if (apt-get update && apt-get install -y \ + docker-ce \ + docker-ce-cli \ + containerd.io); then + break + fi + result=$? + if [[ $result -ne 100 ]]; then + exit $result + fi +done + +# Enable Docker IPv6. +cat > /etc/docker/daemon.json < Date: Sat, 11 Jul 2020 06:21:34 -0700 Subject: Stub out SO_DETACH_FILTER. 
Updates #2746 PiperOrigin-RevId: 320757963 --- pkg/sentry/socket/netstack/netstack.go | 5 +++ pkg/tcpip/tcpip.go | 5 ++- pkg/tcpip/transport/icmp/endpoint.go | 4 ++ pkg/tcpip/transport/packet/endpoint.go | 8 +++- pkg/tcpip/transport/raw/endpoint.go | 8 +++- pkg/tcpip/transport/tcp/endpoint.go | 3 ++ pkg/tcpip/transport/udp/endpoint.go | 3 ++ test/syscalls/linux/BUILD | 3 ++ test/syscalls/linux/packet_socket_raw.cc | 34 ++++++++++++++++ test/syscalls/linux/raw_socket.cc | 37 +++++++++++++++-- test/syscalls/linux/tcp_socket.cc | 60 ++++++++++++++++++++++++++++ test/syscalls/linux/udp_socket_test_cases.cc | 54 +++++++++++++++++++++++++ 12 files changed, 217 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 5a3cedd7c..78a842973 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1754,6 +1754,11 @@ func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, nam return nil + case linux.SO_DETACH_FILTER: + // optval is ignored. + var v tcpip.SocketDetachFilterOption + return syserr.TranslateNetstackError(ep.SetSockOpt(v)) + default: socket.SetSockOptEmitUnimplementedEvent(t, name) } diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index cf7291d09..71bcee785 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -855,7 +855,10 @@ type OutOfBandInlineOption int // a default TTL. type DefaultTTLOption uint8 -// +// SocketDetachFilterOption is used by SetSockOpt to detach a previously attached +// classic BPF filter on a given endpoint. +type SocketDetachFilterOption int + // IPPacketInfo is the message structure for IP_PKTINFO. // // +stateify savable diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index 62d1acad4..678f4e016 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -344,6 +344,10 @@ func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) { // SetSockOpt sets a socket option. func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { + switch opt.(type) { + case tcpip.SocketDetachFilterOption: + return nil + } return nil } diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index a8f8454dd..57b7f5c19 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -278,7 +278,13 @@ func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { // used with SetSockOpt, and this function always returns // tcpip.ErrNotSupported. func (ep *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption + switch opt.(type) { + case tcpip.SocketDetachFilterOption: + return nil + + default: + return tcpip.ErrUnknownProtocolOption + } } // SetSockOptBool implements tcpip.Endpoint.SetSockOptBool. diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index 5b6e7d102..c2e9fd29f 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -506,7 +506,13 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { // SetSockOpt implements tcpip.Endpoint.SetSockOpt. 
func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { - return tcpip.ErrUnknownProtocolOption + switch opt.(type) { + case tcpip.SocketDetachFilterOption: + return nil + + default: + return tcpip.ErrUnknownProtocolOption + } } // SetSockOptBool implements tcpip.Endpoint.SetSockOptBool. diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index caac6ef57..83dc10ed0 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1792,6 +1792,9 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.deferAccept = time.Duration(v) e.UnlockUser() + case tcpip.SocketDetachFilterOption: + return nil + default: return nil } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 4e9e114a9..a14643ae8 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -816,6 +816,9 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { e.mu.Lock() e.bindToDevice = id e.mu.Unlock() + + case tcpip.SocketDetachFilterOption: + return nil } return nil } diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 9e097c888..662d780d8 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1330,6 +1330,7 @@ cc_binary( name = "packet_socket_raw_test", testonly = 1, srcs = ["packet_socket_raw.cc"], + defines = select_system(), linkstatic = 1, deps = [ ":socket_test_util", @@ -1809,6 +1810,7 @@ cc_binary( name = "raw_socket_test", testonly = 1, srcs = ["raw_socket.cc"], + defines = select_system(), linkstatic = 1, deps = [ ":socket_test_util", @@ -3407,6 +3409,7 @@ cc_binary( name = "tcp_socket_test", testonly = 1, srcs = ["tcp_socket.cc"], + defines = select_system(), linkstatic = 1, deps = [ ":socket_test_util", diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc index 4093ac813..6a963b12c 100644 --- a/test/syscalls/linux/packet_socket_raw.cc +++ b/test/syscalls/linux/packet_socket_raw.cc @@ -14,6 +14,9 @@ #include #include +#ifndef __fuchsia__ +#include +#endif // __fuchsia__ #include #include #include @@ -556,6 +559,37 @@ TEST_P(RawPacketTest, SetSocketSendBuf) { ASSERT_EQ(quarter_sz, val); } +#ifndef __fuchsia__ + +TEST_P(RawPacketTest, SetSocketDetachFilterNoInstalledFilter) { + // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. + // + // gVisor returns no error on SO_DETACH_FILTER even if there is no filter + // attached unlike linux which does return ENOENT in such cases. This is + // because gVisor doesn't support SO_ATTACH_FILTER and just silently returns + // success. 
+ if (IsRunningOnGvisor()) { + constexpr int val = 0; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), + SyscallSucceeds()); + return; + } + constexpr int val = 0; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), + SyscallFailsWithErrno(ENOENT)); +} + +TEST_P(RawPacketTest, GetSocketDetachFilter) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len), + SyscallFailsWithErrno(ENOPROTOOPT)); +} + +#endif // __fuchsia__ + INSTANTIATE_TEST_SUITE_P(AllInetTests, RawPacketTest, ::testing::Values(ETH_P_IP, ETH_P_ALL)); diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc index 05c4ed03f..ce54dc064 100644 --- a/test/syscalls/linux/raw_socket.cc +++ b/test/syscalls/linux/raw_socket.cc @@ -13,6 +13,9 @@ // limitations under the License. #include +#ifndef __fuchsia__ +#include +#endif // __fuchsia__ #include #include #include @@ -21,6 +24,7 @@ #include #include #include + #include #include "gtest/gtest.h" @@ -790,10 +794,30 @@ void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf, ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len)); } -INSTANTIATE_TEST_SUITE_P(AllInetTests, RawSocketTest, - ::testing::Combine( - ::testing::Values(IPPROTO_TCP, IPPROTO_UDP), - ::testing::Values(AF_INET, AF_INET6))); +#ifndef __fuchsia__ + +TEST_P(RawSocketTest, SetSocketDetachFilterNoInstalledFilter) { + // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. + if (IsRunningOnGvisor()) { + constexpr int val = 0; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), + SyscallSucceeds()); + return; + } + + constexpr int val = 0; + ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), + SyscallFailsWithErrno(ENOENT)); +} + +TEST_P(RawSocketTest, GetSocketDetachFilter) { + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len), + SyscallFailsWithErrno(ENOPROTOOPT)); +} + +#endif // __fuchsia__ // AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to. TEST(RawSocketTest, IPv6ProtoRaw) { @@ -813,6 +837,11 @@ TEST(RawSocketTest, IPv6ProtoRaw) { SyscallFailsWithErrno(EINVAL)); } +INSTANTIATE_TEST_SUITE_P( + AllInetTests, RawSocketTest, + ::testing::Combine(::testing::Values(IPPROTO_TCP, IPPROTO_UDP), + ::testing::Values(AF_INET, AF_INET6))); + } // namespace } // namespace testing diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index a4d2953e1..0cea7d11f 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -13,6 +13,9 @@ // limitations under the License. #include +#ifndef __fuchsia__ +#include +#endif // __fuchsia__ #include #include #include @@ -1559,6 +1562,63 @@ TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) { } } +#ifndef __fuchsia__ + +// TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. +// gVisor currently silently ignores attaching a filter. 
+TEST_P(SimpleTcpSocketTest, SetSocketAttachDetachFilter) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + // Program generated using sudo tcpdump -i lo tcp and port 1234 -dd + struct sock_filter code[] = { + {0x28, 0, 0, 0x0000000c}, {0x15, 0, 6, 0x000086dd}, + {0x30, 0, 0, 0x00000014}, {0x15, 0, 15, 0x00000006}, + {0x28, 0, 0, 0x00000036}, {0x15, 12, 0, 0x000004d2}, + {0x28, 0, 0, 0x00000038}, {0x15, 10, 11, 0x000004d2}, + {0x15, 0, 10, 0x00000800}, {0x30, 0, 0, 0x00000017}, + {0x15, 0, 8, 0x00000006}, {0x28, 0, 0, 0x00000014}, + {0x45, 6, 0, 0x00001fff}, {0xb1, 0, 0, 0x0000000e}, + {0x48, 0, 0, 0x0000000e}, {0x15, 2, 0, 0x000004d2}, + {0x48, 0, 0, 0x00000010}, {0x15, 0, 1, 0x000004d2}, + {0x6, 0, 0, 0x00040000}, {0x6, 0, 0, 0x00000000}, + }; + struct sock_fprog bpf = { + .len = ABSL_ARRAYSIZE(code), + .filter = code, + }; + ASSERT_THAT( + setsockopt(s.get(), SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)), + SyscallSucceeds()); + + constexpr int val = 0; + ASSERT_THAT( + setsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), + SyscallSucceeds()); +} + +TEST_P(SimpleTcpSocketTest, SetSocketDetachFilterNoInstalledFilter) { + // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. + SKIP_IF(IsRunningOnGvisor()); + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + constexpr int val = 0; + ASSERT_THAT( + setsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), + SyscallFailsWithErrno(ENOENT)); +} + +TEST_P(SimpleTcpSocketTest, GetSocketDetachFilter) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len), + SyscallFailsWithErrno(ENOPROTOOPT)); +} + +#endif // __fuchsia__ + INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc index 9cc6be4fb..60c48ed6e 100644 --- a/test/syscalls/linux/udp_socket_test_cases.cc +++ b/test/syscalls/linux/udp_socket_test_cases.cc @@ -16,6 +16,9 @@ #include #include +#ifndef __fuchsia__ +#include +#endif // __fuchsia__ #include #include #include @@ -1723,5 +1726,56 @@ TEST_P(UdpSocketTest, RecvBufLimits) { } } +#ifndef __fuchsia__ + +// TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. +// gVisor currently silently ignores attaching a filter. 
+TEST_P(UdpSocketTest, SetSocketDetachFilter) { + // Program generated using sudo tcpdump -i lo udp and port 1234 -dd + struct sock_filter code[] = { + {0x28, 0, 0, 0x0000000c}, {0x15, 0, 6, 0x000086dd}, + {0x30, 0, 0, 0x00000014}, {0x15, 0, 15, 0x00000011}, + {0x28, 0, 0, 0x00000036}, {0x15, 12, 0, 0x000004d2}, + {0x28, 0, 0, 0x00000038}, {0x15, 10, 11, 0x000004d2}, + {0x15, 0, 10, 0x00000800}, {0x30, 0, 0, 0x00000017}, + {0x15, 0, 8, 0x00000011}, {0x28, 0, 0, 0x00000014}, + {0x45, 6, 0, 0x00001fff}, {0xb1, 0, 0, 0x0000000e}, + {0x48, 0, 0, 0x0000000e}, {0x15, 2, 0, 0x000004d2}, + {0x48, 0, 0, 0x00000010}, {0x15, 0, 1, 0x000004d2}, + {0x6, 0, 0, 0x00040000}, {0x6, 0, 0, 0x00000000}, + }; + struct sock_fprog bpf = { + .len = ABSL_ARRAYSIZE(code), + .filter = code, + }; + ASSERT_THAT( + setsockopt(sock_.get(), SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)), + SyscallSucceeds()); + + constexpr int val = 0; + ASSERT_THAT( + setsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), + SyscallSucceeds()); +} + +TEST_P(UdpSocketTest, SetSocketDetachFilterNoInstalledFilter) { + // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. + SKIP_IF(IsRunningOnGvisor()); + constexpr int val = 0; + ASSERT_THAT( + setsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), + SyscallFailsWithErrno(ENOENT)); +} + +TEST_P(UdpSocketTest, GetSocketDetachFilter) { + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT( + getsockopt(sock_.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len), + SyscallFailsWithErrno(ENOPROTOOPT)); +} + +#endif // __fuchsia__ + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 69f2059e5d38bacac4bcda7912cca580ab70914d Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Sat, 11 Jul 2020 08:17:07 -0700 Subject: Runtime test batch executor Earlier we were docker exec-ing each test at a time. However invoking the test framework has a fixed overhead which made it infeasible to make the runtime tests run as presubmits. This change now executes tests in batches of 50 (can be altered). This really speeds up testing process. 
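
The batching idea is simple: split the full test list into fixed-size chunks and hand each chunk to the proctor in a single invocation (the change passes each chunk as one comma-joined `--tests` argument). The sketch below only illustrates that slicing step; the function name, batch size, and sample test names are placeholders for illustration, not the exact code in this change:

```go
package main

import (
	"fmt"
	"strings"
)

// batch splits tests into chunks of at most size entries. Each chunk would be
// handed to the proctor as a single comma-separated --tests argument.
func batch(tests []string, size int) [][]string {
	var out [][]string
	for i := 0; i < len(tests); i += size {
		end := i + size
		if end > len(tests) {
			end = len(tests)
		}
		out = append(out, tests[i:end])
	}
	return out
}

func main() {
	tests := []string{"t1", "t2", "t3", "t4", "t5"}
	for _, chunk := range batch(tests, 2) {
		fmt.Println(strings.Join(chunk, ","))
	}
}
```
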
With this change, the following tests can be run in reasonable times: - Go - Nodejs - Php - Python PiperOrigin-RevId: 320763916 --- test/runtimes/BUILD | 5 +++++ test/runtimes/exclude_nodejs12.4.0.csv | 3 +++ test/runtimes/exclude_php7.3.6.csv | 6 ++++++ test/runtimes/proctor/go.go | 29 +++++++++++++++++--------- test/runtimes/proctor/java.go | 18 +++++++++------- test/runtimes/proctor/nodejs.go | 8 +++---- test/runtimes/proctor/php.go | 9 ++++---- test/runtimes/proctor/proctor.go | 28 +++++++++++++++---------- test/runtimes/proctor/python.go | 8 +++---- test/runtimes/runner/BUILD | 1 + test/runtimes/runner/exclude_test.go | 2 +- test/runtimes/runner/main.go | 38 ++++++++++++++++++++-------------- 12 files changed, 98 insertions(+), 57 deletions(-) (limited to 'test') diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 022de5ff7..f98d02e00 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -6,28 +6,33 @@ runtime_test( name = "go1.12", exclude_file = "exclude_go1.12.csv", lang = "go", + shard_count = 5, ) runtime_test( name = "java11", exclude_file = "exclude_java11.csv", lang = "java", + shard_count = 10, ) runtime_test( name = "nodejs12.4.0", exclude_file = "exclude_nodejs12.4.0.csv", lang = "nodejs", + shard_count = 5, ) runtime_test( name = "php7.3.6", exclude_file = "exclude_php7.3.6.csv", lang = "php", + shard_count = 5, ) runtime_test( name = "python3.7.3", exclude_file = "exclude_python3.7.3.csv", lang = "python", + shard_count = 5, ) diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv index 4ab4e2927..e7edfa0a5 100644 --- a/test/runtimes/exclude_nodejs12.4.0.csv +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -9,6 +9,8 @@ fixtures/test-fs-stat-sync-overflow.js,, internet/test-dgram-broadcast-multi-process.js,, internet/test-dgram-multicast-multi-process.js,, internet/test-dgram-multicast-set-interface-lo.js,, +internet/test-doctool-versions.js,, +internet/test-uv-threadpool-schedule.js,, parallel/test-cluster-dgram-reuse.js,b/64024294, parallel/test-dgram-bind-fd.js,b/132447356, parallel/test-dgram-create-socket-handle-fd.js,b/132447238, @@ -45,3 +47,4 @@ pseudo-tty/test-tty-window-size.js,, pseudo-tty/test-tty-wrap.js,, pummel/test-net-pingpong.js,, pummel/test-vm-memleak.js,, +tick-processor/test-tick-processor-builtin.js,, diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv index 456bf7487..f3606bfe8 100644 --- a/test/runtimes/exclude_php7.3.6.csv +++ b/test/runtimes/exclude_php7.3.6.csv @@ -8,6 +8,9 @@ ext/mbstring/tests/bug77165.phpt,, ext/mbstring/tests/bug77454.phpt,, ext/mbstring/tests/mb_convert_encoding_leak.phpt,, ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, +ext/session/tests/session_set_save_handler_class_018.phpt,, +ext/session/tests/session_set_save_handler_iface_003.phpt,, +ext/session/tests/session_set_save_handler_variation4.phpt,, ext/standard/tests/file/filetype_variation.phpt,, ext/standard/tests/file/fopen_variation19.phpt,, ext/standard/tests/file/php_fd_wrapper_01.phpt,, @@ -21,9 +24,12 @@ ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, ext/standard/tests/network/bug20134.phpt,, +ext/standard/tests/streams/stream_socket_sendto.phpt,, +ext/standard/tests/strings/007.phpt,, tests/output/stream_isatty_err.phpt,b/68720279, tests/output/stream_isatty_in-err.phpt,b/68720282, 
tests/output/stream_isatty_in-out-err.phpt,, tests/output/stream_isatty_in-out.phpt,b/68720299, tests/output/stream_isatty_out-err.phpt,b/68720311, tests/output/stream_isatty_out.phpt,b/68720325, +Zend/tests/concat_003.phpt,, diff --git a/test/runtimes/proctor/go.go b/test/runtimes/proctor/go.go index 3e2d5d8db..073c2959d 100644 --- a/test/runtimes/proctor/go.go +++ b/test/runtimes/proctor/go.go @@ -74,17 +74,26 @@ func (goRunner) ListTests() ([]string, error) { return append(toolSlice, diskFiltered...), nil } -// TestCmd implements TestRunner.TestCmd. -func (goRunner) TestCmd(test string) *exec.Cmd { - // Check if test exists on disk by searching for file of the same name. - // This will determine whether or not it is a Go test on disk. - if strings.HasSuffix(test, ".go") { - // Test has suffix ".go" which indicates a disk test, run it as such. - cmd := exec.Command("go", "run", "run.go", "-v", "--", test) +// TestCmds implements TestRunner.TestCmds. +func (goRunner) TestCmds(tests []string) []*exec.Cmd { + var toolTests, onDiskTests []string + for _, test := range tests { + if strings.HasSuffix(test, ".go") { + onDiskTests = append(onDiskTests, test) + } else { + toolTests = append(toolTests, test) + } + } + + var cmds []*exec.Cmd + if len(toolTests) > 0 { + cmds = append(cmds, exec.Command("go", "tool", "dist", "test", "-run", strings.Join(toolTests, "\\|"))) + } + if len(onDiskTests) > 0 { + cmd := exec.Command("go", append([]string{"run", "run.go", "-v", "--"}, onDiskTests...)...) cmd.Dir = goTestDir - return cmd + cmds = append(cmds, cmd) } - // No ".go" suffix, run as a tool test. - return exec.Command("go", "tool", "dist", "test", "-run", test) + return cmds } diff --git a/test/runtimes/proctor/java.go b/test/runtimes/proctor/java.go index 8b362029d..737fbe23e 100644 --- a/test/runtimes/proctor/java.go +++ b/test/runtimes/proctor/java.go @@ -60,12 +60,14 @@ func (javaRunner) ListTests() ([]string, error) { return testSlice, nil } -// TestCmd implements TestRunner.TestCmd. -func (javaRunner) TestCmd(test string) *exec.Cmd { - args := []string{ - "-noreport", - "-dir:" + javaTestDir, - test, - } - return exec.Command("jtreg", args...) +// TestCmds implements TestRunner.TestCmds. +func (javaRunner) TestCmds(tests []string) []*exec.Cmd { + args := append( + []string{ + "-noreport", + "-dir:" + javaTestDir, + }, + tests..., + ) + return []*exec.Cmd{exec.Command("jtreg", args...)} } diff --git a/test/runtimes/proctor/nodejs.go b/test/runtimes/proctor/nodejs.go index bd57db444..23d6edc72 100644 --- a/test/runtimes/proctor/nodejs.go +++ b/test/runtimes/proctor/nodejs.go @@ -39,8 +39,8 @@ func (nodejsRunner) ListTests() ([]string, error) { return testSlice, nil } -// TestCmd implements TestRunner.TestCmd. -func (nodejsRunner) TestCmd(test string) *exec.Cmd { - args := []string{filepath.Join("tools", "test.py"), test} - return exec.Command("/usr/bin/python", args...) +// TestCmds implements TestRunner.TestCmds. +func (nodejsRunner) TestCmds(tests []string) []*exec.Cmd { + args := append([]string{filepath.Join("tools", "test.py")}, tests...) 
+ return []*exec.Cmd{exec.Command("/usr/bin/python", args...)} } diff --git a/test/runtimes/proctor/php.go b/test/runtimes/proctor/php.go index 9115040e1..6a83d64e3 100644 --- a/test/runtimes/proctor/php.go +++ b/test/runtimes/proctor/php.go @@ -17,6 +17,7 @@ package main import ( "os/exec" "regexp" + "strings" ) var phpTestRegEx = regexp.MustCompile(`^.+\.phpt$`) @@ -35,8 +36,8 @@ func (phpRunner) ListTests() ([]string, error) { return testSlice, nil } -// TestCmd implements TestRunner.TestCmd. -func (phpRunner) TestCmd(test string) *exec.Cmd { - args := []string{"test", "TESTS=" + test} - return exec.Command("make", args...) +// TestCmds implements TestRunner.TestCmds. +func (phpRunner) TestCmds(tests []string) []*exec.Cmd { + args := []string{"test", "TESTS=" + strings.Join(tests, " ")} + return []*exec.Cmd{exec.Command("make", args...)} } diff --git a/test/runtimes/proctor/proctor.go b/test/runtimes/proctor/proctor.go index b54abe434..9e0642424 100644 --- a/test/runtimes/proctor/proctor.go +++ b/test/runtimes/proctor/proctor.go @@ -25,6 +25,7 @@ import ( "os/signal" "path/filepath" "regexp" + "strings" "syscall" ) @@ -34,15 +35,18 @@ type TestRunner interface { // ListTests returns a string slice of tests available to run. ListTests() ([]string, error) - // TestCmd returns an *exec.Cmd that will run the given test. - TestCmd(test string) *exec.Cmd + // TestCmds returns a slice of *exec.Cmd that will run the given tests. + // There is no correlation between the number of exec.Cmds returned and the + // number of tests. It could return one command to run all tests or a few + // commands that collectively run all. + TestCmds(tests []string) []*exec.Cmd } var ( - runtime = flag.String("runtime", "", "name of runtime") - list = flag.Bool("list", false, "list all available tests") - testName = flag.String("test", "", "run a single test from the list of available tests") - pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children") + runtime = flag.String("runtime", "", "name of runtime") + list = flag.Bool("list", false, "list all available tests") + testNames = flag.String("tests", "", "run a subset of the available tests") + pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children") ) func main() { @@ -75,18 +79,20 @@ func main() { } var tests []string - if *testName == "" { + if *testNames == "" { // Run every test. tests, err = tr.ListTests() if err != nil { log.Fatalf("failed to get all tests: %v", err) } } else { - // Run a single test. - tests = []string{*testName} + // Run subset of test. + tests = strings.Split(*testNames, ",") } - for _, test := range tests { - cmd := tr.TestCmd(test) + + // Run tests. + cmds := tr.TestCmds(tests) + for _, cmd := range cmds { cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr if err := cmd.Run(); err != nil { log.Fatalf("FAIL: %v", err) diff --git a/test/runtimes/proctor/python.go b/test/runtimes/proctor/python.go index b9e0fbe6f..7c598801b 100644 --- a/test/runtimes/proctor/python.go +++ b/test/runtimes/proctor/python.go @@ -42,8 +42,8 @@ func (pythonRunner) ListTests() ([]string, error) { return toolSlice, nil } -// TestCmd implements TestRunner.TestCmd. -func (pythonRunner) TestCmd(test string) *exec.Cmd { - args := []string{"-m", "test", test} - return exec.Command("./python", args...) +// TestCmds implements TestRunner.TestCmds. +func (pythonRunner) TestCmds(tests []string) []*exec.Cmd { + args := append([]string{"-m", "test"}, tests...) 
+ return []*exec.Cmd{exec.Command("./python", args...)} } diff --git a/test/runtimes/runner/BUILD b/test/runtimes/runner/BUILD index 3972244b9..dc0d5d5b4 100644 --- a/test/runtimes/runner/BUILD +++ b/test/runtimes/runner/BUILD @@ -8,6 +8,7 @@ go_binary( srcs = ["main.go"], visibility = ["//test/runtimes:__pkg__"], deps = [ + "//pkg/log", "//pkg/test/dockerutil", "//pkg/test/testutil", ], diff --git a/test/runtimes/runner/exclude_test.go b/test/runtimes/runner/exclude_test.go index c08755894..67c2170c8 100644 --- a/test/runtimes/runner/exclude_test.go +++ b/test/runtimes/runner/exclude_test.go @@ -26,7 +26,7 @@ func TestMain(m *testing.M) { } // Test that the exclude file parses without error. -func TestBlacklists(t *testing.T) { +func TestExcludelist(t *testing.T) { ex, err := getExcludes() if err != nil { t.Fatalf("error parsing exclude file: %v", err) diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go index 2a0f62c73..e230912c9 100644 --- a/test/runtimes/runner/main.go +++ b/test/runtimes/runner/main.go @@ -27,6 +27,7 @@ import ( "testing" "time" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/pkg/test/testutil" ) @@ -35,10 +36,11 @@ var ( lang = flag.String("lang", "", "language runtime to test") image = flag.String("image", "", "docker image with runtime tests") excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment") + batchSize = flag.Int("batch", 50, "number of test cases run in one command") ) // Wait time for each test to run. -const timeout = 5 * time.Minute +const timeout = 45 * time.Minute func main() { flag.Parse() @@ -110,17 +112,23 @@ func getTests(ctx context.Context, d *dockerutil.Container, excludes map[string] } var itests []testing.InternalTest - for _, tci := range indices { - // Capture tc in this scope. - tc := tests[tci] + for i := 0; i < len(indices); i += *batchSize { + var tcs []string + end := i + *batchSize + if end > len(indices) { + end = len(indices) + } + for _, tc := range indices[i:end] { + // Add test if not excluded. + if _, ok := excludes[tests[tc]]; ok { + log.Infof("Skipping test case %s\n", tests[tc]) + continue + } + tcs = append(tcs, tests[tc]) + } itests = append(itests, testing.InternalTest{ - Name: tc, + Name: strings.Join(tcs, ", "), F: func(t *testing.T) { - // Is the test excluded? 
- if _, ok := excludes[tc]; ok { - t.Skipf("SKIP: excluded test %q", tc) - } - var ( now = time.Now() done = make(chan struct{}) @@ -129,20 +137,20 @@ func getTests(ctx context.Context, d *dockerutil.Container, excludes map[string] ) go func() { - fmt.Printf("RUNNING %s...\n", tc) - output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--test", tc) + fmt.Printf("RUNNING the following in a batch\n%s\n", strings.Join(tcs, "\n")) + output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--tests", strings.Join(tcs, ",")) close(done) }() select { case <-done: if err == nil { - fmt.Printf("PASS: %s (%v)\n\n", tc, time.Since(now)) + fmt.Printf("PASS: (%v)\n\n", time.Since(now)) return } - t.Errorf("FAIL: %s (%v):\n%s\n", tc, time.Since(now), output) + t.Errorf("FAIL: (%v):\n%s\n", time.Since(now), output) case <-time.After(timeout): - t.Errorf("TIMEOUT: %s (%v):\n%s\n", tc, time.Since(now), output) + t.Errorf("TIMEOUT: (%v):\n%s\n", time.Since(now), output) } }, }) -- cgit v1.2.3 From bafef1cf3afadb2b686b01b4f589f68265410009 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Mon, 13 Jul 2020 10:56:51 -0700 Subject: Print testbench log on packetimpact failures These logs include flags passed to packetimpact tests (the Go tests), and test failure messages. PiperOrigin-RevId: 320989521 --- test/packetimpact/runner/packetimpact_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 9290d5112..397ca3ba5 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -283,8 +283,8 @@ func TestOne(t *testing.T) { // will issue a RST. To prevent this IPtables can be used to filter out all // incoming packets. The raw socket that packetimpact tests use will still see // everything. - if _, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil { - t.Fatalf("unable to Exec iptables on container %s: %s", testbench.Name, err) + if logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil { + t.Fatalf("unable to Exec iptables on container %s: %s, logs from testbench:\n%s", testbench.Name, err, logs) } // FIXME(b/156449515): Some piece of the system has a race. The old @@ -307,12 +307,12 @@ func TestOne(t *testing.T) { "--device", testNetDev, "--dut_type", *dutPlatform, ) - _, err = testbench.Exec(ctx, dockerutil.ExecOpts{}, testArgs...) + logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, testArgs...) 
if !*expectFailure && err != nil { - t.Fatal("test failed:", err) + t.Fatalf("test failed: %v, logs from testbench:\n%s", err, logs) } if *expectFailure && err == nil { - t.Fatal("test failure expected but the test succeeded, enable the test and mark the corresponding bug as fixed") + t.Fatalf("test failure expected but the test succeeded, enable the test and mark the corresponding bug as fixed, logs from testbench:\n%s", logs) } } -- cgit v1.2.3 From 76b392bc262d5c0af10b3127b7aad85a4130da78 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Mon, 13 Jul 2020 11:44:41 -0700 Subject: Create packetimpact test for UDP broadcast PiperOrigin-RevId: 321000340 --- test/packetimpact/testbench/testbench.go | 25 ++++++--- test/packetimpact/tests/BUILD | 4 +- .../tests/udp_recv_mcast_bcast_test.go | 63 ++++++++++++++++++++++ test/packetimpact/tests/udp_recv_multicast_test.go | 40 -------------- 4 files changed, 84 insertions(+), 48 deletions(-) create mode 100644 test/packetimpact/tests/udp_recv_mcast_bcast_test.go delete mode 100644 test/packetimpact/tests/udp_recv_multicast_test.go (limited to 'test') diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go index d64f32a5b..6530036a8 100644 --- a/test/packetimpact/testbench/testbench.go +++ b/test/packetimpact/testbench/testbench.go @@ -31,23 +31,30 @@ var ( DUTType = "" // Device is the local device on the test network. Device = "" + // LocalIPv4 is the local IPv4 address on the test network. LocalIPv4 = "" + // RemoteIPv4 is the DUT's IPv4 address on the test network. + RemoteIPv4 = "" + // IPv4PrefixLength is the network prefix length of the IPv4 test network. + IPv4PrefixLength = 0 + // LocalIPv6 is the local IPv6 address on the test network. LocalIPv6 = "" + // RemoteIPv6 is the DUT's IPv6 address on the test network. + RemoteIPv6 = "" + // LocalMAC is the local MAC address on the test network. LocalMAC = "" + // RemoteMAC is the DUT's MAC address on the test network. + RemoteMAC = "" + // POSIXServerIP is the POSIX server's IP address on the control network. POSIXServerIP = "" // POSIXServerPort is the UDP port the POSIX server is bound to on the // control network. POSIXServerPort = 40000 - // RemoteIPv4 is the DUT's IPv4 address on the test network. - RemoteIPv4 = "" - // RemoteIPv6 is the DUT's IPv6 address on the test network. - RemoteIPv6 = "" - // RemoteMAC is the DUT's MAC address on the test network. - RemoteMAC = "" + // RPCKeepalive is the gRPC keepalive. RPCKeepalive = 10 * time.Second // RPCTimeout is the gRPC timeout. @@ -91,6 +98,12 @@ func genPseudoFlags() error { LocalMAC = deviceInfo.MAC.String() LocalIPv6 = deviceInfo.IPv6Addr.String() + if deviceInfo.IPv4Net != nil { + IPv4PrefixLength, _ = deviceInfo.IPv4Net.Mask.Size() + } else { + IPv4PrefixLength, _ = net.ParseIP(LocalIPv4).DefaultMask().Size() + } + return nil } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 3ecbe83eb..3f538b5c6 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -27,8 +27,8 @@ packetimpact_go_test( ) packetimpact_go_test( - name = "udp_recv_multicast", - srcs = ["udp_recv_multicast_test.go"], + name = "udp_recv_mcast_bcast", + srcs = ["udp_recv_mcast_bcast_test.go"], # TODO(b/152813495): Fix netstack then remove the line below. 
expect_netstack_failure = True, deps = [ diff --git a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go new file mode 100644 index 000000000..263a54291 --- /dev/null +++ b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go @@ -0,0 +1,63 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package udp_recv_mcast_bcast_test + +import ( + "flag" + "net" + "testing" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +func TestUDPRecvMulticastBroadcast(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4(0, 0, 0, 0)) + defer dut.Close(boundFD) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) + defer conn.Close() + + for _, bcastAddr := range []net.IP{ + broadcastAddr(net.ParseIP(testbench.RemoteIPv4), net.CIDRMask(testbench.IPv4PrefixLength, 32)), + net.IPv4(255, 255, 255, 255), + net.IPv4(224, 0, 0, 1), + } { + payload := testbench.GenerateRandomPayload(t, 1<<10) + conn.SendIP( + testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(bcastAddr.To4()))}, + testbench.UDP{}, + &testbench.Payload{Bytes: payload}, + ) + t.Logf("Receiving packet sent to address: %s", bcastAddr) + if got, want := string(dut.Recv(boundFD, int32(len(payload)), 0)), string(payload); got != want { + t.Errorf("received payload does not match sent payload got: %s, want: %s", got, want) + } + } +} + +func broadcastAddr(ip net.IP, mask net.IPMask) net.IP { + ip4 := ip.To4() + for i := range ip4 { + ip4[i] |= ^mask[i] + } + return ip4 +} diff --git a/test/packetimpact/tests/udp_recv_multicast_test.go b/test/packetimpact/tests/udp_recv_multicast_test.go deleted file mode 100644 index 77a9bfa1d..000000000 --- a/test/packetimpact/tests/udp_recv_multicast_test.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
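The new `udp_recv_mcast_bcast` test derives one of its destinations from the DUT address and prefix length. A self-contained sketch of that subnet-directed broadcast computation follows (not part of the patch; the function name and the sample prefix are illustrative):

```go
// Computes the subnet-directed broadcast address by OR-ing the host address
// with the inverted network mask, as the test's broadcastAddr helper does.
package main

import (
	"fmt"
	"net"
)

func subnetBroadcast(ip net.IP, mask net.IPMask) net.IP {
	ip4 := ip.To4() // assumes an IPv4 address
	out := make(net.IP, len(ip4))
	for i := range ip4 {
		out[i] = ip4[i] | ^mask[i]
	}
	return out
}

func main() {
	// For 192.168.9.2/24 this prints 192.168.9.255.
	fmt.Println(subnetBroadcast(net.IPv4(192, 168, 9, 2), net.CIDRMask(24, 32)))
}
```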
- -package udp_recv_multicast_test - -import ( - "flag" - "net" - "testing" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/test/packetimpact/testbench" -) - -func init() { - testbench.RegisterFlags(flag.CommandLine) -} - -func TestUDPRecvMulticast(t *testing.T) { - dut := testbench.NewDUT(t) - defer dut.TearDown() - boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) - defer dut.Close(boundFD) - conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer conn.Close() - conn.SendIP(testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(net.ParseIP("224.0.0.1").To4()))}, testbench.UDP{}) - dut.Recv(boundFD, 100, 0) -} -- cgit v1.2.3 From b7e8ce93de54a0f897832877255bed7005b08f14 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Mon, 13 Jul 2020 12:22:01 -0700 Subject: Add ReadAllFd to test util PiperOrigin-RevId: 321008185 --- test/util/test_util.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'test') diff --git a/test/util/test_util.h b/test/util/test_util.h index 109078fc7..0f9781038 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -567,6 +567,25 @@ ssize_t ApplyFileIoSyscall(F const& f, size_t const count) { } // namespace internal +inline PosixErrorOr ReadAllFd(int fd) { + std::string all; + all.reserve(128 * 1024); // arbitrary. + + std::vector buffer(16 * 1024); + for (;;) { + auto const bytes = RetryEINTR(read)(fd, buffer.data(), buffer.size()); + if (bytes < 0) { + return PosixError(errno, "file read"); + } + if (bytes == 0) { + return std::move(all); + } + if (bytes > 0) { + all.append(buffer.data(), bytes); + } + } +} + inline ssize_t ReadFd(int fd, void* buf, size_t count) { return internal::ApplyFileIoSyscall( [&](size_t completed) { -- cgit v1.2.3 From b8d3d09bd16f1f6d684ce2ca6f16109567cb0fc2 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Mon, 13 Jul 2020 13:23:50 -0700 Subject: Initial golang Benchmarks PiperOrigin-RevId: 321021071 --- images/benchmarks/absl/Dockerfile | 21 +++++ images/benchmarks/iperf/Dockerfile | 8 ++ pkg/test/dockerutil/dockerutil.go | 5 ++ scripts/benchmark.sh | 33 ++------ scripts/common.sh | 27 ++++++ scripts/common_build.sh | 2 +- scripts/packetdrill_tests.sh | 2 +- scripts/packetimpact_tests.sh | 2 +- test/benchmarks/README.md | 118 ++++++++++++++++++++++++++ test/benchmarks/fs/BUILD | 24 ++++++ test/benchmarks/fs/bazel_test.go | 103 +++++++++++++++++++++++ test/benchmarks/fs/fs.go | 16 ++++ test/benchmarks/harness/BUILD | 18 ++++ test/benchmarks/harness/harness.go | 38 +++++++++ test/benchmarks/harness/machine.go | 64 ++++++++++++++ test/benchmarks/harness/util.go | 38 +++++++++ test/benchmarks/network/BUILD | 25 ++++++ test/benchmarks/network/iperf_test.go | 151 ++++++++++++++++++++++++++++++++++ test/benchmarks/network/network.go | 16 ++++ 19 files changed, 684 insertions(+), 27 deletions(-) create mode 100644 images/benchmarks/absl/Dockerfile create mode 100644 images/benchmarks/iperf/Dockerfile create mode 100644 test/benchmarks/README.md create mode 100644 test/benchmarks/fs/BUILD create mode 100644 test/benchmarks/fs/bazel_test.go create mode 100644 test/benchmarks/fs/fs.go create mode 100644 test/benchmarks/harness/BUILD create mode 100644 test/benchmarks/harness/harness.go create mode 100644 test/benchmarks/harness/machine.go create mode 100644 test/benchmarks/harness/util.go create mode 100644 test/benchmarks/network/BUILD create mode 100644 
test/benchmarks/network/iperf_test.go create mode 100644 test/benchmarks/network/network.go (limited to 'test') diff --git a/images/benchmarks/absl/Dockerfile b/images/benchmarks/absl/Dockerfile new file mode 100644 index 000000000..b0dd97695 --- /dev/null +++ b/images/benchmarks/absl/Dockerfile @@ -0,0 +1,21 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + wget \ + git \ + pkg-config \ + zip \ + g++ \ + zlib1g-dev \ + unzip \ + python3 \ + && rm -rf /var/lib/apt/lists/* +RUN wget https://github.com/bazelbuild/bazel/releases/download/0.27.0/bazel-0.27.0-installer-linux-x86_64.sh +RUN chmod +x bazel-0.27.0-installer-linux-x86_64.sh +RUN ./bazel-0.27.0-installer-linux-x86_64.sh + +RUN mkdir abseil-cpp && cd abseil-cpp \ + && git init && git remote add origin https://github.com/abseil/abseil-cpp.git \ + && git fetch --depth 1 origin 43ef2148c0936ebf7cb4be6b19927a9d9d145b8f && git checkout FETCH_HEAD diff --git a/images/benchmarks/iperf/Dockerfile b/images/benchmarks/iperf/Dockerfile new file mode 100644 index 000000000..4cbfd0d70 --- /dev/null +++ b/images/benchmarks/iperf/Dockerfile @@ -0,0 +1,8 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + iperf \ + && rm -rf /var/lib/apt/lists/* + diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index f95ae3cd1..df09babf3 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -119,3 +119,8 @@ func Save(logger testutil.Logger, image string, w io.Writer) error { cmd.Stdout = w // Send directly to the writer. return cmd.Run() } + +// Runtime returns the value of the flag runtime. +func Runtime() string { + return *runtime +} diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh index e0f6df438..c49f988b8 100755 --- a/scripts/benchmark.sh +++ b/scripts/benchmark.sh @@ -16,30 +16,15 @@ source $(dirname $0)/common.sh -# gcloud may be installed as a "snap". If it is, include it in PATH. -declare -r snap="/snap/bin" -if [[ -d "${snap}" ]]; then - export PATH="${PATH}:${snap}" -fi - -# Make sure we can find gcloud and exit if not. -which gcloud +make load-all-images -# Exporting for subprocesses as GCP APIs and tools check this environmental -# variable for authentication. -export GOOGLE_APPLICATION_CREDENTIALS="${KOKORO_KEYSTORE_DIR}/${GCLOUD_CREDENTIALS}" - -gcloud auth activate-service-account \ - --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" +if [[ -z "${1:-}" ]]; then + target=$(query "attr(tags, manual, tests(//test/benchmarks/...))") +else + target="$1" +fi -gcloud config set project ${PROJECT} -gcloud config set compute/zone ${ZONE} +install_runsc_for_benchmarks benchmark -bazel run //benchmarks:benchmarks -- \ - --verbose \ - run-gcp \ - "(startup|absl)" \ - --internal \ - --runtime=runc \ - --runtime=runsc \ - --installers=head +echo $target +benchmark_runsc $target "${@:2}" diff --git a/scripts/common.sh b/scripts/common.sh index 3ca699e4a..36158654f 100755 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -42,6 +42,15 @@ function test_runsc() { test --test_arg=--runtime=${RUNTIME} "$@" } +function benchmark_runsc() { + test_runsc -c opt \ + --nocache_test_results \ + --test_arg=-test.bench=. 
\ + --test_arg=-test.benchmem \ + --jobs=1 \ + "$@" +} + function install_runsc_for_test() { local -r test_name=$1 shift @@ -63,6 +72,24 @@ function install_runsc_for_test() { "$@" } +function install_runsc_for_benchmarks() { + local -r test_name=$1 + shift + if [[ -z "${test_name}" ]]; then + echo "Missing mandatory test name" + exit 1 + fi + + # Add test to the name, so it doesn't conflict with other runtimes. + set_runtime $(find_branch_name)_"${test_name}" + + # ${RUNSC_TEST_NAME} is set by tests (see dockerutil) to pass the test name + # down to the runtime. + install_runsc "${RUNTIME}" \ + --TESTONLY-test-name-env=RUNSC_TEST_NAME \ + "$@" +} + # Installs the runsc with given runtime name. set_runtime must have been called # to set runtime and logs location. function install_runsc() { diff --git a/scripts/common_build.sh b/scripts/common_build.sh index 0d9a191b5..f4210a3f3 100755 --- a/scripts/common_build.sh +++ b/scripts/common_build.sh @@ -64,7 +64,7 @@ function run_as_root() { } function query() { - QUERY_RESULT=$(bazel query "$@") + bazel query "$@" } function collect_logs() { diff --git a/scripts/packetdrill_tests.sh b/scripts/packetdrill_tests.sh index 727503bce..1a8181ac8 100755 --- a/scripts/packetdrill_tests.sh +++ b/scripts/packetdrill_tests.sh @@ -19,5 +19,5 @@ source $(dirname $0)/common.sh make load-packetdrill install_runsc_for_test runsc-d -query "attr(tags, manual, tests(//test/packetdrill/...))" +QUERY_RESULT=$(query "attr(tags, manual, tests(//test/packetdrill/...))") test_runsc $QUERY_RESULT diff --git a/scripts/packetimpact_tests.sh b/scripts/packetimpact_tests.sh index 51c11f23f..77fb84bc3 100755 --- a/scripts/packetimpact_tests.sh +++ b/scripts/packetimpact_tests.sh @@ -19,5 +19,5 @@ source $(dirname $0)/common.sh make load-packetimpact install_runsc_for_test runsc-d -query "attr(tags, packetimpact, tests(//test/packetimpact/...))" +QUERY_RESULT=$(query "attr(tags, packetimpact, tests(//test/packetimpact/...))") test_runsc $QUERY_RESULT diff --git a/test/benchmarks/README.md b/test/benchmarks/README.md new file mode 100644 index 000000000..9ff602cf1 --- /dev/null +++ b/test/benchmarks/README.md @@ -0,0 +1,118 @@ +# Benchmark tools + +This package and subpackages are for running macro benchmarks on `runsc`. They +are meant to replace the previous //benchmarks benchmark-tools written in +python. + +Benchmarks are meant to look like regular golang benchmarks using the testing.B +library. + +## Setup + +To run benchmarks you will need: + +* Docker installed (17.09.0 or greater). + +The easiest way to run benchmarks is to use the script at +//scripts/benchmark.sh. + +If not using the script, you will need: + +* `runsc` configured with docker + +Note: benchmarks call the runtime by name. If docker can run it with +`--runtime=` flag, these tools should work. + +## Running benchmarks + +The easiest way to run is with the script at //scripts/benchmarks.sh. The script +will run all benchmarks under //test/benchmarks if a target is not provided. + +```bash +./script/benchmarks.sh //path/to/target +``` + +If you want to run benchmarks manually: + +* Run `make load-all-images` from `//` +* Run with: + +```bash +bazel test --test_arg=--runtime=RUNTIME -c opt --test_output=streamed --test_timeout=600 --test_arg=-test.bench=. --nocache_test_results //path/to/target +``` + +## Writing benchmarks + +Benchmarks consist of docker images as Dockerfiles and golang testing.B +benchmarks. + +### Dockerfiles: + +* Are stored at //images. 
+* New Dockerfiles go in an appropriately named directory at + `//images/benchmarks/my-cool-dockerfile`. +* Dockerfiles for benchmarks should: + * Use explicitly versioned packages. + * Not use ENV and CMD statements...it is easy to add these in the API. +* Note: A common pattern for getting access to a tmpfs mount is to copy files + there after container start. See: //test/benchmarks/build/bazel_test.go. You + can also make your own with `RunOpts.Mounts`. + +### testing.B packages + +In general, benchmarks should look like this: + +```golang + +var h harness.Harness + +func BenchmarkMyCoolOne(b *testing.B) { + machine, err := h.GetMachine() + // check err + + ctx := context.Background() + container := machine.GetContainer(ctx, b) + defer container.CleanUp(ctx) + + b.ResetTimer() + + //Respect b.N. + for i := 0; i < b.N; i++ { + out, err := container.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/my-cool-image", + Env: []string{"MY_VAR=awesome"}, + other options...see dockerutil + }, "sh", "-c", "echo MY_VAR" ...) + //check err + b.StopTimer() + + // Do parsing and reporting outside of the timer. + number := parseMyMetric(out) + b.ReportMetric(number, "my-cool-custom-metric") + + b.StartTimer() + } +} + +func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} +``` + +Some notes on the above: + +* The harness is initiated in the TestMain method and made global to test + module. The harness will handle any presetup that needs to happen with + flags, remote virtual machines (eventually), and other services. +* Respect `b.N` in that users of the benchmark may want to "run for an hour" + or something of the sort. +* Use the `b.ReportMetric` method to report custom metrics. +* Set the timer if time is useful for reporting. There isn't a way to turn off + default metrics in testing.B (B/op, allocs/op, ns/op). +* Take a look at dockerutil at //pkg/test/dockerutil to see all methods + available from containers. The API is based on the "official" + [docker API for golang](https://pkg.go.dev/mod/github.com/docker/docker). +* `harness.GetMachine` marks how many machines this tests needs. If you have a + client and server and to mark them as multiple machines, call it + `GetMachine` twice. diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD new file mode 100644 index 000000000..606331895 --- /dev/null +++ b/test/benchmarks/fs/BUILD @@ -0,0 +1,24 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "fs", + srcs = ["fs.go"], +) + +go_test( + name = "fs_test", + size = "large", + srcs = ["bazel_test.go"], + library = ":fs", + tags = [ + # Requires docker and runsc to be configured before test runs. + "local", + "manual", + ], + deps = [ + "//pkg/test/dockerutil", + "//test/benchmarks/harness", + ], +) diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go new file mode 100644 index 000000000..aabcdbd87 --- /dev/null +++ b/test/benchmarks/fs/bazel_test.go @@ -0,0 +1,103 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
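The README's guidance on custom metrics boils down to keeping parsing and reporting outside the measured region. A hedged, standalone sketch of that pattern (not from the patch; `doWork` stands in for a `container.Run` call and the metric name is made up):

```go
// Demonstrates b.ReportMetric with parsing excluded from the timed region.
package sketch

import (
	"strconv"
	"strings"
	"testing"
)

func doWork() string { return "42.0" } // placeholder for container.Run(...)

func BenchmarkReportCustomMetric(b *testing.B) {
	for i := 0; i < b.N; i++ {
		out := doWork()
		b.StopTimer()
		// Parse and report outside the timer so it does not skew ns/op.
		v, err := strconv.ParseFloat(strings.TrimSpace(out), 64)
		if err != nil {
			b.Fatalf("failed to parse %q: %v", out, err)
		}
		b.ReportMetric(v, "my-cool-custom-metric")
		b.StartTimer()
	}
}
```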
+// See the License for the specific language governing permissions and +// limitations under the License. +package fs + +import ( + "context" + "os" + "strings" + "testing" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +var h harness.Harness + +// Note: CleanCache versions of this test require running with root permissions. +func BenchmarkABSL(b *testing.B) { + // Get a machine from the Harness on which to run. + machine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + + // Dimensions here are clean/dirty cache (do or don't drop caches) + // and if the mount on which we are compiling is a tmpfs/bind mount. + benchmarks := []struct { + name string + clearCache bool // clearCache drops caches before running. + tmpfs bool // tmpfs will run compilation on a tmpfs. + }{ + {name: "CleanCache", clearCache: true, tmpfs: false}, + {name: "DirtyCache", clearCache: false, tmpfs: false}, + {name: "CleanCacheTmpfs", clearCache: true, tmpfs: true}, + {name: "DirtyCacheTmpfs", clearCache: false, tmpfs: true}, + } + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + // Grab a container. + ctx := context.Background() + container := machine.GetContainer(ctx, b) + defer container.CleanUp(ctx) + + workdir := "/abseil-cpp" + + // Start a container. + if err := container.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/absl", + }, "sleep", "1000"); err != nil { + b.Fatalf("run failed with: %v", err) + } + + // If we are running on a tmpfs, copy to /tmp which is a tmpfs. + if bm.tmpfs { + if _, err := container.Exec(ctx, dockerutil.ExecOpts{}, + "cp", "-r", "/abseil-cpp", "/tmp/."); err != nil { + b.Fatal("failed to copy directory: %v", err) + } + workdir = "/tmp" + workdir + } + + // Drop Caches. + if bm.clearCache { + if out, err := machine.RunCommand("/bin/sh -c sync; echo 3 > /proc/sys/vm/drop_caches"); err != nil { + b.Fatalf("failed to drop caches: %v %s", err, out) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + got, err := container.Exec(ctx, dockerutil.ExecOpts{ + WorkDir: workdir, + }, "bazel", "build", "-c", "opt", "absl/base/...") + if err != nil { + b.Fatalf("build failed with: %v", err) + } + b.StopTimer() + + want := "Build completed successfully" + if !strings.Contains(got, want) { + b.Fatalf("string %s not in: %s", want, got) + } + b.StartTimer() + } + }) + } +} + +func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} diff --git a/test/benchmarks/fs/fs.go b/test/benchmarks/fs/fs.go new file mode 100644 index 000000000..27eb6c56a --- /dev/null +++ b/test/benchmarks/fs/fs.go @@ -0,0 +1,16 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fs holds benchmarks around filesystem performance. 
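The `CleanCache` variants of the bazel benchmark drop the page cache between runs. A hypothetical sketch of that step on its own (not part of the patch; requires root, and the helper name is made up):

```go
// Flushes dirty pages and drops the kernel page/dentry/inode caches, the same
// step the CleanCache benchmark variants perform before each build.
package sketch

import (
	"fmt"
	"os/exec"
)

func dropCaches() error {
	out, err := exec.Command("/bin/sh", "-c", "sync && echo 3 > /proc/sys/vm/drop_caches").CombinedOutput()
	if err != nil {
		return fmt.Errorf("dropping caches: %v: %s", err, out)
	}
	return nil
}
```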
+package fs diff --git a/test/benchmarks/harness/BUILD b/test/benchmarks/harness/BUILD new file mode 100644 index 000000000..c2e316709 --- /dev/null +++ b/test/benchmarks/harness/BUILD @@ -0,0 +1,18 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "harness", + testonly = 1, + srcs = [ + "harness.go", + "machine.go", + "util.go", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/test/dockerutil", + "//pkg/test/testutil", + ], +) diff --git a/test/benchmarks/harness/harness.go b/test/benchmarks/harness/harness.go new file mode 100644 index 000000000..68bd7b4cf --- /dev/null +++ b/test/benchmarks/harness/harness.go @@ -0,0 +1,38 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package harness holds utility code for running benchmarks on Docker. +package harness + +import ( + "flag" + + "gvisor.dev/gvisor/pkg/test/dockerutil" +) + +// Harness is a handle for managing state in benchmark runs. +type Harness struct { +} + +// Init performs any harness initilialization before runs. +func (h *Harness) Init() error { + flag.Parse() + dockerutil.EnsureSupportedDockerVersion() + return nil +} + +// GetMachine returns this run's implementation of machine. +func (h *Harness) GetMachine() (Machine, error) { + return &localMachine{}, nil +} diff --git a/test/benchmarks/harness/machine.go b/test/benchmarks/harness/machine.go new file mode 100644 index 000000000..032b387fc --- /dev/null +++ b/test/benchmarks/harness/machine.go @@ -0,0 +1,64 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package harness + +import ( + "context" + "net" + "os/exec" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +// Machine describes a real machine for use in benchmarks. +type Machine interface { + // GetContainer gets a container from the machine, + GetContainer(ctx context.Context, log testutil.Logger) *dockerutil.Container + + // RunCommand runs cmd on this machine. + RunCommand(cmd string, args ...string) (string, error) + + // Returns IP Address for the machine. + IPAddress() (net.IP, error) +} + +// localMachine describes this machine. +type localMachine struct { +} + +// GetContainer implements Machine.GetContainer for localMachine. 
+func (l *localMachine) GetContainer(ctx context.Context, logger testutil.Logger) *dockerutil.Container { + return dockerutil.MakeContainer(ctx, logger) +} + +// RunCommand implements Machine.RunCommand for localMachine. +func (l *localMachine) RunCommand(cmd string, args ...string) (string, error) { + c := exec.Command(cmd, args...) + out, err := c.CombinedOutput() + return string(out), err +} + +// IPAddress implements Machine.IPAddress. +func (l *localMachine) IPAddress() (net.IP, error) { + conn, err := net.Dial("udp", "8.8.8.8:80") + if err != nil { + return nil, err + } + defer conn.Close() + + addr := conn.LocalAddr().(*net.UDPAddr) + return addr.IP, nil +} diff --git a/test/benchmarks/harness/util.go b/test/benchmarks/harness/util.go new file mode 100644 index 000000000..cc7de6426 --- /dev/null +++ b/test/benchmarks/harness/util.go @@ -0,0 +1,38 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package harness + +import ( + "context" + "fmt" + "net" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +// WaitUntilServing grabs a container from `machine` and waits for a server at +// IP:port. +func WaitUntilServing(ctx context.Context, machine Machine, server net.IP, port int) error { + var logger testutil.DefaultLogger = "netcat" + netcat := machine.GetContainer(ctx, logger) + defer netcat.CleanUp(ctx) + + cmd := fmt.Sprintf("while ! nc -zv %s %d; do true; done", server.String(), port) + _, err := netcat.Run(ctx, dockerutil.RunOpts{ + Image: "packetdrill", + }, "sh", "-c", cmd) + return err +} diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD new file mode 100644 index 000000000..57328456d --- /dev/null +++ b/test/benchmarks/network/BUILD @@ -0,0 +1,25 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "network", + testonly = 1, + srcs = ["network.go"], +) + +go_test( + name = "network_test", + size = "large", + srcs = ["iperf_test.go"], + library = ":network", + tags = [ + # Requires docker and runsc to be configured before test runs. + "manual", + "local", + ], + deps = [ + "//pkg/test/dockerutil", + "//test/benchmarks/harness", + ], +) diff --git a/test/benchmarks/network/iperf_test.go b/test/benchmarks/network/iperf_test.go new file mode 100644 index 000000000..72e9c99a8 --- /dev/null +++ b/test/benchmarks/network/iperf_test.go @@ -0,0 +1,151 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
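`harness.WaitUntilServing` polls the server from a netcat container before the benchmark client starts. The same idea without Docker, as a hedged sketch (not from the patch; the helper name is illustrative):

```go
// Polls a TCP address until it accepts a connection or the context expires.
package sketch

import (
	"context"
	"fmt"
	"net"
	"time"
)

func waitUntilServing(ctx context.Context, addr string) error {
	for {
		if conn, err := net.DialTimeout("tcp", addr, time.Second); err == nil {
			conn.Close()
			return nil
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("%s never came up: %w", addr, ctx.Err())
		case <-time.After(100 * time.Millisecond):
			// Retry.
		}
	}
}
```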
+// See the License for the specific language governing permissions and +// limitations under the License. +package network + +import ( + "context" + "fmt" + "os" + "regexp" + "strconv" + "strings" + "testing" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +var h harness.Harness + +func BenchmarkIperf(b *testing.B) { + + // Get two machines + clientMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + + serverMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + + for _, bm := range []struct { + name string + clientRuntime string + serverRuntime string + }{ + // We are either measuring the server or the client. The other should be + // runc. e.g. Upload sees how fast the runtime under test uploads to a native + // server. + {name: "Upload", clientRuntime: dockerutil.Runtime(), serverRuntime: "runc"}, + {name: "Download", clientRuntime: "runc", serverRuntime: dockerutil.Runtime()}, + } { + b.Run(bm.name, func(b *testing.B) { + + // Get a container from the server and set its runtime. + ctx := context.Background() + server := serverMachine.GetContainer(ctx, b) + defer server.CleanUp(ctx) + server.Runtime = bm.serverRuntime + + // Get a container from the client and set its runtime. + client := clientMachine.GetContainer(ctx, b) + defer client.CleanUp(ctx) + client.Runtime = bm.clientRuntime + + // iperf serves on port 5001 by default. + port := 5001 + + // Start the server. + if err := server.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/iperf", + Ports: []int{port}, + }, "iperf", "-s"); err != nil { + b.Fatalf("failed to start server with: %v", err) + } + + ip, err := serverMachine.IPAddress() + if err != nil { + b.Fatalf("failed to find server ip: %v", err) + } + + servingPort, err := server.FindPort(ctx, port) + if err != nil { + b.Fatalf("failed to find port %d: %v", port, err) + } + + // Make sure the server is up and serving before we run. + if err := harness.WaitUntilServing(ctx, clientMachine, ip, servingPort); err != nil { + b.Fatalf("failed to wait for server: %v", err) + } + + // iperf report in Kb realtime + cmd := fmt.Sprintf("iperf -f K --realtime -c %s -p %d", ip.String(), servingPort) + + // Run the client. + b.ResetTimer() + + for i := 0; i < b.N; i++ { + out, err := client.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/iperf", + }, strings.Split(cmd, " ")...) + if err != nil { + b.Fatalf("failed to run client: %v", err) + } + b.StopTimer() + + // Parse bandwidth and report it. + bW, err := bandwidth(out) + if err != nil { + b.Fatalf("failed to parse bandwitdth from %s: %v", out, err) + } + b.ReportMetric(bW, "KBytes/sec") + b.StartTimer() + } + }) + } +} + +// bandwidth parses the Bandwidth number from an iperf report. A sample is below. 
+func bandwidth(data string) (float64, error) { + re := regexp.MustCompile(`\[\s*\d+\][^\n]+\s+(\d+\.?\d*)\s+KBytes/sec`) + match := re.FindStringSubmatch(data) + if len(match) < 1 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +func TestParser(t *testing.T) { + sampleData := ` +------------------------------------------------------------ +Client connecting to 10.138.15.215, TCP port 32779 +TCP window size: 45.0 KByte (default) +------------------------------------------------------------ +[ 3] local 10.138.15.216 port 32866 connected with 10.138.15.215 port 32779 +[ ID] Interval Transfer Bandwidth +[ 3] 0.0-10.0 sec 459520 KBytes 45900 KBytes/sec +` + bandwidth, err := bandwidth(sampleData) + if err != nil || bandwidth != 45900 { + t.Fatalf("failed with: %v and %f", err, bandwidth) + } + +} + +func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} diff --git a/test/benchmarks/network/network.go b/test/benchmarks/network/network.go new file mode 100644 index 000000000..f480b5bcd --- /dev/null +++ b/test/benchmarks/network/network.go @@ -0,0 +1,16 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package network holds benchmarks around raw network performance. +package network -- cgit v1.2.3 From 822fc99ecd65c6c93094d93e005a03ae25989fc9 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Tue, 14 Jul 2020 11:48:26 -0700 Subject: Add support for UDP IPv6 Also ironed out all the bugs found on the IPv6 code path that affects socket bind, send and receive. PiperOrigin-RevId: 321202653 --- test/packetimpact/dut/posix_server.cc | 10 +- test/packetimpact/netdevs/BUILD | 10 +- test/packetimpact/netdevs/netdevs.go | 17 +- test/packetimpact/netdevs/netdevs_test.go | 227 +++++++++++++++++++++ test/packetimpact/runner/packetimpact_test.go | 1 + test/packetimpact/testbench/connections.go | 137 +++++++++++-- test/packetimpact/testbench/dut.go | 6 +- test/packetimpact/testbench/layers.go | 10 +- test/packetimpact/testbench/testbench.go | 9 + .../packetimpact/tests/udp_send_recv_dgram_test.go | 110 +++++----- 10 files changed, 464 insertions(+), 73 deletions(-) create mode 100644 test/packetimpact/netdevs/netdevs_test.go (limited to 'test') diff --git a/test/packetimpact/dut/posix_server.cc b/test/packetimpact/dut/posix_server.cc index a1a5c3612..29d4cc6fe 100644 --- a/test/packetimpact/dut/posix_server.cc +++ b/test/packetimpact/dut/posix_server.cc @@ -53,7 +53,10 @@ response_in6->set_flowinfo(ntohl(addr_in6->sin6_flowinfo)); response_in6->mutable_addr()->assign( reinterpret_cast(&addr_in6->sin6_addr.s6_addr), 16); - response_in6->set_scope_id(ntohl(addr_in6->sin6_scope_id)); + // sin6_scope_id is stored in host byte order. 
+ // + // https://www.gnu.org/software/libc/manual/html_node/Internet-Address-Formats.html + response_in6->set_scope_id(addr_in6->sin6_scope_id); return ::grpc::Status::OK; } } @@ -89,7 +92,10 @@ addr_in6->sin6_flowinfo = htonl(proto_in6.flowinfo()); proto_in6.addr().copy( reinterpret_cast(&addr_in6->sin6_addr.s6_addr), 16); - addr_in6->sin6_scope_id = htonl(proto_in6.scope_id()); + // sin6_scope_id is stored in host byte order. + // + // https://www.gnu.org/software/libc/manual/html_node/Internet-Address-Formats.html + addr_in6->sin6_scope_id = proto_in6.scope_id(); *addr_len = sizeof(*addr_in6); break; } diff --git a/test/packetimpact/netdevs/BUILD b/test/packetimpact/netdevs/BUILD index 422bb9b0c..8d1193fed 100644 --- a/test/packetimpact/netdevs/BUILD +++ b/test/packetimpact/netdevs/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package( licenses = ["notice"], @@ -13,3 +13,11 @@ go_library( "//pkg/tcpip/header", ], ) + +go_test( + name = "netdevs_test", + size = "small", + srcs = ["netdevs_test.go"], + library = ":netdevs", + deps = ["@com_github_google_go_cmp//cmp:go_default_library"], +) diff --git a/test/packetimpact/netdevs/netdevs.go b/test/packetimpact/netdevs/netdevs.go index d2c9cfeaf..eecfe0730 100644 --- a/test/packetimpact/netdevs/netdevs.go +++ b/test/packetimpact/netdevs/netdevs.go @@ -19,6 +19,7 @@ import ( "fmt" "net" "regexp" + "strconv" "strings" "gvisor.dev/gvisor/pkg/tcpip" @@ -27,6 +28,7 @@ import ( // A DeviceInfo represents a network device. type DeviceInfo struct { + ID uint32 MAC net.HardwareAddr IPv4Addr net.IP IPv4Net *net.IPNet @@ -35,7 +37,7 @@ type DeviceInfo struct { } var ( - deviceLine = regexp.MustCompile(`^\s*\d+: (\w+)`) + deviceLine = regexp.MustCompile(`^\s*(\d+): (\w+)`) linkLine = regexp.MustCompile(`^\s*link/\w+ ([0-9a-fA-F:]+)`) inetLine = regexp.MustCompile(`^\s*inet ([0-9./]+)`) inet6Line = regexp.MustCompile(`^\s*inet6 ([0-9a-fA-Z:/]+)`) @@ -43,6 +45,11 @@ var ( // ParseDevices parses the output from `ip addr show` into a map from device // name to information about the device. +// +// Note: if multiple IPv6 addresses are assigned to a device, the last address +// displayed by `ip addr show` will be used. This is fine for packetimpact +// because we will always only have at most one IPv6 address assigned to each +// device. func ParseDevices(cmdOutput string) (map[string]DeviceInfo, error) { var currentDevice string var currentInfo DeviceInfo @@ -52,8 +59,12 @@ func ParseDevices(cmdOutput string) (map[string]DeviceInfo, error) { if currentDevice != "" { deviceInfos[currentDevice] = currentInfo } - currentInfo = DeviceInfo{} - currentDevice = m[1] + id, err := strconv.ParseUint(m[1], 10, 32) + if err != nil { + return nil, fmt.Errorf("parsing device ID %s: %w", m[1], err) + } + currentInfo = DeviceInfo{ID: uint32(id)} + currentDevice = m[2] } else if m := linkLine.FindStringSubmatch(line); m != nil { mac, err := net.ParseMAC(m[1]) if err != nil { diff --git a/test/packetimpact/netdevs/netdevs_test.go b/test/packetimpact/netdevs/netdevs_test.go new file mode 100644 index 000000000..24ad12198 --- /dev/null +++ b/test/packetimpact/netdevs/netdevs_test.go @@ -0,0 +1,227 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
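`ParseDevices` pulls the interface index and name out of `ip addr show` output with the `deviceLine` regexp. A small, self-contained illustration of that first step (not part of the patch; the sample line is abbreviated and made up):

```go
// Extracts the interface index and name from an `ip addr show` header line.
package main

import (
	"fmt"
	"regexp"
	"strconv"
)

var deviceLine = regexp.MustCompile(`^\s*(\d+): (\w+)`)

func main() {
	line := "2613: eth0@if2614: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500"
	m := deviceLine.FindStringSubmatch(line)
	if m == nil {
		panic("no device line match")
	}
	id, err := strconv.ParseUint(m[1], 10, 32)
	if err != nil {
		panic(err)
	}
	fmt.Printf("index=%d name=%s\n", id, m[2]) // index=2613 name=eth0
}
```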
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package netdevs + +import ( + "fmt" + "net" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func mustParseMAC(s string) net.HardwareAddr { + mac, err := net.ParseMAC(s) + if err != nil { + panic(fmt.Sprintf("failed to parse test MAC %q: %s", s, err)) + } + return mac +} + +func TestParseDevices(t *testing.T) { + for _, v := range []struct { + desc string + cmdOutput string + want map[string]DeviceInfo + }{ + { + desc: "v4 and v6", + cmdOutput: ` +1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + inet 127.0.0.1/8 scope host lo + valid_lft forever preferred_lft forever + inet6 ::1/128 scope host + valid_lft forever preferred_lft forever +2613: eth0@if2614: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:c0:a8:09:02 brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet 192.168.9.2/24 brd 192.168.9.255 scope global eth0 + valid_lft forever preferred_lft forever + inet6 fe80::42:c0ff:fea8:902/64 scope link tentative + valid_lft forever preferred_lft forever +2615: eth2@if2616: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:df:f5:e1:0a brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet 223.245.225.10/24 brd 223.245.225.255 scope global eth2 + valid_lft forever preferred_lft forever + inet6 fe80::42:dfff:fef5:e10a/64 scope link tentative + valid_lft forever preferred_lft forever +2617: eth1@if2618: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:da:33:13:0a brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet 218.51.19.10/24 brd 218.51.19.255 scope global eth1 + valid_lft forever preferred_lft forever + inet6 fe80::42:daff:fe33:130a/64 scope link tentative + valid_lft forever preferred_lft forever`, + want: map[string]DeviceInfo{ + "lo": DeviceInfo{ + ID: 1, + MAC: mustParseMAC("00:00:00:00:00:00"), + IPv4Addr: net.IPv4(127, 0, 0, 1), + IPv4Net: &net.IPNet{ + IP: net.IPv4(127, 0, 0, 0), + Mask: net.CIDRMask(8, 32), + }, + IPv6Addr: net.ParseIP("::1"), + IPv6Net: &net.IPNet{ + IP: net.ParseIP("::1"), + Mask: net.CIDRMask(128, 128), + }, + }, + "eth0": DeviceInfo{ + ID: 2613, + MAC: mustParseMAC("02:42:c0:a8:09:02"), + IPv4Addr: net.IPv4(192, 168, 9, 2), + IPv4Net: &net.IPNet{ + IP: net.IPv4(192, 168, 9, 0), + Mask: net.CIDRMask(24, 32), + }, + IPv6Addr: net.ParseIP("fe80::42:c0ff:fea8:902"), + IPv6Net: &net.IPNet{ + IP: net.ParseIP("fe80::"), + Mask: net.CIDRMask(64, 128), + }, + }, + "eth1": DeviceInfo{ + ID: 2617, + MAC: mustParseMAC("02:42:da:33:13:0a"), + IPv4Addr: net.IPv4(218, 51, 19, 10), + IPv4Net: &net.IPNet{ + IP: net.IPv4(218, 51, 19, 0), + Mask: net.CIDRMask(24, 32), + }, + IPv6Addr: net.ParseIP("fe80::42:daff:fe33:130a"), + IPv6Net: &net.IPNet{ + IP: net.ParseIP("fe80::"), + Mask: net.CIDRMask(64, 128), + }, + }, + "eth2": DeviceInfo{ + ID: 2615, + MAC: mustParseMAC("02:42:df:f5:e1:0a"), + IPv4Addr: net.IPv4(223, 245, 225, 10), + IPv4Net: &net.IPNet{ + IP: net.IPv4(223, 245, 225, 0), + Mask: net.CIDRMask(24, 32), + }, + IPv6Addr: net.ParseIP("fe80::42:dfff:fef5:e10a"), + IPv6Net: &net.IPNet{ + IP: net.ParseIP("fe80::"), + Mask: net.CIDRMask(64, 128), + }, + }, 
+ }, + }, + { + desc: "v4 only", + cmdOutput: ` +2613: eth0@if2614: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:c0:a8:09:02 brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet 192.168.9.2/24 brd 192.168.9.255 scope global eth0 + valid_lft forever preferred_lft forever`, + want: map[string]DeviceInfo{ + "eth0": DeviceInfo{ + ID: 2613, + MAC: mustParseMAC("02:42:c0:a8:09:02"), + IPv4Addr: net.IPv4(192, 168, 9, 2), + IPv4Net: &net.IPNet{ + IP: net.IPv4(192, 168, 9, 0), + Mask: net.CIDRMask(24, 32), + }, + }, + }, + }, + { + desc: "v6 only", + cmdOutput: ` +2615: eth2@if2616: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:df:f5:e1:0a brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet6 fe80::42:dfff:fef5:e10a/64 scope link tentative + valid_lft forever preferred_lft forever`, + want: map[string]DeviceInfo{ + "eth2": DeviceInfo{ + ID: 2615, + MAC: mustParseMAC("02:42:df:f5:e1:0a"), + IPv6Addr: net.ParseIP("fe80::42:dfff:fef5:e10a"), + IPv6Net: &net.IPNet{ + IP: net.ParseIP("fe80::"), + Mask: net.CIDRMask(64, 128), + }, + }, + }, + }, + } { + t.Run(v.desc, func(t *testing.T) { + got, err := ParseDevices(v.cmdOutput) + if err != nil { + t.Errorf("ParseDevices(\n%s\n) got unexpected error: %s", v.cmdOutput, err) + } + if diff := cmp.Diff(v.want, got); diff != "" { + t.Errorf("ParseDevices(\n%s\n) got output diff (-want, +got):\n%s", v.cmdOutput, diff) + } + }) + } +} + +func TestParseDevicesErrors(t *testing.T) { + for _, v := range []struct { + desc string + cmdOutput string + }{ + { + desc: "invalid MAC addr", + cmdOutput: ` +2617: eth1@if2618: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:da:33:13:0a:ffffffff brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet 218.51.19.10/24 brd 218.51.19.255 scope global eth1 + valid_lft forever preferred_lft forever + inet6 fe80::42:daff:fe33:130a/64 scope link tentative + valid_lft forever preferred_lft forever`, + }, + { + desc: "invalid v4 addr", + cmdOutput: ` +2617: eth1@if2618: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:da:33:13:0a brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet 1234.4321.424242.0/24 brd 218.51.19.255 scope global eth1 + valid_lft forever preferred_lft forever + inet6 fe80::42:daff:fe33:130a/64 scope link tentative + valid_lft forever preferred_lft forever`, + }, + { + desc: "invalid v6 addr", + cmdOutput: ` +2617: eth1@if2618: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:da:33:13:0a brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet 218.51.19.10/24 brd 218.51.19.255 scope global eth1 + valid_lft forever preferred_lft forever + inet6 fe80:ffffffff::42:daff:fe33:130a/64 scope link tentative + valid_lft forever preferred_lft forever`, + }, + { + desc: "invalid CIDR missing prefixlen", + cmdOutput: ` +2617: eth1@if2618: mtu 1500 qdisc noqueue state UP group default + link/ether 02:42:da:33:13:0a brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet 218.51.19.10 brd 218.51.19.255 scope global eth1 + valid_lft forever preferred_lft forever + inet6 fe80::42:daff:fe33:130a scope link tentative + valid_lft forever preferred_lft forever`, + }, + } { + t.Run(v.desc, func(t *testing.T) { + if _, err := ParseDevices(v.cmdOutput); err == nil { + t.Errorf("ParseDevices(\n%s\n) succeeded unexpectedly, want error", v.cmdOutput) + } + }) + } +} diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 397ca3ba5..1a99719c2 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -304,6 
+304,7 @@ func TestOne(t *testing.T) { "--local_ipv4", addressInSubnet(testbenchAddr, *testNet.Subnet).String(), "--remote_ipv6", remoteIPv6.String(), "--remote_mac", remoteMAC.String(), + "--remote_interface_id", fmt.Sprintf("%d", dutDeviceInfo.ID), "--device", testNetDev, "--dut_type", *dutPlatform, ) diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 8b4a4d905..5d9cec73e 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -43,14 +43,16 @@ func portFromSockaddr(sa unix.Sockaddr) (uint16, error) { // pickPort makes a new socket and returns the socket FD and port. The domain should be AF_INET or AF_INET6. The caller must close the FD when done with // the port if there is no error. -func pickPort(domain, typ int) (int, uint16, error) { - fd, err := unix.Socket(domain, typ, 0) +func pickPort(domain, typ int) (fd int, port uint16, err error) { + fd, err = unix.Socket(domain, typ, 0) if err != nil { - return -1, 0, err + return -1, 0, fmt.Errorf("creating socket: %w", err) } defer func() { if err != nil { - err = multierr.Append(err, unix.Close(fd)) + if cerr := unix.Close(fd); cerr != nil { + err = multierr.Append(err, fmt.Errorf("failed to close socket %d: %w", fd, cerr)) + } } }() var sa unix.Sockaddr @@ -60,22 +62,22 @@ func pickPort(domain, typ int) (int, uint16, error) { copy(sa4.Addr[:], net.ParseIP(LocalIPv4).To4()) sa = &sa4 case unix.AF_INET6: - var sa6 unix.SockaddrInet6 + sa6 := unix.SockaddrInet6{ZoneId: uint32(LocalInterfaceID)} copy(sa6.Addr[:], net.ParseIP(LocalIPv6).To16()) sa = &sa6 default: return -1, 0, fmt.Errorf("invalid domain %d, it should be one of unix.AF_INET or unix.AF_INET6", domain) } if err = unix.Bind(fd, sa); err != nil { - return -1, 0, err + return -1, 0, fmt.Errorf("binding to %+v: %w", sa, err) } sa, err = unix.Getsockname(fd) if err != nil { - return -1, 0, err + return -1, 0, fmt.Errorf("Getsocketname(%d): %w", fd, err) } - port, err := portFromSockaddr(sa) + port, err = portFromSockaddr(sa) if err != nil { - return -1, 0, err + return -1, 0, fmt.Errorf("extracting port from socket address %+v: %w", sa, err) } return fd, port, nil } @@ -378,7 +380,7 @@ var _ layerState = (*udpState)(nil) func newUDPState(domain int, out, in UDP) (*udpState, error) { portPickerFD, localPort, err := pickPort(domain, unix.SOCK_DGRAM) if err != nil { - return nil, err + return nil, fmt.Errorf("picking port: %w", err) } s := udpState{ out: UDP{SrcPort: &localPort}, @@ -916,14 +918,14 @@ func (conn *UDPIPv4) SendIP(ip IPv4, udp UDP, additionalLayers ...Layer) { func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) { conn.t.Helper() layer, err := (*Connection)(conn).Expect(&udp, timeout) - if layer == nil { + if err != nil { return nil, err } gotUDP, ok := layer.(*UDP) if !ok { conn.t.Fatalf("expected %s to be UDP", layer) } - return gotUDP, err + return gotUDP, nil } // ExpectData is a convenient method that expects a Layer and the Layer after @@ -948,3 +950,114 @@ func (conn *UDPIPv4) Close() { func (conn *UDPIPv4) Drain() { conn.sniffer.Drain() } + +// UDPIPv6 maintains the state for all the layers in a UDP/IPv6 connection. +type UDPIPv6 Connection + +// NewUDPIPv6 creates a new UDPIPv6 connection with reasonable defaults. 
+func NewUDPIPv6(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv6 { + etherState, err := newEtherState(Ether{}, Ether{}) + if err != nil { + t.Fatalf("can't make etherState: %s", err) + } + ipv6State, err := newIPv6State(IPv6{}, IPv6{}) + if err != nil { + t.Fatalf("can't make IPv6State: %s", err) + } + udpState, err := newUDPState(unix.AF_INET6, outgoingUDP, incomingUDP) + if err != nil { + t.Fatalf("can't make udpState: %s", err) + } + injector, err := NewInjector(t) + if err != nil { + t.Fatalf("can't make injector: %s", err) + } + sniffer, err := NewSniffer(t) + if err != nil { + t.Fatalf("can't make sniffer: %s", err) + } + return UDPIPv6{ + layerStates: []layerState{etherState, ipv6State, udpState}, + injector: injector, + sniffer: sniffer, + t: t, + } +} + +func (conn *UDPIPv6) udpState() *udpState { + state, ok := conn.layerStates[2].(*udpState) + if !ok { + conn.t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2]) + } + return state +} + +func (conn *UDPIPv6) ipv6State() *ipv6State { + state, ok := conn.layerStates[1].(*ipv6State) + if !ok { + conn.t.Fatalf("got network-layer state type=%T, expected ipv6State", conn.layerStates[1]) + } + return state +} + +// LocalAddr gets the local socket address of this connection. +func (conn *UDPIPv6) LocalAddr() *unix.SockaddrInet6 { + sa := &unix.SockaddrInet6{ + Port: int(*conn.udpState().out.SrcPort), + // Local address is in perspective to the remote host, so it's scoped to the + // ID of the remote interface. + ZoneId: uint32(RemoteInterfaceID), + } + copy(sa.Addr[:], *conn.ipv6State().out.SrcAddr) + return sa +} + +// Send sends a packet with reasonable defaults, potentially overriding the UDP +// layer and adding additionLayers. +func (conn *UDPIPv6) Send(udp UDP, additionalLayers ...Layer) { + (*Connection)(conn).send(Layers{&udp}, additionalLayers...) +} + +// SendIPv6 sends a packet with reasonable defaults, potentially overriding the +// UDP and IPv6 headers and adding additionLayers. +func (conn *UDPIPv6) SendIPv6(ip IPv6, udp UDP, additionalLayers ...Layer) { + (*Connection)(conn).send(Layers{&ip, &udp}, additionalLayers...) +} + +// Expect expects a frame with the UDP layer matching the provided UDP within +// the timeout specified. If it doesn't arrive in time, an error is returned. +func (conn *UDPIPv6) Expect(udp UDP, timeout time.Duration) (*UDP, error) { + conn.t.Helper() + layer, err := (*Connection)(conn).Expect(&udp, timeout) + if err != nil { + return nil, err + } + gotUDP, ok := layer.(*UDP) + if !ok { + conn.t.Fatalf("expected %s to be UDP", layer) + } + return gotUDP, nil +} + +// ExpectData is a convenient method that expects a Layer and the Layer after +// it. If it doens't arrive in time, it returns nil. +func (conn *UDPIPv6) ExpectData(udp UDP, payload Payload, timeout time.Duration) (Layers, error) { + conn.t.Helper() + expected := make([]Layer, len(conn.layerStates)) + expected[len(expected)-1] = &udp + if payload.length() != 0 { + expected = append(expected, &payload) + } + return (*Connection)(conn).ExpectFrame(expected, timeout) +} + +// Close frees associated resources held by the UDPIPv6 connection. +func (conn *UDPIPv6) Close() { + (*Connection)(conn).Close() +} + +// Drain drains the sniffer's receive buffer by receiving packets until there's +// nothing else to receive. 
+func (conn *UDPIPv6) Drain() { + conn.sniffer.Drain() +} diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index 2a2afecb5..51be13759 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -87,7 +87,7 @@ func (dut *DUT) sockaddrToProto(sa unix.Sockaddr) *pb.Sockaddr { }, } } - dut.t.Fatalf("can't parse Sockaddr: %+v", sa) + dut.t.Fatalf("can't parse Sockaddr struct: %+v", sa) return nil } @@ -106,8 +106,9 @@ func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr { ZoneId: s.In6.GetScopeId(), } copy(ret.Addr[:], s.In6.GetAddr()) + return &ret } - dut.t.Fatalf("can't parse Sockaddr: %+v", sa) + dut.t.Fatalf("can't parse Sockaddr proto: %+v", sa) return nil } @@ -126,6 +127,7 @@ func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) fd = dut.Socket(unix.AF_INET6, typ, proto) sa := unix.SockaddrInet6{} copy(sa.Addr[:], addr.To16()) + sa.ZoneId = uint32(RemoteInterfaceID) dut.Bind(fd, &sa) } else { dut.t.Fatalf("unknown ip addr type for remoteIP") diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index a8121b0da..95bafa876 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -904,12 +904,14 @@ func payload(l Layer) (buffer.VectorisedView, error) { func layerChecksum(l Layer, protoNumber tcpip.TransportProtocolNumber) (uint16, error) { totalLength := uint16(totalLength(l)) var xsum uint16 - switch s := l.Prev().(type) { + switch p := l.Prev().(type) { case *IPv4: - xsum = header.PseudoHeaderChecksum(protoNumber, *s.SrcAddr, *s.DstAddr, totalLength) + xsum = header.PseudoHeaderChecksum(protoNumber, *p.SrcAddr, *p.DstAddr, totalLength) + case *IPv6: + xsum = header.PseudoHeaderChecksum(protoNumber, *p.SrcAddr, *p.DstAddr, totalLength) default: - // TODO(b/150301488): Support more protocols, like IPv6. - return 0, fmt.Errorf("can't get src and dst addr from previous layer: %#v", s) + // TODO(b/161246171): Support more protocols. + return 0, fmt.Errorf("checksum for protocol %d is not supported when previous layer is %T", protoNumber, p) } payloadBytes, err := payload(l) if err != nil { diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go index 6530036a8..242464e3a 100644 --- a/test/packetimpact/testbench/testbench.go +++ b/test/packetimpact/testbench/testbench.go @@ -44,6 +44,13 @@ var ( // RemoteIPv6 is the DUT's IPv6 address on the test network. RemoteIPv6 = "" + // LocalInterfaceID is the ID of the local interface on the test network. + LocalInterfaceID uint32 + // RemoteInterfaceID is the ID of the remote interface on the test network. + // + // Not using uint32 because package flag does not support uint32. + RemoteInterfaceID uint64 + // LocalMAC is the local MAC address on the test network. LocalMAC = "" // RemoteMAC is the DUT's MAC address on the test network. @@ -75,6 +82,7 @@ func RegisterFlags(fs *flag.FlagSet) { fs.StringVar(&RemoteMAC, "remote_mac", RemoteMAC, "remote mac address for test packets") fs.StringVar(&Device, "device", Device, "local device for test packets") fs.StringVar(&DUTType, "dut_type", DUTType, "type of device under test") + fs.Uint64Var(&RemoteInterfaceID, "remote_interface_id", RemoteInterfaceID, "remote interface ID for test packets") } // genPseudoFlags populates flag-like global config based on real flags. 
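The reason the runner now passes `--remote_interface_id` is that link-local IPv6 destinations are only meaningful together with a zone (interface) ID; `CreateBoundSocket` and `LocalAddr` both scope their sockaddrs to it. A hedged sketch of building such a sockaddr (not from the patch; the helper name is made up):

```go
// Builds a unix.SockaddrInet6 for a link-local IPv6 address, scoped to an
// interface index, as the testbench does with the interface IDs above.
package sketch

import (
	"net"

	"golang.org/x/sys/unix"
)

func sockaddrForLinkLocal(ip net.IP, port int, ifaceIndex uint32) *unix.SockaddrInet6 {
	sa := &unix.SockaddrInet6{Port: port, ZoneId: ifaceIndex}
	copy(sa.Addr[:], ip.To16())
	return sa
}
```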
@@ -97,6 +105,7 @@ func genPseudoFlags() error { LocalMAC = deviceInfo.MAC.String() LocalIPv6 = deviceInfo.IPv6Addr.String() + LocalInterfaceID = deviceInfo.ID if deviceInfo.IPv4Net != nil { IPv4PrefixLength, _ = deviceInfo.IPv4Net.Mask.Size() diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go index 224feef85..bd53ad90b 100644 --- a/test/packetimpact/tests/udp_send_recv_dgram_test.go +++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go @@ -28,62 +28,74 @@ func init() { testbench.RegisterFlags(flag.CommandLine) } -func TestUDPRecv(t *testing.T) { +type udpConn interface { + Send(testbench.UDP, ...testbench.Layer) + ExpectData(testbench.UDP, testbench.Payload, time.Duration) (testbench.Layers, error) + Drain() + Close() +} + +func TestUDP(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) - defer dut.Close(boundFD) - conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer conn.Close() - testCases := []struct { - name string - payload []byte - }{ - {"emptypayload", nil}, - {"small payload", []byte("hello world")}, - {"1kPayload", testbench.GenerateRandomPayload(t, 1<<10)}, - // Even though UDP allows larger dgrams we don't test it here as - // they need to be fragmented and written out as individual - // frames. - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - conn.Send(testbench.UDP{}, &testbench.Payload{Bytes: tc.payload}) - if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), string(tc.payload); got != want { - t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want) + for _, isIPv4 := range []bool{true, false} { + ipVersionName := "IPv6" + if isIPv4 { + ipVersionName = "IPv4" + } + t.Run(ipVersionName, func(t *testing.T) { + var addr string + if isIPv4 { + addr = testbench.RemoteIPv4 + } else { + addr = testbench.RemoteIPv6 } - }) - } -} + boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(addr)) + defer dut.Close(boundFD) -func TestUDPSend(t *testing.T) { - dut := testbench.NewDUT(t) - defer dut.TearDown() - boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) - defer dut.Close(boundFD) - conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer conn.Close() + var conn udpConn + var localAddr unix.Sockaddr + if isIPv4 { + v4Conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) + localAddr = v4Conn.LocalAddr() + conn = &v4Conn + } else { + v6Conn := testbench.NewUDPIPv6(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) + localAddr = v6Conn.LocalAddr() + conn = &v6Conn + } + defer conn.Close() - testCases := []struct { - name string - payload []byte - }{ - {"emptypayload", nil}, - {"small payload", []byte("hello world")}, - {"1kPayload", testbench.GenerateRandomPayload(t, 1<<10)}, - // Even though UDP allows larger dgrams we don't test it here as - // they need to be fragmented and written out as individual - // frames. 
- } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - conn.Drain() - if got, want := int(dut.SendTo(boundFD, tc.payload, 0, conn.LocalAddr())), len(tc.payload); got != want { - t.Fatalf("short write got: %d, want: %d", got, want) + testCases := []struct { + name string + payload []byte + }{ + {"emptypayload", nil}, + {"small payload", []byte("hello world")}, + {"1kPayload", testbench.GenerateRandomPayload(t, 1<<10)}, + // Even though UDP allows larger dgrams we don't test it here as + // they need to be fragmented and written out as individual + // frames. } - if _, err := conn.ExpectData(testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: tc.payload}, 1*time.Second); err != nil { - t.Fatal(err) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Run("Send", func(t *testing.T) { + conn.Send(testbench.UDP{}, &testbench.Payload{Bytes: tc.payload}) + if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), string(tc.payload); got != want { + t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want) + } + }) + t.Run("Recv", func(t *testing.T) { + conn.Drain() + if got, want := int(dut.SendTo(boundFD, tc.payload, 0, localAddr)), len(tc.payload); got != want { + t.Fatalf("short write got: %d, want: %d", got, want) + } + if _, err := conn.ExpectData(testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: tc.payload}, time.Second); err != nil { + t.Fatal(err) + } + }) + }) } }) } -- cgit v1.2.3 From 221e1da947f8f892e29d1ff0a382b7a171ab0437 Mon Sep 17 00:00:00 2001 From: Zeling Feng Date: Tue, 14 Jul 2020 12:27:17 -0700 Subject: Test IPv6 fragment reassembly A packetimpact test for: "A node must be able to accept a fragmented packet that, after reassembly, is as large as 1500 octets." PiperOrigin-RevId: 321210729 --- test/packetimpact/testbench/layers.go | 172 +++++++++++++++++---- test/packetimpact/testbench/layers_test.go | 118 +++++++++++++- test/packetimpact/tests/BUILD | 14 ++ .../tests/icmpv6_param_problem_test.go | 8 +- .../tests/ipv6_fragment_reassembly_test.go | 168 ++++++++++++++++++++ .../tests/ipv6_unknown_options_action_test.go | 6 +- 6 files changed, 441 insertions(+), 45 deletions(-) create mode 100644 test/packetimpact/tests/ipv6_fragment_reassembly_test.go (limited to 'test') diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 95bafa876..645f6c1a9 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -15,6 +15,7 @@ package testbench import ( + "encoding/binary" "encoding/hex" "fmt" "reflect" @@ -470,21 +471,11 @@ func (l *IPv6) ToBytes() ([]byte, error) { if l.NextHeader != nil { fields.NextHeader = *l.NextHeader } else { - switch n := l.next().(type) { - case *TCP: - fields.NextHeader = uint8(header.TCPProtocolNumber) - case *UDP: - fields.NextHeader = uint8(header.UDPProtocolNumber) - case *ICMPv6: - fields.NextHeader = uint8(header.ICMPv6ProtocolNumber) - case *IPv6HopByHopOptionsExtHdr: - fields.NextHeader = uint8(header.IPv6HopByHopOptionsExtHdrIdentifier) - case *IPv6DestinationOptionsExtHdr: - fields.NextHeader = uint8(header.IPv6DestinationOptionsExtHdrIdentifier) - default: - // TODO(b/150301488): Support more protocols as needed. 
- return nil, fmt.Errorf("ToBytes can't deduce the IPv6 header's next protocol: %#v", n) + nh, err := nextHeaderByLayer(l.next()) + if err != nil { + return nil, err } + fields.NextHeader = nh } if l.HopLimit != nil { fields.HopLimit = *l.HopLimit @@ -514,6 +505,8 @@ func nextIPv6PayloadParser(nextHeader uint8) layerParser { return parseIPv6HopByHopOptionsExtHdr case header.IPv6DestinationOptionsExtHdrIdentifier: return parseIPv6DestinationOptionsExtHdr + case header.IPv6FragmentExtHdrIdentifier: + return parseIPv6FragmentExtHdr } return parsePayload } @@ -566,14 +559,56 @@ type IPv6DestinationOptionsExtHdr struct { Options []byte } +// IPv6FragmentExtHdr can construct and match an IPv6 Fragment Extension Header. +type IPv6FragmentExtHdr struct { + LayerBase + NextHeader *header.IPv6ExtensionHeaderIdentifier + FragmentOffset *uint16 + MoreFragments *bool + Identification *uint32 +} + +// nextHeaderByLayer finds the correct next header protocol value for layer l. +func nextHeaderByLayer(l Layer) (uint8, error) { + if l == nil { + return uint8(header.IPv6NoNextHeaderIdentifier), nil + } + switch l.(type) { + case *TCP: + return uint8(header.TCPProtocolNumber), nil + case *UDP: + return uint8(header.UDPProtocolNumber), nil + case *ICMPv6: + return uint8(header.ICMPv6ProtocolNumber), nil + case *Payload: + return uint8(header.IPv6NoNextHeaderIdentifier), nil + case *IPv6HopByHopOptionsExtHdr: + return uint8(header.IPv6HopByHopOptionsExtHdrIdentifier), nil + case *IPv6DestinationOptionsExtHdr: + return uint8(header.IPv6DestinationOptionsExtHdrIdentifier), nil + case *IPv6FragmentExtHdr: + return uint8(header.IPv6FragmentExtHdrIdentifier), nil + default: + // TODO(b/161005083): Support more protocols as needed. + return 0, fmt.Errorf("failed to deduce the IPv6 header's next protocol: %T", l) + } +} + // ipv6OptionsExtHdrToBytes serializes an options extension header into bytes. -func ipv6OptionsExtHdrToBytes(nextHeader *header.IPv6ExtensionHeaderIdentifier, options []byte) []byte { +func ipv6OptionsExtHdrToBytes(nextHeader *header.IPv6ExtensionHeaderIdentifier, nextLayer Layer, options []byte) ([]byte, error) { length := len(options) + 2 + if length%8 != 0 { + return nil, fmt.Errorf("IPv6 extension headers must be a multiple of 8 octets long, but the length given: %d, options: %s", length, hex.Dump(options)) + } bytes := make([]byte, length) - if nextHeader == nil { - bytes[0] = byte(header.IPv6NoNextHeaderIdentifier) - } else { + if nextHeader != nil { bytes[0] = byte(*nextHeader) + } else { + nh, err := nextHeaderByLayer(nextLayer) + if err != nil { + return nil, err + } + bytes[0] = nh } // ExtHdrLen field is the length of the extension header // in 8-octet unit, ignoring the first 8 octets. @@ -581,7 +616,7 @@ func ipv6OptionsExtHdrToBytes(nextHeader *header.IPv6ExtensionHeaderIdentifier, // https://tools.ietf.org/html/rfc2460#section-4.6 bytes[1] = uint8((length - 8) / 8) copy(bytes[2:], options) - return bytes + return bytes, nil } // IPv6ExtHdrIdent is a helper routine that allocates a new @@ -591,14 +626,45 @@ func IPv6ExtHdrIdent(id header.IPv6ExtensionHeaderIdentifier) *header.IPv6Extens return &id } -// ToBytes implements Layer.ToBytes +// ToBytes implements Layer.ToBytes. func (l *IPv6HopByHopOptionsExtHdr) ToBytes() ([]byte, error) { - return ipv6OptionsExtHdrToBytes(l.NextHeader, l.Options), nil + return ipv6OptionsExtHdrToBytes(l.NextHeader, l.next(), l.Options) } -// ToBytes implements Layer.ToBytes +// ToBytes implements Layer.ToBytes. 
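The IPv6FragmentExtHdr added above is serialized further down as a fixed 8-octet header: a next header byte, a reserved byte, a 13-bit fragment offset packed with the M (more fragments) flag, and a 32-bit identification. A standalone sketch of that wire layout, not part of this patch; the values are chosen to match the test vectors added later in this commit:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	const (
		nextHeader     = 59  // IPv6 "No Next Header"
		fragmentOffset = 100 // in 8-octet units
		moreFragments  = true
		identification = 42
	)
	b := make([]byte, 8)
	b[0] = nextHeader
	b[1] = 0 // reserved
	offsetAndFlags := uint16(fragmentOffset) << 3 // the offset occupies the top 13 bits
	if moreFragments {
		offsetAndFlags |= 1 // M flag is the lowest bit; bits 1-2 are reserved
	}
	binary.BigEndian.PutUint16(b[2:], offsetAndFlags)
	binary.BigEndian.PutUint32(b[4:], identification)
	fmt.Printf("% x\n", b) // 3b 00 03 21 00 00 00 2a
}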
func (l *IPv6DestinationOptionsExtHdr) ToBytes() ([]byte, error) { - return ipv6OptionsExtHdrToBytes(l.NextHeader, l.Options), nil + return ipv6OptionsExtHdrToBytes(l.NextHeader, l.next(), l.Options) +} + +// ToBytes implements Layer.ToBytes. +func (l *IPv6FragmentExtHdr) ToBytes() ([]byte, error) { + var offset, mflag uint16 + var ident uint32 + bytes := make([]byte, header.IPv6FragmentExtHdrLength) + if l.NextHeader != nil { + bytes[0] = byte(*l.NextHeader) + } else { + nh, err := nextHeaderByLayer(l.next()) + if err != nil { + return nil, err + } + bytes[0] = nh + } + bytes[1] = 0 // reserved + if l.MoreFragments != nil && *l.MoreFragments { + mflag = 1 + } + if l.FragmentOffset != nil { + offset = *l.FragmentOffset + } + if l.Identification != nil { + ident = *l.Identification + } + offsetAndMflag := offset<<3 | mflag + binary.BigEndian.PutUint16(bytes[2:], offsetAndMflag) + binary.BigEndian.PutUint32(bytes[4:], ident) + + return bytes, nil } // parseIPv6ExtHdr parses an IPv6 extension header and returns the NextHeader @@ -631,6 +697,26 @@ func parseIPv6DestinationOptionsExtHdr(b []byte) (Layer, layerParser) { return &IPv6DestinationOptionsExtHdr{NextHeader: &nextHeader, Options: options}, nextParser } +// Bool is a helper routine that allocates a new +// bool value to store v and returns a pointer to it. +func Bool(v bool) *bool { + return &v +} + +// parseIPv6FragmentExtHdr parses the bytes assuming that they start +// with an IPv6 Fragment Extension Header. +func parseIPv6FragmentExtHdr(b []byte) (Layer, layerParser) { + nextHeader := b[0] + var extHdr header.IPv6FragmentExtHdr + copy(extHdr[:], b[2:]) + return &IPv6FragmentExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6ExtensionHeaderIdentifier(nextHeader)), + FragmentOffset: Uint16(extHdr.FragmentOffset()), + MoreFragments: Bool(extHdr.More()), + Identification: Uint32(extHdr.ID()), + }, nextIPv6PayloadParser(nextHeader) +} + func (l *IPv6HopByHopOptionsExtHdr) length() int { return len(l.Options) + 2 } @@ -667,13 +753,31 @@ func (l *IPv6DestinationOptionsExtHdr) String() string { return stringLayer(l) } +func (*IPv6FragmentExtHdr) length() int { + return header.IPv6FragmentExtHdrLength +} + +func (l *IPv6FragmentExtHdr) match(other Layer) bool { + return equalLayer(l, other) +} + +// merge overrides the values in l with the values from other but only in fields +// where the value is not nil. +func (l *IPv6FragmentExtHdr) merge(other Layer) error { + return mergeLayer(l, other) +} + +func (l *IPv6FragmentExtHdr) String() string { + return stringLayer(l) +} + // ICMPv6 can construct and match an ICMPv6 encapsulation. type ICMPv6 struct { LayerBase - Type *header.ICMPv6Type - Code *byte - Checksum *uint16 - NDPPayload []byte + Type *header.ICMPv6Type + Code *byte + Checksum *uint16 + Payload []byte } func (l *ICMPv6) String() string { @@ -684,7 +788,7 @@ func (l *ICMPv6) String() string { // ToBytes implements Layer.ToBytes. 
func (l *ICMPv6) ToBytes() ([]byte, error) { - b := make([]byte, header.ICMPv6HeaderSize+len(l.NDPPayload)) + b := make([]byte, header.ICMPv6HeaderSize+len(l.Payload)) h := header.ICMPv6(b) if l.Type != nil { h.SetType(*l.Type) @@ -692,7 +796,7 @@ func (l *ICMPv6) ToBytes() ([]byte, error) { if l.Code != nil { h.SetCode(*l.Code) } - copy(h.NDPPayload(), l.NDPPayload) + copy(h.NDPPayload(), l.Payload) if l.Checksum != nil { h.SetChecksum(*l.Checksum) } else { @@ -725,10 +829,10 @@ func Byte(v byte) *byte { func parseICMPv6(b []byte) (Layer, layerParser) { h := header.ICMPv6(b) icmpv6 := ICMPv6{ - Type: ICMPv6Type(h.Type()), - Code: Byte(h.Code()), - Checksum: Uint16(h.Checksum()), - NDPPayload: h.NDPPayload(), + Type: ICMPv6Type(h.Type()), + Code: Byte(h.Code()), + Checksum: Uint16(h.Checksum()), + Payload: h.NDPPayload(), } return &icmpv6, nil } @@ -738,7 +842,7 @@ func (l *ICMPv6) match(other Layer) bool { } func (l *ICMPv6) length() int { - return header.ICMPv6HeaderSize + len(l.NDPPayload) + return header.ICMPv6HeaderSize + len(l.Payload) } // merge overrides the values in l with the values from other but only in fields diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index 382a983a1..a2a763034 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -593,10 +593,107 @@ func TestIPv6ExtHdrOptions(t *testing.T) { Options: []byte{0x05, 0x02, 0x00, 0x00, 0x01, 0x00}, }, &ICMPv6{ - Type: ICMPv6Type(header.ICMPv6ParamProblem), - Code: Byte(0), - Checksum: Uint16(0x5f98), - NDPPayload: []byte{0x00, 0x00, 0x00, 0x06}, + Type: ICMPv6Type(header.ICMPv6ParamProblem), + Code: Byte(0), + Checksum: Uint16(0x5f98), + Payload: []byte{0x00, 0x00, 0x00, 0x06}, + }, + }, + }, + { + description: "IPv6/HopByHop/Fragment", + wantBytes: []byte{ + // IPv6 Header + 0x60, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xad, 0xbe, 0xef, + // HopByHop Options + 0x2c, 0x00, 0x05, 0x02, 0x00, 0x00, 0x01, 0x00, + // Fragment ExtHdr + 0x3b, 0x00, 0x03, 0x20, 0x00, 0x00, 0x00, 0x2a, + }, + wantLayers: []Layer{ + &IPv6{ + SrcAddr: Address(tcpip.Address(net.ParseIP("::1"))), + DstAddr: Address(tcpip.Address(net.ParseIP("fe80::dead:beef"))), + }, + &IPv6HopByHopOptionsExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6FragmentExtHdrIdentifier), + Options: []byte{0x05, 0x02, 0x00, 0x00, 0x01, 0x00}, + }, + &IPv6FragmentExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6NoNextHeaderIdentifier), + FragmentOffset: Uint16(100), + MoreFragments: Bool(false), + Identification: Uint32(42), + }, + &Payload{ + Bytes: nil, + }, + }, + }, + { + description: "IPv6/DestOpt/Fragment/Payload", + wantBytes: []byte{ + // IPv6 Header + 0x60, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x3c, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xad, 0xbe, 0xef, + // Destination Options + 0x2c, 0x00, 0x05, 0x02, 0x00, 0x00, 0x01, 0x00, + // Fragment ExtHdr + 0x3b, 0x00, 0x03, 0x21, 0x00, 0x00, 0x00, 0x2a, + // Sample Data + 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x20, 0x44, 0x61, 0x74, 0x61, + }, + wantLayers: []Layer{ + &IPv6{ + SrcAddr: Address(tcpip.Address(net.ParseIP("::1"))), + DstAddr: Address(tcpip.Address(net.ParseIP("fe80::dead:beef"))), + }, + 
&IPv6DestinationOptionsExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6FragmentExtHdrIdentifier), + Options: []byte{0x05, 0x02, 0x00, 0x00, 0x01, 0x00}, + }, + &IPv6FragmentExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6NoNextHeaderIdentifier), + FragmentOffset: Uint16(100), + MoreFragments: Bool(true), + Identification: Uint32(42), + }, + &Payload{ + Bytes: []byte("Sample Data"), + }, + }, + }, + { + description: "IPv6/Fragment/Payload", + wantBytes: []byte{ + // IPv6 Header + 0x60, 0x00, 0x00, 0x00, 0x00, 0x13, 0x2c, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xad, 0xbe, 0xef, + // Fragment ExtHdr + 0x3b, 0x00, 0x03, 0x21, 0x00, 0x00, 0x00, 0x2a, + // Sample Data + 0x53, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x20, 0x44, 0x61, 0x74, 0x61, + }, + wantLayers: []Layer{ + &IPv6{ + SrcAddr: Address(tcpip.Address(net.ParseIP("::1"))), + DstAddr: Address(tcpip.Address(net.ParseIP("fe80::dead:beef"))), + }, + &IPv6FragmentExtHdr{ + NextHeader: IPv6ExtHdrIdent(header.IPv6NoNextHeaderIdentifier), + FragmentOffset: Uint16(100), + MoreFragments: Bool(true), + Identification: Uint32(42), + }, + &Payload{ + Bytes: []byte("Sample Data"), }, }, }, @@ -606,6 +703,19 @@ func TestIPv6ExtHdrOptions(t *testing.T) { if !layers.match(tt.wantLayers) { t.Fatalf("match failed with diff: %s", layers.diff(tt.wantLayers)) } + // Make sure we can generate correct next header values and checksums + for _, layer := range layers { + switch layer := layer.(type) { + case *IPv6HopByHopOptionsExtHdr: + layer.NextHeader = nil + case *IPv6DestinationOptionsExtHdr: + layer.NextHeader = nil + case *IPv6FragmentExtHdr: + layer.NextHeader = nil + case *ICMPv6: + layer.Checksum = nil + } + } gotBytes, err := layers.ToBytes() if err != nil { t.Fatalf("ToBytes() failed on %s: %s", &layers, err) diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 3f538b5c6..8051562b2 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -254,6 +254,20 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "ipv6_fragment_reassembly", + srcs = ["ipv6_fragment_reassembly_test.go"], + # TODO(b/160919104): Fix netstack then remove the line below. + expect_netstack_failure = True, + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/buffer", + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "udp_send_recv_dgram", srcs = ["udp_send_recv_dgram_test.go"], diff --git a/test/packetimpact/tests/icmpv6_param_problem_test.go b/test/packetimpact/tests/icmpv6_param_problem_test.go index 4d1d9a7f5..8dfd26ee8 100644 --- a/test/packetimpact/tests/icmpv6_param_problem_test.go +++ b/test/packetimpact/tests/icmpv6_param_problem_test.go @@ -41,8 +41,8 @@ func TestICMPv6ParamProblemTest(t *testing.T) { NextHeader: testbench.Uint8(254), } icmpv6 := testbench.ICMPv6{ - Type: testbench.ICMPv6Type(header.ICMPv6EchoRequest), - NDPPayload: []byte("hello world"), + Type: testbench.ICMPv6Type(header.ICMPv6EchoRequest), + Payload: []byte("hello world"), } toSend := (*testbench.Connection)(&conn).CreateFrame(testbench.Layers{&ipv6}, &icmpv6) @@ -62,8 +62,8 @@ func TestICMPv6ParamProblemTest(t *testing.T) { binary.BigEndian.PutUint32(b, header.IPv6NextHeaderOffset) expectedPayload = append(b, expectedPayload...) 
expectedICMPv6 := testbench.ICMPv6{ - Type: testbench.ICMPv6Type(header.ICMPv6ParamProblem), - NDPPayload: expectedPayload, + Type: testbench.ICMPv6Type(header.ICMPv6ParamProblem), + Payload: expectedPayload, } paramProblem := testbench.Layers{ diff --git a/test/packetimpact/tests/ipv6_fragment_reassembly_test.go b/test/packetimpact/tests/ipv6_fragment_reassembly_test.go new file mode 100644 index 000000000..7b462c8e2 --- /dev/null +++ b/test/packetimpact/tests/ipv6_fragment_reassembly_test.go @@ -0,0 +1,168 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipv6_fragment_reassembly_test + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "flag" + "net" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +const ( + // The payload length for the first fragment we send. This number + // is a multiple of 8 near 750 (half of 1500). + firstPayloadLength = 752 + // The ID field for our outgoing fragments. + fragmentID = 1 + // A node must be able to accept a fragmented packet that, + // after reassembly, is as large as 1500 octets. + reassemblyCap = 1500 +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +func TestIPv6FragmentReassembly(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{}) + defer conn.Close() + + firstPayloadToSend := make([]byte, firstPayloadLength) + for i := range firstPayloadToSend { + firstPayloadToSend[i] = 'A' + } + + secondPayloadLength := reassemblyCap - firstPayloadLength - header.ICMPv6EchoMinimumSize + secondPayloadToSend := firstPayloadToSend[:secondPayloadLength] + + icmpv6EchoPayload := make([]byte, 4) + binary.BigEndian.PutUint16(icmpv6EchoPayload[0:], 0) + binary.BigEndian.PutUint16(icmpv6EchoPayload[2:], 0) + icmpv6EchoPayload = append(icmpv6EchoPayload, firstPayloadToSend...) 
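	// The sizes in this test are chosen so the two fragments reassemble to
	// exactly reassemblyCap = 1500 octets of IPv6 payload: the first fragment
	// carries the 8-byte ICMPv6 echo header plus firstPayloadLength (752)
	// bytes, keeping its length a multiple of 8 octets as required for
	// non-final fragments, and the second fragment carries
	// 1500 - 752 - 8 = 740 bytes at fragment offset (752 + 8) / 8 = 95
	// eight-octet units.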
+ + lIP := tcpip.Address(net.ParseIP(testbench.LocalIPv6).To16()) + rIP := tcpip.Address(net.ParseIP(testbench.RemoteIPv6).To16()) + icmpv6 := testbench.ICMPv6{ + Type: testbench.ICMPv6Type(header.ICMPv6EchoRequest), + Code: testbench.Byte(0), + Payload: icmpv6EchoPayload, + } + icmpv6Bytes, err := icmpv6.ToBytes() + if err != nil { + t.Fatalf("failed to serialize ICMPv6: %s", err) + } + cksum := header.ICMPv6Checksum( + header.ICMPv6(icmpv6Bytes), + lIP, + rIP, + buffer.NewVectorisedView(len(secondPayloadToSend), []buffer.View{secondPayloadToSend}), + ) + + conn.Send(testbench.IPv6{}, + &testbench.IPv6FragmentExtHdr{ + FragmentOffset: testbench.Uint16(0), + MoreFragments: testbench.Bool(true), + Identification: testbench.Uint32(fragmentID), + }, + &testbench.ICMPv6{ + Type: testbench.ICMPv6Type(header.ICMPv6EchoRequest), + Code: testbench.Byte(0), + Payload: icmpv6EchoPayload, + Checksum: &cksum, + }) + + icmpv6ProtoNum := header.IPv6ExtensionHeaderIdentifier(header.ICMPv6ProtocolNumber) + + conn.Send(testbench.IPv6{}, + &testbench.IPv6FragmentExtHdr{ + NextHeader: &icmpv6ProtoNum, + FragmentOffset: testbench.Uint16((firstPayloadLength + header.ICMPv6EchoMinimumSize) / 8), + MoreFragments: testbench.Bool(false), + Identification: testbench.Uint32(fragmentID), + }, + &testbench.Payload{ + Bytes: secondPayloadToSend, + }) + + gotEchoReplyFirstPart, err := conn.ExpectFrame(testbench.Layers{ + &testbench.Ether{}, + &testbench.IPv6{}, + &testbench.IPv6FragmentExtHdr{ + FragmentOffset: testbench.Uint16(0), + MoreFragments: testbench.Bool(true), + }, + &testbench.ICMPv6{ + Type: testbench.ICMPv6Type(header.ICMPv6EchoReply), + Code: testbench.Byte(0), + }, + }, time.Second) + if err != nil { + t.Fatalf("expected a fragmented ICMPv6 Echo Reply, but got none: %s", err) + } + + id := *gotEchoReplyFirstPart[2].(*testbench.IPv6FragmentExtHdr).Identification + gotFirstPayload, err := gotEchoReplyFirstPart[len(gotEchoReplyFirstPart)-1].ToBytes() + if err != nil { + t.Fatalf("failed to serialize ICMPv6: %s", err) + } + icmpPayload := gotFirstPayload[header.ICMPv6EchoMinimumSize:] + receivedLen := len(icmpPayload) + wantSecondPayloadLen := reassemblyCap - header.ICMPv6EchoMinimumSize - receivedLen + wantFirstPayload := make([]byte, receivedLen) + for i := range wantFirstPayload { + wantFirstPayload[i] = 'A' + } + wantSecondPayload := wantFirstPayload[:wantSecondPayloadLen] + if !bytes.Equal(icmpPayload, wantFirstPayload) { + t.Fatalf("received unexpected payload, got: %s, want: %s", + hex.Dump(icmpPayload), + hex.Dump(wantFirstPayload)) + } + + gotEchoReplySecondPart, err := conn.ExpectFrame(testbench.Layers{ + &testbench.Ether{}, + &testbench.IPv6{}, + &testbench.IPv6FragmentExtHdr{ + NextHeader: &icmpv6ProtoNum, + FragmentOffset: testbench.Uint16(uint16((receivedLen + header.ICMPv6EchoMinimumSize) / 8)), + MoreFragments: testbench.Bool(false), + Identification: &id, + }, + &testbench.ICMPv6{}, + }, time.Second) + if err != nil { + t.Fatalf("expected the rest of ICMPv6 Echo Reply, but got none: %s", err) + } + secondPayload, err := gotEchoReplySecondPart[len(gotEchoReplySecondPart)-1].ToBytes() + if err != nil { + t.Fatalf("failed to serialize ICMPv6 Echo Reply: %s", err) + } + if !bytes.Equal(secondPayload, wantSecondPayload) { + t.Fatalf("received unexpected payload, got: %s, want: %s", + hex.Dump(secondPayload), + hex.Dump(wantSecondPayload)) + } +} diff --git a/test/packetimpact/tests/ipv6_unknown_options_action_test.go b/test/packetimpact/tests/ipv6_unknown_options_action_test.go index 
d301d8829..100b30ad7 100644 --- a/test/packetimpact/tests/ipv6_unknown_options_action_test.go +++ b/test/packetimpact/tests/ipv6_unknown_options_action_test.go @@ -171,9 +171,9 @@ func TestIPv6UnknownOptionAction(t *testing.T) { &tb.Ether{}, &tb.IPv6{}, &tb.ICMPv6{ - Type: tb.ICMPv6Type(header.ICMPv6ParamProblem), - Code: tb.Byte(2), - NDPPayload: icmpv6Payload, + Type: tb.ICMPv6Type(header.ICMPv6ParamProblem), + Code: tb.Byte(2), + Payload: icmpv6Payload, }, }, time.Second) if tt.wantICMPv6 && err != nil { -- cgit v1.2.3 From fef90c61c6186c113cfdb0bbcf53f4ca70f9741a Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 15 Jul 2020 14:13:42 -0700 Subject: Fix minor bugs in a couple of interface IOCTLs. gVisor incorrectly returns the wrong ARP type for SIOGIFHWADDR. This breaks tcpdump as it tries to interpret the packets incorrectly. Similarly, SIOCETHTOOL is used by tcpdump to query interface properties which fails with an EINVAL since we don't implement it. For now change it to return EOPNOTSUPP to indicate that we don't support the query rather than return EINVAL. NOTE: ARPHRD types for link endpoints are distinct from NIC capabilities and NIC flags. In Linux all 3 exist eg. ARPHRD types are stored in dev->type field while NIC capabilities are more like the device features which can be queried using SIOCETHTOOL but not modified and NIC Flags are fields that can be modified from user space. eg. NIC status (UP/DOWN/MULTICAST/BROADCAST) etc. Updates #2746 PiperOrigin-RevId: 321436525 --- pkg/abi/linux/ioctl.go | 27 +++++++++-- pkg/abi/linux/netlink_route.go | 2 + pkg/sentry/socket/netstack/netstack.go | 74 ++++++++++++++++++------------ pkg/sentry/socket/netstack/stack.go | 22 ++++++--- pkg/tcpip/header/arp.go | 77 +++++++++++++++++++++----------- pkg/tcpip/link/channel/BUILD | 1 + pkg/tcpip/link/channel/channel.go | 6 +++ pkg/tcpip/link/fdbased/endpoint.go | 8 ++++ pkg/tcpip/link/loopback/loopback.go | 5 +++ pkg/tcpip/link/muxed/BUILD | 1 + pkg/tcpip/link/muxed/injectable.go | 6 +++ pkg/tcpip/link/nested/BUILD | 1 + pkg/tcpip/link/nested/nested.go | 6 +++ pkg/tcpip/link/qdisc/fifo/BUILD | 1 + pkg/tcpip/link/qdisc/fifo/endpoint.go | 6 +++ pkg/tcpip/link/sharedmem/sharedmem.go | 5 +++ pkg/tcpip/link/tun/device.go | 10 +++++ pkg/tcpip/link/waitable/BUILD | 2 + pkg/tcpip/link/waitable/waitable.go | 6 +++ pkg/tcpip/link/waitable/waitable_test.go | 6 +++ pkg/tcpip/network/ip_test.go | 9 +++- pkg/tcpip/stack/forwarder_test.go | 6 +++ pkg/tcpip/stack/nic_test.go | 5 +++ pkg/tcpip/stack/registration.go | 7 +++ pkg/tcpip/stack/stack.go | 6 +++ test/syscalls/linux/socket_netdevice.cc | 23 ++++++++++ 26 files changed, 263 insertions(+), 65 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go index 2062e6a4b..2c5e56ae5 100644 --- a/pkg/abi/linux/ioctl.go +++ b/pkg/abi/linux/ioctl.go @@ -67,10 +67,29 @@ const ( // ioctl(2) requests provided by uapi/linux/sockios.h const ( - SIOCGIFMEM = 0x891f - SIOCGIFPFLAGS = 0x8935 - SIOCGMIIPHY = 0x8947 - SIOCGMIIREG = 0x8948 + SIOCGIFNAME = 0x8910 + SIOCGIFCONF = 0x8912 + SIOCGIFFLAGS = 0x8913 + SIOCGIFADDR = 0x8915 + SIOCGIFDSTADDR = 0x8917 + SIOCGIFBRDADDR = 0x8919 + SIOCGIFNETMASK = 0x891b + SIOCGIFMETRIC = 0x891d + SIOCGIFMTU = 0x8921 + SIOCGIFMEM = 0x891f + SIOCGIFHWADDR = 0x8927 + SIOCGIFINDEX = 0x8933 + SIOCGIFPFLAGS = 0x8935 + SIOCGIFTXQLEN = 0x8942 + SIOCETHTOOL = 0x8946 + SIOCGMIIPHY = 0x8947 + SIOCGMIIREG = 0x8948 + SIOCGIFMAP = 0x8970 +) + +// ioctl(2) requests provided by uapi/asm-generic/sockios.h +const 
( + SIOCGSTAMP = 0x8906 ) // ioctl(2) directions. Used to calculate requests number. diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go index 40bec566c..ceda0a8d3 100644 --- a/pkg/abi/linux/netlink_route.go +++ b/pkg/abi/linux/netlink_route.go @@ -187,6 +187,8 @@ const ( // Device types, from uapi/linux/if_arp.h. const ( + ARPHRD_NONE = 65534 + ARPHRD_ETHER = 1 ARPHRD_LOOPBACK = 772 ) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 78a842973..0b1be1bd2 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -2747,7 +2747,7 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy // sockets. // TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP. switch args[1].Int() { - case syscall.SIOCGSTAMP: + case linux.SIOCGSTAMP: s.readMu.Lock() defer s.readMu.Unlock() if !s.timestampValid { @@ -2788,18 +2788,19 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy // Ioctl performs a socket ioctl. func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { switch arg := int(args[1].Int()); arg { - case syscall.SIOCGIFFLAGS, - syscall.SIOCGIFADDR, - syscall.SIOCGIFBRDADDR, - syscall.SIOCGIFDSTADDR, - syscall.SIOCGIFHWADDR, - syscall.SIOCGIFINDEX, - syscall.SIOCGIFMAP, - syscall.SIOCGIFMETRIC, - syscall.SIOCGIFMTU, - syscall.SIOCGIFNAME, - syscall.SIOCGIFNETMASK, - syscall.SIOCGIFTXQLEN: + case linux.SIOCGIFFLAGS, + linux.SIOCGIFADDR, + linux.SIOCGIFBRDADDR, + linux.SIOCGIFDSTADDR, + linux.SIOCGIFHWADDR, + linux.SIOCGIFINDEX, + linux.SIOCGIFMAP, + linux.SIOCGIFMETRIC, + linux.SIOCGIFMTU, + linux.SIOCGIFNAME, + linux.SIOCGIFNETMASK, + linux.SIOCGIFTXQLEN, + linux.SIOCETHTOOL: var ifr linux.IFReq if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifr, usermem.IOOpts{ @@ -2815,7 +2816,7 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc }) return 0, err - case syscall.SIOCGIFCONF: + case linux.SIOCGIFCONF: // Return a list of interface addresses or the buffer size // necessary to hold the list. var ifc linux.IFConf @@ -2889,7 +2890,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe // SIOCGIFNAME uses ifr.ifr_ifindex rather than ifr.ifr_name to // identify a device. - if arg == syscall.SIOCGIFNAME { + if arg == linux.SIOCGIFNAME { // Gets the name of the interface given the interface index // stored in ifr_ifindex. index = int32(usermem.ByteOrder.Uint32(ifr.Data[:4])) @@ -2912,21 +2913,28 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe } switch arg { - case syscall.SIOCGIFINDEX: + case linux.SIOCGIFINDEX: // Copy out the index to the data. usermem.ByteOrder.PutUint32(ifr.Data[:], uint32(index)) - case syscall.SIOCGIFHWADDR: + case linux.SIOCGIFHWADDR: // Copy the hardware address out. - ifr.Data[0] = 6 // IEEE802.2 arp type. - ifr.Data[1] = 0 + // + // Refer: https://linux.die.net/man/7/netdevice + // SIOCGIFHWADDR, SIOCSIFHWADDR + // + // Get or set the hardware address of a device using + // ifr_hwaddr. The hardware address is specified in a struct + // sockaddr. sa_family contains the ARPHRD_* device type, + // sa_data the L2 hardware address starting from byte 0. Setting + // the hardware address is a privileged operation. 
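The netdevice(7) behavior quoted above is visible from user space with a plain ioctl. A hedged standalone sketch, not part of this patch; the ifreq layout is spelled out by hand and assumed to match Linux's struct ifreq, and for the loopback device the returned family is ARPHRD_LOOPBACK (772), which is what the updated socket_netdevice test later in this commit asserts:

package main

import (
	"fmt"
	"unsafe"

	"golang.org/x/sys/unix"
)

// ifreqHwaddr mirrors the part of struct ifreq that SIOCGIFHWADDR fills in:
// the interface name followed by a sockaddr whose sa_family carries the
// ARPHRD_* device type and whose sa_data carries the L2 address.
type ifreqHwaddr struct {
	Name   [unix.IFNAMSIZ]byte
	Hwaddr unix.RawSockaddr
}

func main() {
	fd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fd)

	var req ifreqHwaddr
	copy(req.Name[:], "lo")
	if _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), unix.SIOCGIFHWADDR, uintptr(unsafe.Pointer(&req))); errno != 0 {
		panic(errno)
	}
	fmt.Println("ARPHRD type:", req.Hwaddr.Family) // 772 (ARPHRD_LOOPBACK) for "lo"
}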
+ usermem.ByteOrder.PutUint16(ifr.Data[:], iface.DeviceType) n := copy(ifr.Data[2:], iface.Addr) for i := 2 + n; i < len(ifr.Data); i++ { ifr.Data[i] = 0 // Clear padding. } - usermem.ByteOrder.PutUint16(ifr.Data[:2], uint16(n)) - case syscall.SIOCGIFFLAGS: + case linux.SIOCGIFFLAGS: f, err := interfaceStatusFlags(stack, iface.Name) if err != nil { return err @@ -2935,7 +2943,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe // matches Linux behavior. usermem.ByteOrder.PutUint16(ifr.Data[:2], uint16(f)) - case syscall.SIOCGIFADDR: + case linux.SIOCGIFADDR: // Copy the IPv4 address out. for _, addr := range stack.InterfaceAddrs()[index] { // This ioctl is only compatible with AF_INET addresses. @@ -2946,32 +2954,32 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe break } - case syscall.SIOCGIFMETRIC: + case linux.SIOCGIFMETRIC: // Gets the metric of the device. As per netdevice(7), this // always just sets ifr_metric to 0. usermem.ByteOrder.PutUint32(ifr.Data[:4], 0) - case syscall.SIOCGIFMTU: + case linux.SIOCGIFMTU: // Gets the MTU of the device. usermem.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU) - case syscall.SIOCGIFMAP: + case linux.SIOCGIFMAP: // Gets the hardware parameters of the device. // TODO(gvisor.dev/issue/505): Implement. - case syscall.SIOCGIFTXQLEN: + case linux.SIOCGIFTXQLEN: // Gets the transmit queue length of the device. // TODO(gvisor.dev/issue/505): Implement. - case syscall.SIOCGIFDSTADDR: + case linux.SIOCGIFDSTADDR: // Gets the destination address of a point-to-point device. // TODO(gvisor.dev/issue/505): Implement. - case syscall.SIOCGIFBRDADDR: + case linux.SIOCGIFBRDADDR: // Gets the broadcast address of a device. // TODO(gvisor.dev/issue/505): Implement. - case syscall.SIOCGIFNETMASK: + case linux.SIOCGIFNETMASK: // Gets the network mask of a device. for _, addr := range stack.InterfaceAddrs()[index] { // This ioctl is only compatible with AF_INET addresses. @@ -2988,6 +2996,14 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe break } + case linux.SIOCETHTOOL: + // Stubbed out for now, Ideally we should implement the required + // sub-commands for ETHTOOL + // + // See: + // https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/net/core/dev_ioctl.c + return syserr.ErrEndpointOperation + default: // Not a valid call. return syserr.ErrInvalidArgument diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 548442b96..67737ae87 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -15,6 +15,8 @@ package netstack import ( + "fmt" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -40,19 +42,29 @@ func (s *Stack) SupportsIPv6() bool { return s.Stack.CheckNetworkProtocol(ipv6.ProtocolNumber) } +// Converts Netstack's ARPHardwareType to equivalent linux constants. +func toLinuxARPHardwareType(t header.ARPHardwareType) uint16 { + switch t { + case header.ARPHardwareNone: + return linux.ARPHRD_NONE + case header.ARPHardwareLoopback: + return linux.ARPHRD_LOOPBACK + case header.ARPHardwareEther: + return linux.ARPHRD_ETHER + default: + panic(fmt.Sprintf("unknown ARPHRD type: %d", t)) + } +} + // Interfaces implements inet.Stack.Interfaces. 
func (s *Stack) Interfaces() map[int32]inet.Interface { is := make(map[int32]inet.Interface) for id, ni := range s.Stack.NICInfo() { - var devType uint16 - if ni.Flags.Loopback { - devType = linux.ARPHRD_LOOPBACK - } is[int32(id)] = inet.Interface{ Name: ni.Name, Addr: []byte(ni.LinkAddress), Flags: uint32(nicStateFlagsToLinux(ni.Flags)), - DeviceType: devType, + DeviceType: toLinuxARPHardwareType(ni.ARPHardwareType), MTU: ni.MTU, } } diff --git a/pkg/tcpip/header/arp.go b/pkg/tcpip/header/arp.go index 718a4720a..83189676e 100644 --- a/pkg/tcpip/header/arp.go +++ b/pkg/tcpip/header/arp.go @@ -14,14 +14,33 @@ package header -import "gvisor.dev/gvisor/pkg/tcpip" +import ( + "encoding/binary" + + "gvisor.dev/gvisor/pkg/tcpip" +) const ( // ARPProtocolNumber is the ARP network protocol number. ARPProtocolNumber tcpip.NetworkProtocolNumber = 0x0806 // ARPSize is the size of an IPv4-over-Ethernet ARP packet. - ARPSize = 2 + 2 + 1 + 1 + 2 + 2*6 + 2*4 + ARPSize = 28 +) + +// ARPHardwareType is the hardware type for LinkEndpoint in an ARP header. +type ARPHardwareType uint16 + +// Typical ARP HardwareType values. Some of the constants have to be specific +// values as they are egressed on the wire in the HTYPE field of an ARP header. +const ( + ARPHardwareNone ARPHardwareType = 0 + // ARPHardwareEther specifically is the HTYPE for Ethernet as specified + // in the IANA list here: + // + // https://www.iana.org/assignments/arp-parameters/arp-parameters.xhtml#arp-parameters-2 + ARPHardwareEther ARPHardwareType = 1 + ARPHardwareLoopback ARPHardwareType = 2 ) // ARPOp is an ARP opcode. @@ -36,54 +55,64 @@ const ( // ARP is an ARP packet stored in a byte array as described in RFC 826. type ARP []byte -func (a ARP) hardwareAddressSpace() uint16 { return uint16(a[0])<<8 | uint16(a[1]) } -func (a ARP) protocolAddressSpace() uint16 { return uint16(a[2])<<8 | uint16(a[3]) } -func (a ARP) hardwareAddressSize() int { return int(a[4]) } -func (a ARP) protocolAddressSize() int { return int(a[5]) } +const ( + hTypeOffset = 0 + protocolOffset = 2 + haAddressSizeOffset = 4 + protoAddressSizeOffset = 5 + opCodeOffset = 6 + senderHAAddressOffset = 8 + senderProtocolAddressOffset = senderHAAddressOffset + EthernetAddressSize + targetHAAddressOffset = senderProtocolAddressOffset + IPv4AddressSize + targetProtocolAddressOffset = targetHAAddressOffset + EthernetAddressSize +) + +func (a ARP) hardwareAddressType() ARPHardwareType { + return ARPHardwareType(binary.BigEndian.Uint16(a[hTypeOffset:])) +} + +func (a ARP) protocolAddressSpace() uint16 { return binary.BigEndian.Uint16(a[protocolOffset:]) } +func (a ARP) hardwareAddressSize() int { return int(a[haAddressSizeOffset]) } +func (a ARP) protocolAddressSize() int { return int(a[protoAddressSizeOffset]) } // Op is the ARP opcode. -func (a ARP) Op() ARPOp { return ARPOp(a[6])<<8 | ARPOp(a[7]) } +func (a ARP) Op() ARPOp { return ARPOp(binary.BigEndian.Uint16(a[opCodeOffset:])) } // SetOp sets the ARP opcode. func (a ARP) SetOp(op ARPOp) { - a[6] = uint8(op >> 8) - a[7] = uint8(op) + binary.BigEndian.PutUint16(a[opCodeOffset:], uint16(op)) } // SetIPv4OverEthernet configures the ARP packet for IPv4-over-Ethernet. 
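The named offsets above replace the previous hand-counted constants and describe the standard 28-octet IPv4-over-Ethernet ARP packet. A standalone sketch of that layout using the same offsets, not part of this patch; the MAC and IP addresses are illustrative:

package main

import (
	"encoding/binary"
	"fmt"
	"net"
)

func main() {
	b := make([]byte, 28)
	binary.BigEndian.PutUint16(b[0:], 1)      // HTYPE: 1 = Ethernet (ARPHardwareEther)
	binary.BigEndian.PutUint16(b[2:], 0x0800) // PTYPE: IPv4
	b[4] = 6                                  // HLEN: Ethernet address size
	b[5] = 4                                  // PLEN: IPv4 address size
	binary.BigEndian.PutUint16(b[6:], 1)      // OPER: 1 = request
	copy(b[8:14], net.HardwareAddr{0xde, 0xad, 0xbe, 0xef, 0x00, 0x01}) // sender MAC
	copy(b[14:18], net.IPv4(192, 168, 1, 1).To4())                      // sender IPv4
	// b[18:24] is the target MAC, left as zero in a request.
	copy(b[24:28], net.IPv4(192, 168, 1, 2).To4()) // target IPv4
	fmt.Printf("% x\n", b)
}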
func (a ARP) SetIPv4OverEthernet() { - a[0], a[1] = 0, 1 // htypeEthernet - a[2], a[3] = 0x08, 0x00 // IPv4ProtocolNumber - a[4] = 6 // macSize - a[5] = uint8(IPv4AddressSize) + binary.BigEndian.PutUint16(a[hTypeOffset:], uint16(ARPHardwareEther)) + binary.BigEndian.PutUint16(a[protocolOffset:], uint16(IPv4ProtocolNumber)) + a[haAddressSizeOffset] = EthernetAddressSize + a[protoAddressSizeOffset] = uint8(IPv4AddressSize) } // HardwareAddressSender is the link address of the sender. // It is a view on to the ARP packet so it can be used to set the value. func (a ARP) HardwareAddressSender() []byte { - const s = 8 - return a[s : s+6] + return a[senderHAAddressOffset : senderHAAddressOffset+EthernetAddressSize] } // ProtocolAddressSender is the protocol address of the sender. // It is a view on to the ARP packet so it can be used to set the value. func (a ARP) ProtocolAddressSender() []byte { - const s = 8 + 6 - return a[s : s+4] + return a[senderProtocolAddressOffset : senderProtocolAddressOffset+IPv4AddressSize] } // HardwareAddressTarget is the link address of the target. // It is a view on to the ARP packet so it can be used to set the value. func (a ARP) HardwareAddressTarget() []byte { - const s = 8 + 6 + 4 - return a[s : s+6] + return a[targetHAAddressOffset : targetHAAddressOffset+EthernetAddressSize] } // ProtocolAddressTarget is the protocol address of the target. // It is a view on to the ARP packet so it can be used to set the value. func (a ARP) ProtocolAddressTarget() []byte { - const s = 8 + 6 + 4 + 6 - return a[s : s+4] + return a[targetProtocolAddressOffset : targetProtocolAddressOffset+IPv4AddressSize] } // IsValid reports whether this is an ARP packet for IPv4 over Ethernet. @@ -91,10 +120,8 @@ func (a ARP) IsValid() bool { if len(a) < ARPSize { return false } - const htypeEthernet = 1 - const macSize = 6 - return a.hardwareAddressSpace() == htypeEthernet && + return a.hardwareAddressType() == ARPHardwareEther && a.protocolAddressSpace() == uint16(IPv4ProtocolNumber) && - a.hardwareAddressSize() == macSize && + a.hardwareAddressSize() == EthernetAddressSize && a.protocolAddressSize() == IPv4AddressSize } diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD index b8b93e78e..39ca774ef 100644 --- a/pkg/tcpip/link/channel/BUILD +++ b/pkg/tcpip/link/channel/BUILD @@ -10,6 +10,7 @@ go_library( "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", + "//pkg/tcpip/header", "//pkg/tcpip/stack", ], ) diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index 20b183da0..a2bb773d4 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -296,3 +297,8 @@ func (e *Endpoint) AddNotify(notify Notification) *NotificationHandle { func (e *Endpoint) RemoveNotify(handle *NotificationHandle) { e.q.RemoveNotify(handle) } + +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. 
+func (*Endpoint) ARPHardwareType() header.ARPHardwareType { + return header.ARPHardwareNone +} diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index f34082e1a..32abe2a13 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -626,6 +626,14 @@ func (e *endpoint) GSOMaxSize() uint32 { return e.gsoMaxSize } +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. +func (e *endpoint) ARPHardwareType() header.ARPHardwareType { + if e.hdrSize > 0 { + return header.ARPHardwareEther + } + return header.ARPHardwareNone +} + // InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes // to the FD, but does not read from it. All reads come from injected packets. type InjectableEndpoint struct { diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go index 568c6874f..3b17d8c28 100644 --- a/pkg/tcpip/link/loopback/loopback.go +++ b/pkg/tcpip/link/loopback/loopback.go @@ -113,3 +113,8 @@ func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error { return nil } + +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. +func (*endpoint) ARPHardwareType() header.ARPHardwareType { + return header.ARPHardwareLoopback +} diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD index 82b441b79..e7493e5c5 100644 --- a/pkg/tcpip/link/muxed/BUILD +++ b/pkg/tcpip/link/muxed/BUILD @@ -9,6 +9,7 @@ go_library( deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", + "//pkg/tcpip/header", "//pkg/tcpip/stack", ], ) diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go index c69d6b7e9..c305d9e86 100644 --- a/pkg/tcpip/link/muxed/injectable.go +++ b/pkg/tcpip/link/muxed/injectable.go @@ -18,6 +18,7 @@ package muxed import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -129,6 +130,11 @@ func (m *InjectableEndpoint) Wait() { } } +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. +func (*InjectableEndpoint) ARPHardwareType() header.ARPHardwareType { + panic("unsupported operation") +} + // NewInjectableEndpoint creates a new multi-endpoint injectable endpoint. 
func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) *InjectableEndpoint { return &InjectableEndpoint{ diff --git a/pkg/tcpip/link/nested/BUILD b/pkg/tcpip/link/nested/BUILD index bdd5276ad..2cdb23475 100644 --- a/pkg/tcpip/link/nested/BUILD +++ b/pkg/tcpip/link/nested/BUILD @@ -12,6 +12,7 @@ go_library( "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", + "//pkg/tcpip/header", "//pkg/tcpip/stack", ], ) diff --git a/pkg/tcpip/link/nested/nested.go b/pkg/tcpip/link/nested/nested.go index 2998f9c4f..328bd048e 100644 --- a/pkg/tcpip/link/nested/nested.go +++ b/pkg/tcpip/link/nested/nested.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -129,3 +130,8 @@ func (e *Endpoint) GSOMaxSize() uint32 { } return 0 } + +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType +func (e *Endpoint) ARPHardwareType() header.ARPHardwareType { + return e.child.ARPHardwareType() +} diff --git a/pkg/tcpip/link/qdisc/fifo/BUILD b/pkg/tcpip/link/qdisc/fifo/BUILD index 054c213bc..1d0079bd6 100644 --- a/pkg/tcpip/link/qdisc/fifo/BUILD +++ b/pkg/tcpip/link/qdisc/fifo/BUILD @@ -14,6 +14,7 @@ go_library( "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", + "//pkg/tcpip/header", "//pkg/tcpip/stack", ], ) diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go index b5dfb7850..c84fe1bb9 100644 --- a/pkg/tcpip/link/qdisc/fifo/endpoint.go +++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -207,3 +208,8 @@ func (e *endpoint) Wait() { e.wg.Wait() } + +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType +func (e *endpoint) ARPHardwareType() header.ARPHardwareType { + return e.lower.ARPHardwareType() +} diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index 0374a2441..a36862c67 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -287,3 +287,8 @@ func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) { e.completed.Done() } + +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType +func (*endpoint) ARPHardwareType() header.ARPHardwareType { + return header.ARPHardwareEther +} diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index 6bc9033d0..47446efec 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -139,6 +139,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE stack: s, nicID: id, name: name, + isTap: prefix == "tap", } endpoint.Endpoint.LinkEPCapabilities = linkCaps if endpoint.name == "" { @@ -348,6 +349,7 @@ type tunEndpoint struct { stack *stack.Stack nicID tcpip.NICID name string + isTap bool } // DecRef decrements refcount of e, removes NIC if refcount goes to 0. @@ -356,3 +358,11 @@ func (e *tunEndpoint) DecRef() { e.stack.RemoveNIC(e.nicID) }) } + +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. 
+func (e *tunEndpoint) ARPHardwareType() header.ARPHardwareType { + if e.isTap { + return header.ARPHardwareEther + } + return header.ARPHardwareNone +} diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD index 0956d2c65..ee84c3d96 100644 --- a/pkg/tcpip/link/waitable/BUILD +++ b/pkg/tcpip/link/waitable/BUILD @@ -12,6 +12,7 @@ go_library( "//pkg/gate", "//pkg/tcpip", "//pkg/tcpip/buffer", + "//pkg/tcpip/header", "//pkg/tcpip/stack", ], ) @@ -25,6 +26,7 @@ go_test( deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", + "//pkg/tcpip/header", "//pkg/tcpip/stack", ], ) diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go index 949b3f2b2..24a8dc2eb 100644 --- a/pkg/tcpip/link/waitable/waitable.go +++ b/pkg/tcpip/link/waitable/waitable.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/gate" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -147,3 +148,8 @@ func (e *Endpoint) WaitDispatch() { // Wait implements stack.LinkEndpoint.Wait. func (e *Endpoint) Wait() {} + +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. +func (e *Endpoint) ARPHardwareType() header.ARPHardwareType { + return e.lower.ARPHardwareType() +} diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go index 63bf40562..ffb2354be 100644 --- a/pkg/tcpip/link/waitable/waitable_test.go +++ b/pkg/tcpip/link/waitable/waitable_test.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -81,6 +82,11 @@ func (e *countedEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error { return nil } +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. +func (*countedEndpoint) ARPHardwareType() header.ARPHardwareType { + panic("unimplemented") +} + // Wait implements stack.LinkEndpoint.Wait. func (*countedEndpoint) Wait() {} diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index 7c8fb3e0a..a5b780ca2 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -172,14 +172,19 @@ func (t *testObject) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.Ne } // WritePackets implements stack.LinkEndpoint.WritePackets. -func (t *testObject) WritePackets(_ *stack.Route, _ *stack.GSO, pkt stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { +func (*testObject) WritePackets(_ *stack.Route, _ *stack.GSO, pkt stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { panic("not implemented") } -func (t *testObject) WriteRawPacket(_ buffer.VectorisedView) *tcpip.Error { +func (*testObject) WriteRawPacket(_ buffer.VectorisedView) *tcpip.Error { return tcpip.ErrNotSupported } +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. 
+func (*testObject) ARPHardwareType() header.ARPHardwareType { + panic("not implemented") +} + func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()}, diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go index a6546cef0..eefb4b07f 100644 --- a/pkg/tcpip/stack/forwarder_test.go +++ b/pkg/tcpip/stack/forwarder_test.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" ) const ( @@ -301,6 +302,11 @@ func (e *fwdTestLinkEndpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Er // Wait implements stack.LinkEndpoint.Wait. func (*fwdTestLinkEndpoint) Wait() {} +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. +func (*fwdTestLinkEndpoint) ARPHardwareType() header.ARPHardwareType { + panic("not implemented") +} + func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *fwdTestLinkEndpoint) { // Create a stack with the network protocol and two NICs. s := New(Options{ diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go index 31f865260..3bc9fd831 100644 --- a/pkg/tcpip/stack/nic_test.go +++ b/pkg/tcpip/stack/nic_test.go @@ -84,6 +84,11 @@ func (e *testLinkEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error { return tcpip.ErrNotSupported } +// ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType. +func (*testLinkEndpoint) ARPHardwareType() header.ARPHardwareType { + panic("not implemented") +} + var _ NetworkEndpoint = (*testIPv6Endpoint)(nil) // An IPv6 NetworkEndpoint that throws away outgoing packets. diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index 5cbc946b6..f260eeb7f 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -18,6 +18,7 @@ import ( "gvisor.dev/gvisor/pkg/sleep" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/waiter" ) @@ -436,6 +437,12 @@ type LinkEndpoint interface { // Wait will not block if the endpoint hasn't started any goroutines // yet, even if it might later. Wait() + + // ARPHardwareType returns the ARPHRD_TYPE of the link endpoint. + // + // See: + // https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30 + ARPHardwareType() header.ARPHardwareType } // InjectableLinkEndpoint is a LinkEndpoint where inbound packets are diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 0aa815447..2b7ece851 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -1095,6 +1095,11 @@ type NICInfo struct { // Context is user-supplied data optionally supplied in CreateNICWithOptions. // See type NICOptions for more details. Context NICContext + + // ARPHardwareType holds the ARP Hardware type of the NIC. This is the + // value sent in haType field of an ARP Request sent by this NIC and the + // value expected in the haType field of an ARP response. + ARPHardwareType header.ARPHardwareType } // HasNIC returns true if the NICID is defined in the stack. 
@@ -1126,6 +1131,7 @@ func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo { MTU: nic.linkEP.MTU(), Stats: nic.stats, Context: nic.context, + ARPHardwareType: nic.linkEP.ARPHardwareType(), } } return nics diff --git a/test/syscalls/linux/socket_netdevice.cc b/test/syscalls/linux/socket_netdevice.cc index 15d4b85a7..5f8d7f981 100644 --- a/test/syscalls/linux/socket_netdevice.cc +++ b/test/syscalls/linux/socket_netdevice.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -49,6 +50,7 @@ TEST(NetdeviceTest, Loopback) { // Check that the loopback is zero hardware address. ASSERT_THAT(ioctl(sock.get(), SIOCGIFHWADDR, &ifr), SyscallSucceeds()); + EXPECT_EQ(ifr.ifr_hwaddr.sa_family, ARPHRD_LOOPBACK); EXPECT_EQ(ifr.ifr_hwaddr.sa_data[0], 0); EXPECT_EQ(ifr.ifr_hwaddr.sa_data[1], 0); EXPECT_EQ(ifr.ifr_hwaddr.sa_data[2], 0); @@ -178,6 +180,27 @@ TEST(NetdeviceTest, InterfaceMTU) { EXPECT_GT(ifr.ifr_mtu, 0); } +TEST(NetdeviceTest, EthtoolGetTSInfo) { + FileDescriptor sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + struct ethtool_ts_info tsi = {}; + tsi.cmd = ETHTOOL_GET_TS_INFO; // Get NIC's Timestamping capabilities. + + // Prepare the request. + struct ifreq ifr = {}; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + ifr.ifr_data = (void*)&tsi; + + // Check that SIOCGIFMTU returns a nonzero MTU. + if (IsRunningOnGvisor()) { + ASSERT_THAT(ioctl(sock.get(), SIOCETHTOOL, &ifr), + SyscallFailsWithErrno(EOPNOTSUPP)); + return; + } + ASSERT_THAT(ioctl(sock.get(), SIOCETHTOOL, &ifr), SyscallSucceeds()); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 857d03f258ffafb815698917f2a1ee9e7e265464 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 15 Jul 2020 14:55:12 -0700 Subject: Add support for SO_ERROR to packet sockets. Packet sockets also seem to allow double binding and do not return an error on linux. This was tested by running the syscall test in a linux namespace as root and the current test DoubleBind fails@HEAD. Passes after this change. Updates #173 PiperOrigin-RevId: 321445137 --- pkg/syserr/netstack.go | 2 +- pkg/tcpip/transport/packet/endpoint.go | 25 +++++++++++- pkg/tcpip/transport/packet/endpoint_state.go | 19 +++++++++ test/syscalls/linux/packet_socket.cc | 13 +++---- test/syscalls/linux/packet_socket_raw.cc | 58 ++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/pkg/syserr/netstack.go b/pkg/syserr/netstack.go index 8ff922c69..5ae10939d 100644 --- a/pkg/syserr/netstack.go +++ b/pkg/syserr/netstack.go @@ -22,7 +22,7 @@ import ( // Mapping for tcpip.Error types. 
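The commit message above describes SO_ERROR support for packet sockets; from user space that is the usual getsockopt dance, where the call returns and clears any pending error and zero means none. A hedged standalone sketch, not part of this patch; opening an AF_PACKET socket needs CAP_NET_RAW, and the protocol must be passed in network byte order, hence the htons helper:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func htons(v uint16) int { return int(v<<8 | v>>8) }

func main() {
	fd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, htons(unix.ETH_P_ALL))
	if err != nil {
		panic(err) // typically EPERM without CAP_NET_RAW
	}
	defer unix.Close(fd)

	// SO_ERROR reports the pending error on the socket; 0 means none.
	pending, err := unix.GetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_ERROR)
	if err != nil {
		panic(err)
	}
	fmt.Println("pending socket error:", pending)
}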
var ( ErrUnknownProtocol = New(tcpip.ErrUnknownProtocol.String(), linux.EINVAL) - ErrUnknownNICID = New(tcpip.ErrUnknownNICID.String(), linux.EINVAL) + ErrUnknownNICID = New(tcpip.ErrUnknownNICID.String(), linux.ENODEV) ErrUnknownDevice = New(tcpip.ErrUnknownDevice.String(), linux.ENODEV) ErrUnknownProtocolOption = New(tcpip.ErrUnknownProtocolOption.String(), linux.ENOPROTOOPT) ErrDuplicateNICID = New(tcpip.ErrDuplicateNICID.String(), linux.EEXIST) diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index 57b7f5c19..92b487381 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -79,6 +79,11 @@ type endpoint struct { closed bool stats tcpip.TransportEndpointStats `state:"nosave"` bound bool + boundNIC tcpip.NICID + + // lastErrorMu protects lastError. + lastErrorMu sync.Mutex `state:"nosave"` + lastError *tcpip.Error `state:".(string)"` } // NewEndpoint returns a new packet endpoint. @@ -229,12 +234,14 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { ep.mu.Lock() defer ep.mu.Unlock() - if ep.bound { - return tcpip.ErrAlreadyBound + if ep.bound && ep.boundNIC == addr.NIC { + // If the NIC being bound is the same then just return success. + return nil } // Unregister endpoint with all the nics. ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep) + ep.bound = false // Bind endpoint to receive packets from specific interface. if err := ep.stack.RegisterPacketEndpoint(addr.NIC, ep.netProto, ep); err != nil { @@ -242,6 +249,7 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { } ep.bound = true + ep.boundNIC = addr.NIC return nil } @@ -336,8 +344,21 @@ func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error { } } +func (ep *endpoint) takeLastError() *tcpip.Error { + ep.lastErrorMu.Lock() + defer ep.lastErrorMu.Unlock() + + err := ep.lastError + ep.lastError = nil + return err +} + // GetSockOpt implements tcpip.Endpoint.GetSockOpt. func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { + switch opt.(type) { + case tcpip.ErrorOption: + return ep.takeLastError() + } return tcpip.ErrNotSupported } diff --git a/pkg/tcpip/transport/packet/endpoint_state.go b/pkg/tcpip/transport/packet/endpoint_state.go index 9b88f17e4..e2fa96d17 100644 --- a/pkg/tcpip/transport/packet/endpoint_state.go +++ b/pkg/tcpip/transport/packet/endpoint_state.go @@ -15,6 +15,7 @@ package packet import ( + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -70,3 +71,21 @@ func (ep *endpoint) afterLoad() { panic(*err) } } + +// saveLastError is invoked by stateify. +func (ep *endpoint) saveLastError() string { + if ep.lastError == nil { + return "" + } + + return ep.lastError.String() +} + +// loadLastError is invoked by stateify. +func (ep *endpoint) loadLastError(s string) { + if s == "" { + return + } + + ep.lastError = tcpip.StringToError(s) +} diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc index 5ac68feb4..ce63adb23 100644 --- a/test/syscalls/linux/packet_socket.cc +++ b/test/syscalls/linux/packet_socket.cc @@ -343,7 +343,7 @@ TEST_P(CookedPacketTest, BindReceive) { } // Double Bind socket. 
-TEST_P(CookedPacketTest, DoubleBind) { +TEST_P(CookedPacketTest, DoubleBindSucceeds) { struct sockaddr_ll bind_addr = {}; bind_addr.sll_family = AF_PACKET; bind_addr.sll_protocol = htons(GetParam()); @@ -354,12 +354,11 @@ TEST_P(CookedPacketTest, DoubleBind) { SyscallSucceeds()); // Binding socket again should fail. - ASSERT_THAT( - bind(socket_, reinterpret_cast(&bind_addr), - sizeof(bind_addr)), - // Linux 4.09 returns EINVAL here, but some time before 4.19 it switched - // to EADDRINUSE. - AnyOf(SyscallFailsWithErrno(EADDRINUSE), SyscallFailsWithErrno(EINVAL))); + ASSERT_THAT(bind(socket_, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + // Linux 4.09 returns EINVAL here, but some time before 4.19 it + // switched to EADDRINUSE. + SyscallSucceeds()); } // Bind and verify we do not receive data on interface which is not bound diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc index 6a963b12c..e44062475 100644 --- a/test/syscalls/linux/packet_socket_raw.cc +++ b/test/syscalls/linux/packet_socket_raw.cc @@ -559,6 +559,64 @@ TEST_P(RawPacketTest, SetSocketSendBuf) { ASSERT_EQ(quarter_sz, val); } +TEST_P(RawPacketTest, GetSocketError) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(val, 0); +} + +TEST_P(RawPacketTest, GetSocketErrorBind) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + { + // Bind to the loopback device. + struct sockaddr_ll bind_addr = {}; + bind_addr.sll_family = AF_PACKET; + bind_addr.sll_protocol = htons(GetParam()); + bind_addr.sll_ifindex = GetLoopbackIndex(); + + ASSERT_THAT(bind(s_, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + SyscallSucceeds()); + + // SO_ERROR should return no errors. + int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(val, 0); + } + + { + // Now try binding to an invalid interface. + struct sockaddr_ll bind_addr = {}; + bind_addr.sll_family = AF_PACKET; + bind_addr.sll_protocol = htons(GetParam()); + bind_addr.sll_ifindex = 0xffff; // Just pick a really large number. + + // Binding should fail with EINVAL + ASSERT_THAT(bind(s_, reinterpret_cast(&bind_addr), + sizeof(bind_addr)), + SyscallFailsWithErrno(ENODEV)); + + // SO_ERROR does not return error when the device is invalid. + // On Linux there is just one odd ball condition where this can return + // an error where the device was valid and then removed or disabled + // between the first check for index and the actual registration of + // the packet endpoint. On Netstack this is not possible as the stack + // global mutex is held during registration and check. 
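(For context on the Go side of this change: the SO_ERROR path reaches the new GetSockOpt case by passing tcpip.ErrorOption{}, which drains the endpoint's lastError. A minimal sketch of that call, assuming the gvisor.dev/gvisor/pkg/tcpip import; the soError helper name is illustrative only and is not part of this patch.)

// soError returns and clears the endpoint's pending error, mirroring the
// "take" semantics of SO_ERROR. A nil result means no error is pending.
func soError(ep tcpip.Endpoint) *tcpip.Error {
	return ep.GetSockOpt(tcpip.ErrorOption{})
}
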
+ int val = 0; + socklen_t val_len = sizeof(val); + ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len), + SyscallSucceeds()); + ASSERT_EQ(val, 0); + } +} + #ifndef __fuchsia__ TEST_P(RawPacketTest, SetSocketDetachFilterNoInstalledFilter) { -- cgit v1.2.3 From 5c8c0d65b9062dcbe195e7131a6a3c3fb8ba9583 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Wed, 15 Jul 2020 18:19:52 -0700 Subject: Port httpd benchmark PiperOrigin-RevId: 321478001 --- images/benchmarks/ab/Dockerfile | 7 + images/benchmarks/httpd/Dockerfile | 17 ++ images/benchmarks/httpd/apache2-tmpdir.conf | 5 + test/benchmarks/fs/bazel_test.go | 1 + test/benchmarks/harness/machine.go | 7 + test/benchmarks/network/BUILD | 5 +- test/benchmarks/network/httpd_test.go | 276 ++++++++++++++++++++++++++++ test/benchmarks/network/iperf_test.go | 4 +- 8 files changed, 320 insertions(+), 2 deletions(-) create mode 100644 images/benchmarks/ab/Dockerfile create mode 100644 images/benchmarks/httpd/Dockerfile create mode 100644 images/benchmarks/httpd/apache2-tmpdir.conf create mode 100644 test/benchmarks/network/httpd_test.go (limited to 'test') diff --git a/images/benchmarks/ab/Dockerfile b/images/benchmarks/ab/Dockerfile new file mode 100644 index 000000000..10544639b --- /dev/null +++ b/images/benchmarks/ab/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + apache2-utils \ + && rm -rf /var/lib/apt/lists/* diff --git a/images/benchmarks/httpd/Dockerfile b/images/benchmarks/httpd/Dockerfile new file mode 100644 index 000000000..b72406012 --- /dev/null +++ b/images/benchmarks/httpd/Dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + apache2 \ + && rm -rf /var/lib/apt/lists/* + +# Generate a bunch of relevant files. +RUN mkdir -p /local && \ + for size in 1 10 100 1000 1024 10240; do \ + dd if=/dev/zero of=/local/latin${size}k.txt count=${size} bs=1024; \ + done + +# Rewrite DocumentRoot to point to /tmp/html instead of the default path. +RUN sed -i 's/DocumentRoot.*\/var\/www\/html$/DocumentRoot \/tmp\/html/' /etc/apache2/sites-enabled/000-default.conf +COPY ./apache2-tmpdir.conf /etc/apache2/sites-enabled/apache2-tmpdir.conf diff --git a/images/benchmarks/httpd/apache2-tmpdir.conf b/images/benchmarks/httpd/apache2-tmpdir.conf new file mode 100644 index 000000000..e33f8d9bb --- /dev/null +++ b/images/benchmarks/httpd/apache2-tmpdir.conf @@ -0,0 +1,5 @@ + + Options Indexes FollowSymLinks + AllowOverride None + Require all granted + \ No newline at end of file diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go index aabcdbd87..b7915e19d 100644 --- a/test/benchmarks/fs/bazel_test.go +++ b/test/benchmarks/fs/bazel_test.go @@ -32,6 +32,7 @@ func BenchmarkABSL(b *testing.B) { if err != nil { b.Fatalf("failed to get machine: %v", err) } + defer machine.CleanUp() // Dimensions here are clean/dirty cache (do or don't drop caches) // and if the mount on which we are compiling is a tmpfs/bind mount. diff --git a/test/benchmarks/harness/machine.go b/test/benchmarks/harness/machine.go index 032b387fc..93c0db9ce 100644 --- a/test/benchmarks/harness/machine.go +++ b/test/benchmarks/harness/machine.go @@ -33,6 +33,9 @@ type Machine interface { // Returns IP Address for the machine. IPAddress() (net.IP, error) + + // CleanUp cleans up this machine. + CleanUp() } // localMachine describes this machine. 
@@ -62,3 +65,7 @@ func (l *localMachine) IPAddress() (net.IP, error) { addr := conn.LocalAddr().(*net.UDPAddr) return addr.IP, nil } + +// CleanUp implements Machine.CleanUp and does nothing for localMachine. +func (*localMachine) CleanUp() { +} diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD index 57328456d..ea78416cf 100644 --- a/test/benchmarks/network/BUILD +++ b/test/benchmarks/network/BUILD @@ -11,7 +11,10 @@ go_library( go_test( name = "network_test", size = "large", - srcs = ["iperf_test.go"], + srcs = [ + "httpd_test.go", + "iperf_test.go", + ], library = ":network", tags = [ # Requires docker and runsc to be configured before test runs. diff --git a/test/benchmarks/network/httpd_test.go b/test/benchmarks/network/httpd_test.go new file mode 100644 index 000000000..f9afdf15f --- /dev/null +++ b/test/benchmarks/network/httpd_test.go @@ -0,0 +1,276 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package network + +import ( + "context" + "fmt" + "regexp" + "strconv" + "testing" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +// see Dockerfile '//images/benchmarks/httpd'. +var docs = map[string]string{ + "notfound": "notfound", + "1Kb": "latin1k.txt", + "10Kb": "latin10k.txt", + "100Kb": "latin100k.txt", + "1000Kb": "latin1000k.txt", + "1Mb": "latin1024k.txt", + "10Mb": "latin10240k.txt", +} + +// BenchmarkHttpdConcurrency iterates the concurrency argument and tests +// how well the runtime under test handles requests in parallel. +func BenchmarkHttpdConcurrency(b *testing.B) { + // Grab a machine for the client and server. + clientMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get client: %v", err) + } + defer clientMachine.CleanUp() + + serverMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get server: %v", err) + } + defer serverMachine.CleanUp() + + // The test iterates over client concurrency, so set other parameters. + requests := 1000 + concurrency := []int{1, 5, 10, 25} + doc := docs["10Kb"] + + for _, c := range concurrency { + b.Run(fmt.Sprintf("%dConcurrency", c), func(b *testing.B) { + runHttpd(b, clientMachine, serverMachine, doc, requests, c) + }) + } +} + +// BenchmarkHttpdDocSize iterates over different sized payloads, testing how +// well the runtime handles different payload sizes. +func BenchmarkHttpdDocSize(b *testing.B) { + clientMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer clientMachine.CleanUp() + + serverMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer serverMachine.CleanUp() + + requests := 1000 + concurrency := 1 + + for name, filename := range docs { + b.Run(name, func(b *testing.B) { + runHttpd(b, clientMachine, serverMachine, filename, requests, concurrency) + }) + } +} + +// runHttpd runs a single test run. 
+func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc string, requests, concurrency int) { + b.Helper() + + // Grab a container from the server. + ctx := context.Background() + server := serverMachine.GetContainer(ctx, b) + defer server.CleanUp(ctx) + + // Copy the docs to /tmp and serve from there. + cmd := "mkdir -p /tmp/html; cp -r /local /tmp/html/.; apache2 -X" + port := 80 + + // Start the server. + server.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/httpd", + Ports: []int{port}, + Env: []string{ + // Standard environmental variables for httpd. + "APACHE_RUN_DIR=/tmp", + "APACHE_RUN_USER=nobody", + "APACHE_RUN_GROUP=nogroup", + "APACHE_LOG_DIR=/tmp", + "APACHE_PID_FILE=/tmp/apache.pid", + }, + }, "sh", "-c", cmd) + + ip, err := serverMachine.IPAddress() + if err != nil { + b.Fatalf("failed to find server ip: %v", err) + } + + servingPort, err := server.FindPort(ctx, port) + if err != nil { + b.Fatalf("failed to find server port %d: %v", port, err) + } + + // Check the server is serving. + harness.WaitUntilServing(ctx, clientMachine, ip, servingPort) + + // Grab a client. + client := clientMachine.GetContainer(ctx, b) + defer client.CleanUp(ctx) + + path := fmt.Sprintf("http://%s:%d/%s", ip, servingPort, doc) + // See apachebench (ab) for flags. + cmd = fmt.Sprintf("ab -n %d -c %d %s", requests, concurrency, path) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + out, err := client.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/ab", + }, "sh", "-c", cmd) + if err != nil { + b.Fatalf("run failed with: %v", err) + } + + b.StopTimer() + + // Parse and report custom metrics. + transferRate, err := parseTransferRate(out) + if err != nil { + b.Logf("failed to parse transferrate: %v", err) + } + b.ReportMetric(transferRate*1024, "transfer_rate") // Convert from Kb/s to b/s. + + latency, err := parseLatency(out) + if err != nil { + b.Logf("failed to parse latency: %v", err) + } + b.ReportMetric(latency/1000, "mean_latency") // Convert from ms to s. + + reqPerSecond, err := parseRequestsPerSecond(out) + if err != nil { + b.Logf("failed to parse requests per second: %v", err) + } + b.ReportMetric(reqPerSecond, "requests_per_second") + + b.StartTimer() + } +} + +var transferRateRE = regexp.MustCompile(`Transfer rate:\s+(\d+\.?\d+?)\s+\[Kbytes/sec\]\s+received`) + +// parseTransferRate parses transfer rate from apachebench output. +func parseTransferRate(data string) (float64, error) { + match := transferRateRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +var latencyRE = regexp.MustCompile(`Total:\s+\d+\s+(\d+)\s+(\d+\.?\d+?)\s+\d+\s+\d+\s`) + +// parseLatency parses latency from apachebench output. +func parseLatency(data string) (float64, error) { + match := latencyRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +var requestsPerSecondRE = regexp.MustCompile(`Requests per second:\s+(\d+\.?\d+?)\s+`) + +// parseRequestsPerSecond parses requests per second from apachebench output. +func parseRequestsPerSecond(data string) (float64, error) { + match := requestsPerSecondRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +// Sample output from apachebench. 
+const sampleData = `This is ApacheBench, Version 2.3 <$Revision: 1826891 $> +Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Licensed to The Apache Software Foundation, http://www.apache.org/ + +Benchmarking 10.10.10.10 (be patient).....done + + +Server Software: Apache/2.4.38 +Server Hostname: 10.10.10.10 +Server Port: 80 + +Document Path: /latin10k.txt +Document Length: 210 bytes + +Concurrency Level: 1 +Time taken for tests: 0.180 seconds +Complete requests: 100 +Failed requests: 0 +Non-2xx responses: 100 +Total transferred: 38800 bytes +HTML transferred: 21000 bytes +Requests per second: 556.44 [#/sec] (mean) +Time per request: 1.797 [ms] (mean) +Time per request: 1.797 [ms] (mean, across all concurrent requests) +Transfer rate: 210.84 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.2 0 2 +Processing: 1 2 1.0 1 8 +Waiting: 1 1 1.0 1 7 +Total: 1 2 1.2 1 10 + +Percentage of the requests served within a certain time (ms) + 50% 1 + 66% 2 + 75% 2 + 80% 2 + 90% 2 + 95% 3 + 98% 7 + 99% 10 + 100% 10 (longest request)` + +// TestParsers checks the parsers work. +func TestParsers(t *testing.T) { + want := 210.84 + got, err := parseTransferRate(sampleData) + if err != nil { + t.Fatalf("failed to parse transfer rate with error: %v", err) + } else if got != want { + t.Fatalf("parseTransferRate got: %f, want: %f", got, want) + } + + want = 2.0 + got, err = parseLatency(sampleData) + if err != nil { + t.Fatalf("failed to parse transfer rate with error: %v", err) + } else if got != want { + t.Fatalf("parseLatency got: %f, want: %f", got, want) + } + + want = 556.44 + got, err = parseRequestsPerSecond(sampleData) + if err != nil { + t.Fatalf("failed to parse transfer rate with error: %v", err) + } else if got != want { + t.Fatalf("parseRequestsPerSecond got: %f, want: %f", got, want) + } +} diff --git a/test/benchmarks/network/iperf_test.go b/test/benchmarks/network/iperf_test.go index 72e9c99a8..48cc9dd8f 100644 --- a/test/benchmarks/network/iperf_test.go +++ b/test/benchmarks/network/iperf_test.go @@ -35,11 +35,13 @@ func BenchmarkIperf(b *testing.B) { if err != nil { b.Fatalf("failed to get machine: %v", err) } + defer clientMachine.CleanUp() serverMachine, err := h.GetMachine() if err != nil { b.Fatalf("failed to get machine: %v", err) } + defer serverMachine.CleanUp() for _, bm := range []struct { name string @@ -111,7 +113,7 @@ func BenchmarkIperf(b *testing.B) { if err != nil { b.Fatalf("failed to parse bandwitdth from %s: %v", out, err) } - b.ReportMetric(bW, "KBytes/sec") + b.ReportMetric(bW*1024, "bandwidth") // Convert from Kb/s to b/s. 
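(Both benchmarks above convert the tool-reported Kbytes/sec figure into bytes/sec before calling b.ReportMetric, so custom metrics stay in base units and remain comparable across benchmarks. A hedged sketch of that pattern, assuming the standard "testing" package; the reportKBytesPerSec helper is illustrative and not part of the patch.)

// reportKBytesPerSec records a KBytes/sec value as bytes/sec under the given
// metric name, keeping all bandwidth metrics in a single unit.
func reportKBytesPerSec(b *testing.B, name string, kbps float64) {
	b.Helper()
	b.ReportMetric(kbps*1024, name) // e.g. name = "bandwidth"
}
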
b.StartTimer() } }) -- cgit v1.2.3 From e6894cb99fe4c6e8599354bf5bc6a72a79b63fd3 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Thu, 16 Jul 2020 14:35:39 -0700 Subject: Port runtime tests to use go_test PiperOrigin-RevId: 321647645 --- test/runtimes/BUILD | 58 +++++++-- test/runtimes/defs.bzl | 79 ++++--------- test/runtimes/exclude/go1.12.csv | 16 +++ test/runtimes/exclude/java11.csv | 126 ++++++++++++++++++++ test/runtimes/exclude/nodejs12.4.0.csv | 50 ++++++++ test/runtimes/exclude/php7.3.6.csv | 35 ++++++ test/runtimes/exclude/python3.7.3.csv | 27 +++++ test/runtimes/exclude_go1.12.csv | 16 --- test/runtimes/exclude_java11.csv | 126 -------------------- test/runtimes/exclude_nodejs12.4.0.csv | 50 -------- test/runtimes/exclude_php7.3.6.csv | 35 ------ test/runtimes/exclude_python3.7.3.csv | 27 ----- test/runtimes/exclude_test.go | 37 ++++++ test/runtimes/runner.go | 209 +++++++++++++++++++++++++++++++++ test/runtimes/runner/BUILD | 22 ---- test/runtimes/runner/exclude_test.go | 37 ------ test/runtimes/runner/main.go | 205 -------------------------------- 17 files changed, 572 insertions(+), 583 deletions(-) create mode 100644 test/runtimes/exclude/go1.12.csv create mode 100644 test/runtimes/exclude/java11.csv create mode 100644 test/runtimes/exclude/nodejs12.4.0.csv create mode 100644 test/runtimes/exclude/php7.3.6.csv create mode 100644 test/runtimes/exclude/python3.7.3.csv delete mode 100644 test/runtimes/exclude_go1.12.csv delete mode 100644 test/runtimes/exclude_java11.csv delete mode 100644 test/runtimes/exclude_nodejs12.4.0.csv delete mode 100644 test/runtimes/exclude_php7.3.6.csv delete mode 100644 test/runtimes/exclude_python3.7.3.csv create mode 100644 test/runtimes/exclude_test.go create mode 100644 test/runtimes/runner.go delete mode 100644 test/runtimes/runner/BUILD delete mode 100644 test/runtimes/runner/exclude_test.go delete mode 100644 test/runtimes/runner/main.go (limited to 'test') diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index f98d02e00..aeefa8f56 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -1,38 +1,82 @@ -load("//test/runtimes:defs.bzl", "runtime_test") +load("//test/runtimes:defs.bzl", "exclude_test", "runtime_test") package(licenses = ["notice"]) +go_binary( + name = "runner", + testonly = 1, + srcs = ["runner.go"], + visibility = ["//test/runtimes:__pkg__"], + deps = [ + "//pkg/log", + "//pkg/test/dockerutil", + "//pkg/test/testutil", + ], +) + runtime_test( name = "go1.12", - exclude_file = "exclude_go1.12.csv", + exclude_file = "exclude/go1.12.csv", lang = "go", shard_count = 5, ) runtime_test( name = "java11", - exclude_file = "exclude_java11.csv", + exclude_file = "exclude/java11.csv", lang = "java", - shard_count = 10, + shard_count = 5, ) runtime_test( name = "nodejs12.4.0", - exclude_file = "exclude_nodejs12.4.0.csv", + exclude_file = "exclude/nodejs12.4.0.csv", lang = "nodejs", shard_count = 5, ) runtime_test( name = "php7.3.6", - exclude_file = "exclude_php7.3.6.csv", + exclude_file = "exclude/php7.3.6.csv", lang = "php", shard_count = 5, ) runtime_test( name = "python3.7.3", - exclude_file = "exclude_python3.7.3.csv", + exclude_file = "exclude/python3.7.3.csv", lang = "python", shard_count = 5, ) + +go_test( + name = "exclude_test", + size = "small", + srcs = ["exclude_test.go"], + library = ":runner", +) + +exclude_test( + name = "go", + exclude_file = "exclude/go1.12.csv", +) + +exclude_test( + name = "java", + exclude_file = "exclude/java11.csv", +) + +exclude_test( + name = "nodejs", + exclude_file = 
"exclude/nodejs12.4.0.csv", +) + +exclude_test( + name = "php", + exclude_file = "exclude/php7.3.6.csv", +) + +exclude_test( + name = "python", + exclude_file = "exclude/python3.7.3.csv", +) diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl index dc3667f05..3eb494958 100644 --- a/test/runtimes/defs.bzl +++ b/test/runtimes/defs.bzl @@ -2,69 +2,32 @@ load("//tools:defs.bzl", "go_test") -def _runtime_test_impl(ctx): - # Construct arguments. - args = [ - "--lang", - ctx.attr.lang, - "--image", - ctx.attr.image, - ] - if ctx.attr.exclude_file: - args += [ - "--exclude_file", - ctx.files.exclude_file[0].short_path, - ] - - # Build a runner. - runner = ctx.actions.declare_file("%s-executer" % ctx.label.name) - runner_content = "\n".join([ - "#!/bin/bash", - "%s %s\n" % (ctx.files._runner[0].short_path, " ".join(args)), - ]) - ctx.actions.write(runner, runner_content, is_executable = True) - - # Return the runner. - return [DefaultInfo( - executable = runner, - runfiles = ctx.runfiles( - files = ctx.files._runner + ctx.files.exclude_file + ctx.files._proctor, - collect_default = True, - collect_data = True, - ), - )] - -_runtime_test = rule( - implementation = _runtime_test_impl, - attrs = { - "image": attr.string( - mandatory = False, - ), - "lang": attr.string( - mandatory = True, - ), - "exclude_file": attr.label( - mandatory = False, - allow_single_file = True, - ), - "_runner": attr.label( - default = "//test/runtimes/runner:runner", - ), - "_proctor": attr.label( - default = "//test/runtimes/proctor:proctor", - ), - }, - test = True, -) - -def runtime_test(name, **kwargs): - _runtime_test( +def runtime_test(name, lang, exclude_file, **kwargs): + go_test( name = name, - image = name, # Resolved as images/runtimes/%s. + srcs = ["runner.go"], + args = [ + "--lang", + lang, + "--image", + name, # Resolved as images/runtimes/%s. + "--exclude_file", + "test/runtimes/" + exclude_file, + ], + data = [ + exclude_file, + "//test/runtimes/proctor", + ], + defines_main = 1, tags = [ "local", "manual", ], + deps = [ + "//pkg/log", + "//pkg/test/dockerutil", + "//pkg/test/testutil", + ], **kwargs ) diff --git a/test/runtimes/exclude/go1.12.csv b/test/runtimes/exclude/go1.12.csv new file mode 100644 index 000000000..8c8ae0c5d --- /dev/null +++ b/test/runtimes/exclude/go1.12.csv @@ -0,0 +1,16 @@ +test name,bug id,comment +cgo_errors,,FLAKY +cgo_test,,FLAKY +go_test:cmd/go,,FLAKY +go_test:cmd/vendor/golang.org/x/sys/unix,b/118783622,/dev devices missing +go_test:net,b/118784196,socket: invalid argument. Works as intended: see bug. +go_test:os,b/118780122,we have a pollable filesystem but that's a surprise +go_test:os/signal,b/118780860,/dev/pts not properly supported +go_test:runtime,b/118782341,sigtrap not reported or caught or something +go_test:syscall,b/118781998,bad bytes -- bad mem addr +race,b/118782931,thread sanitizer. Works as intended: b/62219744. 
+runtime:cpu124,b/118778254,segmentation fault +test:0_1,,FLAKY +testasan,, +testcarchive,b/118782924,no sigpipe +testshared,,FLAKY diff --git a/test/runtimes/exclude/java11.csv b/test/runtimes/exclude/java11.csv new file mode 100644 index 000000000..c012e5a56 --- /dev/null +++ b/test/runtimes/exclude/java11.csv @@ -0,0 +1,126 @@ +test name,bug id,comment +com/sun/crypto/provider/Cipher/PBE/PKCS12Cipher.java,,Fails in Docker +com/sun/jdi/NashornPopFrameTest.java,, +com/sun/jdi/ProcessAttachTest.java,, +com/sun/management/HotSpotDiagnosticMXBean/CheckOrigin.java,,Fails in Docker +com/sun/management/OperatingSystemMXBean/GetCommittedVirtualMemorySize.java,, +com/sun/management/UnixOperatingSystemMXBean/GetMaxFileDescriptorCount.sh,, +com/sun/tools/attach/AttachSelf.java,, +com/sun/tools/attach/BasicTests.java,, +com/sun/tools/attach/PermissionTest.java,, +com/sun/tools/attach/StartManagementAgent.java,, +com/sun/tools/attach/TempDirTest.java,, +com/sun/tools/attach/modules/Driver.java,, +java/lang/Character/CheckScript.java,,Fails in Docker +java/lang/Character/CheckUnicode.java,,Fails in Docker +java/lang/Class/GetPackageBootLoaderChildLayer.java,, +java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker +java/lang/String/nativeEncoding/StringPlatformChars.java,, +java/net/DatagramSocket/ReuseAddressTest.java,, +java/net/DatagramSocket/SendDatagramToBadAddress.java,b/78473345, +java/net/Inet4Address/PingThis.java,, +java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103, +java/net/MulticastSocket/MulticastTTL.java,, +java/net/MulticastSocket/Promiscuous.java,, +java/net/MulticastSocket/SetLoopbackMode.java,, +java/net/MulticastSocket/SetTTLAndGetTTL.java,, +java/net/MulticastSocket/Test.java,, +java/net/MulticastSocket/TestDefaults.java,, +java/net/MulticastSocket/TimeToLive.java,, +java/net/NetworkInterface/NetworkInterfaceStreamTest.java,, +java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported +java/net/Socket/TrafficClass.java,b/78527818,Not supported on gVisor +java/net/Socket/UrgentDataTest.java,b/111515323, +java/net/Socket/setReuseAddress/Basic.java,b/78519214,SO_REUSEADDR enabled by default +java/net/SocketOption/OptionsTest.java,,Fails in Docker +java/net/SocketOption/TcpKeepAliveTest.java,, +java/net/SocketPermission/SocketPermissionTest.java,, +java/net/URLConnection/6212146/TestDriver.java,,Fails in Docker +java/net/httpclient/RequestBuilderTest.java,,Fails in Docker +java/net/httpclient/ShortResponseBody.java,, +java/net/httpclient/ShortResponseBodyWithRetry.java,, +java/nio/channels/AsyncCloseAndInterrupt.java,, +java/nio/channels/AsynchronousServerSocketChannel/Basic.java,, +java/nio/channels/AsynchronousSocketChannel/Basic.java,b/77921528,SO_KEEPALIVE is not settable +java/nio/channels/DatagramChannel/BasicMulticastTests.java,, +java/nio/channels/DatagramChannel/SocketOptionTests.java,,Fails in Docker +java/nio/channels/DatagramChannel/UseDGWithIPv6.java,, +java/nio/channels/FileChannel/directio/DirectIOTest.java,,Fails in Docker +java/nio/channels/Selector/OutOfBand.java,, +java/nio/channels/Selector/SelectWithConsumer.java,,Flaky +java/nio/channels/ServerSocketChannel/SocketOptionTests.java,, +java/nio/channels/SocketChannel/LingerOnClose.java,, +java/nio/channels/SocketChannel/SocketOptionTests.java,b/77965901, +java/nio/channels/spi/SelectorProvider/inheritedChannel/InheritedChannelTest.java,,Fails in Docker +java/rmi/activation/Activatable/extLoadedImpl/ext.sh,, +java/rmi/transport/checkLeaseInfoLeak/CheckLeaseLeak.java,, 
+java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker +java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker +java/util/Calendar/JapaneseEraNameTest.java,, +java/util/Currency/CurrencyTest.java,,Fails in Docker +java/util/Currency/ValidateISO4217.java,,Fails in Docker +java/util/Locale/LSRDataTest.java,, +java/util/concurrent/locks/Lock/TimedAcquireLeak.java,, +java/util/jar/JarFile/mrjar/MultiReleaseJarAPI.java,,Fails in Docker +java/util/logging/LogManager/Configuration/updateConfiguration/SimpleUpdateConfigWithInputStreamTest.java,, +java/util/logging/TestLoggerWeakRefLeak.java,, +javax/imageio/AppletResourceTest.java,, +javax/management/security/HashedPasswordFileTest.java,, +javax/net/ssl/SSLSession/JSSERenegotiate.java,,Fails in Docker +javax/sound/sampled/AudioInputStream/FrameLengthAfterConversion.java,, +jdk/jfr/event/runtime/TestNetworkUtilizationEvent.java,, +jdk/jfr/event/runtime/TestThreadParkEvent.java,, +jdk/jfr/event/sampling/TestNative.java,, +jdk/jfr/jcmd/TestJcmdChangeLogLevel.java,, +jdk/jfr/jcmd/TestJcmdConfigure.java,, +jdk/jfr/jcmd/TestJcmdDump.java,, +jdk/jfr/jcmd/TestJcmdDumpGeneratedFilename.java,, +jdk/jfr/jcmd/TestJcmdDumpLimited.java,, +jdk/jfr/jcmd/TestJcmdDumpPathToGCRoots.java,, +jdk/jfr/jcmd/TestJcmdLegacy.java,, +jdk/jfr/jcmd/TestJcmdSaveToFile.java,, +jdk/jfr/jcmd/TestJcmdStartDirNotExist.java,, +jdk/jfr/jcmd/TestJcmdStartInvaldFile.java,, +jdk/jfr/jcmd/TestJcmdStartPathToGCRoots.java,, +jdk/jfr/jcmd/TestJcmdStartStopDefault.java,, +jdk/jfr/jcmd/TestJcmdStartWithOptions.java,, +jdk/jfr/jcmd/TestJcmdStartWithSettings.java,, +jdk/jfr/jcmd/TestJcmdStopInvalidFile.java,, +jdk/jfr/jvm/TestJfrJavaBase.java,, +jdk/jfr/startupargs/TestStartRecording.java,, +jdk/modules/incubator/ImageModules.java,, +jdk/net/Sockets/ExtOptionTest.java,, +jdk/net/Sockets/QuickAckTest.java,, +lib/security/cacerts/VerifyCACerts.java,, +sun/management/jmxremote/bootstrap/CustomLauncherTest.java,, +sun/management/jmxremote/bootstrap/JvmstatCountersTest.java,, +sun/management/jmxremote/bootstrap/LocalManagementTest.java,, +sun/management/jmxremote/bootstrap/RmiRegistrySslTest.java,, +sun/management/jmxremote/bootstrap/RmiSslBootstrapTest.sh,, +sun/management/jmxremote/startstop/JMXStartStopTest.java,, +sun/management/jmxremote/startstop/JMXStatusPerfCountersTest.java,, +sun/management/jmxremote/startstop/JMXStatusTest.java,, +sun/text/resources/LocaleDataTest.java,, +sun/tools/jcmd/TestJcmdSanity.java,, +sun/tools/jhsdb/AlternateHashingTest.java,, +sun/tools/jhsdb/BasicLauncherTest.java,, +sun/tools/jhsdb/HeapDumpTest.java,, +sun/tools/jhsdb/heapconfig/JMapHeapConfigTest.java,, +sun/tools/jinfo/BasicJInfoTest.java,, +sun/tools/jinfo/JInfoTest.java,, +sun/tools/jmap/BasicJMapTest.java,, +sun/tools/jstack/BasicJStackTest.java,, +sun/tools/jstack/DeadlockDetectionTest.java,, +sun/tools/jstatd/TestJstatdExternalRegistry.java,, +sun/tools/jstatd/TestJstatdPort.java,,Flaky +sun/tools/jstatd/TestJstatdPortAndServer.java,,Flaky +sun/util/calendar/zi/TestZoneInfo310.java,, +tools/jar/modularJar/Basic.java,, +tools/jar/multiRelease/Basic.java,, +tools/jimage/JImageExtractTest.java,, +tools/jimage/JImageTest.java,, +tools/jlink/JLinkTest.java,, +tools/jlink/plugins/IncludeLocalesPluginTest.java,, +tools/jmod/hashes/HashesTest.java,, +tools/launcher/BigJar.java,b/111611473, +tools/launcher/modules/patch/systemmodules/PatchSystemModules.java,, diff --git a/test/runtimes/exclude/nodejs12.4.0.csv b/test/runtimes/exclude/nodejs12.4.0.csv new file mode 100644 index 
000000000..e7edfa0a5 --- /dev/null +++ b/test/runtimes/exclude/nodejs12.4.0.csv @@ -0,0 +1,50 @@ +test name,bug id,comment +benchmark/test-benchmark-fs.js,, +benchmark/test-benchmark-module.js,, +benchmark/test-benchmark-napi.js,, +doctool/test-make-doc.js,b/68848110,Expected to fail. +fixtures/test-error-first-line-offset.js,, +fixtures/test-fs-readfile-error.js,, +fixtures/test-fs-stat-sync-overflow.js,, +internet/test-dgram-broadcast-multi-process.js,, +internet/test-dgram-multicast-multi-process.js,, +internet/test-dgram-multicast-set-interface-lo.js,, +internet/test-doctool-versions.js,, +internet/test-uv-threadpool-schedule.js,, +parallel/test-cluster-dgram-reuse.js,b/64024294, +parallel/test-dgram-bind-fd.js,b/132447356, +parallel/test-dgram-create-socket-handle-fd.js,b/132447238, +parallel/test-dgram-createSocket-type.js,b/68847739, +parallel/test-dgram-socket-buffer-size.js,b/68847921, +parallel/test-fs-access.js,, +parallel/test-fs-write-stream-double-close.js,, +parallel/test-fs-write-stream-throw-type-error.js,b/110226209, +parallel/test-fs-write-stream.js,, +parallel/test-http2-respond-file-error-pipe-offset.js,, +parallel/test-os.js,, +parallel/test-process-uid-gid.js,, +pseudo-tty/test-assert-colors.js,, +pseudo-tty/test-assert-no-color.js,, +pseudo-tty/test-assert-position-indicator.js,, +pseudo-tty/test-async-wrap-getasyncid-tty.js,, +pseudo-tty/test-fatal-error.js,, +pseudo-tty/test-handle-wrap-isrefed-tty.js,, +pseudo-tty/test-readable-tty-keepalive.js,, +pseudo-tty/test-set-raw-mode-reset-process-exit.js,, +pseudo-tty/test-set-raw-mode-reset-signal.js,, +pseudo-tty/test-set-raw-mode-reset.js,, +pseudo-tty/test-stderr-stdout-handle-sigwinch.js,, +pseudo-tty/test-stdout-read.js,, +pseudo-tty/test-tty-color-support.js,, +pseudo-tty/test-tty-isatty.js,, +pseudo-tty/test-tty-stdin-call-end.js,, +pseudo-tty/test-tty-stdin-end.js,, +pseudo-tty/test-stdin-write.js,, +pseudo-tty/test-tty-stdout-end.js,, +pseudo-tty/test-tty-stdout-resize.js,, +pseudo-tty/test-tty-stream-constructors.js,, +pseudo-tty/test-tty-window-size.js,, +pseudo-tty/test-tty-wrap.js,, +pummel/test-net-pingpong.js,, +pummel/test-vm-memleak.js,, +tick-processor/test-tick-processor-builtin.js,, diff --git a/test/runtimes/exclude/php7.3.6.csv b/test/runtimes/exclude/php7.3.6.csv new file mode 100644 index 000000000..f3606bfe8 --- /dev/null +++ b/test/runtimes/exclude/php7.3.6.csv @@ -0,0 +1,35 @@ +test name,bug id,comment +ext/intl/tests/bug77895.phpt,, +ext/intl/tests/dateformat_bug65683_2.phpt,, +ext/mbstring/tests/bug76319.phpt,, +ext/mbstring/tests/bug76958.phpt,, +ext/mbstring/tests/bug77025.phpt,, +ext/mbstring/tests/bug77165.phpt,, +ext/mbstring/tests/bug77454.phpt,, +ext/mbstring/tests/mb_convert_encoding_leak.phpt,, +ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, +ext/session/tests/session_set_save_handler_class_018.phpt,, +ext/session/tests/session_set_save_handler_iface_003.phpt,, +ext/session/tests/session_set_save_handler_variation4.phpt,, +ext/standard/tests/file/filetype_variation.phpt,, +ext/standard/tests/file/fopen_variation19.phpt,, +ext/standard/tests/file/php_fd_wrapper_01.phpt,, +ext/standard/tests/file/php_fd_wrapper_02.phpt,, +ext/standard/tests/file/php_fd_wrapper_03.phpt,, +ext/standard/tests/file/php_fd_wrapper_04.phpt,, +ext/standard/tests/file/realpath_bug77484.phpt,, +ext/standard/tests/file/rename_variation.phpt,b/68717309, +ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,, +ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, 
+ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, +ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, +ext/standard/tests/network/bug20134.phpt,, +ext/standard/tests/streams/stream_socket_sendto.phpt,, +ext/standard/tests/strings/007.phpt,, +tests/output/stream_isatty_err.phpt,b/68720279, +tests/output/stream_isatty_in-err.phpt,b/68720282, +tests/output/stream_isatty_in-out-err.phpt,, +tests/output/stream_isatty_in-out.phpt,b/68720299, +tests/output/stream_isatty_out-err.phpt,b/68720311, +tests/output/stream_isatty_out.phpt,b/68720325, +Zend/tests/concat_003.phpt,, diff --git a/test/runtimes/exclude/python3.7.3.csv b/test/runtimes/exclude/python3.7.3.csv new file mode 100644 index 000000000..2b9947212 --- /dev/null +++ b/test/runtimes/exclude/python3.7.3.csv @@ -0,0 +1,27 @@ +test name,bug id,comment +test_asynchat,b/76031995,SO_REUSEADDR +test_asyncio,,Fails on Docker. +test_asyncore,b/76031995,SO_REUSEADDR +test_epoll,, +test_fcntl,,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. +test_ftplib,,Fails in Docker +test_httplib,b/76031995,SO_REUSEADDR +test_imaplib,, +test_logging,, +test_multiprocessing_fork,,Flaky. Sometimes times out. +test_multiprocessing_forkserver,,Flaky. Sometimes times out. +test_multiprocessing_main_handling,,Flaky. Sometimes times out. +test_multiprocessing_spawn,,Flaky. Sometimes times out. +test_nntplib,b/76031995,tests should not set SO_REUSEADDR +test_poplib,,Fails on Docker +test_posix,b/76174079,posix.sched_get_priority_min not implemented + posix.sched_rr_get_interval not permitted +test_pty,b/76157709,out of pty devices +test_readline,b/76157709,out of pty devices +test_resource,b/76174079, +test_selectors,b/76116849,OSError not raised with epoll +test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets +test_socket,b/75983380, +test_ssl,b/76031995,SO_REUSEADDR +test_subprocess,, +test_support,b/76031995,SO_REUSEADDR +test_telnetlib,b/76031995,SO_REUSEADDR diff --git a/test/runtimes/exclude_go1.12.csv b/test/runtimes/exclude_go1.12.csv deleted file mode 100644 index 8c8ae0c5d..000000000 --- a/test/runtimes/exclude_go1.12.csv +++ /dev/null @@ -1,16 +0,0 @@ -test name,bug id,comment -cgo_errors,,FLAKY -cgo_test,,FLAKY -go_test:cmd/go,,FLAKY -go_test:cmd/vendor/golang.org/x/sys/unix,b/118783622,/dev devices missing -go_test:net,b/118784196,socket: invalid argument. Works as intended: see bug. -go_test:os,b/118780122,we have a pollable filesystem but that's a surprise -go_test:os/signal,b/118780860,/dev/pts not properly supported -go_test:runtime,b/118782341,sigtrap not reported or caught or something -go_test:syscall,b/118781998,bad bytes -- bad mem addr -race,b/118782931,thread sanitizer. Works as intended: b/62219744. 
-runtime:cpu124,b/118778254,segmentation fault -test:0_1,,FLAKY -testasan,, -testcarchive,b/118782924,no sigpipe -testshared,,FLAKY diff --git a/test/runtimes/exclude_java11.csv b/test/runtimes/exclude_java11.csv deleted file mode 100644 index c012e5a56..000000000 --- a/test/runtimes/exclude_java11.csv +++ /dev/null @@ -1,126 +0,0 @@ -test name,bug id,comment -com/sun/crypto/provider/Cipher/PBE/PKCS12Cipher.java,,Fails in Docker -com/sun/jdi/NashornPopFrameTest.java,, -com/sun/jdi/ProcessAttachTest.java,, -com/sun/management/HotSpotDiagnosticMXBean/CheckOrigin.java,,Fails in Docker -com/sun/management/OperatingSystemMXBean/GetCommittedVirtualMemorySize.java,, -com/sun/management/UnixOperatingSystemMXBean/GetMaxFileDescriptorCount.sh,, -com/sun/tools/attach/AttachSelf.java,, -com/sun/tools/attach/BasicTests.java,, -com/sun/tools/attach/PermissionTest.java,, -com/sun/tools/attach/StartManagementAgent.java,, -com/sun/tools/attach/TempDirTest.java,, -com/sun/tools/attach/modules/Driver.java,, -java/lang/Character/CheckScript.java,,Fails in Docker -java/lang/Character/CheckUnicode.java,,Fails in Docker -java/lang/Class/GetPackageBootLoaderChildLayer.java,, -java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker -java/lang/String/nativeEncoding/StringPlatformChars.java,, -java/net/DatagramSocket/ReuseAddressTest.java,, -java/net/DatagramSocket/SendDatagramToBadAddress.java,b/78473345, -java/net/Inet4Address/PingThis.java,, -java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103, -java/net/MulticastSocket/MulticastTTL.java,, -java/net/MulticastSocket/Promiscuous.java,, -java/net/MulticastSocket/SetLoopbackMode.java,, -java/net/MulticastSocket/SetTTLAndGetTTL.java,, -java/net/MulticastSocket/Test.java,, -java/net/MulticastSocket/TestDefaults.java,, -java/net/MulticastSocket/TimeToLive.java,, -java/net/NetworkInterface/NetworkInterfaceStreamTest.java,, -java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported -java/net/Socket/TrafficClass.java,b/78527818,Not supported on gVisor -java/net/Socket/UrgentDataTest.java,b/111515323, -java/net/Socket/setReuseAddress/Basic.java,b/78519214,SO_REUSEADDR enabled by default -java/net/SocketOption/OptionsTest.java,,Fails in Docker -java/net/SocketOption/TcpKeepAliveTest.java,, -java/net/SocketPermission/SocketPermissionTest.java,, -java/net/URLConnection/6212146/TestDriver.java,,Fails in Docker -java/net/httpclient/RequestBuilderTest.java,,Fails in Docker -java/net/httpclient/ShortResponseBody.java,, -java/net/httpclient/ShortResponseBodyWithRetry.java,, -java/nio/channels/AsyncCloseAndInterrupt.java,, -java/nio/channels/AsynchronousServerSocketChannel/Basic.java,, -java/nio/channels/AsynchronousSocketChannel/Basic.java,b/77921528,SO_KEEPALIVE is not settable -java/nio/channels/DatagramChannel/BasicMulticastTests.java,, -java/nio/channels/DatagramChannel/SocketOptionTests.java,,Fails in Docker -java/nio/channels/DatagramChannel/UseDGWithIPv6.java,, -java/nio/channels/FileChannel/directio/DirectIOTest.java,,Fails in Docker -java/nio/channels/Selector/OutOfBand.java,, -java/nio/channels/Selector/SelectWithConsumer.java,,Flaky -java/nio/channels/ServerSocketChannel/SocketOptionTests.java,, -java/nio/channels/SocketChannel/LingerOnClose.java,, -java/nio/channels/SocketChannel/SocketOptionTests.java,b/77965901, -java/nio/channels/spi/SelectorProvider/inheritedChannel/InheritedChannelTest.java,,Fails in Docker -java/rmi/activation/Activatable/extLoadedImpl/ext.sh,, 
-java/rmi/transport/checkLeaseInfoLeak/CheckLeaseLeak.java,, -java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker -java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker -java/util/Calendar/JapaneseEraNameTest.java,, -java/util/Currency/CurrencyTest.java,,Fails in Docker -java/util/Currency/ValidateISO4217.java,,Fails in Docker -java/util/Locale/LSRDataTest.java,, -java/util/concurrent/locks/Lock/TimedAcquireLeak.java,, -java/util/jar/JarFile/mrjar/MultiReleaseJarAPI.java,,Fails in Docker -java/util/logging/LogManager/Configuration/updateConfiguration/SimpleUpdateConfigWithInputStreamTest.java,, -java/util/logging/TestLoggerWeakRefLeak.java,, -javax/imageio/AppletResourceTest.java,, -javax/management/security/HashedPasswordFileTest.java,, -javax/net/ssl/SSLSession/JSSERenegotiate.java,,Fails in Docker -javax/sound/sampled/AudioInputStream/FrameLengthAfterConversion.java,, -jdk/jfr/event/runtime/TestNetworkUtilizationEvent.java,, -jdk/jfr/event/runtime/TestThreadParkEvent.java,, -jdk/jfr/event/sampling/TestNative.java,, -jdk/jfr/jcmd/TestJcmdChangeLogLevel.java,, -jdk/jfr/jcmd/TestJcmdConfigure.java,, -jdk/jfr/jcmd/TestJcmdDump.java,, -jdk/jfr/jcmd/TestJcmdDumpGeneratedFilename.java,, -jdk/jfr/jcmd/TestJcmdDumpLimited.java,, -jdk/jfr/jcmd/TestJcmdDumpPathToGCRoots.java,, -jdk/jfr/jcmd/TestJcmdLegacy.java,, -jdk/jfr/jcmd/TestJcmdSaveToFile.java,, -jdk/jfr/jcmd/TestJcmdStartDirNotExist.java,, -jdk/jfr/jcmd/TestJcmdStartInvaldFile.java,, -jdk/jfr/jcmd/TestJcmdStartPathToGCRoots.java,, -jdk/jfr/jcmd/TestJcmdStartStopDefault.java,, -jdk/jfr/jcmd/TestJcmdStartWithOptions.java,, -jdk/jfr/jcmd/TestJcmdStartWithSettings.java,, -jdk/jfr/jcmd/TestJcmdStopInvalidFile.java,, -jdk/jfr/jvm/TestJfrJavaBase.java,, -jdk/jfr/startupargs/TestStartRecording.java,, -jdk/modules/incubator/ImageModules.java,, -jdk/net/Sockets/ExtOptionTest.java,, -jdk/net/Sockets/QuickAckTest.java,, -lib/security/cacerts/VerifyCACerts.java,, -sun/management/jmxremote/bootstrap/CustomLauncherTest.java,, -sun/management/jmxremote/bootstrap/JvmstatCountersTest.java,, -sun/management/jmxremote/bootstrap/LocalManagementTest.java,, -sun/management/jmxremote/bootstrap/RmiRegistrySslTest.java,, -sun/management/jmxremote/bootstrap/RmiSslBootstrapTest.sh,, -sun/management/jmxremote/startstop/JMXStartStopTest.java,, -sun/management/jmxremote/startstop/JMXStatusPerfCountersTest.java,, -sun/management/jmxremote/startstop/JMXStatusTest.java,, -sun/text/resources/LocaleDataTest.java,, -sun/tools/jcmd/TestJcmdSanity.java,, -sun/tools/jhsdb/AlternateHashingTest.java,, -sun/tools/jhsdb/BasicLauncherTest.java,, -sun/tools/jhsdb/HeapDumpTest.java,, -sun/tools/jhsdb/heapconfig/JMapHeapConfigTest.java,, -sun/tools/jinfo/BasicJInfoTest.java,, -sun/tools/jinfo/JInfoTest.java,, -sun/tools/jmap/BasicJMapTest.java,, -sun/tools/jstack/BasicJStackTest.java,, -sun/tools/jstack/DeadlockDetectionTest.java,, -sun/tools/jstatd/TestJstatdExternalRegistry.java,, -sun/tools/jstatd/TestJstatdPort.java,,Flaky -sun/tools/jstatd/TestJstatdPortAndServer.java,,Flaky -sun/util/calendar/zi/TestZoneInfo310.java,, -tools/jar/modularJar/Basic.java,, -tools/jar/multiRelease/Basic.java,, -tools/jimage/JImageExtractTest.java,, -tools/jimage/JImageTest.java,, -tools/jlink/JLinkTest.java,, -tools/jlink/plugins/IncludeLocalesPluginTest.java,, -tools/jmod/hashes/HashesTest.java,, -tools/launcher/BigJar.java,b/111611473, -tools/launcher/modules/patch/systemmodules/PatchSystemModules.java,, diff --git a/test/runtimes/exclude_nodejs12.4.0.csv 
b/test/runtimes/exclude_nodejs12.4.0.csv deleted file mode 100644 index e7edfa0a5..000000000 --- a/test/runtimes/exclude_nodejs12.4.0.csv +++ /dev/null @@ -1,50 +0,0 @@ -test name,bug id,comment -benchmark/test-benchmark-fs.js,, -benchmark/test-benchmark-module.js,, -benchmark/test-benchmark-napi.js,, -doctool/test-make-doc.js,b/68848110,Expected to fail. -fixtures/test-error-first-line-offset.js,, -fixtures/test-fs-readfile-error.js,, -fixtures/test-fs-stat-sync-overflow.js,, -internet/test-dgram-broadcast-multi-process.js,, -internet/test-dgram-multicast-multi-process.js,, -internet/test-dgram-multicast-set-interface-lo.js,, -internet/test-doctool-versions.js,, -internet/test-uv-threadpool-schedule.js,, -parallel/test-cluster-dgram-reuse.js,b/64024294, -parallel/test-dgram-bind-fd.js,b/132447356, -parallel/test-dgram-create-socket-handle-fd.js,b/132447238, -parallel/test-dgram-createSocket-type.js,b/68847739, -parallel/test-dgram-socket-buffer-size.js,b/68847921, -parallel/test-fs-access.js,, -parallel/test-fs-write-stream-double-close.js,, -parallel/test-fs-write-stream-throw-type-error.js,b/110226209, -parallel/test-fs-write-stream.js,, -parallel/test-http2-respond-file-error-pipe-offset.js,, -parallel/test-os.js,, -parallel/test-process-uid-gid.js,, -pseudo-tty/test-assert-colors.js,, -pseudo-tty/test-assert-no-color.js,, -pseudo-tty/test-assert-position-indicator.js,, -pseudo-tty/test-async-wrap-getasyncid-tty.js,, -pseudo-tty/test-fatal-error.js,, -pseudo-tty/test-handle-wrap-isrefed-tty.js,, -pseudo-tty/test-readable-tty-keepalive.js,, -pseudo-tty/test-set-raw-mode-reset-process-exit.js,, -pseudo-tty/test-set-raw-mode-reset-signal.js,, -pseudo-tty/test-set-raw-mode-reset.js,, -pseudo-tty/test-stderr-stdout-handle-sigwinch.js,, -pseudo-tty/test-stdout-read.js,, -pseudo-tty/test-tty-color-support.js,, -pseudo-tty/test-tty-isatty.js,, -pseudo-tty/test-tty-stdin-call-end.js,, -pseudo-tty/test-tty-stdin-end.js,, -pseudo-tty/test-stdin-write.js,, -pseudo-tty/test-tty-stdout-end.js,, -pseudo-tty/test-tty-stdout-resize.js,, -pseudo-tty/test-tty-stream-constructors.js,, -pseudo-tty/test-tty-window-size.js,, -pseudo-tty/test-tty-wrap.js,, -pummel/test-net-pingpong.js,, -pummel/test-vm-memleak.js,, -tick-processor/test-tick-processor-builtin.js,, diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv deleted file mode 100644 index f3606bfe8..000000000 --- a/test/runtimes/exclude_php7.3.6.csv +++ /dev/null @@ -1,35 +0,0 @@ -test name,bug id,comment -ext/intl/tests/bug77895.phpt,, -ext/intl/tests/dateformat_bug65683_2.phpt,, -ext/mbstring/tests/bug76319.phpt,, -ext/mbstring/tests/bug76958.phpt,, -ext/mbstring/tests/bug77025.phpt,, -ext/mbstring/tests/bug77165.phpt,, -ext/mbstring/tests/bug77454.phpt,, -ext/mbstring/tests/mb_convert_encoding_leak.phpt,, -ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, -ext/session/tests/session_set_save_handler_class_018.phpt,, -ext/session/tests/session_set_save_handler_iface_003.phpt,, -ext/session/tests/session_set_save_handler_variation4.phpt,, -ext/standard/tests/file/filetype_variation.phpt,, -ext/standard/tests/file/fopen_variation19.phpt,, -ext/standard/tests/file/php_fd_wrapper_01.phpt,, -ext/standard/tests/file/php_fd_wrapper_02.phpt,, -ext/standard/tests/file/php_fd_wrapper_03.phpt,, -ext/standard/tests/file/php_fd_wrapper_04.phpt,, -ext/standard/tests/file/realpath_bug77484.phpt,, -ext/standard/tests/file/rename_variation.phpt,b/68717309, -ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,, 
-ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, -ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, -ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, -ext/standard/tests/network/bug20134.phpt,, -ext/standard/tests/streams/stream_socket_sendto.phpt,, -ext/standard/tests/strings/007.phpt,, -tests/output/stream_isatty_err.phpt,b/68720279, -tests/output/stream_isatty_in-err.phpt,b/68720282, -tests/output/stream_isatty_in-out-err.phpt,, -tests/output/stream_isatty_in-out.phpt,b/68720299, -tests/output/stream_isatty_out-err.phpt,b/68720311, -tests/output/stream_isatty_out.phpt,b/68720325, -Zend/tests/concat_003.phpt,, diff --git a/test/runtimes/exclude_python3.7.3.csv b/test/runtimes/exclude_python3.7.3.csv deleted file mode 100644 index 2b9947212..000000000 --- a/test/runtimes/exclude_python3.7.3.csv +++ /dev/null @@ -1,27 +0,0 @@ -test name,bug id,comment -test_asynchat,b/76031995,SO_REUSEADDR -test_asyncio,,Fails on Docker. -test_asyncore,b/76031995,SO_REUSEADDR -test_epoll,, -test_fcntl,,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. -test_ftplib,,Fails in Docker -test_httplib,b/76031995,SO_REUSEADDR -test_imaplib,, -test_logging,, -test_multiprocessing_fork,,Flaky. Sometimes times out. -test_multiprocessing_forkserver,,Flaky. Sometimes times out. -test_multiprocessing_main_handling,,Flaky. Sometimes times out. -test_multiprocessing_spawn,,Flaky. Sometimes times out. -test_nntplib,b/76031995,tests should not set SO_REUSEADDR -test_poplib,,Fails on Docker -test_posix,b/76174079,posix.sched_get_priority_min not implemented + posix.sched_rr_get_interval not permitted -test_pty,b/76157709,out of pty devices -test_readline,b/76157709,out of pty devices -test_resource,b/76174079, -test_selectors,b/76116849,OSError not raised with epoll -test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets -test_socket,b/75983380, -test_ssl,b/76031995,SO_REUSEADDR -test_subprocess,, -test_support,b/76031995,SO_REUSEADDR -test_telnetlib,b/76031995,SO_REUSEADDR diff --git a/test/runtimes/exclude_test.go b/test/runtimes/exclude_test.go new file mode 100644 index 000000000..bb4b46630 --- /dev/null +++ b/test/runtimes/exclude_test.go @@ -0,0 +1,37 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "flag" + "os" + "testing" +) + +func TestMain(m *testing.M) { + flag.Parse() + os.Exit(m.Run()) +} + +// Test that the exclude file parses without error. +func TestExcludeList(t *testing.T) { + ex, err := getExcludes() + if err != nil { + t.Fatalf("error parsing exclude file: %v", err) + } + if *excludeFile != "" && len(ex) == 0 { + t.Errorf("got empty excludes for file %q", *excludeFile) + } +} diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go new file mode 100644 index 000000000..50aca5d69 --- /dev/null +++ b/test/runtimes/runner.go @@ -0,0 +1,209 @@ +// Copyright 2019 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary runner runs the runtime tests in a Docker container. +package main + +import ( + "context" + "encoding/csv" + "flag" + "fmt" + "io" + "os" + "sort" + "strings" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +var ( + lang = flag.String("lang", "", "language runtime to test") + image = flag.String("image", "", "docker image with runtime tests") + excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment") + batchSize = flag.Int("batch", 50, "number of test cases run in one command") +) + +// Wait time for each test to run. +const timeout = 45 * time.Minute + +func main() { + flag.Parse() + if *lang == "" || *image == "" { + fmt.Fprintf(os.Stderr, "lang and image flags must not be empty\n") + os.Exit(1) + } + os.Exit(runTests()) +} + +// runTests is a helper that is called by main. It exists so that we can run +// defered functions before exiting. It returns an exit code that should be +// passed to os.Exit. +func runTests() int { + // Get tests to exclude.. + excludes, err := getExcludes() + if err != nil { + fmt.Fprintf(os.Stderr, "Error getting exclude list: %s\n", err.Error()) + return 1 + } + + // Construct the shared docker instance. + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, testutil.DefaultLogger(*lang)) + defer d.CleanUp(ctx) + + if err := testutil.TouchShardStatusFile(); err != nil { + fmt.Fprintf(os.Stderr, "error touching status shard file: %v\n", err) + return 1 + } + + // Get a slice of tests to run. This will also start a single Docker + // container that will be used to run each test. The final test will + // stop the Docker container. + tests, err := getTests(ctx, d, excludes) + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err.Error()) + return 1 + } + + m := testing.MainStart(testDeps{}, tests, nil, nil) + return m.Run() +} + +// getTests executes all tests as table tests. +func getTests(ctx context.Context, d *dockerutil.Container, excludes map[string]struct{}) ([]testing.InternalTest, error) { + // Start the container. + opts := dockerutil.RunOpts{ + Image: fmt.Sprintf("runtimes/%s", *image), + } + d.CopyFiles(&opts, "/proctor", "test/runtimes/proctor/proctor") + if err := d.Spawn(ctx, opts, "/proctor/proctor", "--pause"); err != nil { + return nil, fmt.Errorf("docker run failed: %v", err) + } + + // Get a list of all tests in the image. + list, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--list") + if err != nil { + return nil, fmt.Errorf("docker exec failed: %v\nlogs: %s", err, list) + } + + // Calculate a subset of tests to run corresponding to the current + // shard. 
+ tests := strings.Fields(list) + sort.Strings(tests) + indices, err := testutil.TestIndicesForShard(len(tests)) + if err != nil { + return nil, fmt.Errorf("TestsForShard() failed: %v", err) + } + + var itests []testing.InternalTest + for i := 0; i < len(indices); i += *batchSize { + var tcs []string + end := i + *batchSize + if end > len(indices) { + end = len(indices) + } + for _, tc := range indices[i:end] { + // Add test if not excluded. + if _, ok := excludes[tests[tc]]; ok { + log.Infof("Skipping test case %s\n", tests[tc]) + continue + } + tcs = append(tcs, tests[tc]) + } + itests = append(itests, testing.InternalTest{ + Name: strings.Join(tcs, ", "), + F: func(t *testing.T) { + var ( + now = time.Now() + done = make(chan struct{}) + output string + err error + ) + + go func() { + fmt.Printf("RUNNING the following in a batch\n%s\n", strings.Join(tcs, "\n")) + output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--tests", strings.Join(tcs, ",")) + close(done) + }() + + select { + case <-done: + if err == nil { + fmt.Printf("PASS: (%v)\n\n", time.Since(now)) + return + } + t.Errorf("FAIL: (%v):\n%s\n", time.Since(now), output) + case <-time.After(timeout): + t.Errorf("TIMEOUT: (%v):\n%s\n", time.Since(now), output) + } + }, + }) + } + + return itests, nil +} + +// getBlacklist reads the exclude file and returns a set of test names to +// exclude. +func getExcludes() (map[string]struct{}, error) { + excludes := make(map[string]struct{}) + if *excludeFile == "" { + return excludes, nil + } + path, err := testutil.FindFile(*excludeFile) + if err != nil { + return nil, err + } + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + r := csv.NewReader(f) + + // First line is header. Skip it. + if _, err := r.Read(); err != nil { + return nil, err + } + + for { + record, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + excludes[record[0]] = struct{}{} + } + return excludes, nil +} + +// testDeps implements testing.testDeps (an unexported interface), and is +// required to use testing.MainStart. +type testDeps struct{} + +func (f testDeps) MatchString(a, b string) (bool, error) { return a == b, nil } +func (f testDeps) StartCPUProfile(io.Writer) error { return nil } +func (f testDeps) StopCPUProfile() {} +func (f testDeps) WriteProfileTo(string, io.Writer, int) error { return nil } +func (f testDeps) ImportPath() string { return "" } +func (f testDeps) StartTestLog(io.Writer) {} +func (f testDeps) StopTestLog() error { return nil } diff --git a/test/runtimes/runner/BUILD b/test/runtimes/runner/BUILD deleted file mode 100644 index dc0d5d5b4..000000000 --- a/test/runtimes/runner/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -load("//tools:defs.bzl", "go_binary", "go_test") - -package(licenses = ["notice"]) - -go_binary( - name = "runner", - testonly = 1, - srcs = ["main.go"], - visibility = ["//test/runtimes:__pkg__"], - deps = [ - "//pkg/log", - "//pkg/test/dockerutil", - "//pkg/test/testutil", - ], -) - -go_test( - name = "exclude_test", - size = "small", - srcs = ["exclude_test.go"], - library = ":runner", -) diff --git a/test/runtimes/runner/exclude_test.go b/test/runtimes/runner/exclude_test.go deleted file mode 100644 index 67c2170c8..000000000 --- a/test/runtimes/runner/exclude_test.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019 The gVisor Authors. 
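(The per-batch timeout in getTests above follows the usual goroutine-plus-select idiom. A stripped-down sketch of the same idea, assuming the standard "time" package; runWithTimeout is an illustrative helper and not part of the patch.)

// runWithTimeout runs fn in a goroutine and reports whether it completed
// before the deadline expired. Any results from fn are passed back through
// the closure, as the batch runner does with its output and error variables.
func runWithTimeout(d time.Duration, fn func()) bool {
	done := make(chan struct{})
	go func() {
		defer close(done)
		fn()
	}()
	select {
	case <-done:
		return true
	case <-time.After(d):
		return false
	}
}
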
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "flag" - "os" - "testing" -) - -func TestMain(m *testing.M) { - flag.Parse() - os.Exit(m.Run()) -} - -// Test that the exclude file parses without error. -func TestExcludelist(t *testing.T) { - ex, err := getExcludes() - if err != nil { - t.Fatalf("error parsing exclude file: %v", err) - } - if *excludeFile != "" && len(ex) == 0 { - t.Errorf("got empty excludes for file %q", *excludeFile) - } -} diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go deleted file mode 100644 index e230912c9..000000000 --- a/test/runtimes/runner/main.go +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary runner runs the runtime tests in a Docker container. -package main - -import ( - "context" - "encoding/csv" - "flag" - "fmt" - "io" - "os" - "sort" - "strings" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/test/dockerutil" - "gvisor.dev/gvisor/pkg/test/testutil" -) - -var ( - lang = flag.String("lang", "", "language runtime to test") - image = flag.String("image", "", "docker image with runtime tests") - excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment") - batchSize = flag.Int("batch", 50, "number of test cases run in one command") -) - -// Wait time for each test to run. -const timeout = 45 * time.Minute - -func main() { - flag.Parse() - if *lang == "" || *image == "" { - fmt.Fprintf(os.Stderr, "lang and image flags must not be empty\n") - os.Exit(1) - } - os.Exit(runTests()) -} - -// runTests is a helper that is called by main. It exists so that we can run -// defered functions before exiting. It returns an exit code that should be -// passed to os.Exit. -func runTests() int { - // Get tests to exclude.. - excludes, err := getExcludes() - if err != nil { - fmt.Fprintf(os.Stderr, "Error getting exclude list: %s\n", err.Error()) - return 1 - } - - // Construct the shared docker instance. - ctx := context.Background() - d := dockerutil.MakeContainer(ctx, testutil.DefaultLogger(*lang)) - defer d.CleanUp(ctx) - - if err := testutil.TouchShardStatusFile(); err != nil { - fmt.Fprintf(os.Stderr, "error touching status shard file: %v\n", err) - return 1 - } - - // Get a slice of tests to run. This will also start a single Docker - // container that will be used to run each test. 
The final test will - // stop the Docker container. - tests, err := getTests(ctx, d, excludes) - if err != nil { - fmt.Fprintf(os.Stderr, "%s\n", err.Error()) - return 1 - } - - m := testing.MainStart(testDeps{}, tests, nil, nil) - return m.Run() -} - -// getTests executes all tests as table tests. -func getTests(ctx context.Context, d *dockerutil.Container, excludes map[string]struct{}) ([]testing.InternalTest, error) { - // Start the container. - opts := dockerutil.RunOpts{ - Image: fmt.Sprintf("runtimes/%s", *image), - } - d.CopyFiles(&opts, "/proctor", "test/runtimes/proctor/proctor") - if err := d.Spawn(ctx, opts, "/proctor/proctor", "--pause"); err != nil { - return nil, fmt.Errorf("docker run failed: %v", err) - } - - // Get a list of all tests in the image. - list, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--list") - if err != nil { - return nil, fmt.Errorf("docker exec failed: %v", err) - } - - // Calculate a subset of tests to run corresponding to the current - // shard. - tests := strings.Fields(list) - sort.Strings(tests) - indices, err := testutil.TestIndicesForShard(len(tests)) - if err != nil { - return nil, fmt.Errorf("TestsForShard() failed: %v", err) - } - - var itests []testing.InternalTest - for i := 0; i < len(indices); i += *batchSize { - var tcs []string - end := i + *batchSize - if end > len(indices) { - end = len(indices) - } - for _, tc := range indices[i:end] { - // Add test if not excluded. - if _, ok := excludes[tests[tc]]; ok { - log.Infof("Skipping test case %s\n", tests[tc]) - continue - } - tcs = append(tcs, tests[tc]) - } - itests = append(itests, testing.InternalTest{ - Name: strings.Join(tcs, ", "), - F: func(t *testing.T) { - var ( - now = time.Now() - done = make(chan struct{}) - output string - err error - ) - - go func() { - fmt.Printf("RUNNING the following in a batch\n%s\n", strings.Join(tcs, "\n")) - output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--tests", strings.Join(tcs, ",")) - close(done) - }() - - select { - case <-done: - if err == nil { - fmt.Printf("PASS: (%v)\n\n", time.Since(now)) - return - } - t.Errorf("FAIL: (%v):\n%s\n", time.Since(now), output) - case <-time.After(timeout): - t.Errorf("TIMEOUT: (%v):\n%s\n", time.Since(now), output) - } - }, - }) - } - - return itests, nil -} - -// getBlacklist reads the exclude file and returns a set of test names to -// exclude. -func getExcludes() (map[string]struct{}, error) { - excludes := make(map[string]struct{}) - if *excludeFile == "" { - return excludes, nil - } - f, err := os.Open(*excludeFile) - if err != nil { - return nil, err - } - defer f.Close() - - r := csv.NewReader(f) - - // First line is header. Skip it. - if _, err := r.Read(); err != nil { - return nil, err - } - - for { - record, err := r.Read() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - excludes[record[0]] = struct{}{} - } - return excludes, nil -} - -// testDeps implements testing.testDeps (an unexported interface), and is -// required to use testing.MainStart. 
-type testDeps struct{} - -func (f testDeps) MatchString(a, b string) (bool, error) { return a == b, nil } -func (f testDeps) StartCPUProfile(io.Writer) error { return nil } -func (f testDeps) StopCPUProfile() {} -func (f testDeps) WriteProfileTo(string, io.Writer, int) error { return nil } -func (f testDeps) ImportPath() string { return "" } -func (f testDeps) StartTestLog(io.Writer) {} -func (f testDeps) StopTestLog() error { return nil } -- cgit v1.2.3 From dcf6ddc2772b8fcf824f1f48e0281e1cc80b93ea Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 16 Jul 2020 18:38:28 -0700 Subject: Add support to return protocol in recvmsg for AF_PACKET. Updates #173 PiperOrigin-RevId: 321690756 --- pkg/sentry/socket/netstack/netstack.go | 24 +++++++++++++++++++++--- pkg/tcpip/tcpip.go | 19 +++++++++++++++++++ pkg/tcpip/transport/packet/endpoint.go | 18 ++++++++++++++++-- test/syscalls/linux/packet_socket.cc | 1 + test/syscalls/linux/packet_socket_raw.cc | 1 + 5 files changed, 58 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 0b1be1bd2..49a04e613 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -297,8 +297,9 @@ type socketOpsCommon struct { readView buffer.View // readCM holds control message information for the last packet read // from Endpoint. - readCM tcpip.ControlMessages - sender tcpip.FullAddress + readCM tcpip.ControlMessages + sender tcpip.FullAddress + linkPacketInfo tcpip.LinkPacketInfo // sockOptTimestamp corresponds to SO_TIMESTAMP. When true, timestamps // of returned messages can be returned via control messages. When @@ -447,8 +448,21 @@ func (s *socketOpsCommon) fetchReadView() *syserr.Error { } s.readView = nil s.sender = tcpip.FullAddress{} + s.linkPacketInfo = tcpip.LinkPacketInfo{} - v, cms, err := s.Endpoint.Read(&s.sender) + var v buffer.View + var cms tcpip.ControlMessages + var err *tcpip.Error + + switch e := s.Endpoint.(type) { + // The ordering of these interfaces matters. The most specific + // interfaces must be specified before the more generic Endpoint + // interface. + case tcpip.PacketEndpoint: + v, cms, err = e.ReadPacket(&s.sender, &s.linkPacketInfo) + case tcpip.Endpoint: + v, cms, err = e.Read(&s.sender) + } if err != nil { atomic.StoreUint32(&s.readViewHasData, 0) return syserr.TranslateNetstackError(err) @@ -2509,6 +2523,10 @@ func (s *socketOpsCommon) nonBlockingRead(ctx context.Context, dst usermem.IOSeq var addrLen uint32 if isPacket && senderRequested { addr, addrLen = ConvertAddress(s.family, s.sender) + switch v := addr.(type) { + case *linux.SockAddrLink: + v.Protocol = htons(uint16(s.linkPacketInfo.Protocol)) + } } if peek { diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 71bcee785..48ad56d4d 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -549,6 +549,25 @@ type Endpoint interface { SetOwner(owner PacketOwner) } +// LinkPacketInfo holds Link layer information for a received packet. +// +// +stateify savable +type LinkPacketInfo struct { + // Protocol is the NetworkProtocolNumber for the packet. + Protocol NetworkProtocolNumber +} + +// PacketEndpoint are additional methods that are only implemented by Packet +// endpoints. +type PacketEndpoint interface { + // ReadPacket reads a datagram/packet from the endpoint and optionally + // returns the sender and additional LinkPacketInfo. + // + // This method does not block if there is no data pending. 
It will also + // either return an error or data, never both. + ReadPacket(*FullAddress, *LinkPacketInfo) (buffer.View, ControlMessages, *Error) +} + // EndpointInfo is the interface implemented by each endpoint info struct. type EndpointInfo interface { // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index 92b487381..7b2083a09 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -45,6 +45,9 @@ type packet struct { timestampNS int64 // senderAddr is the network address of the sender. senderAddr tcpip.FullAddress + // packetInfo holds additional information like the protocol + // of the packet etc. + packetInfo tcpip.LinkPacketInfo } // endpoint is the packet socket implementation of tcpip.Endpoint. It is legal @@ -151,8 +154,8 @@ func (ep *endpoint) Close() { // ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. func (ep *endpoint) ModerateRecvBuf(copied int) {} -// Read implements tcpip.Endpoint.Read. -func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { +// Read implements tcpip.PacketEndpoint.ReadPacket. +func (ep *endpoint) ReadPacket(addr *tcpip.FullAddress, info *tcpip.LinkPacketInfo) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { ep.rcvMu.Lock() // If there's no data to read, return that read would block or that the @@ -177,9 +180,18 @@ func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMes *addr = packet.senderAddr } + if info != nil { + *info = packet.packetInfo + } + return packet.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: packet.timestampNS}, nil } +// Read implements tcpip.Endpoint.Read. +func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { + return ep.ReadPacket(addr, nil) +} + func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) { // TODO(b/129292371): Implement. return 0, nil, tcpip.ErrInvalidOptionValue @@ -428,12 +440,14 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, NIC: nicID, Addr: tcpip.Address(hdr.SourceAddress()), } + packet.packetInfo.Protocol = netProto } else { // Guess the would-be ethernet header. packet.senderAddr = tcpip.FullAddress{ NIC: nicID, Addr: tcpip.Address(localAddr), } + packet.packetInfo.Protocol = netProto } if ep.cooked { diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc index ce63adb23..e94ddcb77 100644 --- a/test/syscalls/linux/packet_socket.cc +++ b/test/syscalls/linux/packet_socket.cc @@ -193,6 +193,7 @@ void ReceiveMessage(int sock, int ifindex) { EXPECT_EQ(src.sll_family, AF_PACKET); EXPECT_EQ(src.sll_ifindex, ifindex); EXPECT_EQ(src.sll_halen, ETH_ALEN); + EXPECT_EQ(ntohs(src.sll_protocol), ETH_P_IP); // This came from the loopback device, so the address is all 0s. 
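  // Editorial note (not part of the original patch): sll_protocol is carried
  // in network byte order, which is why the assertions added in this change
  // convert it with ntohs() before comparing against ETH_P_IP.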
for (int i = 0; i < src.sll_halen; i++) { EXPECT_EQ(src.sll_addr[i], 0); diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc index e44062475..2fca9fe4d 100644 --- a/test/syscalls/linux/packet_socket_raw.cc +++ b/test/syscalls/linux/packet_socket_raw.cc @@ -200,6 +200,7 @@ TEST_P(RawPacketTest, Receive) { EXPECT_EQ(src.sll_family, AF_PACKET); EXPECT_EQ(src.sll_ifindex, GetLoopbackIndex()); EXPECT_EQ(src.sll_halen, ETH_ALEN); + EXPECT_EQ(ntohs(src.sll_protocol), ETH_P_IP); // This came from the loopback device, so the address is all 0s. for (int i = 0; i < src.sll_halen; i++) { EXPECT_EQ(src.sll_addr[i], 0); -- cgit v1.2.3 From 7e226c80763351debded7b5a48463d69df7db198 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Fri, 17 Jul 2020 09:12:48 -0700 Subject: Test UDP packets with mcast source addr are discarded PiperOrigin-RevId: 321790802 --- test/packetimpact/tests/BUILD | 11 +++ .../tests/udp_discard_mcast_source_addr_test.go | 92 ++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 test/packetimpact/tests/udp_discard_mcast_source_addr_test.go (limited to 'test') diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 8051562b2..6a07889be 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -26,6 +26,17 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "udp_discard_mcast_source_addr", + srcs = ["udp_discard_mcast_source_addr_test.go"], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "udp_recv_mcast_bcast", srcs = ["udp_recv_mcast_bcast_test.go"], diff --git a/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go b/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go new file mode 100644 index 000000000..b0315e67c --- /dev/null +++ b/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go @@ -0,0 +1,92 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
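// Editorial sketch (not part of the original change): these tests exercise the
// rule that a datagram whose IP source address is a multicast address is never
// valid and must be silently discarded (RFC 1122 source-address validation for
// IPv4; RFC 4291 notes that IPv6 multicast addresses must never appear as
// source addresses). A minimal receive-side check using only the standard
// library might look like the sketch below; the function name is illustrative
// only, not an API used by this test.
//
//	func shouldDiscardSource(src net.IP) bool {
//		// Multicast (and, for IPv4, limited-broadcast) sources are never
//		// legitimate on received datagrams.
//		return src.IsMulticast() || src.Equal(net.IPv4bcast)
//	}
//
// The tests assert the behavior indirectly: after injecting such packets, the
// DUT's recvmsg is expected to return EAGAIN/EWOULDBLOCK instead of data.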
+ +package udp_discard_mcast_source_addr_test + +import ( + "context" + "flag" + "fmt" + "net" + "syscall" + "testing" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +var oneSecond = unix.Timeval{Sec: 1, Usec: 0} + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +func TestDiscardsUDPPacketsWithMcastSourceAddressV4(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv4)) + defer dut.Close(remoteFD) + dut.SetSockOptTimeval(remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) + defer conn.Close() + + for _, mcastAddr := range []net.IP{ + net.IPv4allsys, + net.IPv4allrouter, + net.IPv4(224, 0, 1, 42), + net.IPv4(232, 1, 2, 3), + } { + t.Run(fmt.Sprintf("srcaddr=%s", mcastAddr), func(t *testing.T) { + conn.SendIP( + testbench.IPv4{SrcAddr: testbench.Address(tcpip.Address(mcastAddr.To4()))}, + testbench.UDP{}, + ) + + ret, payload, errno := dut.RecvWithErrno(context.Background(), remoteFD, 100, 0) + if errno != syscall.EAGAIN || errno != syscall.EWOULDBLOCK { + t.Errorf("Recv got unexpected result, ret=%d, payload=%q, errno=%s", ret, payload, errno) + } + }) + } +} + +func TestDiscardsUDPPacketsWithMcastSourceAddressV6(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv6)) + defer dut.Close(remoteFD) + dut.SetSockOptTimeval(remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond) + conn := testbench.NewUDPIPv6(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) + defer conn.Close() + + for _, mcastAddr := range []net.IP{ + net.IPv6interfacelocalallnodes, + net.IPv6linklocalallnodes, + net.IPv6linklocalallrouters, + net.ParseIP("fe01::42"), + net.ParseIP("fe02::4242"), + } { + t.Run(fmt.Sprintf("srcaddr=%s", mcastAddr), func(t *testing.T) { + conn.SendIPv6( + testbench.IPv6{SrcAddr: testbench.Address(tcpip.Address(mcastAddr.To16()))}, + testbench.UDP{}, + ) + ret, payload, errno := dut.RecvWithErrno(context.Background(), remoteFD, 100, 0) + if errno != syscall.EAGAIN || errno != syscall.EWOULDBLOCK { + t.Errorf("Recv got unexpected result, ret=%d, payload=%q, errno=%s", ret, payload, errno) + } + }) + } +} -- cgit v1.2.3 From c0ee95198ae4753d02e26e9fbf5341c1d047d9c1 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Fri, 17 Jul 2020 10:43:58 -0700 Subject: Automated rollback of changelist 321647645 PiperOrigin-RevId: 321808673 --- test/runtimes/BUILD | 58 ++------- test/runtimes/defs.bzl | 79 +++++++++---- test/runtimes/exclude/go1.12.csv | 16 --- test/runtimes/exclude/java11.csv | 126 -------------------- test/runtimes/exclude/nodejs12.4.0.csv | 50 -------- test/runtimes/exclude/php7.3.6.csv | 35 ------ test/runtimes/exclude/python3.7.3.csv | 27 ----- test/runtimes/exclude_go1.12.csv | 16 +++ test/runtimes/exclude_java11.csv | 126 ++++++++++++++++++++ test/runtimes/exclude_nodejs12.4.0.csv | 50 ++++++++ test/runtimes/exclude_php7.3.6.csv | 35 ++++++ test/runtimes/exclude_python3.7.3.csv | 27 +++++ test/runtimes/exclude_test.go | 37 ------ test/runtimes/runner.go | 209 --------------------------------- test/runtimes/runner/BUILD | 22 ++++ test/runtimes/runner/exclude_test.go | 37 ++++++ test/runtimes/runner/main.go | 205 
++++++++++++++++++++++++++++++++ 17 files changed, 583 insertions(+), 572 deletions(-) delete mode 100644 test/runtimes/exclude/go1.12.csv delete mode 100644 test/runtimes/exclude/java11.csv delete mode 100644 test/runtimes/exclude/nodejs12.4.0.csv delete mode 100644 test/runtimes/exclude/php7.3.6.csv delete mode 100644 test/runtimes/exclude/python3.7.3.csv create mode 100644 test/runtimes/exclude_go1.12.csv create mode 100644 test/runtimes/exclude_java11.csv create mode 100644 test/runtimes/exclude_nodejs12.4.0.csv create mode 100644 test/runtimes/exclude_php7.3.6.csv create mode 100644 test/runtimes/exclude_python3.7.3.csv delete mode 100644 test/runtimes/exclude_test.go delete mode 100644 test/runtimes/runner.go create mode 100644 test/runtimes/runner/BUILD create mode 100644 test/runtimes/runner/exclude_test.go create mode 100644 test/runtimes/runner/main.go (limited to 'test') diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index aeefa8f56..f98d02e00 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -1,82 +1,38 @@ -load("//test/runtimes:defs.bzl", "exclude_test", "runtime_test") +load("//test/runtimes:defs.bzl", "runtime_test") package(licenses = ["notice"]) -go_binary( - name = "runner", - testonly = 1, - srcs = ["runner.go"], - visibility = ["//test/runtimes:__pkg__"], - deps = [ - "//pkg/log", - "//pkg/test/dockerutil", - "//pkg/test/testutil", - ], -) - runtime_test( name = "go1.12", - exclude_file = "exclude/go1.12.csv", + exclude_file = "exclude_go1.12.csv", lang = "go", shard_count = 5, ) runtime_test( name = "java11", - exclude_file = "exclude/java11.csv", + exclude_file = "exclude_java11.csv", lang = "java", - shard_count = 5, + shard_count = 10, ) runtime_test( name = "nodejs12.4.0", - exclude_file = "exclude/nodejs12.4.0.csv", + exclude_file = "exclude_nodejs12.4.0.csv", lang = "nodejs", shard_count = 5, ) runtime_test( name = "php7.3.6", - exclude_file = "exclude/php7.3.6.csv", + exclude_file = "exclude_php7.3.6.csv", lang = "php", shard_count = 5, ) runtime_test( name = "python3.7.3", - exclude_file = "exclude/python3.7.3.csv", + exclude_file = "exclude_python3.7.3.csv", lang = "python", shard_count = 5, ) - -go_test( - name = "exclude_test", - size = "small", - srcs = ["exclude_test.go"], - library = ":runner", -) - -exclude_test( - name = "go", - exclude_file = "exclude/go1.12.csv", -) - -exclude_test( - name = "java", - exclude_file = "exclude/java11.csv", -) - -exclude_test( - name = "nodejs", - exclude_file = "exclude/nodejs12.4.0.csv", -) - -exclude_test( - name = "php", - exclude_file = "exclude/php7.3.6.csv", -) - -exclude_test( - name = "python", - exclude_file = "exclude/python3.7.3.csv", -) diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl index 3eb494958..399060fa4 100644 --- a/test/runtimes/defs.bzl +++ b/test/runtimes/defs.bzl @@ -2,32 +2,69 @@ load("//tools:defs.bzl", "go_test") -def runtime_test(name, lang, exclude_file, **kwargs): - go_test( - name = name, - srcs = ["runner.go"], - args = [ - "--lang", - lang, - "--image", - name, # Resolved as images/runtimes/%s. +def _runtime_test_impl(ctx): + # Construct arguments. + args = [ + "--lang", + ctx.attr.lang, + "--image", + ctx.attr.image, + ] + if ctx.attr.exclude_file: + args += [ "--exclude_file", - "test/runtimes/" + exclude_file, - ], - data = [ - exclude_file, - "//test/runtimes/proctor", - ], - defines_main = 1, + ctx.files.exclude_file[0].short_path, + ] + + # Build a runner. 
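    # Editorial note (not part of the original change): the lines below emit a
    # small executable shell wrapper that invokes the prebuilt runner binary
    # with the flags composed above, and the returned runfiles make the runner,
    # the proctor binary, and the optional exclude file available to that
    # wrapper at test time.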
+ runner = ctx.actions.declare_file("%s-executer" % ctx.label.name) + runner_content = "\n".join([ + "#!/bin/bash", + "%s %s $@\n" % (ctx.files._runner[0].short_path, " ".join(args)), + ]) + ctx.actions.write(runner, runner_content, is_executable = True) + + # Return the runner. + return [DefaultInfo( + executable = runner, + runfiles = ctx.runfiles( + files = ctx.files._runner + ctx.files.exclude_file + ctx.files._proctor, + collect_default = True, + collect_data = True, + ), + )] + +_runtime_test = rule( + implementation = _runtime_test_impl, + attrs = { + "image": attr.string( + mandatory = False, + ), + "lang": attr.string( + mandatory = True, + ), + "exclude_file": attr.label( + mandatory = False, + allow_single_file = True, + ), + "_runner": attr.label( + default = "//test/runtimes/runner:runner", + ), + "_proctor": attr.label( + default = "//test/runtimes/proctor:proctor", + ), + }, + test = True, +) + +def runtime_test(name, **kwargs): + _runtime_test( + name = name, + image = name, # Resolved as images/runtimes/%s. tags = [ "local", "manual", ], - deps = [ - "//pkg/log", - "//pkg/test/dockerutil", - "//pkg/test/testutil", - ], **kwargs ) diff --git a/test/runtimes/exclude/go1.12.csv b/test/runtimes/exclude/go1.12.csv deleted file mode 100644 index 8c8ae0c5d..000000000 --- a/test/runtimes/exclude/go1.12.csv +++ /dev/null @@ -1,16 +0,0 @@ -test name,bug id,comment -cgo_errors,,FLAKY -cgo_test,,FLAKY -go_test:cmd/go,,FLAKY -go_test:cmd/vendor/golang.org/x/sys/unix,b/118783622,/dev devices missing -go_test:net,b/118784196,socket: invalid argument. Works as intended: see bug. -go_test:os,b/118780122,we have a pollable filesystem but that's a surprise -go_test:os/signal,b/118780860,/dev/pts not properly supported -go_test:runtime,b/118782341,sigtrap not reported or caught or something -go_test:syscall,b/118781998,bad bytes -- bad mem addr -race,b/118782931,thread sanitizer. Works as intended: b/62219744. 
-runtime:cpu124,b/118778254,segmentation fault -test:0_1,,FLAKY -testasan,, -testcarchive,b/118782924,no sigpipe -testshared,,FLAKY diff --git a/test/runtimes/exclude/java11.csv b/test/runtimes/exclude/java11.csv deleted file mode 100644 index c012e5a56..000000000 --- a/test/runtimes/exclude/java11.csv +++ /dev/null @@ -1,126 +0,0 @@ -test name,bug id,comment -com/sun/crypto/provider/Cipher/PBE/PKCS12Cipher.java,,Fails in Docker -com/sun/jdi/NashornPopFrameTest.java,, -com/sun/jdi/ProcessAttachTest.java,, -com/sun/management/HotSpotDiagnosticMXBean/CheckOrigin.java,,Fails in Docker -com/sun/management/OperatingSystemMXBean/GetCommittedVirtualMemorySize.java,, -com/sun/management/UnixOperatingSystemMXBean/GetMaxFileDescriptorCount.sh,, -com/sun/tools/attach/AttachSelf.java,, -com/sun/tools/attach/BasicTests.java,, -com/sun/tools/attach/PermissionTest.java,, -com/sun/tools/attach/StartManagementAgent.java,, -com/sun/tools/attach/TempDirTest.java,, -com/sun/tools/attach/modules/Driver.java,, -java/lang/Character/CheckScript.java,,Fails in Docker -java/lang/Character/CheckUnicode.java,,Fails in Docker -java/lang/Class/GetPackageBootLoaderChildLayer.java,, -java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker -java/lang/String/nativeEncoding/StringPlatformChars.java,, -java/net/DatagramSocket/ReuseAddressTest.java,, -java/net/DatagramSocket/SendDatagramToBadAddress.java,b/78473345, -java/net/Inet4Address/PingThis.java,, -java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103, -java/net/MulticastSocket/MulticastTTL.java,, -java/net/MulticastSocket/Promiscuous.java,, -java/net/MulticastSocket/SetLoopbackMode.java,, -java/net/MulticastSocket/SetTTLAndGetTTL.java,, -java/net/MulticastSocket/Test.java,, -java/net/MulticastSocket/TestDefaults.java,, -java/net/MulticastSocket/TimeToLive.java,, -java/net/NetworkInterface/NetworkInterfaceStreamTest.java,, -java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported -java/net/Socket/TrafficClass.java,b/78527818,Not supported on gVisor -java/net/Socket/UrgentDataTest.java,b/111515323, -java/net/Socket/setReuseAddress/Basic.java,b/78519214,SO_REUSEADDR enabled by default -java/net/SocketOption/OptionsTest.java,,Fails in Docker -java/net/SocketOption/TcpKeepAliveTest.java,, -java/net/SocketPermission/SocketPermissionTest.java,, -java/net/URLConnection/6212146/TestDriver.java,,Fails in Docker -java/net/httpclient/RequestBuilderTest.java,,Fails in Docker -java/net/httpclient/ShortResponseBody.java,, -java/net/httpclient/ShortResponseBodyWithRetry.java,, -java/nio/channels/AsyncCloseAndInterrupt.java,, -java/nio/channels/AsynchronousServerSocketChannel/Basic.java,, -java/nio/channels/AsynchronousSocketChannel/Basic.java,b/77921528,SO_KEEPALIVE is not settable -java/nio/channels/DatagramChannel/BasicMulticastTests.java,, -java/nio/channels/DatagramChannel/SocketOptionTests.java,,Fails in Docker -java/nio/channels/DatagramChannel/UseDGWithIPv6.java,, -java/nio/channels/FileChannel/directio/DirectIOTest.java,,Fails in Docker -java/nio/channels/Selector/OutOfBand.java,, -java/nio/channels/Selector/SelectWithConsumer.java,,Flaky -java/nio/channels/ServerSocketChannel/SocketOptionTests.java,, -java/nio/channels/SocketChannel/LingerOnClose.java,, -java/nio/channels/SocketChannel/SocketOptionTests.java,b/77965901, -java/nio/channels/spi/SelectorProvider/inheritedChannel/InheritedChannelTest.java,,Fails in Docker -java/rmi/activation/Activatable/extLoadedImpl/ext.sh,, 
-java/rmi/transport/checkLeaseInfoLeak/CheckLeaseLeak.java,, -java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker -java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker -java/util/Calendar/JapaneseEraNameTest.java,, -java/util/Currency/CurrencyTest.java,,Fails in Docker -java/util/Currency/ValidateISO4217.java,,Fails in Docker -java/util/Locale/LSRDataTest.java,, -java/util/concurrent/locks/Lock/TimedAcquireLeak.java,, -java/util/jar/JarFile/mrjar/MultiReleaseJarAPI.java,,Fails in Docker -java/util/logging/LogManager/Configuration/updateConfiguration/SimpleUpdateConfigWithInputStreamTest.java,, -java/util/logging/TestLoggerWeakRefLeak.java,, -javax/imageio/AppletResourceTest.java,, -javax/management/security/HashedPasswordFileTest.java,, -javax/net/ssl/SSLSession/JSSERenegotiate.java,,Fails in Docker -javax/sound/sampled/AudioInputStream/FrameLengthAfterConversion.java,, -jdk/jfr/event/runtime/TestNetworkUtilizationEvent.java,, -jdk/jfr/event/runtime/TestThreadParkEvent.java,, -jdk/jfr/event/sampling/TestNative.java,, -jdk/jfr/jcmd/TestJcmdChangeLogLevel.java,, -jdk/jfr/jcmd/TestJcmdConfigure.java,, -jdk/jfr/jcmd/TestJcmdDump.java,, -jdk/jfr/jcmd/TestJcmdDumpGeneratedFilename.java,, -jdk/jfr/jcmd/TestJcmdDumpLimited.java,, -jdk/jfr/jcmd/TestJcmdDumpPathToGCRoots.java,, -jdk/jfr/jcmd/TestJcmdLegacy.java,, -jdk/jfr/jcmd/TestJcmdSaveToFile.java,, -jdk/jfr/jcmd/TestJcmdStartDirNotExist.java,, -jdk/jfr/jcmd/TestJcmdStartInvaldFile.java,, -jdk/jfr/jcmd/TestJcmdStartPathToGCRoots.java,, -jdk/jfr/jcmd/TestJcmdStartStopDefault.java,, -jdk/jfr/jcmd/TestJcmdStartWithOptions.java,, -jdk/jfr/jcmd/TestJcmdStartWithSettings.java,, -jdk/jfr/jcmd/TestJcmdStopInvalidFile.java,, -jdk/jfr/jvm/TestJfrJavaBase.java,, -jdk/jfr/startupargs/TestStartRecording.java,, -jdk/modules/incubator/ImageModules.java,, -jdk/net/Sockets/ExtOptionTest.java,, -jdk/net/Sockets/QuickAckTest.java,, -lib/security/cacerts/VerifyCACerts.java,, -sun/management/jmxremote/bootstrap/CustomLauncherTest.java,, -sun/management/jmxremote/bootstrap/JvmstatCountersTest.java,, -sun/management/jmxremote/bootstrap/LocalManagementTest.java,, -sun/management/jmxremote/bootstrap/RmiRegistrySslTest.java,, -sun/management/jmxremote/bootstrap/RmiSslBootstrapTest.sh,, -sun/management/jmxremote/startstop/JMXStartStopTest.java,, -sun/management/jmxremote/startstop/JMXStatusPerfCountersTest.java,, -sun/management/jmxremote/startstop/JMXStatusTest.java,, -sun/text/resources/LocaleDataTest.java,, -sun/tools/jcmd/TestJcmdSanity.java,, -sun/tools/jhsdb/AlternateHashingTest.java,, -sun/tools/jhsdb/BasicLauncherTest.java,, -sun/tools/jhsdb/HeapDumpTest.java,, -sun/tools/jhsdb/heapconfig/JMapHeapConfigTest.java,, -sun/tools/jinfo/BasicJInfoTest.java,, -sun/tools/jinfo/JInfoTest.java,, -sun/tools/jmap/BasicJMapTest.java,, -sun/tools/jstack/BasicJStackTest.java,, -sun/tools/jstack/DeadlockDetectionTest.java,, -sun/tools/jstatd/TestJstatdExternalRegistry.java,, -sun/tools/jstatd/TestJstatdPort.java,,Flaky -sun/tools/jstatd/TestJstatdPortAndServer.java,,Flaky -sun/util/calendar/zi/TestZoneInfo310.java,, -tools/jar/modularJar/Basic.java,, -tools/jar/multiRelease/Basic.java,, -tools/jimage/JImageExtractTest.java,, -tools/jimage/JImageTest.java,, -tools/jlink/JLinkTest.java,, -tools/jlink/plugins/IncludeLocalesPluginTest.java,, -tools/jmod/hashes/HashesTest.java,, -tools/launcher/BigJar.java,b/111611473, -tools/launcher/modules/patch/systemmodules/PatchSystemModules.java,, diff --git a/test/runtimes/exclude/nodejs12.4.0.csv 
b/test/runtimes/exclude/nodejs12.4.0.csv deleted file mode 100644 index e7edfa0a5..000000000 --- a/test/runtimes/exclude/nodejs12.4.0.csv +++ /dev/null @@ -1,50 +0,0 @@ -test name,bug id,comment -benchmark/test-benchmark-fs.js,, -benchmark/test-benchmark-module.js,, -benchmark/test-benchmark-napi.js,, -doctool/test-make-doc.js,b/68848110,Expected to fail. -fixtures/test-error-first-line-offset.js,, -fixtures/test-fs-readfile-error.js,, -fixtures/test-fs-stat-sync-overflow.js,, -internet/test-dgram-broadcast-multi-process.js,, -internet/test-dgram-multicast-multi-process.js,, -internet/test-dgram-multicast-set-interface-lo.js,, -internet/test-doctool-versions.js,, -internet/test-uv-threadpool-schedule.js,, -parallel/test-cluster-dgram-reuse.js,b/64024294, -parallel/test-dgram-bind-fd.js,b/132447356, -parallel/test-dgram-create-socket-handle-fd.js,b/132447238, -parallel/test-dgram-createSocket-type.js,b/68847739, -parallel/test-dgram-socket-buffer-size.js,b/68847921, -parallel/test-fs-access.js,, -parallel/test-fs-write-stream-double-close.js,, -parallel/test-fs-write-stream-throw-type-error.js,b/110226209, -parallel/test-fs-write-stream.js,, -parallel/test-http2-respond-file-error-pipe-offset.js,, -parallel/test-os.js,, -parallel/test-process-uid-gid.js,, -pseudo-tty/test-assert-colors.js,, -pseudo-tty/test-assert-no-color.js,, -pseudo-tty/test-assert-position-indicator.js,, -pseudo-tty/test-async-wrap-getasyncid-tty.js,, -pseudo-tty/test-fatal-error.js,, -pseudo-tty/test-handle-wrap-isrefed-tty.js,, -pseudo-tty/test-readable-tty-keepalive.js,, -pseudo-tty/test-set-raw-mode-reset-process-exit.js,, -pseudo-tty/test-set-raw-mode-reset-signal.js,, -pseudo-tty/test-set-raw-mode-reset.js,, -pseudo-tty/test-stderr-stdout-handle-sigwinch.js,, -pseudo-tty/test-stdout-read.js,, -pseudo-tty/test-tty-color-support.js,, -pseudo-tty/test-tty-isatty.js,, -pseudo-tty/test-tty-stdin-call-end.js,, -pseudo-tty/test-tty-stdin-end.js,, -pseudo-tty/test-stdin-write.js,, -pseudo-tty/test-tty-stdout-end.js,, -pseudo-tty/test-tty-stdout-resize.js,, -pseudo-tty/test-tty-stream-constructors.js,, -pseudo-tty/test-tty-window-size.js,, -pseudo-tty/test-tty-wrap.js,, -pummel/test-net-pingpong.js,, -pummel/test-vm-memleak.js,, -tick-processor/test-tick-processor-builtin.js,, diff --git a/test/runtimes/exclude/php7.3.6.csv b/test/runtimes/exclude/php7.3.6.csv deleted file mode 100644 index f3606bfe8..000000000 --- a/test/runtimes/exclude/php7.3.6.csv +++ /dev/null @@ -1,35 +0,0 @@ -test name,bug id,comment -ext/intl/tests/bug77895.phpt,, -ext/intl/tests/dateformat_bug65683_2.phpt,, -ext/mbstring/tests/bug76319.phpt,, -ext/mbstring/tests/bug76958.phpt,, -ext/mbstring/tests/bug77025.phpt,, -ext/mbstring/tests/bug77165.phpt,, -ext/mbstring/tests/bug77454.phpt,, -ext/mbstring/tests/mb_convert_encoding_leak.phpt,, -ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, -ext/session/tests/session_set_save_handler_class_018.phpt,, -ext/session/tests/session_set_save_handler_iface_003.phpt,, -ext/session/tests/session_set_save_handler_variation4.phpt,, -ext/standard/tests/file/filetype_variation.phpt,, -ext/standard/tests/file/fopen_variation19.phpt,, -ext/standard/tests/file/php_fd_wrapper_01.phpt,, -ext/standard/tests/file/php_fd_wrapper_02.phpt,, -ext/standard/tests/file/php_fd_wrapper_03.phpt,, -ext/standard/tests/file/php_fd_wrapper_04.phpt,, -ext/standard/tests/file/realpath_bug77484.phpt,, -ext/standard/tests/file/rename_variation.phpt,b/68717309, -ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,, 
-ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, -ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, -ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, -ext/standard/tests/network/bug20134.phpt,, -ext/standard/tests/streams/stream_socket_sendto.phpt,, -ext/standard/tests/strings/007.phpt,, -tests/output/stream_isatty_err.phpt,b/68720279, -tests/output/stream_isatty_in-err.phpt,b/68720282, -tests/output/stream_isatty_in-out-err.phpt,, -tests/output/stream_isatty_in-out.phpt,b/68720299, -tests/output/stream_isatty_out-err.phpt,b/68720311, -tests/output/stream_isatty_out.phpt,b/68720325, -Zend/tests/concat_003.phpt,, diff --git a/test/runtimes/exclude/python3.7.3.csv b/test/runtimes/exclude/python3.7.3.csv deleted file mode 100644 index 2b9947212..000000000 --- a/test/runtimes/exclude/python3.7.3.csv +++ /dev/null @@ -1,27 +0,0 @@ -test name,bug id,comment -test_asynchat,b/76031995,SO_REUSEADDR -test_asyncio,,Fails on Docker. -test_asyncore,b/76031995,SO_REUSEADDR -test_epoll,, -test_fcntl,,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. -test_ftplib,,Fails in Docker -test_httplib,b/76031995,SO_REUSEADDR -test_imaplib,, -test_logging,, -test_multiprocessing_fork,,Flaky. Sometimes times out. -test_multiprocessing_forkserver,,Flaky. Sometimes times out. -test_multiprocessing_main_handling,,Flaky. Sometimes times out. -test_multiprocessing_spawn,,Flaky. Sometimes times out. -test_nntplib,b/76031995,tests should not set SO_REUSEADDR -test_poplib,,Fails on Docker -test_posix,b/76174079,posix.sched_get_priority_min not implemented + posix.sched_rr_get_interval not permitted -test_pty,b/76157709,out of pty devices -test_readline,b/76157709,out of pty devices -test_resource,b/76174079, -test_selectors,b/76116849,OSError not raised with epoll -test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets -test_socket,b/75983380, -test_ssl,b/76031995,SO_REUSEADDR -test_subprocess,, -test_support,b/76031995,SO_REUSEADDR -test_telnetlib,b/76031995,SO_REUSEADDR diff --git a/test/runtimes/exclude_go1.12.csv b/test/runtimes/exclude_go1.12.csv new file mode 100644 index 000000000..8c8ae0c5d --- /dev/null +++ b/test/runtimes/exclude_go1.12.csv @@ -0,0 +1,16 @@ +test name,bug id,comment +cgo_errors,,FLAKY +cgo_test,,FLAKY +go_test:cmd/go,,FLAKY +go_test:cmd/vendor/golang.org/x/sys/unix,b/118783622,/dev devices missing +go_test:net,b/118784196,socket: invalid argument. Works as intended: see bug. +go_test:os,b/118780122,we have a pollable filesystem but that's a surprise +go_test:os/signal,b/118780860,/dev/pts not properly supported +go_test:runtime,b/118782341,sigtrap not reported or caught or something +go_test:syscall,b/118781998,bad bytes -- bad mem addr +race,b/118782931,thread sanitizer. Works as intended: b/62219744. 
+runtime:cpu124,b/118778254,segmentation fault +test:0_1,,FLAKY +testasan,, +testcarchive,b/118782924,no sigpipe +testshared,,FLAKY diff --git a/test/runtimes/exclude_java11.csv b/test/runtimes/exclude_java11.csv new file mode 100644 index 000000000..c012e5a56 --- /dev/null +++ b/test/runtimes/exclude_java11.csv @@ -0,0 +1,126 @@ +test name,bug id,comment +com/sun/crypto/provider/Cipher/PBE/PKCS12Cipher.java,,Fails in Docker +com/sun/jdi/NashornPopFrameTest.java,, +com/sun/jdi/ProcessAttachTest.java,, +com/sun/management/HotSpotDiagnosticMXBean/CheckOrigin.java,,Fails in Docker +com/sun/management/OperatingSystemMXBean/GetCommittedVirtualMemorySize.java,, +com/sun/management/UnixOperatingSystemMXBean/GetMaxFileDescriptorCount.sh,, +com/sun/tools/attach/AttachSelf.java,, +com/sun/tools/attach/BasicTests.java,, +com/sun/tools/attach/PermissionTest.java,, +com/sun/tools/attach/StartManagementAgent.java,, +com/sun/tools/attach/TempDirTest.java,, +com/sun/tools/attach/modules/Driver.java,, +java/lang/Character/CheckScript.java,,Fails in Docker +java/lang/Character/CheckUnicode.java,,Fails in Docker +java/lang/Class/GetPackageBootLoaderChildLayer.java,, +java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker +java/lang/String/nativeEncoding/StringPlatformChars.java,, +java/net/DatagramSocket/ReuseAddressTest.java,, +java/net/DatagramSocket/SendDatagramToBadAddress.java,b/78473345, +java/net/Inet4Address/PingThis.java,, +java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103, +java/net/MulticastSocket/MulticastTTL.java,, +java/net/MulticastSocket/Promiscuous.java,, +java/net/MulticastSocket/SetLoopbackMode.java,, +java/net/MulticastSocket/SetTTLAndGetTTL.java,, +java/net/MulticastSocket/Test.java,, +java/net/MulticastSocket/TestDefaults.java,, +java/net/MulticastSocket/TimeToLive.java,, +java/net/NetworkInterface/NetworkInterfaceStreamTest.java,, +java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported +java/net/Socket/TrafficClass.java,b/78527818,Not supported on gVisor +java/net/Socket/UrgentDataTest.java,b/111515323, +java/net/Socket/setReuseAddress/Basic.java,b/78519214,SO_REUSEADDR enabled by default +java/net/SocketOption/OptionsTest.java,,Fails in Docker +java/net/SocketOption/TcpKeepAliveTest.java,, +java/net/SocketPermission/SocketPermissionTest.java,, +java/net/URLConnection/6212146/TestDriver.java,,Fails in Docker +java/net/httpclient/RequestBuilderTest.java,,Fails in Docker +java/net/httpclient/ShortResponseBody.java,, +java/net/httpclient/ShortResponseBodyWithRetry.java,, +java/nio/channels/AsyncCloseAndInterrupt.java,, +java/nio/channels/AsynchronousServerSocketChannel/Basic.java,, +java/nio/channels/AsynchronousSocketChannel/Basic.java,b/77921528,SO_KEEPALIVE is not settable +java/nio/channels/DatagramChannel/BasicMulticastTests.java,, +java/nio/channels/DatagramChannel/SocketOptionTests.java,,Fails in Docker +java/nio/channels/DatagramChannel/UseDGWithIPv6.java,, +java/nio/channels/FileChannel/directio/DirectIOTest.java,,Fails in Docker +java/nio/channels/Selector/OutOfBand.java,, +java/nio/channels/Selector/SelectWithConsumer.java,,Flaky +java/nio/channels/ServerSocketChannel/SocketOptionTests.java,, +java/nio/channels/SocketChannel/LingerOnClose.java,, +java/nio/channels/SocketChannel/SocketOptionTests.java,b/77965901, +java/nio/channels/spi/SelectorProvider/inheritedChannel/InheritedChannelTest.java,,Fails in Docker +java/rmi/activation/Activatable/extLoadedImpl/ext.sh,, +java/rmi/transport/checkLeaseInfoLeak/CheckLeaseLeak.java,, 
+java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker +java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker +java/util/Calendar/JapaneseEraNameTest.java,, +java/util/Currency/CurrencyTest.java,,Fails in Docker +java/util/Currency/ValidateISO4217.java,,Fails in Docker +java/util/Locale/LSRDataTest.java,, +java/util/concurrent/locks/Lock/TimedAcquireLeak.java,, +java/util/jar/JarFile/mrjar/MultiReleaseJarAPI.java,,Fails in Docker +java/util/logging/LogManager/Configuration/updateConfiguration/SimpleUpdateConfigWithInputStreamTest.java,, +java/util/logging/TestLoggerWeakRefLeak.java,, +javax/imageio/AppletResourceTest.java,, +javax/management/security/HashedPasswordFileTest.java,, +javax/net/ssl/SSLSession/JSSERenegotiate.java,,Fails in Docker +javax/sound/sampled/AudioInputStream/FrameLengthAfterConversion.java,, +jdk/jfr/event/runtime/TestNetworkUtilizationEvent.java,, +jdk/jfr/event/runtime/TestThreadParkEvent.java,, +jdk/jfr/event/sampling/TestNative.java,, +jdk/jfr/jcmd/TestJcmdChangeLogLevel.java,, +jdk/jfr/jcmd/TestJcmdConfigure.java,, +jdk/jfr/jcmd/TestJcmdDump.java,, +jdk/jfr/jcmd/TestJcmdDumpGeneratedFilename.java,, +jdk/jfr/jcmd/TestJcmdDumpLimited.java,, +jdk/jfr/jcmd/TestJcmdDumpPathToGCRoots.java,, +jdk/jfr/jcmd/TestJcmdLegacy.java,, +jdk/jfr/jcmd/TestJcmdSaveToFile.java,, +jdk/jfr/jcmd/TestJcmdStartDirNotExist.java,, +jdk/jfr/jcmd/TestJcmdStartInvaldFile.java,, +jdk/jfr/jcmd/TestJcmdStartPathToGCRoots.java,, +jdk/jfr/jcmd/TestJcmdStartStopDefault.java,, +jdk/jfr/jcmd/TestJcmdStartWithOptions.java,, +jdk/jfr/jcmd/TestJcmdStartWithSettings.java,, +jdk/jfr/jcmd/TestJcmdStopInvalidFile.java,, +jdk/jfr/jvm/TestJfrJavaBase.java,, +jdk/jfr/startupargs/TestStartRecording.java,, +jdk/modules/incubator/ImageModules.java,, +jdk/net/Sockets/ExtOptionTest.java,, +jdk/net/Sockets/QuickAckTest.java,, +lib/security/cacerts/VerifyCACerts.java,, +sun/management/jmxremote/bootstrap/CustomLauncherTest.java,, +sun/management/jmxremote/bootstrap/JvmstatCountersTest.java,, +sun/management/jmxremote/bootstrap/LocalManagementTest.java,, +sun/management/jmxremote/bootstrap/RmiRegistrySslTest.java,, +sun/management/jmxremote/bootstrap/RmiSslBootstrapTest.sh,, +sun/management/jmxremote/startstop/JMXStartStopTest.java,, +sun/management/jmxremote/startstop/JMXStatusPerfCountersTest.java,, +sun/management/jmxremote/startstop/JMXStatusTest.java,, +sun/text/resources/LocaleDataTest.java,, +sun/tools/jcmd/TestJcmdSanity.java,, +sun/tools/jhsdb/AlternateHashingTest.java,, +sun/tools/jhsdb/BasicLauncherTest.java,, +sun/tools/jhsdb/HeapDumpTest.java,, +sun/tools/jhsdb/heapconfig/JMapHeapConfigTest.java,, +sun/tools/jinfo/BasicJInfoTest.java,, +sun/tools/jinfo/JInfoTest.java,, +sun/tools/jmap/BasicJMapTest.java,, +sun/tools/jstack/BasicJStackTest.java,, +sun/tools/jstack/DeadlockDetectionTest.java,, +sun/tools/jstatd/TestJstatdExternalRegistry.java,, +sun/tools/jstatd/TestJstatdPort.java,,Flaky +sun/tools/jstatd/TestJstatdPortAndServer.java,,Flaky +sun/util/calendar/zi/TestZoneInfo310.java,, +tools/jar/modularJar/Basic.java,, +tools/jar/multiRelease/Basic.java,, +tools/jimage/JImageExtractTest.java,, +tools/jimage/JImageTest.java,, +tools/jlink/JLinkTest.java,, +tools/jlink/plugins/IncludeLocalesPluginTest.java,, +tools/jmod/hashes/HashesTest.java,, +tools/launcher/BigJar.java,b/111611473, +tools/launcher/modules/patch/systemmodules/PatchSystemModules.java,, diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv new file mode 100644 index 
000000000..e7edfa0a5 --- /dev/null +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -0,0 +1,50 @@ +test name,bug id,comment +benchmark/test-benchmark-fs.js,, +benchmark/test-benchmark-module.js,, +benchmark/test-benchmark-napi.js,, +doctool/test-make-doc.js,b/68848110,Expected to fail. +fixtures/test-error-first-line-offset.js,, +fixtures/test-fs-readfile-error.js,, +fixtures/test-fs-stat-sync-overflow.js,, +internet/test-dgram-broadcast-multi-process.js,, +internet/test-dgram-multicast-multi-process.js,, +internet/test-dgram-multicast-set-interface-lo.js,, +internet/test-doctool-versions.js,, +internet/test-uv-threadpool-schedule.js,, +parallel/test-cluster-dgram-reuse.js,b/64024294, +parallel/test-dgram-bind-fd.js,b/132447356, +parallel/test-dgram-create-socket-handle-fd.js,b/132447238, +parallel/test-dgram-createSocket-type.js,b/68847739, +parallel/test-dgram-socket-buffer-size.js,b/68847921, +parallel/test-fs-access.js,, +parallel/test-fs-write-stream-double-close.js,, +parallel/test-fs-write-stream-throw-type-error.js,b/110226209, +parallel/test-fs-write-stream.js,, +parallel/test-http2-respond-file-error-pipe-offset.js,, +parallel/test-os.js,, +parallel/test-process-uid-gid.js,, +pseudo-tty/test-assert-colors.js,, +pseudo-tty/test-assert-no-color.js,, +pseudo-tty/test-assert-position-indicator.js,, +pseudo-tty/test-async-wrap-getasyncid-tty.js,, +pseudo-tty/test-fatal-error.js,, +pseudo-tty/test-handle-wrap-isrefed-tty.js,, +pseudo-tty/test-readable-tty-keepalive.js,, +pseudo-tty/test-set-raw-mode-reset-process-exit.js,, +pseudo-tty/test-set-raw-mode-reset-signal.js,, +pseudo-tty/test-set-raw-mode-reset.js,, +pseudo-tty/test-stderr-stdout-handle-sigwinch.js,, +pseudo-tty/test-stdout-read.js,, +pseudo-tty/test-tty-color-support.js,, +pseudo-tty/test-tty-isatty.js,, +pseudo-tty/test-tty-stdin-call-end.js,, +pseudo-tty/test-tty-stdin-end.js,, +pseudo-tty/test-stdin-write.js,, +pseudo-tty/test-tty-stdout-end.js,, +pseudo-tty/test-tty-stdout-resize.js,, +pseudo-tty/test-tty-stream-constructors.js,, +pseudo-tty/test-tty-window-size.js,, +pseudo-tty/test-tty-wrap.js,, +pummel/test-net-pingpong.js,, +pummel/test-vm-memleak.js,, +tick-processor/test-tick-processor-builtin.js,, diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv new file mode 100644 index 000000000..f3606bfe8 --- /dev/null +++ b/test/runtimes/exclude_php7.3.6.csv @@ -0,0 +1,35 @@ +test name,bug id,comment +ext/intl/tests/bug77895.phpt,, +ext/intl/tests/dateformat_bug65683_2.phpt,, +ext/mbstring/tests/bug76319.phpt,, +ext/mbstring/tests/bug76958.phpt,, +ext/mbstring/tests/bug77025.phpt,, +ext/mbstring/tests/bug77165.phpt,, +ext/mbstring/tests/bug77454.phpt,, +ext/mbstring/tests/mb_convert_encoding_leak.phpt,, +ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, +ext/session/tests/session_set_save_handler_class_018.phpt,, +ext/session/tests/session_set_save_handler_iface_003.phpt,, +ext/session/tests/session_set_save_handler_variation4.phpt,, +ext/standard/tests/file/filetype_variation.phpt,, +ext/standard/tests/file/fopen_variation19.phpt,, +ext/standard/tests/file/php_fd_wrapper_01.phpt,, +ext/standard/tests/file/php_fd_wrapper_02.phpt,, +ext/standard/tests/file/php_fd_wrapper_03.phpt,, +ext/standard/tests/file/php_fd_wrapper_04.phpt,, +ext/standard/tests/file/realpath_bug77484.phpt,, +ext/standard/tests/file/rename_variation.phpt,b/68717309, +ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,, +ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, 
+ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, +ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, +ext/standard/tests/network/bug20134.phpt,, +ext/standard/tests/streams/stream_socket_sendto.phpt,, +ext/standard/tests/strings/007.phpt,, +tests/output/stream_isatty_err.phpt,b/68720279, +tests/output/stream_isatty_in-err.phpt,b/68720282, +tests/output/stream_isatty_in-out-err.phpt,, +tests/output/stream_isatty_in-out.phpt,b/68720299, +tests/output/stream_isatty_out-err.phpt,b/68720311, +tests/output/stream_isatty_out.phpt,b/68720325, +Zend/tests/concat_003.phpt,, diff --git a/test/runtimes/exclude_python3.7.3.csv b/test/runtimes/exclude_python3.7.3.csv new file mode 100644 index 000000000..2b9947212 --- /dev/null +++ b/test/runtimes/exclude_python3.7.3.csv @@ -0,0 +1,27 @@ +test name,bug id,comment +test_asynchat,b/76031995,SO_REUSEADDR +test_asyncio,,Fails on Docker. +test_asyncore,b/76031995,SO_REUSEADDR +test_epoll,, +test_fcntl,,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. +test_ftplib,,Fails in Docker +test_httplib,b/76031995,SO_REUSEADDR +test_imaplib,, +test_logging,, +test_multiprocessing_fork,,Flaky. Sometimes times out. +test_multiprocessing_forkserver,,Flaky. Sometimes times out. +test_multiprocessing_main_handling,,Flaky. Sometimes times out. +test_multiprocessing_spawn,,Flaky. Sometimes times out. +test_nntplib,b/76031995,tests should not set SO_REUSEADDR +test_poplib,,Fails on Docker +test_posix,b/76174079,posix.sched_get_priority_min not implemented + posix.sched_rr_get_interval not permitted +test_pty,b/76157709,out of pty devices +test_readline,b/76157709,out of pty devices +test_resource,b/76174079, +test_selectors,b/76116849,OSError not raised with epoll +test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets +test_socket,b/75983380, +test_ssl,b/76031995,SO_REUSEADDR +test_subprocess,, +test_support,b/76031995,SO_REUSEADDR +test_telnetlib,b/76031995,SO_REUSEADDR diff --git a/test/runtimes/exclude_test.go b/test/runtimes/exclude_test.go deleted file mode 100644 index bb4b46630..000000000 --- a/test/runtimes/exclude_test.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "flag" - "os" - "testing" -) - -func TestMain(m *testing.M) { - flag.Parse() - os.Exit(m.Run()) -} - -// Test that the exclude file parses without error. -func TestExcludeList(t *testing.T) { - ex, err := getExcludes() - if err != nil { - t.Fatalf("error parsing exclude file: %v", err) - } - if *excludeFile != "" && len(ex) == 0 { - t.Errorf("got empty excludes for file %q", *excludeFile) - } -} diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go deleted file mode 100644 index 50aca5d69..000000000 --- a/test/runtimes/runner.go +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright 2019 The gVisor Authors. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary runner runs the runtime tests in a Docker container. -package main - -import ( - "context" - "encoding/csv" - "flag" - "fmt" - "io" - "os" - "sort" - "strings" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/test/dockerutil" - "gvisor.dev/gvisor/pkg/test/testutil" -) - -var ( - lang = flag.String("lang", "", "language runtime to test") - image = flag.String("image", "", "docker image with runtime tests") - excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment") - batchSize = flag.Int("batch", 50, "number of test cases run in one command") -) - -// Wait time for each test to run. -const timeout = 45 * time.Minute - -func main() { - flag.Parse() - if *lang == "" || *image == "" { - fmt.Fprintf(os.Stderr, "lang and image flags must not be empty\n") - os.Exit(1) - } - os.Exit(runTests()) -} - -// runTests is a helper that is called by main. It exists so that we can run -// defered functions before exiting. It returns an exit code that should be -// passed to os.Exit. -func runTests() int { - // Get tests to exclude.. - excludes, err := getExcludes() - if err != nil { - fmt.Fprintf(os.Stderr, "Error getting exclude list: %s\n", err.Error()) - return 1 - } - - // Construct the shared docker instance. - ctx := context.Background() - d := dockerutil.MakeContainer(ctx, testutil.DefaultLogger(*lang)) - defer d.CleanUp(ctx) - - if err := testutil.TouchShardStatusFile(); err != nil { - fmt.Fprintf(os.Stderr, "error touching status shard file: %v\n", err) - return 1 - } - - // Get a slice of tests to run. This will also start a single Docker - // container that will be used to run each test. The final test will - // stop the Docker container. - tests, err := getTests(ctx, d, excludes) - if err != nil { - fmt.Fprintf(os.Stderr, "%s\n", err.Error()) - return 1 - } - - m := testing.MainStart(testDeps{}, tests, nil, nil) - return m.Run() -} - -// getTests executes all tests as table tests. -func getTests(ctx context.Context, d *dockerutil.Container, excludes map[string]struct{}) ([]testing.InternalTest, error) { - // Start the container. - opts := dockerutil.RunOpts{ - Image: fmt.Sprintf("runtimes/%s", *image), - } - d.CopyFiles(&opts, "/proctor", "test/runtimes/proctor/proctor") - if err := d.Spawn(ctx, opts, "/proctor/proctor", "--pause"); err != nil { - return nil, fmt.Errorf("docker run failed: %v", err) - } - - // Get a list of all tests in the image. - list, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--list") - if err != nil { - return nil, fmt.Errorf("docker exec failed: %v\nlogs: %s", err, list) - } - - // Calculate a subset of tests to run corresponding to the current - // shard. 
- tests := strings.Fields(list) - sort.Strings(tests) - indices, err := testutil.TestIndicesForShard(len(tests)) - if err != nil { - return nil, fmt.Errorf("TestsForShard() failed: %v", err) - } - - var itests []testing.InternalTest - for i := 0; i < len(indices); i += *batchSize { - var tcs []string - end := i + *batchSize - if end > len(indices) { - end = len(indices) - } - for _, tc := range indices[i:end] { - // Add test if not excluded. - if _, ok := excludes[tests[tc]]; ok { - log.Infof("Skipping test case %s\n", tests[tc]) - continue - } - tcs = append(tcs, tests[tc]) - } - itests = append(itests, testing.InternalTest{ - Name: strings.Join(tcs, ", "), - F: func(t *testing.T) { - var ( - now = time.Now() - done = make(chan struct{}) - output string - err error - ) - - go func() { - fmt.Printf("RUNNING the following in a batch\n%s\n", strings.Join(tcs, "\n")) - output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--tests", strings.Join(tcs, ",")) - close(done) - }() - - select { - case <-done: - if err == nil { - fmt.Printf("PASS: (%v)\n\n", time.Since(now)) - return - } - t.Errorf("FAIL: (%v):\n%s\n", time.Since(now), output) - case <-time.After(timeout): - t.Errorf("TIMEOUT: (%v):\n%s\n", time.Since(now), output) - } - }, - }) - } - - return itests, nil -} - -// getBlacklist reads the exclude file and returns a set of test names to -// exclude. -func getExcludes() (map[string]struct{}, error) { - excludes := make(map[string]struct{}) - if *excludeFile == "" { - return excludes, nil - } - path, err := testutil.FindFile(*excludeFile) - if err != nil { - return nil, err - } - f, err := os.Open(path) - if err != nil { - return nil, err - } - defer f.Close() - - r := csv.NewReader(f) - - // First line is header. Skip it. - if _, err := r.Read(); err != nil { - return nil, err - } - - for { - record, err := r.Read() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - excludes[record[0]] = struct{}{} - } - return excludes, nil -} - -// testDeps implements testing.testDeps (an unexported interface), and is -// required to use testing.MainStart. -type testDeps struct{} - -func (f testDeps) MatchString(a, b string) (bool, error) { return a == b, nil } -func (f testDeps) StartCPUProfile(io.Writer) error { return nil } -func (f testDeps) StopCPUProfile() {} -func (f testDeps) WriteProfileTo(string, io.Writer, int) error { return nil } -func (f testDeps) ImportPath() string { return "" } -func (f testDeps) StartTestLog(io.Writer) {} -func (f testDeps) StopTestLog() error { return nil } diff --git a/test/runtimes/runner/BUILD b/test/runtimes/runner/BUILD new file mode 100644 index 000000000..dc0d5d5b4 --- /dev/null +++ b/test/runtimes/runner/BUILD @@ -0,0 +1,22 @@ +load("//tools:defs.bzl", "go_binary", "go_test") + +package(licenses = ["notice"]) + +go_binary( + name = "runner", + testonly = 1, + srcs = ["main.go"], + visibility = ["//test/runtimes:__pkg__"], + deps = [ + "//pkg/log", + "//pkg/test/dockerutil", + "//pkg/test/testutil", + ], +) + +go_test( + name = "exclude_test", + size = "small", + srcs = ["exclude_test.go"], + library = ":runner", +) diff --git a/test/runtimes/runner/exclude_test.go b/test/runtimes/runner/exclude_test.go new file mode 100644 index 000000000..67c2170c8 --- /dev/null +++ b/test/runtimes/runner/exclude_test.go @@ -0,0 +1,37 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "flag" + "os" + "testing" +) + +func TestMain(m *testing.M) { + flag.Parse() + os.Exit(m.Run()) +} + +// Test that the exclude file parses without error. +func TestExcludelist(t *testing.T) { + ex, err := getExcludes() + if err != nil { + t.Fatalf("error parsing exclude file: %v", err) + } + if *excludeFile != "" && len(ex) == 0 { + t.Errorf("got empty excludes for file %q", *excludeFile) + } +} diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go new file mode 100644 index 000000000..e230912c9 --- /dev/null +++ b/test/runtimes/runner/main.go @@ -0,0 +1,205 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary runner runs the runtime tests in a Docker container. +package main + +import ( + "context" + "encoding/csv" + "flag" + "fmt" + "io" + "os" + "sort" + "strings" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" +) + +var ( + lang = flag.String("lang", "", "language runtime to test") + image = flag.String("image", "", "docker image with runtime tests") + excludeFile = flag.String("exclude_file", "", "file containing list of tests to exclude, in CSV format with fields: test name, bug id, comment") + batchSize = flag.Int("batch", 50, "number of test cases run in one command") +) + +// Wait time for each test to run. +const timeout = 45 * time.Minute + +func main() { + flag.Parse() + if *lang == "" || *image == "" { + fmt.Fprintf(os.Stderr, "lang and image flags must not be empty\n") + os.Exit(1) + } + os.Exit(runTests()) +} + +// runTests is a helper that is called by main. It exists so that we can run +// defered functions before exiting. It returns an exit code that should be +// passed to os.Exit. +func runTests() int { + // Get tests to exclude.. + excludes, err := getExcludes() + if err != nil { + fmt.Fprintf(os.Stderr, "Error getting exclude list: %s\n", err.Error()) + return 1 + } + + // Construct the shared docker instance. + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, testutil.DefaultLogger(*lang)) + defer d.CleanUp(ctx) + + if err := testutil.TouchShardStatusFile(); err != nil { + fmt.Fprintf(os.Stderr, "error touching status shard file: %v\n", err) + return 1 + } + + // Get a slice of tests to run. This will also start a single Docker + // container that will be used to run each test. The final test will + // stop the Docker container. 
+ tests, err := getTests(ctx, d, excludes) + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err.Error()) + return 1 + } + + m := testing.MainStart(testDeps{}, tests, nil, nil) + return m.Run() +} + +// getTests executes all tests as table tests. +func getTests(ctx context.Context, d *dockerutil.Container, excludes map[string]struct{}) ([]testing.InternalTest, error) { + // Start the container. + opts := dockerutil.RunOpts{ + Image: fmt.Sprintf("runtimes/%s", *image), + } + d.CopyFiles(&opts, "/proctor", "test/runtimes/proctor/proctor") + if err := d.Spawn(ctx, opts, "/proctor/proctor", "--pause"); err != nil { + return nil, fmt.Errorf("docker run failed: %v", err) + } + + // Get a list of all tests in the image. + list, err := d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--list") + if err != nil { + return nil, fmt.Errorf("docker exec failed: %v", err) + } + + // Calculate a subset of tests to run corresponding to the current + // shard. + tests := strings.Fields(list) + sort.Strings(tests) + indices, err := testutil.TestIndicesForShard(len(tests)) + if err != nil { + return nil, fmt.Errorf("TestsForShard() failed: %v", err) + } + + var itests []testing.InternalTest + for i := 0; i < len(indices); i += *batchSize { + var tcs []string + end := i + *batchSize + if end > len(indices) { + end = len(indices) + } + for _, tc := range indices[i:end] { + // Add test if not excluded. + if _, ok := excludes[tests[tc]]; ok { + log.Infof("Skipping test case %s\n", tests[tc]) + continue + } + tcs = append(tcs, tests[tc]) + } + itests = append(itests, testing.InternalTest{ + Name: strings.Join(tcs, ", "), + F: func(t *testing.T) { + var ( + now = time.Now() + done = make(chan struct{}) + output string + err error + ) + + go func() { + fmt.Printf("RUNNING the following in a batch\n%s\n", strings.Join(tcs, "\n")) + output, err = d.Exec(ctx, dockerutil.ExecOpts{}, "/proctor/proctor", "--runtime", *lang, "--tests", strings.Join(tcs, ",")) + close(done) + }() + + select { + case <-done: + if err == nil { + fmt.Printf("PASS: (%v)\n\n", time.Since(now)) + return + } + t.Errorf("FAIL: (%v):\n%s\n", time.Since(now), output) + case <-time.After(timeout): + t.Errorf("TIMEOUT: (%v):\n%s\n", time.Since(now), output) + } + }, + }) + } + + return itests, nil +} + +// getBlacklist reads the exclude file and returns a set of test names to +// exclude. +func getExcludes() (map[string]struct{}, error) { + excludes := make(map[string]struct{}) + if *excludeFile == "" { + return excludes, nil + } + f, err := os.Open(*excludeFile) + if err != nil { + return nil, err + } + defer f.Close() + + r := csv.NewReader(f) + + // First line is header. Skip it. + if _, err := r.Read(); err != nil { + return nil, err + } + + for { + record, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + excludes[record[0]] = struct{}{} + } + return excludes, nil +} + +// testDeps implements testing.testDeps (an unexported interface), and is +// required to use testing.MainStart. 
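+// The no-op methods below are sufficient here: the profiling and test-log
+// hooks do nothing, and MatchString reports a match only on exact equality.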
+type testDeps struct{} + +func (f testDeps) MatchString(a, b string) (bool, error) { return a == b, nil } +func (f testDeps) StartCPUProfile(io.Writer) error { return nil } +func (f testDeps) StopCPUProfile() {} +func (f testDeps) WriteProfileTo(string, io.Writer, int) error { return nil } +func (f testDeps) ImportPath() string { return "" } +func (f testDeps) StartTestLog(io.Writer) {} +func (f testDeps) StopTestLog() error { return nil } -- cgit v1.2.3 From 40826b0557fc3af642236c427863d25239e7c620 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Fri, 17 Jul 2020 11:17:20 -0700 Subject: DUT logs in test failure messages in packetimpact DUT logs will include logs from the posix server and gVisor, which provides a way to instrument the DUT during test failures. PiperOrigin-RevId: 321816647 --- test/packetimpact/runner/packetimpact_test.go | 29 +++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 1a99719c2..ff5f5c7f1 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -308,12 +308,29 @@ func TestOne(t *testing.T) { "--device", testNetDev, "--dut_type", *dutPlatform, ) - logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, testArgs...) - if !*expectFailure && err != nil { - t.Fatalf("test failed: %v, logs from testbench:\n%s", err, logs) - } - if *expectFailure && err == nil { - t.Fatalf("test failure expected but the test succeeded, enable the test and mark the corresponding bug as fixed, logs from testbench:\n%s", logs) + testbenchLogs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, testArgs...) + if (err != nil) != *expectFailure { + var dutLogs string + if logs, err := dut.Logs(ctx); err != nil { + dutLogs = fmt.Sprintf("failed to fetch DUT logs: %s", err) + } else { + dutLogs = logs + } + + t.Errorf(`test error: %v, expect failure: %t + +====== Begin of DUT Logs ====== + +%s + +====== End of DUT Logs ====== + +====== Begin of Testbench Logs ====== + +%s + +====== End of Testbench Logs ======`, + err, *expectFailure, dutLogs, testbenchLogs) } } -- cgit v1.2.3 From e3c2bd51a1a970991cce71d6994bb053c546e538 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Fri, 17 Jul 2020 16:13:44 -0700 Subject: Move main methods for benchmark packages main package file. 
PiperOrigin-RevId: 321875119 --- test/benchmarks/fs/BUILD | 7 +++---- test/benchmarks/fs/bazel_test.go | 9 --------- test/benchmarks/fs/fs.go | 15 +++++++++++++++ test/benchmarks/network/BUILD | 1 + test/benchmarks/network/iperf_test.go | 9 --------- test/benchmarks/network/network.go | 15 +++++++++++++++ 6 files changed, 34 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD index 606331895..2874cdbb3 100644 --- a/test/benchmarks/fs/BUILD +++ b/test/benchmarks/fs/BUILD @@ -4,7 +4,9 @@ package(licenses = ["notice"]) go_library( name = "fs", + testonly = 1, srcs = ["fs.go"], + deps = ["//test/benchmarks/harness"], ) go_test( @@ -17,8 +19,5 @@ go_test( "local", "manual", ], - deps = [ - "//pkg/test/dockerutil", - "//test/benchmarks/harness", - ], + deps = ["//pkg/test/dockerutil"], ) diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go index b7915e19d..fdcac1a7a 100644 --- a/test/benchmarks/fs/bazel_test.go +++ b/test/benchmarks/fs/bazel_test.go @@ -15,16 +15,12 @@ package fs import ( "context" - "os" "strings" "testing" "gvisor.dev/gvisor/pkg/test/dockerutil" - "gvisor.dev/gvisor/test/benchmarks/harness" ) -var h harness.Harness - // Note: CleanCache versions of this test require running with root permissions. func BenchmarkABSL(b *testing.B) { // Get a machine from the Harness on which to run. @@ -97,8 +93,3 @@ func BenchmarkABSL(b *testing.B) { }) } } - -func TestMain(m *testing.M) { - h.Init() - os.Exit(m.Run()) -} diff --git a/test/benchmarks/fs/fs.go b/test/benchmarks/fs/fs.go index 27eb6c56a..e5ca28c3b 100644 --- a/test/benchmarks/fs/fs.go +++ b/test/benchmarks/fs/fs.go @@ -14,3 +14,18 @@ // Package fs holds benchmarks around filesystem performance. package fs + +import ( + "os" + "testing" + + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +var h harness.Harness + +// TestMain is the main method for package fs. +func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD index ea78416cf..16d267bc8 100644 --- a/test/benchmarks/network/BUILD +++ b/test/benchmarks/network/BUILD @@ -6,6 +6,7 @@ go_library( name = "network", testonly = 1, srcs = ["network.go"], + deps = ["//test/benchmarks/harness"], ) go_test( diff --git a/test/benchmarks/network/iperf_test.go b/test/benchmarks/network/iperf_test.go index 48cc9dd8f..664e0797e 100644 --- a/test/benchmarks/network/iperf_test.go +++ b/test/benchmarks/network/iperf_test.go @@ -16,7 +16,6 @@ package network import ( "context" "fmt" - "os" "regexp" "strconv" "strings" @@ -26,8 +25,6 @@ import ( "gvisor.dev/gvisor/test/benchmarks/harness" ) -var h harness.Harness - func BenchmarkIperf(b *testing.B) { // Get two machines @@ -144,10 +141,4 @@ TCP window size: 45.0 KByte (default) if err != nil || bandwidth != 45900 { t.Fatalf("failed with: %v and %f", err, bandwidth) } - -} - -func TestMain(m *testing.M) { - h.Init() - os.Exit(m.Run()) } diff --git a/test/benchmarks/network/network.go b/test/benchmarks/network/network.go index f480b5bcd..ce17ddb94 100644 --- a/test/benchmarks/network/network.go +++ b/test/benchmarks/network/network.go @@ -14,3 +14,18 @@ // Package network holds benchmarks around raw network performance. package network + +import ( + "os" + "testing" + + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +var h harness.Harness + +// TestMain is the main method for package network. 
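+// It initializes the shared harness exactly once before this package's tests
+// and benchmarks run.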
+func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} -- cgit v1.2.3 From 40acd22bc81e51db6e13e9db38d9a8264cf56e35 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Fri, 17 Jul 2020 17:17:56 -0700 Subject: Runtime tests are enormous PiperOrigin-RevId: 321885126 --- test/runtimes/defs.bzl | 1 + 1 file changed, 1 insertion(+) (limited to 'test') diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl index 399060fa4..5779b9591 100644 --- a/test/runtimes/defs.bzl +++ b/test/runtimes/defs.bzl @@ -65,6 +65,7 @@ def runtime_test(name, **kwargs): "local", "manual", ], + size = "enormous", **kwargs ) -- cgit v1.2.3 From e1a04f84e864b9a5c8a51a7cdd32f8db5377aff1 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 20 Jul 2020 18:03:04 -0700 Subject: Add standard entrypoints for test targets. PiperOrigin-RevId: 322265513 --- Makefile | 158 ++++++++++++++++++++++++------ images/Makefile | 7 ++ images/basic/hostoverlaytest/Dockerfile | 7 ++ images/basic/hostoverlaytest/test.c | 88 +++++++++++++++++ images/basic/hostoverlaytest/testfile.txt | 1 + images/basic/tmpfile/Dockerfile | 4 + images/hostoverlaytest/Dockerfile | 7 -- images/hostoverlaytest/test.c | 88 ----------------- images/hostoverlaytest/testfile.txt | 1 - images/tmpfile/Dockerfile | 4 - pkg/test/criutil/criutil.go | 8 +- test/e2e/integration_test.go | 29 +++--- test/image/image_test.go | 21 ++-- test/packetdrill/defs.bzl | 12 ++- test/packetimpact/runner/defs.bzl | 15 ++- test/root/crictl_test.go | 7 +- tools/bazel.mk | 27 +++-- 17 files changed, 312 insertions(+), 172 deletions(-) create mode 100644 images/basic/hostoverlaytest/Dockerfile create mode 100644 images/basic/hostoverlaytest/test.c create mode 100644 images/basic/hostoverlaytest/testfile.txt create mode 100644 images/basic/tmpfile/Dockerfile delete mode 100644 images/hostoverlaytest/Dockerfile delete mode 100644 images/hostoverlaytest/test.c delete mode 100644 images/hostoverlaytest/testfile.txt delete mode 100644 images/tmpfile/Dockerfile (limited to 'test') diff --git a/Makefile b/Makefile index 599cb350f..9806e9901 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Helpful pretty-printer. +MAKEBANNER := \033[1;34mmake\033[0m +submake = echo -e '$(MAKEBANNER) $1' >&2; sh -c '$(MAKE) $1' + # Described below. OPTIONS := STARTUP_OPTIONS := @@ -85,7 +89,7 @@ endif ## define images $(1)-%: ## Image tool: $(1) a given image (also may use 'all-images'). - @$(MAKE) -C images $$@ + @$(call submake,-C images $$@) endef rebuild-...: ## Rebuild the given image. Also may use 'rebuild-all-images'. $(eval $(call images,rebuild)) @@ -96,7 +100,7 @@ $(eval $(call images,push)) load-...: ## Load (pull or rebuild) the given image. Also may use 'load-all-images'. $(eval $(call images,load)) list-images: ## List all available images. - @$(MAKE) -C images $$@ + @$(call submake, -C images $$@) ## ## Canonical build and test targets. @@ -106,21 +110,113 @@ list-images: ## List all available images. ## new subsystem or workflow, consider adding a new target here. ## runsc: ## Builds the runsc binary. - @$(MAKE) build TARGETS="//runsc" + @$(call submake,build OPTIONS="-c opt" TARGETS="//runsc") .PHONY: runsc +debian: ## Builds the debian packages. + @$(call submake,build OPTIONS="-c opt" TARGETS="//runsc:runsc-debian") +.PHONY: debian + smoke-test: ## Runs a simple smoke test after build runsc. 
- @$(MAKE) run DOCKER_PRIVILEGED="" ARGS="--alsologtostderr --network none --debug --TESTONLY-unsafe-nonroot=true --rootless do true" + @$(call submake,run DOCKER_PRIVILEGED="" ARGS="--alsologtostderr --network none --debug --TESTONLY-unsafe-nonroot=true --rootless do true") .PHONY: smoke-tests -unit-tests: ## Runs all unit tests in pkg runsc and tools. - @$(MAKE) test OPTIONS="pkg/... runsc/... tools/..." -.PHONY: unit-tests +unit-tests: ## Local package unit tests in pkg/..., runsc/, tools/.., etc. + @$(call submake,test TARGETS="pkg/... runsc/... tools/... benchmarks/... benchmarks/runner:runner_test") -tests: ## Runs all local ptrace system call tests. - @$(MAKE) test OPTIONS="--test_tag_filters runsc_ptrace test/syscalls/..." +tests: ## Runs all unit tests and syscall tests. +tests: unit-tests + @$(call submake,test TARGETS="test/syscalls/...") .PHONY: tests + +integration-tests: ## Run all standard integration tests. +integration-tests: docker-tests overlay-tests hostnet-tests swgso-tests +integration-tests: do-tests kvm-tests root-tests containerd-tests +.PHONY: integration-tests + +network-tests: ## Run all networking integration tests. +network-tests: iptables-tests packetdrill-tests packetimpact-tests +.PHONY: network-tests + +# Standard integration targets. +INTEGRATION_TARGETS := //test/image:image_test //test/e2e:integration_test + +syscall-%-tests: + @$(call submake,test OPTIONS="--test_tag_filters runsc_$* test/syscalls/...") + +syscall-native-tests: + @$(call submake,test OPTIONS="--test_tag_filters native test/syscalls/...") +.PHONY: syscall-native-tests + +syscall-tests: ## Run all system call tests. +syscall-tests: syscall-ptrace-tests syscall-kvm-tests syscall-native-tests +.PHONY: syscall-tests + +%-runtime-tests: load-runtimes_% + @$(call submake,install-test-runtime) + @$(call submake,test-runtime TARGETS="//test/runtimes:$*") + +do-tests: runsc + @$(call submake,run TARGETS="//runsc" ARGS="--rootless do true") + @$(call submake,run TARGETS="//runsc" ARGS="--rootless -network=none do true") + @$(call submake,sudo TARGETS="//runsc" ARGS="do true") +.PHONY: do-tests + +simple-tests: unit-tests # Compatibility target. +.PHONY: simple-tests + +docker-tests: load-basic-images + @$(call submake,install-test-runtime RUNTIME="vfs1") + @$(call submake,test-runtime RUNTIME="vfs1" TARGETS="$(INTEGRATION_TARGETS)") + @$(call submake,install-test-runtime RUNTIME="vfs2" ARGS="--vfs2") + @$(call submake,test-runtime RUNTIME="vfs2" OPTIONS="--test_filter=.*TestHelloWorld" TARGETS="$(INTEGRATION_TARGETS)") +.PHONY: docker-tests + +overlay-tests: load-basic-images + @$(call submake,install-test-runtime RUNTIME="overlay" ARGS="--overlay") + @$(call submake,test-runtime RUNTIME="overlay" TARGETS="$(INTEGRATION_TARGETS)") +.PHONY: overlay-tests + +swgso-tests: load-basic-images + @$(call submake,install-test-runtime RUNTIME="swgso" ARGS="--software-gso=true --gso=false") + @$(call submake,test-runtime RUNTIME="swgso" TARGETS="$(INTEGRATION_TARGETS)") +.PHONY: swgso-tests + +hostnet-tests: load-basic-images + @$(call submake,install-test-runtime RUNTIME="hostnet" ARGS="--network=host") + @$(call submake,test-runtime RUNTIME="hostnet" OPTIONS="--test_arg=-checkpoint=false" TARGETS="$(INTEGRATION_TARGETS)") +.PHONY: hostnet-tests + +kvm-tests: load-basic-images + @(lsmod | grep -E '^(kvm_intel|kvm_amd)') || sudo modprobe kvm + @if ! 
[[ -w /dev/kvm ]]; then sudo chmod a+rw /dev/kvm; fi + @$(call submake,test TARGETS="//pkg/sentry/platform/kvm:kvm_test") + @$(call submake,install-test-runtime RUNTIME="kvm" ARGS="--platform=kvm") + @$(call submake,test-runtime RUNTIME="kvm" TARGETS="$(INTEGRATION_TARGETS)") +.PHONY: kvm-tests + +iptables-tests: load-iptables + @$(call submake,test-runtime RUNTIME="runc" TARGETS="//test/iptables:iptables_test") + @$(call submake,install-test-runtime RUNTIME="iptables" ARGS="--net-raw") + @$(call submake,test-runtime RUNTIME="iptables" TARGETS="//test/iptables:iptables_test") +.PHONY: iptables-tests + +packetdrill-tests: load-packetdrill + @$(call submake,install-test-runtime RUNTIME="packetdrill") + @$(call submake,test-runtime RUNTIME="packetdrill" TARGETS="$(shell $(MAKE) query TARGETS='attr(tags, packetdrill, tests(//...))')") +.PHONY: packetdrill-tests + +packetimpact-tests: load-packetimpact + @$(call submake,install-test-runtime RUNTIME="packetimpact") + @$(call submake,test-runtime RUNTIME="packetimpact" TARGETS="$(shell $(MAKE) query TARGETS='attr(tags, packetimpact, tests(//...))')") +.PHONY: packetimpact-tests + +root-tests: load-basic-images + @$(call submake,install-test-runtime) + @$(call submake,sudo TARGETS="//test/root:root_test" ARGS="-test.v") +.PHONY: test-root + # Specific containerd version tests. containerd-test-%: load-basic_alpine load-basic_python load-basic_busybox load-basic_resolv load-basic_httpd install-test-runtime @CONTAINERD_VERSION=$* $(MAKE) sudo TARGETS="tools/installers:containerd" @@ -154,7 +250,7 @@ WEBSITE_PROJECT := gvisordev WEBSITE_REGION := us-central1 website-build: load-jekyll ## Build the site image locally. - @$(MAKE) run TARGETS="//website:website" + @$(call submake,run TARGETS="//website:website") .PHONY: website-build website-server: website-build ## Run a local server for development. @@ -205,8 +301,8 @@ $(RELEASE_KEY): release: $(RELEASE_KEY) ## Builds a release. @mkdir -p $(RELEASE_ROOT) @T=$$(mktemp -d /tmp/release.XXXXXX); \ - $(MAKE) copy TARGETS="runsc" DESTINATION=$$T && \ - $(MAKE) copy TARGETS="runsc:runsc-debian" DESTINATION=$$T && \ + $(call submake,copy TARGETS="runsc" DESTINATION=$$T) && \ + $(call submake,copy TARGETS="runsc:runsc-debian" DESTINATION=$$T) && \ NIGHTLY=$(RELEASE_NIGHTLY) tools/make_release.sh $(RELEASE_KEY) $(RELEASE_ROOT) $$T/*; \ rc=$$?; rm -rf $$T; exit $$rc .PHONY: release @@ -229,43 +325,47 @@ tag: ## Creates and pushes a release tag. ## ifeq (,$(BRANCH_NAME)) RUNTIME := runsc -RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/runsc +RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/$(RUNTIME) else RUNTIME := $(BRANCH_NAME) -RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/$(BRANCH_NAME) +RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/$(RUNTIME) endif RUNTIME_BIN := $(RUNTIME_DIR)/runsc RUNTIME_LOG_DIR := $(RUNTIME_DIR)/logs RUNTIME_LOGS := $(RUNTIME_LOG_DIR)/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND% dev: ## Installs a set of local runtimes. Requires sudo. 
- @$(MAKE) refresh ARGS="--net-raw" - @$(MAKE) configure RUNTIME="$(RUNTIME)" ARGS="--net-raw" - @$(MAKE) configure RUNTIME="$(RUNTIME)-d" ARGS="--net-raw --debug --strace --log-packets" - @$(MAKE) configure RUNTIME="$(RUNTIME)-p" ARGS="--net-raw --profile" - @$(MAKE) configure RUNTIME="$(RUNTIME)-vfs2-d" ARGS="--net-raw --debug --strace --log-packets --vfs2" + @$(call submake,refresh ARGS="--net-raw") + @$(call submake,configure RUNTIME="$(RUNTIME)" ARGS="--net-raw") + @$(call submake,configure RUNTIME="$(RUNTIME)-d" ARGS="--net-raw --debug --strace --log-packets") + @$(call submake,configure RUNTIME="$(RUNTIME)-p" ARGS="--net-raw --profile") + @$(call submake,configure RUNTIME="$(RUNTIME)-vfs2-d" ARGS="--net-raw --debug --strace --log-packets --vfs2") @sudo systemctl restart docker .PHONY: dev -refresh: ## Refreshes the runtime binary (for development only). Must have called 'dev' or 'test-install' first. +refresh: ## Refreshes the runtime binary (for development only). Must have called 'dev' or 'install-test-runtime' first. @mkdir -p "$(RUNTIME_DIR)" - @$(MAKE) copy TARGETS=runsc DESTINATION="$(RUNTIME_BIN)" + @$(call submake,copy TARGETS=runsc DESTINATION="$(RUNTIME_BIN)") .PHONY: install install-test-runtime: ## Installs the runtime for testing. Requires sudo. - @$(MAKE) refresh ARGS="--net-raw --TESTONLY-test-name-env=RUNSC_TEST_NAME --debug --strace --log-packets $(ARGS)" - @$(MAKE) configure RUNTIME=runsc - @$(MAKE) configure + @$(call submake,refresh ARGS="--net-raw --TESTONLY-test-name-env=RUNSC_TEST_NAME --debug --strace --log-packets $(ARGS)") + @$(call submake,configure RUNTIME=runsc) + @$(call submake,configure) @sudo systemctl restart docker + @if [[ -f /etc/docker/daemon.json ]]; then \ + sudo chmod 0755 /etc/docker && \ + sudo chmod 0644 /etc/docker/daemon.json; \ + fi .PHONY: install-test-runtime -configure: ## Configures a single runtime. Requires sudo. Typically called from dev or test-install. +configure: ## Configures a single runtime. Requires sudo. Typically called from dev or install-test-runtime. @sudo sudo "$(RUNTIME_BIN)" install --experimental=true --runtime="$(RUNTIME)" -- --debug-log "$(RUNTIME_LOGS)" $(ARGS) - @echo "Installed runtime \"$(RUNTIME)\" @ $(RUNTIME_BIN)" - @echo "Logs are in: $(RUNTIME_LOG_DIR)" + @echo -e "$(INFO) Installed runtime \"$(RUNTIME)\" @ $(RUNTIME_BIN)" + @echo -e "$(INFO) Logs are in: $(RUNTIME_LOG_DIR)" @sudo rm -rf "$(RUNTIME_LOG_DIR)" && mkdir -p "$(RUNTIME_LOG_DIR)" .PHONY: configure test-runtime: ## A convenient wrapper around test that provides the runtime argument. Target must still be provided. - @$(MAKE) test OPTIONS="$(OPTIONS) --test_arg=--runtime=$(RUNTIME)" -.PHONY: runtime-test + @$(call submake,test OPTIONS="$(OPTIONS) --test_output=streamed --test_arg=--runtime=$(RUNTIME)") +.PHONY: test-runtime diff --git a/images/Makefile b/images/Makefile index 1485607bd..9de359a28 100644 --- a/images/Makefile +++ b/images/Makefile @@ -34,8 +34,15 @@ list-all-images: @for image in $(ALL_IMAGES); do echo $${image}; done .PHONY: list-build-images +# Handy wrapper to allow load-all-images, push-all-images, etc. %-all-images: @$(MAKE) $(patsubst %,$*-%,$(ALL_IMAGES)) +load-all-images: + @$(MAKE) $(patsubst %,load-%,$(ALL_IMAGES)) + +# Handy wrapper to load specified "groups", e.g. load-basic-images, etc. +load-%-images: + @$(MAKE) $(patsubst %,load-%,$(subst /,_,$(subst ./,,$(shell find ./$* -name Dockerfile -exec dirname {} \;)))) # tag is a function that returns the tag name, given an image. 
# diff --git a/images/basic/hostoverlaytest/Dockerfile b/images/basic/hostoverlaytest/Dockerfile new file mode 100644 index 000000000..d83439e9c --- /dev/null +++ b/images/basic/hostoverlaytest/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:bionic + +WORKDIR /root +COPY . . + +RUN apt-get update && apt-get install -y gcc +RUN gcc -O2 -o test test.c diff --git a/images/basic/hostoverlaytest/test.c b/images/basic/hostoverlaytest/test.c new file mode 100644 index 000000000..088f90746 --- /dev/null +++ b/images/basic/hostoverlaytest/test.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + const char kTestFilePath[] = "testfile.txt"; + const char kOldFileData[] = "old data\n"; + const char kNewFileData[] = "new data\n"; + const size_t kPageSize = sysconf(_SC_PAGE_SIZE); + + // Open a file that already exists in a host overlayfs lower layer. + const int fd_rdonly = open(kTestFilePath, O_RDONLY); + if (fd_rdonly < 0) { + err(1, "open(%s, O_RDONLY)", kTestFilePath); + } + + // Check that the file's initial contents are what we expect when read via + // syscall. + char oldbuf[sizeof(kOldFileData)] = {}; + ssize_t n = pread(fd_rdonly, oldbuf, sizeof(oldbuf), 0); + if (n < 0) { + err(1, "initial pread"); + } + if (n != strlen(kOldFileData)) { + errx(1, "short initial pread (%ld/%lu bytes)", n, strlen(kOldFileData)); + } + if (strcmp(oldbuf, kOldFileData) != 0) { + errx(1, "initial pread returned wrong data: %s", oldbuf); + } + + // Check that the file's initial contents are what we expect when read via + // memory mapping. + void* page = mmap(NULL, kPageSize, PROT_READ, MAP_SHARED, fd_rdonly, 0); + if (page == MAP_FAILED) { + err(1, "mmap"); + } + if (strcmp(page, kOldFileData) != 0) { + errx(1, "mapping contains wrong initial data: %s", (const char*)page); + } + + // Open the same file writably, causing host overlayfs to copy it up, and + // replace its contents. + const int fd_rdwr = open(kTestFilePath, O_RDWR); + if (fd_rdwr < 0) { + err(1, "open(%s, O_RDWR)", kTestFilePath); + } + n = write(fd_rdwr, kNewFileData, strlen(kNewFileData)); + if (n < 0) { + err(1, "write"); + } + if (n != strlen(kNewFileData)) { + errx(1, "short write (%ld/%lu bytes)", n, strlen(kNewFileData)); + } + if (ftruncate(fd_rdwr, strlen(kNewFileData)) < 0) { + err(1, "truncate"); + } + + int failed = 0; + + // Check that syscalls on the old FD return updated contents. (Before Linux + // 4.18, this requires that runsc use a post-copy-up FD to service the read.) + char newbuf[sizeof(kNewFileData)] = {}; + n = pread(fd_rdonly, newbuf, sizeof(newbuf), 0); + if (n < 0) { + err(1, "final pread"); + } + if (n != strlen(kNewFileData)) { + warnx("short final pread (%ld/%lu bytes)", n, strlen(kNewFileData)); + failed = 1; + } else if (strcmp(newbuf, kNewFileData) != 0) { + warnx("final pread returned wrong data: %s", newbuf); + failed = 1; + } + + // Check that the memory mapping of the old FD has been updated. (Linux + // overlayfs does not do this, so regardless of kernel version this requires + // that runsc replace existing memory mappings with mappings of a + // post-copy-up FD.) 
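+  // Without that remapping, this page would still contain the pre-copy-up
+  // contents.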
+ if (strcmp(page, kNewFileData) != 0) { + warnx("mapping contains wrong final data: %s", (const char*)page); + failed = 1; + } + + return failed; +} diff --git a/images/basic/hostoverlaytest/testfile.txt b/images/basic/hostoverlaytest/testfile.txt new file mode 100644 index 000000000..e4188c841 --- /dev/null +++ b/images/basic/hostoverlaytest/testfile.txt @@ -0,0 +1 @@ +old data diff --git a/images/basic/tmpfile/Dockerfile b/images/basic/tmpfile/Dockerfile new file mode 100644 index 000000000..e3816c8cb --- /dev/null +++ b/images/basic/tmpfile/Dockerfile @@ -0,0 +1,4 @@ +# Create file under /tmp to ensure files inside '/tmp' are not overridden. +FROM alpine:3.11.5 +RUN mkdir -p /tmp/foo \ + && echo 123 > /tmp/foo/file.txt diff --git a/images/hostoverlaytest/Dockerfile b/images/hostoverlaytest/Dockerfile deleted file mode 100644 index d83439e9c..000000000 --- a/images/hostoverlaytest/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM ubuntu:bionic - -WORKDIR /root -COPY . . - -RUN apt-get update && apt-get install -y gcc -RUN gcc -O2 -o test test.c diff --git a/images/hostoverlaytest/test.c b/images/hostoverlaytest/test.c deleted file mode 100644 index 088f90746..000000000 --- a/images/hostoverlaytest/test.c +++ /dev/null @@ -1,88 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int main(int argc, char** argv) { - const char kTestFilePath[] = "testfile.txt"; - const char kOldFileData[] = "old data\n"; - const char kNewFileData[] = "new data\n"; - const size_t kPageSize = sysconf(_SC_PAGE_SIZE); - - // Open a file that already exists in a host overlayfs lower layer. - const int fd_rdonly = open(kTestFilePath, O_RDONLY); - if (fd_rdonly < 0) { - err(1, "open(%s, O_RDONLY)", kTestFilePath); - } - - // Check that the file's initial contents are what we expect when read via - // syscall. - char oldbuf[sizeof(kOldFileData)] = {}; - ssize_t n = pread(fd_rdonly, oldbuf, sizeof(oldbuf), 0); - if (n < 0) { - err(1, "initial pread"); - } - if (n != strlen(kOldFileData)) { - errx(1, "short initial pread (%ld/%lu bytes)", n, strlen(kOldFileData)); - } - if (strcmp(oldbuf, kOldFileData) != 0) { - errx(1, "initial pread returned wrong data: %s", oldbuf); - } - - // Check that the file's initial contents are what we expect when read via - // memory mapping. - void* page = mmap(NULL, kPageSize, PROT_READ, MAP_SHARED, fd_rdonly, 0); - if (page == MAP_FAILED) { - err(1, "mmap"); - } - if (strcmp(page, kOldFileData) != 0) { - errx(1, "mapping contains wrong initial data: %s", (const char*)page); - } - - // Open the same file writably, causing host overlayfs to copy it up, and - // replace its contents. - const int fd_rdwr = open(kTestFilePath, O_RDWR); - if (fd_rdwr < 0) { - err(1, "open(%s, O_RDWR)", kTestFilePath); - } - n = write(fd_rdwr, kNewFileData, strlen(kNewFileData)); - if (n < 0) { - err(1, "write"); - } - if (n != strlen(kNewFileData)) { - errx(1, "short write (%ld/%lu bytes)", n, strlen(kNewFileData)); - } - if (ftruncate(fd_rdwr, strlen(kNewFileData)) < 0) { - err(1, "truncate"); - } - - int failed = 0; - - // Check that syscalls on the old FD return updated contents. (Before Linux - // 4.18, this requires that runsc use a post-copy-up FD to service the read.) 
- char newbuf[sizeof(kNewFileData)] = {}; - n = pread(fd_rdonly, newbuf, sizeof(newbuf), 0); - if (n < 0) { - err(1, "final pread"); - } - if (n != strlen(kNewFileData)) { - warnx("short final pread (%ld/%lu bytes)", n, strlen(kNewFileData)); - failed = 1; - } else if (strcmp(newbuf, kNewFileData) != 0) { - warnx("final pread returned wrong data: %s", newbuf); - failed = 1; - } - - // Check that the memory mapping of the old FD has been updated. (Linux - // overlayfs does not do this, so regardless of kernel version this requires - // that runsc replace existing memory mappings with mappings of a - // post-copy-up FD.) - if (strcmp(page, kNewFileData) != 0) { - warnx("mapping contains wrong final data: %s", (const char*)page); - failed = 1; - } - - return failed; -} diff --git a/images/hostoverlaytest/testfile.txt b/images/hostoverlaytest/testfile.txt deleted file mode 100644 index e4188c841..000000000 --- a/images/hostoverlaytest/testfile.txt +++ /dev/null @@ -1 +0,0 @@ -old data diff --git a/images/tmpfile/Dockerfile b/images/tmpfile/Dockerfile deleted file mode 100644 index e3816c8cb..000000000 --- a/images/tmpfile/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -# Create file under /tmp to ensure files inside '/tmp' are not overridden. -FROM alpine:3.11.5 -RUN mkdir -p /tmp/foo \ - && echo 123 > /tmp/foo/file.txt diff --git a/pkg/test/criutil/criutil.go b/pkg/test/criutil/criutil.go index 66f10c016..70945f234 100644 --- a/pkg/test/criutil/criutil.go +++ b/pkg/test/criutil/criutil.go @@ -40,9 +40,9 @@ type Crictl struct { cleanup []func() } -// resolvePath attempts to find binary paths. It may set the path to invalid, +// ResolvePath attempts to find binary paths. It may set the path to invalid, // which will cause the execution to fail with a sensible error. -func resolvePath(executable string) string { +func ResolvePath(executable string) string { runtime, err := dockerutil.RuntimePath() if err == nil { // Check first the directory of the runtime itself. @@ -230,7 +230,7 @@ func (cc *Crictl) Import(image string) error { // be pushing a lot of bytes in order to import the image. The connect // timeout stays the same and is inherited from the Crictl instance. cmd := testutil.Command(cc.logger, - resolvePath("ctr"), + ResolvePath("ctr"), fmt.Sprintf("--connect-timeout=%s", 30*time.Second), fmt.Sprintf("--address=%s", cc.endpoint), "-n", "k8s.io", "images", "import", "-") @@ -358,7 +358,7 @@ func (cc *Crictl) StopPodAndContainer(podID, contID string) error { // run runs crictl with the given args. func (cc *Crictl) run(args ...string) (string, error) { defaultArgs := []string{ - resolvePath("crictl"), + ResolvePath("crictl"), "--image-endpoint", fmt.Sprintf("unix://%s", cc.endpoint), "--runtime-endpoint", fmt.Sprintf("unix://%s", cc.endpoint), } diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 5a9455b33..ef42b689a 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -40,6 +40,9 @@ import ( "gvisor.dev/gvisor/pkg/test/testutil" ) +// defaultWait is the default wait time used for tests. +const defaultWait = time.Minute + // httpRequestSucceeds sends a request to a given url and checks that the status is OK. 
func httpRequestSucceeds(client http.Client, server string, port int) error { url := fmt.Sprintf("http://%s:%d", server, port) @@ -76,10 +79,10 @@ func TestLifeCycle(t *testing.T) { if err != nil { t.Fatalf("docker.FindPort(80) failed: %v", err) } - if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { + if err := testutil.WaitForHTTP(port, defaultWait); err != nil { t.Fatalf("WaitForHTTP() timeout: %v", err) } - client := http.Client{Timeout: time.Duration(2 * time.Second)} + client := http.Client{Timeout: defaultWait} if err := httpRequestSucceeds(client, "localhost", port); err != nil { t.Errorf("http request failed: %v", err) } @@ -116,12 +119,12 @@ func TestPauseResume(t *testing.T) { } // Wait until it's up and running. - if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { + if err := testutil.WaitForHTTP(port, defaultWait); err != nil { t.Fatalf("WaitForHTTP() timeout: %v", err) } // Check that container is working. - client := http.Client{Timeout: time.Duration(2 * time.Second)} + client := http.Client{Timeout: defaultWait} if err := httpRequestSucceeds(client, "localhost", port); err != nil { t.Error("http request failed:", err) } @@ -131,6 +134,7 @@ func TestPauseResume(t *testing.T) { } // Check if container is paused. + client = http.Client{Timeout: 10 * time.Millisecond} // Don't wait a minute. switch _, err := client.Get(fmt.Sprintf("http://localhost:%d", port)); v := err.(type) { case nil: t.Errorf("http req expected to fail but it succeeded") @@ -147,11 +151,12 @@ func TestPauseResume(t *testing.T) { } // Wait until it's up and running. - if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { + if err := testutil.WaitForHTTP(port, defaultWait); err != nil { t.Fatalf("WaitForHTTP() timeout: %v", err) } // Check if container is working again. + client = http.Client{Timeout: defaultWait} if err := httpRequestSucceeds(client, "localhost", port); err != nil { t.Error("http request failed:", err) } @@ -178,12 +183,12 @@ func TestCheckpointRestore(t *testing.T) { if err := d.Checkpoint(ctx, "test"); err != nil { t.Fatalf("docker checkpoint failed: %v", err) } - if err := d.WaitTimeout(ctx, 30*time.Second); err != nil { + if err := d.WaitTimeout(ctx, defaultWait); err != nil { t.Fatalf("wait failed: %v", err) } // TODO(b/143498576): Remove Poll after github.com/moby/moby/issues/38963 is fixed. - if err := testutil.Poll(func() error { return d.Restore(ctx, "test") }, 15*time.Second); err != nil { + if err := testutil.Poll(func() error { return d.Restore(ctx, "test") }, defaultWait); err != nil { t.Fatalf("docker restore failed: %v", err) } @@ -194,12 +199,12 @@ func TestCheckpointRestore(t *testing.T) { } // Wait until it's up and running. - if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { + if err := testutil.WaitForHTTP(port, defaultWait); err != nil { t.Fatalf("WaitForHTTP() timeout: %v", err) } // Check if container is working again. 
- client := http.Client{Timeout: time.Duration(2 * time.Second)} + client := http.Client{Timeout: defaultWait} if err := httpRequestSucceeds(client, "localhost", port); err != nil { t.Error("http request failed:", err) } @@ -236,7 +241,7 @@ func TestConnectToSelf(t *testing.T) { if want := "server\n"; reply != want { t.Errorf("Error on server, want: %q, got: %q", want, reply) } - if _, err := d.WaitForOutput(ctx, "^client\n$", 1*time.Second); err != nil { + if _, err := d.WaitForOutput(ctx, "^client\n$", defaultWait); err != nil { t.Fatalf("docker.WaitForOutput(client) timeout: %v", err) } } @@ -375,7 +380,7 @@ func TestTmpFile(t *testing.T) { d := dockerutil.MakeContainer(ctx, t) defer d.CleanUp(ctx) - opts := dockerutil.RunOpts{Image: "tmpfile"} + opts := dockerutil.RunOpts{Image: "basic/tmpfile"} got, err := d.Run(ctx, opts, "cat", "/tmp/foo/file.txt") if err != nil { t.Fatalf("docker run failed: %v", err) @@ -427,7 +432,7 @@ func TestHostOverlayfsCopyUp(t *testing.T) { defer d.CleanUp(ctx) if _, err := d.Run(ctx, dockerutil.RunOpts{ - Image: "hostoverlaytest", + Image: "basic/hostoverlaytest", WorkDir: "/root", }, "./test"); err != nil { t.Fatalf("docker run failed: %v", err) diff --git a/test/image/image_test.go b/test/image/image_test.go index 8aa78035f..ac6186688 100644 --- a/test/image/image_test.go +++ b/test/image/image_test.go @@ -37,6 +37,13 @@ import ( "gvisor.dev/gvisor/pkg/test/testutil" ) +// defaultWait defines how long to wait for progress. +// +// See BUILD: This is at least a "large" test, so allow up to 1 minute for any +// given "wait" step. Note that all tests are run in parallel, which may cause +// individual slow-downs (but a huge speed-up in aggregate). +const defaultWait = time.Minute + func TestHelloWorld(t *testing.T) { ctx := context.Background() d := dockerutil.MakeContainer(ctx, t) @@ -130,7 +137,7 @@ func TestHttpd(t *testing.T) { } // Wait until it's up and running. - if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { + if err := testutil.WaitForHTTP(port, defaultWait); err != nil { t.Errorf("WaitForHTTP() timeout: %v", err) } @@ -159,7 +166,7 @@ func TestNginx(t *testing.T) { } // Wait until it's up and running. - if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { + if err := testutil.WaitForHTTP(port, defaultWait); err != nil { t.Errorf("WaitForHTTP() timeout: %v", err) } @@ -180,7 +187,7 @@ func TestMysql(t *testing.T) { } // Wait until it's up and running. - if _, err := server.WaitForOutput(ctx, "port: 3306 MySQL Community Server", 3*time.Minute); err != nil { + if _, err := server.WaitForOutput(ctx, "port: 3306 MySQL Community Server", defaultWait); err != nil { t.Fatalf("WaitForOutput() timeout: %v", err) } @@ -200,7 +207,7 @@ func TestMysql(t *testing.T) { } // Ensure file executed to the end and shutdown mysql. - if _, err := server.WaitForOutput(ctx, "mysqld: Shutdown complete", 30*time.Second); err != nil { + if _, err := server.WaitForOutput(ctx, "mysqld: Shutdown complete", defaultWait); err != nil { t.Fatalf("WaitForOutput() timeout: %v", err) } } @@ -225,7 +232,7 @@ func TestTomcat(t *testing.T) { } // Wait until it's up and running. - if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil { + if err := testutil.WaitForHTTP(port, defaultWait); err != nil { t.Fatalf("WaitForHTTP() timeout: %v", err) } @@ -262,7 +269,7 @@ func TestRuby(t *testing.T) { } // Wait until it's up and running, 'gem install' can take some time. 
- if err := testutil.WaitForHTTP(port, 1*time.Minute); err != nil { + if err := testutil.WaitForHTTP(port, time.Minute); err != nil { t.Fatalf("WaitForHTTP() timeout: %v", err) } @@ -299,7 +306,7 @@ func TestStdio(t *testing.T) { } for _, want := range []string{wantStdout, wantStderr} { - if _, err := d.WaitForOutput(ctx, want, 5*time.Second); err != nil { + if _, err := d.WaitForOutput(ctx, want, defaultWait); err != nil { t.Fatalf("docker didn't get output %q : %v", want, err) } } diff --git a/test/packetdrill/defs.bzl b/test/packetdrill/defs.bzl index f499c177b..fc28ce9ba 100644 --- a/test/packetdrill/defs.bzl +++ b/test/packetdrill/defs.bzl @@ -26,7 +26,7 @@ def _packetdrill_test_impl(ctx): transitive_files = depset() if hasattr(ctx.attr._test_runner, "data_runfiles"): - transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + transitive_files = ctx.attr._test_runner.data_runfiles.files runfiles = ctx.runfiles( files = [test_runner] + ctx.files._init_script + ctx.files.scripts, transitive_files = transitive_files, @@ -60,11 +60,15 @@ _packetdrill_test = rule( implementation = _packetdrill_test_impl, ) -_PACKETDRILL_TAGS = ["local", "manual"] +PACKETDRILL_TAGS = [ + "local", + "manual", + "packetdrill", +] def packetdrill_linux_test(name, **kwargs): if "tags" not in kwargs: - kwargs["tags"] = _PACKETDRILL_TAGS + kwargs["tags"] = PACKETDRILL_TAGS _packetdrill_test( name = name, flags = ["--dut_platform", "linux"], @@ -73,7 +77,7 @@ def packetdrill_linux_test(name, **kwargs): def packetdrill_netstack_test(name, **kwargs): if "tags" not in kwargs: - kwargs["tags"] = _PACKETDRILL_TAGS + kwargs["tags"] = PACKETDRILL_TAGS _packetdrill_test( name = name, # This is the default runtime unless diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl index 77cdfea12..79b3c9162 100644 --- a/test/packetimpact/runner/defs.bzl +++ b/test/packetimpact/runner/defs.bzl @@ -55,7 +55,11 @@ _packetimpact_test = rule( implementation = _packetimpact_test_impl, ) -PACKETIMPACT_TAGS = ["local", "manual"] +PACKETIMPACT_TAGS = [ + "local", + "manual", + "packetimpact", +] def packetimpact_linux_test( name, @@ -75,7 +79,7 @@ def packetimpact_linux_test( name = name + "_linux_test", testbench_binary = testbench_binary, flags = ["--dut_platform", "linux"] + expect_failure_flag, - tags = PACKETIMPACT_TAGS + ["packetimpact"], + tags = PACKETIMPACT_TAGS, **kwargs ) @@ -101,7 +105,7 @@ def packetimpact_netstack_test( # This is the default runtime unless # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, - tags = PACKETIMPACT_TAGS + ["packetimpact"], + tags = PACKETIMPACT_TAGS, **kwargs ) @@ -121,7 +125,10 @@ def packetimpact_go_test(name, size = "small", pure = True, expect_linux_failure name = testbench_binary, size = size, pure = pure, - tags = PACKETIMPACT_TAGS, + tags = [ + "local", + "manual", + ], **kwargs ) packetimpact_linux_test( diff --git a/test/root/crictl_test.go b/test/root/crictl_test.go index 193705ab8..df91fa0fe 100644 --- a/test/root/crictl_test.go +++ b/test/root/crictl_test.go @@ -405,11 +405,8 @@ func setup(t *testing.T, version string) (*criutil.Crictl, func(), error) { } // We provide the shim, followed by the runtime, and then a - // temporary root directory. Note that we can safely assume - // that the shim has been installed in the same directory as - // the runtime (for test installs and for normal installs). 
- // Since this is v1, the binary name will be fixed. - config = fmt.Sprintf(v1Template, path.Join(runtimeDir, "gvisor-containerd-shim"), runtime, runtimeDir) + // temporary root directory. + config = fmt.Sprintf(v1Template, criutil.ResolvePath("gvisor-containerd-shim"), runtime, containerdRoot) case v2: // This is only supported past 1.2. if major < 1 || (major == 1 && minor <= 1) { diff --git a/tools/bazel.mk b/tools/bazel.mk index 9e02af8dc..b6f23fe7e 100644 --- a/tools/bazel.mk +++ b/tools/bazel.mk @@ -23,11 +23,18 @@ BRANCH_NAME := $(shell (git branch --show-current 2>/dev/null || \ USER ?= gvisor HASH ?= $(shell readlink -m $(CURDIR) | md5sum | cut -c1-8) DOCKER_NAME ?= gvisor-bazel-$(HASH) -DOCKER_PRIVILEGED ?= --privileged +DOCKER_PRIVILEGED ?= --privileged --network host BAZEL_CACHE := $(shell readlink -m ~/.cache/bazel/) GCLOUD_CONFIG := $(shell readlink -m ~/.config/gcloud/) DOCKER_SOCKET := /var/run/docker.sock +# Bazel flags. +OPTIONS += --test_output=errors --keep_going --verbose_failures=true +ifneq ($(AUTH_CREDENTIALS),) +OPTIONS += --auth_credentials=${AUTH_CREDENTIALS} --config=remote +endif +BAZEL := bazel $(STARTUP_OPTIONS) + # Non-configurable. UID := $(shell id -u ${USER}) GID := $(shell id -g ${USER}) @@ -38,6 +45,7 @@ FULL_DOCKER_RUN_OPTIONS += -v "$(GCLOUD_CONFIG):$(GCLOUD_CONFIG)" FULL_DOCKER_RUN_OPTIONS += -v "/tmp:/tmp" ifneq ($(DOCKER_PRIVILEGED),) FULL_DOCKER_RUN_OPTIONS += -v "$(DOCKER_SOCKET):$(DOCKER_SOCKET)" +FULL_DOCKER_RUN_OPTIONS += $(DOCKER_PRIVILEGED) DOCKER_GROUP := $(shell stat -c '%g' $(DOCKER_SOCKET)) ifneq ($(GID),$(DOCKER_GROUP)) USERADD_OPTIONS += --groups $(DOCKER_GROUP) @@ -63,6 +71,7 @@ SHELL=/bin/bash -o pipefail bazel-server-start: load-default ## Starts the bazel server. @mkdir -p $(BAZEL_CACHE) @mkdir -p $(GCLOUD_CONFIG) + @if docker ps --all | grep $(DOCKER_NAME); then docker rm $(DOCKER_NAME); fi docker run -d --rm \ --init \ --name $(DOCKER_NAME) \ @@ -75,14 +84,14 @@ bazel-server-start: load-default ## Starts the bazel server. sh -c "groupadd --gid $(GID) --non-unique $(USER) && \ $(GROUPADD_DOCKER) \ useradd --uid $(UID) --non-unique --no-create-home --gid $(GID) $(USERADD_OPTIONS) -d $(HOME) $(USER) && \ - bazel version && \ - exec tail --pid=\$$(bazel info server_pid) -f /dev/null" + $(BAZEL) version && \ + exec tail --pid=\$$($(BAZEL) info server_pid) -f /dev/null" @while :; do if docker logs $(DOCKER_NAME) 2>/dev/null | grep "Build label:" >/dev/null; then break; fi; \ - if ! docker ps | grep $(DOCKER_NAME); then exit 1; else sleep 1; fi; done + if ! docker ps | grep $(DOCKER_NAME); then docker logs $(DOCKER_NAME); exit 1; else sleep 1; fi; done .PHONY: bazel-server-start bazel-shutdown: ## Shuts down a running bazel server. - @docker exec --user $(UID):$(GID) $(DOCKER_NAME) bazel shutdown; rc=$$?; docker kill $(DOCKER_NAME) || [[ $$rc -ne 0 ]] + @docker exec --user $(UID):$(GID) $(DOCKER_NAME) $(BAZEL) shutdown; rc=$$?; docker kill $(DOCKER_NAME) || [[ $$rc -ne 0 ]] .PHONY: bazel-shutdown bazel-alias: ## Emits an alias that can be used within the shell. @@ -93,7 +102,7 @@ bazel-server: ## Ensures that the server exists. Used as an internal target. 
@docker exec $(DOCKER_NAME) true || $(MAKE) bazel-server-start .PHONY: bazel-server -build_cmd = docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) sh -o pipefail -c 'bazel $(STARTUP_OPTIONS) build $(OPTIONS) $(TARGETS)' +build_cmd = docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) sh -o pipefail -c '$(BAZEL) $(STARTUP_OPTIONS) build $(OPTIONS) $(TARGETS)' build_paths = $(build_cmd) 2>&1 \ | tee /proc/self/fd/2 \ @@ -120,5 +129,9 @@ sudo: bazel-server .PHONY: sudo test: bazel-server - @docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) bazel $(STARTUP_OPTIONS) test $(OPTIONS) $(TARGETS) + @docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) $(BAZEL) test $(OPTIONS) $(TARGETS) .PHONY: test + +query: bazel-server + @docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) $(BAZEL) query $(OPTIONS) '$(TARGETS)' +.PHONY: query -- cgit v1.2.3 From 13c0cca50e061c9b9a3ae8e13e8baa0f29909370 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 22 Jul 2020 15:10:17 -0700 Subject: Skip RawHDRINCL tests that are blocking presubmits/releases. Temporarily skip these, on bhaskherh@'s advice. PiperOrigin-RevId: 322664955 --- test/syscalls/linux/raw_socket_hdrincl.cc | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc index 5bb14d57c..97f0467aa 100644 --- a/test/syscalls/linux/raw_socket_hdrincl.cc +++ b/test/syscalls/linux/raw_socket_hdrincl.cc @@ -178,6 +178,9 @@ TEST_F(RawHDRINCL, ConnectToLoopback) { } TEST_F(RawHDRINCL, SendWithoutConnectSucceeds) { + // FIXME(github.dev/issue/3159): Test currently flaky. + SKIP_IF(true); + struct iphdr hdr = LoopbackHeader(); ASSERT_THAT(send(socket_, &hdr, sizeof(hdr), 0), SyscallSucceedsWithValue(sizeof(hdr))); @@ -281,6 +284,9 @@ TEST_F(RawHDRINCL, SendAndReceive) { // Send and receive a packet where the sendto address is not the same as the // provided destination. TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) { + // FIXME(github.dev/issue/3160): Test currently flaky. + SKIP_IF(true); + int port = 40000; if (!IsRunningOnGvisor()) { port = static_cast(ASSERT_NO_ERRNO_AND_VALUE( -- cgit v1.2.3 From 71bf90c55bd888f9b9c493533ca5e4b2b4b3d21d Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Wed, 22 Jul 2020 15:12:56 -0700 Subject: Support for receiving outbound packets in AF_PACKET. 
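With this change, frames written out by netstack are also looped back to bound
packet sockets and reported to userspace with the packet type set to
PACKET_OUTGOING (filled into the sll_pkttype field of sockaddr_ll via the new
toLinuxPacketType mapping).

As an illustrative sketch only (not part of this change, and assuming the
golang.org/x/sys/unix bindings), a userspace observer of such packets could
look like:

    package main

    import (
        "fmt"
        "log"

        "golang.org/x/sys/unix"
    )

    // htons converts a 16-bit value to network byte order.
    func htons(v uint16) uint16 { return v<<8 | v>>8 }

    func main() {
        // A packet socket bound to all protocols sees every link-layer frame.
        fd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL)))
        if err != nil {
            log.Fatalf("socket(AF_PACKET): %v", err)
        }
        defer unix.Close(fd)

        buf := make([]byte, 65536)
        for {
            n, from, err := unix.Recvfrom(fd, buf, 0)
            if err != nil {
                log.Fatalf("recvfrom: %v", err)
            }
            sll, ok := from.(*unix.SockaddrLinklayer)
            if !ok {
                continue
            }
            // Outgoing frames now arrive tagged as PACKET_OUTGOING.
            if sll.Pkttype == unix.PACKET_OUTGOING {
                fmt.Printf("outgoing frame: %d bytes on ifindex %d\n", n, sll.Ifindex)
            }
        }
    }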
Updates #173 PiperOrigin-RevId: 322665518 --- pkg/abi/linux/socket.go | 9 +++ pkg/sentry/socket/netstack/netstack.go | 19 +++++ pkg/tcpip/link/channel/channel.go | 4 + pkg/tcpip/link/fdbased/endpoint.go | 36 ++++----- pkg/tcpip/link/fdbased/endpoint_test.go | 8 ++ pkg/tcpip/link/loopback/loopback.go | 3 + pkg/tcpip/link/muxed/injectable.go | 4 + pkg/tcpip/link/nested/nested.go | 15 ++++ pkg/tcpip/link/nested/nested_test.go | 4 + pkg/tcpip/link/packetsocket/BUILD | 14 ++++ pkg/tcpip/link/packetsocket/endpoint.go | 50 +++++++++++++ pkg/tcpip/link/qdisc/fifo/endpoint.go | 12 +++ pkg/tcpip/link/sharedmem/sharedmem.go | 21 ++++-- pkg/tcpip/link/sharedmem/sharedmem_test.go | 4 + pkg/tcpip/link/sniffer/sniffer.go | 5 ++ pkg/tcpip/link/tun/device.go | 43 +++++++---- pkg/tcpip/link/waitable/waitable.go | 14 ++++ pkg/tcpip/link/waitable/waitable_test.go | 9 +++ pkg/tcpip/network/ip_test.go | 5 ++ pkg/tcpip/stack/forwarder_test.go | 5 ++ pkg/tcpip/stack/nic.go | 30 ++++++-- pkg/tcpip/stack/nic_test.go | 5 ++ pkg/tcpip/stack/packet_buffer.go | 4 + pkg/tcpip/stack/registration.go | 16 +++- pkg/tcpip/tcpip.go | 25 +++++++ pkg/tcpip/transport/packet/endpoint.go | 52 +++++++++---- runsc/boot/BUILD | 1 + runsc/boot/network.go | 4 + test/syscalls/linux/packet_socket.cc | 116 +++++++++++++++++++++++++++++ 29 files changed, 471 insertions(+), 66 deletions(-) create mode 100644 pkg/tcpip/link/packetsocket/BUILD create mode 100644 pkg/tcpip/link/packetsocket/endpoint.go (limited to 'test') diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go index 4a14ef691..95337c168 100644 --- a/pkg/abi/linux/socket.go +++ b/pkg/abi/linux/socket.go @@ -134,6 +134,15 @@ const ( SHUT_RDWR = 2 ) +// Packet types from +const ( + PACKET_HOST = 0 // To us + PACKET_BROADCAST = 1 // To all + PACKET_MULTICAST = 2 // To group + PACKET_OTHERHOST = 3 // To someone else + PACKET_OUTGOING = 4 // Outgoing of any type +) + // Socket options from socket.h. const ( SO_DEBUG = 1 diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 49a04e613..964ec8414 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -26,6 +26,7 @@ package netstack import ( "bytes" + "fmt" "io" "math" "reflect" @@ -2468,6 +2469,23 @@ func (s *socketOpsCommon) fillCmsgInq(cmsg *socket.ControlMessages) { cmsg.IP.Inq = int32(len(s.readView) + rcvBufUsed) } +func toLinuxPacketType(pktType tcpip.PacketType) uint8 { + switch pktType { + case tcpip.PacketHost: + return linux.PACKET_HOST + case tcpip.PacketOtherHost: + return linux.PACKET_OTHERHOST + case tcpip.PacketOutgoing: + return linux.PACKET_OUTGOING + case tcpip.PacketBroadcast: + return linux.PACKET_BROADCAST + case tcpip.PacketMulticast: + return linux.PACKET_MULTICAST + default: + panic(fmt.Sprintf("unknown packet type: %d", pktType)) + } +} + // nonBlockingRead issues a non-blocking read. // // TODO(b/78348848): Support timestamps for stream sockets. 
@@ -2526,6 +2544,7 @@ func (s *socketOpsCommon) nonBlockingRead(ctx context.Context, dst usermem.IOSeq switch v := addr.(type) { case *linux.SockAddrLink: v.Protocol = htons(uint16(s.linkPacketInfo.Protocol)) + v.PacketType = toLinuxPacketType(s.linkPacketInfo.PktType) } } diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index a2bb773d4..e12a5929b 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -302,3 +302,7 @@ func (e *Endpoint) RemoveNotify(handle *NotificationHandle) { func (*Endpoint) ARPHardwareType() header.ARPHardwareType { return header.ARPHardwareNone } + +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *Endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { +} diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 6aa1badc7..c18bb91fb 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -386,26 +386,33 @@ const ( _VIRTIO_NET_HDR_GSO_TCPV6 = 4 ) -// WritePacket writes outbound packets to the file descriptor. If it is not -// currently writable, the packet is dropped. -func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { if e.hdrSize > 0 { // Add ethernet header if needed. eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize)) pkt.LinkHeader = buffer.View(eth) ethHdr := &header.EthernetFields{ - DstAddr: r.RemoteLinkAddress, + DstAddr: remote, Type: protocol, } // Preserve the src address if it's set in the route. - if r.LocalLinkAddress != "" { - ethHdr.SrcAddr = r.LocalLinkAddress + if local != "" { + ethHdr.SrcAddr = local } else { ethHdr.SrcAddr = e.addr } eth.Encode(ethHdr) } +} + +// WritePacket writes outbound packets to the file descriptor. If it is not +// currently writable, the packet is dropped. +func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { + if e.hdrSize > 0 { + e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt) + } var builder iovec.Builder @@ -448,22 +455,8 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc // Send a batch of packets through batchFD. mmsgHdrs := make([]rawfile.MMsgHdr, 0, len(batch)) for _, pkt := range batch { - var eth header.Ethernet if e.hdrSize > 0 { - // Add ethernet header if needed. - eth = make(header.Ethernet, header.EthernetMinimumSize) - ethHdr := &header.EthernetFields{ - DstAddr: pkt.EgressRoute.RemoteLinkAddress, - Type: pkt.NetworkProtocolNumber, - } - - // Preserve the src address if it's set in the route. 
- if pkt.EgressRoute.LocalLinkAddress != "" { - ethHdr.SrcAddr = pkt.EgressRoute.LocalLinkAddress - } else { - ethHdr.SrcAddr = e.addr - } - eth.Encode(ethHdr) + e.AddHeader(pkt.EgressRoute.LocalLinkAddress, pkt.EgressRoute.RemoteLinkAddress, pkt.NetworkProtocolNumber, pkt) } var vnetHdrBuf []byte @@ -493,7 +486,6 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc var builder iovec.Builder builder.Add(vnetHdrBuf) - builder.Add(eth) builder.Add(pkt.Header.View()) for _, v := range pkt.Data.Views() { builder.Add(v) diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go index 4bad930c7..7b995b85a 100644 --- a/pkg/tcpip/link/fdbased/endpoint_test.go +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -107,6 +107,10 @@ func (c *context) DeliverNetworkPacket(remote tcpip.LinkAddress, local tcpip.Lin c.ch <- packetInfo{remote, protocol, pkt} } +func (c *context) DeliverOutboundPacket(remote tcpip.LinkAddress, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + panic("unimplemented") +} + func TestNoEthernetProperties(t *testing.T) { c := newContext(t, &Options{MTU: mtu}) defer c.cleanup() @@ -510,6 +514,10 @@ func (d *fakeNetworkDispatcher) DeliverNetworkPacket(remote, local tcpip.LinkAdd d.pkts = append(d.pkts, pkt) } +func (d *fakeNetworkDispatcher) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + panic("unimplemented") +} + func TestDispatchPacketFormat(t *testing.T) { for _, test := range []struct { name string diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go index 3b17d8c28..781cdd317 100644 --- a/pkg/tcpip/link/loopback/loopback.go +++ b/pkg/tcpip/link/loopback/loopback.go @@ -118,3 +118,6 @@ func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error { func (*endpoint) ARPHardwareType() header.ARPHardwareType { return header.ARPHardwareLoopback } + +func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { +} diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go index c305d9e86..56a611825 100644 --- a/pkg/tcpip/link/muxed/injectable.go +++ b/pkg/tcpip/link/muxed/injectable.go @@ -135,6 +135,10 @@ func (*InjectableEndpoint) ARPHardwareType() header.ARPHardwareType { panic("unsupported operation") } +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (*InjectableEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { +} + // NewInjectableEndpoint creates a new multi-endpoint injectable endpoint. func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) *InjectableEndpoint { return &InjectableEndpoint{ diff --git a/pkg/tcpip/link/nested/nested.go b/pkg/tcpip/link/nested/nested.go index 328bd048e..d40de54df 100644 --- a/pkg/tcpip/link/nested/nested.go +++ b/pkg/tcpip/link/nested/nested.go @@ -61,6 +61,16 @@ func (e *Endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protoco } } +// DeliverOutboundPacket implements stack.NetworkDispatcher.DeliverOutboundPacket. 
+func (e *Endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + e.mu.RLock() + d := e.dispatcher + e.mu.RUnlock() + if d != nil { + d.DeliverOutboundPacket(remote, local, protocol, pkt) + } +} + // Attach implements stack.LinkEndpoint. func (e *Endpoint) Attach(dispatcher stack.NetworkDispatcher) { e.mu.Lock() @@ -135,3 +145,8 @@ func (e *Endpoint) GSOMaxSize() uint32 { func (e *Endpoint) ARPHardwareType() header.ARPHardwareType { return e.child.ARPHardwareType() } + +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *Endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + e.child.AddHeader(local, remote, protocol, pkt) +} diff --git a/pkg/tcpip/link/nested/nested_test.go b/pkg/tcpip/link/nested/nested_test.go index c1a219f02..7d9249c1c 100644 --- a/pkg/tcpip/link/nested/nested_test.go +++ b/pkg/tcpip/link/nested/nested_test.go @@ -55,6 +55,10 @@ func (d *counterDispatcher) DeliverNetworkPacket(tcpip.LinkAddress, tcpip.LinkAd d.count++ } +func (d *counterDispatcher) DeliverOutboundPacket(tcpip.LinkAddress, tcpip.LinkAddress, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) { + panic("unimplemented") +} + func TestNestedLinkEndpoint(t *testing.T) { const emptyAddress = tcpip.LinkAddress("") diff --git a/pkg/tcpip/link/packetsocket/BUILD b/pkg/tcpip/link/packetsocket/BUILD new file mode 100644 index 000000000..6fff160ce --- /dev/null +++ b/pkg/tcpip/link/packetsocket/BUILD @@ -0,0 +1,14 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "packetsocket", + srcs = ["endpoint.go"], + visibility = ["//visibility:public"], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/link/nested", + "//pkg/tcpip/stack", + ], +) diff --git a/pkg/tcpip/link/packetsocket/endpoint.go b/pkg/tcpip/link/packetsocket/endpoint.go new file mode 100644 index 000000000..3922c2a04 --- /dev/null +++ b/pkg/tcpip/link/packetsocket/endpoint.go @@ -0,0 +1,50 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package packetsocket provides a link layer endpoint that provides the ability +// to loop outbound packets to any AF_PACKET sockets that may be interested in +// the outgoing packet. +package packetsocket + +import ( + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/nested" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +type endpoint struct { + nested.Endpoint +} + +// New creates a new packetsocket LinkEndpoint. +func New(lower stack.LinkEndpoint) stack.LinkEndpoint { + e := &endpoint{} + e.Endpoint.Init(lower, e) + return e +} + +// WritePacket implements stack.LinkEndpoint.WritePacket. 
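+// The outgoing packet is first handed to the dispatcher (and thus to any
+// bound packet sockets) before being written to the lower endpoint.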
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { + e.Endpoint.DeliverOutboundPacket(r.RemoteLinkAddress, r.LocalLinkAddress, protocol, pkt) + return e.Endpoint.WritePacket(r, gso, protocol, pkt) +} + +// WritePackets implements stack.LinkEndpoint.WritePackets. +func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, *tcpip.Error) { + for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { + e.Endpoint.DeliverOutboundPacket(pkt.EgressRoute.RemoteLinkAddress, pkt.EgressRoute.LocalLinkAddress, pkt.NetworkProtocolNumber, pkt) + } + + return e.Endpoint.WritePackets(r, gso, pkts, proto) +} diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go index c84fe1bb9..467083239 100644 --- a/pkg/tcpip/link/qdisc/fifo/endpoint.go +++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go @@ -107,6 +107,11 @@ func (e *endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protoco e.dispatcher.DeliverNetworkPacket(remote, local, protocol, pkt) } +// DeliverOutboundPacket implements stack.NetworkDispatcher.DeliverOutboundPacket. +func (e *endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + e.dispatcher.DeliverOutboundPacket(remote, local, protocol, pkt) +} + // Attach implements stack.LinkEndpoint.Attach. func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { e.dispatcher = dispatcher @@ -194,6 +199,8 @@ func (e *endpoint) WritePackets(_ *stack.Route, _ *stack.GSO, pkts stack.PacketB // WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket. func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error { + // TODO(gvisor.dev/issue/3267/): Queue these packets as well once + // WriteRawPacket takes PacketBuffer instead of VectorisedView. return e.lower.WriteRawPacket(vv) } @@ -213,3 +220,8 @@ func (e *endpoint) Wait() { func (e *endpoint) ARPHardwareType() header.ARPHardwareType { return e.lower.ARPHardwareType() } + +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + e.lower.AddHeader(local, remote, protocol, pkt) +} diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index a36862c67..507c76b76 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -183,22 +183,29 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress { return e.addr } -// WritePacket writes outbound packets to the file descriptor. If it is not -// currently writable, the packet is dropped. -func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { - // Add the ethernet header here. +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + // Add ethernet header if needed. eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize)) pkt.LinkHeader = buffer.View(eth) ethHdr := &header.EthernetFields{ - DstAddr: r.RemoteLinkAddress, + DstAddr: remote, Type: protocol, } - if r.LocalLinkAddress != "" { - ethHdr.SrcAddr = r.LocalLinkAddress + + // Preserve the src address if it's set in the route. 
+ if local != "" { + ethHdr.SrcAddr = local } else { ethHdr.SrcAddr = e.addr } eth.Encode(ethHdr) +} + +// WritePacket writes outbound packets to the file descriptor. If it is not +// currently writable, the packet is dropped. +func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error { + e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt) v := pkt.Data.ToView() // Transmit the packet. diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index 28a2e88ba..8f3cd9449 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -143,6 +143,10 @@ func (c *testContext) DeliverNetworkPacket(remoteLinkAddr, localLinkAddr tcpip.L c.packetCh <- struct{}{} } +func (c *testContext) DeliverOutboundPacket(remoteLinkAddr, localLinkAddr tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + panic("unimplemented") +} + func (c *testContext) cleanup() { c.ep.Close() closeFDs(&c.txCfg) diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index d9cd4e83a..509076643 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -123,6 +123,11 @@ func (e *endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protoco e.Endpoint.DeliverNetworkPacket(remote, local, protocol, pkt) } +// DeliverOutboundPacket implements stack.NetworkDispatcher.DeliverOutboundPacket. +func (e *endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + e.Endpoint.DeliverOutboundPacket(remote, local, protocol, pkt) +} + func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { writer := e.writer if writer == nil && atomic.LoadUint32(&LogPackets) == 1 { diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index 47446efec..04ae58e59 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -272,21 +272,9 @@ func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) { if d.hasFlags(linux.IFF_TAP) { // Add ethernet header if not provided. if info.Pkt.LinkHeader == nil { - hdr := &header.EthernetFields{ - SrcAddr: info.Route.LocalLinkAddress, - DstAddr: info.Route.RemoteLinkAddress, - Type: info.Proto, - } - if hdr.SrcAddr == "" { - hdr.SrcAddr = d.endpoint.LinkAddress() - } - - eth := make(header.Ethernet, header.EthernetMinimumSize) - eth.Encode(hdr) - vv.AppendView(buffer.View(eth)) - } else { - vv.AppendView(info.Pkt.LinkHeader) + d.endpoint.AddHeader(info.Route.LocalLinkAddress, info.Route.RemoteLinkAddress, info.Proto, info.Pkt) } + vv.AppendView(info.Pkt.LinkHeader) } // Append upper headers. @@ -366,3 +354,30 @@ func (e *tunEndpoint) ARPHardwareType() header.ARPHardwareType { } return header.ARPHardwareNone } + +// AddHeader implements stack.LinkEndpoint.AddHeader. 
+func (e *tunEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + if !e.isTap { + return + } + eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize)) + pkt.LinkHeader = buffer.View(eth) + hdr := &header.EthernetFields{ + SrcAddr: local, + DstAddr: remote, + Type: protocol, + } + if hdr.SrcAddr == "" { + hdr.SrcAddr = e.LinkAddress() + } + + eth.Encode(hdr) +} + +// MaxHeaderLength returns the maximum size of the link layer header. +func (e *tunEndpoint) MaxHeaderLength() uint16 { + if e.isTap { + return header.EthernetMinimumSize + } + return 0 +} diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go index 24a8dc2eb..b152a0f26 100644 --- a/pkg/tcpip/link/waitable/waitable.go +++ b/pkg/tcpip/link/waitable/waitable.go @@ -60,6 +60,15 @@ func (e *Endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protoco e.dispatchGate.Leave() } +// DeliverOutboundPacket implements stack.NetworkDispatcher.DeliverOutboundPacket. +func (e *Endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + if !e.dispatchGate.Enter() { + return + } + e.dispatcher.DeliverOutboundPacket(remote, local, protocol, pkt) + e.dispatchGate.Leave() +} + // Attach implements stack.LinkEndpoint.Attach. It saves the dispatcher and // registers with the lower endpoint as its dispatcher so that "e" is called // for inbound packets. @@ -153,3 +162,8 @@ func (e *Endpoint) Wait() {} func (e *Endpoint) ARPHardwareType() header.ARPHardwareType { return e.lower.ARPHardwareType() } + +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *Endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + e.lower.AddHeader(local, remote, protocol, pkt) +} diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go index ffb2354be..c448a888f 100644 --- a/pkg/tcpip/link/waitable/waitable_test.go +++ b/pkg/tcpip/link/waitable/waitable_test.go @@ -40,6 +40,10 @@ func (e *countedEndpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, e.dispatchCount++ } +func (e *countedEndpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + panic("unimplemented") +} + func (e *countedEndpoint) Attach(dispatcher stack.NetworkDispatcher) { e.attachCount++ e.dispatcher = dispatcher @@ -90,6 +94,11 @@ func (*countedEndpoint) ARPHardwareType() header.ARPHardwareType { // Wait implements stack.LinkEndpoint.Wait. func (*countedEndpoint) Wait() {} +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *countedEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + panic("unimplemented") +} + func TestWaitWrite(t *testing.T) { ep := &countedEndpoint{} wep := New(ep) diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index a5b780ca2..615bae648 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -185,6 +185,11 @@ func (*testObject) ARPHardwareType() header.ARPHardwareType { panic("not implemented") } +// AddHeader implements stack.LinkEndpoint.AddHeader. 
+func (*testObject) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) { + panic("not implemented") +} + func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) { s := stack.New(stack.Options{ NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()}, diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go index eefb4b07f..bca1d940b 100644 --- a/pkg/tcpip/stack/forwarder_test.go +++ b/pkg/tcpip/stack/forwarder_test.go @@ -307,6 +307,11 @@ func (*fwdTestLinkEndpoint) ARPHardwareType() header.ARPHardwareType { panic("not implemented") } +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *fwdTestLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) { + panic("not implemented") +} + func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *fwdTestLinkEndpoint) { // Create a stack with the network protocol and two NICs. s := New(Options{ diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 7b80534e6..fea0ce7e8 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -1200,15 +1200,13 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp // Are any packet sockets listening for this network protocol? packetEPs := n.mu.packetEPs[protocol] - // Check whether there are packet sockets listening for every protocol. - // If we received a packet with protocol EthernetProtocolAll, then the - // previous for loop will have handled it. - if protocol != header.EthernetProtocolAll { - packetEPs = append(packetEPs, n.mu.packetEPs[header.EthernetProtocolAll]...) - } + // Add any other packet sockets that maybe listening for all protocols. + packetEPs = append(packetEPs, n.mu.packetEPs[header.EthernetProtocolAll]...) n.mu.RUnlock() for _, ep := range packetEPs { - ep.HandlePacket(n.id, local, protocol, pkt.Clone()) + p := pkt.Clone() + p.PktType = tcpip.PacketHost + ep.HandlePacket(n.id, local, protocol, p) } if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber { @@ -1311,6 +1309,24 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp } } +// DeliverOutboundPacket implements NetworkDispatcher.DeliverOutboundPacket. +func (n *NIC) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) { + n.mu.RLock() + // We do not deliver to protocol specific packet endpoints as on Linux + // only ETH_P_ALL endpoints get outbound packets. + // Add any other packet sockets that maybe listening for all protocols. + packetEPs := n.mu.packetEPs[header.EthernetProtocolAll] + n.mu.RUnlock() + for _, ep := range packetEPs { + p := pkt.Clone() + p.PktType = tcpip.PacketOutgoing + // Add the link layer header as outgoing packets are intercepted + // before the link layer header is created. + n.linkEP.AddHeader(local, remote, protocol, p) + ep.HandlePacket(n.id, local, protocol, p) + } +} + func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) { // TODO(b/143425874) Decrease the TTL field in forwarded packets. // TODO(b/151227689): Avoid copying the packet when forwarding. 
We can do this diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go index 3bc9fd831..c477e31d8 100644 --- a/pkg/tcpip/stack/nic_test.go +++ b/pkg/tcpip/stack/nic_test.go @@ -89,6 +89,11 @@ func (*testLinkEndpoint) ARPHardwareType() header.ARPHardwareType { panic("not implemented") } +// AddHeader implements stack.LinkEndpoint.AddHeader. +func (e *testLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) { + panic("not implemented") +} + var _ NetworkEndpoint = (*testIPv6Endpoint)(nil) // An IPv6 NetworkEndpoint that throws away outgoing packets. diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go index e3556d5d2..5d6865e35 100644 --- a/pkg/tcpip/stack/packet_buffer.go +++ b/pkg/tcpip/stack/packet_buffer.go @@ -79,6 +79,10 @@ type PacketBuffer struct { // NatDone indicates if the packet has been manipulated as per NAT // iptables rule. NatDone bool + + // PktType indicates the SockAddrLink.PacketType of the packet as defined in + // https://www.man7.org/linux/man-pages/man7/packet.7.html. + PktType tcpip.PacketType } // Clone makes a copy of pk. It clones the Data field, which creates a new diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index f260eeb7f..cd4b7a449 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -330,8 +330,7 @@ type NetworkProtocol interface { } // NetworkDispatcher contains the methods used by the network stack to deliver -// packets to the appropriate network endpoint after it has been handled by -// the data link layer. +// inbound/outbound packets to the appropriate network/packet(if any) endpoints. type NetworkDispatcher interface { // DeliverNetworkPacket finds the appropriate network protocol endpoint // and hands the packet over for further processing. @@ -342,6 +341,16 @@ type NetworkDispatcher interface { // // DeliverNetworkPacket takes ownership of pkt. DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) + + // DeliverOutboundPacket is called by link layer when a packet is being + // sent out. + // + // pkt.LinkHeader may or may not be set before calling + // DeliverOutboundPacket. Some packets do not have link headers (e.g. + // packets sent via loopback), and won't have the field set. + // + // DeliverOutboundPacket takes ownership of pkt. + DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) } // LinkEndpointCapabilities is the type associated with the capabilities @@ -443,6 +452,9 @@ type LinkEndpoint interface { // See: // https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30 ARPHardwareType() header.ARPHardwareType + + // AddHeader adds a link layer header to pkt if required. + AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) } // InjectableLinkEndpoint is a LinkEndpoint where inbound packets are diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 48ad56d4d..ff14a3b3c 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -316,6 +316,28 @@ const ( ShutdownWrite ) +// PacketType is used to indicate the destination of the packet. +type PacketType uint8 + +const ( + // PacketHost indicates a packet addressed to the local host. 
+ PacketHost PacketType = iota + + // PacketOtherHost indicates an outgoing packet addressed to + // another host caught by a NIC in promiscuous mode. + PacketOtherHost + + // PacketOutgoing for a packet originating from the local host + // that is looped back to a packet socket. + PacketOutgoing + + // PacketBroadcast indicates a link layer broadcast packet. + PacketBroadcast + + // PacketMulticast indicates a link layer multicast packet. + PacketMulticast +) + // FullAddress represents a full transport node address, as required by the // Connect() and Bind() methods. // @@ -555,6 +577,9 @@ type Endpoint interface { type LinkPacketInfo struct { // Protocol is the NetworkProtocolNumber for the packet. Protocol NetworkProtocolNumber + + // PktType is used to indicate the destination of the packet. + PktType PacketType } // PacketEndpoint are additional methods that are only implemented by Packet diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index 7b2083a09..8f167391f 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -441,6 +441,7 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, Addr: tcpip.Address(hdr.SourceAddress()), } packet.packetInfo.Protocol = netProto + packet.packetInfo.PktType = pkt.PktType } else { // Guess the would-be ethernet header. packet.senderAddr = tcpip.FullAddress{ @@ -448,30 +449,53 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, Addr: tcpip.Address(localAddr), } packet.packetInfo.Protocol = netProto + packet.packetInfo.PktType = pkt.PktType } if ep.cooked { // Cooked packets can simply be queued. - packet.data = pkt.Data + switch pkt.PktType { + case tcpip.PacketHost: + packet.data = pkt.Data + case tcpip.PacketOutgoing: + // Strip Link Header from the Header. + pkt.Header = buffer.NewPrependableFromView(pkt.Header.View()[len(pkt.LinkHeader):]) + combinedVV := pkt.Header.View().ToVectorisedView() + combinedVV.Append(pkt.Data) + packet.data = combinedVV + default: + panic(fmt.Sprintf("unexpected PktType in pkt: %+v", pkt)) + } + } else { // Raw packets need their ethernet headers prepended before // queueing. var linkHeader buffer.View - if len(pkt.LinkHeader) == 0 { - // We weren't provided with an actual ethernet header, - // so fake one. - ethFields := header.EthernetFields{ - SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), - DstAddr: localAddr, - Type: netProto, + var combinedVV buffer.VectorisedView + if pkt.PktType != tcpip.PacketOutgoing { + if len(pkt.LinkHeader) == 0 { + // We weren't provided with an actual ethernet header, + // so fake one. + ethFields := header.EthernetFields{ + SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), + DstAddr: localAddr, + Type: netProto, + } + fakeHeader := make(header.Ethernet, header.EthernetMinimumSize) + fakeHeader.Encode(ðFields) + linkHeader = buffer.View(fakeHeader) + } else { + linkHeader = append(buffer.View(nil), pkt.LinkHeader...) } - fakeHeader := make(header.Ethernet, header.EthernetMinimumSize) - fakeHeader.Encode(ðFields) - linkHeader = buffer.View(fakeHeader) - } else { - linkHeader = append(buffer.View(nil), pkt.LinkHeader...) + combinedVV = linkHeader.ToVectorisedView() + } + if pkt.PktType == tcpip.PacketOutgoing { + // For outgoing packets the Link, Network and Transport + // headers are in the pkt.Header fields normally unless + // a Raw socket is in use in which case pkt.Header could + // be nil. 
+ combinedVV.AppendView(pkt.Header.View()) } - combinedVV := linkHeader.ToVectorisedView() combinedVV.Append(pkt.Data) packet.data = combinedVV } diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 55d45aaa6..9f52438c2 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -90,6 +90,7 @@ go_library( "//pkg/tcpip", "//pkg/tcpip/link/fdbased", "//pkg/tcpip/link/loopback", + "//pkg/tcpip/link/packetsocket", "//pkg/tcpip/link/qdisc/fifo", "//pkg/tcpip/link/sniffer", "//pkg/tcpip/network/arp", diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 14d2f56a5..4e1fa7665 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" "gvisor.dev/gvisor/pkg/tcpip/link/loopback" + "gvisor.dev/gvisor/pkg/tcpip/link/packetsocket" "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" @@ -252,6 +253,9 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000) } + // Enable support for AF_PACKET sockets to receive outgoing packets. + linkEP = packetsocket.New(linkEP) + log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels) if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { return err diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc index e94ddcb77..40aa9326d 100644 --- a/test/syscalls/linux/packet_socket.cc +++ b/test/syscalls/linux/packet_socket.cc @@ -417,6 +417,122 @@ TEST_P(CookedPacketTest, BindDrop) { EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0)); } +// Verify that we receive outbound packets. This test requires at least one +// non loopback interface so that we can actually capture an outgoing packet. +TEST_P(CookedPacketTest, ReceiveOutbound) { + // Only ETH_P_ALL sockets can receive outbound packets on linux. + SKIP_IF(GetParam() != ETH_P_ALL); + + // Let's use a simple IP payload: a UDP datagram. + FileDescriptor udp_sock = + ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + + struct ifaddrs* if_addr_list = nullptr; + auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); }); + + ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds()); + + // Get interface other than loopback. + struct ifreq ifr = {}; + for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) { + if (strcmp(i->ifa_name, "lo") != 0) { + strncpy(ifr.ifr_name, i->ifa_name, sizeof(ifr.ifr_name)); + break; + } + } + + // Skip if no interface is available other than loopback. + if (strlen(ifr.ifr_name) == 0) { + GTEST_SKIP(); + } + + // Get interface index and name. + EXPECT_THAT(ioctl(socket_, SIOCGIFINDEX, &ifr), SyscallSucceeds()); + EXPECT_NE(ifr.ifr_ifindex, 0); + int ifindex = ifr.ifr_ifindex; + + constexpr int kMACSize = 6; + char hwaddr[kMACSize]; + // Get interface address. + ASSERT_THAT(ioctl(socket_, SIOCGIFHWADDR, &ifr), SyscallSucceeds()); + ASSERT_THAT(ifr.ifr_hwaddr.sa_family, + AnyOf(Eq(ARPHRD_NONE), Eq(ARPHRD_ETHER))); + memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, kMACSize); + + // Just send it to the google dns server 8.8.8.8. It's UDP we don't care + // if it actually gets to the DNS Server we just want to see that we receive + // it on our AF_PACKET socket. 
+ // + // NOTE: We just want to pick an IP that is non-local to avoid having to + // handle ARP as this should cause the UDP packet to be sent to the default + // gateway configured for the system under test. Otherwise the only packet we + // will see is the ARP query unless we picked an IP which will actually + // resolve. The test is a bit brittle but this was the best compromise for + // now. + struct sockaddr_in dest = {}; + ASSERT_EQ(inet_pton(AF_INET, "8.8.8.8", &dest.sin_addr.s_addr), 1); + dest.sin_family = AF_INET; + dest.sin_port = kPort; + EXPECT_THAT(sendto(udp_sock.get(), kMessage, sizeof(kMessage), 0, + reinterpret_cast(&dest), sizeof(dest)), + SyscallSucceedsWithValue(sizeof(kMessage))); + + // Wait and make sure the socket receives the data. + struct pollfd pfd = {}; + pfd.fd = socket_; + pfd.events = POLLIN; + EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(1)); + + // Now read and check that the packet is the one we just sent. + // Read and verify the data. + constexpr size_t packet_size = + sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage); + char buf[64]; + struct sockaddr_ll src = {}; + socklen_t src_len = sizeof(src); + ASSERT_THAT(recvfrom(socket_, buf, sizeof(buf), 0, + reinterpret_cast(&src), &src_len), + SyscallSucceedsWithValue(packet_size)); + + // sockaddr_ll ends with an 8 byte physical address field, but ethernet + // addresses only use 6 bytes. Linux used to return sizeof(sockaddr_ll)-2 + // here, but since commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c returns + // sizeof(sockaddr_ll). + ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2))); + + // Verify the source address. + EXPECT_EQ(src.sll_family, AF_PACKET); + EXPECT_EQ(src.sll_ifindex, ifindex); + EXPECT_EQ(src.sll_halen, ETH_ALEN); + EXPECT_EQ(ntohs(src.sll_protocol), ETH_P_IP); + EXPECT_EQ(src.sll_pkttype, PACKET_OUTGOING); + // Verify the link address of the interface matches that of the non + // non loopback interface address we stored above. + for (int i = 0; i < src.sll_halen; i++) { + EXPECT_EQ(src.sll_addr[i], hwaddr[i]); + } + + // Verify the IP header. + struct iphdr ip = {}; + memcpy(&ip, buf, sizeof(ip)); + EXPECT_EQ(ip.ihl, 5); + EXPECT_EQ(ip.version, 4); + EXPECT_EQ(ip.tot_len, htons(packet_size)); + EXPECT_EQ(ip.protocol, IPPROTO_UDP); + EXPECT_EQ(ip.daddr, dest.sin_addr.s_addr); + EXPECT_NE(ip.saddr, htonl(INADDR_LOOPBACK)); + + // Verify the UDP header. + struct udphdr udp = {}; + memcpy(&udp, buf + sizeof(iphdr), sizeof(udp)); + EXPECT_EQ(udp.dest, kPort); + EXPECT_EQ(udp.len, htons(sizeof(udphdr) + sizeof(kMessage))); + + // Verify the payload. + char* payload = reinterpret_cast(buf + sizeof(iphdr) + sizeof(udphdr)); + EXPECT_EQ(strncmp(payload, kMessage, sizeof(kMessage)), 0); +} + // Bind with invalid address. TEST_P(CookedPacketTest, BindFail) { // Null address. -- cgit v1.2.3 From 39525d64cbd5901ea80651e323c8554af132f4dd Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Wed, 22 Jul 2020 15:40:44 -0700 Subject: Add O_APPEND support in vfs2 gofer. Helps in fixing open syscall tests: AppendConcurrentWrite and AppendOnly. We also now update the file size for seekable special files (regular files) which we were not doing earlier. 
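As a purely illustrative aside (this is not the gofer code; memFile and its pwrite method are invented names), here is a minimal Go sketch of the O_APPEND pattern described above: an appending write ignores the requested offset, starts at the current file size, and updates the size once the bytes are written.

package main

import "fmt"

// memFile is a stand-in for a regular file; only the size/offset
// bookkeeping relevant to O_APPEND is modelled here.
type memFile struct {
	data     []byte
	appendFl bool // opened with O_APPEND
}

// pwrite returns bytes written and the final offset. With O_APPEND the
// requested offset is ignored: the write starts at the current size and
// the size is updated after the write.
func (f *memFile) pwrite(buf []byte, off int64) (int64, int64) {
	if f.appendFl {
		off = int64(len(f.data)) // start the write at the current size
	}
	end := off + int64(len(buf))
	if int64(len(f.data)) < end {
		grown := make([]byte, end)
		copy(grown, f.data)
		f.data = grown // grow the file, i.e. update its size
	}
	copy(f.data[off:end], buf)
	return int64(len(buf)), end
}

func main() {
	f := &memFile{data: []byte("abc"), appendFl: true}
	n, off := f.pwrite([]byte("XY"), 0) // requested offset 0 is ignored
	fmt.Println(n, off, string(f.data)) // 2 5 abcXY
}
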
Updates #2923 PiperOrigin-RevId: 322670843 --- pkg/sentry/fsimpl/gofer/regular_file.go | 41 +++++++++++++++++++----- pkg/sentry/fsimpl/gofer/special_file.go | 56 +++++++++++++++++++++++++++------ test/syscalls/linux/open.cc | 4 +-- 3 files changed, 83 insertions(+), 18 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index f10350c97..02317a133 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -155,26 +155,53 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts // PWrite implements vfs.FileDescriptionImpl.PWrite. func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + n, _, err := fd.pwrite(ctx, src, offset, opts) + return n, err +} + +// pwrite returns the number of bytes written, final offset, error. The final +// offset should be ignored by PWrite. +func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) { if offset < 0 { - return 0, syserror.EINVAL + return 0, offset, syserror.EINVAL } // Check that flags are supported. // // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, syserror.EOPNOTSUPP + return 0, offset, syserror.EOPNOTSUPP + } + + d := fd.dentry() + // If the fd was opened with O_APPEND, make sure the file size is updated. + // There is a possible race here if size is modified externally after + // metadata cache is updated. + if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() { + if err := d.updateFromGetattr(ctx); err != nil { + return 0, offset, err + } } + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + // Set offset to file size if the fd was opened with O_APPEND. + if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 { + // Holding d.metadataMu is sufficient for reading d.size. + offset = int64(d.size) + } limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes()) if err != nil { - return 0, err + return 0, offset, err } src = src.TakeFirst64(limit) + n, err := fd.pwriteLocked(ctx, src, offset, opts) + return n, offset + n, err +} +// Preconditions: fd.dentry().metatdataMu must be locked. +func (fd *regularFileFD) pwriteLocked(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { d := fd.dentry() - d.metadataMu.Lock() - defer d.metadataMu.Unlock() if d.fs.opts.interop != InteropModeShared { // Compare Linux's mm/filemap.c:__generic_file_write_iter() => // file_update_time(). This is d.touchCMtime(), but without locking @@ -237,8 +264,8 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off // Write implements vfs.FileDescriptionImpl.Write. 
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { fd.mu.Lock() - n, err := fd.PWrite(ctx, src, fd.off, opts) - fd.off += n + n, off, err := fd.pwrite(ctx, src, fd.off, opts) + fd.off = off fd.mu.Unlock() return n, err } diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index a7b53b2d2..811528982 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -16,6 +16,7 @@ package gofer import ( "sync" + "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -144,7 +145,7 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs // mmap due to lock ordering; MM locks precede dentry.dataMu. That doesn't // hold here since specialFileFD doesn't client-cache data. Just buffer the // read instead. - if d := fd.dentry(); d.fs.opts.interop != InteropModeShared { + if d := fd.dentry(); d.cachedMetadataAuthoritative() { d.touchAtime(fd.vfsfd.Mount()) } buf := make([]byte, dst.NumBytes()) @@ -176,39 +177,76 @@ func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts // PWrite implements vfs.FileDescriptionImpl.PWrite. func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + n, _, err := fd.pwrite(ctx, src, offset, opts) + return n, err +} + +// pwrite returns the number of bytes written, final offset, error. The final +// offset should be ignored by PWrite. +func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) { if fd.seekable && offset < 0 { - return 0, syserror.EINVAL + return 0, offset, syserror.EINVAL } // Check that flags are supported. // // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { - return 0, syserror.EOPNOTSUPP + return 0, offset, syserror.EOPNOTSUPP + } + + d := fd.dentry() + // If the regular file fd was opened with O_APPEND, make sure the file size + // is updated. There is a possible race here if size is modified externally + // after metadata cache is updated. + if fd.seekable && fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() { + if err := d.updateFromGetattr(ctx); err != nil { + return 0, offset, err + } } if fd.seekable { + // We need to hold the metadataMu *while* writing to a regular file. + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + + // Set offset to file size if the regular file was opened with O_APPEND. + if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 { + // Holding d.metadataMu is sufficient for reading d.size. + offset = int64(d.size) + } limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes()) if err != nil { - return 0, err + return 0, offset, err } src = src.TakeFirst64(limit) } // Do a buffered write. See rationale in PRead. - if d := fd.dentry(); d.fs.opts.interop != InteropModeShared { + if d.cachedMetadataAuthoritative() { d.touchCMtime() } buf := make([]byte, src.NumBytes()) // Don't do partial writes if we get a partial read from src. if _, err := src.CopyIn(ctx, buf); err != nil { - return 0, err + return 0, offset, err } n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) if err == syserror.EAGAIN { err = syserror.ErrWouldBlock } - return int64(n), err + finalOff = offset + // Update file size for regular files. 
+ if fd.seekable { + finalOff += int64(n) + // d.metadataMu is already locked at this point. + if uint64(finalOff) > d.size { + d.dataMu.Lock() + defer d.dataMu.Unlock() + atomic.StoreUint64(&d.size, uint64(finalOff)) + } + } + return int64(n), finalOff, err } // Write implements vfs.FileDescriptionImpl.Write. @@ -218,8 +256,8 @@ func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts } fd.mu.Lock() - n, err := fd.PWrite(ctx, src, fd.off, opts) - fd.off += n + n, off, err := fd.pwrite(ctx, src, fd.off, opts) + fd.off = off fd.mu.Unlock() return n, err } diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index bb7d108e8..bf350946b 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -235,7 +235,7 @@ TEST_F(OpenTest, AppendOnly) { ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND)); EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0)); - // Then try to write to the first file and make sure the bytes are appended. + // Then try to write to the first fd and make sure the bytes are appended. EXPECT_THAT(WriteFd(fd1.get(), buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); @@ -247,7 +247,7 @@ TEST_F(OpenTest, AppendOnly) { EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(kBufSize * 2)); - // Then try to write to the second file and make sure the bytes are appended. + // Then try to write to the second fd and make sure the bytes are appended. EXPECT_THAT(WriteFd(fd2.get(), buf.data(), buf.size()), SyscallSucceedsWithValue(buf.size())); -- cgit v1.2.3 From 89bd71c942146f9a77aabab8bc832ec5c3912d6b Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 5 Jun 2020 11:22:44 -0700 Subject: iptables: don't NAT existing connections Fixes a NAT bug that manifested as: - A SYN was sent from gVisor to another host, unaffected by iptables. - The corresponding SYN/ACK was NATted by a PREROUTING REDIRECT rule despite being part of the existing connection. - The socket that sent the SYN never received the SYN/ACK and thus a connection could not be established. We handle this (as Linux does) by tracking all connections, inserting a no-op conntrack rule for new connections with no rules of their own. Needed for istio support (#170). --- pkg/tcpip/stack/conntrack.go | 136 +++++++++++++++++++++++++++++------- pkg/tcpip/stack/iptables.go | 21 +++++- pkg/tcpip/stack/iptables_targets.go | 2 +- test/iptables/iptables_test.go | 7 ++ test/iptables/nat.go | 52 ++++++++++++++ 5 files changed, 189 insertions(+), 29 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go index d39baf620..559a1c4dd 100644 --- a/pkg/tcpip/stack/conntrack.go +++ b/pkg/tcpip/stack/conntrack.go @@ -49,7 +49,8 @@ const ( type manipType int const ( - manipDstPrerouting manipType = iota + manipNone manipType = iota + manipDstPrerouting manipDstOutput ) @@ -113,13 +114,11 @@ type conn struct { // update the state of tcb. It is immutable. tcbHook Hook - // mu protects tcb. + // mu protects all mutable state. mu sync.Mutex `state:"nosave"` - // tcb is TCB control block. It is used to keep track of states // of tcp connection and is protected by mu. tcb tcpconntrack.TCB - // lastUsed is the last time the connection saw a relevant packet, and // is updated by each packet on the connection. It is protected by mu. 
lastUsed time.Time `state:".(unixTime)"` @@ -141,8 +140,26 @@ func (cn *conn) timedOut(now time.Time) bool { return now.Sub(cn.lastUsed) > defaultTimeout } +// update the connection tracking state. +// +// Precondition: ct.mu must be held. +func (ct *conn) updateLocked(tcpHeader header.TCP, hook Hook) { + // Update the state of tcb. tcb assumes it's always initialized on the + // client. However, we only need to know whether the connection is + // established or not, so the client/server distinction isn't important. + // TODO(gvisor.dev/issue/170): Add support in tcpconntrack to handle + // other tcp states. + if ct.tcb.IsEmpty() { + ct.tcb.Init(tcpHeader) + } else if hook == ct.tcbHook { + ct.tcb.UpdateStateOutbound(tcpHeader) + } else { + ct.tcb.UpdateStateInbound(tcpHeader) + } +} + // ConnTrack tracks all connections created for NAT rules. Most users are -// expected to only call handlePacket and createConnFor. +// expected to only call handlePacket, insertRedirectConn, and maybeInsertNoop. // // ConnTrack keeps all connections in a slice of buckets, each of which holds a // linked list of tuples. This gives us some desirable properties: @@ -248,8 +265,7 @@ func (ct *ConnTrack) connFor(pkt *PacketBuffer) (*conn, direction) { return nil, dirOriginal } -// createConnFor creates a new conn for pkt. -func (ct *ConnTrack) createConnFor(pkt *PacketBuffer, hook Hook, rt RedirectTarget) *conn { +func (ct *ConnTrack) insertRedirectConn(pkt *PacketBuffer, hook Hook, rt RedirectTarget) *conn { tid, err := packetToTupleID(pkt) if err != nil { return nil @@ -272,10 +288,15 @@ func (ct *ConnTrack) createConnFor(pkt *PacketBuffer, hook Hook, rt RedirectTarg manip = manipDstOutput } conn := newConn(tid, replyTID, manip, hook) + ct.insertConn(conn) + return conn +} +// insertConn inserts conn into the appropriate table bucket. +func (ct *ConnTrack) insertConn(conn *conn) { // Lock the buckets in the correct order. - tupleBucket := ct.bucket(tid) - replyBucket := ct.bucket(replyTID) + tupleBucket := ct.bucket(conn.original.tupleID) + replyBucket := ct.bucket(conn.reply.tupleID) ct.mu.RLock() defer ct.mu.RUnlock() if tupleBucket < replyBucket { @@ -289,22 +310,37 @@ func (ct *ConnTrack) createConnFor(pkt *PacketBuffer, hook Hook, rt RedirectTarg ct.buckets[tupleBucket].mu.Lock() } - // Add the tuple to the map. - ct.buckets[tupleBucket].tuples.PushFront(&conn.original) - ct.buckets[replyBucket].tuples.PushFront(&conn.reply) + // Now that we hold the locks, ensure the tuple hasn't been inserted by + // another thread. + alreadyInserted := false + for other := ct.buckets[tupleBucket].tuples.Front(); other != nil; other = other.Next() { + if other.tupleID == conn.original.tupleID { + alreadyInserted = true + break + } + } + + if !alreadyInserted { + // Add the tuple to the map. + ct.buckets[tupleBucket].tuples.PushFront(&conn.original) + ct.buckets[replyBucket].tuples.PushFront(&conn.reply) + } // Unlocking can happen in any order. ct.buckets[tupleBucket].mu.Unlock() if tupleBucket != replyBucket { ct.buckets[replyBucket].mu.Unlock() } - - return conn } // handlePacketPrerouting manipulates ports for packets in Prerouting hook. // TODO(gvisor.dev/issue/170): Change address for Prerouting hook. func handlePacketPrerouting(pkt *PacketBuffer, conn *conn, dir direction) { + // If this is a noop entry, don't do anything. 
+ if conn.manip == manipNone { + return + } + netHeader := header.IPv4(pkt.NetworkHeader) tcpHeader := header.TCP(pkt.TransportHeader) @@ -322,12 +358,22 @@ func handlePacketPrerouting(pkt *PacketBuffer, conn *conn, dir direction) { netHeader.SetSourceAddress(conn.original.dstAddr) } + // TODO(gvisor.dev/issue/170): TCP checksums aren't usually validated + // on inbound packets, so we don't recalculate them. However, we should + // support cases when they are validated, e.g. when we can't offload + // receive checksumming. + netHeader.SetChecksum(0) netHeader.SetChecksum(^netHeader.CalculateChecksum()) } // handlePacketOutput manipulates ports for packets in Output hook. func handlePacketOutput(pkt *PacketBuffer, conn *conn, gso *GSO, r *Route, dir direction) { + // If this is a noop entry, don't do anything. + if conn.manip == manipNone { + return + } + netHeader := header.IPv4(pkt.NetworkHeader) tcpHeader := header.TCP(pkt.TransportHeader) @@ -362,20 +408,31 @@ func handlePacketOutput(pkt *PacketBuffer, conn *conn, gso *GSO, r *Route, dir d } // handlePacket will manipulate the port and address of the packet if the -// connection exists. -func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Route) { +// connection exists. Returns whether, after the packet traverses the tables, +// it should create a new entry in the table. +func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Route) bool { if pkt.NatDone { - return + return false } if hook != Prerouting && hook != Output { - return + return false + } + + // TODO(gvisor.dev/issue/170): Support other transport protocols. + if pkt.NetworkHeader == nil || header.IPv4(pkt.NetworkHeader).TransportProtocol() != header.TCPProtocolNumber { + return false } conn, dir := ct.connFor(pkt) + // Connection or Rule not found for the packet. if conn == nil { - // Connection not found for the packet or the packet is invalid. - return + return true + } + + tcpHeader := header.TCP(pkt.TransportHeader) + if tcpHeader == nil { + return false } switch hook { @@ -395,14 +452,39 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Rou // Mark the connection as having been used recently so it isn't reaped. conn.lastUsed = time.Now() // Update connection state. - if tcpHeader := header.TCP(pkt.TransportHeader); conn.tcb.IsEmpty() { - conn.tcb.Init(tcpHeader) - conn.tcbHook = hook - } else if hook == conn.tcbHook { - conn.tcb.UpdateStateOutbound(tcpHeader) - } else { - conn.tcb.UpdateStateInbound(tcpHeader) + conn.updateLocked(header.TCP(pkt.TransportHeader), hook) + + return false +} + +// maybeInsertNoop tries to insert a no-op connection entry to keep connections +// from getting clobbered when replies arrive. It only inserts if there isn't +// already a connection for pkt. +// +// This should be called after traversing iptables rules only, to ensure that +// pkt.NatDone is set correctly. +func (ct *ConnTrack) maybeInsertNoop(pkt *PacketBuffer, hook Hook) { + // If there were a rule applying to this packet, it would be marked + // with NatDone. + if pkt.NatDone { + return + } + + // We only track TCP connections. + if pkt.NetworkHeader == nil || header.IPv4(pkt.NetworkHeader).TransportProtocol() != header.TCPProtocolNumber { + return + } + + // This is the first packet we're seeing for the TCP connection. Insert + // the noop entry (an identity mapping) so that the response doesn't + // get NATed, breaking the connection. 
+ tid, err := packetToTupleID(pkt) + if err != nil { + return } + conn := newConn(tid, tid.reply(), manipNone, hook) + conn.updateLocked(header.TCP(pkt.TransportHeader), hook) + ct.insertConn(conn) } // bucket gets the conntrack bucket for a tupleID. diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go index 5f647c5fe..ca1dda695 100644 --- a/pkg/tcpip/stack/iptables.go +++ b/pkg/tcpip/stack/iptables.go @@ -245,13 +245,18 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr // Packets are manipulated only if connection and matching // NAT rule exists. - it.connections.handlePacket(pkt, hook, gso, r) + shouldTrack := it.connections.handlePacket(pkt, hook, gso, r) // Go through each table containing the hook. it.mu.RLock() defer it.mu.RUnlock() priorities := it.priorities[hook] for _, tableID := range priorities { + // If handlePacket already NATed the packet, we don't need to + // check the NAT table. + if tableID == natID && pkt.NatDone { + continue + } table := it.tables[tableID] ruleIdx := table.BuiltinChains[hook] switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, address, nicName); verdict { @@ -281,6 +286,20 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr } } + // If this connection should be tracked, try to add an entry for it. If + // traversing the nat table didn't end in adding an entry, + // maybeInsertNoop will add a no-op entry for the connection. This is + // needeed when establishing connections so that the SYN/ACK reply to an + // outgoing SYN is delivered to the correct endpoint rather than being + // redirected by a prerouting rule. + // + // From the iptables documentation: "If there is no rule, a `null' + // binding is created: this usually does not map the packet, but exists + // to ensure we don't map another stream over an existing one." + if shouldTrack { + it.connections.maybeInsertNoop(pkt, hook) + } + // Every table returned Accept. return true } diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go index d43f60c67..dc88033c7 100644 --- a/pkg/tcpip/stack/iptables_targets.go +++ b/pkg/tcpip/stack/iptables_targets.go @@ -153,7 +153,7 @@ func (rt RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, gso // Set up conection for matching NAT rule. Only the first // packet of the connection comes here. Other packets will be // manipulated in connection tracking. 
- if conn := ct.createConnFor(pkt, hook, rt); conn != nil { + if conn := ct.insertRedirectConn(pkt, hook, rt); conn != nil { ct.handlePacket(pkt, hook, gso, r) } default: diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index f5ac79370..f303030aa 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -263,6 +263,13 @@ func TestNATPreRedirectTCPPort(t *testing.T) { singleTest(t, NATPreRedirectTCPPort{}) } +func TestNATPreRedirectTCPOutgoing(t *testing.T) { + singleTest(t, NATPreRedirectTCPOutgoing{}) +} + +func TestNATOutRedirectTCPIncoming(t *testing.T) { + singleTest(t, NATOutRedirectTCPIncoming{}) +} func TestNATOutRedirectUDPPort(t *testing.T) { singleTest(t, NATOutRedirectUDPPort{}) } diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 8562b0820..149dec2bb 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -28,6 +28,8 @@ const ( func init() { RegisterTestCase(NATPreRedirectUDPPort{}) RegisterTestCase(NATPreRedirectTCPPort{}) + RegisterTestCase(NATPreRedirectTCPOutgoing{}) + RegisterTestCase(NATOutRedirectTCPIncoming{}) RegisterTestCase(NATOutRedirectUDPPort{}) RegisterTestCase(NATOutRedirectTCPPort{}) RegisterTestCase(NATDropUDP{}) @@ -91,6 +93,56 @@ func (NATPreRedirectTCPPort) LocalAction(ip net.IP) error { return connectTCP(ip, dropPort, sendloopDuration) } +// NATPreRedirectTCPOutgoing verifies that outgoing TCP connections aren't +// affected by PREROUTING connection tracking. +type NATPreRedirectTCPOutgoing struct{} + +// Name implements TestCase.Name. +func (NATPreRedirectTCPOutgoing) Name() string { + return "NATPreRedirectTCPOutgoing" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATPreRedirectTCPOutgoing) ContainerAction(ip net.IP) error { + // Redirect all incoming TCP traffic to a closed port. + if err := natTable("-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { + return err + } + + // Establish a connection to the host process. + return connectTCP(ip, acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (NATPreRedirectTCPOutgoing) LocalAction(ip net.IP) error { + return listenTCP(acceptPort, sendloopDuration) +} + +// NATOutRedirectTCPIncoming verifies that incoming TCP connections aren't +// affected by OUTPUT connection tracking. +type NATOutRedirectTCPIncoming struct{} + +// Name implements TestCase.Name. +func (NATOutRedirectTCPIncoming) Name() string { + return "NATOutRedirectTCPIncoming" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATOutRedirectTCPIncoming) ContainerAction(ip net.IP) error { + // Redirect all outgoing TCP traffic to a closed port. + if err := natTable("-A", "OUTPUT", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { + return err + } + + // Establish a connection to the host process. + return listenTCP(acceptPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (NATOutRedirectTCPIncoming) LocalAction(ip net.IP) error { + return connectTCP(ip, acceptPort, sendloopDuration) +} + // NATOutRedirectUDPPort tests that packets are redirected to different port. type NATOutRedirectUDPPort struct{} -- cgit v1.2.3 From fb8be7e6273f5a646cdf48e38743a2507a4bf64f Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 9 Jul 2020 22:37:11 -0700 Subject: make connect(2) fail when dest is unreachable Previously, ICMP destination unreachable datagrams were ignored by TCP endpoints. 
This caused connect to hang when an intermediate router couldn't find a route to the host. This manifested as a Kokoro error when Docker IPv6 was enabled. The Ruby image test would try to install the sinatra gem and hang indefinitely attempting to use an IPv6 address. Fixes #3079. --- pkg/tcpip/header/icmpv4.go | 1 + pkg/tcpip/header/icmpv6.go | 11 +- pkg/tcpip/network/ipv4/icmp.go | 3 + pkg/tcpip/network/ipv6/icmp.go | 2 + pkg/tcpip/stack/registration.go | 5 +- pkg/tcpip/transport/tcp/connect.go | 6 + pkg/tcpip/transport/tcp/endpoint.go | 26 +++- pkg/test/dockerutil/exec.go | 1 - test/packetimpact/runner/packetimpact_test.go | 8 +- test/packetimpact/testbench/connections.go | 58 ++++++++- test/packetimpact/testbench/layers.go | 6 +- test/packetimpact/tests/BUILD | 10 ++ .../tests/tcp_network_unreachable_test.go | 139 +++++++++++++++++++++ 13 files changed, 262 insertions(+), 14 deletions(-) create mode 100644 test/packetimpact/tests/tcp_network_unreachable_test.go (limited to 'test') diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go index 7908c5744..1a631b31a 100644 --- a/pkg/tcpip/header/icmpv4.go +++ b/pkg/tcpip/header/icmpv4.go @@ -72,6 +72,7 @@ const ( // Values for ICMP code as defined in RFC 792. const ( ICMPv4TTLExceeded = 0 + ICMPv4HostUnreachable = 1 ICMPv4PortUnreachable = 3 ICMPv4FragmentationNeeded = 4 ) diff --git a/pkg/tcpip/header/icmpv6.go b/pkg/tcpip/header/icmpv6.go index c7ee2de57..a13b4b809 100644 --- a/pkg/tcpip/header/icmpv6.go +++ b/pkg/tcpip/header/icmpv6.go @@ -110,9 +110,16 @@ const ( ICMPv6RedirectMsg ICMPv6Type = 137 ) -// Values for ICMP code as defined in RFC 4443. +// Values for ICMP destination unreachable code as defined in RFC 4443 section +// 3.1. const ( - ICMPv6PortUnreachable = 4 + ICMPv6NetworkUnreachable = 0 + ICMPv6Prohibited = 1 + ICMPv6BeyondScope = 2 + ICMPv6AddressUnreachable = 3 + ICMPv6PortUnreachable = 4 + ICMPv6Policy = 5 + ICMPv6RejectRoute = 6 ) // Type is the ICMP type field. diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go index 1b67aa066..83e71cb8c 100644 --- a/pkg/tcpip/network/ipv4/icmp.go +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -129,6 +129,9 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) { pkt.Data.TrimFront(header.ICMPv4MinimumSize) switch h.Code() { + case header.ICMPv4HostUnreachable: + e.handleControl(stack.ControlNoRoute, 0, pkt) + case header.ICMPv4PortUnreachable: e.handleControl(stack.ControlPortUnreachable, 0, pkt) diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go index 3b79749b5..ff1cb53dd 100644 --- a/pkg/tcpip/network/ipv6/icmp.go +++ b/pkg/tcpip/network/ipv6/icmp.go @@ -128,6 +128,8 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme } pkt.Data.TrimFront(header.ICMPv6DstUnreachableMinimumSize) switch header.ICMPv6(hdr).Code() { + case header.ICMPv6NetworkUnreachable: + e.handleControl(stack.ControlNetworkUnreachable, 0, pkt) case header.ICMPv6PortUnreachable: e.handleControl(stack.ControlPortUnreachable, 0, pkt) } diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index cd4b7a449..9e1b2d25f 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -52,8 +52,11 @@ type TransportEndpointID struct { type ControlType int // The following are the allowed values for ControlType values. +// TODO(http://gvisor.dev/issue/3210): Support time exceeded messages. 
const ( - ControlPacketTooBig ControlType = iota + ControlNetworkUnreachable ControlType = iota + ControlNoRoute + ControlPacketTooBig ControlPortUnreachable ControlUnknown ) diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 81b740115..1798510bc 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -490,6 +490,9 @@ func (h *handshake) resolveRoute() *tcpip.Error { <-h.ep.undrain h.ep.mu.Lock() } + if n¬ifyError != 0 { + return h.ep.takeLastError() + } } // Wait for notification. @@ -616,6 +619,9 @@ func (h *handshake) execute() *tcpip.Error { <-h.ep.undrain h.ep.mu.Lock() } + if n¬ifyError != 0 { + return h.ep.takeLastError() + } case wakerForNewSegment: if err := h.processSegments(); err != nil { diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 83dc10ed0..0f7487963 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1209,6 +1209,14 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { e.owner = owner } +func (e *endpoint) takeLastError() *tcpip.Error { + e.lastErrorMu.Lock() + defer e.lastErrorMu.Unlock() + err := e.lastError + e.lastError = nil + return err +} + // Read reads data from the endpoint. func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { e.LockUser() @@ -1956,11 +1964,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { switch o := opt.(type) { case tcpip.ErrorOption: - e.lastErrorMu.Lock() - err := e.lastError - e.lastError = nil - e.lastErrorMu.Unlock() - return err + return e.takeLastError() case *tcpip.BindToDeviceOption: e.LockUser() @@ -2546,6 +2550,18 @@ func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.C e.sndBufMu.Unlock() e.notifyProtocolGoroutine(notifyMTUChanged) + + case stack.ControlNoRoute: + e.lastErrorMu.Lock() + e.lastError = tcpip.ErrNoRoute + e.lastErrorMu.Unlock() + e.notifyProtocolGoroutine(notifyError) + + case stack.ControlNetworkUnreachable: + e.lastErrorMu.Lock() + e.lastError = tcpip.ErrNetworkUnreachable + e.lastErrorMu.Unlock() + e.notifyProtocolGoroutine(notifyError) } } diff --git a/pkg/test/dockerutil/exec.go b/pkg/test/dockerutil/exec.go index 921d1da9e..4c739c9e9 100644 --- a/pkg/test/dockerutil/exec.go +++ b/pkg/test/dockerutil/exec.go @@ -87,7 +87,6 @@ func (c *Container) doExec(ctx context.Context, r ExecOpts, args []string) (Proc execid: resp.ID, conn: hijack, }, nil - } func (c *Container) execConfig(r ExecOpts, cmd []string) types.ExecConfig { diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index ff5f5c7f1..1a0221893 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -280,11 +280,13 @@ func TestOne(t *testing.T) { } // Because the Linux kernel receives the SYN-ACK but didn't send the SYN it - // will issue a RST. To prevent this IPtables can be used to filter out all + // will issue an RST. To prevent this IPtables can be used to filter out all // incoming packets. The raw socket that packetimpact tests use will still see // everything. 
- if logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil { - t.Fatalf("unable to Exec iptables on container %s: %s, logs from testbench:\n%s", testbench.Name, err, logs) + for _, bin := range []string{"iptables", "ip6tables"} { + if logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, bin, "-A", "INPUT", "-i", testNetDev, "-p", "tcp", "-j", "DROP"); err != nil { + t.Fatalf("unable to Exec %s on container %s: %s, logs from testbench:\n%s", bin, testbench.Name, err, logs) + } } // FIXME(b/156449515): Some piece of the system has a race. The old diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 5d9cec73e..87ce58c24 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -41,7 +41,8 @@ func portFromSockaddr(sa unix.Sockaddr) (uint16, error) { return 0, fmt.Errorf("sockaddr type %T does not contain port", sa) } -// pickPort makes a new socket and returns the socket FD and port. The domain should be AF_INET or AF_INET6. The caller must close the FD when done with +// pickPort makes a new socket and returns the socket FD and port. The domain +// should be AF_INET or AF_INET6. The caller must close the FD when done with // the port if there is no error. func pickPort(domain, typ int) (fd int, port uint16, err error) { fd, err = unix.Socket(domain, typ, 0) @@ -1061,3 +1062,58 @@ func (conn *UDPIPv6) Close() { func (conn *UDPIPv6) Drain() { conn.sniffer.Drain() } + +// TCPIPv6 maintains the state for all the layers in a TCP/IPv6 connection. +type TCPIPv6 Connection + +// NewTCPIPv6 creates a new TCPIPv6 connection with reasonable defaults. +func NewTCPIPv6(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv6 { + etherState, err := newEtherState(Ether{}, Ether{}) + if err != nil { + t.Fatalf("can't make etherState: %s", err) + } + ipv6State, err := newIPv6State(IPv6{}, IPv6{}) + if err != nil { + t.Fatalf("can't make ipv6State: %s", err) + } + tcpState, err := newTCPState(unix.AF_INET6, outgoingTCP, incomingTCP) + if err != nil { + t.Fatalf("can't make tcpState: %s", err) + } + injector, err := NewInjector(t) + if err != nil { + t.Fatalf("can't make injector: %s", err) + } + sniffer, err := NewSniffer(t) + if err != nil { + t.Fatalf("can't make sniffer: %s", err) + } + + return TCPIPv6{ + layerStates: []layerState{etherState, ipv6State, tcpState}, + injector: injector, + sniffer: sniffer, + t: t, + } +} + +func (conn *TCPIPv6) SrcPort() uint16 { + state := conn.layerStates[2].(*tcpState) + return *state.out.SrcPort +} + +// ExpectData is a convenient method that expects a Layer and the Layer after +// it. If it doens't arrive in time, it returns nil. +func (conn *TCPIPv6) ExpectData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { + expected := make([]Layer, len(conn.layerStates)) + expected[len(expected)-1] = tcp + if payload != nil { + expected = append(expected, payload) + } + return (*Connection)(conn).ExpectFrame(expected, timeout) +} + +// Close frees associated resources held by the TCPIPv6 connection. 
+func (conn *TCPIPv6) Close() { + (*Connection)(conn).Close() +} diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 645f6c1a9..24aa46cce 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -805,7 +805,11 @@ func (l *ICMPv6) ToBytes() ([]byte, error) { // We need to search forward to find the IPv6 header. for prev := l.Prev(); prev != nil; prev = prev.Prev() { if ipv6, ok := prev.(*IPv6); ok { - h.SetChecksum(header.ICMPv6Checksum(h, *ipv6.SrcAddr, *ipv6.DstAddr, buffer.VectorisedView{})) + payload, err := payload(l) + if err != nil { + return nil, err + } + h.SetChecksum(header.ICMPv6Checksum(h, *ipv6.SrcAddr, *ipv6.DstAddr, payload)) break } } diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 6a07889be..27905dcff 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -219,6 +219,16 @@ packetimpact_go_test( ], ) +packetimpact_go_test( + name = "tcp_network_unreachable", + srcs = ["tcp_network_unreachable_test.go"], + deps = [ + "//pkg/tcpip/header", + "//test/packetimpact/testbench", + "@org_golang_x_sys//unix:go_default_library", + ], +) + packetimpact_go_test( name = "tcp_cork_mss", srcs = ["tcp_cork_mss_test.go"], diff --git a/test/packetimpact/tests/tcp_network_unreachable_test.go b/test/packetimpact/tests/tcp_network_unreachable_test.go new file mode 100644 index 000000000..868a08da8 --- /dev/null +++ b/test/packetimpact/tests/tcp_network_unreachable_test.go @@ -0,0 +1,139 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tcp_synsent_reset_test + +import ( + "context" + "flag" + "net" + "syscall" + "testing" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +// TestTCPSynSentUnreachable verifies that TCP connections fail immediately when +// an ICMP destination unreachable message is sent in response to the inital +// SYN. +func TestTCPSynSentUnreachable(t *testing.T) { + // Create the DUT and connection. + dut := testbench.NewDUT(t) + defer dut.TearDown() + clientFD, clientPort := dut.CreateBoundSocket(unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) + port := uint16(9001) + conn := testbench.NewTCPIPv4(t, testbench.TCP{SrcPort: &port, DstPort: &clientPort}, testbench.TCP{SrcPort: &clientPort, DstPort: &port}) + defer conn.Close() + + // Bring the DUT to SYN-SENT state with a non-blocking connect. + ctx, cancel := context.WithTimeout(context.Background(), testbench.RPCTimeout) + defer cancel() + sa := unix.SockaddrInet4{Port: int(port)} + copy(sa.Addr[:], net.IP(net.ParseIP(testbench.LocalIPv4)).To4()) + if _, err := dut.ConnectWithErrno(ctx, clientFD, &sa); err != syscall.Errno(unix.EINPROGRESS) { + t.Errorf("expected connect to fail with EINPROGRESS, but got %v", err) + } + + // Get the SYN. 
+ tcpLayers, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, nil, time.Second) + if err != nil { + t.Fatalf("expected SYN: %s", err) + } + + // Send a host unreachable message. + rawConn := (*testbench.Connection)(&conn) + layers := rawConn.CreateFrame(nil) + layers = layers[:len(layers)-1] + const ipLayer = 1 + const tcpLayer = ipLayer + 1 + ip, ok := tcpLayers[ipLayer].(*testbench.IPv4) + if !ok { + t.Fatalf("expected %s to be IPv4", tcpLayers[ipLayer]) + } + tcp, ok := tcpLayers[tcpLayer].(*testbench.TCP) + if !ok { + t.Fatalf("expected %s to be TCP", tcpLayers[tcpLayer]) + } + var icmpv4 testbench.ICMPv4 = testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), Code: testbench.Uint8(header.ICMPv4HostUnreachable)} + layers = append(layers, &icmpv4, ip, tcp) + rawConn.SendFrameStateless(layers) + + if _, err = dut.ConnectWithErrno(ctx, clientFD, &sa); err != syscall.Errno(unix.EHOSTUNREACH) { + t.Errorf("expected connect to fail with EHOSTUNREACH, but got %v", err) + } +} + +// TestTCPSynSentUnreachable6 verifies that TCP connections fail immediately when +// an ICMP destination unreachable message is sent in response to the inital +// SYN. +func TestTCPSynSentUnreachable6(t *testing.T) { + // Create the DUT and connection. + dut := testbench.NewDUT(t) + defer dut.TearDown() + clientFD, clientPort := dut.CreateBoundSocket(unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv6)) + conn := testbench.NewTCPIPv6(t, testbench.TCP{DstPort: &clientPort}, testbench.TCP{SrcPort: &clientPort}) + defer conn.Close() + + // Bring the DUT to SYN-SENT state with a non-blocking connect. + ctx, cancel := context.WithTimeout(context.Background(), testbench.RPCTimeout) + defer cancel() + sa := unix.SockaddrInet6{ + Port: int(conn.SrcPort()), + ZoneId: uint32(testbench.RemoteInterfaceID), + } + copy(sa.Addr[:], net.IP(net.ParseIP(testbench.LocalIPv6)).To16()) + if _, err := dut.ConnectWithErrno(ctx, clientFD, &sa); err != syscall.Errno(unix.EINPROGRESS) { + t.Errorf("expected connect to fail with EINPROGRESS, but got %v", err) + } + + // Get the SYN. + tcpLayers, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, nil, time.Second) + if err != nil { + t.Fatalf("expected SYN: %s", err) + } + + // Send a host unreachable message. + rawConn := (*testbench.Connection)(&conn) + layers := rawConn.CreateFrame(nil) + layers = layers[:len(layers)-1] + const ipLayer = 1 + const tcpLayer = ipLayer + 1 + ip, ok := tcpLayers[ipLayer].(*testbench.IPv6) + if !ok { + t.Fatalf("expected %s to be IPv6", tcpLayers[ipLayer]) + } + tcp, ok := tcpLayers[tcpLayer].(*testbench.TCP) + if !ok { + t.Fatalf("expected %s to be TCP", tcpLayers[tcpLayer]) + } + var icmpv6 testbench.ICMPv6 = testbench.ICMPv6{ + Type: testbench.ICMPv6Type(header.ICMPv6DstUnreachable), + Code: testbench.Uint8(header.ICMPv6NetworkUnreachable), + // Per RFC 4443 3.1, the payload contains 4 zeroed bytes. + Payload: []byte{0, 0, 0, 0}, + } + layers = append(layers, &icmpv6, ip, tcp) + rawConn.SendFrameStateless(layers) + + if _, err = dut.ConnectWithErrno(ctx, clientFD, &sa); err != syscall.Errno(unix.ENETUNREACH) { + t.Errorf("expected connect to fail with ENETUNREACH, but got %v", err) + } +} -- cgit v1.2.3 From b396d3882ccccc82e1a3c799c7a291d47c43c2c8 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Thu, 23 Jul 2020 10:34:46 -0700 Subject: Port sendfile to vfs2. And do some refactoring of the wait logic in sendfile/splice/tee. 
Updates #1035 #2923 PiperOrigin-RevId: 322815521 --- pkg/sentry/syscalls/linux/vfs2/splice.go | 321 +++++++++++++++++++++++++------ pkg/sentry/syscalls/linux/vfs2/vfs2.go | 2 +- test/syscalls/BUILD | 2 + 3 files changed, 261 insertions(+), 64 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 945a364a7..63ab11f8c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -15,12 +15,15 @@ package vfs2 import ( + "io" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -110,16 +113,20 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Move data. var ( - n int64 - err error - inCh chan struct{} - outCh chan struct{} + n int64 + err error ) + dw := dualWaiter{ + inFile: inFile, + outFile: outFile, + } + defer dw.destroy() for { // If both input and output are pipes, delegate to the pipe - // implementation. Otherwise, exactly one end is a pipe, which we - // ensure is consistently ordered after the non-pipe FD's locks by - // passing the pipe FD as usermem.IO to the non-pipe end. + // implementation. Otherwise, exactly one end is a pipe, which + // we ensure is consistently ordered after the non-pipe FD's + // locks by passing the pipe FD as usermem.IO to the non-pipe + // end. switch { case inIsPipe && outIsPipe: n, err = pipe.Splice(t, outPipeFD, inPipeFD, count) @@ -137,38 +144,15 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } else { n, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{}) } + default: + panic("not possible") } + if n != 0 || err != syserror.ErrWouldBlock || nonBlock { break } - - // Note that the blocking behavior here is a bit different than the - // normal pattern. Because we need to have both data to read and data - // to write simultaneously, we actually explicitly block on both of - // these cases in turn before returning to the splice operation. - if inFile.Readiness(eventMaskRead)&eventMaskRead == 0 { - if inCh == nil { - inCh = make(chan struct{}, 1) - inW, _ := waiter.NewChannelEntry(inCh) - inFile.EventRegister(&inW, eventMaskRead) - defer inFile.EventUnregister(&inW) - continue // Need to refresh readiness. - } - if err = t.Block(inCh); err != nil { - break - } - } - if outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 { - if outCh == nil { - outCh = make(chan struct{}, 1) - outW, _ := waiter.NewChannelEntry(outCh) - outFile.EventRegister(&outW, eventMaskWrite) - defer outFile.EventUnregister(&outW) - continue // Need to refresh readiness. - } - if err = t.Block(outCh); err != nil { - break - } + if err = dw.waitForBoth(t); err != nil { + break } } @@ -247,45 +231,256 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo // Copy data. 
var ( - inCh chan struct{} - outCh chan struct{} + n int64 + err error ) + dw := dualWaiter{ + inFile: inFile, + outFile: outFile, + } + defer dw.destroy() for { - n, err := pipe.Tee(t, outPipeFD, inPipeFD, count) - if n != 0 { - return uintptr(n), nil, nil + n, err = pipe.Tee(t, outPipeFD, inPipeFD, count) + if n != 0 || err != syserror.ErrWouldBlock || nonBlock { + break + } + if err = dw.waitForBoth(t); err != nil { + break + } + } + if n == 0 { + return 0, nil, err + } + outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + return uintptr(n), nil, nil +} + +// Sendfile implements linux system call sendfile(2). +func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + outFD := args[0].Int() + inFD := args[1].Int() + offsetAddr := args[2].Pointer() + count := int64(args[3].SizeT()) + + inFile := t.GetFileVFS2(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + if !inFile.IsReadable() { + return 0, nil, syserror.EBADF + } + + outFile := t.GetFileVFS2(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + if !outFile.IsWritable() { + return 0, nil, syserror.EBADF + } + + // Verify that the outFile Append flag is not set. + if outFile.StatusFlags()&linux.O_APPEND != 0 { + return 0, nil, syserror.EINVAL + } + + // Verify that inFile is a regular file or block device. This is a + // requirement; the same check appears in Linux + // (fs/splice.c:splice_direct_to_actor). + if stat, err := inFile.Stat(t, vfs.StatOptions{Mask: linux.STATX_TYPE}); err != nil { + return 0, nil, err + } else if stat.Mask&linux.STATX_TYPE == 0 || + (stat.Mode&linux.S_IFMT != linux.S_IFREG && stat.Mode&linux.S_IFMT != linux.S_IFBLK) { + return 0, nil, syserror.EINVAL + } + + // Copy offset if it exists. + offset := int64(-1) + if offsetAddr != 0 { + if inFile.Options().DenyPRead { + return 0, nil, syserror.ESPIPE } - if err != syserror.ErrWouldBlock || nonBlock { + if _, err := t.CopyIn(offsetAddr, &offset); err != nil { return 0, nil, err } + if offset < 0 { + return 0, nil, syserror.EINVAL + } + if offset+count < 0 { + return 0, nil, syserror.EINVAL + } + } + + // Validate count. This must come after offset checks. + if count < 0 { + return 0, nil, syserror.EINVAL + } + if count == 0 { + return 0, nil, nil + } + if count > int64(kernel.MAX_RW_COUNT) { + count = int64(kernel.MAX_RW_COUNT) + } - // Note that the blocking behavior here is a bit different than the - // normal pattern. Because we need to have both data to read and data - // to write simultaneously, we actually explicitly block on both of - // these cases in turn before returning to the tee operation. - if inFile.Readiness(eventMaskRead)&eventMaskRead == 0 { - if inCh == nil { - inCh = make(chan struct{}, 1) - inW, _ := waiter.NewChannelEntry(inCh) - inFile.EventRegister(&inW, eventMaskRead) - defer inFile.EventUnregister(&inW) - continue // Need to refresh readiness. + // Copy data. + var ( + n int64 + err error + ) + dw := dualWaiter{ + inFile: inFile, + outFile: outFile, + } + defer dw.destroy() + outPipeFD, outIsPipe := outFile.Impl().(*pipe.VFSPipeFD) + // Reading from input file should never block, since it is regular or + // block device. We only need to check if writing to the output file + // can block. 
+ nonBlock := outFile.StatusFlags()&linux.O_NONBLOCK != 0 + if outIsPipe { + for n < count { + var spliceN int64 + if offset != -1 { + spliceN, err = inFile.PRead(t, outPipeFD.IOSequence(count), offset, vfs.ReadOptions{}) + offset += spliceN + } else { + spliceN, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{}) } - if err := t.Block(inCh); err != nil { - return 0, nil, err + n += spliceN + if err == syserror.ErrWouldBlock && !nonBlock { + err = dw.waitForBoth(t) + } + if err != nil { + break } } - if outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 { - if outCh == nil { - outCh = make(chan struct{}, 1) - outW, _ := waiter.NewChannelEntry(outCh) - outFile.EventRegister(&outW, eventMaskWrite) - defer outFile.EventUnregister(&outW) - continue // Need to refresh readiness. + } else { + // Read inFile to buffer, then write the contents to outFile. + buf := make([]byte, count) + for n < count { + var readN int64 + if offset != -1 { + readN, err = inFile.PRead(t, usermem.BytesIOSequence(buf), offset, vfs.ReadOptions{}) + offset += readN + } else { + readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) + } + if readN == 0 && err == io.EOF { + // We reached the end of the file. Eat the + // error and exit the loop. + err = nil + break } - if err := t.Block(outCh); err != nil { - return 0, nil, err + n += readN + if err != nil { + break + } + + // Write all of the bytes that we read. This may need + // multiple write calls to complete. + wbuf := buf[:n] + for len(wbuf) > 0 { + var writeN int64 + writeN, err = outFile.Write(t, usermem.BytesIOSequence(wbuf), vfs.WriteOptions{}) + wbuf = wbuf[writeN:] + if err == syserror.ErrWouldBlock && !nonBlock { + err = dw.waitForOut(t) + } + if err != nil { + // We didn't complete the write. Only + // report the bytes that were actually + // written, and rewind the offset. + notWritten := int64(len(wbuf)) + n -= notWritten + if offset != -1 { + offset -= notWritten + } + break + } + } + if err == syserror.ErrWouldBlock && !nonBlock { + err = dw.waitForBoth(t) } + if err != nil { + break + } + } + } + + if offsetAddr != 0 { + // Copy out the new offset. + if _, err := t.CopyOut(offsetAddr, offset); err != nil { + return 0, nil, err + } + } + + if n == 0 { + return 0, nil, err + } + + inFile.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + return uintptr(n), nil, nil +} + +// dualWaiter is used to wait on one or both vfs.FileDescriptions. It is not +// thread-safe, and does not take a reference on the vfs.FileDescriptions. +// +// Users must call destroy() when finished. +type dualWaiter struct { + inFile *vfs.FileDescription + outFile *vfs.FileDescription + + inW waiter.Entry + inCh chan struct{} + outW waiter.Entry + outCh chan struct{} +} + +// waitForBoth waits for both dw.inFile and dw.outFile to be ready. +func (dw *dualWaiter) waitForBoth(t *kernel.Task) error { + if dw.inFile.Readiness(eventMaskRead)&eventMaskRead == 0 { + if dw.inCh == nil { + dw.inW, dw.inCh = waiter.NewChannelEntry(nil) + dw.inFile.EventRegister(&dw.inW, eventMaskRead) + // We might be ready now. Try again before blocking. + return nil + } + if err := t.Block(dw.inCh); err != nil { + return err + } + } + return dw.waitForOut(t) +} + +// waitForOut waits for dw.outfile to be read. 
+func (dw *dualWaiter) waitForOut(t *kernel.Task) error { + if dw.outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 { + if dw.outCh == nil { + dw.outW, dw.outCh = waiter.NewChannelEntry(nil) + dw.outFile.EventRegister(&dw.outW, eventMaskWrite) + // We might be ready now. Try again before blocking. + return nil } + if err := t.Block(dw.outCh); err != nil { + return err + } + } + return nil +} + +// destroy cleans up resources help by dw. No more calls to wait* can occur +// after destroy is called. +func (dw *dualWaiter) destroy() { + if dw.inCh != nil { + dw.inFile.EventUnregister(&dw.inW) + dw.inCh = nil + } + if dw.outCh != nil { + dw.outFile.EventUnregister(&dw.outW) + dw.outCh = nil } + dw.inFile = nil + dw.outFile = nil } diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index 8f497ecc7..1b2cfad7d 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -44,7 +44,7 @@ func Override() { s.Table[23] = syscalls.Supported("select", Select) s.Table[32] = syscalls.Supported("dup", Dup) s.Table[33] = syscalls.Supported("dup2", Dup2) - delete(s.Table, 40) // sendfile + s.Table[40] = syscalls.Supported("sendfile", Sendfile) s.Table[41] = syscalls.Supported("socket", Socket) s.Table[42] = syscalls.Supported("connect", Connect) s.Table[43] = syscalls.Supported("accept", Accept) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index c06a75ada..15e5fd223 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -640,11 +640,13 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:sendfile_socket_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:sendfile_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From 20b556e625354dd8330e30e4075ad06eedc6a2ce Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 23 Jul 2020 12:52:18 -0700 Subject: Fix wildcard bind for raw socket. Fixes #3334 PiperOrigin-RevId: 322846384 --- pkg/tcpip/transport/raw/endpoint.go | 2 +- test/syscalls/linux/raw_socket.cc | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index c2e9fd29f..aefe0e2b2 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -456,7 +456,7 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { defer e.mu.Unlock() // If a local address was specified, verify that it's valid. - if e.stack.CheckLocalAddress(addr.NIC, e.NetProto, addr.Addr) == 0 { + if len(addr.Addr) != 0 && e.stack.CheckLocalAddress(addr.NIC, e.NetProto, addr.Addr) == 0 { return tcpip.ErrBadLocalAddress } diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc index ce54dc064..8d6e5c913 100644 --- a/test/syscalls/linux/raw_socket.cc +++ b/test/syscalls/linux/raw_socket.cc @@ -262,6 +262,27 @@ TEST_P(RawSocketTest, SendWithoutConnectFails) { SyscallFailsWithErrno(EDESTADDRREQ)); } +// Wildcard Bind. +TEST_P(RawSocketTest, BindToWildcard) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + struct sockaddr_storage addr; + addr = {}; + + // We don't set ports because raw sockets don't have a notion of ports. 
+ if (Family() == AF_INET) { + struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr); + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = htonl(INADDR_ANY); + } else { + struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr); + sin6->sin6_family = AF_INET6; + sin6->sin6_addr = in6addr_any; + } + + ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), + SyscallSucceeds()); +} + // Bind to localhost. TEST_P(RawSocketTest, BindToLocalhost) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); -- cgit v1.2.3 From 15da310efaad5574ca7b0fe03cb76919fdffc5ce Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 23 Jul 2020 13:56:49 -0700 Subject: Internal change. PiperOrigin-RevId: 322859907 --- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/getdents.cc | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 662d780d8..66a31cd28 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -943,6 +943,7 @@ cc_binary( "//test/util:eventfd_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/container:node_hash_set", "@com_google_absl//absl/strings", gtest, "//test/util:posix_error", diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc index b147d6181..b040cdcf7 100644 --- a/test/syscalls/linux/getdents.cc +++ b/test/syscalls/linux/getdents.cc @@ -32,6 +32,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/container/node_hash_set.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "test/util/eventfd_util.h" @@ -393,7 +394,7 @@ TYPED_TEST(GetdentsTest, ProcSelfFd) { // Make the buffer very small since we want to iterate. typename TestFixture::DirentBufferType dirents( 2 * sizeof(typename TestFixture::LinuxDirentType)); - std::unordered_set<int> prev_fds; + absl::node_hash_set<int> prev_fds; while (true) { dirents.Reset(); int rv; -- cgit v1.2.3 From 3e0e3b9b11fee58835a0a492d66e72b354459e27 Mon Sep 17 00:00:00 2001 From: Ridwan Sharif Date: Tue, 9 Jun 2020 12:35:39 -0400 Subject: Added stub FUSE filesystem Allow FUSE filesystems to be mounted using libfuse. The appropriate flags and mount options are parsed and understood by fusefs.
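For context, the mount request that fusefs expects is the one libfuse normally issues on behalf of a FUSE server. The sketch below is a hypothetical, minimal Go equivalent, not part of the change itself: the daemon name "hellofs", the mount point, and the octal rootmode value are made-up placeholders, and only the fd= option is strictly required by the option parser added here.

    package main

    import (
        "fmt"
        "log"
        "os"

        "golang.org/x/sys/unix"
    )

    func main() {
        // The FUSE server keeps this descriptor open for the lifetime of the
        // mount; all FUSE protocol traffic is read from and written to it.
        dev, err := os.OpenFile("/dev/fuse", os.O_RDWR, 0)
        if err != nil {
            log.Fatalf("opening /dev/fuse: %v", err)
        }
        defer dev.Close()

        // fd= is mandatory. rootmode (octal), user_id and group_id mirror the
        // options libfuse fills in for the mount owner.
        opts := fmt.Sprintf("fd=%d,rootmode=40000,user_id=%d,group_id=%d",
            dev.Fd(), os.Getuid(), os.Getgid())

        // "/mnt/hello" is an arbitrary, pre-existing mount point.
        if err := unix.Mount("hellofs", "/mnt/hello", "fuse",
            unix.MS_NODEV|unix.MS_NOSUID, opts); err != nil {
            log.Fatalf("mount: %v", err)
        }
    }

Once such a mount succeeds, the fd= value is resolved back to the DeviceFD opened from /dev/fuse and marked mounted, so the EPERM guards in dev.go no longer apply (actual request handling is still stubbed out as ENOSYS at this stage).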
--- pkg/sentry/fsimpl/fuse/BUILD | 5 + pkg/sentry/fsimpl/fuse/dev.go | 28 +++++ pkg/sentry/fsimpl/fuse/fusefs.go | 201 ++++++++++++++++++++++++++++++++ pkg/sentry/syscalls/linux/vfs2/mount.go | 9 +- pkg/sentry/vfs/options.go | 11 ++ runsc/boot/vfs.go | 5 + test/syscalls/linux/dev.cc | 12 ++ test/syscalls/linux/mount.cc | 28 +++++ 8 files changed, 297 insertions(+), 2 deletions(-) create mode 100644 pkg/sentry/fsimpl/fuse/fusefs.go (limited to 'test') diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD index 3e00c2abb..1fb9c438b 100644 --- a/pkg/sentry/fsimpl/fuse/BUILD +++ b/pkg/sentry/fsimpl/fuse/BUILD @@ -6,15 +6,20 @@ go_library( name = "fuse", srcs = [ "dev.go", + "fusefs.go", ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/log", "//pkg/sentry/fsimpl/devtmpfs", + "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", "//pkg/syserror", "//pkg/usermem", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go index dc33268af..c9e12a94f 100644 --- a/pkg/sentry/fsimpl/fuse/dev.go +++ b/pkg/sentry/fsimpl/fuse/dev.go @@ -51,6 +51,9 @@ type DeviceFD struct { vfs.DentryMetadataFileDescriptionImpl vfs.NoLockFD + // mounted specifies whether a FUSE filesystem was mounted using the DeviceFD. + mounted bool + // TODO(gvisor.dev/issue/2987): Add all the data structures needed to enqueue // and deque requests, control synchronization and establish communication // between the FUSE kernel module and the /dev/fuse character device. @@ -61,26 +64,51 @@ func (fd *DeviceFD) Release() {} // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. + if !fd.mounted { + return 0, syserror.EPERM + } + return 0, syserror.ENOSYS } // Read implements vfs.FileDescriptionImpl.Read. func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. + if !fd.mounted { + return 0, syserror.EPERM + } + return 0, syserror.ENOSYS } // PWrite implements vfs.FileDescriptionImpl.PWrite. func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. + if !fd.mounted { + return 0, syserror.EPERM + } + return 0, syserror.ENOSYS } // Write implements vfs.FileDescriptionImpl.Write. func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. + if !fd.mounted { + return 0, syserror.EPERM + } + return 0, syserror.ENOSYS } // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted. + if !fd.mounted { + return 0, syserror.EPERM + } + return 0, syserror.ENOSYS } diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go new file mode 100644 index 000000000..6d40e2d87 --- /dev/null +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -0,0 +1,201 @@ +// Copyright 2019 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fuse implements fusefs. +package fuse + +import ( + "strconv" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Name is the default filesystem name. +const Name = "fuse" + +// FilesystemType implements vfs.FilesystemType. +type FilesystemType struct{} + +type filesystemOptions struct { + // userID specifies the numeric uid of the mount owner. + // This option should not be specified by the filesystem owner. + // It is set by libfuse (or, if libfuse is not used, must be set + // by the filesystem itself). For more information, see man page + // for fuse(8) + userID uint32 + + // groupID specifies the numeric gid of the mount owner. + // This option should not be specified by the filesystem owner. + // It is set by libfuse (or, if libfuse is not used, must be set + // by the filesystem itself). For more information, see man page + // for fuse(8) + groupID uint32 + + // rootMode specifies the the file mode of the filesystem's root. + rootMode linux.FileMode +} + +// filesystem implements vfs.FilesystemImpl. +type filesystem struct { + kernfs.Filesystem + devMinor uint32 + + // fuseFD is the FD returned when opening /dev/fuse. It is used for communication + // between the FUSE server daemon and the sentry fusefs. + fuseFD *DeviceFD + + // opts is the options the fusefs is initialized with. + opts filesystemOptions +} + +// Name implements vfs.FilesystemType.Name. +func (FilesystemType) Name() string { + return Name +} + +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + devMinor, err := vfsObj.GetAnonBlockDevMinor() + if err != nil { + return nil, nil, err + } + + var fsopts filesystemOptions + mopts := vfs.GenericParseMountOptions(opts.Data) + deviceDescriptorStr, ok := mopts["fd"] + if !ok { + log.Warningf("%s.GetFilesystem: communication file descriptor N (obtained by opening /dev/fuse) must be specified as 'fd=N'", fsType.Name()) + return nil, nil, syserror.EINVAL + } + delete(mopts, "fd") + + deviceDescriptor, err := strconv.ParseInt(deviceDescriptorStr, 10 /* base */, 32 /* bitSize */) + if err != nil { + return nil, nil, err + } + + kernelTask := kernel.TaskFromContext(ctx) + if kernelTask == nil { + log.Warningf("%s.GetFilesystem: couldn't get kernel task from context", fsType.Name()) + return nil, nil, syserror.EINVAL + } + fuseFd := kernelTask.GetFileVFS2(int32(deviceDescriptor)) + + // Parse and set all the other supported FUSE mount options. + // TODO: Expand the supported mount options. 
+ if userIDStr, ok := mopts["user_id"]; ok { + delete(mopts, "user_id") + userID, err := strconv.ParseUint(userIDStr, 10, 32) + if err != nil { + log.Warningf("%s.GetFilesystem: invalid user_id: user_id=%s", fsType.Name(), userIDStr) + return nil, nil, syserror.EINVAL + } + fsopts.userID = uint32(userID) + } + + if groupIDStr, ok := mopts["group_id"]; ok { + delete(mopts, "group_id") + groupID, err := strconv.ParseUint(groupIDStr, 10, 32) + if err != nil { + log.Warningf("%s.GetFilesystem: invalid group_id: group_id=%s", fsType.Name(), groupIDStr) + return nil, nil, syserror.EINVAL + } + fsopts.groupID = uint32(groupID) + } + + rootMode := linux.FileMode(0777) + modeStr, ok := mopts["rootmode"] + if ok { + delete(mopts, "rootmode") + mode, err := strconv.ParseUint(modeStr, 8, 32) + if err != nil { + log.Warningf("%s.GetFilesystem: invalid mode: %q", fsType.Name(), modeStr) + return nil, nil, syserror.EINVAL + } + rootMode = linux.FileMode(mode & 07777) + } + fsopts.rootMode = rootMode + + // Check for unparsed options. + if len(mopts) != 0 { + log.Warningf("%s.GetFilesystem: unknown options: %v", fsType.Name(), mopts) + return nil, nil, syserror.EINVAL + } + + // Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to + // mount a FUSE filesystem. + fuseFD := fuseFd.Impl().(*DeviceFD) + fuseFD.mounted = true + + fs := &filesystem{ + devMinor: devMinor, + fuseFD: fuseFD, + opts: fsopts, + } + + fs.VFSFilesystem().Init(vfsObj, &fsType, fs) + + // TODO: dispatch a FUSE_INIT request to the FUSE daemon server before + // returning. Mount will not block on this dispatched request. + + // root is the fusefs root directory. + defaultFusefsDirMode := linux.FileMode(0755) + root := fs.newInode(creds, defaultFusefsDirMode) + + return fs.VFSFilesystem(), root.VFSDentry(), nil +} + +// Release implements vfs.FilesystemImpl.Release. +func (fs *filesystem) Release() { + fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) + fs.Filesystem.Release() +} + +// Inode implements kernfs.Inode. +type Inode struct { + kernfs.InodeAttrs + kernfs.InodeNoDynamicLookup + kernfs.InodeNotSymlink + kernfs.InodeDirectoryNoNewChildren + kernfs.OrderedChildren + + locks vfs.FileLocks + + dentry kernfs.Dentry +} + +func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry { + i := &Inode{} + i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755) + i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + i.dentry.Init(i) + + return &i.dentry +} + +// Open implements kernfs.Inode.Open. +func (i *Inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) + if err != nil { + return nil, err + } + return fd.VFSFileDescription(), nil +} diff --git a/pkg/sentry/syscalls/linux/vfs2/mount.go b/pkg/sentry/syscalls/linux/vfs2/mount.go index adeaa39cc..ea337de7c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mount.go +++ b/pkg/sentry/syscalls/linux/vfs2/mount.go @@ -77,8 +77,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Silently allow MS_NOSUID, since we don't implement set-id bits // anyway. 
- const unsupportedFlags = linux.MS_NODEV | - linux.MS_NODIRATIME | linux.MS_STRICTATIME + const unsupportedFlags = linux.MS_NODIRATIME | linux.MS_STRICTATIME // Linux just allows passing any flags to mount(2) - it won't fail when // unknown or unsupported flags are passed. Since we don't implement @@ -94,6 +93,12 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if flags&linux.MS_NOEXEC == linux.MS_NOEXEC { opts.Flags.NoExec = true } + if flags&linux.MS_NODEV == linux.MS_NODEV { + opts.Flags.NoDev = true + } + if flags&linux.MS_NOSUID == linux.MS_NOSUID { + opts.Flags.NoSUID = true + } if flags&linux.MS_RDONLY == linux.MS_RDONLY { opts.ReadOnly = true } diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go index f223aeda8..d37208a1f 100644 --- a/pkg/sentry/vfs/options.go +++ b/pkg/sentry/vfs/options.go @@ -79,6 +79,17 @@ type MountFlags struct { // NoATime is equivalent to MS_NOATIME and indicates that the // filesystem should not update access time in-place. NoATime bool + + // NoDev is equivalent to MS_NODEV and indicates that the + // filesystem should not allow access to devices (special files). + // TODO(gVisor.dev/issue/3186): respect this flag in non FUSE + // filesystems. + NoDev bool + + // NoSUID is equivalent to MS_NOSUID and indicates that the + // filesystem should not honor set-user-ID and set-group-ID bits or + // file capabilities when executing programs. + NoSUID bool } // MountOptions contains options to VirtualFilesystem.MountAt(). diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 56f4ba15d..9a1ed8e9e 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -77,6 +77,10 @@ func registerFilesystems(k *kernel.Kernel) error { AllowUserMount: true, AllowUserList: true, }) + vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) // Setup files in devtmpfs. if err := memdev.Register(vfsObj); err != nil { @@ -119,6 +123,7 @@ func registerFilesystems(k *kernel.Kernel) error { return fmt.Errorf("creating fusedev devtmpfs files: %w", err) } } + return nil } diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 6fa16208e..69e8b76d3 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -161,6 +161,18 @@ TEST(DevTest, OpenDevFuse) { ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY)); } +TEST(DevTest, ReadDevFuseWithoutMount) { + // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new + // device registration is complete. + SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY)); + + std::vector buf(1); + EXPECT_THAT(ReadFd(fd.get(), buf.data(), sizeof(buf)), SyscallFailsWithErrno(EPERM)); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc index a3e9745cf..7664fa73d 100644 --- a/test/syscalls/linux/mount.cc +++ b/test/syscalls/linux/mount.cc @@ -321,6 +321,34 @@ TEST(MountTest, RenameRemoveMountPoint) { ASSERT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EBUSY)); } +TEST(MountTest, MountFuseFilesystemNoDevice) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new + // device registration is complete. 
+ SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor()); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(mount("", dir.path().c_str(), "fuse", 0, ""), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(MountTest, MountFuseFilesystem) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new + // device registration is complete. + SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor()); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_WRONLY)); + std::string mopts = "fd=" + std::to_string(fd.get()); + + auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto const mount = + ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "fuse", 0, mopts, 0)); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 4ec351633206fdbd191bc3aef29a007925a731cc Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Thu, 23 Jul 2020 17:40:46 -0700 Subject: Implement get/set_robust_list. PiperOrigin-RevId: 322904430 --- pkg/abi/linux/futex.go | 18 +++++ pkg/sentry/kernel/futex/futex.go | 8 +-- pkg/sentry/kernel/task.go | 4 ++ pkg/sentry/kernel/task_exec.go | 3 + pkg/sentry/kernel/task_exit.go | 3 + pkg/sentry/kernel/task_futex.go | 125 +++++++++++++++++++++++++++++++++ pkg/sentry/syscalls/linux/linux64.go | 4 +- pkg/sentry/syscalls/linux/sys_futex.go | 48 ++++++++++++- test/syscalls/linux/futex.cc | 92 ++++++++++++++++++++++++ 9 files changed, 298 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/pkg/abi/linux/futex.go b/pkg/abi/linux/futex.go index 08bfde3b5..8138088a6 100644 --- a/pkg/abi/linux/futex.go +++ b/pkg/abi/linux/futex.go @@ -60,3 +60,21 @@ const ( FUTEX_WAITERS = 0x80000000 FUTEX_OWNER_DIED = 0x40000000 ) + +// FUTEX_BITSET_MATCH_ANY has all bits set. +const FUTEX_BITSET_MATCH_ANY = 0xffffffff + +// ROBUST_LIST_LIMIT protects against a deliberately circular list. +const ROBUST_LIST_LIMIT = 2048 + +// RobustListHead corresponds to Linux's struct robust_list_head. +// +// +marshal +type RobustListHead struct { + List uint64 + FutexOffset uint64 + ListOpPending uint64 +} + +// SizeOfRobustListHead is the size of a RobustListHead struct. +var SizeOfRobustListHead = (*RobustListHead)(nil).SizeBytes() diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index 732e66da4..bcc1b29a8 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -717,10 +717,10 @@ func (m *Manager) lockPILocked(w *Waiter, t Target, addr usermem.Addr, tid uint3 } } -// UnlockPI unlock the futex following the Priority-inheritance futex -// rules. The address provided must contain the caller's TID. If there are -// waiters, TID of the next waiter (FIFO) is set to the given address, and the -// waiter woken up. If there are no waiters, 0 is set to the address. +// UnlockPI unlocks the futex following the Priority-inheritance futex rules. +// The address provided must contain the caller's TID. If there are waiters, +// TID of the next waiter (FIFO) is set to the given address, and the waiter +// woken up. If there are no waiters, 0 is set to the address. 
func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool) error { k, err := getKey(t, addr, private) if err != nil { diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index b3d655b6e..c4db05bd8 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -565,6 +565,10 @@ type Task struct { // futexWaiter is exclusive to the task goroutine. futexWaiter *futex.Waiter `state:"nosave"` + // robustList is a pointer to the head of the tasks's robust futex + // list. + robustList usermem.Addr + // startTime is the real time at which the task started. It is set when // a Task is created or invokes execve(2). // diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go index 9b69f3cbe..7803b98d0 100644 --- a/pkg/sentry/kernel/task_exec.go +++ b/pkg/sentry/kernel/task_exec.go @@ -207,6 +207,9 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState { return flags.CloseOnExec }) + // Handle the robust futex list. + t.exitRobustList() + // NOTE(b/30815691): We currently do not implement privileged // executables (set-user/group-ID bits and file capabilities). This // allows us to unconditionally enable user dumpability on the new mm. diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index c4ade6e8e..231ac548a 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -253,6 +253,9 @@ func (*runExitMain) execute(t *Task) taskRunState { } } + // Handle the robust futex list. + t.exitRobustList() + // Deactivate the address space and update max RSS before releasing the // task's MM. t.Deactivate() diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go index a53e77c9f..4b535c949 100644 --- a/pkg/sentry/kernel/task_futex.go +++ b/pkg/sentry/kernel/task_futex.go @@ -15,6 +15,7 @@ package kernel import ( + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/usermem" ) @@ -52,3 +53,127 @@ func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) { func (t *Task) GetSharedKey(addr usermem.Addr) (futex.Key, error) { return t.MemoryManager().GetSharedFutexKey(t, addr) } + +// GetRobustList sets the robust futex list for the task. +func (t *Task) GetRobustList() usermem.Addr { + t.mu.Lock() + addr := t.robustList + t.mu.Unlock() + return addr +} + +// SetRobustList sets the robust futex list for the task. +func (t *Task) SetRobustList(addr usermem.Addr) { + t.mu.Lock() + t.robustList = addr + t.mu.Unlock() +} + +// exitRobustList walks the robust futex list, marking locks dead and notifying +// wakers. It corresponds to Linux's exit_robust_list(). Following Linux, +// errors are silently ignored. +func (t *Task) exitRobustList() { + t.mu.Lock() + addr := t.robustList + t.robustList = 0 + t.mu.Unlock() + + if addr == 0 { + return + } + + var rl linux.RobustListHead + if _, err := rl.CopyIn(t, usermem.Addr(addr)); err != nil { + return + } + + next := rl.List + done := 0 + var pendingLockAddr usermem.Addr + if rl.ListOpPending != 0 { + pendingLockAddr = usermem.Addr(rl.ListOpPending + rl.FutexOffset) + } + + // Wake up normal elements. + for usermem.Addr(next) != addr { + // We traverse to the next element of the list before we + // actually wake anything. This prevents the race where waking + // this futex causes a modification of the list. + thisLockAddr := usermem.Addr(next + rl.FutexOffset) + + // Try to decode the next element in the list before waking the + // current futex. 
But don't check the error until after we've + // woken the current futex. Linux does it in this order too + _, nextErr := t.CopyIn(usermem.Addr(next), &next) + + // Wakeup the current futex if it's not pending. + if thisLockAddr != pendingLockAddr { + t.wakeRobustListOne(thisLockAddr) + } + + // If there was an error copying the next futex, we must bail. + if nextErr != nil { + break + } + + // This is a user structure, so it could be a massive list, or + // even contain a loop if they are trying to mess with us. We + // cap traversal to prevent that. + done++ + if done >= linux.ROBUST_LIST_LIMIT { + break + } + } + + // Is there a pending entry to wake? + if pendingLockAddr != 0 { + t.wakeRobustListOne(pendingLockAddr) + } +} + +// wakeRobustListOne wakes a single futex from the robust list. +func (t *Task) wakeRobustListOne(addr usermem.Addr) { + // Bit 0 in address signals PI futex. + pi := addr&1 == 1 + addr = addr &^ 1 + + // Load the futex. + f, err := t.LoadUint32(addr) + if err != nil { + // Can't read this single value? Ignore the problem. + // We can wake the other futexes in the list. + return + } + + tid := uint32(t.ThreadID()) + for { + // Is this held by someone else? + if f&linux.FUTEX_TID_MASK != tid { + return + } + + // This thread is dying and it's holding this futex. We need to + // set the owner died bit and wake up any waiters. + newF := (f & linux.FUTEX_WAITERS) | linux.FUTEX_OWNER_DIED + if curF, err := t.CompareAndSwapUint32(addr, f, newF); err != nil { + return + } else if curF != f { + // Futex changed out from under us. Try again... + f = curF + continue + } + + // Wake waiters if there are any. + if f&linux.FUTEX_WAITERS != 0 { + private := f&linux.FUTEX_PRIVATE_FLAG != 0 + if pi { + t.Futex().UnlockPI(t, addr, tid, private) + return + } + t.Futex().Wake(t, addr, private, linux.FUTEX_BITSET_MATCH_ANY, 1) + } + + // Done. + return + } +} diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index ea4f9b1a7..80c65164a 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -325,8 +325,8 @@ var AMD64 = &kernel.SyscallTable{ 270: syscalls.Supported("pselect", Pselect), 271: syscalls.Supported("ppoll", Ppoll), 272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil), - 273: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil), - 274: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil), + 273: syscalls.Supported("set_robust_list", SetRobustList), + 274: syscalls.Supported("get_robust_list", GetRobustList), 275: syscalls.Supported("splice", Splice), 276: syscalls.Supported("tee", Tee), 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil), diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index b68261f72..f04d78856 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -198,7 +198,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall switch cmd { case linux.FUTEX_WAIT: // WAIT uses a relative timeout. 
- mask = ^uint32(0) + mask = linux.FUTEX_BITSET_MATCH_ANY var timeoutDur time.Duration if !forever { timeoutDur = time.Duration(timespec.ToNsecCapped()) * time.Nanosecond @@ -286,3 +286,49 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, syserror.ENOSYS } } + +// SetRobustList implements linux syscall set_robust_list(2). +func SetRobustList(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + // Despite the syscall using the name 'pid' for this variable, it is + // very much a tid. + head := args[0].Pointer() + length := args[1].SizeT() + + if length != uint(linux.SizeOfRobustListHead) { + return 0, nil, syserror.EINVAL + } + t.SetRobustList(head) + return 0, nil, nil +} + +// GetRobustList implements linux syscall get_robust_list(2). +func GetRobustList(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + // Despite the syscall using the name 'pid' for this variable, it is + // very much a tid. + tid := args[0].Int() + head := args[1].Pointer() + size := args[2].Pointer() + + if tid < 0 { + return 0, nil, syserror.EINVAL + } + + ot := t + if tid != 0 { + if ot = t.PIDNamespace().TaskWithID(kernel.ThreadID(tid)); ot == nil { + return 0, nil, syserror.ESRCH + } + } + + // Copy out head pointer. + if _, err := t.CopyOut(head, uint64(ot.GetRobustList())); err != nil { + return 0, nil, err + } + + // Copy out size, which is a constant. + if _, err := t.CopyOut(size, uint64(linux.SizeOfRobustListHead)); err != nil { + return 0, nil, err + } + + return 0, nil, nil +} diff --git a/test/syscalls/linux/futex.cc b/test/syscalls/linux/futex.cc index 40c80a6e1..90b1f0508 100644 --- a/test/syscalls/linux/futex.cc +++ b/test/syscalls/linux/futex.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -737,6 +738,97 @@ TEST_P(PrivateAndSharedFutexTest, PITryLockConcurrency_NoRandomSave) { } } +int get_robust_list(int pid, struct robust_list_head** head_ptr, + size_t* len_ptr) { + return syscall(__NR_get_robust_list, pid, head_ptr, len_ptr); +} + +int set_robust_list(struct robust_list_head* head, size_t len) { + return syscall(__NR_set_robust_list, head, len); +} + +TEST(RobustFutexTest, BasicSetGet) { + struct robust_list_head hd = {}; + struct robust_list_head* hd_ptr = &hd; + + // Set! + EXPECT_THAT(set_robust_list(hd_ptr, sizeof(hd)), SyscallSucceedsWithValue(0)); + + // Get! + struct robust_list_head* new_hd_ptr = hd_ptr; + size_t len; + EXPECT_THAT(get_robust_list(0, &new_hd_ptr, &len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(new_hd_ptr, hd_ptr); + EXPECT_EQ(len, sizeof(hd)); +} + +TEST(RobustFutexTest, GetFromOtherTid) { + // Get the current tid and list head. + pid_t tid = gettid(); + struct robust_list_head* hd_ptr = {}; + size_t len; + EXPECT_THAT(get_robust_list(0, &hd_ptr, &len), SyscallSucceedsWithValue(0)); + + // Create a new thread. + ScopedThread t([&] { + // Current tid list head should be different from parent tid. + struct robust_list_head* got_hd_ptr = {}; + EXPECT_THAT(get_robust_list(0, &got_hd_ptr, &len), + SyscallSucceedsWithValue(0)); + EXPECT_NE(hd_ptr, got_hd_ptr); + + // Get the parent list head by passing its tid. + EXPECT_THAT(get_robust_list(tid, &got_hd_ptr, &len), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(hd_ptr, got_hd_ptr); + }); + + // Wait for thread. 
+ t.Join(); +} + +TEST(RobustFutexTest, InvalidSize) { + struct robust_list_head* hd = {}; + EXPECT_THAT(set_robust_list(hd, sizeof(*hd) + 1), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(RobustFutexTest, PthreadMutexAttr) { + constexpr int kNumMutexes = 3; + + // Create a bunch of robust mutexes. + pthread_mutexattr_t attrs[kNumMutexes]; + pthread_mutex_t mtxs[kNumMutexes]; + for (int i = 0; i < kNumMutexes; i++) { + TEST_PCHECK(pthread_mutexattr_init(&attrs[i]) == 0); + TEST_PCHECK(pthread_mutexattr_setrobust(&attrs[i], PTHREAD_MUTEX_ROBUST) == + 0); + TEST_PCHECK(pthread_mutex_init(&mtxs[i], &attrs[i]) == 0); + } + + // Start thread to lock the mutexes and then exit. + ScopedThread t([&] { + for (int i = 0; i < kNumMutexes; i++) { + TEST_PCHECK(pthread_mutex_lock(&mtxs[i]) == 0); + } + pthread_exit(NULL); + }); + + // Wait for thread. + t.Join(); + + // Now try to take the mutexes. + for (int i = 0; i < kNumMutexes; i++) { + // Should get EOWNERDEAD. + EXPECT_EQ(pthread_mutex_lock(&mtxs[i]), EOWNERDEAD); + // Make the mutex consistent. + EXPECT_EQ(pthread_mutex_consistent(&mtxs[i]), 0); + // Unlock. + EXPECT_EQ(pthread_mutex_unlock(&mtxs[i]), 0); + } +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 65b5e648022cb33d4c5b650a008cb18e4aa6f600 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Fri, 24 Jul 2020 01:11:00 -0700 Subject: Enable open test Updates #2923 PiperOrigin-RevId: 322953552 --- test/syscalls/BUILD | 1 + 1 file changed, 1 insertion(+) (limited to 'test') diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 15e5fd223..c19b30b4a 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -400,6 +400,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:open_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From da631a3ef21ccace88803a9d8dcf05e285167e3f Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 24 Jul 2020 15:03:13 -0700 Subject: Speed up some iptables tests Sending UDP packets in a loop can be done in a separate goroutine. We can't do this in ContainerAction because the container will terminate early. Locally, scripts/iptables_tests.sh runs ~40 seconds faster. --- test/iptables/filter_input.go | 30 +++++++++++++++--------------- test/iptables/iptables_util.go | 33 ++++++++++++++++++++++++++++----- test/iptables/nat.go | 12 ++++++------ 3 files changed, 49 insertions(+), 26 deletions(-) (limited to 'test') diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 068f228bd..af4355ba8 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -81,7 +81,7 @@ func (FilterInputDropUDP) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropUDP) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, dropPort, sendloopDuration) + return spawnUDPLoop(ip, dropPort, sendloopDuration) } // FilterInputDropOnlyUDP tests that "-p udp -j DROP" only affects UDP traffic. @@ -141,7 +141,7 @@ func (FilterInputDropUDPPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropUDPPort) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, dropPort, sendloopDuration) + return spawnUDPLoop(ip, dropPort, sendloopDuration) } // FilterInputDropDifferentUDPPort tests that dropping traffic for a single UDP port @@ -169,7 +169,7 @@ func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. 
func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputDropTCPDestPort tests that connections are not accepted on specified source ports. @@ -269,7 +269,7 @@ func (FilterInputDropAll) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDropAll) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, dropPort, sendloopDuration) + return spawnUDPLoop(ip, dropPort, sendloopDuration) } // FilterInputMultiUDPRules verifies that multiple UDP rules are applied @@ -365,7 +365,7 @@ func (FilterInputDefaultPolicyAccept) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDefaultPolicyAccept) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputDefaultPolicyDrop tests the default DROP policy. @@ -396,7 +396,7 @@ func (FilterInputDefaultPolicyDrop) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputDefaultPolicyDrop) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputReturnUnderflow tests that -j RETURN in a built-in chain causes @@ -428,7 +428,7 @@ func (FilterInputReturnUnderflow) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputReturnUnderflow) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputSerializeJump verifies that we can serialize jumps. @@ -482,7 +482,7 @@ func (FilterInputJumpBasic) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputJumpBasic) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputJumpReturn jumps, returns, and executes a rule. @@ -512,7 +512,7 @@ func (FilterInputJumpReturn) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputJumpReturn) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputJumpReturnDrop jumps to a chain, returns, and DROPs packets. @@ -549,7 +549,7 @@ func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputJumpReturnDrop) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, dropPort, sendloopDuration) + return spawnUDPLoop(ip, dropPort, sendloopDuration) } // FilterInputJumpBuiltin verifies that jumping to a top-levl chain is illegal. @@ -604,7 +604,7 @@ func (FilterInputJumpTwice) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputJumpTwice) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputDestination verifies that we can filter packets via `-d @@ -638,7 +638,7 @@ func (FilterInputDestination) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. 
func (FilterInputDestination) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputInvertDestination verifies that we can filter packets via `! -d @@ -667,7 +667,7 @@ func (FilterInputInvertDestination) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputInvertDestination) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputSource verifies that we can filter packets via `-s @@ -696,7 +696,7 @@ func (FilterInputSource) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputSource) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // FilterInputInvertSource verifies that we can filter packets via `! -s @@ -725,5 +725,5 @@ func (FilterInputInvertSource) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (FilterInputInvertSource) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index d4bc55b24..174694002 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -84,17 +84,42 @@ func listenUDP(port int, timeout time.Duration) error { // sendUDPLoop sends 1 byte UDP packets repeatedly to the IP and port specified // over a duration. func sendUDPLoop(ip net.IP, port int, duration time.Duration) error { - // Send packets for a few seconds. + conn, err := connectUDP(ip, port) + if err != nil { + return err + } + defer conn.Close() + loopUDP(conn, duration) + return nil +} + +// spawnUDPLoop works like sendUDPLoop, but returns immediately and sends +// packets in another goroutine. +func spawnUDPLoop(ip net.IP, port int, duration time.Duration) error { + conn, err := connectUDP(ip, port) + if err != nil { + return err + } + go func() { + defer conn.Close() + loopUDP(conn, duration) + }() + return nil +} + +func connectUDP(ip net.IP, port int) (net.Conn, error) { remote := net.UDPAddr{ IP: ip, Port: port, } conn, err := net.DialUDP(network, nil, &remote) if err != nil { - return err + return nil, err } - defer conn.Close() + return conn, nil +} +func loopUDP(conn net.Conn, duration time.Duration) { to := time.After(duration) for timedOut := false; !timedOut; { // This may return an error (connection refused) if the remote @@ -109,8 +134,6 @@ func sendUDPLoop(ip net.IP, port int, duration time.Duration) error { time.Sleep(200 * time.Millisecond) } } - - return nil } // listenTCP listens for connections on a TCP port. diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 149dec2bb..23288577d 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -67,7 +67,7 @@ func (NATPreRedirectUDPPort) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (NATPreRedirectUDPPort) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // NATPreRedirectTCPPort tests that connections are redirected on specified ports. @@ -187,7 +187,7 @@ func (NATDropUDP) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. 
func (NATDropUDP) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // NATAcceptAll tests that all UDP packets are accepted. @@ -213,7 +213,7 @@ func (NATAcceptAll) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (NATAcceptAll) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // NATOutRedirectIP uses iptables to select packets based on destination IP and @@ -310,7 +310,7 @@ func (NATPreRedirectIP) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (NATPreRedirectIP) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, dropPort, sendloopDuration) + return spawnUDPLoop(ip, dropPort, sendloopDuration) } // NATPreDontRedirectIP tests that iptables matching with "-d" does not match @@ -332,7 +332,7 @@ func (NATPreDontRedirectIP) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (NATPreDontRedirectIP) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return spawnUDPLoop(ip, acceptPort, sendloopDuration) } // NATPreRedirectInvert tests that iptables can match with "! -d". @@ -353,7 +353,7 @@ func (NATPreRedirectInvert) ContainerAction(ip net.IP) error { // LocalAction implements TestCase.LocalAction. func (NATPreRedirectInvert) LocalAction(ip net.IP) error { - return sendUDPLoop(ip, dropPort, sendloopDuration) + return spawnUDPLoop(ip, dropPort, sendloopDuration) } // NATRedirectRequiresProtocol tests that use of the --to-ports flag requires a -- cgit v1.2.3 From 91a47a40a8490aaeceb5a8162eb47b758ca738a1 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 24 Jul 2020 17:27:11 -0700 Subject: Bugfix: non-native tests were tagged as native Copy the list of tags when passing it to _syscall_test. --- test/runner/defs.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 600cb5192..c92392b35 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -157,7 +157,7 @@ def syscall_test( platform = "native", use_tmpfs = False, add_uds_tree = add_uds_tree, - tags = tags, + tags = list(tags), ) for (platform, platform_tags) in platforms.items(): -- cgit v1.2.3 From d6b676ae6ac6ab6f3520d6d2663b9d71c29a3788 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 25 Jul 2020 01:03:02 -0700 Subject: test/syscall: run each test case in a separate network namespace ... when it is possible. The guitar gVisorKernel*Workflow-s runs test with the local execution_method. In this case, blaze runs test cases locally without sandboxes. This means that all tests run in the same network namespace. We have a few tests which use hard-coded network ports and they can fail if one of these port will be used by someone else or by another test cases. 
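The patch that follows implements this isolation by cloning a fresh network namespace around each test binary when the runner holds CAP_NET_ADMIN. As a rough, Linux-only illustration of that approach (not the patch itself; the helper name is hypothetical), a runner might do something like:

```
// Illustrative sketch only (Linux-specific): run a test binary in its own
// network namespace so test cases that bind hard-coded ports cannot collide.
// Assumes the caller has already verified it holds CAP_NET_ADMIN; otherwise
// CLONE_NEWNET fails with EPERM.
package runner

import (
	"os"
	"os/exec"
	"syscall"
)

func runInNewNetNS(testBin string, args ...string) error {
	cmd := exec.Command(testBin, args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	// A fresh network namespace per invocation gives each test case its
	// own port space.
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Cloneflags: syscall.CLONE_NEWNET,
	}
	return cmd.Run()
}
```
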
PiperOrigin-RevId: 323137254 --- runsc/fsgofer/fsgofer.go | 2 +- test/runner/BUILD | 1 + test/runner/runner.go | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 723e118e1..ebefeacf2 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -606,7 +606,7 @@ func (l *localFile) fillAttr(stat syscall.Stat_t) (p9.AttrMask, p9.Attr) { Mode: p9.FileMode(stat.Mode), UID: p9.UID(stat.Uid), GID: p9.GID(stat.Gid), - NLink: stat.Nlink, + NLink: uint64(stat.Nlink), RDev: stat.Rdev, Size: uint64(stat.Size), BlockSize: uint64(stat.Blksize), diff --git a/test/runner/BUILD b/test/runner/BUILD index 1f45a6922..63c7ec83a 100644 --- a/test/runner/BUILD +++ b/test/runner/BUILD @@ -17,6 +17,7 @@ go_binary( "//test/runner/gtest", "//test/uds", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@com_github_syndtr_gocapability//capability:go_default_library", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/test/runner/runner.go b/test/runner/runner.go index 2296f3a46..bc4b39cbb 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -30,6 +30,7 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/syndtr/gocapability/capability" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/test/testutil" @@ -105,6 +106,13 @@ func runTestCaseNative(testBin string, tc gtest.TestCase, t *testing.T) { cmd.Env = env cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr + + if specutils.HasCapabilities(capability.CAP_NET_ADMIN) { + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWNET, + } + } + if err := cmd.Run(); err != nil { ws := err.(*exec.ExitError).Sys().(syscall.WaitStatus) t.Errorf("test %q exited with status %d, want 0", tc.FullName(), ws.ExitStatus()) -- cgit v1.2.3 From 2ecf66903ed3da46fa021feeeeccad81cd82eaa6 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Sun, 26 Jul 2020 22:01:16 -0700 Subject: Add profiling to dockerutil Adds profiling with `runsc debug` or pprof to dockerutil. All targets using dockerutil should now be able to use profiling. In addition, modifies existing benchmarks to use profiling. 
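For context, the dockerutil API introduced below lets a test or benchmark attach profiles to a container explicitly rather than only through the --pprof-* flags. A minimal sketch, assuming the Pprof type and Container methods exactly as they appear in this patch; the image name and output directory are placeholders:

```
// Rough sketch: attach a CPU profile to the container under test and restart
// it after setup so only the interesting work is profiled.
package example

import (
	"context"
	"testing"
	"time"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

func benchmarkWithCPUProfile(ctx context.Context, b *testing.B) {
	c := dockerutil.MakeContainer(ctx, b)
	defer c.CleanUp(ctx)

	// Attach a profile explicitly instead of relying on the --pprof-cpu
	// flag; its OnCreate/OnStart/OnCleanUp hooks run automatically.
	c.AddProfile(&dockerutil.Pprof{
		BasePath:   "/tmp/profile/example", // placeholder output directory
		CPUProfile: true,
		Duration:   30 * time.Second,
	})

	if err := c.Spawn(ctx, dockerutil.RunOpts{
		Image: "benchmarks/absl", // placeholder image
	}, "sleep", "1000"); err != nil {
		b.Fatalf("spawn failed: %v", err)
	}

	// ... setup that should not show up in the profile ...

	// Restart the profiling clock, then do the measured work.
	if err := c.RestartProfiles(); err != nil {
		b.Fatalf("failed to restart profiles: %v", err)
	}
}
```
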
PiperOrigin-RevId: 323298634 --- pkg/test/dockerutil/BUILD | 19 +++- pkg/test/dockerutil/README.md | 86 +++++++++++++++ pkg/test/dockerutil/container.go | 82 ++++++++++---- pkg/test/dockerutil/dockerutil.go | 21 ++++ pkg/test/dockerutil/profile.go | 152 ++++++++++++++++++++++++++ pkg/test/dockerutil/profile_test.go | 117 ++++++++++++++++++++ scripts/benchmark.sh | 30 ----- scripts/common.sh | 27 ----- test/benchmarks/README.md | 81 ++++++++++---- test/benchmarks/fs/bazel_test.go | 32 ++++-- test/benchmarks/harness/machine.go | 12 +- test/benchmarks/harness/util.go | 2 +- test/benchmarks/network/BUILD | 1 + test/benchmarks/network/httpd_test.go | 9 +- test/benchmarks/network/iperf_test.go | 40 ++++--- test/packetimpact/runner/packetimpact_test.go | 5 +- 16 files changed, 582 insertions(+), 134 deletions(-) create mode 100644 pkg/test/dockerutil/README.md create mode 100644 pkg/test/dockerutil/profile.go create mode 100644 pkg/test/dockerutil/profile_test.go delete mode 100755 scripts/benchmark.sh (limited to 'test') diff --git a/pkg/test/dockerutil/BUILD b/pkg/test/dockerutil/BUILD index 83b80c8bc..a5e84658a 100644 --- a/pkg/test/dockerutil/BUILD +++ b/pkg/test/dockerutil/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,6 +10,7 @@ go_library( "dockerutil.go", "exec.go", "network.go", + "profile.go", ], visibility = ["//:sandbox"], deps = [ @@ -23,3 +24,19 @@ go_library( "@com_github_docker_go_connections//nat:go_default_library", ], ) + +go_test( + name = "profile_test", + size = "large", + srcs = [ + "profile_test.go", + ], + library = ":dockerutil", + tags = [ + # Requires docker and runsc to be configured before test runs. + # Also requires the test to be run as root. + "manual", + "local", + ], + visibility = ["//:sandbox"], +) diff --git a/pkg/test/dockerutil/README.md b/pkg/test/dockerutil/README.md new file mode 100644 index 000000000..870292096 --- /dev/null +++ b/pkg/test/dockerutil/README.md @@ -0,0 +1,86 @@ +# dockerutil + +This package is for creating and controlling docker containers for testing +runsc, gVisor's docker/kubernetes binary. A simple test may look like: + +``` + func TestSuperCool(t *testing.T) { + ctx := context.Background() + c := dockerutil.MakeContainer(ctx, t) + got, err := c.Run(ctx, dockerutil.RunOpts{ + Image: "basic/alpine" + }, "echo", "super cool") + if err != nil { + t.Fatalf("err was not nil: %v", err) + } + want := "super cool" + if !strings.Contains(got, want){ + t.Fatalf("want: %s, got: %s", want, got) + } + } +``` + +For further examples, see many of our end to end tests elsewhere in the repo, +such as those in //test/e2e or benchmarks at //test/benchmarks. + +dockerutil uses the "official" docker golang api, which is +[very powerful](https://godoc.org/github.com/docker/docker/client). dockerutil +is a thin wrapper around this API, allowing desired new use cases to be easily +implemented. + +## Profiling + +dockerutil is capable of generating profiles. Currently, the only option is to +use pprof profiles generated by `runsc debug`. The profiler will generate Block, +CPU, Heap, Goroutine, and Mutex profiles. To generate profiles: + +* Install runsc with the `--profile` flag: `make configure RUNTIME=myrunsc + ARGS="--profile"` Also add other flags with ARGS like `--platform=kvm` or + `--vfs2`. 
+* Restart docker: `sudo service docker restart` + +To run and generate CPU profiles run: + +``` +make sudo TARGETS=//path/to:target \ + ARGS="--runtime=myrunsc -test.v -test.bench=. --pprof-cpu" OPTIONS="-c opt" +``` + +Profiles would be at: `/tmp/profile/myrunsc/CONTAINERNAME/cpu.pprof` + +Container name in most tests and benchmarks in gVisor is usually the test name +and some random characters like so: +`BenchmarkABSL-CleanCache-JF2J2ZYF3U7SL47QAA727CSJI3C4ZAW2` + +Profiling requires root as runsc debug inspects running containers in /var/run +among other things. + +### Writing for Profiling + +The below shows an example of using profiles with dockerutil. + +``` +func TestSuperCool(t *testing.T){ + ctx := context.Background() + // profiled and using runtime from dockerutil.runtime flag + profiled := MakeContainer() + + // not profiled and using runtime runc + native := MakeNativeContainer() + + err := profiled.Spawn(ctx, RunOpts{ + Image: "some/image", + }, "sleep", "100000") + // profiling has begun here + ... + expensive setup that I don't want to profile. + ... + profiled.RestartProfiles() + // profiled activity +} +``` + +In the above example, `profiled` would be profiled and `native` would not. The +call to `RestartProfiles()` restarts the clock on profiling. This is useful if +the main activity being tested is done with `docker exec` or `container.Spawn()` +followed by one or more `container.Exec()` calls. diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go index 17acdaf6f..b59503188 100644 --- a/pkg/test/dockerutil/container.go +++ b/pkg/test/dockerutil/container.go @@ -43,15 +43,21 @@ import ( // See: https://pkg.go.dev/github.com/docker/docker. type Container struct { Name string - Runtime string + runtime string logger testutil.Logger client *client.Client id string mounts []mount.Mount links []string - cleanups []func() copyErr error + cleanups []func() + + // Profiles are profiles added to this container. They contain methods + // that are run after Creation, Start, and Cleanup of this Container, along + // a handle to restart the profile. Generally, tests/benchmarks using + // profiles need to run as root. + profiles []Profile // Stores streams attached to the container. Used by WaitForOutputSubmatch. streams types.HijackedResponse @@ -106,7 +112,19 @@ type RunOpts struct { // MakeContainer sets up the struct for a Docker container. // // Names of containers will be unique. +// Containers will check flags for profiling requests. func MakeContainer(ctx context.Context, logger testutil.Logger) *Container { + c := MakeNativeContainer(ctx, logger) + c.runtime = *runtime + if p := MakePprofFromFlags(c); p != nil { + c.AddProfile(p) + } + return c +} + +// MakeNativeContainer sets up the struct for a DockerContainer using runc. Native +// containers aren't profiled. +func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container { // Slashes are not allowed in container names. name := testutil.RandomID(logger.Name()) name = strings.ReplaceAll(name, "/", "-") @@ -114,20 +132,33 @@ func MakeContainer(ctx context.Context, logger testutil.Logger) *Container { if err != nil { return nil } - client.NegotiateAPIVersion(ctx) - return &Container{ logger: logger, Name: name, - Runtime: *runtime, + runtime: "", client: client, } } +// AddProfile adds a profile to this container. +func (c *Container) AddProfile(p Profile) { + c.profiles = append(c.profiles, p) +} + +// RestartProfiles calls Restart on all profiles for this container. 
+func (c *Container) RestartProfiles() error { + for _, profile := range c.profiles { + if err := profile.Restart(c); err != nil { + return err + } + } + return nil +} + // Spawn is analogous to 'docker run -d'. func (c *Container) Spawn(ctx context.Context, r RunOpts, args ...string) error { - if err := c.create(ctx, r, args); err != nil { + if err := c.create(ctx, c.config(r, args), c.hostConfig(r), nil); err != nil { return err } return c.Start(ctx) @@ -153,7 +184,7 @@ func (c *Container) SpawnProcess(ctx context.Context, r RunOpts, args ...string) // Run is analogous to 'docker run'. func (c *Container) Run(ctx context.Context, r RunOpts, args ...string) (string, error) { - if err := c.create(ctx, r, args); err != nil { + if err := c.create(ctx, c.config(r, args), c.hostConfig(r), nil); err != nil { return "", err } @@ -181,27 +212,25 @@ func (c *Container) MakeLink(target string) string { // CreateFrom creates a container from the given configs. func (c *Container) CreateFrom(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error { - cont, err := c.client.ContainerCreate(ctx, conf, hostconf, netconf, c.Name) - if err != nil { - return err - } - c.id = cont.ID - return nil + return c.create(ctx, conf, hostconf, netconf) } // Create is analogous to 'docker create'. func (c *Container) Create(ctx context.Context, r RunOpts, args ...string) error { - return c.create(ctx, r, args) + return c.create(ctx, c.config(r, args), c.hostConfig(r), nil) } -func (c *Container) create(ctx context.Context, r RunOpts, args []string) error { - conf := c.config(r, args) - hostconf := c.hostConfig(r) +func (c *Container) create(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error { cont, err := c.client.ContainerCreate(ctx, conf, hostconf, nil, c.Name) if err != nil { return err } c.id = cont.ID + for _, profile := range c.profiles { + if err := profile.OnCreate(c); err != nil { + return fmt.Errorf("OnCreate method failed with: %v", err) + } + } return nil } @@ -227,7 +256,7 @@ func (c *Container) hostConfig(r RunOpts) *container.HostConfig { c.mounts = append(c.mounts, r.Mounts...) return &container.HostConfig{ - Runtime: c.Runtime, + Runtime: c.runtime, Mounts: c.mounts, PublishAllPorts: true, Links: r.Links, @@ -261,8 +290,15 @@ func (c *Container) Start(ctx context.Context) error { c.cleanups = append(c.cleanups, func() { c.streams.Close() }) - - return c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{}) + if err := c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{}); err != nil { + return fmt.Errorf("ContainerStart failed: %v", err) + } + for _, profile := range c.profiles { + if err := profile.OnStart(c); err != nil { + return fmt.Errorf("OnStart method failed: %v", err) + } + } + return nil } // Stop is analogous to 'docker stop'. @@ -482,6 +518,12 @@ func (c *Container) Remove(ctx context.Context) error { // CleanUp kills and deletes the container (best effort). func (c *Container) CleanUp(ctx context.Context) { + // Execute profile cleanups before the container goes down. + for _, profile := range c.profiles { + profile.OnCleanUp(c) + } + // Forget profiles. + c.profiles = nil // Kill the container. if err := c.Kill(ctx); err != nil && !strings.Contains(err.Error(), "is not running") { // Just log; can't do anything here. 
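The create(), Start() and CleanUp() changes above exist so that arbitrary Profile implementations can hook into the container lifecycle, not just the flag-driven Pprof one. A minimal sketch of a custom Profile, assuming the interface added in profile.go below; the logProfile type is hypothetical and only records lifecycle events:

```
// Minimal sketch of a custom Profile (OnCreate, OnStart, Restart, OnCleanUp).
// The container code above invokes these hooks from create(), Start() and
// CleanUp() respectively.
package example

import (
	"log"

	"gvisor.dev/gvisor/pkg/test/dockerutil"
)

type logProfile struct{}

func (logProfile) OnCreate(c *dockerutil.Container) error {
	log.Printf("created container %s", c.Name)
	return nil
}

func (logProfile) OnStart(c *dockerutil.Container) error {
	log.Printf("started container %s", c.Name)
	return nil
}

func (logProfile) Restart(c *dockerutil.Container) error {
	log.Printf("profile restarted for %s", c.Name)
	return nil
}

func (logProfile) OnCleanUp(c *dockerutil.Container) error {
	log.Printf("cleaning up container %s", c.Name)
	return nil
}
```

Such a profile would be registered with c.AddProfile(logProfile{}) before Create or Spawn, the same way MakeContainer registers the flag-driven Pprof profile.
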
diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index df09babf3..5a9dd8bd8 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -25,6 +25,7 @@ import ( "os/exec" "regexp" "strconv" + "time" "gvisor.dev/gvisor/pkg/test/testutil" ) @@ -42,6 +43,26 @@ var ( // config is the default Docker daemon configuration path. config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths") + + // The following flags are for the "pprof" profiler tool. + + // pprofBaseDir allows the user to change the directory to which profiles are + // written. By default, profiles will appear under: + // /tmp/profile/RUNTIME/CONTAINER_NAME/*.pprof. + pprofBaseDir = flag.String("pprof-dir", "/tmp/profile", "base directory in: BASEDIR/RUNTIME/CONTINER_NAME/FILENAME (e.g. /tmp/profile/runtime/mycontainer/cpu.pprof)") + + // duration is the max duration `runsc debug` will run and capture profiles. + // If the container's clean up method is called prior to duration, the + // profiling process will be killed. + duration = flag.Duration("pprof-duration", 10*time.Second, "duration to run the profile in seconds") + + // The below flags enable each type of profile. Multiple profiles can be + // enabled for each run. + pprofBlock = flag.Bool("pprof-block", false, "enables block profiling with runsc debug") + pprofCPU = flag.Bool("pprof-cpu", false, "enables CPU profiling with runsc debug") + pprofGo = flag.Bool("pprof-go", false, "enables goroutine profiling with runsc debug") + pprofHeap = flag.Bool("pprof-heap", false, "enables heap profiling with runsc debug") + pprofMutex = flag.Bool("pprof-mutex", false, "enables mutex profiling with runsc debug") ) // EnsureSupportedDockerVersion checks if correct docker is installed. diff --git a/pkg/test/dockerutil/profile.go b/pkg/test/dockerutil/profile.go new file mode 100644 index 000000000..1fab33083 --- /dev/null +++ b/pkg/test/dockerutil/profile.go @@ -0,0 +1,152 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dockerutil + +import ( + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "time" +) + +// Profile represents profile-like operations on a container, +// such as running perf or pprof. It is meant to be added to containers +// such that the container type calls the Profile during its lifecycle. +type Profile interface { + // OnCreate is called just after the container is created when the container + // has a valid ID (e.g. c.ID()). + OnCreate(c *Container) error + + // OnStart is called just after the container is started when the container + // has a valid Pid (e.g. c.SandboxPid()). + OnStart(c *Container) error + + // Restart restarts the Profile on request. + Restart(c *Container) error + + // OnCleanUp is called during the container's cleanup method. + // Cleanups should just log errors if they have them. 
+ OnCleanUp(c *Container) error +} + +// Pprof is for running profiles with 'runsc debug'. Pprof workloads +// should be run as root and ONLY against runsc sandboxes. The runtime +// should have --profile set as an option in /etc/docker/daemon.json in +// order for profiling to work with Pprof. +type Pprof struct { + BasePath string // path to put profiles + BlockProfile bool + CPUProfile bool + GoRoutineProfile bool + HeapProfile bool + MutexProfile bool + Duration time.Duration // duration to run profiler e.g. '10s' or '1m'. + shouldRun bool + cmd *exec.Cmd + stdout io.ReadCloser + stderr io.ReadCloser +} + +// MakePprofFromFlags makes a Pprof profile from flags. +func MakePprofFromFlags(c *Container) *Pprof { + if !(*pprofBlock || *pprofCPU || *pprofGo || *pprofHeap || *pprofMutex) { + return nil + } + return &Pprof{ + BasePath: filepath.Join(*pprofBaseDir, c.runtime, c.Name), + BlockProfile: *pprofBlock, + CPUProfile: *pprofCPU, + GoRoutineProfile: *pprofGo, + HeapProfile: *pprofHeap, + MutexProfile: *pprofMutex, + Duration: *duration, + } +} + +// OnCreate implements Profile.OnCreate. +func (p *Pprof) OnCreate(c *Container) error { + return os.MkdirAll(p.BasePath, 0755) +} + +// OnStart implements Profile.OnStart. +func (p *Pprof) OnStart(c *Container) error { + path, err := RuntimePath() + if err != nil { + return fmt.Errorf("failed to get runtime path: %v", err) + } + + // The root directory of this container's runtime. + root := fmt.Sprintf("--root=/var/run/docker/runtime-%s/moby", c.runtime) + // Format is `runsc --root=rootdir debug --profile-*=file --duration=* containerID`. + args := []string{root, "debug"} + args = append(args, p.makeProfileArgs(c)...) + args = append(args, c.ID()) + + // Best effort wait until container is running. + for now := time.Now(); time.Since(now) < 5*time.Second; { + if status, err := c.Status(context.Background()); err != nil { + return fmt.Errorf("failed to get status with: %v", err) + + } else if status.Running { + break + } + time.Sleep(500 * time.Millisecond) + } + p.cmd = exec.Command(path, args...) + if err := p.cmd.Start(); err != nil { + return fmt.Errorf("process failed: %v", err) + } + return nil +} + +// Restart implements Profile.Restart. +func (p *Pprof) Restart(c *Container) error { + p.OnCleanUp(c) + return p.OnStart(c) +} + +// OnCleanUp implements Profile.OnCleanup +func (p *Pprof) OnCleanUp(c *Container) error { + defer func() { p.cmd = nil }() + if p.cmd != nil && p.cmd.Process != nil && p.cmd.ProcessState != nil && !p.cmd.ProcessState.Exited() { + return p.cmd.Process.Kill() + } + return nil +} + +// makeProfileArgs turns Pprof fields into runsc debug flags. 
+func (p *Pprof) makeProfileArgs(c *Container) []string { + var ret []string + if p.BlockProfile { + ret = append(ret, fmt.Sprintf("--profile-block=%s", filepath.Join(p.BasePath, "block.pprof"))) + } + if p.CPUProfile { + ret = append(ret, fmt.Sprintf("--profile-cpu=%s", filepath.Join(p.BasePath, "cpu.pprof"))) + } + if p.GoRoutineProfile { + ret = append(ret, fmt.Sprintf("--profile-goroutine=%s", filepath.Join(p.BasePath, "go.pprof"))) + } + if p.HeapProfile { + ret = append(ret, fmt.Sprintf("--profile-heap=%s", filepath.Join(p.BasePath, "heap.pprof"))) + } + if p.MutexProfile { + ret = append(ret, fmt.Sprintf("--profile-mutex=%s", filepath.Join(p.BasePath, "mutex.pprof"))) + } + ret = append(ret, fmt.Sprintf("--duration=%s", p.Duration)) + return ret +} diff --git a/pkg/test/dockerutil/profile_test.go b/pkg/test/dockerutil/profile_test.go new file mode 100644 index 000000000..b7b4d7618 --- /dev/null +++ b/pkg/test/dockerutil/profile_test.go @@ -0,0 +1,117 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dockerutil + +import ( + "context" + "fmt" + "os" + "path/filepath" + "testing" + "time" +) + +type testCase struct { + name string + pprof Pprof + expectedFiles []string +} + +func TestPprof(t *testing.T) { + // Basepath and expected file names for each type of profile. + basePath := "/tmp/test/profile" + block := "block.pprof" + cpu := "cpu.pprof" + goprofle := "go.pprof" + heap := "heap.pprof" + mutex := "mutex.pprof" + + testCases := []testCase{ + { + name: "Cpu", + pprof: Pprof{ + BasePath: basePath, + CPUProfile: true, + Duration: 2 * time.Second, + }, + expectedFiles: []string{cpu}, + }, + { + name: "All", + pprof: Pprof{ + BasePath: basePath, + BlockProfile: true, + CPUProfile: true, + GoRoutineProfile: true, + HeapProfile: true, + MutexProfile: true, + Duration: 2 * time.Second, + }, + expectedFiles: []string{block, cpu, goprofle, heap, mutex}, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + c := MakeContainer(ctx, t) + // Set basepath to include the container name so there are no conflicts. + tc.pprof.BasePath = filepath.Join(tc.pprof.BasePath, c.Name) + c.AddProfile(&tc.pprof) + + func() { + defer c.CleanUp(ctx) + // Start a container. + if err := c.Spawn(ctx, RunOpts{ + Image: "basic/alpine", + }, "sleep", "1000"); err != nil { + t.Fatalf("run failed with: %v", err) + } + + if status, err := c.Status(context.Background()); !status.Running { + t.Fatalf("container is not yet running: %+v err: %v", status, err) + } + + // End early if the expected files exist and have data. + for start := time.Now(); time.Since(start) < tc.pprof.Duration; time.Sleep(500 * time.Millisecond) { + if err := checkFiles(tc); err == nil { + break + } + } + }() + + // Check all expected files exist and have data. 
+ if err := checkFiles(tc); err != nil { + t.Fatalf(err.Error()) + } + }) + } +} + +func checkFiles(tc testCase) error { + for _, file := range tc.expectedFiles { + stat, err := os.Stat(filepath.Join(tc.pprof.BasePath, file)) + if err != nil { + return fmt.Errorf("stat failed with: %v", err) + } else if stat.Size() < 1 { + return fmt.Errorf("file not written to: %+v", stat) + } + } + return nil +} + +func TestMain(m *testing.M) { + EnsureSupportedDockerVersion() + os.Exit(m.Run()) +} diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh deleted file mode 100755 index c49f988b8..000000000 --- a/scripts/benchmark.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# Copyright 2020 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -source $(dirname $0)/common.sh - -make load-all-images - -if [[ -z "${1:-}" ]]; then - target=$(query "attr(tags, manual, tests(//test/benchmarks/...))") -else - target="$1" -fi - -install_runsc_for_benchmarks benchmark - -echo $target -benchmark_runsc $target "${@:2}" diff --git a/scripts/common.sh b/scripts/common.sh index 36158654f..3ca699e4a 100755 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -42,15 +42,6 @@ function test_runsc() { test --test_arg=--runtime=${RUNTIME} "$@" } -function benchmark_runsc() { - test_runsc -c opt \ - --nocache_test_results \ - --test_arg=-test.bench=. \ - --test_arg=-test.benchmem \ - --jobs=1 \ - "$@" -} - function install_runsc_for_test() { local -r test_name=$1 shift @@ -72,24 +63,6 @@ function install_runsc_for_test() { "$@" } -function install_runsc_for_benchmarks() { - local -r test_name=$1 - shift - if [[ -z "${test_name}" ]]; then - echo "Missing mandatory test name" - exit 1 - fi - - # Add test to the name, so it doesn't conflict with other runtimes. - set_runtime $(find_branch_name)_"${test_name}" - - # ${RUNSC_TEST_NAME} is set by tests (see dockerutil) to pass the test name - # down to the runtime. - install_runsc "${RUNTIME}" \ - --TESTONLY-test-name-env=RUNSC_TEST_NAME \ - "$@" -} - # Installs the runsc with given runtime name. set_runtime must have been called # to set runtime and logs location. function install_runsc() { diff --git a/test/benchmarks/README.md b/test/benchmarks/README.md index 9ff602cf1..d1bbabf6f 100644 --- a/test/benchmarks/README.md +++ b/test/benchmarks/README.md @@ -13,33 +13,51 @@ To run benchmarks you will need: * Docker installed (17.09.0 or greater). -The easiest way to run benchmarks is to use the script at -//scripts/benchmark.sh. +The easiest way to setup runsc for running benchmarks is to use the make file. +From the root directory: -If not using the script, you will need: +* Download images: `make load-all-images` +* Install runsc suitable for benchmarking, which should probably not have + strace or debug logs enabled. For example:`make configure RUNTIME=myrunsc + ARGS=--platform=kvm`. 
+* Restart docker: `sudo service docker restart` -* `runsc` configured with docker +You should now have a runtime with the following options configured in +`/etc/docker/daemon.json` -Note: benchmarks call the runtime by name. If docker can run it with -`--runtime=` flag, these tools should work. +``` +"myrunsc": { + "path": "/tmp/myrunsc/runsc", + "runtimeArgs": [ + "--debug-log", + "/tmp/bench/logs/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND%", + "--platform=kvm" + ] + }, + +``` + +This runtime has been configured with a debugging off and strace logs off and is +using kvm for demonstration. ## Running benchmarks -The easiest way to run is with the script at //scripts/benchmarks.sh. The script -will run all benchmarks under //test/benchmarks if a target is not provided. +Given the runtime above runtime `myrunsc`, run benchmarks with the following: -```bash -./script/benchmarks.sh //path/to/target +``` +make sudo TARGETS=//path/to:target ARGS="--runtime=myrunsc -test.v \ + -test.bench=." OPTIONS="-c opt ``` -If you want to run benchmarks manually: - -* Run `make load-all-images` from `//` -* Run with: +For example, to run only the Iperf tests: -```bash -bazel test --test_arg=--runtime=RUNTIME -c opt --test_output=streamed --test_timeout=600 --test_arg=-test.bench=. --nocache_test_results //path/to/target ``` +make sudo TARGETS=//test/benchmarks/network:network_test \ + ARGS="--runtime=myrunsc -test.v -test.bench=Iperf" OPTIONS="-c opt" +``` + +Benchmarks are run with root as some benchmarks require root privileges to do +things like drop caches. ## Writing benchmarks @@ -69,6 +87,7 @@ var h harness.Harness func BenchmarkMyCoolOne(b *testing.B) { machine, err := h.GetMachine() // check err + defer machine.CleanUp() ctx := context.Background() container := machine.GetContainer(ctx, b) @@ -82,7 +101,7 @@ func BenchmarkMyCoolOne(b *testing.B) { Image: "benchmarks/my-cool-image", Env: []string{"MY_VAR=awesome"}, other options...see dockerutil - }, "sh", "-c", "echo MY_VAR" ...) + }, "sh", "-c", "echo MY_VAR") //check err b.StopTimer() @@ -107,12 +126,32 @@ Some notes on the above: flags, remote virtual machines (eventually), and other services. * Respect `b.N` in that users of the benchmark may want to "run for an hour" or something of the sort. -* Use the `b.ReportMetric` method to report custom metrics. +* Use the `b.ReportMetric()` method to report custom metrics. * Set the timer if time is useful for reporting. There isn't a way to turn off default metrics in testing.B (B/op, allocs/op, ns/op). * Take a look at dockerutil at //pkg/test/dockerutil to see all methods available from containers. The API is based on the "official" [docker API for golang](https://pkg.go.dev/mod/github.com/docker/docker). -* `harness.GetMachine` marks how many machines this tests needs. If you have a - client and server and to mark them as multiple machines, call it - `GetMachine` twice. +* `harness.GetMachine()` marks how many machines this tests needs. If you have + a client and server and to mark them as multiple machines, call + `harness.GetMachine()` twice. + +## Profiling + +For profiling, the runtime is required to have the `--profile` flag enabled. +This flag loosens seccomp filters so that the runtime can write profile data to +disk. This configuration is not recommended for production. + +* Install runsc with the `--profile` flag: `make configure RUNTIME=myrunsc + ARGS="--profile --platform=kvm --vfs2"`. The kvm and vfs2 flags are not + required, but are included for demonstration. 
+* Restart docker: `sudo service docker restart` + +To run and generate CPU profiles fs_test test run: + +``` +make sudo TARGETS=//test/benchmarks/fs:fs_test \ + ARGS="--runtime=myrunsc -test.v -test.bench=. --pprof-cpu" OPTIONS="-c opt" +``` + +Profiles would be at: `/tmp/profile/myrunsc/CONTAINERNAME/cpu.pprof` diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go index fdcac1a7a..9b652fd43 100644 --- a/test/benchmarks/fs/bazel_test.go +++ b/test/benchmarks/fs/bazel_test.go @@ -15,6 +15,7 @@ package fs import ( "context" + "fmt" "strings" "testing" @@ -51,10 +52,10 @@ func BenchmarkABSL(b *testing.B) { workdir := "/abseil-cpp" - // Start a container. + // Start a container and sleep by an order of b.N. if err := container.Spawn(ctx, dockerutil.RunOpts{ Image: "benchmarks/absl", - }, "sleep", "1000"); err != nil { + }, "sleep", fmt.Sprintf("%d", 1000000)); err != nil { b.Fatalf("run failed with: %v", err) } @@ -67,15 +68,21 @@ func BenchmarkABSL(b *testing.B) { workdir = "/tmp" + workdir } - // Drop Caches. - if bm.clearCache { - if out, err := machine.RunCommand("/bin/sh -c sync; echo 3 > /proc/sys/vm/drop_caches"); err != nil { - b.Fatalf("failed to drop caches: %v %s", err, out) - } - } - + // Restart profiles after the copy. + container.RestartProfiles() b.ResetTimer() + // Drop Caches and bazel clean should happen inside the loop as we may use + // time options with b.N. (e.g. Run for an hour.) for i := 0; i < b.N; i++ { + b.StopTimer() + // Drop Caches for clear cache runs. + if bm.clearCache { + if out, err := machine.RunCommand("/bin/sh", "-c", "sync && sysctl vm.drop_caches=3"); err != nil { + b.Skipf("failed to drop caches: %v %s. You probably need root.", err, out) + } + } + b.StartTimer() + got, err := container.Exec(ctx, dockerutil.ExecOpts{ WorkDir: workdir, }, "bazel", "build", "-c", "opt", "absl/base/...") @@ -88,6 +95,13 @@ func BenchmarkABSL(b *testing.B) { if !strings.Contains(got, want) { b.Fatalf("string %s not in: %s", want, got) } + // Clean bazel in case we use b.N. + _, err = container.Exec(ctx, dockerutil.ExecOpts{ + WorkDir: workdir, + }, "bazel", "clean") + if err != nil { + b.Fatalf("build failed with: %v", err) + } b.StartTimer() } }) diff --git a/test/benchmarks/harness/machine.go b/test/benchmarks/harness/machine.go index 93c0db9ce..88e5e841b 100644 --- a/test/benchmarks/harness/machine.go +++ b/test/benchmarks/harness/machine.go @@ -25,9 +25,14 @@ import ( // Machine describes a real machine for use in benchmarks. type Machine interface { - // GetContainer gets a container from the machine, + // GetContainer gets a container from the machine. The container uses the + // runtime under test and is profiled if requested by flags. GetContainer(ctx context.Context, log testutil.Logger) *dockerutil.Container + // GetNativeContainer gets a native container from the machine. Native containers + // use runc by default and are not profiled. + GetNativeContainer(ctx context.Context, log testutil.Logger) *dockerutil.Container + // RunCommand runs cmd on this machine. RunCommand(cmd string, args ...string) (string, error) @@ -47,6 +52,11 @@ func (l *localMachine) GetContainer(ctx context.Context, logger testutil.Logger) return dockerutil.MakeContainer(ctx, logger) } +// GetContainer implements Machine.GetContainer for localMachine. 
+func (l *localMachine) GetNativeContainer(ctx context.Context, logger testutil.Logger) *dockerutil.Container { + return dockerutil.MakeNativeContainer(ctx, logger) +} + // RunCommand implements Machine.RunCommand for localMachine. func (l *localMachine) RunCommand(cmd string, args ...string) (string, error) { c := exec.Command(cmd, args...) diff --git a/test/benchmarks/harness/util.go b/test/benchmarks/harness/util.go index cc7de6426..7f8e42201 100644 --- a/test/benchmarks/harness/util.go +++ b/test/benchmarks/harness/util.go @@ -27,7 +27,7 @@ import ( // IP:port. func WaitUntilServing(ctx context.Context, machine Machine, server net.IP, port int) error { var logger testutil.DefaultLogger = "netcat" - netcat := machine.GetContainer(ctx, logger) + netcat := machine.GetNativeContainer(ctx, logger) defer netcat.CleanUp(ctx) cmd := fmt.Sprintf("while ! nc -zv %s %d; do true; done", server.String(), port) diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD index 16d267bc8..363041fb7 100644 --- a/test/benchmarks/network/BUILD +++ b/test/benchmarks/network/BUILD @@ -24,6 +24,7 @@ go_test( ], deps = [ "//pkg/test/dockerutil", + "//pkg/test/testutil", "//test/benchmarks/harness", ], ) diff --git a/test/benchmarks/network/httpd_test.go b/test/benchmarks/network/httpd_test.go index f9afdf15f..fe23ca949 100644 --- a/test/benchmarks/network/httpd_test.go +++ b/test/benchmarks/network/httpd_test.go @@ -52,12 +52,12 @@ func BenchmarkHttpdConcurrency(b *testing.B) { defer serverMachine.CleanUp() // The test iterates over client concurrency, so set other parameters. - requests := 1000 + requests := 10000 concurrency := []int{1, 5, 10, 25} doc := docs["10Kb"] for _, c := range concurrency { - b.Run(fmt.Sprintf("%dConcurrency", c), func(b *testing.B) { + b.Run(fmt.Sprintf("%d", c), func(b *testing.B) { runHttpd(b, clientMachine, serverMachine, doc, requests, c) }) } @@ -78,7 +78,7 @@ func BenchmarkHttpdDocSize(b *testing.B) { } defer serverMachine.CleanUp() - requests := 1000 + requests := 10000 concurrency := 1 for name, filename := range docs { @@ -129,7 +129,7 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc st harness.WaitUntilServing(ctx, clientMachine, ip, servingPort) // Grab a client. - client := clientMachine.GetContainer(ctx, b) + client := clientMachine.GetNativeContainer(ctx, b) defer client.CleanUp(ctx) path := fmt.Sprintf("http://%s:%d/%s", ip, servingPort, doc) @@ -137,6 +137,7 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc st cmd = fmt.Sprintf("ab -n %d -c %d %s", requests, concurrency, path) b.ResetTimer() + server.RestartProfiles() for i := 0; i < b.N; i++ { out, err := client.Run(ctx, dockerutil.RunOpts{ Image: "benchmarks/ab", diff --git a/test/benchmarks/network/iperf_test.go b/test/benchmarks/network/iperf_test.go index 664e0797e..a5e198e14 100644 --- a/test/benchmarks/network/iperf_test.go +++ b/test/benchmarks/network/iperf_test.go @@ -22,12 +22,13 @@ import ( "testing" "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/test/benchmarks/harness" ) func BenchmarkIperf(b *testing.B) { + const time = 10 // time in seconds to run the client. 
- // Get two machines clientMachine, err := h.GetMachine() if err != nil { b.Fatalf("failed to get machine: %v", err) @@ -39,30 +40,32 @@ func BenchmarkIperf(b *testing.B) { b.Fatalf("failed to get machine: %v", err) } defer serverMachine.CleanUp() - + ctx := context.Background() for _, bm := range []struct { - name string - clientRuntime string - serverRuntime string + name string + clientFunc func(context.Context, testutil.Logger) *dockerutil.Container + serverFunc func(context.Context, testutil.Logger) *dockerutil.Container }{ // We are either measuring the server or the client. The other should be // runc. e.g. Upload sees how fast the runtime under test uploads to a native // server. - {name: "Upload", clientRuntime: dockerutil.Runtime(), serverRuntime: "runc"}, - {name: "Download", clientRuntime: "runc", serverRuntime: dockerutil.Runtime()}, + { + name: "Upload", + clientFunc: clientMachine.GetContainer, + serverFunc: serverMachine.GetNativeContainer, + }, + { + name: "Download", + clientFunc: clientMachine.GetNativeContainer, + serverFunc: serverMachine.GetContainer, + }, } { b.Run(bm.name, func(b *testing.B) { - - // Get a container from the server and set its runtime. - ctx := context.Background() - server := serverMachine.GetContainer(ctx, b) + // Set up the containers. + server := bm.serverFunc(ctx, b) defer server.CleanUp(ctx) - server.Runtime = bm.serverRuntime - - // Get a container from the client and set its runtime. - client := clientMachine.GetContainer(ctx, b) + client := bm.clientFunc(ctx, b) defer client.CleanUp(ctx) - client.Runtime = bm.clientRuntime // iperf serves on port 5001 by default. port := 5001 @@ -91,11 +94,14 @@ func BenchmarkIperf(b *testing.B) { } // iperf report in Kb realtime - cmd := fmt.Sprintf("iperf -f K --realtime -c %s -p %d", ip.String(), servingPort) + cmd := fmt.Sprintf("iperf -f K --realtime --time %d -c %s -p %d", time, ip.String(), servingPort) // Run the client. b.ResetTimer() + // Restart the server profiles. If the server isn't being profiled + // this does nothing. + server.RestartProfiles() for i := 0; i < b.N; i++ { out, err := client.Run(ctx, dockerutil.RunOpts{ Image: "benchmarks/iperf", diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 1a0221893..74e1e6def 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -142,7 +142,7 @@ func TestOne(t *testing.T) { // Create the Docker container for the DUT. dut := dockerutil.MakeContainer(ctx, logger("dut")) if *dutPlatform == "linux" { - dut.Runtime = "" + dut = dockerutil.MakeNativeContainer(ctx, logger("dut")) } runOpts := dockerutil.RunOpts{ @@ -208,8 +208,7 @@ func TestOne(t *testing.T) { } // Create the Docker container for the testbench. - testbench := dockerutil.MakeContainer(ctx, logger("testbench")) - testbench.Runtime = "" // The testbench always runs on Linux. + testbench := dockerutil.MakeNativeContainer(ctx, logger("testbench")) tbb := path.Base(*testbenchBinary) containerTestbenchBinary := "/packetimpact/" + tbb -- cgit v1.2.3 From cf7141fb43d9b6757c6838baa71e2edfee339d44 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Mon, 27 Jul 2020 06:37:11 -0700 Subject: Ask for *testing.T instead of storing it Storing *testing.T on test helper structs is problematic when subtests are used, because it is possible for nested tests to call Fatal on parent test, which incorrect terminates the parent test. 
For example func TestOuter(t *testing.T) { dut := NewDUT(t) t.Run("first test", func(t *testing.T) { dut.FallibleCall() }) t.Run("second test", func(t *testing.T) { dut.FallibleCall() } } In the example above, assuming `FallibleCall` calls `t.Fatal` on the `t` it holds, if `dut.FallibleCall` fails in "first test", it will call `Fatal` on the parent `t`, quitting `TestOuter`. This is not a behavior we want. PiperOrigin-RevId: 323350241 --- test/packetimpact/testbench/connections.go | 330 ++++++++++++------- test/packetimpact/testbench/dut.go | 358 ++++++++++++--------- test/packetimpact/testbench/rawsockets.go | 44 ++- test/packetimpact/tests/fin_wait2_timeout_test.go | 26 +- .../tests/icmpv6_param_problem_test.go | 8 +- test/packetimpact/tests/ipv4_id_uniqueness_test.go | 34 +- .../tests/ipv6_fragment_reassembly_test.go | 10 +- .../tests/ipv6_unknown_options_action_test.go | 44 +-- test/packetimpact/tests/tcp_close_wait_ack_test.go | 63 ++-- test/packetimpact/tests/tcp_cork_mss_test.go | 36 +-- .../tests/tcp_handshake_window_size_test.go | 20 +- .../tests/tcp_network_unreachable_test.go | 28 +- .../tests/tcp_noaccept_close_rst_test.go | 10 +- .../tests/tcp_outside_the_window_test.go | 24 +- test/packetimpact/tests/tcp_paws_mechanism_test.go | 24 +- .../tests/tcp_queue_receive_in_syn_sent_test.go | 26 +- test/packetimpact/tests/tcp_reordering_test.go | 48 +-- test/packetimpact/tests/tcp_retransmits_test.go | 28 +- .../tests/tcp_send_window_sizes_piggyback_test.go | 24 +- test/packetimpact/tests/tcp_synrcvd_reset_test.go | 16 +- test/packetimpact/tests/tcp_synsent_reset_test.go | 30 +- test/packetimpact/tests/tcp_user_timeout_test.go | 41 ++- test/packetimpact/tests/tcp_window_shrink_test.go | 36 +-- .../tests/tcp_zero_window_probe_retransmit_test.go | 39 ++- .../tests/tcp_zero_window_probe_test.go | 44 +-- .../tcp_zero_window_probe_usertimeout_test.go | 40 +-- .../tests/udp_discard_mcast_source_addr_test.go | 22 +- .../tests/udp_icmp_error_propagation_test.go | 130 ++++---- .../tests/udp_recv_mcast_bcast_test.go | 9 +- .../packetimpact/tests/udp_send_recv_dgram_test.go | 28 +- 30 files changed, 876 insertions(+), 744 deletions(-) (limited to 'test') diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go index 87ce58c24..3af5f83fd 100644 --- a/test/packetimpact/testbench/connections.go +++ b/test/packetimpact/testbench/connections.go @@ -429,7 +429,6 @@ type Connection struct { layerStates []layerState injector Injector sniffer Sniffer - t *testing.T } // Returns the default incoming frame against which to match. If received is @@ -462,7 +461,9 @@ func (conn *Connection) match(override, received Layers) bool { } // Close frees associated resources held by the Connection. -func (conn *Connection) Close() { +func (conn *Connection) Close(t *testing.T) { + t.Helper() + errs := multierr.Combine(conn.sniffer.close(), conn.injector.close()) for _, s := range conn.layerStates { if err := s.close(); err != nil { @@ -470,7 +471,7 @@ func (conn *Connection) Close() { } } if errs != nil { - conn.t.Fatalf("unable to close %+v: %s", conn, errs) + t.Fatalf("unable to close %+v: %s", conn, errs) } } @@ -482,7 +483,9 @@ func (conn *Connection) Close() { // overriden first. As an example, valid values of overrideLayers for a TCP- // over-IPv4-over-Ethernet connection are: nil, [TCP], [IPv4, TCP], and // [Ethernet, IPv4, TCP]. 
-func (conn *Connection) CreateFrame(overrideLayers Layers, additionalLayers ...Layer) Layers { +func (conn *Connection) CreateFrame(t *testing.T, overrideLayers Layers, additionalLayers ...Layer) Layers { + t.Helper() + var layersToSend Layers for i, s := range conn.layerStates { layer := s.outgoing() @@ -491,7 +494,7 @@ func (conn *Connection) CreateFrame(overrideLayers Layers, additionalLayers ...L // end. if j := len(overrideLayers) - (len(conn.layerStates) - i); j >= 0 { if err := layer.merge(overrideLayers[j]); err != nil { - conn.t.Fatalf("can't merge %+v into %+v: %s", layer, overrideLayers[j], err) + t.Fatalf("can't merge %+v into %+v: %s", layer, overrideLayers[j], err) } } layersToSend = append(layersToSend, layer) @@ -505,21 +508,25 @@ func (conn *Connection) CreateFrame(overrideLayers Layers, additionalLayers ...L // This method is useful for sending out-of-band control messages such as // ICMP packets, where it would not make sense to update the transport layer's // state using the ICMP header. -func (conn *Connection) SendFrameStateless(frame Layers) { +func (conn *Connection) SendFrameStateless(t *testing.T, frame Layers) { + t.Helper() + outBytes, err := frame.ToBytes() if err != nil { - conn.t.Fatalf("can't build outgoing packet: %s", err) + t.Fatalf("can't build outgoing packet: %s", err) } - conn.injector.Send(outBytes) + conn.injector.Send(t, outBytes) } // SendFrame sends a frame on the wire and updates the state of all layers. -func (conn *Connection) SendFrame(frame Layers) { +func (conn *Connection) SendFrame(t *testing.T, frame Layers) { + t.Helper() + outBytes, err := frame.ToBytes() if err != nil { - conn.t.Fatalf("can't build outgoing packet: %s", err) + t.Fatalf("can't build outgoing packet: %s", err) } - conn.injector.Send(outBytes) + conn.injector.Send(t, outBytes) // frame might have nil values where the caller wanted to use default values. // sentFrame will have no nil values in it because it comes from parsing the @@ -528,7 +535,7 @@ func (conn *Connection) SendFrame(frame Layers) { // Update the state of each layer based on what was sent. for i, s := range conn.layerStates { if err := s.sent(sentFrame[i]); err != nil { - conn.t.Fatalf("Unable to update the state of %+v with %s: %s", s, sentFrame[i], err) + t.Fatalf("Unable to update the state of %+v with %s: %s", s, sentFrame[i], err) } } } @@ -538,18 +545,22 @@ func (conn *Connection) SendFrame(frame Layers) { // // Types defined with Connection as the underlying type should expose // type-safe versions of this method. -func (conn *Connection) send(overrideLayers Layers, additionalLayers ...Layer) { - conn.SendFrame(conn.CreateFrame(overrideLayers, additionalLayers...)) +func (conn *Connection) send(t *testing.T, overrideLayers Layers, additionalLayers ...Layer) { + t.Helper() + + conn.SendFrame(t, conn.CreateFrame(t, overrideLayers, additionalLayers...)) } // recvFrame gets the next successfully parsed frame (of type Layers) within the // timeout provided. If no parsable frame arrives before the timeout, it returns // nil. -func (conn *Connection) recvFrame(timeout time.Duration) Layers { +func (conn *Connection) recvFrame(t *testing.T, timeout time.Duration) Layers { + t.Helper() + if timeout <= 0 { return nil } - b := conn.sniffer.Recv(timeout) + b := conn.sniffer.Recv(t, timeout) if b == nil { return nil } @@ -569,32 +580,36 @@ func (e *layersError) Error() string { // Expect expects a frame with the final layerStates layer matching the // provided Layer within the timeout specified. 
If it doesn't arrive in time, // an error is returned. -func (conn *Connection) Expect(layer Layer, timeout time.Duration) (Layer, error) { +func (conn *Connection) Expect(t *testing.T, layer Layer, timeout time.Duration) (Layer, error) { + t.Helper() + // Make a frame that will ignore all but the final layer. layers := make([]Layer, len(conn.layerStates)) layers[len(layers)-1] = layer - gotFrame, err := conn.ExpectFrame(layers, timeout) + gotFrame, err := conn.ExpectFrame(t, layers, timeout) if err != nil { return nil, err } if len(conn.layerStates)-1 < len(gotFrame) { return gotFrame[len(conn.layerStates)-1], nil } - conn.t.Fatal("the received frame should be at least as long as the expected layers") + t.Fatalf("the received frame should be at least as long as the expected layers, got %d layers, want at least %d layers, got frame: %#v", len(gotFrame), len(conn.layerStates), gotFrame) panic("unreachable") } // ExpectFrame expects a frame that matches the provided Layers within the // timeout specified. If one arrives in time, the Layers is returned without an // error. If it doesn't arrive in time, it returns nil and error is non-nil. -func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layers, error) { +func (conn *Connection) ExpectFrame(t *testing.T, layers Layers, timeout time.Duration) (Layers, error) { + t.Helper() + deadline := time.Now().Add(timeout) var errs error for { var gotLayers Layers if timeout = time.Until(deadline); timeout > 0 { - gotLayers = conn.recvFrame(timeout) + gotLayers = conn.recvFrame(t, timeout) } if gotLayers == nil { if errs == nil { @@ -605,7 +620,7 @@ func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layer if conn.match(layers, gotLayers) { for i, s := range conn.layerStates { if err := s.received(gotLayers[i]); err != nil { - conn.t.Fatal(err) + t.Fatalf("failed to update test connection's layer states based on received frame: %s", err) } } return gotLayers, nil @@ -616,8 +631,10 @@ func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layer // Drain drains the sniffer's receive buffer by receiving packets until there's // nothing else to receive. -func (conn *Connection) Drain() { - conn.sniffer.Drain() +func (conn *Connection) Drain(t *testing.T) { + t.Helper() + + conn.sniffer.Drain(t) } // TCPIPv4 maintains the state for all the layers in a TCP/IPv4 connection. @@ -625,6 +642,8 @@ type TCPIPv4 Connection // NewTCPIPv4 creates a new TCPIPv4 connection with reasonable defaults. func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { + t.Helper() + etherState, err := newEtherState(Ether{}, Ether{}) if err != nil { t.Fatalf("can't make etherState: %s", err) @@ -650,57 +669,58 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 { layerStates: []layerState{etherState, ipv4State, tcpState}, injector: injector, sniffer: sniffer, - t: t, } } // Connect performs a TCP 3-way handshake. The input Connection should have a // final TCP Layer. -func (conn *TCPIPv4) Connect() { - conn.t.Helper() +func (conn *TCPIPv4) Connect(t *testing.T) { + t.Helper() // Send the SYN. - conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn)}) + conn.Send(t, TCP{Flags: Uint8(header.TCPFlagSyn)}) // Wait for the SYN-ACK. 
- synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) + synAck, err := conn.Expect(t, TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) if err != nil { - conn.t.Fatalf("didn't get synack during handshake: %s", err) + t.Fatalf("didn't get synack during handshake: %s", err) } conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck // Send an ACK. - conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)}) + conn.Send(t, TCP{Flags: Uint8(header.TCPFlagAck)}) } // ConnectWithOptions performs a TCP 3-way handshake with given TCP options. // The input Connection should have a final TCP Layer. -func (conn *TCPIPv4) ConnectWithOptions(options []byte) { - conn.t.Helper() +func (conn *TCPIPv4) ConnectWithOptions(t *testing.T, options []byte) { + t.Helper() // Send the SYN. - conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn), Options: options}) + conn.Send(t, TCP{Flags: Uint8(header.TCPFlagSyn), Options: options}) // Wait for the SYN-ACK. - synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) + synAck, err := conn.Expect(t, TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) if err != nil { - conn.t.Fatalf("didn't get synack during handshake: %s", err) + t.Fatalf("didn't get synack during handshake: %s", err) } conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck // Send an ACK. - conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)}) + conn.Send(t, TCP{Flags: Uint8(header.TCPFlagAck)}) } // ExpectData is a convenient method that expects a Layer and the Layer after // it. If it doens't arrive in time, it returns nil. -func (conn *TCPIPv4) ExpectData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { +func (conn *TCPIPv4) ExpectData(t *testing.T, tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { + t.Helper() + expected := make([]Layer, len(conn.layerStates)) expected[len(expected)-1] = tcp if payload != nil { expected = append(expected, payload) } - return (*Connection)(conn).ExpectFrame(expected, timeout) + return (*Connection)(conn).ExpectFrame(t, expected, timeout) } // ExpectNextData attempts to receive the next incoming segment for the @@ -709,9 +729,11 @@ func (conn *TCPIPv4) ExpectData(tcp *TCP, payload *Payload, timeout time.Duratio // It differs from ExpectData() in that here we are only interested in the next // received segment, while ExpectData() can receive multiple segments for the // connection until there is a match with given layers or a timeout. -func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { +func (conn *TCPIPv4) ExpectNextData(t *testing.T, tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { + t.Helper() + // Receive the first incoming TCP segment for this connection. 
- got, err := conn.ExpectData(&TCP{}, nil, timeout) + got, err := conn.ExpectData(t, &TCP{}, nil, timeout) if err != nil { return nil, err } @@ -720,7 +742,7 @@ func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Dur expected[len(expected)-1] = tcp if payload != nil { expected = append(expected, payload) - tcp.SeqNum = Uint32(uint32(*conn.RemoteSeqNum()) - uint32(payload.Length())) + tcp.SeqNum = Uint32(uint32(*conn.RemoteSeqNum(t)) - uint32(payload.Length())) } if !(*Connection)(conn).match(expected, got) { return nil, fmt.Errorf("next frame is not matching %s during %s: got %s", expected, timeout, got) @@ -730,71 +752,91 @@ func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Dur // Send a packet with reasonable defaults. Potentially override the TCP layer in // the connection with the provided layer and add additionLayers. -func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) { - (*Connection)(conn).send(Layers{&tcp}, additionalLayers...) +func (conn *TCPIPv4) Send(t *testing.T, tcp TCP, additionalLayers ...Layer) { + t.Helper() + + (*Connection)(conn).send(t, Layers{&tcp}, additionalLayers...) } // Close frees associated resources held by the TCPIPv4 connection. -func (conn *TCPIPv4) Close() { - (*Connection)(conn).Close() +func (conn *TCPIPv4) Close(t *testing.T) { + t.Helper() + + (*Connection)(conn).Close(t) } // Expect expects a frame with the TCP layer matching the provided TCP within // the timeout specified. If it doesn't arrive in time, an error is returned. -func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) { - layer, err := (*Connection)(conn).Expect(&tcp, timeout) +func (conn *TCPIPv4) Expect(t *testing.T, tcp TCP, timeout time.Duration) (*TCP, error) { + t.Helper() + + layer, err := (*Connection)(conn).Expect(t, &tcp, timeout) if layer == nil { return nil, err } gotTCP, ok := layer.(*TCP) if !ok { - conn.t.Fatalf("expected %s to be TCP", layer) + t.Fatalf("expected %s to be TCP", layer) } return gotTCP, err } -func (conn *TCPIPv4) tcpState() *tcpState { +func (conn *TCPIPv4) tcpState(t *testing.T) *tcpState { + t.Helper() + state, ok := conn.layerStates[2].(*tcpState) if !ok { - conn.t.Fatalf("got transport-layer state type=%T, expected tcpState", conn.layerStates[2]) + t.Fatalf("got transport-layer state type=%T, expected tcpState", conn.layerStates[2]) } return state } -func (conn *TCPIPv4) ipv4State() *ipv4State { +func (conn *TCPIPv4) ipv4State(t *testing.T) *ipv4State { + t.Helper() + state, ok := conn.layerStates[1].(*ipv4State) if !ok { - conn.t.Fatalf("expected network-layer state type=%T, expected ipv4State", conn.layerStates[1]) + t.Fatalf("expected network-layer state type=%T, expected ipv4State", conn.layerStates[1]) } return state } // RemoteSeqNum returns the next expected sequence number from the DUT. -func (conn *TCPIPv4) RemoteSeqNum() *seqnum.Value { - return conn.tcpState().remoteSeqNum +func (conn *TCPIPv4) RemoteSeqNum(t *testing.T) *seqnum.Value { + t.Helper() + + return conn.tcpState(t).remoteSeqNum } // LocalSeqNum returns the next sequence number to send from the testbench. -func (conn *TCPIPv4) LocalSeqNum() *seqnum.Value { - return conn.tcpState().localSeqNum +func (conn *TCPIPv4) LocalSeqNum(t *testing.T) *seqnum.Value { + t.Helper() + + return conn.tcpState(t).localSeqNum } // SynAck returns the SynAck that was part of the handshake. 
-func (conn *TCPIPv4) SynAck() *TCP { - return conn.tcpState().synAck +func (conn *TCPIPv4) SynAck(t *testing.T) *TCP { + t.Helper() + + return conn.tcpState(t).synAck } // LocalAddr gets the local socket address of this connection. -func (conn *TCPIPv4) LocalAddr() *unix.SockaddrInet4 { - sa := &unix.SockaddrInet4{Port: int(*conn.tcpState().out.SrcPort)} - copy(sa.Addr[:], *conn.ipv4State().out.SrcAddr) +func (conn *TCPIPv4) LocalAddr(t *testing.T) *unix.SockaddrInet4 { + t.Helper() + + sa := &unix.SockaddrInet4{Port: int(*conn.tcpState(t).out.SrcPort)} + copy(sa.Addr[:], *conn.ipv4State(t).out.SrcAddr) return sa } // Drain drains the sniffer's receive buffer by receiving packets until there's // nothing else to receive. -func (conn *TCPIPv4) Drain() { - conn.sniffer.Drain() +func (conn *TCPIPv4) Drain(t *testing.T) { + t.Helper() + + conn.sniffer.Drain(t) } // IPv6Conn maintains the state for all the layers in a IPv6 connection. @@ -802,6 +844,8 @@ type IPv6Conn Connection // NewIPv6Conn creates a new IPv6Conn connection with reasonable defaults. func NewIPv6Conn(t *testing.T, outgoingIPv6, incomingIPv6 IPv6) IPv6Conn { + t.Helper() + etherState, err := newEtherState(Ether{}, Ether{}) if err != nil { t.Fatalf("can't make EtherState: %s", err) @@ -824,25 +868,30 @@ func NewIPv6Conn(t *testing.T, outgoingIPv6, incomingIPv6 IPv6) IPv6Conn { layerStates: []layerState{etherState, ipv6State}, injector: injector, sniffer: sniffer, - t: t, } } // Send sends a frame with ipv6 overriding the IPv6 layer defaults and // additionalLayers added after it. -func (conn *IPv6Conn) Send(ipv6 IPv6, additionalLayers ...Layer) { - (*Connection)(conn).send(Layers{&ipv6}, additionalLayers...) +func (conn *IPv6Conn) Send(t *testing.T, ipv6 IPv6, additionalLayers ...Layer) { + t.Helper() + + (*Connection)(conn).send(t, Layers{&ipv6}, additionalLayers...) } // Close to clean up any resources held. -func (conn *IPv6Conn) Close() { - (*Connection)(conn).Close() +func (conn *IPv6Conn) Close(t *testing.T) { + t.Helper() + + (*Connection)(conn).Close(t) } // ExpectFrame expects a frame that matches the provided Layers within the // timeout specified. If it doesn't arrive in time, an error is returned. -func (conn *IPv6Conn) ExpectFrame(frame Layers, timeout time.Duration) (Layers, error) { - return (*Connection)(conn).ExpectFrame(frame, timeout) +func (conn *IPv6Conn) ExpectFrame(t *testing.T, frame Layers, timeout time.Duration) (Layers, error) { + t.Helper() + + return (*Connection)(conn).ExpectFrame(t, frame, timeout) } // UDPIPv4 maintains the state for all the layers in a UDP/IPv4 connection. @@ -850,6 +899,8 @@ type UDPIPv4 Connection // NewUDPIPv4 creates a new UDPIPv4 connection with reasonable defaults. 
func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { + t.Helper() + etherState, err := newEtherState(Ether{}, Ether{}) if err != nil { t.Fatalf("can't make etherState: %s", err) @@ -875,81 +926,96 @@ func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 { layerStates: []layerState{etherState, ipv4State, udpState}, injector: injector, sniffer: sniffer, - t: t, } } -func (conn *UDPIPv4) udpState() *udpState { +func (conn *UDPIPv4) udpState(t *testing.T) *udpState { + t.Helper() + state, ok := conn.layerStates[2].(*udpState) if !ok { - conn.t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2]) + t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2]) } return state } -func (conn *UDPIPv4) ipv4State() *ipv4State { +func (conn *UDPIPv4) ipv4State(t *testing.T) *ipv4State { + t.Helper() + state, ok := conn.layerStates[1].(*ipv4State) if !ok { - conn.t.Fatalf("got network-layer state type=%T, expected ipv4State", conn.layerStates[1]) + t.Fatalf("got network-layer state type=%T, expected ipv4State", conn.layerStates[1]) } return state } // LocalAddr gets the local socket address of this connection. -func (conn *UDPIPv4) LocalAddr() *unix.SockaddrInet4 { - sa := &unix.SockaddrInet4{Port: int(*conn.udpState().out.SrcPort)} - copy(sa.Addr[:], *conn.ipv4State().out.SrcAddr) +func (conn *UDPIPv4) LocalAddr(t *testing.T) *unix.SockaddrInet4 { + t.Helper() + + sa := &unix.SockaddrInet4{Port: int(*conn.udpState(t).out.SrcPort)} + copy(sa.Addr[:], *conn.ipv4State(t).out.SrcAddr) return sa } // Send sends a packet with reasonable defaults, potentially overriding the UDP // layer and adding additionLayers. -func (conn *UDPIPv4) Send(udp UDP, additionalLayers ...Layer) { - (*Connection)(conn).send(Layers{&udp}, additionalLayers...) +func (conn *UDPIPv4) Send(t *testing.T, udp UDP, additionalLayers ...Layer) { + t.Helper() + + (*Connection)(conn).send(t, Layers{&udp}, additionalLayers...) } // SendIP sends a packet with reasonable defaults, potentially overriding the // UDP and IPv4 headers and adding additionLayers. -func (conn *UDPIPv4) SendIP(ip IPv4, udp UDP, additionalLayers ...Layer) { - (*Connection)(conn).send(Layers{&ip, &udp}, additionalLayers...) +func (conn *UDPIPv4) SendIP(t *testing.T, ip IPv4, udp UDP, additionalLayers ...Layer) { + t.Helper() + + (*Connection)(conn).send(t, Layers{&ip, &udp}, additionalLayers...) } // Expect expects a frame with the UDP layer matching the provided UDP within // the timeout specified. If it doesn't arrive in time, an error is returned. -func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) { - conn.t.Helper() - layer, err := (*Connection)(conn).Expect(&udp, timeout) +func (conn *UDPIPv4) Expect(t *testing.T, udp UDP, timeout time.Duration) (*UDP, error) { + t.Helper() + + layer, err := (*Connection)(conn).Expect(t, &udp, timeout) if err != nil { return nil, err } gotUDP, ok := layer.(*UDP) if !ok { - conn.t.Fatalf("expected %s to be UDP", layer) + t.Fatalf("expected %s to be UDP", layer) } return gotUDP, nil } // ExpectData is a convenient method that expects a Layer and the Layer after // it. If it doens't arrive in time, it returns nil. 
-func (conn *UDPIPv4) ExpectData(udp UDP, payload Payload, timeout time.Duration) (Layers, error) { - conn.t.Helper() +func (conn *UDPIPv4) ExpectData(t *testing.T, udp UDP, payload Payload, timeout time.Duration) (Layers, error) { + t.Helper() + expected := make([]Layer, len(conn.layerStates)) expected[len(expected)-1] = &udp if payload.length() != 0 { expected = append(expected, &payload) } - return (*Connection)(conn).ExpectFrame(expected, timeout) + return (*Connection)(conn).ExpectFrame(t, expected, timeout) } // Close frees associated resources held by the UDPIPv4 connection. -func (conn *UDPIPv4) Close() { - (*Connection)(conn).Close() +func (conn *UDPIPv4) Close(t *testing.T) { + t.Helper() + + (*Connection)(conn).Close(t) } // Drain drains the sniffer's receive buffer by receiving packets until there's // nothing else to receive. -func (conn *UDPIPv4) Drain() { - conn.sniffer.Drain() +func (conn *UDPIPv4) Drain(t *testing.T) { + t.Helper() + + conn.sniffer.Drain(t) } // UDPIPv6 maintains the state for all the layers in a UDP/IPv6 connection. @@ -957,6 +1023,8 @@ type UDPIPv6 Connection // NewUDPIPv6 creates a new UDPIPv6 connection with reasonable defaults. func NewUDPIPv6(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv6 { + t.Helper() + etherState, err := newEtherState(Ether{}, Ether{}) if err != nil { t.Fatalf("can't make etherState: %s", err) @@ -981,86 +1049,101 @@ func NewUDPIPv6(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv6 { layerStates: []layerState{etherState, ipv6State, udpState}, injector: injector, sniffer: sniffer, - t: t, } } -func (conn *UDPIPv6) udpState() *udpState { +func (conn *UDPIPv6) udpState(t *testing.T) *udpState { + t.Helper() + state, ok := conn.layerStates[2].(*udpState) if !ok { - conn.t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2]) + t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2]) } return state } -func (conn *UDPIPv6) ipv6State() *ipv6State { +func (conn *UDPIPv6) ipv6State(t *testing.T) *ipv6State { + t.Helper() + state, ok := conn.layerStates[1].(*ipv6State) if !ok { - conn.t.Fatalf("got network-layer state type=%T, expected ipv6State", conn.layerStates[1]) + t.Fatalf("got network-layer state type=%T, expected ipv6State", conn.layerStates[1]) } return state } // LocalAddr gets the local socket address of this connection. -func (conn *UDPIPv6) LocalAddr() *unix.SockaddrInet6 { +func (conn *UDPIPv6) LocalAddr(t *testing.T) *unix.SockaddrInet6 { + t.Helper() + sa := &unix.SockaddrInet6{ - Port: int(*conn.udpState().out.SrcPort), + Port: int(*conn.udpState(t).out.SrcPort), // Local address is in perspective to the remote host, so it's scoped to the // ID of the remote interface. ZoneId: uint32(RemoteInterfaceID), } - copy(sa.Addr[:], *conn.ipv6State().out.SrcAddr) + copy(sa.Addr[:], *conn.ipv6State(t).out.SrcAddr) return sa } // Send sends a packet with reasonable defaults, potentially overriding the UDP // layer and adding additionLayers. -func (conn *UDPIPv6) Send(udp UDP, additionalLayers ...Layer) { - (*Connection)(conn).send(Layers{&udp}, additionalLayers...) +func (conn *UDPIPv6) Send(t *testing.T, udp UDP, additionalLayers ...Layer) { + t.Helper() + + (*Connection)(conn).send(t, Layers{&udp}, additionalLayers...) } // SendIPv6 sends a packet with reasonable defaults, potentially overriding the // UDP and IPv6 headers and adding additionLayers. 
-func (conn *UDPIPv6) SendIPv6(ip IPv6, udp UDP, additionalLayers ...Layer) { - (*Connection)(conn).send(Layers{&ip, &udp}, additionalLayers...) +func (conn *UDPIPv6) SendIPv6(t *testing.T, ip IPv6, udp UDP, additionalLayers ...Layer) { + t.Helper() + + (*Connection)(conn).send(t, Layers{&ip, &udp}, additionalLayers...) } // Expect expects a frame with the UDP layer matching the provided UDP within // the timeout specified. If it doesn't arrive in time, an error is returned. -func (conn *UDPIPv6) Expect(udp UDP, timeout time.Duration) (*UDP, error) { - conn.t.Helper() - layer, err := (*Connection)(conn).Expect(&udp, timeout) +func (conn *UDPIPv6) Expect(t *testing.T, udp UDP, timeout time.Duration) (*UDP, error) { + t.Helper() + + layer, err := (*Connection)(conn).Expect(t, &udp, timeout) if err != nil { return nil, err } gotUDP, ok := layer.(*UDP) if !ok { - conn.t.Fatalf("expected %s to be UDP", layer) + t.Fatalf("expected %s to be UDP", layer) } return gotUDP, nil } // ExpectData is a convenient method that expects a Layer and the Layer after // it. If it doens't arrive in time, it returns nil. -func (conn *UDPIPv6) ExpectData(udp UDP, payload Payload, timeout time.Duration) (Layers, error) { - conn.t.Helper() +func (conn *UDPIPv6) ExpectData(t *testing.T, udp UDP, payload Payload, timeout time.Duration) (Layers, error) { + t.Helper() + expected := make([]Layer, len(conn.layerStates)) expected[len(expected)-1] = &udp if payload.length() != 0 { expected = append(expected, &payload) } - return (*Connection)(conn).ExpectFrame(expected, timeout) + return (*Connection)(conn).ExpectFrame(t, expected, timeout) } // Close frees associated resources held by the UDPIPv6 connection. -func (conn *UDPIPv6) Close() { - (*Connection)(conn).Close() +func (conn *UDPIPv6) Close(t *testing.T) { + t.Helper() + + (*Connection)(conn).Close(t) } // Drain drains the sniffer's receive buffer by receiving packets until there's // nothing else to receive. -func (conn *UDPIPv6) Drain() { - conn.sniffer.Drain() +func (conn *UDPIPv6) Drain(t *testing.T) { + t.Helper() + + conn.sniffer.Drain(t) } // TCPIPv6 maintains the state for all the layers in a TCP/IPv6 connection. @@ -1093,7 +1176,6 @@ func NewTCPIPv6(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv6 { layerStates: []layerState{etherState, ipv6State, tcpState}, injector: injector, sniffer: sniffer, - t: t, } } @@ -1104,16 +1186,20 @@ func (conn *TCPIPv6) SrcPort() uint16 { // ExpectData is a convenient method that expects a Layer and the Layer after // it. If it doens't arrive in time, it returns nil. -func (conn *TCPIPv6) ExpectData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { +func (conn *TCPIPv6) ExpectData(t *testing.T, tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) { + t.Helper() + expected := make([]Layer, len(conn.layerStates)) expected[len(expected)-1] = tcp if payload != nil { expected = append(expected, payload) } - return (*Connection)(conn).ExpectFrame(expected, timeout) + return (*Connection)(conn).ExpectFrame(t, expected, timeout) } // Close frees associated resources held by the TCPIPv6 connection. 
-func (conn *TCPIPv6) Close() { - (*Connection)(conn).Close() +func (conn *TCPIPv6) Close(t *testing.T) { + t.Helper() + + (*Connection)(conn).Close(t) } diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go index 51be13759..73c532e75 100644 --- a/test/packetimpact/testbench/dut.go +++ b/test/packetimpact/testbench/dut.go @@ -31,13 +31,14 @@ import ( // DUT communicates with the DUT to force it to make POSIX calls. type DUT struct { - t *testing.T conn *grpc.ClientConn posixServer POSIXClient } // NewDUT creates a new connection with the DUT over gRPC. func NewDUT(t *testing.T) DUT { + t.Helper() + flag.Parse() if err := genPseudoFlags(); err != nil { t.Fatal("generating psuedo flags:", err) @@ -50,7 +51,6 @@ func NewDUT(t *testing.T) DUT { } posixServer := NewPOSIXClient(conn) return DUT{ - t: t, conn: conn, posixServer: posixServer, } @@ -61,8 +61,9 @@ func (dut *DUT) TearDown() { dut.conn.Close() } -func (dut *DUT) sockaddrToProto(sa unix.Sockaddr) *pb.Sockaddr { - dut.t.Helper() +func (dut *DUT) sockaddrToProto(t *testing.T, sa unix.Sockaddr) *pb.Sockaddr { + t.Helper() + switch s := sa.(type) { case *unix.SockaddrInet4: return &pb.Sockaddr{ @@ -87,12 +88,13 @@ func (dut *DUT) sockaddrToProto(sa unix.Sockaddr) *pb.Sockaddr { }, } } - dut.t.Fatalf("can't parse Sockaddr struct: %+v", sa) + t.Fatalf("can't parse Sockaddr struct: %+v", sa) return nil } -func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr { - dut.t.Helper() +func (dut *DUT) protoToSockaddr(t *testing.T, sa *pb.Sockaddr) unix.Sockaddr { + t.Helper() + switch s := sa.Sockaddr.(type) { case *pb.Sockaddr_In: ret := unix.SockaddrInet4{ @@ -108,31 +110,32 @@ func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr { copy(ret.Addr[:], s.In6.GetAddr()) return &ret } - dut.t.Fatalf("can't parse Sockaddr proto: %+v", sa) + t.Fatalf("can't parse Sockaddr proto: %#v", sa) return nil } // CreateBoundSocket makes a new socket on the DUT, with type typ and protocol // proto, and bound to the IP address addr. Returns the new file descriptor and // the port that was selected on the DUT. -func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) { - dut.t.Helper() +func (dut *DUT) CreateBoundSocket(t *testing.T, typ, proto int32, addr net.IP) (int32, uint16) { + t.Helper() + var fd int32 if addr.To4() != nil { - fd = dut.Socket(unix.AF_INET, typ, proto) + fd = dut.Socket(t, unix.AF_INET, typ, proto) sa := unix.SockaddrInet4{} copy(sa.Addr[:], addr.To4()) - dut.Bind(fd, &sa) + dut.Bind(t, fd, &sa) } else if addr.To16() != nil { - fd = dut.Socket(unix.AF_INET6, typ, proto) + fd = dut.Socket(t, unix.AF_INET6, typ, proto) sa := unix.SockaddrInet6{} copy(sa.Addr[:], addr.To16()) sa.ZoneId = uint32(RemoteInterfaceID) - dut.Bind(fd, &sa) + dut.Bind(t, fd, &sa) } else { - dut.t.Fatalf("unknown ip addr type for remoteIP") + t.Fatalf("invalid IP address: %s", addr) } - sa := dut.GetSockName(fd) + sa := dut.GetSockName(t, fd) var port int switch s := sa.(type) { case *unix.SockaddrInet4: @@ -140,15 +143,17 @@ func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) case *unix.SockaddrInet6: port = s.Port default: - dut.t.Fatalf("unknown sockaddr type from getsockname: %t", sa) + t.Fatalf("unknown sockaddr type from getsockname: %T", sa) } return fd, uint16(port) } // CreateListener makes a new TCP connection. If it fails, the test ends. 
-func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { - fd, remotePort := dut.CreateBoundSocket(typ, proto, net.ParseIP(RemoteIPv4)) - dut.Listen(fd, backlog) +func (dut *DUT) CreateListener(t *testing.T, typ, proto, backlog int32) (int32, uint16) { + t.Helper() + + fd, remotePort := dut.CreateBoundSocket(t, typ, proto, net.ParseIP(RemoteIPv4)) + dut.Listen(t, fd, backlog) return fd, remotePort } @@ -158,53 +163,57 @@ func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) { // Accept calls accept on the DUT and causes a fatal test failure if it doesn't // succeed. If more control over the timeout or error handling is needed, use // AcceptWithErrno. -func (dut *DUT) Accept(sockfd int32) (int32, unix.Sockaddr) { - dut.t.Helper() +func (dut *DUT) Accept(t *testing.T, sockfd int32) (int32, unix.Sockaddr) { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - fd, sa, err := dut.AcceptWithErrno(ctx, sockfd) + fd, sa, err := dut.AcceptWithErrno(ctx, t, sockfd) if fd < 0 { - dut.t.Fatalf("failed to accept: %s", err) + t.Fatalf("failed to accept: %s", err) } return fd, sa } // AcceptWithErrno calls accept on the DUT. -func (dut *DUT) AcceptWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) { - dut.t.Helper() +func (dut *DUT) AcceptWithErrno(ctx context.Context, t *testing.T, sockfd int32) (int32, unix.Sockaddr, error) { + t.Helper() + req := pb.AcceptRequest{ Sockfd: sockfd, } resp, err := dut.posixServer.Accept(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Accept: %s", err) + t.Fatalf("failed to call Accept: %s", err) } - return resp.GetFd(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) + return resp.GetFd(), dut.protoToSockaddr(t, resp.GetAddr()), syscall.Errno(resp.GetErrno_()) } // Bind calls bind on the DUT and causes a fatal test failure if it doesn't // succeed. If more control over the timeout or error handling is // needed, use BindWithErrno. -func (dut *DUT) Bind(fd int32, sa unix.Sockaddr) { - dut.t.Helper() +func (dut *DUT) Bind(t *testing.T, fd int32, sa unix.Sockaddr) { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.BindWithErrno(ctx, fd, sa) + ret, err := dut.BindWithErrno(ctx, t, fd, sa) if ret != 0 { - dut.t.Fatalf("failed to bind socket: %s", err) + t.Fatalf("failed to bind socket: %s", err) } } // BindWithErrno calls bind on the DUT. -func (dut *DUT) BindWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) (int32, error) { - dut.t.Helper() +func (dut *DUT) BindWithErrno(ctx context.Context, t *testing.T, fd int32, sa unix.Sockaddr) (int32, error) { + t.Helper() + req := pb.BindRequest{ Sockfd: fd, - Addr: dut.sockaddrToProto(sa), + Addr: dut.sockaddrToProto(t, sa), } resp, err := dut.posixServer.Bind(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Bind: %s", err) + t.Fatalf("failed to call Bind: %s", err) } return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } @@ -212,25 +221,27 @@ func (dut *DUT) BindWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) ( // Close calls close on the DUT and causes a fatal test failure if it doesn't // succeed. If more control over the timeout or error handling is needed, use // CloseWithErrno. 
-func (dut *DUT) Close(fd int32) { - dut.t.Helper() +func (dut *DUT) Close(t *testing.T, fd int32) { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.CloseWithErrno(ctx, fd) + ret, err := dut.CloseWithErrno(ctx, t, fd) if ret != 0 { - dut.t.Fatalf("failed to close: %s", err) + t.Fatalf("failed to close: %s", err) } } // CloseWithErrno calls close on the DUT. -func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) { - dut.t.Helper() +func (dut *DUT) CloseWithErrno(ctx context.Context, t *testing.T, fd int32) (int32, error) { + t.Helper() + req := pb.CloseRequest{ Fd: fd, } resp, err := dut.posixServer.Close(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Close: %s", err) + t.Fatalf("failed to call Close: %s", err) } return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } @@ -238,28 +249,30 @@ func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) { // Connect calls connect on the DUT and causes a fatal test failure if it // doesn't succeed. If more control over the timeout or error handling is // needed, use ConnectWithErrno. -func (dut *DUT) Connect(fd int32, sa unix.Sockaddr) { - dut.t.Helper() +func (dut *DUT) Connect(t *testing.T, fd int32, sa unix.Sockaddr) { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.ConnectWithErrno(ctx, fd, sa) + ret, err := dut.ConnectWithErrno(ctx, t, fd, sa) // Ignore 'operation in progress' error that can be returned when the socket // is non-blocking. if err != syscall.Errno(unix.EINPROGRESS) && ret != 0 { - dut.t.Fatalf("failed to connect socket: %s", err) + t.Fatalf("failed to connect socket: %s", err) } } // ConnectWithErrno calls bind on the DUT. -func (dut *DUT) ConnectWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) (int32, error) { - dut.t.Helper() +func (dut *DUT) ConnectWithErrno(ctx context.Context, t *testing.T, fd int32, sa unix.Sockaddr) (int32, error) { + t.Helper() + req := pb.ConnectRequest{ Sockfd: fd, - Addr: dut.sockaddrToProto(sa), + Addr: dut.sockaddrToProto(t, sa), } resp, err := dut.posixServer.Connect(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Connect: %s", err) + t.Fatalf("failed to call Connect: %s", err) } return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } @@ -267,20 +280,22 @@ func (dut *DUT) ConnectWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr // Fcntl calls fcntl on the DUT and causes a fatal test failure if it // doesn't succeed. If more control over the timeout or error handling is // needed, use FcntlWithErrno. -func (dut *DUT) Fcntl(fd, cmd, arg int32) int32 { - dut.t.Helper() +func (dut *DUT) Fcntl(t *testing.T, fd, cmd, arg int32) int32 { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.FcntlWithErrno(ctx, fd, cmd, arg) + ret, err := dut.FcntlWithErrno(ctx, t, fd, cmd, arg) if ret == -1 { - dut.t.Fatalf("failed to Fcntl: ret=%d, errno=%s", ret, err) + t.Fatalf("failed to Fcntl: ret=%d, errno=%s", ret, err) } return ret } // FcntlWithErrno calls fcntl on the DUT. 
-func (dut *DUT) FcntlWithErrno(ctx context.Context, fd, cmd, arg int32) (int32, error) { - dut.t.Helper() +func (dut *DUT) FcntlWithErrno(ctx context.Context, t *testing.T, fd, cmd, arg int32) (int32, error) { + t.Helper() + req := pb.FcntlRequest{ Fd: fd, Cmd: cmd, @@ -288,7 +303,7 @@ func (dut *DUT) FcntlWithErrno(ctx context.Context, fd, cmd, arg int32) (int32, } resp, err := dut.posixServer.Fcntl(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Fcntl: %s", err) + t.Fatalf("failed to call Fcntl: %s", err) } return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } @@ -296,32 +311,35 @@ func (dut *DUT) FcntlWithErrno(ctx context.Context, fd, cmd, arg int32) (int32, // GetSockName calls getsockname on the DUT and causes a fatal test failure if // it doesn't succeed. If more control over the timeout or error handling is // needed, use GetSockNameWithErrno. -func (dut *DUT) GetSockName(sockfd int32) unix.Sockaddr { - dut.t.Helper() +func (dut *DUT) GetSockName(t *testing.T, sockfd int32) unix.Sockaddr { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, sa, err := dut.GetSockNameWithErrno(ctx, sockfd) + ret, sa, err := dut.GetSockNameWithErrno(ctx, t, sockfd) if ret != 0 { - dut.t.Fatalf("failed to getsockname: %s", err) + t.Fatalf("failed to getsockname: %s", err) } return sa } // GetSockNameWithErrno calls getsockname on the DUT. -func (dut *DUT) GetSockNameWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) { - dut.t.Helper() +func (dut *DUT) GetSockNameWithErrno(ctx context.Context, t *testing.T, sockfd int32) (int32, unix.Sockaddr, error) { + t.Helper() + req := pb.GetSockNameRequest{ Sockfd: sockfd, } resp, err := dut.posixServer.GetSockName(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Bind: %s", err) + t.Fatalf("failed to call Bind: %s", err) } - return resp.GetRet(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_()) + return resp.GetRet(), dut.protoToSockaddr(t, resp.GetAddr()), syscall.Errno(resp.GetErrno_()) } -func (dut *DUT) getSockOpt(ctx context.Context, sockfd, level, optname, optlen int32, typ pb.GetSockOptRequest_SockOptType) (int32, *pb.SockOptVal, error) { - dut.t.Helper() +func (dut *DUT) getSockOpt(ctx context.Context, t *testing.T, sockfd, level, optname, optlen int32, typ pb.GetSockOptRequest_SockOptType) (int32, *pb.SockOptVal, error) { + t.Helper() + req := pb.GetSockOptRequest{ Sockfd: sockfd, Level: level, @@ -331,11 +349,11 @@ func (dut *DUT) getSockOpt(ctx context.Context, sockfd, level, optname, optlen i } resp, err := dut.posixServer.GetSockOpt(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call GetSockOpt: %s", err) + t.Fatalf("failed to call GetSockOpt: %s", err) } optval := resp.GetOptval() if optval == nil { - dut.t.Fatalf("GetSockOpt response does not contain a value") + t.Fatalf("GetSockOpt response does not contain a value") } return resp.GetRet(), optval, syscall.Errno(resp.GetErrno_()) } @@ -345,13 +363,14 @@ func (dut *DUT) getSockOpt(ctx context.Context, sockfd, level, optname, optlen i // needed, use GetSockOptWithErrno. Because endianess and the width of values // might differ between the testbench and DUT architectures, prefer to use a // more specific GetSockOptXxx function. 
-func (dut *DUT) GetSockOpt(sockfd, level, optname, optlen int32) []byte { - dut.t.Helper() +func (dut *DUT) GetSockOpt(t *testing.T, sockfd, level, optname, optlen int32) []byte { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, optval, err := dut.GetSockOptWithErrno(ctx, sockfd, level, optname, optlen) + ret, optval, err := dut.GetSockOptWithErrno(ctx, t, sockfd, level, optname, optlen) if ret != 0 { - dut.t.Fatalf("failed to GetSockOpt: %s", err) + t.Fatalf("failed to GetSockOpt: %s", err) } return optval } @@ -359,12 +378,13 @@ func (dut *DUT) GetSockOpt(sockfd, level, optname, optlen int32) []byte { // GetSockOptWithErrno calls getsockopt on the DUT. Because endianess and the // width of values might differ between the testbench and DUT architectures, // prefer to use a more specific GetSockOptXxxWithErrno function. -func (dut *DUT) GetSockOptWithErrno(ctx context.Context, sockfd, level, optname, optlen int32) (int32, []byte, error) { - dut.t.Helper() - ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, optlen, pb.GetSockOptRequest_BYTES) +func (dut *DUT) GetSockOptWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname, optlen int32) (int32, []byte, error) { + t.Helper() + + ret, optval, errno := dut.getSockOpt(ctx, t, sockfd, level, optname, optlen, pb.GetSockOptRequest_BYTES) bytesval, ok := optval.Val.(*pb.SockOptVal_Bytesval) if !ok { - dut.t.Fatalf("GetSockOpt got value type: %T, want bytes", optval) + t.Fatalf("GetSockOpt got value type: %T, want bytes", optval.Val) } return ret, bytesval.Bytesval, errno } @@ -372,24 +392,26 @@ func (dut *DUT) GetSockOptWithErrno(ctx context.Context, sockfd, level, optname, // GetSockOptInt calls getsockopt on the DUT and causes a fatal test failure // if it doesn't succeed. If more control over the int optval or error handling // is needed, use GetSockOptIntWithErrno. -func (dut *DUT) GetSockOptInt(sockfd, level, optname int32) int32 { - dut.t.Helper() +func (dut *DUT) GetSockOptInt(t *testing.T, sockfd, level, optname int32) int32 { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, intval, err := dut.GetSockOptIntWithErrno(ctx, sockfd, level, optname) + ret, intval, err := dut.GetSockOptIntWithErrno(ctx, t, sockfd, level, optname) if ret != 0 { - dut.t.Fatalf("failed to GetSockOptInt: %s", err) + t.Fatalf("failed to GetSockOptInt: %s", err) } return intval } // GetSockOptIntWithErrno calls getsockopt with an integer optval. -func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, sockfd, level, optname int32) (int32, int32, error) { - dut.t.Helper() - ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, 0, pb.GetSockOptRequest_INT) +func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname int32) (int32, int32, error) { + t.Helper() + + ret, optval, errno := dut.getSockOpt(ctx, t, sockfd, level, optname, 0, pb.GetSockOptRequest_INT) intval, ok := optval.Val.(*pb.SockOptVal_Intval) if !ok { - dut.t.Fatalf("GetSockOpt got value type: %T, want int", optval) + t.Fatalf("GetSockOpt got value type: %T, want int", optval.Val) } return ret, intval.Intval, errno } @@ -397,24 +419,26 @@ func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, sockfd, level, optna // GetSockOptTimeval calls getsockopt on the DUT and causes a fatal test failure // if it doesn't succeed. 
If more control over the timeout or error handling is // needed, use GetSockOptTimevalWithErrno. -func (dut *DUT) GetSockOptTimeval(sockfd, level, optname int32) unix.Timeval { - dut.t.Helper() +func (dut *DUT) GetSockOptTimeval(t *testing.T, sockfd, level, optname int32) unix.Timeval { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, timeval, err := dut.GetSockOptTimevalWithErrno(ctx, sockfd, level, optname) + ret, timeval, err := dut.GetSockOptTimevalWithErrno(ctx, t, sockfd, level, optname) if ret != 0 { - dut.t.Fatalf("failed to GetSockOptTimeval: %s", err) + t.Fatalf("failed to GetSockOptTimeval: %s", err) } return timeval } // GetSockOptTimevalWithErrno calls getsockopt and returns a timeval. -func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, optname int32) (int32, unix.Timeval, error) { - dut.t.Helper() - ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, 0, pb.GetSockOptRequest_TIME) +func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname int32) (int32, unix.Timeval, error) { + t.Helper() + + ret, optval, errno := dut.getSockOpt(ctx, t, sockfd, level, optname, 0, pb.GetSockOptRequest_TIME) tv, ok := optval.Val.(*pb.SockOptVal_Timeval) if !ok { - dut.t.Fatalf("GetSockOpt got value type: %T, want timeval", optval) + t.Fatalf("GetSockOpt got value type: %T, want timeval", optval.Val) } timeval := unix.Timeval{ Sec: tv.Timeval.Seconds, @@ -426,26 +450,28 @@ func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, o // Listen calls listen on the DUT and causes a fatal test failure if it doesn't // succeed. If more control over the timeout or error handling is needed, use // ListenWithErrno. -func (dut *DUT) Listen(sockfd, backlog int32) { - dut.t.Helper() +func (dut *DUT) Listen(t *testing.T, sockfd, backlog int32) { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.ListenWithErrno(ctx, sockfd, backlog) + ret, err := dut.ListenWithErrno(ctx, t, sockfd, backlog) if ret != 0 { - dut.t.Fatalf("failed to listen: %s", err) + t.Fatalf("failed to listen: %s", err) } } // ListenWithErrno calls listen on the DUT. -func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int32, error) { - dut.t.Helper() +func (dut *DUT) ListenWithErrno(ctx context.Context, t *testing.T, sockfd, backlog int32) (int32, error) { + t.Helper() + req := pb.ListenRequest{ Sockfd: sockfd, Backlog: backlog, } resp, err := dut.posixServer.Listen(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Listen: %s", err) + t.Fatalf("failed to call Listen: %s", err) } return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } @@ -453,20 +479,22 @@ func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int // Send calls send on the DUT and causes a fatal test failure if it doesn't // succeed. If more control over the timeout or error handling is needed, use // SendWithErrno. 
-func (dut *DUT) Send(sockfd int32, buf []byte, flags int32) int32 { - dut.t.Helper() +func (dut *DUT) Send(t *testing.T, sockfd int32, buf []byte, flags int32) int32 { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.SendWithErrno(ctx, sockfd, buf, flags) + ret, err := dut.SendWithErrno(ctx, t, sockfd, buf, flags) if ret == -1 { - dut.t.Fatalf("failed to send: %s", err) + t.Fatalf("failed to send: %s", err) } return ret } // SendWithErrno calls send on the DUT. -func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, flags int32) (int32, error) { - dut.t.Helper() +func (dut *DUT) SendWithErrno(ctx context.Context, t *testing.T, sockfd int32, buf []byte, flags int32) (int32, error) { + t.Helper() + req := pb.SendRequest{ Sockfd: sockfd, Buf: buf, @@ -474,7 +502,7 @@ func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, fla } resp, err := dut.posixServer.Send(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Send: %s", err) + t.Fatalf("failed to call Send: %s", err) } return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } @@ -482,48 +510,52 @@ func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, fla // SendTo calls sendto on the DUT and causes a fatal test failure if it doesn't // succeed. If more control over the timeout or error handling is needed, use // SendToWithErrno. -func (dut *DUT) SendTo(sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) int32 { - dut.t.Helper() +func (dut *DUT) SendTo(t *testing.T, sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) int32 { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.SendToWithErrno(ctx, sockfd, buf, flags, destAddr) + ret, err := dut.SendToWithErrno(ctx, t, sockfd, buf, flags, destAddr) if ret == -1 { - dut.t.Fatalf("failed to sendto: %s", err) + t.Fatalf("failed to sendto: %s", err) } return ret } // SendToWithErrno calls sendto on the DUT. -func (dut *DUT) SendToWithErrno(ctx context.Context, sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) (int32, error) { - dut.t.Helper() +func (dut *DUT) SendToWithErrno(ctx context.Context, t *testing.T, sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) (int32, error) { + t.Helper() + req := pb.SendToRequest{ Sockfd: sockfd, Buf: buf, Flags: flags, - DestAddr: dut.sockaddrToProto(destAddr), + DestAddr: dut.sockaddrToProto(t, destAddr), } resp, err := dut.posixServer.SendTo(ctx, &req) if err != nil { - dut.t.Fatalf("faled to call SendTo: %s", err) + t.Fatalf("faled to call SendTo: %s", err) } return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } // SetNonBlocking will set O_NONBLOCK flag for fd if nonblocking // is true, otherwise it will clear the flag. 
-func (dut *DUT) SetNonBlocking(fd int32, nonblocking bool) { - dut.t.Helper() - flags := dut.Fcntl(fd, unix.F_GETFL, 0) +func (dut *DUT) SetNonBlocking(t *testing.T, fd int32, nonblocking bool) { + t.Helper() + + flags := dut.Fcntl(t, fd, unix.F_GETFL, 0) if nonblocking { flags |= unix.O_NONBLOCK } else { flags &= ^unix.O_NONBLOCK } - dut.Fcntl(fd, unix.F_SETFL, flags) + dut.Fcntl(t, fd, unix.F_SETFL, flags) } -func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, optval *pb.SockOptVal) (int32, error) { - dut.t.Helper() +func (dut *DUT) setSockOpt(ctx context.Context, t *testing.T, sockfd, level, optname int32, optval *pb.SockOptVal) (int32, error) { + t.Helper() + req := pb.SetSockOptRequest{ Sockfd: sockfd, Level: level, @@ -532,7 +564,7 @@ func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, op } resp, err := dut.posixServer.SetSockOpt(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call SetSockOpt: %s", err) + t.Fatalf("failed to call SetSockOpt: %s", err) } return resp.GetRet(), syscall.Errno(resp.GetErrno_()) } @@ -542,81 +574,89 @@ func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, op // needed, use SetSockOptWithErrno. Because endianess and the width of values // might differ between the testbench and DUT architectures, prefer to use a // more specific SetSockOptXxx function. -func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) { - dut.t.Helper() +func (dut *DUT) SetSockOpt(t *testing.T, sockfd, level, optname int32, optval []byte) { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.SetSockOptWithErrno(ctx, sockfd, level, optname, optval) + ret, err := dut.SetSockOptWithErrno(ctx, t, sockfd, level, optname, optval) if ret != 0 { - dut.t.Fatalf("failed to SetSockOpt: %s", err) + t.Fatalf("failed to SetSockOpt: %s", err) } } // SetSockOptWithErrno calls setsockopt on the DUT. Because endianess and the // width of values might differ between the testbench and DUT architectures, // prefer to use a more specific SetSockOptXxxWithErrno function. -func (dut *DUT) SetSockOptWithErrno(ctx context.Context, sockfd, level, optname int32, optval []byte) (int32, error) { - dut.t.Helper() - return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Bytesval{optval}}) +func (dut *DUT) SetSockOptWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname int32, optval []byte) (int32, error) { + t.Helper() + + return dut.setSockOpt(ctx, t, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Bytesval{optval}}) } // SetSockOptInt calls setsockopt on the DUT and causes a fatal test failure // if it doesn't succeed. If more control over the int optval or error handling // is needed, use SetSockOptIntWithErrno. -func (dut *DUT) SetSockOptInt(sockfd, level, optname, optval int32) { - dut.t.Helper() +func (dut *DUT) SetSockOptInt(t *testing.T, sockfd, level, optname, optval int32) { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.SetSockOptIntWithErrno(ctx, sockfd, level, optname, optval) + ret, err := dut.SetSockOptIntWithErrno(ctx, t, sockfd, level, optname, optval) if ret != 0 { - dut.t.Fatalf("failed to SetSockOptInt: %s", err) + t.Fatalf("failed to SetSockOptInt: %s", err) } } // SetSockOptIntWithErrno calls setsockopt with an integer optval. 
-func (dut *DUT) SetSockOptIntWithErrno(ctx context.Context, sockfd, level, optname, optval int32) (int32, error) { - dut.t.Helper() - return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Intval{optval}}) +func (dut *DUT) SetSockOptIntWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname, optval int32) (int32, error) { + t.Helper() + + return dut.setSockOpt(ctx, t, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Intval{optval}}) } // SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure // if it doesn't succeed. If more control over the timeout or error handling is // needed, use SetSockOptTimevalWithErrno. -func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval) { - dut.t.Helper() +func (dut *DUT) SetSockOptTimeval(t *testing.T, sockfd, level, optname int32, tv *unix.Timeval) { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, err := dut.SetSockOptTimevalWithErrno(ctx, sockfd, level, optname, tv) + ret, err := dut.SetSockOptTimevalWithErrno(ctx, t, sockfd, level, optname, tv) if ret != 0 { - dut.t.Fatalf("failed to SetSockOptTimeval: %s", err) + t.Fatalf("failed to SetSockOptTimeval: %s", err) } } // SetSockOptTimevalWithErrno calls setsockopt with the timeval converted to // bytes. -func (dut *DUT) SetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, optname int32, tv *unix.Timeval) (int32, error) { - dut.t.Helper() +func (dut *DUT) SetSockOptTimevalWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname int32, tv *unix.Timeval) (int32, error) { + t.Helper() + timeval := pb.Timeval{ Seconds: int64(tv.Sec), Microseconds: int64(tv.Usec), } - return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Timeval{&timeval}}) + return dut.setSockOpt(ctx, t, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Timeval{&timeval}}) } // Socket calls socket on the DUT and returns the file descriptor. If socket // fails on the DUT, the test ends. -func (dut *DUT) Socket(domain, typ, proto int32) int32 { - dut.t.Helper() - fd, err := dut.SocketWithErrno(domain, typ, proto) +func (dut *DUT) Socket(t *testing.T, domain, typ, proto int32) int32 { + t.Helper() + + fd, err := dut.SocketWithErrno(t, domain, typ, proto) if fd < 0 { - dut.t.Fatalf("failed to create socket: %s", err) + t.Fatalf("failed to create socket: %s", err) } return fd } // SocketWithErrno calls socket on the DUT and returns the fd and errno. -func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) { - dut.t.Helper() +func (dut *DUT) SocketWithErrno(t *testing.T, domain, typ, proto int32) (int32, error) { + t.Helper() + req := pb.SocketRequest{ Domain: domain, Type: typ, @@ -625,7 +665,7 @@ func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) { ctx := context.Background() resp, err := dut.posixServer.Socket(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Socket: %s", err) + t.Fatalf("failed to call Socket: %s", err) } return resp.GetFd(), syscall.Errno(resp.GetErrno_()) } @@ -633,20 +673,22 @@ func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) { // Recv calls recv on the DUT and causes a fatal test failure if it doesn't // succeed. If more control over the timeout or error handling is needed, use // RecvWithErrno. 
-func (dut *DUT) Recv(sockfd, len, flags int32) []byte { - dut.t.Helper() +func (dut *DUT) Recv(t *testing.T, sockfd, len, flags int32) []byte { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout) defer cancel() - ret, buf, err := dut.RecvWithErrno(ctx, sockfd, len, flags) + ret, buf, err := dut.RecvWithErrno(ctx, t, sockfd, len, flags) if ret == -1 { - dut.t.Fatalf("failed to recv: %s", err) + t.Fatalf("failed to recv: %s", err) } return buf } // RecvWithErrno calls recv on the DUT. -func (dut *DUT) RecvWithErrno(ctx context.Context, sockfd, len, flags int32) (int32, []byte, error) { - dut.t.Helper() +func (dut *DUT) RecvWithErrno(ctx context.Context, t *testing.T, sockfd, len, flags int32) (int32, []byte, error) { + t.Helper() + req := pb.RecvRequest{ Sockfd: sockfd, Len: len, @@ -654,7 +696,7 @@ func (dut *DUT) RecvWithErrno(ctx context.Context, sockfd, len, flags int32) (in } resp, err := dut.posixServer.Recv(ctx, &req) if err != nil { - dut.t.Fatalf("failed to call Recv: %s", err) + t.Fatalf("failed to call Recv: %s", err) } return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_()) } diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go index 278229b7e..57e822725 100644 --- a/test/packetimpact/testbench/rawsockets.go +++ b/test/packetimpact/testbench/rawsockets.go @@ -28,7 +28,6 @@ import ( // Sniffer can sniff raw packets on the wire. type Sniffer struct { - t *testing.T fd int } @@ -40,6 +39,8 @@ func htons(x uint16) uint16 { // NewSniffer creates a Sniffer connected to *device. func NewSniffer(t *testing.T) (Sniffer, error) { + t.Helper() + snifferFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL))) if err != nil { return Sniffer{}, err @@ -51,7 +52,6 @@ func NewSniffer(t *testing.T) (Sniffer, error) { t.Fatalf("can't setsockopt SO_RCVBUF to 10M: %s", err) } return Sniffer{ - t: t, fd: snifferFd, }, nil } @@ -61,7 +61,9 @@ func NewSniffer(t *testing.T) (Sniffer, error) { const maxReadSize int = 65536 // Recv tries to read one frame until the timeout is up. -func (s *Sniffer) Recv(timeout time.Duration) []byte { +func (s *Sniffer) Recv(t *testing.T, timeout time.Duration) []byte { + t.Helper() + deadline := time.Now().Add(timeout) for { timeout = deadline.Sub(time.Now()) @@ -75,7 +77,7 @@ func (s *Sniffer) Recv(timeout time.Duration) []byte { } if err := unix.SetsockoptTimeval(s.fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil { - s.t.Fatalf("can't setsockopt SO_RCVTIMEO: %s", err) + t.Fatalf("can't setsockopt SO_RCVTIMEO: %s", err) } buf := make([]byte, maxReadSize) @@ -85,10 +87,10 @@ func (s *Sniffer) Recv(timeout time.Duration) []byte { continue } if err != nil { - s.t.Fatalf("can't read: %s", err) + t.Fatalf("can't read: %s", err) } if nread > maxReadSize { - s.t.Fatalf("received a truncated frame of %d bytes", nread) + t.Fatalf("received a truncated frame of %d bytes, want at most %d bytes", nread, maxReadSize) } return buf[:nread] } @@ -96,14 +98,16 @@ func (s *Sniffer) Recv(timeout time.Duration) []byte { // Drain drains the Sniffer's socket receive buffer by receiving until there's // nothing else to receive. 
-func (s *Sniffer) Drain() { - s.t.Helper() +func (s *Sniffer) Drain(t *testing.T) { + t.Helper() + flags, err := unix.FcntlInt(uintptr(s.fd), unix.F_GETFL, 0) if err != nil { - s.t.Fatalf("failed to get sniffer socket fd flags: %s", err) + t.Fatalf("failed to get sniffer socket fd flags: %s", err) } - if _, err := unix.FcntlInt(uintptr(s.fd), unix.F_SETFL, flags|unix.O_NONBLOCK); err != nil { - s.t.Fatalf("failed to make sniffer socket non-blocking: %s", err) + nonBlockingFlags := flags | unix.O_NONBLOCK + if _, err := unix.FcntlInt(uintptr(s.fd), unix.F_SETFL, nonBlockingFlags); err != nil { + t.Fatalf("failed to make sniffer socket non-blocking with flags %b: %s", nonBlockingFlags, err) } for { buf := make([]byte, maxReadSize) @@ -113,7 +117,7 @@ func (s *Sniffer) Drain() { } } if _, err := unix.FcntlInt(uintptr(s.fd), unix.F_SETFL, flags); err != nil { - s.t.Fatalf("failed to restore sniffer socket fd flags: %s", err) + t.Fatalf("failed to restore sniffer socket fd flags to %b: %s", flags, err) } } @@ -128,12 +132,13 @@ func (s *Sniffer) close() error { // Injector can inject raw frames. type Injector struct { - t *testing.T fd int } // NewInjector creates a new injector on *device. func NewInjector(t *testing.T) (Injector, error) { + t.Helper() + ifInfo, err := net.InterfaceByName(Device) if err != nil { return Injector{}, err @@ -156,15 +161,20 @@ func NewInjector(t *testing.T) (Injector, error) { return Injector{}, err } return Injector{ - t: t, fd: injectFd, }, nil } // Send a raw frame. -func (i *Injector) Send(b []byte) { - if _, err := unix.Write(i.fd, b); err != nil { - i.t.Fatalf("can't write: %s of len %d", err, len(b)) +func (i *Injector) Send(t *testing.T, b []byte) { + t.Helper() + + n, err := unix.Write(i.fd, b) + if err != nil { + t.Fatalf("can't write bytes of len %d: %s", len(b), err) + } + if n != len(b) { + t.Fatalf("got %d bytes written, want %d", n, len(b)) } } diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go index 407565078..a61054c2c 100644 --- a/test/packetimpact/tests/fin_wait2_timeout_test.go +++ b/test/packetimpact/tests/fin_wait2_timeout_test.go @@ -39,34 +39,34 @@ func TestFinWait2Timeout(t *testing.T) { t.Run(tt.description, func(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() - conn.Connect() + defer conn.Close(t) + conn.Connect(t) - acceptFd, _ := dut.Accept(listenFd) + acceptFd, _ := dut.Accept(t, listenFd) if tt.linger2 { tv := unix.Timeval{Sec: 1, Usec: 0} - dut.SetSockOptTimeval(acceptFd, unix.SOL_TCP, unix.TCP_LINGER2, &tv) + dut.SetSockOptTimeval(t, acceptFd, unix.SOL_TCP, unix.TCP_LINGER2, &tv) } - dut.Close(acceptFd) + dut.Close(t, acceptFd) - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil { t.Fatalf("expected a FIN-ACK within 1 second but got none: %s", err) } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) 
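			// Descriptive note on the steps above/below (not part of the original patch):
			// the ACK just sent acknowledges the DUT's FIN after dut.Close(), leaving the
			// DUT in FIN_WAIT_2. The 5-second sleep below outlasts the 1-second
			// TCP_LINGER2 timeout configured when tt.linger2 is true, so the ACK sent
			// after draining probes whether the DUT has already torn the connection down
			// (RST expected) or is still holding FIN_WAIT_2 state (no RST expected).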
time.Sleep(5 * time.Second) - conn.Drain() + conn.Drain(t) - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) if tt.linger2 { - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil { + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil { t.Fatalf("expected a RST packet within a second but got none: %s", err) } } else { - if got, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, 10*time.Second); got != nil || err == nil { + if got, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, 10*time.Second); got != nil || err == nil { t.Fatalf("expected no RST packets within ten seconds but got one: %s", got) } } diff --git a/test/packetimpact/tests/icmpv6_param_problem_test.go b/test/packetimpact/tests/icmpv6_param_problem_test.go index 8dfd26ee8..2d59d552d 100644 --- a/test/packetimpact/tests/icmpv6_param_problem_test.go +++ b/test/packetimpact/tests/icmpv6_param_problem_test.go @@ -34,7 +34,7 @@ func TestICMPv6ParamProblemTest(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{}) - defer conn.Close() + defer conn.Close(t) ipv6 := testbench.IPv6{ // 254 is reserved and used for experimentation and testing. This should // cause an error. @@ -45,8 +45,8 @@ func TestICMPv6ParamProblemTest(t *testing.T) { Payload: []byte("hello world"), } - toSend := (*testbench.Connection)(&conn).CreateFrame(testbench.Layers{&ipv6}, &icmpv6) - (*testbench.Connection)(&conn).SendFrame(toSend) + toSend := (*testbench.Connection)(&conn).CreateFrame(t, testbench.Layers{&ipv6}, &icmpv6) + (*testbench.Connection)(&conn).SendFrame(t, toSend) // Build the expected ICMPv6 payload, which includes an index to the // problematic byte and also the problematic packet as described in @@ -72,7 +72,7 @@ func TestICMPv6ParamProblemTest(t *testing.T) { &expectedICMPv6, } timeout := time.Second - if _, err := conn.ExpectFrame(paramProblem, timeout); err != nil { + if _, err := conn.ExpectFrame(t, paramProblem, timeout); err != nil { t.Errorf("expected %s within %s but got none: %s", paramProblem, timeout, err) } } diff --git a/test/packetimpact/tests/ipv4_id_uniqueness_test.go b/test/packetimpact/tests/ipv4_id_uniqueness_test.go index 70f6df5e0..cf881418c 100644 --- a/test/packetimpact/tests/ipv4_id_uniqueness_test.go +++ b/test/packetimpact/tests/ipv4_id_uniqueness_test.go @@ -31,8 +31,8 @@ func init() { testbench.RegisterFlags(flag.CommandLine) } -func recvTCPSegment(conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayload *testbench.Payload) (uint16, error) { - layers, err := conn.ExpectData(expect, expectPayload, time.Second) +func recvTCPSegment(t *testing.T, conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayload *testbench.Payload) (uint16, error) { + layers, err := conn.ExpectData(t, expect, expectPayload, time.Second) if err != nil { return 0, fmt.Errorf("failed to receive TCP segment: %s", err) } @@ -69,17 +69,17 @@ func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFD) conn := 
testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) - conn.Connect() - remoteFD, _ := dut.Accept(listenFD) - defer dut.Close(remoteFD) + conn.Connect(t) + remoteFD, _ := dut.Accept(t, listenFD) + defer dut.Close(t, remoteFD) - dut.SetSockOptInt(remoteFD, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + dut.SetSockOptInt(t, remoteFD, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) // TODO(b/129291778) The following socket option clears the DF bit on // IP packets sent over the socket, and is currently not supported by @@ -87,30 +87,30 @@ func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) { // socket option being not supported does not affect the operation of // this test. Once the socket option is supported, the following call // can be changed to simply assert success. - ret, errno := dut.SetSockOptIntWithErrno(context.Background(), remoteFD, unix.IPPROTO_IP, linux.IP_MTU_DISCOVER, linux.IP_PMTUDISC_DONT) + ret, errno := dut.SetSockOptIntWithErrno(context.Background(), t, remoteFD, unix.IPPROTO_IP, linux.IP_MTU_DISCOVER, linux.IP_PMTUDISC_DONT) if ret == -1 && errno != unix.ENOTSUP { t.Fatalf("failed to set IP_MTU_DISCOVER socket option to IP_PMTUDISC_DONT: %s", errno) } samplePayload := &testbench.Payload{Bytes: tc.payload} - dut.Send(remoteFD, tc.payload, 0) - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + dut.Send(t, remoteFD, tc.payload, 0) + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("failed to receive TCP segment sent for RTT calculation: %s", err) } // Let the DUT estimate RTO with RTT from the DATA-ACK. // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which // we can skip sending this ACK. 
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - dut.Send(remoteFD, tc.payload, 0) - expectTCP := &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum()))} - originalID, err := recvTCPSegment(&conn, expectTCP, samplePayload) + dut.Send(t, remoteFD, tc.payload, 0) + expectTCP := &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum(t)))} + originalID, err := recvTCPSegment(t, &conn, expectTCP, samplePayload) if err != nil { t.Fatalf("failed to receive TCP segment: %s", err) } - retransmitID, err := recvTCPSegment(&conn, expectTCP, samplePayload) + retransmitID, err := recvTCPSegment(t, &conn, expectTCP, samplePayload) if err != nil { t.Fatalf("failed to receive retransmitted TCP segment: %s", err) } diff --git a/test/packetimpact/tests/ipv6_fragment_reassembly_test.go b/test/packetimpact/tests/ipv6_fragment_reassembly_test.go index 7b462c8e2..b5f94ad4b 100644 --- a/test/packetimpact/tests/ipv6_fragment_reassembly_test.go +++ b/test/packetimpact/tests/ipv6_fragment_reassembly_test.go @@ -48,7 +48,7 @@ func TestIPv6FragmentReassembly(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{}) - defer conn.Close() + defer conn.Close(t) firstPayloadToSend := make([]byte, firstPayloadLength) for i := range firstPayloadToSend { @@ -81,7 +81,7 @@ func TestIPv6FragmentReassembly(t *testing.T) { buffer.NewVectorisedView(len(secondPayloadToSend), []buffer.View{secondPayloadToSend}), ) - conn.Send(testbench.IPv6{}, + conn.Send(t, testbench.IPv6{}, &testbench.IPv6FragmentExtHdr{ FragmentOffset: testbench.Uint16(0), MoreFragments: testbench.Bool(true), @@ -96,7 +96,7 @@ func TestIPv6FragmentReassembly(t *testing.T) { icmpv6ProtoNum := header.IPv6ExtensionHeaderIdentifier(header.ICMPv6ProtocolNumber) - conn.Send(testbench.IPv6{}, + conn.Send(t, testbench.IPv6{}, &testbench.IPv6FragmentExtHdr{ NextHeader: &icmpv6ProtoNum, FragmentOffset: testbench.Uint16((firstPayloadLength + header.ICMPv6EchoMinimumSize) / 8), @@ -107,7 +107,7 @@ func TestIPv6FragmentReassembly(t *testing.T) { Bytes: secondPayloadToSend, }) - gotEchoReplyFirstPart, err := conn.ExpectFrame(testbench.Layers{ + gotEchoReplyFirstPart, err := conn.ExpectFrame(t, testbench.Layers{ &testbench.Ether{}, &testbench.IPv6{}, &testbench.IPv6FragmentExtHdr{ @@ -142,7 +142,7 @@ func TestIPv6FragmentReassembly(t *testing.T) { hex.Dump(wantFirstPayload)) } - gotEchoReplySecondPart, err := conn.ExpectFrame(testbench.Layers{ + gotEchoReplySecondPart, err := conn.ExpectFrame(t, testbench.Layers{ &testbench.Ether{}, &testbench.IPv6{}, &testbench.IPv6FragmentExtHdr{ diff --git a/test/packetimpact/tests/ipv6_unknown_options_action_test.go b/test/packetimpact/tests/ipv6_unknown_options_action_test.go index 100b30ad7..d7d63cbd2 100644 --- a/test/packetimpact/tests/ipv6_unknown_options_action_test.go +++ b/test/packetimpact/tests/ipv6_unknown_options_action_test.go @@ -23,21 +23,21 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" - tb "gvisor.dev/gvisor/test/packetimpact/testbench" + "gvisor.dev/gvisor/test/packetimpact/testbench" ) func init() { - tb.RegisterFlags(flag.CommandLine) + testbench.RegisterFlags(flag.CommandLine) } -func mkHopByHopOptionsExtHdr(optType byte) tb.Layer { - return &tb.IPv6HopByHopOptionsExtHdr{ +func mkHopByHopOptionsExtHdr(optType byte) testbench.Layer { + return &testbench.IPv6HopByHopOptionsExtHdr{ Options: 
[]byte{optType, 0x04, 0x00, 0x00, 0x00, 0x00}, } } -func mkDestinationOptionsExtHdr(optType byte) tb.Layer { - return &tb.IPv6DestinationOptionsExtHdr{ +func mkDestinationOptionsExtHdr(optType byte) testbench.Layer { + return &testbench.IPv6DestinationOptionsExtHdr{ Options: []byte{optType, 0x04, 0x00, 0x00, 0x00, 0x00}, } } @@ -49,7 +49,7 @@ func optionTypeFromAction(action header.IPv6OptionUnknownAction) byte { func TestIPv6UnknownOptionAction(t *testing.T) { for _, tt := range []struct { description string - mkExtHdr func(optType byte) tb.Layer + mkExtHdr func(optType byte) testbench.Layer action header.IPv6OptionUnknownAction multicastDst bool wantICMPv6 bool @@ -140,21 +140,21 @@ func TestIPv6UnknownOptionAction(t *testing.T) { }, } { t.Run(tt.description, func(t *testing.T) { - dut := tb.NewDUT(t) + dut := testbench.NewDUT(t) defer dut.TearDown() - ipv6Conn := tb.NewIPv6Conn(t, tb.IPv6{}, tb.IPv6{}) - conn := (*tb.Connection)(&ipv6Conn) - defer ipv6Conn.Close() + ipv6Conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{}) + conn := (*testbench.Connection)(&ipv6Conn) + defer ipv6Conn.Close(t) - outgoingOverride := tb.Layers{} + outgoingOverride := testbench.Layers{} if tt.multicastDst { - outgoingOverride = tb.Layers{&tb.IPv6{ - DstAddr: tb.Address(tcpip.Address(net.ParseIP("ff02::1"))), + outgoingOverride = testbench.Layers{&testbench.IPv6{ + DstAddr: testbench.Address(tcpip.Address(net.ParseIP("ff02::1"))), }} } - outgoing := conn.CreateFrame(outgoingOverride, tt.mkExtHdr(optionTypeFromAction(tt.action))) - conn.SendFrame(outgoing) + outgoing := conn.CreateFrame(t, outgoingOverride, tt.mkExtHdr(optionTypeFromAction(tt.action))) + conn.SendFrame(t, outgoing) ipv6Sent := outgoing[1:] invokingPacket, err := ipv6Sent.ToBytes() if err != nil { @@ -167,12 +167,12 @@ func TestIPv6UnknownOptionAction(t *testing.T) { // after the IPv6 header (after NextHeader and ExtHdrLen). binary.BigEndian.PutUint32(icmpv6Payload, header.IPv6MinimumSize+2) icmpv6Payload = append(icmpv6Payload, invokingPacket...) 
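// The 4-byte value written above is the ICMPv6 Parameter Problem pointer: the
// byte offset of the offending option type inside the invoking packet. A plain
// sketch of that arithmetic, with the constants spelled out rather than taken
// from the header package:
func paramProblemPointer() uint32 {
	const ipv6FixedHeaderLen = 40 // what header.IPv6MinimumSize works out to
	const extHdrPrefixLen = 2     // Next Header + Hdr Ext Len precede the options
	return ipv6FixedHeaderLen + extHdrPrefixLen // 42, the value written above
}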
- gotICMPv6, err := ipv6Conn.ExpectFrame(tb.Layers{ - &tb.Ether{}, - &tb.IPv6{}, - &tb.ICMPv6{ - Type: tb.ICMPv6Type(header.ICMPv6ParamProblem), - Code: tb.Byte(2), + gotICMPv6, err := ipv6Conn.ExpectFrame(t, testbench.Layers{ + &testbench.Ether{}, + &testbench.IPv6{}, + &testbench.ICMPv6{ + Type: testbench.ICMPv6Type(header.ICMPv6ParamProblem), + Code: testbench.Byte(2), Payload: icmpv6Payload, }, }, time.Second) diff --git a/test/packetimpact/tests/tcp_close_wait_ack_test.go b/test/packetimpact/tests/tcp_close_wait_ack_test.go index 6e7ff41d7..e6a96f214 100644 --- a/test/packetimpact/tests/tcp_close_wait_ack_test.go +++ b/test/packetimpact/tests/tcp_close_wait_ack_test.go @@ -33,39 +33,39 @@ func init() { func TestCloseWaitAck(t *testing.T) { for _, tt := range []struct { description string - makeTestingTCP func(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP + makeTestingTCP func(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset, windowSize seqnum.Size) testbench.TCP seqNumOffset seqnum.Size expectAck bool }{ - {"OTW", GenerateOTWSeqSegment, 0, false}, - {"OTW", GenerateOTWSeqSegment, 1, true}, - {"OTW", GenerateOTWSeqSegment, 2, true}, - {"ACK", GenerateUnaccACKSegment, 0, false}, - {"ACK", GenerateUnaccACKSegment, 1, true}, - {"ACK", GenerateUnaccACKSegment, 2, true}, + {"OTW", generateOTWSeqSegment, 0, false}, + {"OTW", generateOTWSeqSegment, 1, true}, + {"OTW", generateOTWSeqSegment, 2, true}, + {"ACK", generateUnaccACKSegment, 0, false}, + {"ACK", generateUnaccACKSegment, 1, true}, + {"ACK", generateUnaccACKSegment, 2, true}, } { t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) - conn.Connect() - acceptFd, _ := dut.Accept(listenFd) + conn.Connect(t) + acceptFd, _ := dut.Accept(t, listenFd) // Send a FIN to DUT to intiate the active close - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}) - gotTCP, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}) + gotTCP, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) if err != nil { t.Fatalf("expected an ACK for our fin and DUT should enter CLOSE_WAIT: %s", err) } windowSize := seqnum.Size(*gotTCP.WindowSize) // Send a segment with OTW Seq / unacc ACK and expect an ACK back - conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset, windowSize), &testbench.Payload{Bytes: []byte("Sample Data")}) - gotAck, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) + conn.Send(t, tt.makeTestingTCP(t, &conn, tt.seqNumOffset, windowSize), &testbench.Payload{Bytes: []byte("Sample Data")}) + gotAck, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) if tt.expectAck && err != nil { t.Fatalf("expected an ack but got none: %s", err) } @@ -74,35 +74,36 @@ func TestCloseWaitAck(t *testing.T) { } // Now let's verify DUT is indeed in CLOSE_WAIT - dut.Close(acceptFd) - if _, err := 
conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil { + dut.Close(t, acceptFd) + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil { t.Fatalf("expected DUT to send a FIN: %s", err) } // Ack the FIN from DUT - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) // Send some extra data to DUT - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: []byte("Sample Data")}) - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: []byte("Sample Data")}) + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil { t.Fatalf("expected DUT to send an RST: %s", err) } }) } } -// This generates an segment with seqnum = RCV.NXT + RCV.WND + seqNumOffset, the -// generated segment is only acceptable when seqNumOffset is 0, otherwise an ACK -// is expected from the receiver. -func GenerateOTWSeqSegment(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP { - lastAcceptable := conn.LocalSeqNum().Add(windowSize) +// generateOTWSeqSegment generates an segment with +// seqnum = RCV.NXT + RCV.WND + seqNumOffset, the generated segment is only +// acceptable when seqNumOffset is 0, otherwise an ACK is expected from the +// receiver. +func generateOTWSeqSegment(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP { + lastAcceptable := conn.LocalSeqNum(t).Add(windowSize) otwSeq := uint32(lastAcceptable.Add(seqNumOffset)) return testbench.TCP{SeqNum: testbench.Uint32(otwSeq), Flags: testbench.Uint8(header.TCPFlagAck)} } -// This generates an segment with acknum = SND.NXT + seqNumOffset, the generated -// segment is only acceptable when seqNumOffset is 0, otherwise an ACK is -// expected from the receiver. -func GenerateUnaccACKSegment(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP { - lastAcceptable := conn.RemoteSeqNum() +// generateUnaccACKSegment generates an segment with +// acknum = SND.NXT + seqNumOffset, the generated segment is only acceptable +// when seqNumOffset is 0, otherwise an ACK is expected from the receiver. 
+func generateUnaccACKSegment(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP { + lastAcceptable := conn.RemoteSeqNum(t) unaccAck := uint32(lastAcceptable.Add(seqNumOffset)) return testbench.TCP{AckNum: testbench.Uint32(unaccAck), Flags: testbench.Uint8(header.TCPFlagAck)} } diff --git a/test/packetimpact/tests/tcp_cork_mss_test.go b/test/packetimpact/tests/tcp_cork_mss_test.go index fb8f48629..8feea4a82 100644 --- a/test/packetimpact/tests/tcp_cork_mss_test.go +++ b/test/packetimpact/tests/tcp_cork_mss_test.go @@ -32,53 +32,53 @@ func init() { func TestTCPCorkMSS(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFD) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) const mss = uint32(header.TCPDefaultMSS) options := make([]byte, header.TCPOptionMSSLength) header.EncodeMSSOption(mss, options) - conn.ConnectWithOptions(options) + conn.ConnectWithOptions(t, options) - acceptFD, _ := dut.Accept(listenFD) - defer dut.Close(acceptFD) + acceptFD, _ := dut.Accept(t, listenFD) + defer dut.Close(t, acceptFD) - dut.SetSockOptInt(acceptFD, unix.IPPROTO_TCP, unix.TCP_CORK, 1) + dut.SetSockOptInt(t, acceptFD, unix.IPPROTO_TCP, unix.TCP_CORK, 1) // Let the dut application send 2 small segments to be held up and coalesced // until the application sends a larger segment to fill up to > MSS. sampleData := []byte("Sample Data") - dut.Send(acceptFD, sampleData, 0) - dut.Send(acceptFD, sampleData, 0) + dut.Send(t, acceptFD, sampleData, 0) + dut.Send(t, acceptFD, sampleData, 0) expectedData := sampleData expectedData = append(expectedData, sampleData...) largeData := make([]byte, mss+1) expectedData = append(expectedData, largeData...) - dut.Send(acceptFD, largeData, 0) + dut.Send(t, acceptFD, largeData, 0) // Expect the segments to be coalesced and sent and capped to MSS. expectedPayload := testbench.Payload{Bytes: expectedData[:mss]} - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &expectedPayload, time.Second); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &expectedPayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) // Expect the coalesced segment to be split and transmitted. expectedPayload = testbench.Payload{Bytes: expectedData[mss:]} - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } // Check for segments to *not* be held up because of TCP_CORK when // the current send window is less than MSS. 
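// The TCP_CORK option set earlier in this test is what holds the two small
// writes back until a full MSS worth of data is available. On an ordinary
// Linux socket the same toggle is a one-line setsockopt; a minimal sketch
// assuming "golang.org/x/sys/unix" and an already-connected fd:
func setCork(fd int, corked bool) error {
	v := 0
	if corked {
		v = 1 // while corked, partial segments are queued instead of sent
	}
	return unix.SetsockoptInt(fd, unix.IPPROTO_TCP, unix.TCP_CORK, v)
}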
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(2 * len(sampleData)))}) - dut.Send(acceptFD, sampleData, 0) - dut.Send(acceptFD, sampleData, 0) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(2 * len(sampleData)))}) + dut.Send(t, acceptFD, sampleData, 0) + dut.Send(t, acceptFD, sampleData, 0) expectedPayload = testbench.Payload{Bytes: append(sampleData, sampleData...)} - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) } diff --git a/test/packetimpact/tests/tcp_handshake_window_size_test.go b/test/packetimpact/tests/tcp_handshake_window_size_test.go index 652b530d0..22937d92f 100644 --- a/test/packetimpact/tests/tcp_handshake_window_size_test.go +++ b/test/packetimpact/tests/tcp_handshake_window_size_test.go @@ -33,14 +33,14 @@ func init() { func TestTCPHandshakeWindowSize(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFD) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) // Start handshake with zero window size. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), WindowSize: testbench.Uint16(uint16(0))}) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), WindowSize: testbench.Uint16(uint16(0))}) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected SYN-ACK: %s", err) } // Update the advertised window size to a non-zero value with the ACK that @@ -48,10 +48,10 @@ func TestTCPHandshakeWindowSize(t *testing.T) { // // Set the window size with MSB set and expect the dut to treat it as // an unsigned value. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(1 << 15))}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(1 << 15))}) - acceptFd, _ := dut.Accept(listenFD) - defer dut.Close(acceptFd) + acceptFd, _ := dut.Accept(t, listenFD) + defer dut.Close(t, acceptFd) sampleData := []byte("Sample Data") samplePayload := &testbench.Payload{Bytes: sampleData} @@ -59,8 +59,8 @@ func TestTCPHandshakeWindowSize(t *testing.T) { // Since we advertised a zero window followed by a non-zero window, // expect the dut to honor the recently advertised non-zero window // and actually send out the data instead of probing for zero window. 
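// The handshake ACK above advertises a window of 1<<15 precisely because the
// 16-bit window field is unsigned: a receiver that misread it as signed would
// see a negative window. A standalone illustration of the two readings:
func windowReadings() (asUnsigned uint16, ifMisreadAsSigned int16) {
	raw := uint16(1 << 15)
	return raw, int16(raw) // 32768 vs -32768
}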
- dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload, time.Second); err != nil { + dut.Send(t, acceptFd, sampleData, 0) + if _, err := conn.ExpectNextData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } } diff --git a/test/packetimpact/tests/tcp_network_unreachable_test.go b/test/packetimpact/tests/tcp_network_unreachable_test.go index 868a08da8..900352fa1 100644 --- a/test/packetimpact/tests/tcp_network_unreachable_test.go +++ b/test/packetimpact/tests/tcp_network_unreachable_test.go @@ -38,29 +38,29 @@ func TestTCPSynSentUnreachable(t *testing.T) { // Create the DUT and connection. dut := testbench.NewDUT(t) defer dut.TearDown() - clientFD, clientPort := dut.CreateBoundSocket(unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) + clientFD, clientPort := dut.CreateBoundSocket(t, unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) port := uint16(9001) conn := testbench.NewTCPIPv4(t, testbench.TCP{SrcPort: &port, DstPort: &clientPort}, testbench.TCP{SrcPort: &clientPort, DstPort: &port}) - defer conn.Close() + defer conn.Close(t) // Bring the DUT to SYN-SENT state with a non-blocking connect. ctx, cancel := context.WithTimeout(context.Background(), testbench.RPCTimeout) defer cancel() sa := unix.SockaddrInet4{Port: int(port)} copy(sa.Addr[:], net.IP(net.ParseIP(testbench.LocalIPv4)).To4()) - if _, err := dut.ConnectWithErrno(ctx, clientFD, &sa); err != syscall.Errno(unix.EINPROGRESS) { + if _, err := dut.ConnectWithErrno(ctx, t, clientFD, &sa); err != syscall.Errno(unix.EINPROGRESS) { t.Errorf("expected connect to fail with EINPROGRESS, but got %v", err) } // Get the SYN. - tcpLayers, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, nil, time.Second) + tcpLayers, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, nil, time.Second) if err != nil { t.Fatalf("expected SYN: %s", err) } // Send a host unreachable message. rawConn := (*testbench.Connection)(&conn) - layers := rawConn.CreateFrame(nil) + layers := rawConn.CreateFrame(t, nil) layers = layers[:len(layers)-1] const ipLayer = 1 const tcpLayer = ipLayer + 1 @@ -74,9 +74,9 @@ func TestTCPSynSentUnreachable(t *testing.T) { } var icmpv4 testbench.ICMPv4 = testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), Code: testbench.Uint8(header.ICMPv4HostUnreachable)} layers = append(layers, &icmpv4, ip, tcp) - rawConn.SendFrameStateless(layers) + rawConn.SendFrameStateless(t, layers) - if _, err = dut.ConnectWithErrno(ctx, clientFD, &sa); err != syscall.Errno(unix.EHOSTUNREACH) { + if _, err = dut.ConnectWithErrno(ctx, t, clientFD, &sa); err != syscall.Errno(unix.EHOSTUNREACH) { t.Errorf("expected connect to fail with EHOSTUNREACH, but got %v", err) } } @@ -88,9 +88,9 @@ func TestTCPSynSentUnreachable6(t *testing.T) { // Create the DUT and connection. 
dut := testbench.NewDUT(t) defer dut.TearDown() - clientFD, clientPort := dut.CreateBoundSocket(unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv6)) + clientFD, clientPort := dut.CreateBoundSocket(t, unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv6)) conn := testbench.NewTCPIPv6(t, testbench.TCP{DstPort: &clientPort}, testbench.TCP{SrcPort: &clientPort}) - defer conn.Close() + defer conn.Close(t) // Bring the DUT to SYN-SENT state with a non-blocking connect. ctx, cancel := context.WithTimeout(context.Background(), testbench.RPCTimeout) @@ -100,19 +100,19 @@ func TestTCPSynSentUnreachable6(t *testing.T) { ZoneId: uint32(testbench.RemoteInterfaceID), } copy(sa.Addr[:], net.IP(net.ParseIP(testbench.LocalIPv6)).To16()) - if _, err := dut.ConnectWithErrno(ctx, clientFD, &sa); err != syscall.Errno(unix.EINPROGRESS) { + if _, err := dut.ConnectWithErrno(ctx, t, clientFD, &sa); err != syscall.Errno(unix.EINPROGRESS) { t.Errorf("expected connect to fail with EINPROGRESS, but got %v", err) } // Get the SYN. - tcpLayers, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, nil, time.Second) + tcpLayers, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, nil, time.Second) if err != nil { t.Fatalf("expected SYN: %s", err) } // Send a host unreachable message. rawConn := (*testbench.Connection)(&conn) - layers := rawConn.CreateFrame(nil) + layers := rawConn.CreateFrame(t, nil) layers = layers[:len(layers)-1] const ipLayer = 1 const tcpLayer = ipLayer + 1 @@ -131,9 +131,9 @@ func TestTCPSynSentUnreachable6(t *testing.T) { Payload: []byte{0, 0, 0, 0}, } layers = append(layers, &icmpv6, ip, tcp) - rawConn.SendFrameStateless(layers) + rawConn.SendFrameStateless(t, layers) - if _, err = dut.ConnectWithErrno(ctx, clientFD, &sa); err != syscall.Errno(unix.ENETUNREACH) { + if _, err = dut.ConnectWithErrno(ctx, t, clientFD, &sa); err != syscall.Errno(unix.ENETUNREACH) { t.Errorf("expected connect to fail with ENETUNREACH, but got %v", err) } } diff --git a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go index b9b3e91d3..82b7a85ff 100644 --- a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go +++ b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go @@ -31,12 +31,12 @@ func init() { func TestTcpNoAcceptCloseReset(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - conn.Connect() - defer conn.Close() - dut.Close(listenFd) - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil { + conn.Connect(t) + defer conn.Close(t) + dut.Close(t, listenFd) + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil { t.Fatalf("expected a RST-ACK packet but got none: %s", err) } } diff --git a/test/packetimpact/tests/tcp_outside_the_window_test.go b/test/packetimpact/tests/tcp_outside_the_window_test.go index ad8c74234..08f759f7c 100644 --- a/test/packetimpact/tests/tcp_outside_the_window_test.go +++ b/test/packetimpact/tests/tcp_outside_the_window_test.go @@ -63,25 +63,25 @@ func 
TestTCPOutsideTheWindow(t *testing.T) { t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFD) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() - conn.Connect() - acceptFD, _ := dut.Accept(listenFD) - defer dut.Close(acceptFD) + defer conn.Close(t) + conn.Connect(t) + acceptFD, _ := dut.Accept(t, listenFD) + defer dut.Close(t, acceptFD) - windowSize := seqnum.Size(*conn.SynAck().WindowSize) + tt.seqNumOffset - conn.Drain() + windowSize := seqnum.Size(*conn.SynAck(t).WindowSize) + tt.seqNumOffset + conn.Drain(t) // Ignore whatever incrementing that this out-of-order packet might cause // to the AckNum. - localSeqNum := testbench.Uint32(uint32(*conn.LocalSeqNum())) - conn.Send(testbench.TCP{ + localSeqNum := testbench.Uint32(uint32(*conn.LocalSeqNum(t))) + conn.Send(t, testbench.TCP{ Flags: testbench.Uint8(tt.tcpFlags), - SeqNum: testbench.Uint32(uint32(conn.LocalSeqNum().Add(windowSize))), + SeqNum: testbench.Uint32(uint32(conn.LocalSeqNum(t).Add(windowSize))), }, tt.payload...) timeout := 3 * time.Second - gotACK, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout) + gotACK, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout) if tt.expectACK && err != nil { t.Fatalf("expected an ACK packet within %s but got none: %s", timeout, err) } diff --git a/test/packetimpact/tests/tcp_paws_mechanism_test.go b/test/packetimpact/tests/tcp_paws_mechanism_test.go index 55db4ece6..37f3b56dd 100644 --- a/test/packetimpact/tests/tcp_paws_mechanism_test.go +++ b/test/packetimpact/tests/tcp_paws_mechanism_test.go @@ -32,15 +32,15 @@ func init() { func TestPAWSMechanism(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFD) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) options := make([]byte, header.TCPOptionTSLength) header.EncodeTSOption(currentTS(), 0, options) - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), Options: options}) - synAck, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), Options: options}) + synAck, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second) if err != nil { t.Fatalf("didn't get synack during handshake: %s", err) } @@ -50,9 +50,9 @@ func TestPAWSMechanism(t *testing.T) { } tsecr := parsedSynOpts.TSVal header.EncodeTSOption(currentTS(), tsecr, options) - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}) - acceptFD, _ := dut.Accept(listenFD) - defer dut.Close(acceptFD) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}) + acceptFD, _ := dut.Accept(t, listenFD) + defer dut.Close(t, 
acceptFD) sampleData := []byte("Sample Data") sentTSVal := currentTS() @@ -61,9 +61,9 @@ func TestPAWSMechanism(t *testing.T) { // every time we send one, it should not cause any flakiness because timestamps // only need to be non-decreasing. time.Sleep(3 * time.Millisecond) - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData}) - gotTCP, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) + gotTCP, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) if err != nil { t.Fatalf("expected an ACK but got none: %s", err) } @@ -86,9 +86,9 @@ func TestPAWSMechanism(t *testing.T) { // 3ms here is chosen arbitrarily and this time.Sleep() should not cause flakiness // due to the exact same reasoning discussed above. time.Sleep(3 * time.Millisecond) - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData}) - gotTCP, err = conn.Expect(testbench.TCP{AckNum: lastAckNum, Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) + gotTCP, err = conn.Expect(t, testbench.TCP{AckNum: lastAckNum, Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second) if err != nil { t.Fatalf("expected segment with AckNum %d but got none: %s", lastAckNum, err) } diff --git a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go index 8fbec893b..d9f3ea0f2 100644 --- a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go +++ b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go @@ -52,26 +52,26 @@ func TestQueueReceiveInSynSent(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) + socket, remotePort := dut.CreateBoundSocket(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) sampleData := []byte("Sample Data") - dut.SetNonBlocking(socket, true) - if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) { + dut.SetNonBlocking(t, socket, true) + if _, err := dut.ConnectWithErrno(context.Background(), t, socket, conn.LocalAddr(t)); !errors.Is(err, syscall.EINPROGRESS) { t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err) } - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { t.Fatalf("expected a SYN from DUT, but got none: %s", err) } - if _, _, err := dut.RecvWithErrno(context.Background(), socket, int32(len(sampleData)), 0); err != syscall.Errno(unix.EWOULDBLOCK) { + if _, _, err := dut.RecvWithErrno(context.Background(), t, socket, int32(len(sampleData)), 0); err != syscall.Errno(unix.EWOULDBLOCK) { t.Fatalf("expected error %s, got %s", syscall.Errno(unix.EWOULDBLOCK), err) } // Test blocking read. 
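// The SYN-SENT state exercised in this test is entered with an ordinary
// non-blocking connect, which is why the RPCs above check for EINPROGRESS and
// EWOULDBLOCK. A rough sketch of that step, assuming "golang.org/x/sys/unix"
// with fd and sa already prepared:
func startNonBlockingConnect(fd int, sa unix.Sockaddr) error {
	if err := unix.SetNonblock(fd, true); err != nil {
		return err
	}
	err := unix.Connect(fd, sa)
	if err == unix.EINPROGRESS {
		return nil // expected: the SYN is now in flight
	}
	return err
}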
- dut.SetNonBlocking(socket, false) + dut.SetNonBlocking(t, socket, false) var wg sync.WaitGroup defer wg.Wait() @@ -86,7 +86,7 @@ func TestQueueReceiveInSynSent(t *testing.T) { block.Done() // Issue RECEIVE call in SYN-SENT, this should be queued for // process until the connection is established. - n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0) + n, buff, err := dut.RecvWithErrno(ctx, t, socket, int32(len(sampleData)), 0) if tt.reset { if err != syscall.Errno(unix.ECONNREFUSED) { t.Errorf("expected error %s, got %s", syscall.Errno(unix.ECONNREFUSED), err) @@ -112,19 +112,19 @@ func TestQueueReceiveInSynSent(t *testing.T) { time.Sleep(100 * time.Millisecond) if tt.reset { - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}) return } // Bring the connection to Established. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { t.Fatalf("expected an ACK from DUT, but got none: %s", err) } // Send sample payload and expect an ACK. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData}) - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData}) + if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { t.Fatalf("expected an ACK from DUT, but got none: %s", err) } }) diff --git a/test/packetimpact/tests/tcp_reordering_test.go b/test/packetimpact/tests/tcp_reordering_test.go index a5378a9dd..8742819ca 100644 --- a/test/packetimpact/tests/tcp_reordering_test.go +++ b/test/packetimpact/tests/tcp_reordering_test.go @@ -32,10 +32,10 @@ func init() { func TestReorderingWindow(t *testing.T) { dut := tb.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) // Enable SACK. opts := make([]byte, 40) @@ -49,17 +49,17 @@ func TestReorderingWindow(t *testing.T) { const mss = minMTU - header.IPv4MinimumSize - header.TCPMinimumSize optsOff += header.EncodeMSSOption(mss, opts[optsOff:]) - conn.ConnectWithOptions(opts[:optsOff]) + conn.ConnectWithOptions(t, opts[:optsOff]) - acceptFd, _ := dut.Accept(listenFd) - defer dut.Close(acceptFd) + acceptFd, _ := dut.Accept(t, listenFd) + defer dut.Close(t, acceptFd) if tb.DUTType == "linux" { // Linux has changed its handling of reordering, force the old behavior. 
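// On a stock Linux socket the equivalent of the call below is a string
// setsockopt selecting the congestion control module; sketch only, assuming
// "golang.org/x/sys/unix" and that the "reno" module is available on the host
// kernel:
func forceReno(fd int) error {
	return unix.SetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION, "reno")
}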
- dut.SetSockOpt(acceptFd, unix.IPPROTO_TCP, unix.TCP_CONGESTION, []byte("reno")) + dut.SetSockOpt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_CONGESTION, []byte("reno")) } - pls := dut.GetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_MAXSEG) + pls := dut.GetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_MAXSEG) if tb.DUTType == "netstack" { // netstack does not impliment TCP_MAXSEG correctly. Fake it // here. Netstack uses the max SACK size which is 32. The MSS @@ -69,13 +69,13 @@ func TestReorderingWindow(t *testing.T) { payload := make([]byte, pls) - seqNum1 := *conn.RemoteSeqNum() + seqNum1 := *conn.RemoteSeqNum(t) const numPkts = 10 // Send some packets, checking that we receive each. for i, sn := 0, seqNum1; i < numPkts; i++ { - dut.Send(acceptFd, payload, 0) + dut.Send(t, acceptFd, payload, 0) - gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) + gotOne, err := conn.Expect(t, tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) sn.UpdateForward(seqnum.Size(len(payload))) if err != nil { t.Errorf("Expect #%d: %s", i+1, err) @@ -86,7 +86,7 @@ func TestReorderingWindow(t *testing.T) { } } - seqNum2 := *conn.RemoteSeqNum() + seqNum2 := *conn.RemoteSeqNum(t) // SACK packets #2-4. sackBlock := make([]byte, 40) @@ -97,13 +97,13 @@ func TestReorderingWindow(t *testing.T) { seqNum1.Add(seqnum.Size(len(payload))), seqNum1.Add(seqnum.Size(4 * len(payload))), }}, sackBlock[sbOff:]) - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1)), Options: sackBlock[:sbOff]}) + conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1)), Options: sackBlock[:sbOff]}) // ACK first packet. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1) + uint32(len(payload)))}) + conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1) + uint32(len(payload)))}) // Check for retransmit. - gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(seqNum1))}, time.Second) + gotOne, err := conn.Expect(t, tb.TCP{SeqNum: tb.Uint32(uint32(seqNum1))}, time.Second) if err != nil { t.Error("Expect for retransmit:", err) } @@ -123,14 +123,14 @@ func TestReorderingWindow(t *testing.T) { seqNum1.Add(seqnum.Size(4 * len(payload))), }}, dsackBlock[dsbOff:]) - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum2)), Options: dsackBlock[:dsbOff]}) + conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum2)), Options: dsackBlock[:dsbOff]}) // Send half of the original window of packets, checking that we // received each. for i, sn := 0, seqNum2; i < numPkts/2; i++ { - dut.Send(acceptFd, payload, 0) + dut.Send(t, acceptFd, payload, 0) - gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) + gotOne, err := conn.Expect(t, tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) sn.UpdateForward(seqnum.Size(len(payload))) if err != nil { t.Errorf("Expect #%d: %s", i+1, err) @@ -144,8 +144,8 @@ func TestReorderingWindow(t *testing.T) { if tb.DUTType == "netstack" { // The window should now be halved, so we should receive any // more, even if we send them. 
- dut.Send(acceptFd, payload, 0) - if got, err := conn.Expect(tb.TCP{}, 100*time.Millisecond); got != nil || err == nil { + dut.Send(t, acceptFd, payload, 0) + if got, err := conn.Expect(t, tb.TCP{}, 100*time.Millisecond); got != nil || err == nil { t.Fatalf("expected no packets within 100 millisecond, but got one: %s", got) } return @@ -153,9 +153,9 @@ func TestReorderingWindow(t *testing.T) { // Linux reduces the window by three. Check that we can receive the rest. for i, sn := 0, seqNum2.Add(seqnum.Size(numPkts/2*len(payload))); i < 2; i++ { - dut.Send(acceptFd, payload, 0) + dut.Send(t, acceptFd, payload, 0) - gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) + gotOne, err := conn.Expect(t, tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second) sn.UpdateForward(seqnum.Size(len(payload))) if err != nil { t.Errorf("Expect #%d: %s", i+1, err) @@ -167,8 +167,8 @@ func TestReorderingWindow(t *testing.T) { } // The window should now be full. - dut.Send(acceptFd, payload, 0) - if got, err := conn.Expect(tb.TCP{}, 100*time.Millisecond); got != nil || err == nil { + dut.Send(t, acceptFd, payload, 0) + if got, err := conn.Expect(t, tb.TCP{}, 100*time.Millisecond); got != nil || err == nil { t.Fatalf("expected no packets within 100 millisecond, but got one: %s", got) } } diff --git a/test/packetimpact/tests/tcp_retransmits_test.go b/test/packetimpact/tests/tcp_retransmits_test.go index 6940eb7fb..072014ff8 100644 --- a/test/packetimpact/tests/tcp_retransmits_test.go +++ b/test/packetimpact/tests/tcp_retransmits_test.go @@ -33,41 +33,41 @@ func init() { func TestRetransmits(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) - conn.Connect() - acceptFd, _ := dut.Accept(listenFd) - defer dut.Close(acceptFd) + conn.Connect(t) + acceptFd, _ := dut.Accept(t, listenFd) + defer dut.Close(t, acceptFd) - dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") samplePayload := &testbench.Payload{Bytes: sampleData} - dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + dut.Send(t, acceptFd, sampleData, 0) + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } // Give a chance for the dut to estimate RTO with RTT from the DATA-ACK. // TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which // we can skip sending this ACK. 
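// The retransmit loop further below waits up to twice the previous interval
// for each retransmission, i.e. the expected spacing roughly doubles every
// round (classic exponential backoff). A tiny sketch of that schedule, using
// the same 1s starting value the test uses (illustrative only):
func expectedBackoffCeilings(rounds int) []time.Duration {
	out := make([]time.Duration, rounds)
	current := time.Second // startRTO in the test
	for i := range out {
		out[i] = 2 * current // the ExpectData timeout used for round i
		current *= 2
	}
	return out
}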
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) startRTO := time.Second current := startRTO first := time.Now() - dut.Send(acceptFd, sampleData, 0) - seq := testbench.Uint32(uint32(*conn.RemoteSeqNum())) - if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil { + dut.Send(t, acceptFd, sampleData, 0) + seq := testbench.Uint32(uint32(*conn.RemoteSeqNum(t))) + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil { t.Fatalf("expected payload was not received: %s", err) } // Expect retransmits of the same segment. for i := 0; i < 5; i++ { start := time.Now() - if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil { t.Fatalf("expected payload was not received: %s loop %d", err, i) } if i == 0 { diff --git a/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go index 90ab85419..f91b06ba1 100644 --- a/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go +++ b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go @@ -61,23 +61,23 @@ func TestSendWindowSizesPiggyback(t *testing.T) { t.Run(fmt.Sprintf("%s%d", tt.description, tt.windowSize), func(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort, WindowSize: testbench.Uint16(tt.windowSize)}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) - conn.Connect() - acceptFd, _ := dut.Accept(listenFd) - defer dut.Close(acceptFd) + conn.Connect(t) + acceptFd, _ := dut.Accept(t, listenFd) + defer dut.Close(t, acceptFd) - dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) expectedTCP := testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)} - dut.Send(acceptFd, sampleData, 0) + dut.Send(t, acceptFd, sampleData, 0) expectedPayload := testbench.Payload{Bytes: tt.expectedPayload1} - if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { + if _, err := conn.ExpectData(t, &expectedTCP, &expectedPayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } @@ -86,18 +86,18 @@ func TestSendWindowSizesPiggyback(t *testing.T) { if tt.enqueue { // Enqueue a segment for the dut to transmit. - dut.Send(acceptFd, sampleData, 0) + dut.Send(t, acceptFd, sampleData, 0) } // Send ACK for the previous segment along with data for the dut to // receive and ACK back. Sending this ACK would make room for the dut // to transmit any enqueued segment. 
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh), WindowSize: testbench.Uint16(tt.windowSize)}, &testbench.Payload{Bytes: sampleData}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh), WindowSize: testbench.Uint16(tt.windowSize)}, &testbench.Payload{Bytes: sampleData}) // Expect the dut to piggyback the ACK for received data along with // the segment enqueued for transmit. expectedPayload = testbench.Payload{Bytes: tt.expectedPayload2} - if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil { + if _, err := conn.ExpectData(t, &expectedTCP, &expectedPayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } }) diff --git a/test/packetimpact/tests/tcp_synrcvd_reset_test.go b/test/packetimpact/tests/tcp_synrcvd_reset_test.go index 7d5deab01..57d034dd1 100644 --- a/test/packetimpact/tests/tcp_synrcvd_reset_test.go +++ b/test/packetimpact/tests/tcp_synrcvd_reset_test.go @@ -32,21 +32,21 @@ func init() { func TestTCPSynRcvdReset(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFD) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) // Expect dut connection to have transitioned to SYN-RCVD state. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected SYN-ACK %s", err) } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}) // Expect the connection to have transitioned SYN-RCVD to CLOSED. // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { t.Fatalf("expected a TCP RST %s", err) } } diff --git a/test/packetimpact/tests/tcp_synsent_reset_test.go b/test/packetimpact/tests/tcp_synsent_reset_test.go index 6898a2239..eac8eb19d 100644 --- a/test/packetimpact/tests/tcp_synsent_reset_test.go +++ b/test/packetimpact/tests/tcp_synsent_reset_test.go @@ -31,17 +31,19 @@ func init() { // dutSynSentState sets up the dut connection in SYN-SENT state. 
func dutSynSentState(t *testing.T) (*tb.DUT, *tb.TCPIPv4, uint16, uint16) { + t.Helper() + dut := tb.NewDUT(t) - clientFD, clientPort := dut.CreateBoundSocket(unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(tb.RemoteIPv4)) + clientFD, clientPort := dut.CreateBoundSocket(t, unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(tb.RemoteIPv4)) port := uint16(9001) conn := tb.NewTCPIPv4(t, tb.TCP{SrcPort: &port, DstPort: &clientPort}, tb.TCP{SrcPort: &clientPort, DstPort: &port}) sa := unix.SockaddrInet4{Port: int(port)} copy(sa.Addr[:], net.IP(net.ParseIP(tb.LocalIPv4)).To4()) // Bring the dut to SYN-SENT state with a non-blocking connect. - dut.Connect(clientFD, &sa) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}, nil, time.Second); err != nil { + dut.Connect(t, clientFD, &sa) + if _, err := conn.ExpectData(t, &tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}, nil, time.Second); err != nil { t.Fatalf("expected SYN\n") } @@ -51,13 +53,13 @@ func dutSynSentState(t *testing.T) (*tb.DUT, *tb.TCPIPv4, uint16, uint16) { // TestTCPSynSentReset tests RFC793, p67: SYN-SENT to CLOSED transition. func TestTCPSynSentReset(t *testing.T) { dut, conn, _, _ := dutSynSentState(t) - defer conn.Close() + defer conn.Close(t) defer dut.TearDown() - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)}) + conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)}) // Expect the connection to have closed. // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side. - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(t, &tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { t.Fatalf("expected a TCP RST") } } @@ -67,22 +69,22 @@ func TestTCPSynSentReset(t *testing.T) { func TestTCPSynSentRcvdReset(t *testing.T) { dut, c, remotePort, clientPort := dutSynSentState(t) defer dut.TearDown() - defer c.Close() + defer c.Close(t) conn := tb.NewTCPIPv4(t, tb.TCP{SrcPort: &remotePort, DstPort: &clientPort}, tb.TCP{SrcPort: &clientPort, DstPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) // Initiate new SYN connection with the same port pair // (simultaneous open case), expect the dut connection to move to // SYN-RCVD state - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}) + if _, err := conn.ExpectData(t, &tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected SYN-ACK %s\n", err) } - conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}) + conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}) // Expect the connection to have transitioned SYN-RCVD to CLOSED. // TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side. 
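// The TODO above would verify the transition by reading the socket state
// directly; on Linux that is a TCP_INFO getsockopt. Sketch only, assuming
// "golang.org/x/sys/unix"; tcpClose is Linux's TCP_CLOSE value from
// include/net/tcp_states.h:
func isClosed(fd int) (bool, error) {
	info, err := unix.GetsockoptTCPInfo(fd, unix.IPPROTO_TCP, unix.TCP_INFO)
	if err != nil {
		return false, err
	}
	const tcpClose = 7
	return info.State == tcpClose, nil
}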
- conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(t, &tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { t.Fatalf("expected a TCP RST") } } diff --git a/test/packetimpact/tests/tcp_user_timeout_test.go b/test/packetimpact/tests/tcp_user_timeout_test.go index 87e45d765..551dc78e7 100644 --- a/test/packetimpact/tests/tcp_user_timeout_test.go +++ b/test/packetimpact/tests/tcp_user_timeout_test.go @@ -16,7 +16,6 @@ package tcp_user_timeout_test import ( "flag" - "fmt" "testing" "time" @@ -29,22 +28,20 @@ func init() { testbench.RegisterFlags(flag.CommandLine) } -func sendPayload(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error { +func sendPayload(t *testing.T, conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) { sampleData := make([]byte, 100) for i := range sampleData { sampleData[i] = uint8(i) } - conn.Drain() - dut.Send(fd, sampleData, 0) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: sampleData}, time.Second); err != nil { - return fmt.Errorf("expected data but got none: %w", err) + conn.Drain(t) + dut.Send(t, fd, sampleData, 0) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: sampleData}, time.Second); err != nil { + t.Fatalf("expected data but got none: %w", err) } - return nil } -func sendFIN(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error { - dut.Close(fd) - return nil +func sendFIN(t *testing.T, conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) { + dut.Close(t, fd) } func TestTCPUserTimeout(t *testing.T) { @@ -59,7 +56,7 @@ func TestTCPUserTimeout(t *testing.T) { } { for _, ttf := range []struct { description string - f func(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error + f func(_ *testing.T, _ *testbench.TCPIPv4, _ *testbench.DUT, fd int32) }{ {"AfterPayload", sendPayload}, {"AfterFIN", sendFIN}, @@ -68,31 +65,29 @@ func TestTCPUserTimeout(t *testing.T) { // Create a socket, listen, TCP handshake, and accept. 
dut := testbench.NewDUT(t) defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) + listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFD) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() - conn.Connect() - acceptFD, _ := dut.Accept(listenFD) + defer conn.Close(t) + conn.Connect(t) + acceptFD, _ := dut.Accept(t, listenFD) if tt.userTimeout != 0 { - dut.SetSockOptInt(acceptFD, unix.SOL_TCP, unix.TCP_USER_TIMEOUT, int32(tt.userTimeout.Milliseconds())) + dut.SetSockOptInt(t, acceptFD, unix.SOL_TCP, unix.TCP_USER_TIMEOUT, int32(tt.userTimeout.Milliseconds())) } - if err := ttf.f(&conn, &dut, acceptFD); err != nil { - t.Fatal(err) - } + ttf.f(t, &conn, &dut, acceptFD) time.Sleep(tt.sendDelay) - conn.Drain() - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Drain(t) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) // If TCP_USER_TIMEOUT was set and the above delay was longer than the // TCP_USER_TIMEOUT then the DUT should send a RST in response to the // testbench's packet. expectRST := tt.userTimeout != 0 && tt.sendDelay > tt.userTimeout expectTimeout := 5 * time.Second - got, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, expectTimeout) + got, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, expectTimeout) if expectRST && err != nil { t.Errorf("expected RST packet within %s but got none: %s", expectTimeout, err) } diff --git a/test/packetimpact/tests/tcp_window_shrink_test.go b/test/packetimpact/tests/tcp_window_shrink_test.go index e78d04756..5b001fbec 100644 --- a/test/packetimpact/tests/tcp_window_shrink_test.go +++ b/test/packetimpact/tests/tcp_window_shrink_test.go @@ -31,43 +31,43 @@ func init() { func TestWindowShrink(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) - conn.Connect() - acceptFd, _ := dut.Accept(listenFd) - defer dut.Close(acceptFd) + conn.Connect(t) + acceptFd, _ := dut.Accept(t, listenFd) + defer dut.Close(t, acceptFd) - dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") samplePayload := &testbench.Payload{Bytes: sampleData} - dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + dut.Send(t, acceptFd, sampleData, 0) + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - dut.Send(acceptFd, sampleData, 0) - dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + dut.Send(t, acceptFd, sampleData, 0) + dut.Send(t, acceptFd, sampleData, 0) + if _, err := 
conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } // We close our receiving window here - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) - dut.Send(acceptFd, []byte("Sample Data"), 0) + dut.Send(t, acceptFd, []byte("Sample Data"), 0) // Note: There is another kind of zero-window probing which Windows uses (by sending one // new byte at `RemoteSeqNum`), if netstack wants to go that way, we may want to change // the following lines. - expectedRemoteSeqNum := *conn.RemoteSeqNum() - 1 - if _, err := conn.ExpectData(&testbench.TCP{SeqNum: testbench.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil { + expectedRemoteSeqNum := *conn.RemoteSeqNum(t) - 1 + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: testbench.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %d: %s", expectedRemoteSeqNum, err) } } diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go index 8c89d57c9..da93267d6 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go @@ -33,27 +33,27 @@ func init() { func TestZeroWindowProbeRetransmit(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) - conn.Connect() - acceptFd, _ := dut.Accept(listenFd) - defer dut.Close(acceptFd) + conn.Connect(t) + acceptFd, _ := dut.Accept(t, listenFd) + defer dut.Close(t, acceptFd) - dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") samplePayload := &testbench.Payload{Bytes: sampleData} // Send and receive sample data to the dut. 
- dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + dut.Send(t, acceptFd, sampleData, 0) + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected packet was not received: %s", err) } @@ -63,15 +63,15 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { // of the recorded first zero probe transmission duration. // // Advertize zero receive window again. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) - probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1)) - ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum())) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) + probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum(t) - 1)) + ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum(t))) startProbeDuration := time.Second current := startProbeDuration first := time.Now() // Ask the dut to send out data. - dut.Send(acceptFd, sampleData, 0) + dut.Send(t, acceptFd, sampleData, 0) // Expect the dut to keep the connection alive as long as the remote is // acknowledging the zero-window probes. for i := 0; i < 5; i++ { @@ -79,7 +79,7 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { // Expect zero-window probe with a timeout which is a function of the typical // first retransmission time. The retransmission times is supposed to // exponentially increase. - if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil { t.Fatalf("expected a probe with sequence number %d: loop %d", probeSeq, i) } if i == 0 { @@ -92,14 +92,13 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { t.Errorf("got zero probe %d after %s, want >= %s", i, got, want) } // Acknowledge the zero-window probes from the dut. - conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) + conn.Send(t, testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) current *= 2 } // Advertize non-zero window. - conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)}) // Expect the dut to recover and transmit data. - if _, err := conn.ExpectData(&testbench. 
- TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } } diff --git a/test/packetimpact/tests/tcp_zero_window_probe_test.go b/test/packetimpact/tests/tcp_zero_window_probe_test.go index 649fd5699..44cac42f8 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_test.go @@ -33,29 +33,29 @@ func init() { func TestZeroWindowProbe(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) - conn.Connect() - acceptFd, _ := dut.Accept(listenFd) - defer dut.Close(acceptFd) + conn.Connect(t) + acceptFd, _ := dut.Accept(t, listenFd) + defer dut.Close(t, acceptFd) - dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") samplePayload := &testbench.Payload{Bytes: sampleData} start := time.Now() // Send and receive sample data to the dut. - dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + dut.Send(t, acceptFd, sampleData, 0) + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } sendTime := time.Now().Sub(start) - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected packet was not received: %s", err) } @@ -63,24 +63,24 @@ func TestZeroWindowProbe(t *testing.T) { // probe to be sent. // // Advertize zero window to the dut. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) // Expected sequence number of the zero window probe. - probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1)) + probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum(t) - 1)) // Expected ack number of the ACK for the probe. - ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum())) + ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum(t))) // Expect there are no zero-window probes sent until there is data to be sent out // from the dut. - if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil { + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil { t.Fatalf("unexpected packet with sequence number %d: %s", probeSeq, err) } start = time.Now() // Ask the dut to send out data. 
- dut.Send(acceptFd, sampleData, 0) + dut.Send(t, acceptFd, sampleData, 0) // Expect zero-window probe from the dut. - if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %d: %s", probeSeq, err) } // Expect the probe to be sent after some time. Compare against the previous @@ -94,9 +94,9 @@ func TestZeroWindowProbe(t *testing.T) { // and sends out the sample payload after the send window opens. // // Advertize non-zero window to the dut and ack the zero window probe. - conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)}) + conn.Send(t, testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)}) // Expect the dut to recover and transmit data. - if _, err := conn.ExpectData(&testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } @@ -104,9 +104,9 @@ func TestZeroWindowProbe(t *testing.T) { // Check if the dut responds as we do for a similar probe sent to it. // Basically with sequence number to one byte behind the unacknowledged // sequence number. - p := testbench.Uint32(uint32(*conn.LocalSeqNum())) - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), SeqNum: testbench.Uint32(uint32(*conn.LocalSeqNum() - 1))}) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil { + p := testbench.Uint32(uint32(*conn.LocalSeqNum(t))) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), SeqNum: testbench.Uint32(uint32(*conn.LocalSeqNum(t) - 1))}) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil { t.Fatalf("expected a packet with ack number: %d: %s", p, err) } } diff --git a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go index 3c467b14f..09a1c653f 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go @@ -33,27 +33,27 @@ func init() { func TestZeroWindowProbeUserTimeout(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFd) + listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) + defer dut.Close(t, listenFd) conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) - conn.Connect() - acceptFd, _ := dut.Accept(listenFd) - defer dut.Close(acceptFd) + conn.Connect(t) + acceptFd, _ := dut.Accept(t, listenFd) + defer dut.Close(t, acceptFd) - dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) + dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1) sampleData := []byte("Sample Data") samplePayload := &testbench.Payload{Bytes: sampleData} // Send and receive sample data to the dut. 
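// TCP_USER_TIMEOUT, set further down in milliseconds, bounds how long sent
// data may remain unacknowledged before the DUT aborts the connection. It is
// set to roughly one probe interval here, so once the zero-window probes go
// unanswered the DUT tears the connection down and answers a later ACK with a
// RST.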
- dut.Send(acceptFd, sampleData, 0) - if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil { + dut.Send(t, acceptFd, sampleData, 0) + if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil { t.Fatalf("expected payload was not received: %s", err) } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil { t.Fatalf("expected packet was not received: %s", err) } @@ -61,15 +61,15 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) { // probe to be sent. // // Advertize zero window to the dut. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) // Expected sequence number of the zero window probe. - probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1)) + probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum(t) - 1)) start := time.Now() // Ask the dut to send out data. - dut.Send(acceptFd, sampleData, 0) + dut.Send(t, acceptFd, sampleData, 0) // Expect zero-window probe from the dut. - if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { + if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil { t.Fatalf("expected a packet with sequence number %d: %s", probeSeq, err) } // Record the duration for first probe, the dut sends the zero window probe after @@ -80,19 +80,19 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) { // when the dut is sending zero-window probes. // // Reduce the retransmit timeout. - dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int32(startProbeDuration.Milliseconds())) + dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int32(startProbeDuration.Milliseconds())) // Advertize zero window again. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) // Ask the dut to send out data that would trigger zero window probe retransmissions. - dut.Send(acceptFd, sampleData, 0) + dut.Send(t, acceptFd, sampleData, 0) // Wait for the connection to timeout after multiple zero-window probe retransmissions. time.Sleep(8 * startProbeDuration) // Expect the connection to have timed out and closed which would cause the dut // to reply with a RST to the ACK we send. 
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { + conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) + if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil { t.Fatalf("expected a TCP RST") } } diff --git a/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go b/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go index b0315e67c..d30177e64 100644 --- a/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go +++ b/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go @@ -36,11 +36,11 @@ func init() { func TestDiscardsUDPPacketsWithMcastSourceAddressV4(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv4)) - defer dut.Close(remoteFD) - dut.SetSockOptTimeval(remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond) + remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv4)) + defer dut.Close(t, remoteFD) + dut.SetSockOptTimeval(t, remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond) conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) for _, mcastAddr := range []net.IP{ net.IPv4allsys, @@ -50,11 +50,12 @@ func TestDiscardsUDPPacketsWithMcastSourceAddressV4(t *testing.T) { } { t.Run(fmt.Sprintf("srcaddr=%s", mcastAddr), func(t *testing.T) { conn.SendIP( + t, testbench.IPv4{SrcAddr: testbench.Address(tcpip.Address(mcastAddr.To4()))}, testbench.UDP{}, ) - ret, payload, errno := dut.RecvWithErrno(context.Background(), remoteFD, 100, 0) + ret, payload, errno := dut.RecvWithErrno(context.Background(), t, remoteFD, 100, 0) if errno != syscall.EAGAIN || errno != syscall.EWOULDBLOCK { t.Errorf("Recv got unexpected result, ret=%d, payload=%q, errno=%s", ret, payload, errno) } @@ -65,11 +66,11 @@ func TestDiscardsUDPPacketsWithMcastSourceAddressV4(t *testing.T) { func TestDiscardsUDPPacketsWithMcastSourceAddressV6(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv6)) - defer dut.Close(remoteFD) - dut.SetSockOptTimeval(remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond) + remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv6)) + defer dut.Close(t, remoteFD) + dut.SetSockOptTimeval(t, remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond) conn := testbench.NewUDPIPv6(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) for _, mcastAddr := range []net.IP{ net.IPv6interfacelocalallnodes, @@ -80,10 +81,11 @@ func TestDiscardsUDPPacketsWithMcastSourceAddressV6(t *testing.T) { } { t.Run(fmt.Sprintf("srcaddr=%s", mcastAddr), func(t *testing.T) { conn.SendIPv6( + t, testbench.IPv6{SrcAddr: testbench.Address(tcpip.Address(mcastAddr.To16()))}, testbench.UDP{}, ) - ret, payload, errno := dut.RecvWithErrno(context.Background(), remoteFD, 100, 0) + ret, payload, errno := dut.RecvWithErrno(context.Background(), t, remoteFD, 100, 0) if errno != syscall.EAGAIN || errno != syscall.EWOULDBLOCK { t.Errorf("Recv got 
unexpected result, ret=%d, payload=%q, errno=%s", ret, payload, errno) } diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index b754918f6..715e8f5b5 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -72,7 +72,7 @@ func (e icmpError) ToICMPv4() *testbench.ICMPv4 { type errorDetection struct { name string useValidConn bool - f func(context.Context, testData) error + f func(context.Context, *testing.T, testData) } type testData struct { @@ -95,12 +95,14 @@ func wantErrno(c connectionMode, icmpErr icmpError) syscall.Errno { } // sendICMPError sends an ICMP error message in response to a UDP datagram. -func sendICMPError(conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UDP) error { - layers := (*testbench.Connection)(conn).CreateFrame(nil) +func sendICMPError(t *testing.T, conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UDP) { + t.Helper() + + layers := (*testbench.Connection)(conn).CreateFrame(t, nil) layers = layers[:len(layers)-1] ip, ok := udp.Prev().(*testbench.IPv4) if !ok { - return fmt.Errorf("expected %s to be IPv4", udp.Prev()) + t.Fatalf("expected %s to be IPv4", udp.Prev()) } if icmpErr == timeToLiveExceeded { *ip.TTL = 1 @@ -114,84 +116,82 @@ func sendICMPError(conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UD // resulting in a mal-formed packet. layers = append(layers, icmpErr.ToICMPv4(), ip, udp) - (*testbench.Connection)(conn).SendFrameStateless(layers) - return nil + (*testbench.Connection)(conn).SendFrameStateless(t, layers) } // testRecv tests observing the ICMP error through the recv syscall. A packet // is sent to the DUT, and if wantErrno is non-zero, then the first recv should // fail and the second should succeed. Otherwise if wantErrno is zero then the // first recv should succeed immediately. -func testRecv(ctx context.Context, d testData) error { +func testRecv(ctx context.Context, t *testing.T, d testData) { + t.Helper() + // Check that receiving on the clean socket works. - d.conn.Send(testbench.UDP{DstPort: &d.cleanPort}) - d.dut.Recv(d.cleanFD, 100, 0) + d.conn.Send(t, testbench.UDP{DstPort: &d.cleanPort}) + d.dut.Recv(t, d.cleanFD, 100, 0) - d.conn.Send(testbench.UDP{}) + d.conn.Send(t, testbench.UDP{}) if d.wantErrno != syscall.Errno(0) { ctx, cancel := context.WithTimeout(ctx, time.Second) defer cancel() - ret, _, err := d.dut.RecvWithErrno(ctx, d.remoteFD, 100, 0) + ret, _, err := d.dut.RecvWithErrno(ctx, t, d.remoteFD, 100, 0) if ret != -1 { - return fmt.Errorf("recv after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno) + t.Fatalf("recv after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno) } if err != d.wantErrno { - return fmt.Errorf("recv after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno) + t.Fatalf("recv after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno) } } - d.dut.Recv(d.remoteFD, 100, 0) - return nil + d.dut.Recv(t, d.remoteFD, 100, 0) } // testSendTo tests observing the ICMP error through the send syscall. If // wantErrno is non-zero, the first send should fail and a subsequent send // should suceed; while if wantErrno is zero then the first send should just // succeed. 
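// The conversions in this file follow the same pattern applied across the
// packetimpact tests in this change: helpers that used to return an error now
// take the calling *testing.T, mark themselves with t.Helper(), and fail the
// test directly. A minimal sketch of that shape, with an illustrative name
// (expectUDP is not part of the testbench API):
func expectUDP(t *testing.T, conn *testbench.UDPIPv4, want testbench.UDP, timeout time.Duration) {
	t.Helper() // attribute failures to the caller's line, not this helper
	if _, err := conn.Expect(t, want, timeout); err != nil {
		t.Fatalf("did not receive expected UDP packet: %s", err)
	}
}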
-func testSendTo(ctx context.Context, d testData) error { +func testSendTo(ctx context.Context, t *testing.T, d testData) { // Check that sending on the clean socket works. - d.dut.SendTo(d.cleanFD, nil, 0, d.conn.LocalAddr()) - if _, err := d.conn.Expect(testbench.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil { - return fmt.Errorf("did not receive UDP packet from clean socket on DUT: %s", err) + d.dut.SendTo(t, d.cleanFD, nil, 0, d.conn.LocalAddr(t)) + if _, err := d.conn.Expect(t, testbench.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil { + t.Fatalf("did not receive UDP packet from clean socket on DUT: %s", err) } if d.wantErrno != syscall.Errno(0) { ctx, cancel := context.WithTimeout(ctx, time.Second) defer cancel() - ret, err := d.dut.SendToWithErrno(ctx, d.remoteFD, nil, 0, d.conn.LocalAddr()) + ret, err := d.dut.SendToWithErrno(ctx, t, d.remoteFD, nil, 0, d.conn.LocalAddr(t)) if ret != -1 { - return fmt.Errorf("sendto after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno) + t.Fatalf("sendto after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno) } if err != d.wantErrno { - return fmt.Errorf("sendto after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno) + t.Fatalf("sendto after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno) } } - d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr()) - if _, err := d.conn.Expect(testbench.UDP{}, time.Second); err != nil { - return fmt.Errorf("did not receive UDP packet as expected: %s", err) + d.dut.SendTo(t, d.remoteFD, nil, 0, d.conn.LocalAddr(t)) + if _, err := d.conn.Expect(t, testbench.UDP{}, time.Second); err != nil { + t.Fatalf("did not receive UDP packet as expected: %s", err) } - return nil } -func testSockOpt(_ context.Context, d testData) error { +func testSockOpt(_ context.Context, t *testing.T, d testData) { // Check that there's no pending error on the clean socket. - if errno := syscall.Errno(d.dut.GetSockOptInt(d.cleanFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != syscall.Errno(0) { - return fmt.Errorf("unexpected error (%[1]d) %[1]v on clean socket", errno) + if errno := syscall.Errno(d.dut.GetSockOptInt(t, d.cleanFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != syscall.Errno(0) { + t.Fatalf("unexpected error (%[1]d) %[1]v on clean socket", errno) } - if errno := syscall.Errno(d.dut.GetSockOptInt(d.remoteFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != d.wantErrno { - return fmt.Errorf("SO_ERROR sockopt after ICMP error is (%[1]d) %[1]v, expected (%[2]d) %[2]v", errno, d.wantErrno) + if errno := syscall.Errno(d.dut.GetSockOptInt(t, d.remoteFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != d.wantErrno { + t.Fatalf("SO_ERROR sockopt after ICMP error is (%[1]d) %[1]v, expected (%[2]d) %[2]v", errno, d.wantErrno) } // Check that after clearing socket error, sending doesn't fail. 
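// Reading SO_ERROR via getsockopt above both reports and clears the pending
// error on the socket, which is why the send below is expected to succeed.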
- d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr()) - if _, err := d.conn.Expect(testbench.UDP{}, time.Second); err != nil { - return fmt.Errorf("did not receive UDP packet as expected: %s", err) + d.dut.SendTo(t, d.remoteFD, nil, 0, d.conn.LocalAddr(t)) + if _, err := d.conn.Expect(t, testbench.UDP{}, time.Second); err != nil { + t.Fatalf("did not receive UDP packet as expected: %s", err) } - return nil } // TestUDPICMPErrorPropagation tests that ICMP error messages in response to @@ -227,31 +227,29 @@ func TestUDPICMPErrorPropagation(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) - defer dut.Close(remoteFD) + remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(t, remoteFD) // Create a second, clean socket on the DUT to ensure that the ICMP // error messages only affect the sockets they are intended for. - cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) - defer dut.Close(cleanFD) + cleanFD, cleanPort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(t, cleanFD) conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) if connect { - dut.Connect(remoteFD, conn.LocalAddr()) - dut.Connect(cleanFD, conn.LocalAddr()) + dut.Connect(t, remoteFD, conn.LocalAddr(t)) + dut.Connect(t, cleanFD, conn.LocalAddr(t)) } - dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) - udp, err := conn.Expect(testbench.UDP{}, time.Second) + dut.SendTo(t, remoteFD, nil, 0, conn.LocalAddr(t)) + udp, err := conn.Expect(t, testbench.UDP{}, time.Second) if err != nil { t.Fatalf("did not receive message from DUT: %s", err) } - if err := sendICMPError(&conn, icmpErr, udp); err != nil { - t.Fatal(err) - } + sendICMPError(t, &conn, icmpErr, udp) errDetectConn := &conn if errDetect.useValidConn { @@ -260,14 +258,12 @@ func TestUDPICMPErrorPropagation(t *testing.T) { // interactions between it and the the DUT should be independent of // the ICMP error at least at the port level. connClean := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer connClean.Close() + defer connClean.Close(t) errDetectConn = &connClean } - if err := errDetect.f(context.Background(), testData{&dut, errDetectConn, remoteFD, remotePort, cleanFD, cleanPort, wantErrno}); err != nil { - t.Fatal(err) - } + errDetect.f(context.Background(), t, testData{&dut, errDetectConn, remoteFD, remotePort, cleanFD, cleanPort, wantErrno}) }) } } @@ -285,24 +281,24 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) - defer dut.Close(remoteFD) + remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(t, remoteFD) // Create a second, clean socket on the DUT to ensure that the ICMP // error messages only affect the sockets they are intended for. 
- cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) - defer dut.Close(cleanFD) + cleanFD, cleanPort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + defer dut.Close(t, cleanFD) conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) if connect { - dut.Connect(remoteFD, conn.LocalAddr()) - dut.Connect(cleanFD, conn.LocalAddr()) + dut.Connect(t, remoteFD, conn.LocalAddr(t)) + dut.Connect(t, cleanFD, conn.LocalAddr(t)) } - dut.SendTo(remoteFD, nil, 0, conn.LocalAddr()) - udp, err := conn.Expect(testbench.UDP{}, time.Second) + dut.SendTo(t, remoteFD, nil, 0, conn.LocalAddr(t)) + udp, err := conn.Expect(t, testbench.UDP{}, time.Second) if err != nil { t.Fatalf("did not receive message from DUT: %s", err) } @@ -316,7 +312,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0) + ret, _, err := dut.RecvWithErrno(ctx, t, remoteFD, 100, 0) if ret != -1 { t.Errorf("recv during ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno) return @@ -330,7 +326,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - if ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0); ret == -1 { + if ret, _, err := dut.RecvWithErrno(ctx, t, remoteFD, 100, 0); ret == -1 { t.Errorf("recv after ICMP error failed with (%[1]d) %[1]", err) } }() @@ -341,7 +337,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - if ret, _, err := dut.RecvWithErrno(ctx, cleanFD, 100, 0); ret == -1 { + if ret, _, err := dut.RecvWithErrno(ctx, t, cleanFD, 100, 0); ret == -1 { t.Errorf("recv on clean socket failed with (%[1]d) %[1]", err) } }() @@ -352,12 +348,10 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { // alternative is available. 
time.Sleep(2 * time.Second) - if err := sendICMPError(&conn, icmpErr, udp); err != nil { - t.Fatal(err) - } + sendICMPError(t, &conn, icmpErr, udp) - conn.Send(testbench.UDP{DstPort: &cleanPort}) - conn.Send(testbench.UDP{}) + conn.Send(t, testbench.UDP{DstPort: &cleanPort}) + conn.Send(t, testbench.UDP{}) wg.Wait() }) } diff --git a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go index 263a54291..fcd202643 100644 --- a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go +++ b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go @@ -31,10 +31,10 @@ func init() { func TestUDPRecvMulticastBroadcast(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4(0, 0, 0, 0)) - defer dut.Close(boundFD) + boundFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4(0, 0, 0, 0)) + defer dut.Close(t, boundFD) conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - defer conn.Close() + defer conn.Close(t) for _, bcastAddr := range []net.IP{ broadcastAddr(net.ParseIP(testbench.RemoteIPv4), net.CIDRMask(testbench.IPv4PrefixLength, 32)), @@ -43,12 +43,13 @@ func TestUDPRecvMulticastBroadcast(t *testing.T) { } { payload := testbench.GenerateRandomPayload(t, 1<<10) conn.SendIP( + t, testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(bcastAddr.To4()))}, testbench.UDP{}, &testbench.Payload{Bytes: payload}, ) t.Logf("Receiving packet sent to address: %s", bcastAddr) - if got, want := string(dut.Recv(boundFD, int32(len(payload)), 0)), string(payload); got != want { + if got, want := string(dut.Recv(t, boundFD, int32(len(payload)), 0)), string(payload); got != want { t.Errorf("received payload does not match sent payload got: %s, want: %s", got, want) } } diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go index bd53ad90b..dc20275d6 100644 --- a/test/packetimpact/tests/udp_send_recv_dgram_test.go +++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go @@ -29,10 +29,10 @@ func init() { } type udpConn interface { - Send(testbench.UDP, ...testbench.Layer) - ExpectData(testbench.UDP, testbench.Payload, time.Duration) (testbench.Layers, error) - Drain() - Close() + Send(*testing.T, testbench.UDP, ...testbench.Layer) + ExpectData(*testing.T, testbench.UDP, testbench.Payload, time.Duration) (testbench.Layers, error) + Drain(*testing.T) + Close(*testing.T) } func TestUDP(t *testing.T) { @@ -51,21 +51,21 @@ func TestUDP(t *testing.T) { } else { addr = testbench.RemoteIPv6 } - boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(addr)) - defer dut.Close(boundFD) + boundFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(addr)) + defer dut.Close(t, boundFD) var conn udpConn var localAddr unix.Sockaddr if isIPv4 { v4Conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - localAddr = v4Conn.LocalAddr() + localAddr = v4Conn.LocalAddr(t) conn = &v4Conn } else { v6Conn := testbench.NewUDPIPv6(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) - localAddr = v6Conn.LocalAddr() + localAddr = v6Conn.LocalAddr(t) conn = &v6Conn } - defer conn.Close() + defer conn.Close(t) testCases := []struct { name string @@ -81,17 +81,17 @@ func TestUDP(t *testing.T) { for _, tc := 
range testCases { t.Run(tc.name, func(t *testing.T) { t.Run("Send", func(t *testing.T) { - conn.Send(testbench.UDP{}, &testbench.Payload{Bytes: tc.payload}) - if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), string(tc.payload); got != want { + conn.Send(t, testbench.UDP{}, &testbench.Payload{Bytes: tc.payload}) + if got, want := string(dut.Recv(t, boundFD, int32(len(tc.payload)), 0)), string(tc.payload); got != want { t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want) } }) t.Run("Recv", func(t *testing.T) { - conn.Drain() - if got, want := int(dut.SendTo(boundFD, tc.payload, 0, localAddr)), len(tc.payload); got != want { + conn.Drain(t) + if got, want := int(dut.SendTo(t, boundFD, tc.payload, 0, localAddr)), len(tc.payload); got != want { t.Fatalf("short write got: %d, want: %d", got, want) } - if _, err := conn.ExpectData(testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: tc.payload}, time.Second); err != nil { + if _, err := conn.ExpectData(t, testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: tc.payload}, time.Second); err != nil { t.Fatal(err) } }) -- cgit v1.2.3 From 29e5609b228363bcc435a8828f9b6ee19018a525 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Mon, 27 Jul 2020 10:00:17 -0700 Subject: Port redis benchmark PiperOrigin-RevId: 323381964 --- images/benchmarks/redis/Dockerfile | 1 + test/benchmarks/database/BUILD | 28 +++++ test/benchmarks/database/database.go | 31 ++++++ test/benchmarks/database/redis_test.go | 197 +++++++++++++++++++++++++++++++++ test/benchmarks/harness/util.go | 2 +- 5 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 images/benchmarks/redis/Dockerfile create mode 100644 test/benchmarks/database/BUILD create mode 100644 test/benchmarks/database/database.go create mode 100644 test/benchmarks/database/redis_test.go (limited to 'test') diff --git a/images/benchmarks/redis/Dockerfile b/images/benchmarks/redis/Dockerfile new file mode 100644 index 000000000..0f17249af --- /dev/null +++ b/images/benchmarks/redis/Dockerfile @@ -0,0 +1 @@ +FROM redis:5.0.4 diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD new file mode 100644 index 000000000..5e33465cd --- /dev/null +++ b/test/benchmarks/database/BUILD @@ -0,0 +1,28 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "database", + testonly = 1, + srcs = ["database.go"], + deps = ["//test/benchmarks/harness"], +) + +go_test( + name = "database_test", + size = "enormous", + srcs = [ + "redis_test.go", + ], + library = ":database", + tags = [ + # Requires docker and runsc to be configured before test runs. + "manual", + "local", + ], + deps = [ + "//pkg/test/dockerutil", + "//test/benchmarks/harness", + ], +) diff --git a/test/benchmarks/database/database.go b/test/benchmarks/database/database.go new file mode 100644 index 000000000..9eeb59f9a --- /dev/null +++ b/test/benchmarks/database/database.go @@ -0,0 +1,31 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Package database holds benchmarks around database applications. +package database + +import ( + "os" + "testing" + + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +var h harness.Harness + +// TestMain is the main method for package database. +func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} diff --git a/test/benchmarks/database/redis_test.go b/test/benchmarks/database/redis_test.go new file mode 100644 index 000000000..6d39f4d66 --- /dev/null +++ b/test/benchmarks/database/redis_test.go @@ -0,0 +1,197 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package database + +import ( + "context" + "fmt" + "regexp" + "strconv" + "strings" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +// All possible operations from redis. Note: "ping" will +// run both PING_INLINE and PING_BUILD. +var operations []string = []string{ + "PING_INLINE", + "PING_BULK", + "SET", + "GET", + "INCR", + "LPUSH", + "RPUSH", + "LPOP", + "RPOP", + "SADD", + "HSET", + "SPOP", + "LRANGE_100", + "LRANGE_300", + "LRANGE_500", + "LRANGE_600", + "MSET", +} + +// BenchmarkRedis runs redis-benchmark against a redis instance and reports +// data in queries per second. Each is reported by named operation (e.g. LPUSH). +func BenchmarkRedis(b *testing.B) { + clientMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer clientMachine.CleanUp() + + serverMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer serverMachine.CleanUp() + + // Redis runs on port 6379 by default. + port := 6379 + ctx := context.Background() + + for _, operation := range operations { + b.Run(operation, func(b *testing.B) { + server := serverMachine.GetContainer(ctx, b) + defer server.CleanUp(ctx) + + // The redis docker container takes no arguments to run a redis server. + if err := server.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/redis", + Ports: []int{port}, + }); err != nil { + b.Fatalf("failed to start redis server with: %v", err) + } + + if out, err := server.WaitForOutput(ctx, "Ready to accept connections", 3*time.Second); err != nil { + b.Fatalf("failed to start redis server: %v %s", err, out) + } + + ip, err := serverMachine.IPAddress() + if err != nil { + b.Fatal("failed to get IP from server: %v", err) + } + + serverPort, err := server.FindPort(ctx, port) + if err != nil { + b.Fatal("failed to get IP from server: %v", err) + } + + if err = harness.WaitUntilServing(ctx, clientMachine, ip, serverPort); err != nil { + b.Fatalf("failed to start redis with: %v", err) + } + + // runs redis benchmark -t operation for 100K requests against server. 
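// As an illustration (the host and port values here are made up), for the SET
// operation the command assembled below is:
//
//   redis-benchmark --csv -t SET -h 10.0.0.2 -p 6379
//
// and the matching line of its CSV output looks like:
//
//   "SET","48923.68"
//
// parseOperation further down pulls 48923.68 (requests per second) out of that
// line; TestParser exercises the same parsing against a full sample output.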
+ cmd := strings.Split( + fmt.Sprintf("redis-benchmark --csv -t %s -h %s -p %d", operation, ip, serverPort), " ") + + // There is no -t PING_BULK for redis-benchmark, so adjust the command in that case. + // Note that "ping" will run both PING_INLINE and PING_BULK. + if operation == "PING_BULK" { + cmd = strings.Split( + fmt.Sprintf("redis-benchmark --csv -t ping -h %s -p %d", ip, serverPort), " ") + } + // Reset profiles and timer to begin the measurement. + server.RestartProfiles() + b.ResetTimer() + for i := 0; i < b.N; i++ { + client := clientMachine.GetNativeContainer(ctx, b) + defer client.CleanUp(ctx) + out, err := client.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/redis", + }, cmd...) + if err != nil { + b.Fatalf("redis-benchmark failed with: %v", err) + } + + // Stop time while we parse results. + b.StopTimer() + result, err := parseOperation(operation, out) + if err != nil { + b.Fatalf("parsing result %s failed with err: %v", out, err) + } + b.ReportMetric(result, operation) // operations per second + b.StartTimer() + } + }) + } +} + +// parseOperation grabs the metric operations per second from redis-benchmark output. +func parseOperation(operation, data string) (float64, error) { + re := regexp.MustCompile(fmt.Sprintf(`"%s( .*)?","(\d*\.\d*)"`, operation)) + match := re.FindStringSubmatch(data) + // If no match, simply don't add it to the result map. + if len(match) < 3 { + return 0.0, fmt.Errorf("could not find %s in %s", operation, data) + } + return strconv.ParseFloat(match[2], 64) +} + +// TestParser tests the parser on sample data. +func TestParser(t *testing.T) { + sampleData := ` + "PING_INLINE","48661.80" + "PING_BULK","50301.81" + "SET","48923.68" + "GET","49382.71" + "INCR","49975.02" + "LPUSH","49875.31" + "RPUSH","50276.52" + "LPOP","50327.12" + "RPOP","50556.12" + "SADD","49504.95" + "HSET","49504.95" + "SPOP","50025.02" + "LPUSH (needed to benchmark LRANGE)","48875.86" + "LRANGE_100 (first 100 elements)","33955.86" + "LRANGE_300 (first 300 elements)","16550.81" + "LRANGE_500 (first 450 elements)","13653.74" + "LRANGE_600 (first 600 elements)","11219.57" + "MSET (10 keys)","44682.75" + ` + wants := map[string]float64{ + "PING_INLINE": 48661.80, + "PING_BULK": 50301.81, + "SET": 48923.68, + "GET": 49382.71, + "INCR": 49975.02, + "LPUSH": 49875.31, + "RPUSH": 50276.52, + "LPOP": 50327.12, + "RPOP": 50556.12, + "SADD": 49504.95, + "HSET": 49504.95, + "SPOP": 50025.02, + "LRANGE_100": 33955.86, + "LRANGE_300": 16550.81, + "LRANGE_500": 13653.74, + "LRANGE_600": 11219.57, + "MSET": 44682.75, + } + for op, want := range wants { + if got, err := parseOperation(op, sampleData); err != nil { + t.Fatalf("failed to parse %s: %v", op, err) + } else if want != got { + t.Fatalf("wanted %f for op %s, got %f", want, op, got) + } + } +} diff --git a/test/benchmarks/harness/util.go b/test/benchmarks/harness/util.go index 7f8e42201..eda2eeffb 100644 --- a/test/benchmarks/harness/util.go +++ b/test/benchmarks/harness/util.go @@ -30,7 +30,7 @@ func WaitUntilServing(ctx context.Context, machine Machine, server net.IP, port netcat := machine.GetNativeContainer(ctx, logger) defer netcat.CleanUp(ctx) - cmd := fmt.Sprintf("while ! nc -zv %s %d; do true; done", server.String(), port) + cmd := fmt.Sprintf("while ! 
nc -zv %s %d; do true; done", server, port) _, err := netcat.Run(ctx, dockerutil.RunOpts{ Image: "packetdrill", }, "sh", "-c", cmd) -- cgit v1.2.3 From 77552f1c770da7413c3c8212443328c9081901c0 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Mon, 27 Jul 2020 10:06:07 -0700 Subject: Port ffmpeg benchmark PiperOrigin-RevId: 323383320 --- images/benchmarks/ffmpeg/Dockerfile | 9 +++++++ test/benchmarks/harness/util.go | 8 ++++++ test/benchmarks/media/BUILD | 21 +++++++++++++++ test/benchmarks/media/ffmpeg_test.go | 52 ++++++++++++++++++++++++++++++++++++ test/benchmarks/media/media.go | 31 +++++++++++++++++++++ 5 files changed, 121 insertions(+) create mode 100644 images/benchmarks/ffmpeg/Dockerfile create mode 100644 test/benchmarks/media/BUILD create mode 100644 test/benchmarks/media/ffmpeg_test.go create mode 100644 test/benchmarks/media/media.go (limited to 'test') diff --git a/images/benchmarks/ffmpeg/Dockerfile b/images/benchmarks/ffmpeg/Dockerfile new file mode 100644 index 000000000..7108df64f --- /dev/null +++ b/images/benchmarks/ffmpeg/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + ffmpeg \ + && rm -rf /var/lib/apt/lists/* +WORKDIR /media +ADD https://samples.ffmpeg.org/MPEG-4/video.mp4 video.mp4 diff --git a/test/benchmarks/harness/util.go b/test/benchmarks/harness/util.go index eda2eeffb..bc551c582 100644 --- a/test/benchmarks/harness/util.go +++ b/test/benchmarks/harness/util.go @@ -36,3 +36,11 @@ func WaitUntilServing(ctx context.Context, machine Machine, server net.IP, port }, "sh", "-c", cmd) return err } + +// DropCaches drops caches on the provided machine. Requires root. +func DropCaches(machine Machine) error { + if out, err := machine.RunCommand("/bin/sh", "-c", "sync | sysctl vm.drop_caches=3"); err != nil { + return fmt.Errorf("failed to drop caches: %v logs: %s", err, out) + } + return nil +} diff --git a/test/benchmarks/media/BUILD b/test/benchmarks/media/BUILD new file mode 100644 index 000000000..6c41fc4f6 --- /dev/null +++ b/test/benchmarks/media/BUILD @@ -0,0 +1,21 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "media", + testonly = 1, + srcs = ["media.go"], + deps = ["//test/benchmarks/harness"], +) + +go_test( + name = "media_test", + size = "large", + srcs = ["ffmpeg_test.go"], + library = ":media", + deps = [ + "//pkg/test/dockerutil", + "//test/benchmarks/harness", + ], +) diff --git a/test/benchmarks/media/ffmpeg_test.go b/test/benchmarks/media/ffmpeg_test.go new file mode 100644 index 000000000..bfcfbab80 --- /dev/null +++ b/test/benchmarks/media/ffmpeg_test.go @@ -0,0 +1,52 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package media + +import ( + "context" + "strings" + "testing" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +// BenchmarkFfmpeg runs ffmpeg in a container and records runtime. 
+// BenchmarkFfmpeg should run as root to drop caches. +func BenchmarkFfmpeg(b *testing.B) { + machine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer machine.CleanUp() + + ctx := context.Background() + container := machine.GetContainer(ctx, b) + cmd := strings.Split("ffmpeg -i video.mp4 -c:v libx264 -preset veryslow output.mp4", " ") + + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + if err := harness.DropCaches(machine); err != nil { + b.Skipf("failed to drop caches: %v. You probably need root.", err) + } + b.StartTimer() + + if _, err := container.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/ffmpeg", + }, cmd...); err != nil { + b.Fatalf("failed to run container: %v", err) + } + } +} diff --git a/test/benchmarks/media/media.go b/test/benchmarks/media/media.go new file mode 100644 index 000000000..c7b35b758 --- /dev/null +++ b/test/benchmarks/media/media.go @@ -0,0 +1,31 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package media holds benchmarks around media processing applications. +package media + +import ( + "os" + "testing" + + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +var h harness.Harness + +// TestMain is the main method for package media. +func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} -- cgit v1.2.3 From c8fa685cb6b562e4d8fc05c7d01968e4e12234aa Mon Sep 17 00:00:00 2001 From: Rahat Mahmood Date: Mon, 27 Jul 2020 13:19:25 -0700 Subject: Fix when FUSE tests need to be skipped due to sentry configuration. PiperOrigin-RevId: 323426851 --- test/syscalls/linux/mount.cc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc index 7664fa73d..97e8d0f7e 100644 --- a/test/syscalls/linux/mount.cc +++ b/test/syscalls/linux/mount.cc @@ -323,10 +323,7 @@ TEST(MountTest, RenameRemoveMountPoint) { TEST(MountTest, MountFuseFilesystemNoDevice) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); - - // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new - // device registration is complete. - SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor()); + SKIP_IF(IsRunningOnGvisor() && !IsFUSEEnabled()); auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(mount("", dir.path().c_str(), "fuse", 0, ""), @@ -335,10 +332,7 @@ TEST(MountTest, MountFuseFilesystemNoDevice) { TEST(MountTest, MountFuseFilesystem) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); - - // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new - // device registration is complete. 
- SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor()); + SKIP_IF(IsRunningOnGvisor() && !IsFUSEEnabled()); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_WRONLY)); -- cgit v1.2.3 From 487b0905a4d21d8ca97013cab3f3c245d9241edc Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Tue, 28 Jul 2020 05:28:35 -0700 Subject: More test cases on receiving UDP mcast/bcast PiperOrigin-RevId: 323553832 --- .../tests/udp_recv_mcast_bcast_test.go | 72 +++++++++++++++++----- 1 file changed, 58 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go index fcd202643..df972b33d 100644 --- a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go +++ b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go @@ -15,8 +15,11 @@ package udp_recv_mcast_bcast_test import ( + "context" "flag" + "fmt" "net" + "syscall" "testing" "golang.org/x/sys/unix" @@ -28,30 +31,71 @@ func init() { testbench.RegisterFlags(flag.CommandLine) } -func TestUDPRecvMulticastBroadcast(t *testing.T) { +func TestUDPRecvMcastBcast(t *testing.T) { + subnetBcastAddr := broadcastAddr(net.ParseIP(testbench.RemoteIPv4), net.CIDRMask(testbench.IPv4PrefixLength, 32)) + + for _, v := range []struct { + bound, to net.IP + }{ + {bound: net.IPv4(0, 0, 0, 0), to: subnetBcastAddr}, + {bound: net.IPv4(0, 0, 0, 0), to: net.IPv4bcast}, + {bound: net.IPv4(0, 0, 0, 0), to: net.IPv4allsys}, + + {bound: subnetBcastAddr, to: subnetBcastAddr}, + {bound: subnetBcastAddr, to: net.IPv4bcast}, + + {bound: net.IPv4bcast, to: net.IPv4bcast}, + {bound: net.IPv4allsys, to: net.IPv4allsys}, + } { + t.Run(fmt.Sprintf("bound=%s,to=%s", v.bound, v.to), func(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + boundFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, v.bound) + defer dut.Close(t, boundFD) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) + defer conn.Close(t) + + payload := testbench.GenerateRandomPayload(t, 1<<10) + conn.SendIP( + t, + testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(v.to.To4()))}, + testbench.UDP{}, + &testbench.Payload{Bytes: payload}, + ) + if got, want := string(dut.Recv(t, boundFD, int32(len(payload)), 0)), string(payload); got != want { + t.Errorf("received payload does not match sent payload got: %s, want: %s", got, want) + } + }) + } +} + +func TestUDPDoesntRecvMcastBcastOnUnicastAddr(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - boundFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4(0, 0, 0, 0)) + boundFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv4)) + dut.SetSockOptTimeval(t, boundFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &unix.Timeval{Sec: 1, Usec: 0}) defer dut.Close(t, boundFD) conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer conn.Close(t) - for _, bcastAddr := range []net.IP{ + for _, to := range []net.IP{ broadcastAddr(net.ParseIP(testbench.RemoteIPv4), net.CIDRMask(testbench.IPv4PrefixLength, 32)), net.IPv4(255, 255, 255, 255), net.IPv4(224, 0, 0, 1), } { - payload := testbench.GenerateRandomPayload(t, 1<<10) - conn.SendIP( - t, - testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(bcastAddr.To4()))}, - testbench.UDP{}, - &testbench.Payload{Bytes: payload}, - ) - t.Logf("Receiving packet sent to address: %s", 
bcastAddr) - if got, want := string(dut.Recv(t, boundFD, int32(len(payload)), 0)), string(payload); got != want { - t.Errorf("received payload does not match sent payload got: %s, want: %s", got, want) - } + t.Run(fmt.Sprint("to=%s", to), func(t *testing.T) { + payload := testbench.GenerateRandomPayload(t, 1<<10) + conn.SendIP( + t, + testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(to.To4()))}, + testbench.UDP{}, + &testbench.Payload{Bytes: payload}, + ) + ret, payload, errno := dut.RecvWithErrno(context.Background(), t, boundFD, 100, 0) + if errno != syscall.EAGAIN || errno != syscall.EWOULDBLOCK { + t.Errorf("Recv got unexpected result, ret=%d, payload=%q, errno=%s", ret, payload, errno) + } + }) } } -- cgit v1.2.3 From d9c9420335a78b54bc04ec0639d89539b4c3972c Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Sun, 14 Jun 2020 22:40:46 -0700 Subject: ip6tables testing We skip gVisor tests for now, as ip6tables aren't yet implemented. --- pkg/test/dockerutil/container.go | 9 ++- test/iptables/BUILD | 1 + test/iptables/filter_input.go | 145 +++++++++++++++++++-------------------- test/iptables/filter_output.go | 122 ++++++++++++++++---------------- test/iptables/iptables.go | 4 +- test/iptables/iptables_test.go | 29 ++++++-- test/iptables/iptables_util.go | 64 +++++++++++------ test/iptables/nat.go | 121 +++++++++++++++++--------------- test/iptables/runner/main.go | 9 ++- 9 files changed, 281 insertions(+), 223 deletions(-) (limited to 'test') diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go index b59503188..441173ec2 100644 --- a/pkg/test/dockerutil/container.go +++ b/pkg/test/dockerutil/container.go @@ -360,13 +360,18 @@ func (c *Container) SandboxPid(ctx context.Context) (int, error) { } // FindIP returns the IP address of the container. -func (c *Container) FindIP(ctx context.Context) (net.IP, error) { +func (c *Container) FindIP(ctx context.Context, ipv6 bool) (net.IP, error) { resp, err := c.client.ContainerInspect(ctx, c.id) if err != nil { return nil, err } - ip := net.ParseIP(resp.NetworkSettings.DefaultNetworkSettings.IPAddress) + var ip net.IP + if ipv6 { + ip = net.ParseIP(resp.NetworkSettings.DefaultNetworkSettings.GlobalIPv6Address) + } else { + ip = net.ParseIP(resp.NetworkSettings.DefaultNetworkSettings.IPAddress) + } if ip == nil { return net.IP{}, fmt.Errorf("invalid IP: %q", ip) } diff --git a/test/iptables/BUILD b/test/iptables/BUILD index 3e29ca90d..40b63ebbe 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -20,6 +20,7 @@ go_library( go_test( name = "iptables_test", + size = "large", srcs = [ "iptables_test.go", ], diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index af4355ba8..5737ee317 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -25,7 +25,6 @@ const ( dropPort = 2401 acceptPort = 2402 sendloopDuration = 2 * time.Second - network = "udp4" chainName = "foochain" ) @@ -62,8 +61,8 @@ func (FilterInputDropUDP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropUDP) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { +func (FilterInputDropUDP) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { return err } @@ -80,7 +79,7 @@ func (FilterInputDropUDP) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. 
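// The iptables_util.go side of this change is not visible in this hunk; the
// edits in this file only thread a leading ipv6 flag into filterTable and
// filterTableRules. A rough sketch of what such a helper could look like (an
// assumption for illustration, not the actual implementation; it would rely
// on the os/exec and fmt imports):
func filterTable(ipv6 bool, args ...string) error {
	// Run ip6tables instead of iptables when the IPv6 stack is under test.
	binary := "iptables"
	if ipv6 {
		binary = "ip6tables"
	}
	cmdArgs := append([]string{"-t", "filter"}, args...)
	if out, err := exec.Command(binary, cmdArgs...).CombinedOutput(); err != nil {
		return fmt.Errorf("error running %s %v: %v, output: %s", binary, cmdArgs, err, string(out))
	}
	return nil
}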
-func (FilterInputDropUDP) LocalAction(ip net.IP) error { +func (FilterInputDropUDP) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, dropPort, sendloopDuration) } @@ -93,8 +92,8 @@ func (FilterInputDropOnlyUDP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropOnlyUDP) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { +func (FilterInputDropOnlyUDP) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { return err } @@ -107,7 +106,7 @@ func (FilterInputDropOnlyUDP) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropOnlyUDP) LocalAction(ip net.IP) error { +func (FilterInputDropOnlyUDP) LocalAction(ip net.IP, ipv6 bool) error { // Try to establish a TCP connection with the container, which should // succeed. return connectTCP(ip, acceptPort, sendloopDuration) @@ -122,8 +121,8 @@ func (FilterInputDropUDPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropUDPPort) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { +func (FilterInputDropUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } @@ -140,7 +139,7 @@ func (FilterInputDropUDPPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropUDPPort) LocalAction(ip net.IP) error { +func (FilterInputDropUDPPort) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, dropPort, sendloopDuration) } @@ -154,8 +153,8 @@ func (FilterInputDropDifferentUDPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { +func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } @@ -168,7 +167,7 @@ func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP) error { +func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -181,8 +180,8 @@ func (FilterInputDropTCPDestPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { +func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "INPUT", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } @@ -195,7 +194,7 @@ func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropTCPDestPort) LocalAction(ip net.IP) error { +func (FilterInputDropTCPDestPort) LocalAction(ip net.IP, ipv6 bool) error { // Ensure we cannot connect to the container. for start := time.Now(); time.Since(start) < sendloopDuration; { if err := connectTCP(ip, dropPort, sendloopDuration-time.Since(start)); err == nil { @@ -215,9 +214,9 @@ func (FilterInputDropTCPSrcPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP) error { +func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP, ipv6 bool) error { // Drop anything from an ephemeral port. - if err := filterTable("-A", "INPUT", "-p", "tcp", "-m", "tcp", "--sport", "1024:65535", "-j", "DROP"); err != nil { + if err := filterTable(ipv6, "-A", "INPUT", "-p", "tcp", "-m", "tcp", "--sport", "1024:65535", "-j", "DROP"); err != nil { return err } @@ -230,7 +229,7 @@ func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropTCPSrcPort) LocalAction(ip net.IP) error { +func (FilterInputDropTCPSrcPort) LocalAction(ip net.IP, ipv6 bool) error { // Ensure we cannot connect to the container. for start := time.Now(); time.Since(start) < sendloopDuration; { if err := connectTCP(ip, acceptPort, sendloopDuration-time.Since(start)); err == nil { @@ -250,8 +249,8 @@ func (FilterInputDropAll) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropAll) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-j", "DROP"); err != nil { +func (FilterInputDropAll) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "INPUT", "-j", "DROP"); err != nil { return err } @@ -268,7 +267,7 @@ func (FilterInputDropAll) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropAll) LocalAction(ip net.IP) error { +func (FilterInputDropAll) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, dropPort, sendloopDuration) } @@ -284,17 +283,17 @@ func (FilterInputMultiUDPRules) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputMultiUDPRules) ContainerAction(ip net.IP) error { +func (FilterInputMultiUDPRules) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"}, {"-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", acceptPort), "-j", "ACCEPT"}, {"-L"}, } - return filterTableRules(rules) + return filterTableRules(ipv6, rules) } // LocalAction implements TestCase.LocalAction. -func (FilterInputMultiUDPRules) LocalAction(ip net.IP) error { +func (FilterInputMultiUDPRules) LocalAction(ip net.IP, ipv6 bool) error { // No-op. 
return nil } @@ -309,14 +308,14 @@ func (FilterInputRequireProtocolUDP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err == nil { +func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "INPUT", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err == nil { return errors.New("expected iptables to fail with out \"-p udp\", but succeeded") } return nil } -func (FilterInputRequireProtocolUDP) LocalAction(ip net.IP) error { +func (FilterInputRequireProtocolUDP) LocalAction(ip net.IP, ipv6 bool) error { // No-op. return nil } @@ -330,18 +329,18 @@ func (FilterInputCreateUserChain) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputCreateUserChain) ContainerAction(ip net.IP) error { +func (FilterInputCreateUserChain) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ // Create a chain. {"-N", chainName}, // Add a simple rule to the chain. {"-A", chainName, "-j", "DROP"}, } - return filterTableRules(rules) + return filterTableRules(ipv6, rules) } // LocalAction implements TestCase.LocalAction. -func (FilterInputCreateUserChain) LocalAction(ip net.IP) error { +func (FilterInputCreateUserChain) LocalAction(ip net.IP, ipv6 bool) error { // No-op. return nil } @@ -355,16 +354,16 @@ func (FilterInputDefaultPolicyAccept) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDefaultPolicyAccept) ContainerAction(ip net.IP) error { +func (FilterInputDefaultPolicyAccept) ContainerAction(ip net.IP, ipv6 bool) error { // Set the default policy to accept, then receive a packet. - if err := filterTable("-P", "INPUT", "ACCEPT"); err != nil { + if err := filterTable(ipv6, "-P", "INPUT", "ACCEPT"); err != nil { return err } return listenUDP(acceptPort, sendloopDuration) } // LocalAction implements TestCase.LocalAction. -func (FilterInputDefaultPolicyAccept) LocalAction(ip net.IP) error { +func (FilterInputDefaultPolicyAccept) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -377,8 +376,8 @@ func (FilterInputDefaultPolicyDrop) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDefaultPolicyDrop) ContainerAction(ip net.IP) error { - if err := filterTable("-P", "INPUT", "DROP"); err != nil { +func (FilterInputDefaultPolicyDrop) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-P", "INPUT", "DROP"); err != nil { return err } @@ -395,7 +394,7 @@ func (FilterInputDefaultPolicyDrop) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDefaultPolicyDrop) LocalAction(ip net.IP) error { +func (FilterInputDefaultPolicyDrop) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -409,7 +408,7 @@ func (FilterInputReturnUnderflow) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputReturnUnderflow) ContainerAction(ip net.IP) error { +func (FilterInputReturnUnderflow) ContainerAction(ip net.IP, ipv6 bool) error { // Add a RETURN rule followed by an unconditional accept, and set the // default policy to DROP. 
rules := [][]string{ @@ -417,7 +416,7 @@ func (FilterInputReturnUnderflow) ContainerAction(ip net.IP) error { {"-A", "INPUT", "-j", "DROP"}, {"-P", "INPUT", "ACCEPT"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -427,7 +426,7 @@ func (FilterInputReturnUnderflow) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputReturnUnderflow) LocalAction(ip net.IP) error { +func (FilterInputReturnUnderflow) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -440,18 +439,18 @@ func (FilterInputSerializeJump) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputSerializeJump) ContainerAction(ip net.IP) error { +func (FilterInputSerializeJump) ContainerAction(ip net.IP, ipv6 bool) error { // Write a JUMP rule, the serialize it with `-L`. rules := [][]string{ {"-N", chainName}, {"-A", "INPUT", "-j", chainName}, {"-L"}, } - return filterTableRules(rules) + return filterTableRules(ipv6, rules) } // LocalAction implements TestCase.LocalAction. -func (FilterInputSerializeJump) LocalAction(ip net.IP) error { +func (FilterInputSerializeJump) LocalAction(ip net.IP, ipv6 bool) error { // No-op. return nil } @@ -465,14 +464,14 @@ func (FilterInputJumpBasic) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputJumpBasic) ContainerAction(ip net.IP) error { +func (FilterInputJumpBasic) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ {"-P", "INPUT", "DROP"}, {"-N", chainName}, {"-A", "INPUT", "-j", chainName}, {"-A", chainName, "-j", "ACCEPT"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -481,7 +480,7 @@ func (FilterInputJumpBasic) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpBasic) LocalAction(ip net.IP) error { +func (FilterInputJumpBasic) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -494,7 +493,7 @@ func (FilterInputJumpReturn) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputJumpReturn) ContainerAction(ip net.IP) error { +func (FilterInputJumpReturn) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ {"-N", chainName}, {"-P", "INPUT", "ACCEPT"}, @@ -502,7 +501,7 @@ func (FilterInputJumpReturn) ContainerAction(ip net.IP) error { {"-A", chainName, "-j", "RETURN"}, {"-A", chainName, "-j", "DROP"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -511,7 +510,7 @@ func (FilterInputJumpReturn) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpReturn) LocalAction(ip net.IP) error { +func (FilterInputJumpReturn) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -524,14 +523,14 @@ func (FilterInputJumpReturnDrop) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP) error { +func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ {"-N", chainName}, {"-A", "INPUT", "-j", chainName}, {"-A", "INPUT", "-j", "DROP"}, {"-A", chainName, "-j", "RETURN"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -548,7 +547,7 @@ func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpReturnDrop) LocalAction(ip net.IP) error { +func (FilterInputJumpReturnDrop) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, dropPort, sendloopDuration) } @@ -561,15 +560,15 @@ func (FilterInputJumpBuiltin) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputJumpBuiltin) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "INPUT", "-j", "OUTPUT"); err == nil { +func (FilterInputJumpBuiltin) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "INPUT", "-j", "OUTPUT"); err == nil { return fmt.Errorf("iptables should be unable to jump to a built-in chain") } return nil } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpBuiltin) LocalAction(ip net.IP) error { +func (FilterInputJumpBuiltin) LocalAction(ip net.IP, ipv6 bool) error { // No-op. return nil } @@ -583,7 +582,7 @@ func (FilterInputJumpTwice) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputJumpTwice) ContainerAction(ip net.IP) error { +func (FilterInputJumpTwice) ContainerAction(ip net.IP, ipv6 bool) error { const chainName2 = chainName + "2" rules := [][]string{ {"-P", "INPUT", "DROP"}, @@ -593,7 +592,7 @@ func (FilterInputJumpTwice) ContainerAction(ip net.IP) error { {"-A", chainName, "-j", chainName2}, {"-A", "INPUT", "-j", "ACCEPT"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -603,7 +602,7 @@ func (FilterInputJumpTwice) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpTwice) LocalAction(ip net.IP) error { +func (FilterInputJumpTwice) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -617,8 +616,8 @@ func (FilterInputDestination) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDestination) ContainerAction(ip net.IP) error { - addrs, err := localAddrs(false) +func (FilterInputDestination) ContainerAction(ip net.IP, ipv6 bool) error { + addrs, err := localAddrs(ipv6) if err != nil { return err } @@ -629,7 +628,7 @@ func (FilterInputDestination) ContainerAction(ip net.IP) error { for _, addr := range addrs { rules = append(rules, []string{"-A", "INPUT", "-d", addr, "-j", "ACCEPT"}) } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -637,7 +636,7 @@ func (FilterInputDestination) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDestination) LocalAction(ip net.IP) error { +func (FilterInputDestination) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -651,14 +650,14 @@ func (FilterInputInvertDestination) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterInputInvertDestination) ContainerAction(ip net.IP) error { +func (FilterInputInvertDestination) ContainerAction(ip net.IP, ipv6 bool) error { // Make INPUT's default action DROP, then ACCEPT all packets not bound // for 127.0.0.1. rules := [][]string{ {"-P", "INPUT", "DROP"}, - {"-A", "INPUT", "!", "-d", localIP, "-j", "ACCEPT"}, + {"-A", "INPUT", "!", "-d", localIP(ipv6), "-j", "ACCEPT"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -666,7 +665,7 @@ func (FilterInputInvertDestination) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputInvertDestination) LocalAction(ip net.IP) error { +func (FilterInputInvertDestination) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -680,14 +679,14 @@ func (FilterInputSource) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputSource) ContainerAction(ip net.IP) error { +func (FilterInputSource) ContainerAction(ip net.IP, ipv6 bool) error { // Make INPUT's default action DROP, then ACCEPT all packets from this // machine. rules := [][]string{ {"-P", "INPUT", "DROP"}, {"-A", "INPUT", "-s", fmt.Sprintf("%v", ip), "-j", "ACCEPT"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -695,7 +694,7 @@ func (FilterInputSource) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputSource) LocalAction(ip net.IP) error { +func (FilterInputSource) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -709,14 +708,14 @@ func (FilterInputInvertSource) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputInvertSource) ContainerAction(ip net.IP) error { +func (FilterInputInvertSource) ContainerAction(ip net.IP, ipv6 bool) error { // Make INPUT's default action DROP, then ACCEPT all packets not bound // for 127.0.0.1. rules := [][]string{ {"-P", "INPUT", "DROP"}, - {"-A", "INPUT", "!", "-s", localIP, "-j", "ACCEPT"}, + {"-A", "INPUT", "!", "-s", localIP(ipv6), "-j", "ACCEPT"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -724,6 +723,6 @@ func (FilterInputInvertSource) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputInvertSource) LocalAction(ip net.IP) error { +func (FilterInputInvertSource) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go index ba0d6fc29..c1d83b471 100644 --- a/test/iptables/filter_output.go +++ b/test/iptables/filter_output.go @@ -52,8 +52,8 @@ func (FilterOutputDropTCPDestPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterOutputDropTCPDestPort) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", "1024:65535", "-j", "DROP"); err != nil { +func (FilterOutputDropTCPDestPort) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", "1024:65535", "-j", "DROP"); err != nil { return err } @@ -66,7 +66,7 @@ func (FilterOutputDropTCPDestPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDropTCPDestPort) LocalAction(ip net.IP) error { +func (FilterOutputDropTCPDestPort) LocalAction(ip net.IP, ipv6 bool) error { if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", dropPort) } @@ -84,8 +84,8 @@ func (FilterOutputDropTCPSrcPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropTCPSrcPort) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--sport", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { +func (FilterOutputDropTCPSrcPort) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--sport", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } @@ -98,7 +98,7 @@ func (FilterOutputDropTCPSrcPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP) error { +func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP, ipv6 bool) error { if err := connectTCP(ip, dropPort, sendloopDuration); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort) } @@ -115,8 +115,8 @@ func (FilterOutputAcceptTCPOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputAcceptTCPOwner) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--uid-owner", "root", "-j", "ACCEPT"); err != nil { +func (FilterOutputAcceptTCPOwner) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--uid-owner", "root", "-j", "ACCEPT"); err != nil { return err } @@ -125,7 +125,7 @@ func (FilterOutputAcceptTCPOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputAcceptTCPOwner) LocalAction(ip net.IP) error { +func (FilterOutputAcceptTCPOwner) LocalAction(ip net.IP, ipv6 bool) error { return connectTCP(ip, acceptPort, sendloopDuration) } @@ -138,8 +138,8 @@ func (FilterOutputDropTCPOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropTCPOwner) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--uid-owner", "root", "-j", "DROP"); err != nil { +func (FilterOutputDropTCPOwner) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--uid-owner", "root", "-j", "DROP"); err != nil { return err } @@ -152,7 +152,7 @@ func (FilterOutputDropTCPOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputDropTCPOwner) LocalAction(ip net.IP) error { +func (FilterOutputDropTCPOwner) LocalAction(ip net.IP, ipv6 bool) error { if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { return fmt.Errorf("connection destined to port %d should be dropped, but got accepted", acceptPort) } @@ -169,8 +169,8 @@ func (FilterOutputAcceptUDPOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputAcceptUDPOwner) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "udp", "-m", "owner", "--uid-owner", "root", "-j", "ACCEPT"); err != nil { +func (FilterOutputAcceptUDPOwner) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-m", "owner", "--uid-owner", "root", "-j", "ACCEPT"); err != nil { return err } @@ -179,7 +179,7 @@ func (FilterOutputAcceptUDPOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputAcceptUDPOwner) LocalAction(ip net.IP) error { +func (FilterOutputAcceptUDPOwner) LocalAction(ip net.IP, ipv6 bool) error { // Listen for UDP packets on acceptPort. return listenUDP(acceptPort, sendloopDuration) } @@ -193,8 +193,8 @@ func (FilterOutputDropUDPOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropUDPOwner) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "udp", "-m", "owner", "--uid-owner", "root", "-j", "DROP"); err != nil { +func (FilterOutputDropUDPOwner) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-m", "owner", "--uid-owner", "root", "-j", "DROP"); err != nil { return err } @@ -203,7 +203,7 @@ func (FilterOutputDropUDPOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDropUDPOwner) LocalAction(ip net.IP) error { +func (FilterOutputDropUDPOwner) LocalAction(ip net.IP, ipv6 bool) error { // Listen for UDP packets on dropPort. if err := listenUDP(dropPort, sendloopDuration); err == nil { return fmt.Errorf("packets should not be received") @@ -222,8 +222,8 @@ func (FilterOutputOwnerFail) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputOwnerFail) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "udp", "-m", "owner", "-j", "ACCEPT"); err == nil { +func (FilterOutputOwnerFail) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-m", "owner", "-j", "ACCEPT"); err == nil { return fmt.Errorf("Invalid argument") } @@ -231,7 +231,7 @@ func (FilterOutputOwnerFail) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputOwnerFail) LocalAction(ip net.IP) error { +func (FilterOutputOwnerFail) LocalAction(ip net.IP, ipv6 bool) error { // no-op. return nil } @@ -245,8 +245,8 @@ func (FilterOutputAcceptGIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterOutputAcceptGIDOwner) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--gid-owner", "root", "-j", "ACCEPT"); err != nil { +func (FilterOutputAcceptGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--gid-owner", "root", "-j", "ACCEPT"); err != nil { return err } @@ -255,7 +255,7 @@ func (FilterOutputAcceptGIDOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputAcceptGIDOwner) LocalAction(ip net.IP) error { +func (FilterOutputAcceptGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { return connectTCP(ip, acceptPort, sendloopDuration) } @@ -268,8 +268,8 @@ func (FilterOutputDropGIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropGIDOwner) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--gid-owner", "root", "-j", "DROP"); err != nil { +func (FilterOutputDropGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--gid-owner", "root", "-j", "DROP"); err != nil { return err } @@ -282,7 +282,7 @@ func (FilterOutputDropGIDOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDropGIDOwner) LocalAction(ip net.IP) error { +func (FilterOutputDropGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) } @@ -299,12 +299,12 @@ func (FilterOutputInvertGIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInvertGIDOwner) ContainerAction(ip net.IP) error { +func (FilterOutputInvertGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--gid-owner", "root", "-j", "ACCEPT"}, {"-A", "OUTPUT", "-p", "tcp", "-j", "DROP"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -317,7 +317,7 @@ func (FilterOutputInvertGIDOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInvertGIDOwner) LocalAction(ip net.IP) error { +func (FilterOutputInvertGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) } @@ -334,12 +334,12 @@ func (FilterOutputInvertUIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInvertUIDOwner) ContainerAction(ip net.IP) error { +func (FilterOutputInvertUIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--uid-owner", "root", "-j", "DROP"}, {"-A", "OUTPUT", "-p", "tcp", "-j", "ACCEPT"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -348,7 +348,7 @@ func (FilterOutputInvertUIDOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputInvertUIDOwner) LocalAction(ip net.IP) error { +func (FilterOutputInvertUIDOwner) LocalAction(ip net.IP, ipv6 bool) error { return connectTCP(ip, acceptPort, sendloopDuration) } @@ -362,12 +362,12 @@ func (FilterOutputInvertUIDAndGIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInvertUIDAndGIDOwner) ContainerAction(ip net.IP) error { +func (FilterOutputInvertUIDAndGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--uid-owner", "root", "!", "--gid-owner", "root", "-j", "ACCEPT"}, {"-A", "OUTPUT", "-p", "tcp", "-j", "DROP"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -380,7 +380,7 @@ func (FilterOutputInvertUIDAndGIDOwner) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInvertUIDAndGIDOwner) LocalAction(ip net.IP) error { +func (FilterOutputInvertUIDAndGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) } @@ -398,12 +398,12 @@ func (FilterOutputDestination) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDestination) ContainerAction(ip net.IP) error { +func (FilterOutputDestination) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "-d", ip.String(), "-j", "ACCEPT"}, {"-P", "OUTPUT", "DROP"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -411,7 +411,7 @@ func (FilterOutputDestination) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDestination) LocalAction(ip net.IP) error { +func (FilterOutputDestination) LocalAction(ip net.IP, ipv6 bool) error { return listenUDP(acceptPort, sendloopDuration) } @@ -425,12 +425,12 @@ func (FilterOutputInvertDestination) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInvertDestination) ContainerAction(ip net.IP) error { +func (FilterOutputInvertDestination) ContainerAction(ip net.IP, ipv6 bool) error { rules := [][]string{ - {"-A", "OUTPUT", "!", "-d", localIP, "-j", "ACCEPT"}, + {"-A", "OUTPUT", "!", "-d", localIP(ipv6), "-j", "ACCEPT"}, {"-P", "OUTPUT", "DROP"}, } - if err := filterTableRules(rules); err != nil { + if err := filterTableRules(ipv6, rules); err != nil { return err } @@ -438,7 +438,7 @@ func (FilterOutputInvertDestination) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInvertDestination) LocalAction(ip net.IP) error { +func (FilterOutputInvertDestination) LocalAction(ip net.IP, ipv6 bool) error { return listenUDP(acceptPort, sendloopDuration) } @@ -452,12 +452,12 @@ func (FilterOutputInterfaceAccept) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterOutputInterfaceAccept) ContainerAction(ip net.IP) error { +func (FilterOutputInterfaceAccept) ContainerAction(ip net.IP, ipv6 bool) error { ifname, ok := getInterfaceName() if !ok { return fmt.Errorf("no interface is present, except loopback") } - if err := filterTable("-A", "OUTPUT", "-p", "udp", "-o", ifname, "-j", "ACCEPT"); err != nil { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-o", ifname, "-j", "ACCEPT"); err != nil { return err } @@ -465,7 +465,7 @@ func (FilterOutputInterfaceAccept) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterfaceAccept) LocalAction(ip net.IP) error { +func (FilterOutputInterfaceAccept) LocalAction(ip net.IP, ipv6 bool) error { return listenUDP(acceptPort, sendloopDuration) } @@ -479,12 +479,12 @@ func (FilterOutputInterfaceDrop) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterfaceDrop) ContainerAction(ip net.IP) error { +func (FilterOutputInterfaceDrop) ContainerAction(ip net.IP, ipv6 bool) error { ifname, ok := getInterfaceName() if !ok { return fmt.Errorf("no interface is present, except loopback") } - if err := filterTable("-A", "OUTPUT", "-p", "udp", "-o", ifname, "-j", "DROP"); err != nil { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-o", ifname, "-j", "DROP"); err != nil { return err } @@ -492,7 +492,7 @@ func (FilterOutputInterfaceDrop) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterfaceDrop) LocalAction(ip net.IP) error { +func (FilterOutputInterfaceDrop) LocalAction(ip net.IP, ipv6 bool) error { if err := listenUDP(acceptPort, sendloopDuration); err == nil { return fmt.Errorf("packets should not be received on port %v, but are received", acceptPort) } @@ -510,8 +510,8 @@ func (FilterOutputInterface) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterface) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "udp", "-o", "lo", "-j", "DROP"); err != nil { +func (FilterOutputInterface) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-o", "lo", "-j", "DROP"); err != nil { return err } @@ -519,7 +519,7 @@ func (FilterOutputInterface) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterface) LocalAction(ip net.IP) error { +func (FilterOutputInterface) LocalAction(ip net.IP, ipv6 bool) error { return listenUDP(acceptPort, sendloopDuration) } @@ -533,8 +533,8 @@ func (FilterOutputInterfaceBeginsWith) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterfaceBeginsWith) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "udp", "-o", "e+", "-j", "DROP"); err != nil { +func (FilterOutputInterfaceBeginsWith) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-o", "e+", "-j", "DROP"); err != nil { return err } @@ -542,7 +542,7 @@ func (FilterOutputInterfaceBeginsWith) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputInterfaceBeginsWith) LocalAction(ip net.IP) error { +func (FilterOutputInterfaceBeginsWith) LocalAction(ip net.IP, ipv6 bool) error { if err := listenUDP(acceptPort, sendloopDuration); err == nil { return fmt.Errorf("packets should not be received on port %v, but are received", acceptPort) } @@ -560,8 +560,8 @@ func (FilterOutputInterfaceInvertDrop) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterfaceInvertDrop) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "!", "-o", "lo", "-j", "DROP"); err != nil { +func (FilterOutputInterfaceInvertDrop) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "!", "-o", "lo", "-j", "DROP"); err != nil { return err } @@ -574,7 +574,7 @@ func (FilterOutputInterfaceInvertDrop) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterfaceInvertDrop) LocalAction(ip net.IP) error { +func (FilterOutputInterfaceInvertDrop) LocalAction(ip net.IP, ipv6 bool) error { if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) } @@ -592,8 +592,8 @@ func (FilterOutputInterfaceInvertAccept) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterfaceInvertAccept) ContainerAction(ip net.IP) error { - if err := filterTable("-A", "OUTPUT", "-p", "tcp", "!", "-o", "lo", "-j", "ACCEPT"); err != nil { +func (FilterOutputInterfaceInvertAccept) ContainerAction(ip net.IP, ipv6 bool) error { + if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "!", "-o", "lo", "-j", "ACCEPT"); err != nil { return err } @@ -602,6 +602,6 @@ func (FilterOutputInterfaceInvertAccept) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterfaceInvertAccept) LocalAction(ip net.IP) error { +func (FilterOutputInterfaceInvertAccept) LocalAction(ip net.IP, ipv6 bool) error { return connectTCP(ip, acceptPort, sendloopDuration) } diff --git a/test/iptables/iptables.go b/test/iptables/iptables.go index 16cb4f4da..dfbd80cd1 100644 --- a/test/iptables/iptables.go +++ b/test/iptables/iptables.go @@ -40,10 +40,10 @@ type TestCase interface { // ContainerAction runs inside the container. It receives the IP of the // local process. - ContainerAction(ip net.IP) error + ContainerAction(ip net.IP, ipv6 bool) error // LocalAction runs locally. It receives the IP of the container. - LocalAction(ip net.IP) error + LocalAction(ip net.IP, ipv6 bool) error } // Tests maps test names to TestCase. diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index f303030aa..550b6198a 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -35,6 +35,18 @@ import ( // Container output is logged to $TEST_UNDECLARED_OUTPUTS_DIR if it exists, or // to stderr. func singleTest(t *testing.T, test TestCase) { + for _, tc := range []bool{false, true} { + subtest := "IPv4" + if tc { + subtest = "IPv6" + } + t.Run(subtest, func(t *testing.T) { + iptablesTest(t, test, tc) + }) + } +} + +func iptablesTest(t *testing.T, test TestCase, ipv6 bool) { if _, ok := Tests[test.Name()]; !ok { t.Fatalf("no test found with name %q. 
Has it been registered?", test.Name()) } @@ -43,18 +55,27 @@ func singleTest(t *testing.T, test TestCase) { d := dockerutil.MakeContainer(ctx, t) defer d.CleanUp(ctx) + // TODO(gvisor.dev/issue/170): Skipping IPv6 gVisor tests. + if ipv6 && dockerutil.Runtime() != "runc" { + t.Skip("gVisor ip6tables not yet implemented") + } + // Create and start the container. opts := dockerutil.RunOpts{ Image: "iptables", CapAdd: []string{"NET_ADMIN"}, } d.CopyFiles(&opts, "/runner", "test/iptables/runner/runner") - if err := d.Spawn(ctx, opts, "/runner/runner", "-name", test.Name()); err != nil { + args := []string{"/runner/runner", "-name", test.Name()} + if ipv6 { + args = append(args, "-ipv6") + } + if err := d.Spawn(ctx, opts, args...); err != nil { t.Fatalf("docker run failed: %v", err) } // Get the container IP. - ip, err := d.FindIP(ctx) + ip, err := d.FindIP(ctx, ipv6) if err != nil { t.Fatalf("failed to get container IP: %v", err) } @@ -65,7 +86,7 @@ func singleTest(t *testing.T, test TestCase) { } // Run our side of the test. - if err := test.LocalAction(ip); err != nil { + if err := test.LocalAction(ip, ipv6); err != nil { t.Fatalf("LocalAction failed: %v", err) } @@ -86,7 +107,7 @@ func sendIP(ip net.IP) error { // The container may not be listening when we first connect, so retry // upon error. cb := func() error { - c, err := net.DialTCP("tcp4", nil, &contAddr) + c, err := net.DialTCP("tcp", nil, &contAddr) conn = c return err } diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 174694002..ca80a4b5f 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -24,22 +24,23 @@ import ( "gvisor.dev/gvisor/pkg/test/testutil" ) -const iptablesBinary = "iptables" -const localIP = "127.0.0.1" - -// filterTable calls `iptables -t filter` with the given args. -func filterTable(args ...string) error { - return tableCmd("filter", args) +// filterTable calls `ip{6}tables -t filter` with the given args. +func filterTable(ipv6 bool, args ...string) error { + return tableCmd(ipv6, "filter", args) } -// natTable calls `iptables -t nat` with the given args. -func natTable(args ...string) error { - return tableCmd("nat", args) +// natTable calls `ip{6}tables -t nat` with the given args. +func natTable(ipv6 bool, args ...string) error { + return tableCmd(ipv6, "nat", args) } -func tableCmd(table string, args []string) error { +func tableCmd(ipv6 bool, table string, args []string) error { args = append([]string{"-t", table}, args...) - cmd := exec.Command(iptablesBinary, args...) + binary := "iptables" + if ipv6 { + binary = "ip6tables" + } + cmd := exec.Command(binary, args...) if out, err := cmd.CombinedOutput(); err != nil { return fmt.Errorf("error running iptables with args %v\nerror: %v\noutput: %s", args, err, string(out)) } @@ -47,18 +48,18 @@ func tableCmd(table string, args []string) error { } // filterTableRules is like filterTable, but runs multiple iptables commands. -func filterTableRules(argsList [][]string) error { - return tableRules("filter", argsList) +func filterTableRules(ipv6 bool, argsList [][]string) error { + return tableRules(ipv6, "filter", argsList) } // natTableRules is like natTable, but runs multiple iptables commands. 
-func natTableRules(argsList [][]string) error { - return tableRules("nat", argsList) +func natTableRules(ipv6 bool, argsList [][]string) error { + return tableRules(ipv6, "nat", argsList) } -func tableRules(table string, argsList [][]string) error { +func tableRules(ipv6 bool, table string, argsList [][]string) error { for _, args := range argsList { - if err := tableCmd(table, args); err != nil { + if err := tableCmd(ipv6, table, args); err != nil { return err } } @@ -71,7 +72,7 @@ func listenUDP(port int, timeout time.Duration) error { localAddr := net.UDPAddr{ Port: port, } - conn, err := net.ListenUDP(network, &localAddr) + conn, err := net.ListenUDP("udp", &localAddr) if err != nil { return err } @@ -112,7 +113,7 @@ func connectUDP(ip net.IP, port int) (net.Conn, error) { IP: ip, Port: port, } - conn, err := net.DialUDP(network, nil, &remote) + conn, err := net.DialUDP("udp", nil, &remote) if err != nil { return nil, err } @@ -143,7 +144,7 @@ func listenTCP(port int, timeout time.Duration) error { } // Starts listening on port. - lConn, err := net.ListenTCP("tcp4", &localAddr) + lConn, err := net.ListenTCP("tcp", &localAddr) if err != nil { return err } @@ -191,7 +192,14 @@ func localAddrs(ipv6 bool) ([]string, error) { } addrStrs := make([]string, 0, len(addrs)) for _, addr := range addrs { - addrStrs = append(addrStrs, addr.String()) + // Add only IPv4 or only IPv6 addresses. + parts := strings.Split(addr.String(), "/") + if len(parts) != 2 { + return nil, fmt.Errorf("bad interface address: %q", addr.String()) + } + if isIPv6 := net.ParseIP(parts[0]).To4() == nil; isIPv6 == ipv6 { + addrStrs = append(addrStrs, addr.String()) + } } return filterAddrs(addrStrs, ipv6), nil } @@ -222,3 +230,17 @@ func getInterfaceName() (string, bool) { return ifname, ifname != "" } + +func localIP(ipv6 bool) string { + if ipv6 { + return "::1" + } + return "127.0.0.1" +} + +func nowhereIP(ipv6 bool) string { + if ipv6 { + return "2001:db8::1" + } + return "192.0.2.1" +} diff --git a/test/iptables/nat.go b/test/iptables/nat.go index 23288577d..ac0d91bb2 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -53,8 +53,8 @@ func (NATPreRedirectUDPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreRedirectUDPPort) ContainerAction(ip net.IP) error { - if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { +func (NATPreRedirectUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } @@ -66,7 +66,7 @@ func (NATPreRedirectUDPPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectUDPPort) LocalAction(ip net.IP) error { +func (NATPreRedirectUDPPort) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -79,8 +79,8 @@ func (NATPreRedirectTCPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (NATPreRedirectTCPPort) ContainerAction(ip net.IP) error { - if err := natTable("-A", "PREROUTING", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { +func (NATPreRedirectTCPPort) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "PREROUTING", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { return err } @@ -89,7 +89,7 @@ func (NATPreRedirectTCPPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectTCPPort) LocalAction(ip net.IP) error { +func (NATPreRedirectTCPPort) LocalAction(ip net.IP, ipv6 bool) error { return connectTCP(ip, dropPort, sendloopDuration) } @@ -103,9 +103,9 @@ func (NATPreRedirectTCPOutgoing) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreRedirectTCPOutgoing) ContainerAction(ip net.IP) error { +func (NATPreRedirectTCPOutgoing) ContainerAction(ip net.IP, ipv6 bool) error { // Redirect all incoming TCP traffic to a closed port. - if err := natTable("-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { + if err := natTable(ipv6, "-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { return err } @@ -114,7 +114,7 @@ func (NATPreRedirectTCPOutgoing) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectTCPOutgoing) LocalAction(ip net.IP) error { +func (NATPreRedirectTCPOutgoing) LocalAction(ip net.IP, ipv6 bool) error { return listenTCP(acceptPort, sendloopDuration) } @@ -128,9 +128,9 @@ func (NATOutRedirectTCPIncoming) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectTCPIncoming) ContainerAction(ip net.IP) error { +func (NATOutRedirectTCPIncoming) ContainerAction(ip net.IP, ipv6 bool) error { // Redirect all outgoing TCP traffic to a closed port. - if err := natTable("-A", "OUTPUT", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { + if err := natTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { return err } @@ -139,7 +139,7 @@ func (NATOutRedirectTCPIncoming) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectTCPIncoming) LocalAction(ip net.IP) error { +func (NATOutRedirectTCPIncoming) LocalAction(ip net.IP, ipv6 bool) error { return connectTCP(ip, acceptPort, sendloopDuration) } @@ -152,13 +152,12 @@ func (NATOutRedirectUDPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectUDPPort) ContainerAction(ip net.IP) error { - dest := []byte{200, 0, 0, 1} - return loopbackTest(dest, "-A", "OUTPUT", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)) +func (NATOutRedirectUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { + return loopbackTest(ipv6, net.ParseIP(nowhereIP(ipv6)), "-A", "OUTPUT", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)) } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectUDPPort) LocalAction(ip net.IP) error { +func (NATOutRedirectUDPPort) LocalAction(ip net.IP, ipv6 bool) error { // No-op. 
return nil } @@ -173,8 +172,8 @@ func (NATDropUDP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATDropUDP) ContainerAction(ip net.IP) error { - if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { +func (NATDropUDP) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } @@ -186,7 +185,7 @@ func (NATDropUDP) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATDropUDP) LocalAction(ip net.IP) error { +func (NATDropUDP) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -199,8 +198,8 @@ func (NATAcceptAll) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATAcceptAll) ContainerAction(ip net.IP) error { - if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "ACCEPT"); err != nil { +func (NATAcceptAll) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "-j", "ACCEPT"); err != nil { return err } @@ -212,7 +211,7 @@ func (NATAcceptAll) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATAcceptAll) LocalAction(ip net.IP) error { +func (NATAcceptAll) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -226,14 +225,17 @@ func (NATOutRedirectIP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectIP) ContainerAction(ip net.IP) error { +func (NATOutRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { // Redirect OUTPUT packets to a listening localhost port. - dest := net.IP([]byte{200, 0, 0, 2}) - return loopbackTest(dest, "-A", "OUTPUT", "-d", dest.String(), "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)) + return loopbackTest(ipv6, net.ParseIP(nowhereIP(ipv6)), + "-A", "OUTPUT", + "-d", nowhereIP(ipv6), + "-p", "udp", + "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)) } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectIP) LocalAction(ip net.IP) error { +func (NATOutRedirectIP) LocalAction(ip net.IP, ipv6 bool) error { // No-op. return nil } @@ -248,15 +250,15 @@ func (NATOutDontRedirectIP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutDontRedirectIP) ContainerAction(ip net.IP) error { - if err := natTable("-A", "OUTPUT", "-d", localIP, "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil { +func (NATOutDontRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "OUTPUT", "-d", localIP(ipv6), "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil { return err } return sendUDPLoop(ip, acceptPort, sendloopDuration) } // LocalAction implements TestCase.LocalAction. -func (NATOutDontRedirectIP) LocalAction(ip net.IP) error { +func (NATOutDontRedirectIP) LocalAction(ip net.IP, ipv6 bool) error { return listenUDP(acceptPort, sendloopDuration) } @@ -269,15 +271,21 @@ func (NATOutRedirectInvert) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (NATOutRedirectInvert) ContainerAction(ip net.IP) error { +func (NATOutRedirectInvert) ContainerAction(ip net.IP, ipv6 bool) error { // Redirect OUTPUT packets to a listening localhost port. - dest := []byte{200, 0, 0, 3} - destStr := "200.0.0.2" - return loopbackTest(dest, "-A", "OUTPUT", "!", "-d", destStr, "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)) + dest := "192.0.2.2" + if ipv6 { + dest = "2001:db8::2" + } + return loopbackTest(ipv6, net.ParseIP(nowhereIP(ipv6)), + "-A", "OUTPUT", + "!", "-d", dest, + "-p", "udp", + "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)) } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectInvert) LocalAction(ip net.IP) error { +func (NATOutRedirectInvert) LocalAction(ip net.IP, ipv6 bool) error { // No-op. return nil } @@ -292,8 +300,8 @@ func (NATPreRedirectIP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreRedirectIP) ContainerAction(ip net.IP) error { - addrs, err := localAddrs(false) +func (NATPreRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { + addrs, err := localAddrs(ipv6) if err != nil { return err } @@ -302,14 +310,14 @@ func (NATPreRedirectIP) ContainerAction(ip net.IP) error { for _, addr := range addrs { rules = append(rules, []string{"-A", "PREROUTING", "-p", "udp", "-d", addr, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)}) } - if err := natTableRules(rules); err != nil { + if err := natTableRules(ipv6, rules); err != nil { return err } return listenUDP(acceptPort, sendloopDuration) } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectIP) LocalAction(ip net.IP) error { +func (NATPreRedirectIP) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, dropPort, sendloopDuration) } @@ -323,15 +331,15 @@ func (NATPreDontRedirectIP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreDontRedirectIP) ContainerAction(ip net.IP) error { - if err := natTable("-A", "PREROUTING", "-p", "udp", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { +func (NATPreDontRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "-d", localIP(ipv6), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { return err } return listenUDP(acceptPort, sendloopDuration) } // LocalAction implements TestCase.LocalAction. -func (NATPreDontRedirectIP) LocalAction(ip net.IP) error { +func (NATPreDontRedirectIP) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, acceptPort, sendloopDuration) } @@ -344,15 +352,15 @@ func (NATPreRedirectInvert) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreRedirectInvert) ContainerAction(ip net.IP) error { - if err := natTable("-A", "PREROUTING", "-p", "udp", "!", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { +func (NATPreRedirectInvert) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "!", "-d", localIP(ipv6), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { return err } return listenUDP(acceptPort, sendloopDuration) } // LocalAction implements TestCase.LocalAction. 
-func (NATPreRedirectInvert) LocalAction(ip net.IP) error { +func (NATPreRedirectInvert) LocalAction(ip net.IP, ipv6 bool) error { return spawnUDPLoop(ip, dropPort, sendloopDuration) } @@ -366,15 +374,15 @@ func (NATRedirectRequiresProtocol) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATRedirectRequiresProtocol) ContainerAction(ip net.IP) error { - if err := natTable("-A", "PREROUTING", "-d", localIP, "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err == nil { +func (NATRedirectRequiresProtocol) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "PREROUTING", "-d", localIP(ipv6), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err == nil { return errors.New("expected an error using REDIRECT --to-ports without a protocol") } return nil } // LocalAction implements TestCase.LocalAction. -func (NATRedirectRequiresProtocol) LocalAction(ip net.IP) error { +func (NATRedirectRequiresProtocol) LocalAction(ip net.IP, ipv6 bool) error { // No-op. return nil } @@ -388,15 +396,14 @@ func (NATOutRedirectTCPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectTCPPort) ContainerAction(ip net.IP) error { - if err := natTable("-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { +func (NATOutRedirectTCPPort) ContainerAction(ip net.IP, ipv6 bool) error { + if err := natTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { return err } timeout := 20 * time.Second - dest := []byte{127, 0, 0, 1} localAddr := net.TCPAddr{ - IP: dest, + IP: net.ParseIP(localIP(ipv6)), Port: acceptPort, } @@ -424,7 +431,7 @@ func (NATOutRedirectTCPPort) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectTCPPort) LocalAction(ip net.IP) error { +func (NATOutRedirectTCPPort) LocalAction(ip net.IP, ipv6 bool) error { return nil } @@ -438,10 +445,10 @@ func (NATLoopbackSkipsPrerouting) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATLoopbackSkipsPrerouting) ContainerAction(ip net.IP) error { +func (NATLoopbackSkipsPrerouting) ContainerAction(ip net.IP, ipv6 bool) error { // Redirect anything sent to localhost to an unused port. dest := []byte{127, 0, 0, 1} - if err := natTable("-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil { + if err := natTable(ipv6, "-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil { return err } @@ -459,15 +466,15 @@ func (NATLoopbackSkipsPrerouting) ContainerAction(ip net.IP) error { } // LocalAction implements TestCase.LocalAction. -func (NATLoopbackSkipsPrerouting) LocalAction(ip net.IP) error { +func (NATLoopbackSkipsPrerouting) LocalAction(ip net.IP, ipv6 bool) error { // No-op. return nil } // loopbackTests runs an iptables rule and ensures that packets sent to // dest:dropPort are received by localhost:acceptPort. 
-func loopbackTest(dest net.IP, args ...string) error {
-	if err := natTable(args...); err != nil {
+func loopbackTest(ipv6 bool, dest net.IP, args ...string) error {
+	if err := natTable(ipv6, args...); err != nil {
 		return err
 	}
 	sendCh := make(chan error)
diff --git a/test/iptables/runner/main.go b/test/iptables/runner/main.go
index 6f77c0684..69d3ef121 100644
--- a/test/iptables/runner/main.go
+++ b/test/iptables/runner/main.go
@@ -24,7 +24,10 @@ import (
 	"gvisor.dev/gvisor/test/iptables"
 )

-var name = flag.String("name", "", "name of the test to run")
+var (
+	name = flag.String("name", "", "name of the test to run")
+	ipv6 = flag.Bool("ipv6", false, "whether the test utilizes ip6tables")
+)

 func main() {
 	flag.Parse()
@@ -43,7 +46,7 @@ func main() {
 	}

 	// Run the test.
-	if err := test.ContainerAction(ip); err != nil {
+	if err := test.ContainerAction(ip, *ipv6); err != nil {
 		log.Fatalf("Failed running test %q: %v", *name, err)
 	}

@@ -57,7 +60,7 @@ func getIP() (net.IP, error) {
 	localAddr := net.TCPAddr{
 		Port: iptables.IPExchangePort,
 	}
-	listener, err := net.ListenTCP("tcp4", &localAddr)
+	listener, err := net.ListenTCP("tcp", &localAddr)
 	if err != nil {
 		return net.IP{}, fmt.Errorf("failed listening for IP: %v", err)
 	}
--
cgit v1.2.3


From 15d13e3398d519c48e9278889d7047a937f95d9c Mon Sep 17 00:00:00 2001
From: Ayush Ranjan
Date: Tue, 28 Jul 2020 12:45:49 -0700
Subject: [Runtime Tests] Exclude flaky/failing tests

PiperOrigin-RevId: 323632510
---
 test/runtimes/BUILD                    | 4 ++--
 test/runtimes/exclude_nodejs12.4.0.csv | 1 +
 test/runtimes/exclude_php7.3.6.csv     | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'test')

diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD
index f98d02e00..1728161ce 100644
--- a/test/runtimes/BUILD
+++ b/test/runtimes/BUILD
@@ -13,14 +13,14 @@ runtime_test(
     name = "java11",
     exclude_file = "exclude_java11.csv",
     lang = "java",
-    shard_count = 10,
+    shard_count = 20,
 )

 runtime_test(
     name = "nodejs12.4.0",
     exclude_file = "exclude_nodejs12.4.0.csv",
     lang = "nodejs",
-    shard_count = 5,
+    shard_count = 10,
 )

 runtime_test(
diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv
index e7edfa0a5..4eb0a4807 100644
--- a/test/runtimes/exclude_nodejs12.4.0.csv
+++ b/test/runtimes/exclude_nodejs12.4.0.csv
@@ -47,4 +47,5 @@ pseudo-tty/test-tty-window-size.js,,
 pseudo-tty/test-tty-wrap.js,,
 pummel/test-net-pingpong.js,,
 pummel/test-vm-memleak.js,,
+sequential/test-net-bytes-per-incoming-chunk-overhead.js,,flaky - timeout
 tick-processor/test-tick-processor-builtin.js,,
diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv
index f3606bfe8..0bef786c0 100644
--- a/test/runtimes/exclude_php7.3.6.csv
+++ b/test/runtimes/exclude_php7.3.6.csv
@@ -11,6 +11,7 @@ ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,,
 ext/session/tests/session_set_save_handler_class_018.phpt,,
 ext/session/tests/session_set_save_handler_iface_003.phpt,,
 ext/session/tests/session_set_save_handler_variation4.phpt,,
+ext/session/tests/session_set_save_handler_variation5.phpt,,
 ext/standard/tests/file/filetype_variation.phpt,,
 ext/standard/tests/file/fopen_variation19.phpt,,
 ext/standard/tests/file/php_fd_wrapper_01.phpt,,
@@ -26,6 +27,7 @@ ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,,
 ext/standard/tests/network/bug20134.phpt,,
 ext/standard/tests/streams/stream_socket_sendto.phpt,,
 ext/standard/tests/strings/007.phpt,,
+sapi/cli/tests/upload_2G.phpt,,
 tests/output/stream_isatty_err.phpt,b/68720279,
 tests/output/stream_isatty_in-err.phpt,b/68720282,
 tests/output/stream_isatty_in-out-err.phpt,,
--
cgit v1.2.3


From 5873b0f43f5ff4d811f951baaacb7bbe8b1a486c Mon Sep 17 00:00:00 2001
From: Zach Koopmans
Date: Tue, 28 Jul 2020 12:51:54 -0700
Subject: Port tensorflow benchmark.

PiperOrigin-RevId: 323633737
---
 images/benchmarks/tensorflow/Dockerfile |  7 ++++
 test/benchmarks/media/ffmpeg_test.go    |  1 +
 test/benchmarks/ml/BUILD                | 21 ++++++++++
 test/benchmarks/ml/ml.go                | 31 +++++++++++++++
 test/benchmarks/ml/tensorflow_test.go   | 69 +++++++++++++++++++++++++++++++++
 5 files changed, 129 insertions(+)
 create mode 100644 images/benchmarks/tensorflow/Dockerfile
 create mode 100644 test/benchmarks/ml/BUILD
 create mode 100644 test/benchmarks/ml/ml.go
 create mode 100644 test/benchmarks/ml/tensorflow_test.go

(limited to 'test')

diff --git a/images/benchmarks/tensorflow/Dockerfile b/images/benchmarks/tensorflow/Dockerfile
new file mode 100644
index 000000000..7564a4ee5
--- /dev/null
+++ b/images/benchmarks/tensorflow/Dockerfile
@@ -0,0 +1,7 @@
+FROM tensorflow/tensorflow:1.13.2
+
+RUN apt-get update \
+  && apt-get install -y git
+RUN git clone --depth 1 https://github.com/aymericdamien/TensorFlow-Examples.git
+RUN python -m pip install -U pip setuptools
+RUN python -m pip install matplotlib
diff --git a/test/benchmarks/media/ffmpeg_test.go b/test/benchmarks/media/ffmpeg_test.go
index bfcfbab80..7822dfad7 100644
--- a/test/benchmarks/media/ffmpeg_test.go
+++ b/test/benchmarks/media/ffmpeg_test.go
@@ -33,6 +33,7 @@ func BenchmarkFfmpeg(b *testing.B) {

 	ctx := context.Background()
 	container := machine.GetContainer(ctx, b)
+	defer container.CleanUp(ctx)
 	cmd := strings.Split("ffmpeg -i video.mp4 -c:v libx264 -preset veryslow output.mp4", " ")

 	b.ResetTimer()
diff --git a/test/benchmarks/ml/BUILD b/test/benchmarks/ml/BUILD
new file mode 100644
index 000000000..2430b60a7
--- /dev/null
+++ b/test/benchmarks/ml/BUILD
@@ -0,0 +1,21 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "ml",
+    testonly = 1,
+    srcs = ["ml.go"],
+    deps = ["//test/benchmarks/harness"],
+)
+
+go_test(
+    name = "ml_test",
+    size = "large",
+    srcs = ["tensorflow_test.go"],
+    library = ":ml",
+    deps = [
+        "//pkg/test/dockerutil",
+        "//test/benchmarks/harness",
+    ],
+)
diff --git a/test/benchmarks/ml/ml.go b/test/benchmarks/ml/ml.go
new file mode 100644
index 000000000..13282d7bb
--- /dev/null
+++ b/test/benchmarks/ml/ml.go
@@ -0,0 +1,31 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package ml holds benchmarks around machine learning performance.
+package ml
+
+import (
+	"os"
+	"testing"
+
+	"gvisor.dev/gvisor/test/benchmarks/harness"
+)
+
+var h harness.Harness
+
+// TestMain is the main method for package ml.
+func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} diff --git a/test/benchmarks/ml/tensorflow_test.go b/test/benchmarks/ml/tensorflow_test.go new file mode 100644 index 000000000..f7746897d --- /dev/null +++ b/test/benchmarks/ml/tensorflow_test.go @@ -0,0 +1,69 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package ml + +import ( + "context" + "testing" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +// BenchmarkTensorflow runs workloads from a TensorFlow tutorial. +// See: https://github.com/aymericdamien/TensorFlow-Examples +func BenchmarkTensorflow(b *testing.B) { + workloads := map[string]string{ + "GradientDecisionTree": "2_BasicModels/gradient_boosted_decision_tree.py", + "Kmeans": "2_BasicModels/kmeans.py", + "LogisticRegression": "2_BasicModels/logistic_regression.py", + "NearestNeighbor": "2_BasicModels/nearest_neighbor.py", + "RandomForest": "2_BasicModels/random_forest.py", + "ConvolutionalNetwork": "3_NeuralNetworks/convolutional_network.py", + "MultilayerPerceptron": "3_NeuralNetworks/multilayer_perceptron.py", + "NeuralNetwork": "3_NeuralNetworks/neural_network.py", + } + + machine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer machine.CleanUp() + + for name, workload := range workloads { + b.Run(name, func(b *testing.B) { + ctx := context.Background() + container := machine.GetContainer(ctx, b) + defer container.CleanUp(ctx) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + if err := harness.DropCaches(machine); err != nil { + b.Skipf("failed to drop caches: %v. You probably need root.", err) + } + b.StartTimer() + + if out, err := container.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/tensorflow", + Env: []string{"PYTHONPATH=$PYTHONPATH:/TensorFlow-Examples/examples"}, + WorkDir: "/TensorFlow-Examples/examples", + }, "python", workload); err != nil { + b.Fatalf("failed to run container: %v logs: %s", err, out) + } + } + }) + } + +} -- cgit v1.2.3 From f82dd8ddb477b8923d1db12654a62d55d613019c Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 28 Jul 2020 21:22:52 -0700 Subject: Redirect TODO to GitHub issues PiperOrigin-RevId: 323715260 --- pkg/sentry/socket/netstack/netstack.go | 4 ++-- pkg/tcpip/stack/nic.go | 2 +- pkg/tcpip/stack/stack_test.go | 6 +++--- pkg/tcpip/transport/packet/endpoint.go | 4 ++-- test/syscalls/linux/packet_socket.cc | 4 ++-- test/syscalls/linux/packet_socket_raw.cc | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'test') diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 44b3fff46..f86e6cd7a 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -423,7 +423,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument } - // TODO(b/129292371): Return protocol too. 
+ // TODO(gvisor.dev/issue/173): Return protocol too. return tcpip.FullAddress{ NIC: tcpip.NICID(a.InterfaceIndex), Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]), @@ -2418,7 +2418,7 @@ func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32) return &out, uint32(sockAddrInet6Size) case linux.AF_PACKET: - // TODO(b/129292371): Return protocol too. + // TODO(gvisor.dev/issue/173): Return protocol too. var out linux.SockAddrLink out.Family = linux.AF_PACKET out.InterfaceIndex = int32(addr.NIC) diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index fea0ce7e8..9256d4d43 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -181,7 +181,7 @@ func (n *NIC) disableLocked() *tcpip.Error { return nil } - // TODO(b/147015577): Should Routes that are currently bound to n be + // TODO(gvisor.dev/issue/1491): Should Routes that are currently bound to n be // invalidated? Currently, Routes will continue to work when a NIC is enabled // again, and applications may not know that the underlying NIC was ever // disabled. diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 7657a4101..101ca2206 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -867,9 +867,9 @@ func TestRouteWithDownNIC(t *testing.T) { // Writes with Routes that use NIC1 after being brought up should // succeed. // - // TODO(b/147015577): Should we instead completely invalidate all - // Routes that were bound to a NIC that was brought down at some - // point? + // TODO(gvisor.dev/issue/1491): Should we instead completely + // invalidate all Routes that were bound to a NIC that was brought + // down at some point? if err := upFn(s, nicID1); err != nil { t.Fatalf("test.upFn(_, %d): %s", nicID1, err) } diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index 0e46e6355..df478115d 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -193,7 +193,7 @@ func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMes } func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) { - // TODO(b/129292371): Implement. + // TODO(gvisor.dev/issue/173): Implement. return 0, nil, tcpip.ErrInvalidOptionValue } @@ -432,7 +432,7 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, // Push new packet into receive list and increment the buffer size. var packet packet - // TODO(b/129292371): Return network protocol. + // TODO(gvisor.dev/issue/173): Return network protocol. if len(pkt.LinkHeader) > 0 { // Get info directly from the ethernet header. hdr := header.Ethernet(pkt.LinkHeader) diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc index 40aa9326d..861617ff7 100644 --- a/test/syscalls/linux/packet_socket.cc +++ b/test/syscalls/linux/packet_socket.cc @@ -188,7 +188,7 @@ void ReceiveMessage(int sock, int ifindex) { // sizeof(sockaddr_ll). ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2))); - // TODO(b/129292371): Verify protocol once we return it. + // TODO(gvisor.dev/issue/173): Verify protocol once we return it. // Verify the source address. EXPECT_EQ(src.sll_family, AF_PACKET); EXPECT_EQ(src.sll_ifindex, ifindex); @@ -234,7 +234,7 @@ TEST_P(CookedPacketTest, Receive) { // Send via a packet socket. TEST_P(CookedPacketTest, Send) { - // TODO(b/129292371): Remove once we support packet socket writing. 
+ // TODO(gvisor.dev/issue/173): Remove once we support packet socket writing. SKIP_IF(IsRunningOnGvisor()); // Let's send a UDP packet and receive it using a regular UDP socket. diff --git a/test/syscalls/linux/packet_socket_raw.cc b/test/syscalls/linux/packet_socket_raw.cc index 2fca9fe4d..a11a03415 100644 --- a/test/syscalls/linux/packet_socket_raw.cc +++ b/test/syscalls/linux/packet_socket_raw.cc @@ -195,7 +195,7 @@ TEST_P(RawPacketTest, Receive) { // sizeof(sockaddr_ll). ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2))); - // TODO(b/129292371): Verify protocol once we return it. + // TODO(gvisor.dev/issue/173): Verify protocol once we return it. // Verify the source address. EXPECT_EQ(src.sll_family, AF_PACKET); EXPECT_EQ(src.sll_ifindex, GetLoopbackIndex()); @@ -240,7 +240,7 @@ TEST_P(RawPacketTest, Receive) { // Send via a packet socket. TEST_P(RawPacketTest, Send) { - // TODO(b/129292371): Remove once we support packet socket writing. + // TODO(gvisor.dev/issue/173): Remove once we support packet socket writing. SKIP_IF(IsRunningOnGvisor()); // Let's send a UDP packet and receive it using a regular UDP socket. -- cgit v1.2.3 From a6b4538ae0b0020b14c8292d98e2549719a49e4c Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Wed, 29 Jul 2020 06:21:39 -0700 Subject: Test UDP socket bound to ANY can receive unicast PiperOrigin-RevId: 323773771 --- test/packetimpact/tests/BUILD | 13 ++++++ .../tests/udp_any_addr_recv_unicast_test.go | 51 ++++++++++++++++++++++ .../tests/udp_icmp_error_propagation_test.go | 8 ++-- .../tests/udp_recv_mcast_bcast_test.go | 16 ++++--- .../packetimpact/tests/udp_send_recv_dgram_test.go | 6 ++- 5 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 test/packetimpact/tests/udp_any_addr_recv_unicast_test.go (limited to 'test') diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 27905dcff..0c2a05380 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -46,6 +46,18 @@ packetimpact_go_test( "//pkg/tcpip", "//pkg/tcpip/header", "//test/packetimpact/testbench", + "@com_github_google_go_cmp//cmp:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +packetimpact_go_test( + name = "udp_any_addr_recv_unicast", + srcs = ["udp_any_addr_recv_unicast_test.go"], + deps = [ + "//pkg/tcpip", + "//test/packetimpact/testbench", + "@com_github_google_go_cmp//cmp:go_default_library", "@org_golang_x_sys//unix:go_default_library", ], ) @@ -294,6 +306,7 @@ packetimpact_go_test( srcs = ["udp_send_recv_dgram_test.go"], deps = [ "//test/packetimpact/testbench", + "@com_github_google_go_cmp//cmp:go_default_library", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/test/packetimpact/tests/udp_any_addr_recv_unicast_test.go b/test/packetimpact/tests/udp_any_addr_recv_unicast_test.go new file mode 100644 index 000000000..17f32ef65 --- /dev/null +++ b/test/packetimpact/tests/udp_any_addr_recv_unicast_test.go @@ -0,0 +1,51 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package udp_any_addr_recv_unicast_test + +import ( + "flag" + "net" + "testing" + + "github.com/google/go-cmp/cmp" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/test/packetimpact/testbench" +) + +func init() { + testbench.RegisterFlags(flag.CommandLine) +} + +func TestAnyRecvUnicastUDP(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() + boundFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4zero) + defer dut.Close(t, boundFD) + conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) + defer conn.Close(t) + + payload := testbench.GenerateRandomPayload(t, 1<<10 /* 1 KiB */) + conn.SendIP( + t, + testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(net.ParseIP(testbench.RemoteIPv4).To4()))}, + testbench.UDP{}, + &testbench.Payload{Bytes: payload}, + ) + got, want := dut.Recv(t, boundFD, int32(len(payload)+1), 0), payload + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("received payload does not match sent payload, diff (-want, +got):\n%s", diff) + } +} diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index 715e8f5b5..b47ddb6c3 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -227,12 +227,12 @@ func TestUDPICMPErrorPropagation(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4zero) defer dut.Close(t, remoteFD) // Create a second, clean socket on the DUT to ensure that the ICMP // error messages only affect the sockets they are intended for. - cleanFD, cleanPort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + cleanFD, cleanPort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4zero) defer dut.Close(t, cleanFD) conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) @@ -281,12 +281,12 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) { dut := testbench.NewDUT(t) defer dut.TearDown() - remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4zero) defer dut.Close(t, remoteFD) // Create a second, clean socket on the DUT to ensure that the ICMP // error messages only affect the sockets they are intended for. 
- cleanFD, cleanPort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0")) + cleanFD, cleanPort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4zero) defer dut.Close(t, cleanFD) conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) diff --git a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go index df972b33d..526173969 100644 --- a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go +++ b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go @@ -22,6 +22,7 @@ import ( "syscall" "testing" + "github.com/google/go-cmp/cmp" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/test/packetimpact/testbench" @@ -37,9 +38,9 @@ func TestUDPRecvMcastBcast(t *testing.T) { for _, v := range []struct { bound, to net.IP }{ - {bound: net.IPv4(0, 0, 0, 0), to: subnetBcastAddr}, - {bound: net.IPv4(0, 0, 0, 0), to: net.IPv4bcast}, - {bound: net.IPv4(0, 0, 0, 0), to: net.IPv4allsys}, + {bound: net.IPv4zero, to: subnetBcastAddr}, + {bound: net.IPv4zero, to: net.IPv4bcast}, + {bound: net.IPv4zero, to: net.IPv4allsys}, {bound: subnetBcastAddr, to: subnetBcastAddr}, {bound: subnetBcastAddr, to: net.IPv4bcast}, @@ -55,15 +56,16 @@ func TestUDPRecvMcastBcast(t *testing.T) { conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort}) defer conn.Close(t) - payload := testbench.GenerateRandomPayload(t, 1<<10) + payload := testbench.GenerateRandomPayload(t, 1<<10 /* 1 KiB */) conn.SendIP( t, testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(v.to.To4()))}, testbench.UDP{}, &testbench.Payload{Bytes: payload}, ) - if got, want := string(dut.Recv(t, boundFD, int32(len(payload)), 0)), string(payload); got != want { - t.Errorf("received payload does not match sent payload got: %s, want: %s", got, want) + got, want := dut.Recv(t, boundFD, int32(len(payload)+1), 0), payload + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("received payload does not match sent payload, diff (-want, +got):\n%s", diff) } }) } @@ -84,7 +86,7 @@ func TestUDPDoesntRecvMcastBcastOnUnicastAddr(t *testing.T) { net.IPv4(224, 0, 0, 1), } { t.Run(fmt.Sprint("to=%s", to), func(t *testing.T) { - payload := testbench.GenerateRandomPayload(t, 1<<10) + payload := testbench.GenerateRandomPayload(t, 1<<10 /* 1 KiB */) conn.SendIP( t, testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(to.To4()))}, diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go index dc20275d6..91b967400 100644 --- a/test/packetimpact/tests/udp_send_recv_dgram_test.go +++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" "golang.org/x/sys/unix" "gvisor.dev/gvisor/test/packetimpact/testbench" ) @@ -82,8 +83,9 @@ func TestUDP(t *testing.T) { t.Run(tc.name, func(t *testing.T) { t.Run("Send", func(t *testing.T) { conn.Send(t, testbench.UDP{}, &testbench.Payload{Bytes: tc.payload}) - if got, want := string(dut.Recv(t, boundFD, int32(len(tc.payload)), 0)), string(tc.payload); got != want { - t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want) + got, want := dut.Recv(t, boundFD, int32(len(tc.payload)+1), 0), tc.payload + if diff := cmp.Diff(want, got); diff != "" { + t.Fatalf("received payload does not match sent payload, diff (-want, +got):\n%s", diff) } }) t.Run("Recv", 
func(t *testing.T) { -- cgit v1.2.3 From 6b4e11ab5074d41044e63337973ac2438ce2278e Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Wed, 29 Jul 2020 10:03:57 -0700 Subject: Port node benchmark. PiperOrigin-RevId: 323810235 --- images/benchmarks/hey/Dockerfile | 12 + images/benchmarks/node/Dockerfile | 1 + images/benchmarks/node/index.hbs | 8 + images/benchmarks/node/index.js | 42 +++ images/benchmarks/node/package-lock.json | 486 +++++++++++++++++++++++++++++++ images/benchmarks/node/package.json | 19 ++ test/benchmarks/network/BUILD | 1 + test/benchmarks/network/node_test.go | 261 +++++++++++++++++ 8 files changed, 830 insertions(+) create mode 100644 images/benchmarks/hey/Dockerfile create mode 100644 images/benchmarks/node/Dockerfile create mode 100644 images/benchmarks/node/index.hbs create mode 100644 images/benchmarks/node/index.js create mode 100644 images/benchmarks/node/package-lock.json create mode 100644 images/benchmarks/node/package.json create mode 100644 test/benchmarks/network/node_test.go (limited to 'test') diff --git a/images/benchmarks/hey/Dockerfile b/images/benchmarks/hey/Dockerfile new file mode 100644 index 000000000..f586978b6 --- /dev/null +++ b/images/benchmarks/hey/Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + wget \ + && rm -rf /var/lib/apt/lists/* + +RUN wget https://storage.googleapis.com/hey-release/hey_linux_amd64 \ + && chmod 777 hey_linux_amd64 \ + && cp hey_linux_amd64 /bin/hey \ + && rm hey_linux_amd64 diff --git a/images/benchmarks/node/Dockerfile b/images/benchmarks/node/Dockerfile new file mode 100644 index 000000000..bf45650a0 --- /dev/null +++ b/images/benchmarks/node/Dockerfile @@ -0,0 +1 @@ +FROM node:onbuild diff --git a/images/benchmarks/node/index.hbs b/images/benchmarks/node/index.hbs new file mode 100644 index 000000000..03feceb75 --- /dev/null +++ b/images/benchmarks/node/index.hbs @@ -0,0 +1,8 @@ + + + + {{#each text}} +

        <h1>{{this}}</h1>

+ {{/each}} + + diff --git a/images/benchmarks/node/index.js b/images/benchmarks/node/index.js new file mode 100644 index 000000000..831015d18 --- /dev/null +++ b/images/benchmarks/node/index.js @@ -0,0 +1,42 @@ +const app = require('express')(); +const path = require('path'); +const redis = require('redis'); +const srs = require('secure-random-string'); + +// The hostname is the first argument. +const host_name = process.argv[2]; + +var client = redis.createClient({host: host_name, detect_buffers: true}); + +app.set('views', __dirname); +app.set('view engine', 'hbs'); + +app.get('/', (req, res) => { + var tmp = []; + /* Pull four random keys from the redis server. */ + for (i = 0; i < 4; i++) { + client.get(Math.floor(Math.random() * (100)), function(err, reply) { + tmp.push(reply.toString()); + }); + } + res.render('index', {text: tmp}); +}); + +/** + * Securely generate a random string. + * @param {number} len + * @return {string} + */ +function randomBody(len) { + return srs({alphanumeric: true, length: len}); +} + +/** Mutates one hundred keys randomly. */ +function generateText() { + for (i = 0; i < 100; i++) { + client.set(i, randomBody(1024)); + } +} + +generateText(); +app.listen(8080); diff --git a/images/benchmarks/node/package-lock.json b/images/benchmarks/node/package-lock.json new file mode 100644 index 000000000..580e68aa5 --- /dev/null +++ b/images/benchmarks/node/package-lock.json @@ -0,0 +1,486 @@ +{ + "name": "nodedum", + "version": "1.0.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "accepts": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz", + "integrity": "sha1-63d99gEXI6OxTopywIBcjoZ0a9I=", + "requires": { + "mime-types": "~2.1.18", + "negotiator": "0.6.1" + } + }, + "array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=" + }, + "async": { + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/async/-/async-2.6.2.tgz", + "integrity": "sha512-H1qVYh1MYhEEFLsP97cVKqCGo7KfCyTt6uEWqsTBr9SO84oK9Uwbyd/yCW+6rKJLHksBNUVWZDAjfS+Ccx0Bbg==", + "requires": { + "lodash": "^4.17.11" + } + }, + "body-parser": { + "version": "1.18.3", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.18.3.tgz", + "integrity": "sha1-WykhmP/dVTs6DyDe0FkrlWlVyLQ=", + "requires": { + "bytes": "3.0.0", + "content-type": "~1.0.4", + "debug": "2.6.9", + "depd": "~1.1.2", + "http-errors": "~1.6.3", + "iconv-lite": "0.4.23", + "on-finished": "~2.3.0", + "qs": "6.5.2", + "raw-body": "2.3.3", + "type-is": "~1.6.16" + } + }, + "bytes": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz", + "integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=" + }, + "commander": { + "version": "2.20.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.0.tgz", + "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ==", + "optional": true + }, + "content-disposition": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.2.tgz", + "integrity": "sha1-DPaLud318r55YcOoUXjLhdunjLQ=" + }, + "content-type": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", + "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==" + }, + "cookie": { + "version": 
"0.3.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.3.1.tgz", + "integrity": "sha1-5+Ch+e9DtMi6klxcWpboBtFoc7s=" + }, + "cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=" + }, + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + }, + "depd": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", + "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=" + }, + "destroy": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", + "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" + }, + "double-ended-queue": { + "version": "2.1.0-0", + "resolved": "https://registry.npmjs.org/double-ended-queue/-/double-ended-queue-2.1.0-0.tgz", + "integrity": "sha1-ED01J/0xUo9AGIEwyEHv3XgmTlw=" + }, + "ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=" + }, + "encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=" + }, + "escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=" + }, + "etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=" + }, + "express": { + "version": "4.16.4", + "resolved": "https://registry.npmjs.org/express/-/express-4.16.4.tgz", + "integrity": "sha512-j12Uuyb4FMrd/qQAm6uCHAkPtO8FDTRJZBDd5D2KOL2eLaz1yUNdUB/NOIyq0iU4q4cFarsUCrnFDPBcnksuOg==", + "requires": { + "accepts": "~1.3.5", + "array-flatten": "1.1.1", + "body-parser": "1.18.3", + "content-disposition": "0.5.2", + "content-type": "~1.0.4", + "cookie": "0.3.1", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "~1.1.2", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "1.1.1", + "fresh": "0.5.2", + "merge-descriptors": "1.0.1", + "methods": "~1.1.2", + "on-finished": "~2.3.0", + "parseurl": "~1.3.2", + "path-to-regexp": "0.1.7", + "proxy-addr": "~2.0.4", + "qs": "6.5.2", + "range-parser": "~1.2.0", + "safe-buffer": "5.1.2", + "send": "0.16.2", + "serve-static": "1.13.2", + "setprototypeof": "1.1.0", + "statuses": "~1.4.0", + "type-is": "~1.6.16", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + } + }, + "finalhandler": { + "version": "1.1.1", + "resolved": "http://registry.npmjs.org/finalhandler/-/finalhandler-1.1.1.tgz", + "integrity": "sha512-Y1GUDo39ez4aHAw7MysnUD5JzYX+WaIj8I57kO3aEPT1fFRL4sr7mjei97FgnwhAyyzRYmQZaTHb2+9uZ1dPtg==", + "requires": { + "debug": "2.6.9", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "on-finished": "~2.3.0", + "parseurl": "~1.3.2", + "statuses": "~1.4.0", + "unpipe": "~1.0.0" + } + }, + "foreachasync": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/foreachasync/-/foreachasync-3.0.0.tgz", + "integrity": "sha1-VQKYfchxS+M5IJfzLgBxyd7gfPY=" + }, + "forwarded": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", + "integrity": 
"sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=" + }, + "fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=" + }, + "handlebars": { + "version": "4.0.14", + "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.0.14.tgz", + "integrity": "sha512-E7tDoyAA8ilZIV3xDJgl18sX3M8xB9/fMw8+mfW4msLW8jlX97bAnWgT3pmaNXuvzIEgSBMnAHfuXsB2hdzfow==", + "requires": { + "async": "^2.5.0", + "optimist": "^0.6.1", + "source-map": "^0.6.1", + "uglify-js": "^3.1.4" + } + }, + "hbs": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/hbs/-/hbs-4.0.4.tgz", + "integrity": "sha512-esVlyV/V59mKkwFai5YmPRSNIWZzhqL5YMN0++ueMxyK1cCfPa5f6JiHtapPKAIVAhQR6rpGxow0troav9WMEg==", + "requires": { + "handlebars": "4.0.14", + "walk": "2.3.9" + } + }, + "http-errors": { + "version": "1.6.3", + "resolved": "http://registry.npmjs.org/http-errors/-/http-errors-1.6.3.tgz", + "integrity": "sha1-i1VoC7S+KDoLW/TqLjhYC+HZMg0=", + "requires": { + "depd": "~1.1.2", + "inherits": "2.0.3", + "setprototypeof": "1.1.0", + "statuses": ">= 1.4.0 < 2" + } + }, + "iconv-lite": { + "version": "0.4.23", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.23.tgz", + "integrity": "sha512-neyTUVFtahjf0mB3dZT77u+8O0QB89jFdnBkd5P1JgYPbPaia3gXXOVL2fq8VyU2gMMD7SaN7QukTB/pmXYvDA==", + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } + }, + "inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + }, + "ipaddr.js": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.8.0.tgz", + "integrity": "sha1-6qM9bd16zo9/b+DJygRA5wZzix4=" + }, + "lodash": { + "version": "4.17.15", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", + "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==" + }, + "media-typer": { + "version": "0.3.0", + "resolved": "http://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=" + }, + "merge-descriptors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", + "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=" + }, + "methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=" + }, + "mime": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.4.1.tgz", + "integrity": "sha512-KI1+qOZu5DcW6wayYHSzR/tXKCDC5Om4s1z2QJjDULzLcmf3DvzS7oluY4HCTrc+9FiKmWUgeNLg7W3uIQvxtQ==" + }, + "mime-db": { + "version": "1.37.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.37.0.tgz", + "integrity": "sha512-R3C4db6bgQhlIhPU48fUtdVmKnflq+hRdad7IyKhtFj06VPNVdk2RhiYL3UjQIlso8L+YxAtFkobT0VK+S/ybg==" + }, + "mime-types": { + "version": "2.1.21", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.21.tgz", + "integrity": "sha512-3iL6DbwpyLzjR3xHSFNFeb9Nz/M8WDkX33t1GFQnFOllWk8pOrh/LSrB5OXlnlW5P9LH73X6loW/eogc+F5lJg==", + "requires": { + "mime-db": "~1.37.0" + } + }, + "minimist": { + "version": "0.0.10", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz", + "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=" + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": 
"sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + }, + "negotiator": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.1.tgz", + "integrity": "sha1-KzJxhOiZIQEXeyhWP7XnECrNDKk=" + }, + "on-finished": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", + "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", + "requires": { + "ee-first": "1.1.1" + } + }, + "optimist": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz", + "integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=", + "requires": { + "minimist": "~0.0.1", + "wordwrap": "~0.0.2" + } + }, + "parseurl": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.2.tgz", + "integrity": "sha1-/CidTtiZMRlGDBViUyYs3I3mW/M=" + }, + "path-to-regexp": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", + "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" + }, + "proxy-addr": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.4.tgz", + "integrity": "sha512-5erio2h9jp5CHGwcybmxmVqHmnCBZeewlfJ0pex+UW7Qny7OOZXTtH56TGNyBizkgiOwhJtMKrVzDTeKcySZwA==", + "requires": { + "forwarded": "~0.1.2", + "ipaddr.js": "1.8.0" + } + }, + "qs": { + "version": "6.5.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz", + "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==" + }, + "range-parser": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz", + "integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4=" + }, + "raw-body": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.3.tgz", + "integrity": "sha512-9esiElv1BrZoI3rCDuOuKCBRbuApGGaDPQfjSflGxdy4oyzqghxu6klEkkVIvBje+FF0BX9coEv8KqW6X/7njw==", + "requires": { + "bytes": "3.0.0", + "http-errors": "1.6.3", + "iconv-lite": "0.4.23", + "unpipe": "1.0.0" + } + }, + "redis": { + "version": "2.8.0", + "resolved": "https://registry.npmjs.org/redis/-/redis-2.8.0.tgz", + "integrity": "sha512-M1OkonEQwtRmZv4tEWF2VgpG0JWJ8Fv1PhlgT5+B+uNq2cA3Rt1Yt/ryoR+vQNOQcIEgdCdfH0jr3bDpihAw1A==", + "requires": { + "double-ended-queue": "^2.1.0-0", + "redis-commands": "^1.2.0", + "redis-parser": "^2.6.0" + }, + "dependencies": { + "redis-commands": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.4.0.tgz", + "integrity": "sha512-cu8EF+MtkwI4DLIT0x9P8qNTLFhQD4jLfxLR0cCNkeGzs87FN6879JOJwNQR/1zD7aSYNbU0hgsV9zGY71Itvw==" + }, + "redis-parser": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-2.6.0.tgz", + "integrity": "sha1-Uu0J2srBCPGmMcB+m2mUHnoZUEs=" + } + } + }, + "redis-commands": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.5.0.tgz", + "integrity": "sha512-6KxamqpZ468MeQC3bkWmCB1fp56XL64D4Kf0zJSwDZbVLLm7KFkoIcHrgRvQ+sk8dnhySs7+yBg94yIkAK7aJg==" + }, + "redis-parser": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-2.6.0.tgz", + "integrity": "sha1-Uu0J2srBCPGmMcB+m2mUHnoZUEs=" + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "safer-buffer": { + "version": 
"2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "secure-random-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/secure-random-string/-/secure-random-string-1.1.0.tgz", + "integrity": "sha512-V/h8jqoz58zklNGybVhP++cWrxEPXlLM/6BeJ4e0a8zlb4BsbYRzFs16snrxByPa5LUxCVTD3M6EYIVIHR1fAg==" + }, + "send": { + "version": "0.16.2", + "resolved": "https://registry.npmjs.org/send/-/send-0.16.2.tgz", + "integrity": "sha512-E64YFPUssFHEFBvpbbjr44NCLtI1AohxQ8ZSiJjQLskAdKuriYEP6VyGEsRDH8ScozGpkaX1BGvhanqCwkcEZw==", + "requires": { + "debug": "2.6.9", + "depd": "~1.1.2", + "destroy": "~1.0.4", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "~1.6.2", + "mime": "1.4.1", + "ms": "2.0.0", + "on-finished": "~2.3.0", + "range-parser": "~1.2.0", + "statuses": "~1.4.0" + } + }, + "serve-static": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.13.2.tgz", + "integrity": "sha512-p/tdJrO4U387R9oMjb1oj7qSMaMfmOyd4j9hOFoxZe2baQszgHcSWjuya/CiT5kgZZKRudHNOA0pYXOl8rQ5nw==", + "requires": { + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "parseurl": "~1.3.2", + "send": "0.16.2" + } + }, + "setprototypeof": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.0.tgz", + "integrity": "sha512-BvE/TwpZX4FXExxOxZyRGQQv651MSwmWKZGqvmPcRIjDqWub67kTKuIMx43cZZrS/cBBzwBcNDWoFxt2XEFIpQ==" + }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==" + }, + "statuses": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.4.0.tgz", + "integrity": "sha512-zhSCtt8v2NDrRlPQpCNtw/heZLtfUDqxBM1udqikb/Hbk52LK4nQSwr10u77iopCW5LsyHpuXS0GnEc48mLeew==" + }, + "type-is": { + "version": "1.6.16", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.16.tgz", + "integrity": "sha512-HRkVv/5qY2G6I8iab9cI7v1bOIdhm94dVjQCPFElW9W+3GeDOSHmy2EBYe4VTApuzolPcmgFTN3ftVJRKR2J9Q==", + "requires": { + "media-typer": "0.3.0", + "mime-types": "~2.1.18" + } + }, + "uglify-js": { + "version": "3.5.9", + "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.5.9.tgz", + "integrity": "sha512-WpT0RqsDtAWPNJK955DEnb6xjymR8Fn0OlK4TT4pS0ASYsVPqr5ELhgwOwLCP5J5vHeJ4xmMmz3DEgdqC10JeQ==", + "optional": true, + "requires": { + "commander": "~2.20.0", + "source-map": "~0.6.1" + } + }, + "unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" + }, + "utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=" + }, + "vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=" + }, + "walk": { + "version": "2.3.9", + "resolved": "https://registry.npmjs.org/walk/-/walk-2.3.9.tgz", + "integrity": "sha1-MbTbZnjyrgHDnqn7hyWpAx5Vins=", + "requires": { + "foreachasync": "^3.0.0" + } + }, + "wordwrap": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-0.0.3.tgz", + "integrity": 
"sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" + } + } +} diff --git a/images/benchmarks/node/package.json b/images/benchmarks/node/package.json new file mode 100644 index 000000000..7dcadd523 --- /dev/null +++ b/images/benchmarks/node/package.json @@ -0,0 +1,19 @@ +{ + "name": "nodedum", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "", + "license": "ISC", + "dependencies": { + "express": "^4.16.4", + "hbs": "^4.0.4", + "redis": "^2.8.0", + "redis-commands": "^1.2.0", + "redis-parser": "^2.6.0", + "secure-random-string": "^1.1.0" + } +} diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD index 363041fb7..b47400590 100644 --- a/test/benchmarks/network/BUILD +++ b/test/benchmarks/network/BUILD @@ -15,6 +15,7 @@ go_test( srcs = [ "httpd_test.go", "iperf_test.go", + "node_test.go", ], library = ":network", tags = [ diff --git a/test/benchmarks/network/node_test.go b/test/benchmarks/network/node_test.go new file mode 100644 index 000000000..2556f710f --- /dev/null +++ b/test/benchmarks/network/node_test.go @@ -0,0 +1,261 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package network + +import ( + "context" + "fmt" + "regexp" + "strconv" + "strings" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +// BenchmarkNode runs 10K requests using 'hey' against a Node server run on +// 'runtime'. The server responds to requests by grabbing some data in a +// redis instance and returns the data in its reponse. The test loops through +// increasing amounts of concurency for requests. +func BenchmarkNode(b *testing.B) { + requests := 10000 + concurrency := []int{1, 5, 10, 25} + + for _, c := range concurrency { + b.Run(fmt.Sprintf("Concurrency%d", c), func(b *testing.B) { + runNode(b, requests, c) + }) + } +} + +// runNode runs the test for a given # of requests and concurrency. +func runNode(b *testing.B, requests, concurrency int) { + b.Helper() + + // The machine to hold Redis and the Node Server. + serverMachine, err := h.GetMachine() + if err != nil { + b.Fatal("failed to get machine with: %v", err) + } + defer serverMachine.CleanUp() + + // The machine to run 'hey'. + clientMachine, err := h.GetMachine() + if err != nil { + b.Fatal("failed to get machine with: %v", err) + } + defer clientMachine.CleanUp() + + ctx := context.Background() + + // Spawn a redis instance for the app to use. 
+ redis := serverMachine.GetNativeContainer(ctx, b) + if err := redis.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/redis", + }); err != nil { + b.Fatalf("failed to spwan redis instance: %v", err) + } + defer redis.CleanUp(ctx) + + if out, err := redis.WaitForOutput(ctx, "Ready to accept connections", 3*time.Second); err != nil { + b.Fatalf("failed to start redis server: %v %s", err, out) + } + redisIP, err := redis.FindIP(ctx) + if err != nil { + b.Fatalf("failed to get IP from redis instance: %v", err) + } + + // Node runs on port 8080. + port := 8080 + + // Start-up the Node server. + nodeApp := serverMachine.GetContainer(ctx, b) + if err := nodeApp.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/node", + WorkDir: "/usr/src/app", + Links: []string{redis.MakeLink("redis")}, + Ports: []int{port}, + }, "node", "index.js", redisIP.String()); err != nil { + b.Fatalf("failed to spawn node instance: %v", err) + } + defer nodeApp.CleanUp(ctx) + + servingIP, err := serverMachine.IPAddress() + if err != nil { + b.Fatalf("failed to get ip from server: %v", err) + } + + servingPort, err := nodeApp.FindPort(ctx, port) + if err != nil { + b.Fatalf("failed to port from node instance: %v", err) + } + + // Wait until the Client sees the server as up. + harness.WaitUntilServing(ctx, clientMachine, servingIP, servingPort) + + heyCmd := strings.Split(fmt.Sprintf("hey -n %d -c %d http://%s:%d/", requests, concurrency, servingIP, servingPort), " ") + + nodeApp.RestartProfiles() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + // the client should run on Native. + client := clientMachine.GetNativeContainer(ctx, b) + out, err := client.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/hey", + }, heyCmd...) + if err != nil { + b.Fatalf("hey container failed: %v logs: %s", err, out) + } + + // Stop the timer to parse the data and report stats. + b.StopTimer() + requests, err := parseHeyRequestsPerSecond(out) + if err != nil { + b.Fatalf("failed to parse requests per second: %v", err) + } + b.ReportMetric(requests, "requests_per_second") + + bw, err := parseHeyBandwidth(out) + if err != nil { + b.Fatalf("failed to parse bandwidth: %v", err) + } + b.ReportMetric(bw, "bandwidth") + + ave, err := parseHeyAverageLatency(out) + if err != nil { + b.Fatalf("failed to parse average latency: %v", err) + } + b.ReportMetric(ave, "average_latency") + b.StartTimer() + } +} + +var heyReqPerSecondRE = regexp.MustCompile(`Requests/sec:\s*(\d+\.?\d+?)\s+`) + +// parseHeyRequestsPerSecond finds requests per second from hey output. +func parseHeyRequestsPerSecond(data string) (float64, error) { + match := heyReqPerSecondRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +var heyAverageLatencyRE = regexp.MustCompile(`Average:\s*(\d+\.?\d+?)\s+secs`) + +// parseHeyAverageLatency finds Average Latency in seconds form hey output. +func parseHeyAverageLatency(data string) (float64, error) { + match := heyAverageLatencyRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get average latency match%d : %s", len(match), data) + } + return strconv.ParseFloat(match[1], 64) +} + +var heySizePerRequestRE = regexp.MustCompile(`Size/request:\s*(\d+\.?\d+?)\s+bytes`) + +// parseHeyBandwidth computes bandwidth from request/sec * bytes/request +// and reports in bytes/second. 
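// Worked example, not in the original change, using the same figures as
// TestHeyParsers below, where hey reports Requests/sec: 89.3202 and
// Size/request: 4206 bytes:
//
//    bandwidth = 89.3202 req/s * 4206 bytes/req = 375680.76 bytes/s (approx.)
//
// matching the want value that test computes as 89.3202 * 4206.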
+func parseHeyBandwidth(data string) (float64, error) { + match := heyReqPerSecondRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get requests per second: %s", data) + } + reqPerSecond, err := strconv.ParseFloat(match[1], 64) + if err != nil { + return 0, fmt.Errorf("failed to convert %s to float", match[1]) + } + + match = heySizePerRequestRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get average latency: %s", data) + } + requestSize, err := strconv.ParseFloat(match[1], 64) + return requestSize * reqPerSecond, err +} + +// TestHeyParsers tests that the parsers work with sample output. +func TestHeyParsers(t *testing.T) { + sampleData := ` + Summary: + Total: 2.2391 secs + Slowest: 1.6292 secs + Fastest: 0.0066 secs + Average: 0.5351 secs + Requests/sec: 89.3202 + + Total data: 841200 bytes + Size/request: 4206 bytes + + Response time histogram: + 0.007 [1] | + 0.169 [0] | + 0.331 [149] |â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â–  + 0.493 [0] | + 0.656 [0] | + 0.818 [0] | + 0.980 [0] | + 1.142 [0] | + 1.305 [0] | + 1.467 [49] |â– â– â– â– â– â– â– â– â– â– â– â– â–  + 1.629 [1] | + + + Latency distribution: + 10% in 0.2149 secs + 25% in 0.2449 secs + 50% in 0.2703 secs + 75% in 1.3315 secs + 90% in 1.4045 secs + 95% in 1.4232 secs + 99% in 1.4362 secs + + Details (average, fastest, slowest): + DNS+dialup: 0.0002 secs, 0.0066 secs, 1.6292 secs + DNS-lookup: 0.0000 secs, 0.0000 secs, 0.0000 secs + req write: 0.0000 secs, 0.0000 secs, 0.0012 secs + resp wait: 0.5225 secs, 0.0064 secs, 1.4346 secs + resp read: 0.0122 secs, 0.0001 secs, 0.2006 secs + + Status code distribution: + [200] 200 responses + ` + want := 89.3202 + got, err := parseHeyRequestsPerSecond(sampleData) + if err != nil { + t.Fatalf("failed to parse request per second with: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + want = 89.3202 * 4206 + got, err = parseHeyBandwidth(sampleData) + if err != nil { + t.Fatalf("failed to parse bandwidth with: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + want = 0.5351 + got, err = parseHeyAverageLatency(sampleData) + if err != nil { + t.Fatalf("failed to parse average latency with: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + +} -- cgit v1.2.3 From 1715896fc81fce0302e790186302d7460838a918 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Wed, 29 Jul 2020 10:05:46 -0700 Subject: Port fio benchmark PiperOrigin-RevId: 323810654 --- images/benchmarks/fio/Dockerfile | 7 + test/benchmarks/fs/BUILD | 11 +- test/benchmarks/fs/bazel_test.go | 5 +- test/benchmarks/fs/fio_test.go | 369 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 images/benchmarks/fio/Dockerfile create mode 100644 test/benchmarks/fs/fio_test.go (limited to 'test') diff --git a/images/benchmarks/fio/Dockerfile b/images/benchmarks/fio/Dockerfile new file mode 100644 index 000000000..9531df7fa --- /dev/null +++ b/images/benchmarks/fio/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + fio \ + && rm -rf /var/lib/apt/lists/* diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD index 2874cdbb3..79327b57c 100644 --- a/test/benchmarks/fs/BUILD +++ b/test/benchmarks/fs/BUILD @@ -12,12 +12,19 @@ go_library( go_test( name = "fs_test", size = 
"large", - srcs = ["bazel_test.go"], + srcs = [ + "bazel_test.go", + "fio_test.go", + ], library = ":fs", tags = [ # Requires docker and runsc to be configured before test runs. "local", "manual", ], - deps = ["//pkg/test/dockerutil"], + deps = [ + "//pkg/test/dockerutil", + "//test/benchmarks/harness", + "@com_github_docker_docker//api/types/mount:go_default_library", + ], ) diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go index 9b652fd43..3854aa87c 100644 --- a/test/benchmarks/fs/bazel_test.go +++ b/test/benchmarks/fs/bazel_test.go @@ -20,6 +20,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" ) // Note: CleanCache versions of this test require running with root permissions. @@ -77,8 +78,8 @@ func BenchmarkABSL(b *testing.B) { b.StopTimer() // Drop Caches for clear cache runs. if bm.clearCache { - if out, err := machine.RunCommand("/bin/sh", "-c", "sync && sysctl vm.drop_caches=3"); err != nil { - b.Skipf("failed to drop caches: %v %s. You probably need root.", err, out) + if err := harness.DropCaches(machine); err != nil { + b.Skipf("failed to drop caches: %v. You probably need root.", err) } } b.StartTimer() diff --git a/test/benchmarks/fs/fio_test.go b/test/benchmarks/fs/fio_test.go new file mode 100644 index 000000000..75d52726a --- /dev/null +++ b/test/benchmarks/fs/fio_test.go @@ -0,0 +1,369 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package fs + +import ( + "context" + "encoding/json" + "fmt" + "path/filepath" + "strconv" + "strings" + "testing" + + "github.com/docker/docker/api/types/mount" + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +type fioTestCase struct { + test string // test to run: read, write, randread, randwrite. + size string // total size to be read/written of format N[GMK] (e.g. 5G). + blocksize string // blocksize to be read/write of format N[GMK] (e.g. 4K). + iodepth int // iodepth for reads/writes. + time int // time to run the test in seconds, usually for rand(read/write). +} + +// makeCmdFromTestcase makes a fio command. +func (f *fioTestCase) makeCmdFromTestcase(filename string) []string { + cmd := []string{"fio", "--output-format=json", "--ioengine=sync"} + cmd = append(cmd, fmt.Sprintf("--name=%s", f.test)) + cmd = append(cmd, fmt.Sprintf("--size=%s", f.size)) + cmd = append(cmd, fmt.Sprintf("--blocksize=%s", f.blocksize)) + cmd = append(cmd, fmt.Sprintf("--filename=%s", filename)) + cmd = append(cmd, fmt.Sprintf("--iodepth=%d", f.iodepth)) + cmd = append(cmd, fmt.Sprintf("--rw=%s", f.test)) + if f.time != 0 { + cmd = append(cmd, "--time_based") + cmd = append(cmd, fmt.Sprintf("--runtime=%d", f.time)) + } + return cmd +} + +// BenchmarkFio runs fio on the runtime under test. There are 4 basic test +// cases each run on a tmpfs mount and a bind mount. Fio requires root so that +// caches can be dropped. 
+func BenchmarkFio(b *testing.B) { + testCases := []fioTestCase{ + fioTestCase{ + test: "write", + size: "5G", + blocksize: "1M", + iodepth: 4, + }, + fioTestCase{ + test: "read", + size: "5G", + blocksize: "1M", + iodepth: 4, + }, + fioTestCase{ + test: "randwrite", + size: "5G", + blocksize: "4K", + iodepth: 4, + time: 30, + }, + fioTestCase{ + test: "randread", + size: "5G", + blocksize: "4K", + iodepth: 4, + time: 30, + }, + } + + machine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine with: %v", err) + } + defer machine.CleanUp() + + for _, fsType := range []mount.Type{mount.TypeBind, mount.TypeTmpfs} { + for _, tc := range testCases { + testName := strings.Title(tc.test) + strings.Title(string(fsType)) + b.Run(testName, func(b *testing.B) { + ctx := context.Background() + container := machine.GetContainer(ctx, b) + defer container.CleanUp(ctx) + + // Directory and filename inside container where fio will read/write. + outdir := "/data" + outfile := filepath.Join(outdir, "test.txt") + + // Make the required mount and grab a cleanup for bind mounts + // as they are backed by a temp directory (mktemp). + mnt, mountCleanup, err := makeMount(machine, fsType, outdir) + if err != nil { + b.Fatalf("failed to make mount: %v", err) + } + defer mountCleanup() + cmd := tc.makeCmdFromTestcase(outfile) + + // Start the container with the mount. + if err := container.Spawn( + ctx, + dockerutil.RunOpts{ + Image: "benchmarks/fio", + Mounts: []mount.Mount{ + mnt, + }, + }, + // Sleep on the order of b.N. + "sleep", fmt.Sprintf("%d", 1000*b.N), + ); err != nil { + b.Fatalf("failed to start fio container with: %v", err) + } + + // For reads, we need a file to read so make one inside the container. + if strings.Contains(tc.test, "read") { + fallocateCmd := fmt.Sprintf("fallocate -l %s %s", tc.size, outfile) + if out, err := container.Exec(ctx, dockerutil.ExecOpts{}, + strings.Split(fallocateCmd, " ")...); err != nil { + b.Fatalf("failed to create readable file on mount: %v, %s", err, out) + } + } + + // Drop caches just before running. + if err := harness.DropCaches(machine); err != nil { + b.Skipf("failed to drop caches with %v. You probably need root.", err) + } + container.RestartProfiles() + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Run fio. + data, err := container.Exec(ctx, dockerutil.ExecOpts{}, cmd...) + if err != nil { + b.Fatalf("failed to run cmd %v: %v", cmd, err) + } + b.StopTimer() + // Parse the output and report the metrics. + isRead := strings.Contains(tc.test, "read") + bw, err := parseBandwidth(data, isRead) + if err != nil { + b.Fatalf("failed to parse bandwidth from %s with: %v", data, err) + } + b.ReportMetric(bw, "bandwidth") // in b/s. + + iops, err := parseIOps(data, isRead) + if err != nil { + b.Fatalf("failed to parse iops from %s with: %v", data, err) + } + b.ReportMetric(iops, "iops") + // If b.N is used (i.e. we run for an hour), we should drop caches + // after each run. + if err := harness.DropCaches(machine); err != nil { + b.Fatalf("failed to drop caches: %v", err) + } + b.StartTimer() + } + }) + } + } +} + +// makeMount makes a mount and cleanup based on the requested type. Bind +// and volume mounts are backed by a temp directory made with mktemp. +// tmpfs mounts require no such backing and are just made. +// It is up to the caller to call the returned cleanup. 
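// Usage sketch, not in the original change, mirroring the call BenchmarkFio
// makes above:
//
//    mnt, cleanup, err := makeMount(machine, mount.TypeTmpfs, "/data")
//    if err != nil {
//        b.Fatalf("failed to make mount: %v", err)
//    }
//    defer cleanup()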
+func makeMount(machine harness.Machine, mountType mount.Type, target string) (mount.Mount, func(), error) { + switch mountType { + case mount.TypeVolume, mount.TypeBind: + dir, err := machine.RunCommand("mktemp", "-d") + if err != nil { + return mount.Mount{}, func() {}, fmt.Errorf("failed to create tempdir: %v", err) + } + dir = strings.TrimSuffix(dir, "\n") + + out, err := machine.RunCommand("chmod", "777", dir) + if err != nil { + machine.RunCommand("rm", "-rf", dir) + return mount.Mount{}, func() {}, fmt.Errorf("failed modify directory: %v %s", err, out) + } + return mount.Mount{ + Target: target, + Source: dir, + Type: mount.TypeBind, + }, func() { machine.RunCommand("rm", "-rf", dir) }, nil + case mount.TypeTmpfs: + return mount.Mount{ + Target: target, + Type: mount.TypeTmpfs, + }, func() {}, nil + default: + return mount.Mount{}, func() {}, fmt.Errorf("illegal mount time not supported: %v", mountType) + } +} + +// parseBandwidth reports the bandwidth in b/s. +func parseBandwidth(data string, isRead bool) (float64, error) { + if isRead { + result, err := parseFioJSON(data, "read", "bw") + if err != nil { + return 0, err + } + return 1024 * result, nil + } + result, err := parseFioJSON(data, "write", "bw") + if err != nil { + return 0, err + } + return 1024 * result, nil +} + +// parseIOps reports the write IO per second metric. +func parseIOps(data string, isRead bool) (float64, error) { + if isRead { + return parseFioJSON(data, "read", "iops") + } + return parseFioJSON(data, "write", "iops") +} + +// fioResult is for parsing FioJSON. +type fioResult struct { + Jobs []fioJob +} + +// fioJob is for parsing FioJSON. +type fioJob map[string]json.RawMessage + +// fioMetrics is for parsing FioJSON. +type fioMetrics map[string]json.RawMessage + +// parseFioJSON parses data and grabs "op" (read or write) and "metric" +// (bw or iops) from the JSON. +func parseFioJSON(data, op, metric string) (float64, error) { + var result fioResult + if err := json.Unmarshal([]byte(data), &result); err != nil { + return 0, fmt.Errorf("could not unmarshal data: %v", err) + } + + if len(result.Jobs) < 1 { + return 0, fmt.Errorf("no jobs present to parse") + } + + var metrics fioMetrics + if err := json.Unmarshal(result.Jobs[0][op], &metrics); err != nil { + return 0, fmt.Errorf("could not unmarshal jobs: %v", err) + } + + if _, ok := metrics[metric]; !ok { + return 0, fmt.Errorf("no metric found for op: %s", op) + } + return strconv.ParseFloat(string(metrics[metric]), 64) +} + +// TestParsers tests that the parsers work on sampleData. 
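// Explanatory note, not in the original change: parseFioJSON walks the fio
// JSON as Jobs[0][op][metric]. For the sample output in TestParsers below,
// parseFioJSON(sampleData, "write", "bw") returns 1753471, which
// parseBandwidth then scales by 1024 to bytes per second, matching the
// test's expected value of 1753471.0 * 1024.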
+func TestParsers(t *testing.T) { + sampleData := ` +{ + "fio version" : "fio-3.1", + "timestamp" : 1554837456, + "timestamp_ms" : 1554837456621, + "time" : "Tue Apr 9 19:17:36 2019", + "jobs" : [ + { + "jobname" : "test", + "groupid" : 0, + "error" : 0, + "eta" : 2147483647, + "elapsed" : 1, + "job options" : { + "name" : "test", + "ioengine" : "sync", + "size" : "1073741824", + "filename" : "/disk/file.dat", + "iodepth" : "4", + "bs" : "4096", + "rw" : "write" + }, + "read" : { + "io_bytes" : 0, + "io_kbytes" : 0, + "bw" : 123456, + "iops" : 1234.5678, + "runtime" : 0, + "total_ios" : 0, + "short_ios" : 0, + "bw_min" : 0, + "bw_max" : 0, + "bw_agg" : 0.000000, + "bw_mean" : 0.000000, + "bw_dev" : 0.000000, + "bw_samples" : 0, + "iops_min" : 0, + "iops_max" : 0, + "iops_mean" : 0.000000, + "iops_stddev" : 0.000000, + "iops_samples" : 0 + }, + "write" : { + "io_bytes" : 1073741824, + "io_kbytes" : 1048576, + "bw" : 1753471, + "iops" : 438367.892977, + "runtime" : 598, + "total_ios" : 262144, + "bw_min" : 1731120, + "bw_max" : 1731120, + "bw_agg" : 98.725328, + "bw_mean" : 1731120.000000, + "bw_dev" : 0.000000, + "bw_samples" : 1, + "iops_min" : 432780, + "iops_max" : 432780, + "iops_mean" : 432780.000000, + "iops_stddev" : 0.000000, + "iops_samples" : 1 + } + } + ] +} +` + // WriteBandwidth. + got, err := parseBandwidth(sampleData, false) + var want float64 = 1753471.0 * 1024 + if err != nil { + t.Fatalf("parse failed with err: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + // ReadBandwidth. + got, err = parseBandwidth(sampleData, true) + want = 123456 * 1024 + if err != nil { + t.Fatalf("parse failed with err: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + // WriteIOps. + got, err = parseIOps(sampleData, false) + want = 438367.892977 + if err != nil { + t.Fatalf("parse failed with err: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + // ReadIOps. + got, err = parseIOps(sampleData, true) + want = 1234.5678 + if err != nil { + t.Fatalf("parse failed with err: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } +} -- cgit v1.2.3 From 4cd4759238655e35c8dc63723f4e55014b5ea9ea Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Wed, 29 Jul 2020 14:56:14 -0700 Subject: Force registration for EPOLLHUP, not EPOLLRDHUP, in vfs2's epoll. Compare Linux's fs/eventpoll.c:do_epoll_ctl(). I don't know where EPOLLRDHUP came from. PiperOrigin-RevId: 323874419 --- pkg/sentry/vfs/epoll.go | 4 ++-- test/syscalls/linux/epoll.cc | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go index 599c3131c..5b009b928 100644 --- a/pkg/sentry/vfs/epoll.go +++ b/pkg/sentry/vfs/epoll.go @@ -186,7 +186,7 @@ func (ep *EpollInstance) AddInterest(file *FileDescription, num int32, event lin } // Register interest in file. - mask := event.Events | linux.EPOLLERR | linux.EPOLLRDHUP + mask := event.Events | linux.EPOLLERR | linux.EPOLLHUP epi := &epollInterest{ epoll: ep, key: key, @@ -257,7 +257,7 @@ func (ep *EpollInstance) ModifyInterest(file *FileDescription, num int32, event } // Update epi for the next call to ep.ReadEvents(). 
- mask := event.Events | linux.EPOLLERR | linux.EPOLLRDHUP + mask := event.Events | linux.EPOLLERR | linux.EPOLLHUP ep.mu.Lock() epi.mask = mask epi.userData = event.Data diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc index f57d38dc7..2101e5c9f 100644 --- a/test/syscalls/linux/epoll.cc +++ b/test/syscalls/linux/epoll.cc @@ -422,6 +422,28 @@ TEST(EpollTest, CloseFile) { SyscallSucceedsWithValue(0)); } +TEST(EpollTest, PipeReaderHupAfterWriterClosed) { + auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + int pipefds[2]; + ASSERT_THAT(pipe(pipefds), SyscallSucceeds()); + FileDescriptor rfd(pipefds[0]); + FileDescriptor wfd(pipefds[1]); + + ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), rfd.get(), 0, kMagicConstant)); + struct epoll_event result[kFDsPerEpoll]; + // Initially, rfd should not generate any events of interest. + ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 0), + SyscallSucceedsWithValue(0)); + // Close the write end of the pipe. + wfd.reset(); + // rfd should now generate EPOLLHUP, which EPOLL_CTL_ADD unconditionally adds + // to the set of events of interest. + ASSERT_THAT(epoll_wait(epollfd.get(), result, kFDsPerEpoll, 0), + SyscallSucceedsWithValue(1)); + EXPECT_EQ(result[0].events, EPOLLHUP); + EXPECT_EQ(result[0].data.u64, kMagicConstant); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 84496b3a619def2db0e25ab41f029823621bc713 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 30 Jul 2020 09:23:44 -0700 Subject: Disable consistently failing test. PiperOrigin-RevId: 324017310 --- test/syscalls/linux/socket_inet_loopback_nogotsan.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc index 2324c7f6a..791e2bd51 100644 --- a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc +++ b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc @@ -82,8 +82,11 @@ using SocketInetLoopbackTest = ::testing::TestWithParam; // This test verifies that connect returns EADDRNOTAVAIL if all local ephemeral // ports are already in use for a given destination ip/port. +// // We disable S/R because this test creates a large number of sockets. -TEST_P(SocketInetLoopbackTest, TestTCPPortExhaustion_NoRandomSave) { +// +// FIXME(b/162475855): This test is failing reliably. +TEST_P(SocketInetLoopbackTest, DISABLED_TestTCPPortExhaustion_NoRandomSave) { auto const& param = GetParam(); TestAddress const& listener = param.listener; TestAddress const& connector = param.connector; -- cgit v1.2.3 From 2775ecd931ec67a8ba68f743b684d88c19d45d44 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Thu, 30 Jul 2020 10:16:06 -0700 Subject: Update call in Node benchmark. 
PiperOrigin-RevId: 324028183 --- test/benchmarks/network/node_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/benchmarks/network/node_test.go b/test/benchmarks/network/node_test.go index 2556f710f..f9278ab66 100644 --- a/test/benchmarks/network/node_test.go +++ b/test/benchmarks/network/node_test.go @@ -73,7 +73,7 @@ func runNode(b *testing.B, requests, concurrency int) { if out, err := redis.WaitForOutput(ctx, "Ready to accept connections", 3*time.Second); err != nil { b.Fatalf("failed to start redis server: %v %s", err, out) } - redisIP, err := redis.FindIP(ctx) + redisIP, err := redis.FindIP(ctx, false) if err != nil { b.Fatalf("failed to get IP from redis instance: %v", err) } -- cgit v1.2.3 From f7281902f8f6f748e661c2b18a0bd3b227a55ce8 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Thu, 30 Jul 2020 10:19:36 -0700 Subject: [runtime tests] go language test enhancement - Unexported some passing tests. This will increase the testing surface and will be especially helpful when this is enabled for vfs2. - Run tool tests with -v (verbose output). We only print the output when a test fails so this should not clutter the output. - Run tool tests with "-no-rebuild" flag. - Surround test name with appropriate regex, i.e. ^testname$. This will ensure that only that test is run. Earlier running go_test:os would also run go_test:os/exec, go_test:os/signal, go_test:os/user. This should help speed up the tests as we do not run the same test multiple times anymore. - Updated bugs. Updates #3191 PiperOrigin-RevId: 324028878 --- test/runtimes/exclude_go1.12.csv | 11 ++++------- test/runtimes/proctor/go.go | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'test') diff --git a/test/runtimes/exclude_go1.12.csv b/test/runtimes/exclude_go1.12.csv index 8c8ae0c5d..81e02cf64 100644 --- a/test/runtimes/exclude_go1.12.csv +++ b/test/runtimes/exclude_go1.12.csv @@ -2,15 +2,12 @@ test name,bug id,comment cgo_errors,,FLAKY cgo_test,,FLAKY go_test:cmd/go,,FLAKY -go_test:cmd/vendor/golang.org/x/sys/unix,b/118783622,/dev devices missing -go_test:net,b/118784196,socket: invalid argument. Works as intended: see bug. +go_test:net,b/162473575,setsockopt: protocol not available. go_test:os,b/118780122,we have a pollable filesystem but that's a surprise -go_test:os/signal,b/118780860,/dev/pts not properly supported -go_test:runtime,b/118782341,sigtrap not reported or caught or something -go_test:syscall,b/118781998,bad bytes -- bad mem addr -race,b/118782931,thread sanitizer. Works as intended: b/62219744. +go_test:os/signal,b/118780860,/dev/pts not properly supported. Also being tracked in b/29356795. +go_test:runtime,b/118782341,sigtrap not reported or caught or something. Also being tracked in b/33003106. +go_test:syscall,b/118781998,bad bytes -- bad mem addr; FcntlFlock(F_GETLK) not supported. 
runtime:cpu124,b/118778254,segmentation fault test:0_1,,FLAKY -testasan,, testcarchive,b/118782924,no sigpipe testshared,,FLAKY diff --git a/test/runtimes/proctor/go.go b/test/runtimes/proctor/go.go index 073c2959d..d0ae844e6 100644 --- a/test/runtimes/proctor/go.go +++ b/test/runtimes/proctor/go.go @@ -81,13 +81,13 @@ func (goRunner) TestCmds(tests []string) []*exec.Cmd { if strings.HasSuffix(test, ".go") { onDiskTests = append(onDiskTests, test) } else { - toolTests = append(toolTests, test) + toolTests = append(toolTests, "^"+test+"$") } } var cmds []*exec.Cmd if len(toolTests) > 0 { - cmds = append(cmds, exec.Command("go", "tool", "dist", "test", "-run", strings.Join(toolTests, "\\|"))) + cmds = append(cmds, exec.Command("go", "tool", "dist", "test", "-v", "-no-rebuild", "-run", strings.Join(toolTests, "\\|"))) } if len(onDiskTests) > 0 { cmd := exec.Command("go", append([]string{"run", "run.go", "-v", "--"}, onDiskTests...)...) -- cgit v1.2.3 From c43305731e04ce8d4f2c9b2949c326abc9c2b77e Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 30 Jul 2020 10:21:12 -0700 Subject: Fix SETOWN_EX return value. Return on success should be 0, not size of the struct copied out. PiperOrigin-RevId: 324029193 --- pkg/sentry/syscalls/linux/sys_file.go | 8 ++++---- pkg/sentry/syscalls/linux/vfs2/fd.go | 4 ++-- test/syscalls/linux/fcntl.cc | 38 ++++++++++++++++++++--------------- 3 files changed, 28 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 2797c6a72..8cf6401e7 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -1057,7 +1057,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.F_SETOWN_EX: addr := args[2].Pointer() var owner linux.FOwnerEx - n, err := t.CopyIn(addr, &owner) + _, err := t.CopyIn(addr, &owner) if err != nil { return 0, nil, err } @@ -1069,21 +1069,21 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, syserror.ESRCH } a.SetOwnerTask(t, task) - return uintptr(n), nil, nil + return 0, nil, nil case linux.F_OWNER_PID: tg := t.PIDNamespace().ThreadGroupWithID(kernel.ThreadID(owner.PID)) if tg == nil { return 0, nil, syserror.ESRCH } a.SetOwnerThreadGroup(t, tg) - return uintptr(n), nil, nil + return 0, nil, nil case linux.F_OWNER_PGRP: pg := t.PIDNamespace().ProcessGroupWithID(kernel.ProcessGroupID(owner.PID)) if pg == nil { return 0, nil, syserror.ESRCH } a.SetOwnerProcessGroup(t, pg) - return uintptr(n), nil, nil + return 0, nil, nil default: return 0, nil, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 517394ba9..67f191551 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -185,11 +185,11 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err case linux.F_SETOWN_EX: var owner linux.FOwnerEx - n, err := t.CopyIn(args[2].Pointer(), &owner) + _, err := t.CopyIn(args[2].Pointer(), &owner) if err != nil { return 0, nil, err } - return uintptr(n), nil, setAsyncOwner(t, file, owner.Type, owner.PID) + return 0, nil, setAsyncOwner(t, file, owner.Type, owner.PID) case linux.F_GETPIPE_SZ: pipefile, ok := file.Impl().(*pipe.VFSPipeFD) if !ok { diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 5467fa2c8..34016d4bd 100644 --- a/test/syscalls/linux/fcntl.cc +++ 
b/test/syscalls/linux/fcntl.cc @@ -1004,7 +1004,8 @@ TEST(FcntlTest, SetOwnPid) { pid_t pid; EXPECT_THAT(pid = getpid(), SyscallSucceeds()); - ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), + SyscallSucceedsWithValue(0)); EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), SyscallSucceedsWithValue(pid)); @@ -1018,7 +1019,8 @@ TEST(FcntlTest, SetOwnPgrp) { pid_t pgid; EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds()); - ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), + SyscallSucceedsWithValue(0)); // Verify with F_GETOWN_EX; using F_GETOWN on Linux may incorrectly treat the // negative return value as an error, converting the return value to -1 and @@ -1038,8 +1040,10 @@ TEST(FcntlTest, SetOwnUnset) { // Set and unset pid. pid_t pid; EXPECT_THAT(pid = getpid(), SyscallSucceeds()); - ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds()); - ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), + SyscallSucceedsWithValue(0)); EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), SyscallSucceedsWithValue(0)); @@ -1047,8 +1051,10 @@ TEST(FcntlTest, SetOwnUnset) { // Set and unset pgid. pid_t pgid; EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds()); - ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds()); - ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), + SyscallSucceedsWithValue(0)); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), + SyscallSucceedsWithValue(0)); EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), SyscallSucceedsWithValue(0)); @@ -1120,7 +1126,7 @@ TEST(FcntlTest, SetOwnExTid) { EXPECT_THAT(owner.pid = syscall(__NR_gettid), SyscallSucceeds()); ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), SyscallSucceedsWithValue(owner.pid)); @@ -1136,7 +1142,7 @@ TEST(FcntlTest, SetOwnExPid) { EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds()); ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), SyscallSucceedsWithValue(owner.pid)); @@ -1152,7 +1158,7 @@ TEST(FcntlTest, SetOwnExPgrp) { EXPECT_THAT(set_owner.pid = getpgrp(), SyscallSucceeds()); ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); // Verify with F_GETOWN_EX; using F_GETOWN on Linux may incorrectly treat the // negative return value as an error, converting the return value to -1 and @@ -1176,10 +1182,10 @@ TEST(FcntlTest, SetOwnExUnset) { owner.type = F_OWNER_PID; EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds()); ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); owner.pid = 0; ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), SyscallSucceedsWithValue(0)); @@ -1188,10 +1194,10 @@ TEST(FcntlTest, SetOwnExUnset) { owner.type = F_OWNER_PGRP; EXPECT_THAT(owner.pid = 
getpgrp(), SyscallSucceeds()); ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); owner.pid = 0; ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), SyscallSucceedsWithValue(0)); @@ -1207,7 +1213,7 @@ TEST(FcntlTest, GetOwnExTid) { EXPECT_THAT(set_owner.pid = syscall(__NR_gettid), SyscallSucceeds()); ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); f_owner_ex got_owner = {}; ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), @@ -1225,7 +1231,7 @@ TEST(FcntlTest, GetOwnExPid) { EXPECT_THAT(set_owner.pid = getpid(), SyscallSucceeds()); ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); f_owner_ex got_owner = {}; ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), @@ -1243,7 +1249,7 @@ TEST(FcntlTest, GetOwnExPgrp) { EXPECT_THAT(set_owner.pid = getpgrp(), SyscallSucceeds()); ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &set_owner), - SyscallSucceeds()); + SyscallSucceedsWithValue(0)); f_owner_ex got_owner = {}; ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN_EX, &got_owner), -- cgit v1.2.3 From 78f1a18ab31fdc15155dd83d3ca96129b5031711 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Thu, 30 Jul 2020 13:31:05 -0700 Subject: Add runsc build benchmark. PiperOrigin-RevId: 324071377 --- images/benchmarks/runsc/Dockerfile | 24 ++++++++++++++++++++++++ test/benchmarks/fs/bazel_test.go | 25 +++++++++++++++++-------- 2 files changed, 41 insertions(+), 8 deletions(-) create mode 100644 images/benchmarks/runsc/Dockerfile (limited to 'test') diff --git a/images/benchmarks/runsc/Dockerfile b/images/benchmarks/runsc/Dockerfile new file mode 100644 index 000000000..6c3aafa57 --- /dev/null +++ b/images/benchmarks/runsc/Dockerfile @@ -0,0 +1,24 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + wget \ + git \ + pkg-config \ + zip \ + g++ \ + zlib1g-dev \ + unzip \ + python-minimal \ + python3 \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* +RUN wget https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-installer-linux-x86_64.sh +RUN chmod +x bazel-3.4.1-installer-linux-x86_64.sh +RUN ./bazel-3.4.1-installer-linux-x86_64.sh + +# Download release-20200601.0 +RUN mkdir gvisor && cd gvisor \ + && git init && git remote add origin https://github.com/google/gvisor.git \ + && git fetch --depth 1 origin a9b47390c821942d60784e308f681f213645049c && git checkout FETCH_HEAD diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go index 3854aa87c..f4236ba37 100644 --- a/test/benchmarks/fs/bazel_test.go +++ b/test/benchmarks/fs/bazel_test.go @@ -24,7 +24,18 @@ import ( ) // Note: CleanCache versions of this test require running with root permissions. -func BenchmarkABSL(b *testing.B) { +func BenchmarkBuildABSL(b *testing.B) { + runBuildBenchmark(b, "benchmarks/absl", "/abseil-cpp", "absl/base/...") +} + +// Note: CleanCache versions of this test require running with root permissions. +// Note: This test takes on the order of 10m per permutation for runsc on kvm. 
+func BenchmarkBuildRunsc(b *testing.B) { + runBuildBenchmark(b, "benchmarks/runsc", "/gvisor", "runsc:runsc") +} + +func runBuildBenchmark(b *testing.B, image, workdir, target string) { + b.Helper() // Get a machine from the Harness on which to run. machine, err := h.GetMachine() if err != nil { @@ -51,20 +62,18 @@ func BenchmarkABSL(b *testing.B) { container := machine.GetContainer(ctx, b) defer container.CleanUp(ctx) - workdir := "/abseil-cpp" - // Start a container and sleep by an order of b.N. if err := container.Spawn(ctx, dockerutil.RunOpts{ - Image: "benchmarks/absl", + Image: image, }, "sleep", fmt.Sprintf("%d", 1000000)); err != nil { b.Fatalf("run failed with: %v", err) } // If we are running on a tmpfs, copy to /tmp which is a tmpfs. if bm.tmpfs { - if _, err := container.Exec(ctx, dockerutil.ExecOpts{}, - "cp", "-r", "/abseil-cpp", "/tmp/."); err != nil { - b.Fatal("failed to copy directory: %v", err) + if out, err := container.Exec(ctx, dockerutil.ExecOpts{}, + "cp", "-r", workdir, "/tmp/."); err != nil { + b.Fatal("failed to copy directory: %v %s", err, out) } workdir = "/tmp" + workdir } @@ -86,7 +95,7 @@ func BenchmarkABSL(b *testing.B) { got, err := container.Exec(ctx, dockerutil.ExecOpts{ WorkDir: workdir, - }, "bazel", "build", "-c", "opt", "absl/base/...") + }, "bazel", "build", "-c", "opt", target) if err != nil { b.Fatalf("build failed with: %v", err) } -- cgit v1.2.3 From 3c70b4c986a2a6bb9b26f96e88f7fee878f29326 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Thu, 30 Jul 2020 14:13:11 -0700 Subject: Implement overlayfs_stale_read for vfs2. PiperOrigin-RevId: 324080111 --- runsc/boot/vfs.go | 13 +++++++++++-- test/e2e/integration_test.go | 4 +++- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 9a1ed8e9e..cfe2d36aa 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -171,10 +171,19 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { fd := c.fds.remove() - opts := strings.Join(p9MountData(fd, conf.FileAccess, true /* vfs2 */), ",") + opts := p9MountData(fd, conf.FileAccess, true /* vfs2 */) + + if conf.OverlayfsStaleRead { + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. 
+ opts = append(opts, "overlayfs_stale_read") + } log.Infof("Mounting root over 9P, ioFD: %d", fd) - mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts}) + mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{ + Data: strings.Join(opts, ","), + }) if err != nil { return nil, fmt.Errorf("setting up mount namespace: %w", err) } diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index ef42b689a..5465dee9b 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -431,11 +431,13 @@ func TestHostOverlayfsCopyUp(t *testing.T) { d := dockerutil.MakeContainer(ctx, t) defer d.CleanUp(ctx) - if _, err := d.Run(ctx, dockerutil.RunOpts{ + if got, err := d.Run(ctx, dockerutil.RunOpts{ Image: "basic/hostoverlaytest", WorkDir: "/root", }, "./test"); err != nil { t.Fatalf("docker run failed: %v", err) + } else if got != "" { + t.Errorf("test failed:\n%s", got) } } -- cgit v1.2.3 From 1b11326ecdd788fccc8e396b7467bbbb591d563f Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Thu, 30 Jul 2020 15:00:42 -0700 Subject: Call lseek(0, SEEK_CUR) unconditionally in runsc fsgofer's Readdir(offset=0). 9P2000.L is silent as to how readdir RPCs interact with directory mutation. The most performant option is for Treaddir with offset=0 to restart iteration, avoiding needing to walk+open+clunk a new directory fid between invocations of getdents64(2), and the VFS2 gofer client assumes this is the case. Make this actually true for the runsc fsgofer. Fixes #3344, #3345, #3355 PiperOrigin-RevId: 324090384 --- images/basic/hostoverlaytest/Dockerfile | 3 +- images/basic/hostoverlaytest/copy_up_testfile.txt | 1 + images/basic/hostoverlaytest/test.c | 88 ----------------------- images/basic/hostoverlaytest/test_copy_up.c | 88 +++++++++++++++++++++++ images/basic/hostoverlaytest/test_rewinddir.c | 78 ++++++++++++++++++++ images/basic/hostoverlaytest/testfile.txt | 1 - runsc/fsgofer/fsgofer.go | 9 ++- test/e2e/integration_test.go | 28 +++++++- 8 files changed, 202 insertions(+), 94 deletions(-) create mode 100644 images/basic/hostoverlaytest/copy_up_testfile.txt delete mode 100644 images/basic/hostoverlaytest/test.c create mode 100644 images/basic/hostoverlaytest/test_copy_up.c create mode 100644 images/basic/hostoverlaytest/test_rewinddir.c delete mode 100644 images/basic/hostoverlaytest/testfile.txt (limited to 'test') diff --git a/images/basic/hostoverlaytest/Dockerfile b/images/basic/hostoverlaytest/Dockerfile index d83439e9c..6cef1a542 100644 --- a/images/basic/hostoverlaytest/Dockerfile +++ b/images/basic/hostoverlaytest/Dockerfile @@ -4,4 +4,5 @@ WORKDIR /root COPY . . 
RUN apt-get update && apt-get install -y gcc -RUN gcc -O2 -o test test.c +RUN gcc -O2 -o test_copy_up test_copy_up.c +RUN gcc -O2 -o test_rewinddir test_rewinddir.c diff --git a/images/basic/hostoverlaytest/copy_up_testfile.txt b/images/basic/hostoverlaytest/copy_up_testfile.txt new file mode 100644 index 000000000..e4188c841 --- /dev/null +++ b/images/basic/hostoverlaytest/copy_up_testfile.txt @@ -0,0 +1 @@ +old data diff --git a/images/basic/hostoverlaytest/test.c b/images/basic/hostoverlaytest/test.c deleted file mode 100644 index 088f90746..000000000 --- a/images/basic/hostoverlaytest/test.c +++ /dev/null @@ -1,88 +0,0 @@ -#include -#include -#include -#include -#include -#include - -int main(int argc, char** argv) { - const char kTestFilePath[] = "testfile.txt"; - const char kOldFileData[] = "old data\n"; - const char kNewFileData[] = "new data\n"; - const size_t kPageSize = sysconf(_SC_PAGE_SIZE); - - // Open a file that already exists in a host overlayfs lower layer. - const int fd_rdonly = open(kTestFilePath, O_RDONLY); - if (fd_rdonly < 0) { - err(1, "open(%s, O_RDONLY)", kTestFilePath); - } - - // Check that the file's initial contents are what we expect when read via - // syscall. - char oldbuf[sizeof(kOldFileData)] = {}; - ssize_t n = pread(fd_rdonly, oldbuf, sizeof(oldbuf), 0); - if (n < 0) { - err(1, "initial pread"); - } - if (n != strlen(kOldFileData)) { - errx(1, "short initial pread (%ld/%lu bytes)", n, strlen(kOldFileData)); - } - if (strcmp(oldbuf, kOldFileData) != 0) { - errx(1, "initial pread returned wrong data: %s", oldbuf); - } - - // Check that the file's initial contents are what we expect when read via - // memory mapping. - void* page = mmap(NULL, kPageSize, PROT_READ, MAP_SHARED, fd_rdonly, 0); - if (page == MAP_FAILED) { - err(1, "mmap"); - } - if (strcmp(page, kOldFileData) != 0) { - errx(1, "mapping contains wrong initial data: %s", (const char*)page); - } - - // Open the same file writably, causing host overlayfs to copy it up, and - // replace its contents. - const int fd_rdwr = open(kTestFilePath, O_RDWR); - if (fd_rdwr < 0) { - err(1, "open(%s, O_RDWR)", kTestFilePath); - } - n = write(fd_rdwr, kNewFileData, strlen(kNewFileData)); - if (n < 0) { - err(1, "write"); - } - if (n != strlen(kNewFileData)) { - errx(1, "short write (%ld/%lu bytes)", n, strlen(kNewFileData)); - } - if (ftruncate(fd_rdwr, strlen(kNewFileData)) < 0) { - err(1, "truncate"); - } - - int failed = 0; - - // Check that syscalls on the old FD return updated contents. (Before Linux - // 4.18, this requires that runsc use a post-copy-up FD to service the read.) - char newbuf[sizeof(kNewFileData)] = {}; - n = pread(fd_rdonly, newbuf, sizeof(newbuf), 0); - if (n < 0) { - err(1, "final pread"); - } - if (n != strlen(kNewFileData)) { - warnx("short final pread (%ld/%lu bytes)", n, strlen(kNewFileData)); - failed = 1; - } else if (strcmp(newbuf, kNewFileData) != 0) { - warnx("final pread returned wrong data: %s", newbuf); - failed = 1; - } - - // Check that the memory mapping of the old FD has been updated. (Linux - // overlayfs does not do this, so regardless of kernel version this requires - // that runsc replace existing memory mappings with mappings of a - // post-copy-up FD.) 
- if (strcmp(page, kNewFileData) != 0) { - warnx("mapping contains wrong final data: %s", (const char*)page); - failed = 1; - } - - return failed; -} diff --git a/images/basic/hostoverlaytest/test_copy_up.c b/images/basic/hostoverlaytest/test_copy_up.c new file mode 100644 index 000000000..010b261dc --- /dev/null +++ b/images/basic/hostoverlaytest/test_copy_up.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + const char kTestFilePath[] = "copy_up_testfile.txt"; + const char kOldFileData[] = "old data\n"; + const char kNewFileData[] = "new data\n"; + const size_t kPageSize = sysconf(_SC_PAGE_SIZE); + + // Open a file that already exists in a host overlayfs lower layer. + const int fd_rdonly = open(kTestFilePath, O_RDONLY); + if (fd_rdonly < 0) { + err(1, "open(%s, O_RDONLY)", kTestFilePath); + } + + // Check that the file's initial contents are what we expect when read via + // syscall. + char oldbuf[sizeof(kOldFileData)] = {}; + ssize_t n = pread(fd_rdonly, oldbuf, sizeof(oldbuf), 0); + if (n < 0) { + err(1, "initial pread"); + } + if (n != strlen(kOldFileData)) { + errx(1, "short initial pread (%ld/%lu bytes)", n, strlen(kOldFileData)); + } + if (strcmp(oldbuf, kOldFileData) != 0) { + errx(1, "initial pread returned wrong data: %s", oldbuf); + } + + // Check that the file's initial contents are what we expect when read via + // memory mapping. + void* page = mmap(NULL, kPageSize, PROT_READ, MAP_SHARED, fd_rdonly, 0); + if (page == MAP_FAILED) { + err(1, "mmap"); + } + if (strcmp(page, kOldFileData) != 0) { + errx(1, "mapping contains wrong initial data: %s", (const char*)page); + } + + // Open the same file writably, causing host overlayfs to copy it up, and + // replace its contents. + const int fd_rdwr = open(kTestFilePath, O_RDWR); + if (fd_rdwr < 0) { + err(1, "open(%s, O_RDWR)", kTestFilePath); + } + n = write(fd_rdwr, kNewFileData, strlen(kNewFileData)); + if (n < 0) { + err(1, "write"); + } + if (n != strlen(kNewFileData)) { + errx(1, "short write (%ld/%lu bytes)", n, strlen(kNewFileData)); + } + if (ftruncate(fd_rdwr, strlen(kNewFileData)) < 0) { + err(1, "truncate"); + } + + int failed = 0; + + // Check that syscalls on the old FD return updated contents. (Before Linux + // 4.18, this requires that runsc use a post-copy-up FD to service the read.) + char newbuf[sizeof(kNewFileData)] = {}; + n = pread(fd_rdonly, newbuf, sizeof(newbuf), 0); + if (n < 0) { + err(1, "final pread"); + } + if (n != strlen(kNewFileData)) { + warnx("short final pread (%ld/%lu bytes)", n, strlen(kNewFileData)); + failed = 1; + } else if (strcmp(newbuf, kNewFileData) != 0) { + warnx("final pread returned wrong data: %s", newbuf); + failed = 1; + } + + // Check that the memory mapping of the old FD has been updated. (Linux + // overlayfs does not do this, so regardless of kernel version this requires + // that runsc replace existing memory mappings with mappings of a + // post-copy-up FD.) 
+ if (strcmp(page, kNewFileData) != 0) { + warnx("mapping contains wrong final data: %s", (const char*)page); + failed = 1; + } + + return failed; +} diff --git a/images/basic/hostoverlaytest/test_rewinddir.c b/images/basic/hostoverlaytest/test_rewinddir.c new file mode 100644 index 000000000..f1a4085e1 --- /dev/null +++ b/images/basic/hostoverlaytest/test_rewinddir.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + const char kDirPath[] = "rewinddir_test_dir"; + const char kFileBasename[] = "rewinddir_test_file"; + + // Create the test directory. + if (mkdir(kDirPath, 0755) < 0) { + err(1, "mkdir(%s)", kDirPath); + } + + // The test directory should initially be empty. + DIR* dir = opendir(kDirPath); + if (!dir) { + err(1, "opendir(%s)", kDirPath); + } + int failed = 0; + while (1) { + errno = 0; + struct dirent* d = readdir(dir); + if (!d) { + if (errno != 0) { + err(1, "readdir"); + } + break; + } + if (strcmp(d->d_name, ".") != 0 && strcmp(d->d_name, "..") != 0) { + warnx("unexpected file %s in new directory", d->d_name); + failed = 1; + } + } + + // Create a file in the test directory. + char* file_path = malloc(strlen(kDirPath) + 1 + strlen(kFileBasename)); + if (!file_path) { + errx(1, "malloc"); + } + strcpy(file_path, kDirPath); + file_path[strlen(kDirPath)] = '/'; + strcpy(file_path + strlen(kDirPath) + 1, kFileBasename); + if (mknod(file_path, 0644, 0) < 0) { + err(1, "mknod(%s)", file_path); + } + + // After rewinddir(), re-reading the directory stream should yield the new + // file. + rewinddir(dir); + size_t found_file = 0; + while (1) { + errno = 0; + struct dirent* d = readdir(dir); + if (!d) { + if (errno != 0) { + err(1, "readdir"); + } + break; + } + if (strcmp(d->d_name, kFileBasename) == 0) { + found_file++; + } else if (strcmp(d->d_name, ".") != 0 && strcmp(d->d_name, "..") != 0) { + warnx("unexpected file %s in new directory", d->d_name); + failed = 1; + } + } + if (found_file != 1) { + warnx("readdir returned file %s %zu times, wanted 1", kFileBasename, + found_file); + failed = 1; + } + + return failed; +} diff --git a/images/basic/hostoverlaytest/testfile.txt b/images/basic/hostoverlaytest/testfile.txt deleted file mode 100644 index e4188c841..000000000 --- a/images/basic/hostoverlaytest/testfile.txt +++ /dev/null @@ -1 +0,0 @@ -old data diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index ebefeacf2..c6694c278 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -979,9 +979,12 @@ func (l *localFile) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) { skip := uint64(0) - // Check if the file is at the correct position already. If not, seek to the - // beginning and read the entire directory again. - if l.lastDirentOffset != offset { + // Check if the file is at the correct position already. If not, seek to + // the beginning and read the entire directory again. We always seek if + // offset is 0, since this is side-effectual (equivalent to rewinddir(3), + // which causes the directory stream to resynchronize with the directory's + // current contents). 
+ if l.lastDirentOffset != offset || offset == 0 { if _, err := syscall.Seek(l.file.FD(), 0, 0); err != nil { return nil, extractErrno(err) } diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 5465dee9b..6fe6d304f 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -434,7 +434,33 @@ func TestHostOverlayfsCopyUp(t *testing.T) { if got, err := d.Run(ctx, dockerutil.RunOpts{ Image: "basic/hostoverlaytest", WorkDir: "/root", - }, "./test"); err != nil { + }, "./test_copy_up"); err != nil { + t.Fatalf("docker run failed: %v", err) + } else if got != "" { + t.Errorf("test failed:\n%s", got) + } +} + +// TestHostOverlayfsRewindDir tests that rewinddir() "causes the directory +// stream to refer to the current state of the corresponding directory, as a +// call to opendir() would have done" as required by POSIX, when the directory +// in question is host overlayfs. +// +// This test specifically targets host overlayfs because, per POSIX, "if a file +// is removed from or added to the directory after the most recent call to +// opendir() or rewinddir(), whether a subsequent call to readdir() returns an +// entry for that file is unspecified"; the host filesystems used by other +// automated tests yield newly-added files from readdir() even if the fsgofer +// does not explicitly rewinddir(), but overlayfs does not. +func TestHostOverlayfsRewindDir(t *testing.T) { + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) + + if got, err := d.Run(ctx, dockerutil.RunOpts{ + Image: "basic/hostoverlaytest", + WorkDir: "/root", + }, "./test_rewinddir"); err != nil { t.Fatalf("docker run failed: %v", err) } else if got != "" { t.Errorf("test failed:\n%s", got) -- cgit v1.2.3 From 98f9527c04dfc4af242080b5ea29e6da09290098 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Thu, 30 Jul 2020 21:15:34 -0700 Subject: Port nginx and move parsers to own package. This change: - Ports the nginx benchmark. - Switches the Httpd benchmark to use 'hey' as a client. - Moves all parsers to their own package 'tools'. Parsers are moved to their own package because 1) parsing output of a command is often dependent on the format of the command (e.g. 'fio --json'), 2) to enable easier reuse, and 3) clean up and simplify actual running benchmarks (no TestParser functions and ugly sample output in benchmark files). 
PiperOrigin-RevId: 324144165 --- images/benchmarks/nginx/Dockerfile | 1 + test/benchmarks/database/BUILD | 1 + test/benchmarks/database/redis_test.go | 86 +---------- test/benchmarks/fs/BUILD | 1 + test/benchmarks/fs/fio_test.go | 257 ++++----------------------------- test/benchmarks/network/BUILD | 2 + test/benchmarks/network/httpd_test.go | 166 +++------------------ test/benchmarks/network/iperf_test.go | 49 +------ test/benchmarks/network/nginx_test.go | 104 +++++++++++++ test/benchmarks/network/node_test.go | 148 ++----------------- test/benchmarks/tools/BUILD | 29 ++++ test/benchmarks/tools/ab.go | 94 ++++++++++++ test/benchmarks/tools/ab_test.go | 90 ++++++++++++ test/benchmarks/tools/fio.go | 124 ++++++++++++++++ test/benchmarks/tools/fio_test.go | 122 ++++++++++++++++ test/benchmarks/tools/hey.go | 75 ++++++++++ test/benchmarks/tools/hey_test.go | 81 +++++++++++ test/benchmarks/tools/iperf.go | 56 +++++++ test/benchmarks/tools/iperf_test.go | 34 +++++ test/benchmarks/tools/redis.go | 64 ++++++++ test/benchmarks/tools/redis_test.go | 87 +++++++++++ test/benchmarks/tools/tools.go | 17 +++ 22 files changed, 1054 insertions(+), 634 deletions(-) create mode 100644 images/benchmarks/nginx/Dockerfile create mode 100644 test/benchmarks/network/nginx_test.go create mode 100644 test/benchmarks/tools/BUILD create mode 100644 test/benchmarks/tools/ab.go create mode 100644 test/benchmarks/tools/ab_test.go create mode 100644 test/benchmarks/tools/fio.go create mode 100644 test/benchmarks/tools/fio_test.go create mode 100644 test/benchmarks/tools/hey.go create mode 100644 test/benchmarks/tools/hey_test.go create mode 100644 test/benchmarks/tools/iperf.go create mode 100644 test/benchmarks/tools/iperf_test.go create mode 100644 test/benchmarks/tools/redis.go create mode 100644 test/benchmarks/tools/redis_test.go create mode 100644 test/benchmarks/tools/tools.go (limited to 'test') diff --git a/images/benchmarks/nginx/Dockerfile b/images/benchmarks/nginx/Dockerfile new file mode 100644 index 000000000..b64eb52ae --- /dev/null +++ b/images/benchmarks/nginx/Dockerfile @@ -0,0 +1 @@ +FROM nginx:1.15.10 diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD index 5e33465cd..572db665f 100644 --- a/test/benchmarks/database/BUILD +++ b/test/benchmarks/database/BUILD @@ -24,5 +24,6 @@ go_test( deps = [ "//pkg/test/dockerutil", "//test/benchmarks/harness", + "//test/benchmarks/tools", ], ) diff --git a/test/benchmarks/database/redis_test.go b/test/benchmarks/database/redis_test.go index 6d39f4d66..394fce820 100644 --- a/test/benchmarks/database/redis_test.go +++ b/test/benchmarks/database/redis_test.go @@ -16,15 +16,12 @@ package database import ( "context" - "fmt" - "regexp" - "strconv" - "strings" "testing" "time" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/test/benchmarks/harness" + "gvisor.dev/gvisor/test/benchmarks/tools" ) // All possible operations from redis. Note: "ping" will @@ -99,16 +96,10 @@ func BenchmarkRedis(b *testing.B) { b.Fatalf("failed to start redis with: %v", err) } - // runs redis benchmark -t operation for 100K requests against server. - cmd := strings.Split( - fmt.Sprintf("redis-benchmark --csv -t %s -h %s -p %d", operation, ip, serverPort), " ") - - // There is no -t PING_BULK for redis-benchmark, so adjust the command in that case. - // Note that "ping" will run both PING_INLINE and PING_BULK. 
- if operation == "PING_BULK" { - cmd = strings.Split( - fmt.Sprintf("redis-benchmark --csv -t ping -h %s -p %d", ip, serverPort), " ") + redis := tools.Redis{ + Operation: operation, } + // Reset profiles and timer to begin the measurement. server.RestartProfiles() b.ResetTimer() @@ -117,81 +108,16 @@ func BenchmarkRedis(b *testing.B) { defer client.CleanUp(ctx) out, err := client.Run(ctx, dockerutil.RunOpts{ Image: "benchmarks/redis", - }, cmd...) + }, redis.MakeCmd(ip, serverPort)...) if err != nil { b.Fatalf("redis-benchmark failed with: %v", err) } // Stop time while we parse results. b.StopTimer() - result, err := parseOperation(operation, out) - if err != nil { - b.Fatalf("parsing result %s failed with err: %v", out, err) - } - b.ReportMetric(result, operation) // operations per second + redis.Report(b, out) b.StartTimer() } }) } } - -// parseOperation grabs the metric operations per second from redis-benchmark output. -func parseOperation(operation, data string) (float64, error) { - re := regexp.MustCompile(fmt.Sprintf(`"%s( .*)?","(\d*\.\d*)"`, operation)) - match := re.FindStringSubmatch(data) - // If no match, simply don't add it to the result map. - if len(match) < 3 { - return 0.0, fmt.Errorf("could not find %s in %s", operation, data) - } - return strconv.ParseFloat(match[2], 64) -} - -// TestParser tests the parser on sample data. -func TestParser(t *testing.T) { - sampleData := ` - "PING_INLINE","48661.80" - "PING_BULK","50301.81" - "SET","48923.68" - "GET","49382.71" - "INCR","49975.02" - "LPUSH","49875.31" - "RPUSH","50276.52" - "LPOP","50327.12" - "RPOP","50556.12" - "SADD","49504.95" - "HSET","49504.95" - "SPOP","50025.02" - "LPUSH (needed to benchmark LRANGE)","48875.86" - "LRANGE_100 (first 100 elements)","33955.86" - "LRANGE_300 (first 300 elements)","16550.81" - "LRANGE_500 (first 450 elements)","13653.74" - "LRANGE_600 (first 600 elements)","11219.57" - "MSET (10 keys)","44682.75" - ` - wants := map[string]float64{ - "PING_INLINE": 48661.80, - "PING_BULK": 50301.81, - "SET": 48923.68, - "GET": 49382.71, - "INCR": 49975.02, - "LPUSH": 49875.31, - "RPUSH": 50276.52, - "LPOP": 50327.12, - "RPOP": 50556.12, - "SADD": 49504.95, - "HSET": 49504.95, - "SPOP": 50025.02, - "LRANGE_100": 33955.86, - "LRANGE_300": 16550.81, - "LRANGE_500": 13653.74, - "LRANGE_600": 11219.57, - "MSET": 44682.75, - } - for op, want := range wants { - if got, err := parseOperation(op, sampleData); err != nil { - t.Fatalf("failed to parse %s: %v", op, err) - } else if want != got { - t.Fatalf("wanted %f for op %s, got %f", want, op, got) - } - } -} diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD index 79327b57c..20654d88f 100644 --- a/test/benchmarks/fs/BUILD +++ b/test/benchmarks/fs/BUILD @@ -25,6 +25,7 @@ go_test( deps = [ "//pkg/test/dockerutil", "//test/benchmarks/harness", + "//test/benchmarks/tools", "@com_github_docker_docker//api/types/mount:go_default_library", ], ) diff --git a/test/benchmarks/fs/fio_test.go b/test/benchmarks/fs/fio_test.go index 75d52726a..65874ed8b 100644 --- a/test/benchmarks/fs/fio_test.go +++ b/test/benchmarks/fs/fio_test.go @@ -15,72 +15,47 @@ package fs import ( "context" - "encoding/json" "fmt" "path/filepath" - "strconv" "strings" "testing" "github.com/docker/docker/api/types/mount" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/test/benchmarks/harness" + "gvisor.dev/gvisor/test/benchmarks/tools" ) -type fioTestCase struct { - test string // test to run: read, write, randread, randwrite. 
- size string // total size to be read/written of format N[GMK] (e.g. 5G). - blocksize string // blocksize to be read/write of format N[GMK] (e.g. 4K). - iodepth int // iodepth for reads/writes. - time int // time to run the test in seconds, usually for rand(read/write). -} - -// makeCmdFromTestcase makes a fio command. -func (f *fioTestCase) makeCmdFromTestcase(filename string) []string { - cmd := []string{"fio", "--output-format=json", "--ioengine=sync"} - cmd = append(cmd, fmt.Sprintf("--name=%s", f.test)) - cmd = append(cmd, fmt.Sprintf("--size=%s", f.size)) - cmd = append(cmd, fmt.Sprintf("--blocksize=%s", f.blocksize)) - cmd = append(cmd, fmt.Sprintf("--filename=%s", filename)) - cmd = append(cmd, fmt.Sprintf("--iodepth=%d", f.iodepth)) - cmd = append(cmd, fmt.Sprintf("--rw=%s", f.test)) - if f.time != 0 { - cmd = append(cmd, "--time_based") - cmd = append(cmd, fmt.Sprintf("--runtime=%d", f.time)) - } - return cmd -} - // BenchmarkFio runs fio on the runtime under test. There are 4 basic test // cases each run on a tmpfs mount and a bind mount. Fio requires root so that // caches can be dropped. func BenchmarkFio(b *testing.B) { - testCases := []fioTestCase{ - fioTestCase{ - test: "write", - size: "5G", - blocksize: "1M", - iodepth: 4, + testCases := []tools.Fio{ + tools.Fio{ + Test: "write", + Size: "5G", + Blocksize: "1M", + Iodepth: 4, }, - fioTestCase{ - test: "read", - size: "5G", - blocksize: "1M", - iodepth: 4, + tools.Fio{ + Test: "read", + Size: "5G", + Blocksize: "1M", + Iodepth: 4, }, - fioTestCase{ - test: "randwrite", - size: "5G", - blocksize: "4K", - iodepth: 4, - time: 30, + tools.Fio{ + Test: "randwrite", + Size: "5G", + Blocksize: "4K", + Iodepth: 4, + Time: 30, }, - fioTestCase{ - test: "randread", - size: "5G", - blocksize: "4K", - iodepth: 4, - time: 30, + tools.Fio{ + Test: "randread", + Size: "5G", + Blocksize: "4K", + Iodepth: 4, + Time: 30, }, } @@ -92,7 +67,7 @@ func BenchmarkFio(b *testing.B) { for _, fsType := range []mount.Type{mount.TypeBind, mount.TypeTmpfs} { for _, tc := range testCases { - testName := strings.Title(tc.test) + strings.Title(string(fsType)) + testName := strings.Title(tc.Test) + strings.Title(string(fsType)) b.Run(testName, func(b *testing.B) { ctx := context.Background() container := machine.GetContainer(ctx, b) @@ -109,7 +84,6 @@ func BenchmarkFio(b *testing.B) { b.Fatalf("failed to make mount: %v", err) } defer mountCleanup() - cmd := tc.makeCmdFromTestcase(outfile) // Start the container with the mount. if err := container.Spawn( @@ -127,8 +101,8 @@ func BenchmarkFio(b *testing.B) { } // For reads, we need a file to read so make one inside the container. - if strings.Contains(tc.test, "read") { - fallocateCmd := fmt.Sprintf("fallocate -l %s %s", tc.size, outfile) + if strings.Contains(tc.Test, "read") { + fallocateCmd := fmt.Sprintf("fallocate -l %s %s", tc.Size, outfile) if out, err := container.Exec(ctx, dockerutil.ExecOpts{}, strings.Split(fallocateCmd, " ")...); err != nil { b.Fatalf("failed to create readable file on mount: %v, %s", err, out) @@ -139,6 +113,7 @@ func BenchmarkFio(b *testing.B) { if err := harness.DropCaches(machine); err != nil { b.Skipf("failed to drop caches with %v. You probably need root.", err) } + cmd := tc.MakeCmd(outfile) container.RestartProfiles() b.ResetTimer() for i := 0; i < b.N; i++ { @@ -148,19 +123,7 @@ func BenchmarkFio(b *testing.B) { b.Fatalf("failed to run cmd %v: %v", cmd, err) } b.StopTimer() - // Parse the output and report the metrics. 
- isRead := strings.Contains(tc.test, "read") - bw, err := parseBandwidth(data, isRead) - if err != nil { - b.Fatalf("failed to parse bandwidth from %s with: %v", data, err) - } - b.ReportMetric(bw, "bandwidth") // in b/s. - - iops, err := parseIOps(data, isRead) - if err != nil { - b.Fatalf("failed to parse iops from %s with: %v", data, err) - } - b.ReportMetric(iops, "iops") + tc.Report(b, data) // If b.N is used (i.e. we run for an hour), we should drop caches // after each run. if err := harness.DropCaches(machine); err != nil { @@ -205,165 +168,3 @@ func makeMount(machine harness.Machine, mountType mount.Type, target string) (mo return mount.Mount{}, func() {}, fmt.Errorf("illegal mount time not supported: %v", mountType) } } - -// parseBandwidth reports the bandwidth in b/s. -func parseBandwidth(data string, isRead bool) (float64, error) { - if isRead { - result, err := parseFioJSON(data, "read", "bw") - if err != nil { - return 0, err - } - return 1024 * result, nil - } - result, err := parseFioJSON(data, "write", "bw") - if err != nil { - return 0, err - } - return 1024 * result, nil -} - -// parseIOps reports the write IO per second metric. -func parseIOps(data string, isRead bool) (float64, error) { - if isRead { - return parseFioJSON(data, "read", "iops") - } - return parseFioJSON(data, "write", "iops") -} - -// fioResult is for parsing FioJSON. -type fioResult struct { - Jobs []fioJob -} - -// fioJob is for parsing FioJSON. -type fioJob map[string]json.RawMessage - -// fioMetrics is for parsing FioJSON. -type fioMetrics map[string]json.RawMessage - -// parseFioJSON parses data and grabs "op" (read or write) and "metric" -// (bw or iops) from the JSON. -func parseFioJSON(data, op, metric string) (float64, error) { - var result fioResult - if err := json.Unmarshal([]byte(data), &result); err != nil { - return 0, fmt.Errorf("could not unmarshal data: %v", err) - } - - if len(result.Jobs) < 1 { - return 0, fmt.Errorf("no jobs present to parse") - } - - var metrics fioMetrics - if err := json.Unmarshal(result.Jobs[0][op], &metrics); err != nil { - return 0, fmt.Errorf("could not unmarshal jobs: %v", err) - } - - if _, ok := metrics[metric]; !ok { - return 0, fmt.Errorf("no metric found for op: %s", op) - } - return strconv.ParseFloat(string(metrics[metric]), 64) -} - -// TestParsers tests that the parsers work on sampleData. 
-func TestParsers(t *testing.T) { - sampleData := ` -{ - "fio version" : "fio-3.1", - "timestamp" : 1554837456, - "timestamp_ms" : 1554837456621, - "time" : "Tue Apr 9 19:17:36 2019", - "jobs" : [ - { - "jobname" : "test", - "groupid" : 0, - "error" : 0, - "eta" : 2147483647, - "elapsed" : 1, - "job options" : { - "name" : "test", - "ioengine" : "sync", - "size" : "1073741824", - "filename" : "/disk/file.dat", - "iodepth" : "4", - "bs" : "4096", - "rw" : "write" - }, - "read" : { - "io_bytes" : 0, - "io_kbytes" : 0, - "bw" : 123456, - "iops" : 1234.5678, - "runtime" : 0, - "total_ios" : 0, - "short_ios" : 0, - "bw_min" : 0, - "bw_max" : 0, - "bw_agg" : 0.000000, - "bw_mean" : 0.000000, - "bw_dev" : 0.000000, - "bw_samples" : 0, - "iops_min" : 0, - "iops_max" : 0, - "iops_mean" : 0.000000, - "iops_stddev" : 0.000000, - "iops_samples" : 0 - }, - "write" : { - "io_bytes" : 1073741824, - "io_kbytes" : 1048576, - "bw" : 1753471, - "iops" : 438367.892977, - "runtime" : 598, - "total_ios" : 262144, - "bw_min" : 1731120, - "bw_max" : 1731120, - "bw_agg" : 98.725328, - "bw_mean" : 1731120.000000, - "bw_dev" : 0.000000, - "bw_samples" : 1, - "iops_min" : 432780, - "iops_max" : 432780, - "iops_mean" : 432780.000000, - "iops_stddev" : 0.000000, - "iops_samples" : 1 - } - } - ] -} -` - // WriteBandwidth. - got, err := parseBandwidth(sampleData, false) - var want float64 = 1753471.0 * 1024 - if err != nil { - t.Fatalf("parse failed with err: %v", err) - } else if got != want { - t.Fatalf("got: %f, want: %f", got, want) - } - - // ReadBandwidth. - got, err = parseBandwidth(sampleData, true) - want = 123456 * 1024 - if err != nil { - t.Fatalf("parse failed with err: %v", err) - } else if got != want { - t.Fatalf("got: %f, want: %f", got, want) - } - - // WriteIOps. - got, err = parseIOps(sampleData, false) - want = 438367.892977 - if err != nil { - t.Fatalf("parse failed with err: %v", err) - } else if got != want { - t.Fatalf("got: %f, want: %f", got, want) - } - - // ReadIOps. - got, err = parseIOps(sampleData, true) - want = 1234.5678 - if err != nil { - t.Fatalf("parse failed with err: %v", err) - } else if got != want { - t.Fatalf("got: %f, want: %f", got, want) - } -} diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD index b47400590..d15cd55ee 100644 --- a/test/benchmarks/network/BUILD +++ b/test/benchmarks/network/BUILD @@ -15,6 +15,7 @@ go_test( srcs = [ "httpd_test.go", "iperf_test.go", + "nginx_test.go", "node_test.go", ], library = ":network", @@ -27,5 +28,6 @@ go_test( "//pkg/test/dockerutil", "//pkg/test/testutil", "//test/benchmarks/harness", + "//test/benchmarks/tools", ], ) diff --git a/test/benchmarks/network/httpd_test.go b/test/benchmarks/network/httpd_test.go index fe23ca949..07833f9cd 100644 --- a/test/benchmarks/network/httpd_test.go +++ b/test/benchmarks/network/httpd_test.go @@ -16,12 +16,11 @@ package network import ( "context" "fmt" - "regexp" - "strconv" "testing" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/test/benchmarks/harness" + "gvisor.dev/gvisor/test/benchmarks/tools" ) // see Dockerfile '//images/benchmarks/httpd'. @@ -52,13 +51,16 @@ func BenchmarkHttpdConcurrency(b *testing.B) { defer serverMachine.CleanUp() // The test iterates over client concurrency, so set other parameters. 
- requests := 10000 concurrency := []int{1, 5, 10, 25} - doc := docs["10Kb"] for _, c := range concurrency { b.Run(fmt.Sprintf("%d", c), func(b *testing.B) { - runHttpd(b, clientMachine, serverMachine, doc, requests, c) + hey := &tools.Hey{ + Requests: 10000, + Concurrency: c, + Doc: docs["10Kb"], + } + runHttpd(b, clientMachine, serverMachine, hey) }) } } @@ -78,18 +80,20 @@ func BenchmarkHttpdDocSize(b *testing.B) { } defer serverMachine.CleanUp() - requests := 10000 - concurrency := 1 - for name, filename := range docs { b.Run(name, func(b *testing.B) { - runHttpd(b, clientMachine, serverMachine, filename, requests, concurrency) + hey := &tools.Hey{ + Requests: 10000, + Concurrency: 1, + Doc: filename, + } + runHttpd(b, clientMachine, serverMachine, hey) }) } } // runHttpd runs a single test run. -func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc string, requests, concurrency int) { +func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, hey *tools.Hey) { b.Helper() // Grab a container from the server. @@ -98,11 +102,11 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc st defer server.CleanUp(ctx) // Copy the docs to /tmp and serve from there. - cmd := "mkdir -p /tmp/html; cp -r /local /tmp/html/.; apache2 -X" + cmd := "mkdir -p /tmp/html; cp -r /local/* /tmp/html/.; apache2 -X" port := 80 // Start the server. - server.Spawn(ctx, dockerutil.RunOpts{ + if err := server.Spawn(ctx, dockerutil.RunOpts{ Image: "benchmarks/httpd", Ports: []int{port}, Env: []string{ @@ -113,7 +117,9 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc st "APACHE_LOG_DIR=/tmp", "APACHE_PID_FILE=/tmp/apache.pid", }, - }, "sh", "-c", cmd) + }, "sh", "-c", cmd); err != nil { + b.Fatalf("failed to start server: %v") + } ip, err := serverMachine.IPAddress() if err != nil { @@ -132,146 +138,18 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc st client := clientMachine.GetNativeContainer(ctx, b) defer client.CleanUp(ctx) - path := fmt.Sprintf("http://%s:%d/%s", ip, servingPort, doc) - // See apachebench (ab) for flags. - cmd = fmt.Sprintf("ab -n %d -c %d %s", requests, concurrency, path) - b.ResetTimer() server.RestartProfiles() for i := 0; i < b.N; i++ { out, err := client.Run(ctx, dockerutil.RunOpts{ - Image: "benchmarks/ab", - }, "sh", "-c", cmd) + Image: "benchmarks/hey", + }, hey.MakeCmd(ip, servingPort)...) if err != nil { b.Fatalf("run failed with: %v", err) } b.StopTimer() - - // Parse and report custom metrics. - transferRate, err := parseTransferRate(out) - if err != nil { - b.Logf("failed to parse transferrate: %v", err) - } - b.ReportMetric(transferRate*1024, "transfer_rate") // Convert from Kb/s to b/s. - - latency, err := parseLatency(out) - if err != nil { - b.Logf("failed to parse latency: %v", err) - } - b.ReportMetric(latency/1000, "mean_latency") // Convert from ms to s. - - reqPerSecond, err := parseRequestsPerSecond(out) - if err != nil { - b.Logf("failed to parse requests per second: %v", err) - } - b.ReportMetric(reqPerSecond, "requests_per_second") - + hey.Report(b, out) b.StartTimer() } } - -var transferRateRE = regexp.MustCompile(`Transfer rate:\s+(\d+\.?\d+?)\s+\[Kbytes/sec\]\s+received`) - -// parseTransferRate parses transfer rate from apachebench output. 
-func parseTransferRate(data string) (float64, error) { - match := transferRateRE.FindStringSubmatch(data) - if len(match) < 2 { - return 0, fmt.Errorf("failed get bandwidth: %s", data) - } - return strconv.ParseFloat(match[1], 64) -} - -var latencyRE = regexp.MustCompile(`Total:\s+\d+\s+(\d+)\s+(\d+\.?\d+?)\s+\d+\s+\d+\s`) - -// parseLatency parses latency from apachebench output. -func parseLatency(data string) (float64, error) { - match := latencyRE.FindStringSubmatch(data) - if len(match) < 2 { - return 0, fmt.Errorf("failed get bandwidth: %s", data) - } - return strconv.ParseFloat(match[1], 64) -} - -var requestsPerSecondRE = regexp.MustCompile(`Requests per second:\s+(\d+\.?\d+?)\s+`) - -// parseRequestsPerSecond parses requests per second from apachebench output. -func parseRequestsPerSecond(data string) (float64, error) { - match := requestsPerSecondRE.FindStringSubmatch(data) - if len(match) < 2 { - return 0, fmt.Errorf("failed get bandwidth: %s", data) - } - return strconv.ParseFloat(match[1], 64) -} - -// Sample output from apachebench. -const sampleData = `This is ApacheBench, Version 2.3 <$Revision: 1826891 $> -Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ -Licensed to The Apache Software Foundation, http://www.apache.org/ - -Benchmarking 10.10.10.10 (be patient).....done - - -Server Software: Apache/2.4.38 -Server Hostname: 10.10.10.10 -Server Port: 80 - -Document Path: /latin10k.txt -Document Length: 210 bytes - -Concurrency Level: 1 -Time taken for tests: 0.180 seconds -Complete requests: 100 -Failed requests: 0 -Non-2xx responses: 100 -Total transferred: 38800 bytes -HTML transferred: 21000 bytes -Requests per second: 556.44 [#/sec] (mean) -Time per request: 1.797 [ms] (mean) -Time per request: 1.797 [ms] (mean, across all concurrent requests) -Transfer rate: 210.84 [Kbytes/sec] received - -Connection Times (ms) - min mean[+/-sd] median max -Connect: 0 0 0.2 0 2 -Processing: 1 2 1.0 1 8 -Waiting: 1 1 1.0 1 7 -Total: 1 2 1.2 1 10 - -Percentage of the requests served within a certain time (ms) - 50% 1 - 66% 2 - 75% 2 - 80% 2 - 90% 2 - 95% 3 - 98% 7 - 99% 10 - 100% 10 (longest request)` - -// TestParsers checks the parsers work. -func TestParsers(t *testing.T) { - want := 210.84 - got, err := parseTransferRate(sampleData) - if err != nil { - t.Fatalf("failed to parse transfer rate with error: %v", err) - } else if got != want { - t.Fatalf("parseTransferRate got: %f, want: %f", got, want) - } - - want = 2.0 - got, err = parseLatency(sampleData) - if err != nil { - t.Fatalf("failed to parse transfer rate with error: %v", err) - } else if got != want { - t.Fatalf("parseLatency got: %f, want: %f", got, want) - } - - want = 556.44 - got, err = parseRequestsPerSecond(sampleData) - if err != nil { - t.Fatalf("failed to parse transfer rate with error: %v", err) - } else if got != want { - t.Fatalf("parseRequestsPerSecond got: %f, want: %f", got, want) - } -} diff --git a/test/benchmarks/network/iperf_test.go b/test/benchmarks/network/iperf_test.go index a5e198e14..b8ab7dfb8 100644 --- a/test/benchmarks/network/iperf_test.go +++ b/test/benchmarks/network/iperf_test.go @@ -15,19 +15,18 @@ package network import ( "context" - "fmt" - "regexp" - "strconv" - "strings" "testing" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/pkg/test/testutil" "gvisor.dev/gvisor/test/benchmarks/harness" + "gvisor.dev/gvisor/test/benchmarks/tools" ) func BenchmarkIperf(b *testing.B) { - const time = 10 // time in seconds to run the client. 
+ iperf := tools.Iperf{ + Time: 10, // time in seconds to run client. + } clientMachine, err := h.GetMachine() if err != nil { @@ -92,10 +91,6 @@ func BenchmarkIperf(b *testing.B) { if err := harness.WaitUntilServing(ctx, clientMachine, ip, servingPort); err != nil { b.Fatalf("failed to wait for server: %v", err) } - - // iperf report in Kb realtime - cmd := fmt.Sprintf("iperf -f K --realtime --time %d -c %s -p %d", time, ip.String(), servingPort) - // Run the client. b.ResetTimer() @@ -105,46 +100,14 @@ func BenchmarkIperf(b *testing.B) { for i := 0; i < b.N; i++ { out, err := client.Run(ctx, dockerutil.RunOpts{ Image: "benchmarks/iperf", - }, strings.Split(cmd, " ")...) + }, iperf.MakeCmd(ip, servingPort)...) if err != nil { b.Fatalf("failed to run client: %v", err) } b.StopTimer() - - // Parse bandwidth and report it. - bW, err := bandwidth(out) - if err != nil { - b.Fatalf("failed to parse bandwitdth from %s: %v", out, err) - } - b.ReportMetric(bW*1024, "bandwidth") // Convert from Kb/s to b/s. + iperf.Report(b, out) b.StartTimer() } }) } } - -// bandwidth parses the Bandwidth number from an iperf report. A sample is below. -func bandwidth(data string) (float64, error) { - re := regexp.MustCompile(`\[\s*\d+\][^\n]+\s+(\d+\.?\d*)\s+KBytes/sec`) - match := re.FindStringSubmatch(data) - if len(match) < 1 { - return 0, fmt.Errorf("failed get bandwidth: %s", data) - } - return strconv.ParseFloat(match[1], 64) -} - -func TestParser(t *testing.T) { - sampleData := ` ------------------------------------------------------------- -Client connecting to 10.138.15.215, TCP port 32779 -TCP window size: 45.0 KByte (default) ------------------------------------------------------------- -[ 3] local 10.138.15.216 port 32866 connected with 10.138.15.215 port 32779 -[ ID] Interval Transfer Bandwidth -[ 3] 0.0-10.0 sec 459520 KBytes 45900 KBytes/sec -` - bandwidth, err := bandwidth(sampleData) - if err != nil || bandwidth != 45900 { - t.Fatalf("failed with: %v and %f", err, bandwidth) - } -} diff --git a/test/benchmarks/network/nginx_test.go b/test/benchmarks/network/nginx_test.go new file mode 100644 index 000000000..5965652a5 --- /dev/null +++ b/test/benchmarks/network/nginx_test.go @@ -0,0 +1,104 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package network + +import ( + "context" + "fmt" + "testing" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" + "gvisor.dev/gvisor/test/benchmarks/tools" +) + +// BenchmarkNginxConcurrency iterates the concurrency argument and tests +// how well the runtime under test handles requests in parallel. +// TODO(zkoopmans): Update with different doc sizes like Httpd. +func BenchmarkNginxConcurrency(b *testing.B) { + // Grab a machine for the client and server. 
+ clientMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get client: %v", err) + } + defer clientMachine.CleanUp() + + serverMachine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get server: %v", err) + } + defer serverMachine.CleanUp() + + concurrency := []int{1, 5, 10, 25} + for _, c := range concurrency { + b.Run(fmt.Sprintf("%d", c), func(b *testing.B) { + hey := &tools.Hey{ + Requests: 10000, + Concurrency: c, + } + runNginx(b, clientMachine, serverMachine, hey) + }) + } +} + +// runHttpd runs a single test run. +func runNginx(b *testing.B, clientMachine, serverMachine harness.Machine, hey *tools.Hey) { + b.Helper() + + // Grab a container from the server. + ctx := context.Background() + server := serverMachine.GetContainer(ctx, b) + defer server.CleanUp(ctx) + + port := 80 + // Start the server. + if err := server.Spawn(ctx, + dockerutil.RunOpts{ + Image: "benchmarks/nginx", + Ports: []int{port}, + }); err != nil { + b.Fatalf("server failed to start: %v", err) + } + + ip, err := serverMachine.IPAddress() + if err != nil { + b.Fatalf("failed to find server ip: %v", err) + } + + servingPort, err := server.FindPort(ctx, port) + if err != nil { + b.Fatalf("failed to find server port %d: %v", port, err) + } + + // Check the server is serving. + harness.WaitUntilServing(ctx, clientMachine, ip, servingPort) + + // Grab a client. + client := clientMachine.GetNativeContainer(ctx, b) + defer client.CleanUp(ctx) + + b.ResetTimer() + server.RestartProfiles() + for i := 0; i < b.N; i++ { + out, err := client.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/hey", + }, hey.MakeCmd(ip, servingPort)...) + if err != nil { + b.Fatalf("run failed with: %v", err) + } + b.StopTimer() + hey.Report(b, out) + b.StartTimer() + } +} diff --git a/test/benchmarks/network/node_test.go b/test/benchmarks/network/node_test.go index f9278ab66..5b568cfe5 100644 --- a/test/benchmarks/network/node_test.go +++ b/test/benchmarks/network/node_test.go @@ -16,14 +16,12 @@ package network import ( "context" "fmt" - "regexp" - "strconv" - "strings" "testing" "time" "gvisor.dev/gvisor/pkg/test/dockerutil" "gvisor.dev/gvisor/test/benchmarks/harness" + "gvisor.dev/gvisor/test/benchmarks/tools" ) // BenchmarkNode runs 10K requests using 'hey' against a Node server run on @@ -36,13 +34,17 @@ func BenchmarkNode(b *testing.B) { for _, c := range concurrency { b.Run(fmt.Sprintf("Concurrency%d", c), func(b *testing.B) { - runNode(b, requests, c) + hey := &tools.Hey{ + Requests: requests, + Concurrency: c, + } + runNode(b, hey) }) } } // runNode runs the test for a given # of requests and concurrency. -func runNode(b *testing.B, requests, concurrency int) { +func runNode(b *testing.B, hey *tools.Hey) { b.Helper() // The machine to hold Redis and the Node Server. @@ -106,7 +108,7 @@ func runNode(b *testing.B, requests, concurrency int) { // Wait until the Client sees the server as up. harness.WaitUntilServing(ctx, clientMachine, servingIP, servingPort) - heyCmd := strings.Split(fmt.Sprintf("hey -n %d -c %d http://%s:%d/", requests, concurrency, servingIP, servingPort), " ") + heyCmd := hey.MakeCmd(servingIP, servingPort) nodeApp.RestartProfiles() b.ResetTimer() @@ -123,139 +125,7 @@ func runNode(b *testing.B, requests, concurrency int) { // Stop the timer to parse the data and report stats. 
b.StopTimer() - requests, err := parseHeyRequestsPerSecond(out) - if err != nil { - b.Fatalf("failed to parse requests per second: %v", err) - } - b.ReportMetric(requests, "requests_per_second") - - bw, err := parseHeyBandwidth(out) - if err != nil { - b.Fatalf("failed to parse bandwidth: %v", err) - } - b.ReportMetric(bw, "bandwidth") - - ave, err := parseHeyAverageLatency(out) - if err != nil { - b.Fatalf("failed to parse average latency: %v", err) - } - b.ReportMetric(ave, "average_latency") + hey.Report(b, out) b.StartTimer() } } - -var heyReqPerSecondRE = regexp.MustCompile(`Requests/sec:\s*(\d+\.?\d+?)\s+`) - -// parseHeyRequestsPerSecond finds requests per second from hey output. -func parseHeyRequestsPerSecond(data string) (float64, error) { - match := heyReqPerSecondRE.FindStringSubmatch(data) - if len(match) < 2 { - return 0, fmt.Errorf("failed get bandwidth: %s", data) - } - return strconv.ParseFloat(match[1], 64) -} - -var heyAverageLatencyRE = regexp.MustCompile(`Average:\s*(\d+\.?\d+?)\s+secs`) - -// parseHeyAverageLatency finds Average Latency in seconds form hey output. -func parseHeyAverageLatency(data string) (float64, error) { - match := heyAverageLatencyRE.FindStringSubmatch(data) - if len(match) < 2 { - return 0, fmt.Errorf("failed get average latency match%d : %s", len(match), data) - } - return strconv.ParseFloat(match[1], 64) -} - -var heySizePerRequestRE = regexp.MustCompile(`Size/request:\s*(\d+\.?\d+?)\s+bytes`) - -// parseHeyBandwidth computes bandwidth from request/sec * bytes/request -// and reports in bytes/second. -func parseHeyBandwidth(data string) (float64, error) { - match := heyReqPerSecondRE.FindStringSubmatch(data) - if len(match) < 2 { - return 0, fmt.Errorf("failed get requests per second: %s", data) - } - reqPerSecond, err := strconv.ParseFloat(match[1], 64) - if err != nil { - return 0, fmt.Errorf("failed to convert %s to float", match[1]) - } - - match = heySizePerRequestRE.FindStringSubmatch(data) - if len(match) < 2 { - return 0, fmt.Errorf("failed get average latency: %s", data) - } - requestSize, err := strconv.ParseFloat(match[1], 64) - return requestSize * reqPerSecond, err -} - -// TestHeyParsers tests that the parsers work with sample output. 
-func TestHeyParsers(t *testing.T) { - sampleData := ` - Summary: - Total: 2.2391 secs - Slowest: 1.6292 secs - Fastest: 0.0066 secs - Average: 0.5351 secs - Requests/sec: 89.3202 - - Total data: 841200 bytes - Size/request: 4206 bytes - - Response time histogram: - 0.007 [1] | - 0.169 [0] | - 0.331 [149] |â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â–  - 0.493 [0] | - 0.656 [0] | - 0.818 [0] | - 0.980 [0] | - 1.142 [0] | - 1.305 [0] | - 1.467 [49] |â– â– â– â– â– â– â– â– â– â– â– â– â–  - 1.629 [1] | - - - Latency distribution: - 10% in 0.2149 secs - 25% in 0.2449 secs - 50% in 0.2703 secs - 75% in 1.3315 secs - 90% in 1.4045 secs - 95% in 1.4232 secs - 99% in 1.4362 secs - - Details (average, fastest, slowest): - DNS+dialup: 0.0002 secs, 0.0066 secs, 1.6292 secs - DNS-lookup: 0.0000 secs, 0.0000 secs, 0.0000 secs - req write: 0.0000 secs, 0.0000 secs, 0.0012 secs - resp wait: 0.5225 secs, 0.0064 secs, 1.4346 secs - resp read: 0.0122 secs, 0.0001 secs, 0.2006 secs - - Status code distribution: - [200] 200 responses - ` - want := 89.3202 - got, err := parseHeyRequestsPerSecond(sampleData) - if err != nil { - t.Fatalf("failed to parse request per second with: %v", err) - } else if got != want { - t.Fatalf("got: %f, want: %f", got, want) - } - - want = 89.3202 * 4206 - got, err = parseHeyBandwidth(sampleData) - if err != nil { - t.Fatalf("failed to parse bandwidth with: %v", err) - } else if got != want { - t.Fatalf("got: %f, want: %f", got, want) - } - - want = 0.5351 - got, err = parseHeyAverageLatency(sampleData) - if err != nil { - t.Fatalf("failed to parse average latency with: %v", err) - } else if got != want { - t.Fatalf("got: %f, want: %f", got, want) - } - -} diff --git a/test/benchmarks/tools/BUILD b/test/benchmarks/tools/BUILD new file mode 100644 index 000000000..4358551bc --- /dev/null +++ b/test/benchmarks/tools/BUILD @@ -0,0 +1,29 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "tools", + srcs = [ + "ab.go", + "fio.go", + "hey.go", + "iperf.go", + "redis.go", + "tools.go", + ], + visibility = ["//:sandbox"], +) + +go_test( + name = "tools_test", + size = "small", + srcs = [ + "ab_test.go", + "fio_test.go", + "hey_test.go", + "iperf_test.go", + "redis_test.go", + ], + library = ":tools", +) diff --git a/test/benchmarks/tools/ab.go b/test/benchmarks/tools/ab.go new file mode 100644 index 000000000..4cc9c3bce --- /dev/null +++ b/test/benchmarks/tools/ab.go @@ -0,0 +1,94 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "fmt" + "net" + "regexp" + "strconv" + "testing" +) + +// ApacheBench is for the client application ApacheBench. +type ApacheBench struct { + Requests int + Concurrency int + Doc string + // TODO(zkoopmans): support KeepAlive and pass option to enable. +} + +// MakeCmd makes an ApacheBench command. 
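+// For example, with illustrative values Requests=10000, Concurrency=5,
+// Doc="latin10k.txt" and a server at 10.0.0.2:80, the command built below is
+// roughly:
+//
+//	sh -c 'ab -n 10000 -c 5 http://10.0.0.2:80/latin10k.txt'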
+func (a *ApacheBench) MakeCmd(ip net.IP, port int) []string { + path := fmt.Sprintf("http://%s:%d/%s", ip, port, a.Doc) + // See apachebench (ab) for flags. + cmd := fmt.Sprintf("ab -n %d -c %d %s", a.Requests, a.Concurrency, path) + return []string{"sh", "-c", cmd} +} + +// Report parses and reports metrics from ApacheBench output. +func (a *ApacheBench) Report(b *testing.B, output string) { + // Parse and report custom metrics. + transferRate, err := a.parseTransferRate(output) + if err != nil { + b.Logf("failed to parse transferrate: %v", err) + } + b.ReportMetric(transferRate*1024, "transfer_rate_b/s") // Convert from Kb/s to b/s. + + latency, err := a.parseLatency(output) + if err != nil { + b.Logf("failed to parse latency: %v", err) + } + b.ReportMetric(latency/1000, "mean_latency_secs") // Convert from ms to s. + + reqPerSecond, err := a.parseRequestsPerSecond(output) + if err != nil { + b.Logf("failed to parse requests per second: %v", err) + } + b.ReportMetric(reqPerSecond, "requests_per_second") +} + +var transferRateRE = regexp.MustCompile(`Transfer rate:\s+(\d+\.?\d+?)\s+\[Kbytes/sec\]\s+received`) + +// parseTransferRate parses transfer rate from ApacheBench output. +func (a *ApacheBench) parseTransferRate(data string) (float64, error) { + match := transferRateRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +var latencyRE = regexp.MustCompile(`Total:\s+\d+\s+(\d+)\s+(\d+\.?\d+?)\s+\d+\s+\d+\s`) + +// parseLatency parses latency from ApacheBench output. +func (a *ApacheBench) parseLatency(data string) (float64, error) { + match := latencyRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +var requestsPerSecondRE = regexp.MustCompile(`Requests per second:\s+(\d+\.?\d+?)\s+`) + +// parseRequestsPerSecond parses requests per second from ApacheBench output. +func (a *ApacheBench) parseRequestsPerSecond(data string) (float64, error) { + match := requestsPerSecondRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} diff --git a/test/benchmarks/tools/ab_test.go b/test/benchmarks/tools/ab_test.go new file mode 100644 index 000000000..28ee66ec1 --- /dev/null +++ b/test/benchmarks/tools/ab_test.go @@ -0,0 +1,90 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import "testing" + +// TestApacheBench checks the ApacheBench parsers on sample output. +func TestApacheBench(t *testing.T) { + // Sample output from apachebench. 
+ sampleData := `This is ApacheBench, Version 2.3 <$Revision: 1826891 $> +Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Licensed to The Apache Software Foundation, http://www.apache.org/ + +Benchmarking 10.10.10.10 (be patient).....done + + +Server Software: Apache/2.4.38 +Server Hostname: 10.10.10.10 +Server Port: 80 + +Document Path: /latin10k.txt +Document Length: 210 bytes + +Concurrency Level: 1 +Time taken for tests: 0.180 seconds +Complete requests: 100 +Failed requests: 0 +Non-2xx responses: 100 +Total transferred: 38800 bytes +HTML transferred: 21000 bytes +Requests per second: 556.44 [#/sec] (mean) +Time per request: 1.797 [ms] (mean) +Time per request: 1.797 [ms] (mean, across all concurrent requests) +Transfer rate: 210.84 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.2 0 2 +Processing: 1 2 1.0 1 8 +Waiting: 1 1 1.0 1 7 +Total: 1 2 1.2 1 10 + +Percentage of the requests served within a certain time (ms) + 50% 1 + 66% 2 + 75% 2 + 80% 2 + 90% 2 + 95% 3 + 98% 7 + 99% 10 + 100% 10 (longest request)` + + ab := ApacheBench{} + want := 210.84 + got, err := ab.parseTransferRate(sampleData) + if err != nil { + t.Fatalf("failed to parse transfer rate with error: %v", err) + } else if got != want { + t.Fatalf("parseTransferRate got: %f, want: %f", got, want) + } + + want = 2.0 + got, err = ab.parseLatency(sampleData) + if err != nil { + t.Fatalf("failed to parse transfer rate with error: %v", err) + } else if got != want { + t.Fatalf("parseLatency got: %f, want: %f", got, want) + } + + want = 556.44 + got, err = ab.parseRequestsPerSecond(sampleData) + if err != nil { + t.Fatalf("failed to parse transfer rate with error: %v", err) + } else if got != want { + t.Fatalf("parseRequestsPerSecond got: %f, want: %f", got, want) + } +} diff --git a/test/benchmarks/tools/fio.go b/test/benchmarks/tools/fio.go new file mode 100644 index 000000000..20000db16 --- /dev/null +++ b/test/benchmarks/tools/fio.go @@ -0,0 +1,124 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + "testing" +) + +// Fio makes 'fio' commands and parses their output. +type Fio struct { + Test string // test to run: read, write, randread, randwrite. + Size string // total size to be read/written of format N[GMK] (e.g. 5G). + Blocksize string // blocksize to be read/write of format N[GMK] (e.g. 4K). + Iodepth int // iodepth for reads/writes. + Time int // time to run the test in seconds, usually for rand(read/write). +} + +// MakeCmd makes a 'fio' command. 
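+// For example, with illustrative values Test="write", Size="5G",
+// Blocksize="1M", Iodepth=4 and filename "/disk/file.dat", the command is:
+//
+//	fio --output-format=json --ioengine=sync --name=write --size=5G
+//	    --blocksize=1M --filename=/disk/file.dat --iodepth=4 --rw=write
+//
+// with --time_based and --runtime appended only when Time is nonzero.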
+func (f *Fio) MakeCmd(filename string) []string { + cmd := []string{"fio", "--output-format=json", "--ioengine=sync"} + cmd = append(cmd, fmt.Sprintf("--name=%s", f.Test)) + cmd = append(cmd, fmt.Sprintf("--size=%s", f.Size)) + cmd = append(cmd, fmt.Sprintf("--blocksize=%s", f.Blocksize)) + cmd = append(cmd, fmt.Sprintf("--filename=%s", filename)) + cmd = append(cmd, fmt.Sprintf("--iodepth=%d", f.Iodepth)) + cmd = append(cmd, fmt.Sprintf("--rw=%s", f.Test)) + if f.Time != 0 { + cmd = append(cmd, "--time_based") + cmd = append(cmd, fmt.Sprintf("--runtime=%d", f.Time)) + } + return cmd +} + +// Report reports metrics based on output from an 'fio' command. +func (f *Fio) Report(b *testing.B, output string) { + b.Helper() + // Parse the output and report the metrics. + isRead := strings.Contains(f.Test, "read") + bw, err := f.parseBandwidth(output, isRead) + if err != nil { + b.Fatalf("failed to parse bandwidth from %s with: %v", output, err) + } + b.ReportMetric(bw, "bandwidth_b/s") // in b/s. + + iops, err := f.parseIOps(output, isRead) + if err != nil { + b.Fatalf("failed to parse iops from %s with: %v", output, err) + } + b.ReportMetric(iops, "iops") +} + +// parseBandwidth reports the bandwidth in b/s. +func (f *Fio) parseBandwidth(data string, isRead bool) (float64, error) { + if isRead { + result, err := f.parseFioJSON(data, "read", "bw") + if err != nil { + return 0, err + } + return 1024 * result, nil + } + result, err := f.parseFioJSON(data, "write", "bw") + if err != nil { + return 0, err + } + return 1024 * result, nil +} + +// parseIOps reports the write IO per second metric. +func (f *Fio) parseIOps(data string, isRead bool) (float64, error) { + if isRead { + return f.parseFioJSON(data, "read", "iops") + } + return f.parseFioJSON(data, "write", "iops") +} + +// fioResult is for parsing FioJSON. +type fioResult struct { + Jobs []fioJob +} + +// fioJob is for parsing FioJSON. +type fioJob map[string]json.RawMessage + +// fioMetrics is for parsing FioJSON. +type fioMetrics map[string]json.RawMessage + +// parseFioJSON parses data and grabs "op" (read or write) and "metric" +// (bw or iops) from the JSON. +func (f *Fio) parseFioJSON(data, op, metric string) (float64, error) { + var result fioResult + if err := json.Unmarshal([]byte(data), &result); err != nil { + return 0, fmt.Errorf("could not unmarshal data: %v", err) + } + + if len(result.Jobs) < 1 { + return 0, fmt.Errorf("no jobs present to parse") + } + + var metrics fioMetrics + if err := json.Unmarshal(result.Jobs[0][op], &metrics); err != nil { + return 0, fmt.Errorf("could not unmarshal jobs: %v", err) + } + + if _, ok := metrics[metric]; !ok { + return 0, fmt.Errorf("no metric found for op: %s", op) + } + return strconv.ParseFloat(string(metrics[metric]), 64) +} diff --git a/test/benchmarks/tools/fio_test.go b/test/benchmarks/tools/fio_test.go new file mode 100644 index 000000000..a98277150 --- /dev/null +++ b/test/benchmarks/tools/fio_test.go @@ -0,0 +1,122 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import "testing" + +// TestFio checks the Fio parsers on sample output. +func TestFio(t *testing.T) { + sampleData := ` +{ + "fio version" : "fio-3.1", + "timestamp" : 1554837456, + "timestamp_ms" : 1554837456621, + "time" : "Tue Apr 9 19:17:36 2019", + "jobs" : [ + { + "jobname" : "test", + "groupid" : 0, + "error" : 0, + "eta" : 2147483647, + "elapsed" : 1, + "job options" : { + "name" : "test", + "ioengine" : "sync", + "size" : "1073741824", + "filename" : "/disk/file.dat", + "iodepth" : "4", + "bs" : "4096", + "rw" : "write" + }, + "read" : { + "io_bytes" : 0, + "io_kbytes" : 0, + "bw" : 123456, + "iops" : 1234.5678, + "runtime" : 0, + "total_ios" : 0, + "short_ios" : 0, + "bw_min" : 0, + "bw_max" : 0, + "bw_agg" : 0.000000, + "bw_mean" : 0.000000, + "bw_dev" : 0.000000, + "bw_samples" : 0, + "iops_min" : 0, + "iops_max" : 0, + "iops_mean" : 0.000000, + "iops_stddev" : 0.000000, + "iops_samples" : 0 + }, + "write" : { + "io_bytes" : 1073741824, + "io_kbytes" : 1048576, + "bw" : 1753471, + "iops" : 438367.892977, + "runtime" : 598, + "total_ios" : 262144, + "bw_min" : 1731120, + "bw_max" : 1731120, + "bw_agg" : 98.725328, + "bw_mean" : 1731120.000000, + "bw_dev" : 0.000000, + "bw_samples" : 1, + "iops_min" : 432780, + "iops_max" : 432780, + "iops_mean" : 432780.000000, + "iops_stddev" : 0.000000, + "iops_samples" : 1 + } + } + ] +} +` + fio := Fio{} + // WriteBandwidth. + got, err := fio.parseBandwidth(sampleData, false) + var want float64 = 1753471.0 * 1024 + if err != nil { + t.Fatalf("parse failed with err: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + // ReadBandwidth. + got, err = fio.parseBandwidth(sampleData, true) + want = 123456 * 1024 + if err != nil { + t.Fatalf("parse failed with err: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + // WriteIOps. + got, err = fio.parseIOps(sampleData, false) + want = 438367.892977 + if err != nil { + t.Fatalf("parse failed with err: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + // ReadIOps. + got, err = fio.parseIOps(sampleData, true) + want = 1234.5678 + if err != nil { + t.Fatalf("parse failed with err: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } +} diff --git a/test/benchmarks/tools/hey.go b/test/benchmarks/tools/hey.go new file mode 100644 index 000000000..699497c64 --- /dev/null +++ b/test/benchmarks/tools/hey.go @@ -0,0 +1,75 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "fmt" + "net" + "regexp" + "strconv" + "strings" + "testing" +) + +// Hey is for the client application 'hey'. +type Hey struct { + Requests int + Concurrency int + Doc string +} + +// MakeCmd returns a 'hey' command. 
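+// For example, with illustrative values Requests=10000, Concurrency=5 and an
+// empty Doc, targeting a server at 10.0.0.2:80, the command is:
+//
+//	hey -n 10000 -c 5 http://10.0.0.2:80/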
+func (h *Hey) MakeCmd(ip net.IP, port int) []string { + return strings.Split(fmt.Sprintf("hey -n %d -c %d http://%s:%d/%s", + h.Requests, h.Concurrency, ip, port, h.Doc), " ") +} + +// Report parses output from 'hey' and reports metrics. +func (h *Hey) Report(b *testing.B, output string) { + b.Helper() + requests, err := h.parseRequestsPerSecond(output) + if err != nil { + b.Fatalf("failed to parse requests per second: %v", err) + } + b.ReportMetric(requests, "requests_per_second") + + ave, err := h.parseAverageLatency(output) + if err != nil { + b.Fatalf("failed to parse average latency: %v", err) + } + b.ReportMetric(ave, "average_latency_secs") +} + +var heyReqPerSecondRE = regexp.MustCompile(`Requests/sec:\s*(\d+\.?\d+?)\s+`) + +// parseRequestsPerSecond finds requests per second from 'hey' output. +func (h *Hey) parseRequestsPerSecond(data string) (float64, error) { + match := heyReqPerSecondRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +var heyAverageLatencyRE = regexp.MustCompile(`Average:\s*(\d+\.?\d+?)\s+secs`) + +// parseHeyAverageLatency finds Average Latency in seconds form 'hey' output. +func (h *Hey) parseAverageLatency(data string) (float64, error) { + match := heyAverageLatencyRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("failed get average latency match%d : %s", len(match), data) + } + return strconv.ParseFloat(match[1], 64) +} diff --git a/test/benchmarks/tools/hey_test.go b/test/benchmarks/tools/hey_test.go new file mode 100644 index 000000000..e0cab1f52 --- /dev/null +++ b/test/benchmarks/tools/hey_test.go @@ -0,0 +1,81 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import "testing" + +// TestHey checks the Hey parsers on sample output. 
+func TestHey(t *testing.T) { + sampleData := ` + Summary: + Total: 2.2391 secs + Slowest: 1.6292 secs + Fastest: 0.0066 secs + Average: 0.5351 secs + Requests/sec: 89.3202 + + Total data: 841200 bytes + Size/request: 4206 bytes + + Response time histogram: + 0.007 [1] | + 0.169 [0] | + 0.331 [149] |â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â– â–  + 0.493 [0] | + 0.656 [0] | + 0.818 [0] | + 0.980 [0] | + 1.142 [0] | + 1.305 [0] | + 1.467 [49] |â– â– â– â– â– â– â– â– â– â– â– â– â–  + 1.629 [1] | + + + Latency distribution: + 10% in 0.2149 secs + 25% in 0.2449 secs + 50% in 0.2703 secs + 75% in 1.3315 secs + 90% in 1.4045 secs + 95% in 1.4232 secs + 99% in 1.4362 secs + + Details (average, fastest, slowest): + DNS+dialup: 0.0002 secs, 0.0066 secs, 1.6292 secs + DNS-lookup: 0.0000 secs, 0.0000 secs, 0.0000 secs + req write: 0.0000 secs, 0.0000 secs, 0.0012 secs + resp wait: 0.5225 secs, 0.0064 secs, 1.4346 secs + resp read: 0.0122 secs, 0.0001 secs, 0.2006 secs + + Status code distribution: + [200] 200 responses + ` + hey := Hey{} + want := 89.3202 + got, err := hey.parseRequestsPerSecond(sampleData) + if err != nil { + t.Fatalf("failed to parse request per second with: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } + + want = 0.5351 + got, err = hey.parseAverageLatency(sampleData) + if err != nil { + t.Fatalf("failed to parse average latency with: %v", err) + } else if got != want { + t.Fatalf("got: %f, want: %f", got, want) + } +} diff --git a/test/benchmarks/tools/iperf.go b/test/benchmarks/tools/iperf.go new file mode 100644 index 000000000..df3d9349b --- /dev/null +++ b/test/benchmarks/tools/iperf.go @@ -0,0 +1,56 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "fmt" + "net" + "regexp" + "strconv" + "strings" + "testing" +) + +// Iperf is for the client side of `iperf`. +type Iperf struct { + Time int +} + +// MakeCmd returns a iperf client command. +func (i *Iperf) MakeCmd(ip net.IP, port int) []string { + // iperf report in Kb realtime + return strings.Split(fmt.Sprintf("iperf -f K --realtime --time %d -c %s -p %d", i.Time, ip, port), " ") +} + +// Report parses output from iperf client and reports metrics. +func (i *Iperf) Report(b *testing.B, output string) { + b.Helper() + // Parse bandwidth and report it. + bW, err := i.bandwidth(output) + if err != nil { + b.Fatalf("failed to parse bandwitdth from %s: %v", output, err) + } + b.ReportMetric(bW*1024, "bandwidth_b/s") // Convert from Kb/s to b/s. +} + +// bandwidth parses the Bandwidth number from an iperf report. A sample is below. 
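+//
+//	[ ID] Interval       Transfer     Bandwidth
+//	[  3]  0.0-10.0 sec   459520 KBytes  45900 KBytes/sec
+//
+// from which the regexp below extracts 45900 (KBytes/sec).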
+func (i *Iperf) bandwidth(data string) (float64, error) { + re := regexp.MustCompile(`\[\s*\d+\][^\n]+\s+(\d+\.?\d*)\s+KBytes/sec`) + match := re.FindStringSubmatch(data) + if len(match) < 1 { + return 0, fmt.Errorf("failed get bandwidth: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} diff --git a/test/benchmarks/tools/iperf_test.go b/test/benchmarks/tools/iperf_test.go new file mode 100644 index 000000000..03bb30d05 --- /dev/null +++ b/test/benchmarks/tools/iperf_test.go @@ -0,0 +1,34 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package tools + +import "testing" + +// TestIperf checks the Iperf parsers on sample output. +func TestIperf(t *testing.T) { + sampleData := ` +------------------------------------------------------------ +Client connecting to 10.138.15.215, TCP port 32779 +TCP window size: 45.0 KByte (default) +------------------------------------------------------------ +[ 3] local 10.138.15.216 port 32866 connected with 10.138.15.215 port 32779 +[ ID] Interval Transfer Bandwidth +[ 3] 0.0-10.0 sec 459520 KBytes 45900 KBytes/sec +` + i := Iperf{} + bandwidth, err := i.bandwidth(sampleData) + if err != nil || bandwidth != 45900 { + t.Fatalf("failed with: %v and %f", err, bandwidth) + } +} diff --git a/test/benchmarks/tools/redis.go b/test/benchmarks/tools/redis.go new file mode 100644 index 000000000..db32460ec --- /dev/null +++ b/test/benchmarks/tools/redis.go @@ -0,0 +1,64 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "fmt" + "net" + "regexp" + "strconv" + "strings" + "testing" +) + +// Redis is for the client 'redis-benchmark'. +type Redis struct { + Operation string +} + +// MakeCmd returns a redis-benchmark client command. +func (r *Redis) MakeCmd(ip net.IP, port int) []string { + // There is no -t PING_BULK for redis-benchmark, so adjust the command in that case. + // Note that "ping" will run both PING_INLINE and PING_BULK. + if r.Operation == "PING_BULK" { + return strings.Split( + fmt.Sprintf("redis-benchmark --csv -t ping -h %s -p %d", ip, port), " ") + } + + // runs redis-benchmark -t operation for 100K requests against server. + return strings.Split( + fmt.Sprintf("redis-benchmark --csv -t %s -h %s -p %d", r.Operation, ip, port), " ") +} + +// Report parses output from redis-benchmark client and reports metrics. 
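+// For example, for Redis{Operation: "GET"} and --csv output containing a line
+// such as
+//
+//	"GET","49382.71"
+//
+// Report records 49382.71 under the metric name "GET".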
+func (r *Redis) Report(b *testing.B, output string) { + b.Helper() + result, err := r.parseOperation(output) + if err != nil { + b.Fatalf("parsing result %s failed with err: %v", output, err) + } + b.ReportMetric(result, r.Operation) // operations per second +} + +// parseOperation grabs the metric operations per second from redis-benchmark output. +func (r *Redis) parseOperation(data string) (float64, error) { + re := regexp.MustCompile(fmt.Sprintf(`"%s( .*)?","(\d*\.\d*)"`, r.Operation)) + match := re.FindStringSubmatch(data) + // If no match, simply don't add it to the result map. + if len(match) < 3 { + return 0.0, fmt.Errorf("could not find %s in %s", r.Operation, data) + } + return strconv.ParseFloat(match[2], 64) +} diff --git a/test/benchmarks/tools/redis_test.go b/test/benchmarks/tools/redis_test.go new file mode 100644 index 000000000..4bafda66f --- /dev/null +++ b/test/benchmarks/tools/redis_test.go @@ -0,0 +1,87 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "testing" +) + +// TestRedis checks the Redis parsers on sample output. +func TestRedis(t *testing.T) { + sampleData := ` + "PING_INLINE","48661.80" + "PING_BULK","50301.81" + "SET","48923.68" + "GET","49382.71" + "INCR","49975.02" + "LPUSH","49875.31" + "RPUSH","50276.52" + "LPOP","50327.12" + "RPOP","50556.12" + "SADD","49504.95" + "HSET","49504.95" + "SPOP","50025.02" + "LPUSH (needed to benchmark LRANGE)","48875.86" + "LRANGE_100 (first 100 elements)","33955.86" + "LRANGE_300 (first 300 elements)","16550.81"// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tools + + "LRANGE_500 (first 450 elements)","13653.74" + "LRANGE_600 (first 600 elements)","11219.57" + "MSET (10 keys)","44682.75" + ` + wants := map[string]float64{ + "PING_INLINE": 48661.80, + "PING_BULK": 50301.81, + "SET": 48923.68, + "GET": 49382.71, + "INCR": 49975.02, + "LPUSH": 49875.31, + "RPUSH": 50276.52, + "LPOP": 50327.12, + "RPOP": 50556.12, + "SADD": 49504.95, + "HSET": 49504.95, + "SPOP": 50025.02, + "LRANGE_100": 33955.86, + "LRANGE_300": 16550.81, + "LRANGE_500": 13653.74, + "LRANGE_600": 11219.57, + "MSET": 44682.75, + } + for op, want := range wants { + redis := Redis{ + Operation: op, + } + if got, err := redis.parseOperation(sampleData); err != nil { + t.Fatalf("failed to parse %s: %v", op, err) + } else if want != got { + t.Fatalf("wanted %f for op %s, got %f", want, op, got) + } + } +} diff --git a/test/benchmarks/tools/tools.go b/test/benchmarks/tools/tools.go new file mode 100644 index 000000000..eb61c0136 --- /dev/null +++ b/test/benchmarks/tools/tools.go @@ -0,0 +1,17 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package tools holds tooling to couple command formatting and output parsers +// together. +package tools -- cgit v1.2.3 From 2a7b2a61e3ea32129c26eeaa6fab3d81a5d8ad6e Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 11 Jun 2020 20:33:56 -0700 Subject: iptables: support SO_ORIGINAL_DST Envoy (#170) uses this to get the original destination of redirected packets. --- pkg/abi/linux/netfilter.go | 8 +- pkg/abi/linux/socket.go | 4 +- pkg/sentry/socket/netstack/netstack.go | 17 ++++ pkg/sentry/strace/socket.go | 1 + pkg/tcpip/stack/conntrack.go | 26 ++++++ pkg/tcpip/stack/iptables.go | 11 ++- pkg/tcpip/tcpip.go | 4 + pkg/tcpip/transport/tcp/endpoint.go | 11 +++ test/iptables/BUILD | 1 + test/iptables/iptables_test.go | 8 ++ test/iptables/iptables_unsafe.go | 63 ++++++++++++++ test/iptables/iptables_util.go | 51 ++++++++++- test/iptables/nat.go | 152 ++++++++++++++++++++++++++++++++- 13 files changed, 344 insertions(+), 13 deletions(-) create mode 100644 test/iptables/iptables_unsafe.go (limited to 'test') diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index a91f9f018..9c27f7bb2 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -59,7 +59,7 @@ var VerdictStrings = map[int32]string{ NF_RETURN: "RETURN", } -// Socket options. These correspond to values in +// Socket options for SOL_SOCKET. These correspond to values in // include/uapi/linux/netfilter_ipv4/ip_tables.h. const ( IPT_BASE_CTL = 64 @@ -74,6 +74,12 @@ const ( IPT_SO_GET_MAX = IPT_SO_GET_REVISION_TARGET ) +// Socket option for SOL_IP. This corresponds to the value in +// include/uapi/linux/netfilter_ipv4.h. +const ( + SO_ORIGINAL_DST = 80 +) + // Name lengths. These correspond to values in // include/uapi/linux/netfilter/x_tables.h. 
const ( diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go index c24a8216e..d6946bb82 100644 --- a/pkg/abi/linux/socket.go +++ b/pkg/abi/linux/socket.go @@ -239,11 +239,13 @@ const SockAddrMax = 128 type InetAddr [4]byte // SockAddrInet is struct sockaddr_in, from uapi/linux/in.h. +// +// +marshal type SockAddrInet struct { Family uint16 Port uint16 Addr InetAddr - Zero [8]uint8 // pad to sizeof(struct sockaddr). + _ [8]uint8 // pad to sizeof(struct sockaddr). } // InetMulticastRequest is struct ip_mreq, from uapi/linux/in.h. diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index f86e6cd7a..31a168f7e 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1490,6 +1490,10 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marsha vP := primitive.Int32(boolToInt32(v)) return &vP, nil + case linux.SO_ORIGINAL_DST: + // TODO(gvisor.dev/issue/170): ip6tables. + return nil, syserr.ErrInvalidArgument + default: emitUnimplementedEventIPv6(t, name) } @@ -1600,6 +1604,19 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in vP := primitive.Int32(boolToInt32(v)) return &vP, nil + case linux.SO_ORIGINAL_DST: + if outLen < int(binary.Size(linux.SockAddrInet{})) { + return nil, syserr.ErrInvalidArgument + } + + var v tcpip.OriginalDestinationOption + if err := ep.GetSockOpt(&v); err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + a, _ := ConvertAddress(linux.AF_INET, tcpip.FullAddress(v)) + return a.(*linux.SockAddrInet), nil + default: emitUnimplementedEventIP(t, name) } diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go index c0512de89..b51c4c941 100644 --- a/pkg/sentry/strace/socket.go +++ b/pkg/sentry/strace/socket.go @@ -521,6 +521,7 @@ var sockOptNames = map[uint64]abi.ValueSet{ linux.IP_ROUTER_ALERT: "IP_ROUTER_ALERT", linux.IP_PKTOPTIONS: "IP_PKTOPTIONS", linux.IP_MTU: "IP_MTU", + linux.SO_ORIGINAL_DST: "SO_ORIGINAL_DST", }, linux.SOL_SOCKET: { linux.SO_ERROR: "SO_ERROR", diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go index 559a1c4dd..470c265aa 100644 --- a/pkg/tcpip/stack/conntrack.go +++ b/pkg/tcpip/stack/conntrack.go @@ -240,7 +240,10 @@ func (ct *ConnTrack) connFor(pkt *PacketBuffer) (*conn, direction) { if err != nil { return nil, dirOriginal } + return ct.connForTID(tid) +} +func (ct *ConnTrack) connForTID(tid tupleID) (*conn, direction) { bucket := ct.bucket(tid) now := time.Now() @@ -604,3 +607,26 @@ func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bucket int, now time.Time) bo return true } + +func (ct *ConnTrack) originalDst(epID TransportEndpointID) (tcpip.Address, uint16, *tcpip.Error) { + // Lookup the connection. The reply's original destination + // describes the original address. + tid := tupleID{ + srcAddr: epID.LocalAddress, + srcPort: epID.LocalPort, + dstAddr: epID.RemoteAddress, + dstPort: epID.RemotePort, + transProto: header.TCPProtocolNumber, + netProto: header.IPv4ProtocolNumber, + } + conn, _ := ct.connForTID(tid) + if conn == nil { + // Not a tracked connection. + return "", 0, tcpip.ErrNotConnected + } else if conn.manip == manipNone { + // Unmanipulated connection. 
+ return "", 0, tcpip.ErrInvalidOptionValue + } + + return conn.original.dstAddr, conn.original.dstPort, nil +} diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go index cbbae4224..110ba073d 100644 --- a/pkg/tcpip/stack/iptables.go +++ b/pkg/tcpip/stack/iptables.go @@ -218,19 +218,16 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr // Many users never configure iptables. Spare them the cost of rule // traversal if rules have never been set. it.mu.RLock() + defer it.mu.RUnlock() if !it.modified { - it.mu.RUnlock() return true } - it.mu.RUnlock() // Packets are manipulated only if connection and matching // NAT rule exists. shouldTrack := it.connections.handlePacket(pkt, hook, gso, r) // Go through each table containing the hook. - it.mu.RLock() - defer it.mu.RUnlock() priorities := it.priorities[hook] for _, tableID := range priorities { // If handlePacket already NATed the packet, we don't need to @@ -418,3 +415,9 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx // All the matchers matched, so run the target. return rule.Target.Action(pkt, &it.connections, hook, gso, r, address) } + +// OriginalDst returns the original destination of redirected connections. It +// returns an error if the connection doesn't exist or isn't redirected. +func (it *IPTables) OriginalDst(epID TransportEndpointID) (tcpip.Address, uint16, *tcpip.Error) { + return it.connections.originalDst(epID) +} diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index a634b9b60..45f59b60f 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -954,6 +954,10 @@ type DefaultTTLOption uint8 // classic BPF filter on a given endpoint. type SocketDetachFilterOption int +// OriginalDestinationOption is used to get the original destination address +// and port of a redirected packet. +type OriginalDestinationOption FullAddress + // IPPacketInfo is the message structure for IP_PKTINFO. // // +stateify savable diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 0f7487963..682687ebe 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2017,6 +2017,17 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { *o = tcpip.TCPDeferAcceptOption(e.deferAccept) e.UnlockUser() + case *tcpip.OriginalDestinationOption: + ipt := e.stack.IPTables() + addr, port, err := ipt.OriginalDst(e.ID) + if err != nil { + return err + } + *o = tcpip.OriginalDestinationOption{ + Addr: addr, + Port: port, + } + default: return tcpip.ErrUnknownProtocolOption } diff --git a/test/iptables/BUILD b/test/iptables/BUILD index 40b63ebbe..66453772a 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -9,6 +9,7 @@ go_library( "filter_input.go", "filter_output.go", "iptables.go", + "iptables_unsafe.go", "iptables_util.go", "nat.go", ], diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 550b6198a..fda5f694f 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -371,3 +371,11 @@ func TestFilterAddrs(t *testing.T) { } } } + +func TestNATPreOriginalDst(t *testing.T) { + singleTest(t, NATPreOriginalDst{}) +} + +func TestNATOutOriginalDst(t *testing.T) { + singleTest(t, NATOutOriginalDst{}) +} diff --git a/test/iptables/iptables_unsafe.go b/test/iptables/iptables_unsafe.go new file mode 100644 index 000000000..bd85a8fea --- /dev/null +++ b/test/iptables/iptables_unsafe.go @@ -0,0 +1,63 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iptables + +import ( + "fmt" + "syscall" + "unsafe" +) + +type originalDstError struct { + errno syscall.Errno +} + +func (e originalDstError) Error() string { + return fmt.Sprintf("errno (%d) when calling getsockopt(SO_ORIGINAL_DST): %v", int(e.errno), e.errno.Error()) +} + +// SO_ORIGINAL_DST gets the original destination of a redirected packet via +// getsockopt. +const SO_ORIGINAL_DST = 80 + +func originalDestination4(connfd int) (syscall.RawSockaddrInet4, error) { + var addr syscall.RawSockaddrInet4 + var addrLen uint32 = syscall.SizeofSockaddrInet4 + if errno := originalDestination(connfd, syscall.SOL_IP, unsafe.Pointer(&addr), &addrLen); errno != 0 { + return syscall.RawSockaddrInet4{}, originalDstError{errno} + } + return addr, nil +} + +func originalDestination6(connfd int) (syscall.RawSockaddrInet6, error) { + var addr syscall.RawSockaddrInet6 + var addrLen uint32 = syscall.SizeofSockaddrInet6 + if errno := originalDestination(connfd, syscall.SOL_IPV6, unsafe.Pointer(&addr), &addrLen); errno != 0 { + return syscall.RawSockaddrInet6{}, originalDstError{errno} + } + return addr, nil +} + +func originalDestination(connfd int, level uintptr, optval unsafe.Pointer, optlen *uint32) syscall.Errno { + _, _, errno := syscall.Syscall6( + syscall.SYS_GETSOCKOPT, + uintptr(connfd), + level, + SO_ORIGINAL_DST, + uintptr(optval), + uintptr(unsafe.Pointer(optlen)), + 0) + return errno +} diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index ca80a4b5f..5125fe47b 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -15,6 +15,8 @@ package iptables import ( + "encoding/binary" + "errors" "fmt" "net" "os/exec" @@ -218,17 +220,58 @@ func filterAddrs(addrs []string, ipv6 bool) []string { // getInterfaceName returns the name of the interface other than loopback. func getInterfaceName() (string, bool) { - var ifname string + iface, ok := getNonLoopbackInterface() + if !ok { + return "", false + } + return iface.Name, true +} + +func getInterfaceAddrs(ipv6 bool) ([]net.IP, error) { + iface, ok := getNonLoopbackInterface() + if !ok { + return nil, errors.New("no non-loopback interface found") + } + addrs, err := iface.Addrs() + if err != nil { + return nil, err + } + + // Get only IPv4 or IPv6 addresses. + ips := make([]net.IP, 0, len(addrs)) + for _, addr := range addrs { + parts := strings.Split(addr.String(), "/") + var ip net.IP + // To16() returns IPv4 addresses as IPv4-mapped IPv6 addresses. + // So we check whether To4() returns nil to test whether the + // address is v4 or v6. 
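+		// For example (addresses are illustrative):
+		//
+		//	net.ParseIP("192.0.2.1").To4()   // non-nil: IPv4
+		//	net.ParseIP("2001:db8::1").To4() // nil: IPv6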
+ if v4 := net.ParseIP(parts[0]).To4(); ipv6 && v4 == nil { + ip = net.ParseIP(parts[0]).To16() + } else { + ip = v4 + } + if ip != nil { + ips = append(ips, ip) + } + } + return ips, nil +} + +func getNonLoopbackInterface() (net.Interface, bool) { if interfaces, err := net.Interfaces(); err == nil { for _, intf := range interfaces { if intf.Name != "lo" { - ifname = intf.Name - break + return intf, true } } } + return net.Interface{}, false +} - return ifname, ifname != "" +func htons(x uint16) uint16 { + buf := make([]byte, 2) + binary.BigEndian.PutUint16(buf, x) + return binary.LittleEndian.Uint16(buf) } func localIP(ipv6 bool) string { diff --git a/test/iptables/nat.go b/test/iptables/nat.go index ac0d91bb2..b7fea2527 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -18,12 +18,11 @@ import ( "errors" "fmt" "net" + "syscall" "time" ) -const ( - redirectPort = 42 -) +const redirectPort = 42 func init() { RegisterTestCase(NATPreRedirectUDPPort{}) @@ -42,6 +41,8 @@ func init() { RegisterTestCase(NATOutRedirectInvert{}) RegisterTestCase(NATRedirectRequiresProtocol{}) RegisterTestCase(NATLoopbackSkipsPrerouting{}) + RegisterTestCase(NATPreOriginalDst{}) + RegisterTestCase(NATOutOriginalDst{}) } // NATPreRedirectUDPPort tests that packets are redirected to different port. @@ -471,6 +472,151 @@ func (NATLoopbackSkipsPrerouting) LocalAction(ip net.IP, ipv6 bool) error { return nil } +// NATPreOriginalDst tests that SO_ORIGINAL_DST returns the pre-NAT destination +// of PREROUTING NATted packets. +type NATPreOriginalDst struct{} + +// Name implements TestCase.Name. +func (NATPreOriginalDst) Name() string { + return "NATPreOriginalDst" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATPreOriginalDst) ContainerAction(ip net.IP, ipv6 bool) error { + // Redirect incoming TCP connections to acceptPort. + if err := natTable(ipv6, "-A", "PREROUTING", + "-p", "tcp", + "--destination-port", fmt.Sprintf("%d", dropPort), + "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)); err != nil { + return err + } + + addrs, err := getInterfaceAddrs(ipv6) + if err != nil { + return err + } + return listenForRedirectedConn(ipv6, addrs) +} + +// LocalAction implements TestCase.LocalAction. +func (NATPreOriginalDst) LocalAction(ip net.IP, ipv6 bool) error { + return connectTCP(ip, dropPort, sendloopDuration) +} + +// NATOutOriginalDst tests that SO_ORIGINAL_DST returns the pre-NAT destination +// of OUTBOUND NATted packets. +type NATOutOriginalDst struct{} + +// Name implements TestCase.Name. +func (NATOutOriginalDst) Name() string { + return "NATOutOriginalDst" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATOutOriginalDst) ContainerAction(ip net.IP, ipv6 bool) error { + // Redirect incoming TCP connections to acceptPort. + if err := natTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)); err != nil { + return err + } + + connCh := make(chan error) + go func() { + connCh <- connectTCP(ip, dropPort, sendloopDuration) + }() + + if err := listenForRedirectedConn(ipv6, []net.IP{ip}); err != nil { + return err + } + return <-connCh +} + +// LocalAction implements TestCase.LocalAction. +func (NATOutOriginalDst) LocalAction(ip net.IP, ipv6 bool) error { + // No-op. + return nil +} + +func listenForRedirectedConn(ipv6 bool, originalDsts []net.IP) error { + // The net package doesn't give guarantee access to the connection's + // underlying FD, and thus we cannot call getsockopt. 
We have to use + // traditional syscalls for SO_ORIGINAL_DST. + + // Create the listening socket, bind, listen, and accept. + family := syscall.AF_INET + if ipv6 { + family = syscall.AF_INET6 + } + sockfd, err := syscall.Socket(family, syscall.SOCK_STREAM, 0) + if err != nil { + return err + } + defer syscall.Close(sockfd) + + var bindAddr syscall.Sockaddr + if ipv6 { + bindAddr = &syscall.SockaddrInet6{ + Port: acceptPort, + Addr: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // in6addr_any + } + } else { + bindAddr = &syscall.SockaddrInet4{ + Port: acceptPort, + Addr: [4]byte{0, 0, 0, 0}, // INADDR_ANY + } + } + if err := syscall.Bind(sockfd, bindAddr); err != nil { + return err + } + + if err := syscall.Listen(sockfd, 1); err != nil { + return err + } + + connfd, _, err := syscall.Accept(sockfd) + if err != nil { + return err + } + defer syscall.Close(connfd) + + // Verify that, despite listening on acceptPort, SO_ORIGINAL_DST + // indicates the packet was sent to originalDst:dropPort. + if ipv6 { + got, err := originalDestination6(connfd) + if err != nil { + return err + } + // The original destination could be any of our IPs. + for _, dst := range originalDsts { + want := syscall.RawSockaddrInet6{ + Family: syscall.AF_INET6, + Port: htons(dropPort), + } + copy(want.Addr[:], dst.To16()) + if got == want { + return nil + } + } + return fmt.Errorf("SO_ORIGINAL_DST returned %+v, but wanted one of %+v (note: port numbers are in network byte order)", got, originalDsts) + } else { + got, err := originalDestination4(connfd) + if err != nil { + return err + } + // The original destination could be any of our IPs. + for _, dst := range originalDsts { + want := syscall.RawSockaddrInet4{ + Family: syscall.AF_INET, + Port: htons(dropPort), + } + copy(want.Addr[:], dst.To4()) + if got == want { + return nil + } + } + return fmt.Errorf("SO_ORIGINAL_DST returned %+v, but wanted one of %+v (note: port numbers are in network byte order)", got, originalDsts) + } +} + // loopbackTests runs an iptables rule and ensures that packets sent to // dest:dropPort are received by localhost:acceptPort. func loopbackTest(ipv6 bool, dest net.IP, args ...string) error { -- cgit v1.2.3 From 5d2b09b71d5c073f97f59dd17f6f7ad3ceb4a536 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 31 Jul 2020 11:38:01 -0700 Subject: s/github.dev/gvisor.dev PiperOrigin-RevId: 324249991 --- test/syscalls/linux/raw_socket_hdrincl.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc index 97f0467aa..2f25aceb2 100644 --- a/test/syscalls/linux/raw_socket_hdrincl.cc +++ b/test/syscalls/linux/raw_socket_hdrincl.cc @@ -178,7 +178,7 @@ TEST_F(RawHDRINCL, ConnectToLoopback) { } TEST_F(RawHDRINCL, SendWithoutConnectSucceeds) { - // FIXME(github.dev/issue/3159): Test currently flaky. + // FIXME(gvisor.dev/issue/3159): Test currently flaky. SKIP_IF(true); struct iphdr hdr = LoopbackHeader(); @@ -284,7 +284,7 @@ TEST_F(RawHDRINCL, SendAndReceive) { // Send and receive a packet where the sendto address is not the same as the // provided destination. TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) { - // FIXME(github.dev/issue/3160): Test currently flaky. + // FIXME(gvisor.dev/issue/3160): Test currently flaky. 
SKIP_IF(true); int port = 40000; -- cgit v1.2.3 From 12a6657d9c037b4359923069c67377d516b15194 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Fri, 31 Jul 2020 15:59:28 -0700 Subject: [runtime tests] Enhance java runtime test. - Added a bunch of helpful options which help in speeding up the test and providing useful output. - Unexcluded passing tests and updated bugs. Excluded tests which were failing. - Increased the batch size for java tests so that we can take advantage of the shared JVMs. The running time of the tests decreased from 3+ hours (I don't know the exact running time because this test has always timed out after 3 hours) to 1 hour 15 minutes. We can reliably run this a CI kokoro job. PiperOrigin-RevId: 324301503 --- test/runtimes/BUILD | 1 + test/runtimes/defs.bzl | 6 +++ test/runtimes/exclude_java11.csv | 93 ++++++++++++++++++++++++++++++++++------ test/runtimes/proctor/java.go | 7 ++- test/runtimes/runner/main.go | 2 +- 5 files changed, 94 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 1728161ce..3be123d94 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -11,6 +11,7 @@ runtime_test( runtime_test( name = "java11", + batch = 100, exclude_file = "exclude_java11.csv", lang = "java", shard_count = 20, diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl index 5779b9591..db22029a8 100644 --- a/test/runtimes/defs.bzl +++ b/test/runtimes/defs.bzl @@ -9,6 +9,8 @@ def _runtime_test_impl(ctx): ctx.attr.lang, "--image", ctx.attr.image, + "--batch", + str(ctx.attr.batch), ] if ctx.attr.exclude_file: args += [ @@ -47,6 +49,10 @@ _runtime_test = rule( mandatory = False, allow_single_file = True, ), + "batch": attr.int( + default = 50, + mandatory = False, + ), "_runner": attr.label( default = "//test/runtimes/runner:runner", ), diff --git a/test/runtimes/exclude_java11.csv b/test/runtimes/exclude_java11.csv index c012e5a56..4d62f7d3a 100644 --- a/test/runtimes/exclude_java11.csv +++ b/test/runtimes/exclude_java11.csv @@ -15,10 +15,9 @@ java/lang/Character/CheckScript.java,,Fails in Docker java/lang/Character/CheckUnicode.java,,Fails in Docker java/lang/Class/GetPackageBootLoaderChildLayer.java,, java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker +java/lang/module/ModuleDescriptorTest.java,, java/lang/String/nativeEncoding/StringPlatformChars.java,, -java/net/DatagramSocket/ReuseAddressTest.java,, -java/net/DatagramSocket/SendDatagramToBadAddress.java,b/78473345, -java/net/Inet4Address/PingThis.java,, +java/net/CookieHandler/B6791927.java,,java.lang.RuntimeException: Expiration date shouldn't be 0 java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103, java/net/MulticastSocket/MulticastTTL.java,, java/net/MulticastSocket/Promiscuous.java,, @@ -29,23 +28,16 @@ java/net/MulticastSocket/TestDefaults.java,, java/net/MulticastSocket/TimeToLive.java,, java/net/NetworkInterface/NetworkInterfaceStreamTest.java,, java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported -java/net/Socket/TrafficClass.java,b/78527818,Not supported on gVisor java/net/Socket/UrgentDataTest.java,b/111515323, -java/net/Socket/setReuseAddress/Basic.java,b/78519214,SO_REUSEADDR enabled by default java/net/SocketOption/OptionsTest.java,,Fails in Docker -java/net/SocketOption/TcpKeepAliveTest.java,, java/net/SocketPermission/SocketPermissionTest.java,, java/net/URLConnection/6212146/TestDriver.java,,Fails in Docker java/net/httpclient/RequestBuilderTest.java,,Fails in Docker 
-java/net/httpclient/ShortResponseBody.java,, -java/net/httpclient/ShortResponseBodyWithRetry.java,, -java/nio/channels/AsyncCloseAndInterrupt.java,, -java/nio/channels/AsynchronousServerSocketChannel/Basic.java,, -java/nio/channels/AsynchronousSocketChannel/Basic.java,b/77921528,SO_KEEPALIVE is not settable java/nio/channels/DatagramChannel/BasicMulticastTests.java,, -java/nio/channels/DatagramChannel/SocketOptionTests.java,,Fails in Docker +java/nio/channels/DatagramChannel/SocketOptionTests.java,,java.net.SocketException: Invalid argument java/nio/channels/DatagramChannel/UseDGWithIPv6.java,, java/nio/channels/FileChannel/directio/DirectIOTest.java,,Fails in Docker +java/nio/channels/FileChannel/directio/PwriteDirect.java,,java.io.IOException: Invalid argument java/nio/channels/Selector/OutOfBand.java,, java/nio/channels/Selector/SelectWithConsumer.java,,Flaky java/nio/channels/ServerSocketChannel/SocketOptionTests.java,, @@ -59,15 +51,89 @@ java/text/Format/NumberFormat/CurrencyFormat.java,,Fails in Docker java/util/Calendar/JapaneseEraNameTest.java,, java/util/Currency/CurrencyTest.java,,Fails in Docker java/util/Currency/ValidateISO4217.java,,Fails in Docker +java/util/EnumSet/BogusEnumSet.java,,"java.io.InvalidClassException: java.util.EnumSet; local class incompatible: stream classdesc serialVersionUID = -2409567991088730183, local class serialVersionUID = 1009687484059888093" +java/util/Locale/Bug8040211.java,,java.lang.RuntimeException: Failed. java/util/Locale/LSRDataTest.java,, +java/util/Properties/CompatibilityTest.java,,"java.lang.RuntimeException: jdk.internal.org.xml.sax.SAXParseException; Internal DTD subset is not allowed. The Properties XML document must have the following DOCTYPE declaration: " +java/util/ResourceBundle/Control/XMLResourceBundleTest.java,,java.util.MissingResourceException: Can't find bundle for base name XmlRB locale +java/util/ResourceBundle/modules/xmlformat/xmlformat.sh,,Timeout reached: 60000. Process is not alive! +java/util/TimeZone/TimeZoneTest.java,,Uncaught exception thrown in test method TestShortZoneIDs java/util/concurrent/locks/Lock/TimedAcquireLeak.java,, java/util/jar/JarFile/mrjar/MultiReleaseJarAPI.java,,Fails in Docker java/util/logging/LogManager/Configuration/updateConfiguration/SimpleUpdateConfigWithInputStreamTest.java,, java/util/logging/TestLoggerWeakRefLeak.java,, +java/util/spi/ResourceBundleControlProvider/UserDefaultControlTest.java,,java.util.MissingResourceException: Can't find bundle for base name com.foo.XmlRB locale javax/imageio/AppletResourceTest.java,, +javax/imageio/plugins/jpeg/JPEGsNotAcceleratedTest.java,,java.awt.HeadlessException: No X11 DISPLAY variable was set but this program performed an operation which requires it. 
javax/management/security/HashedPasswordFileTest.java,, +javax/net/ssl/DTLS/DTLSBufferOverflowUnderflowTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSDataExchangeTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSEnginesClosureTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSHandshakeTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSHandshakeWithReplicatedPacketsTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSIncorrectAppDataTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSMFLNTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSNotEnabledRC4Test.java,,Compilation failed +javax/net/ssl/DTLS/DTLSRehandshakeTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSRehandshakeWithCipherChangeTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSRehandshakeWithDataExTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSSequenceNumberTest.java,,Compilation failed +javax/net/ssl/DTLS/DTLSUnsupportedCiphersTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10BufferOverflowUnderflowTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10DataExchangeTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10EnginesClosureTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10HandshakeTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10HandshakeWithReplicatedPacketsTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10IncorrectAppDataTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10MFLNTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10NotEnabledRC4Test.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10RehandshakeTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10RehandshakeWithCipherChangeTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10RehandshakeWithDataExTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10SequenceNumberTest.java,,Compilation failed +javax/net/ssl/DTLSv10/DTLSv10UnsupportedCiphersTest.java,,Compilation failed javax/net/ssl/SSLSession/JSSERenegotiate.java,,Fails in Docker +javax/net/ssl/TLS/TLSDataExchangeTest.java,,Compilation failed +javax/net/ssl/TLS/TLSEnginesClosureTest.java,,Compilation failed +javax/net/ssl/TLS/TLSHandshakeTest.java,,Compilation failed +javax/net/ssl/TLS/TLSMFLNTest.java,,Compilation failed +javax/net/ssl/TLS/TLSNotEnabledRC4Test.java,,Compilation failed +javax/net/ssl/TLS/TLSRehandshakeTest.java,,Compilation failed +javax/net/ssl/TLS/TLSRehandshakeWithCipherChangeTest.java,,Compilation failed +javax/net/ssl/TLS/TLSRehandshakeWithDataExTest.java,,Compilation failed +javax/net/ssl/TLS/TLSUnsupportedCiphersTest.java,,Compilation failed +javax/net/ssl/TLSv1/TLSDataExchangeTest.java,,Compilation failed +javax/net/ssl/TLSv1/TLSEnginesClosureTest.java,,Compilation failed +javax/net/ssl/TLSv1/TLSHandshakeTest.java,,Compilation failed +javax/net/ssl/TLSv1/TLSMFLNTest.java,,Compilation failed +javax/net/ssl/TLSv1/TLSNotEnabledRC4Test.java,,Compilation failed +javax/net/ssl/TLSv1/TLSRehandshakeTest.java,,Compilation failed +javax/net/ssl/TLSv1/TLSRehandshakeWithCipherChangeTest.java,,Compilation failed +javax/net/ssl/TLSv1/TLSRehandshakeWithDataExTest.java,,Compilation failed +javax/net/ssl/TLSv1/TLSUnsupportedCiphersTest.java,,Compilation failed +javax/net/ssl/TLSv11/TLSDataExchangeTest.java,,Compilation failed +javax/net/ssl/TLSv11/TLSEnginesClosureTest.java,,Compilation failed +javax/net/ssl/TLSv11/TLSHandshakeTest.java,,Compilation failed +javax/net/ssl/TLSv11/TLSMFLNTest.java,,Compilation failed 
+javax/net/ssl/TLSv11/TLSNotEnabledRC4Test.java,,Compilation failed +javax/net/ssl/TLSv11/TLSRehandshakeTest.java,,Compilation failed +javax/net/ssl/TLSv11/TLSRehandshakeWithCipherChangeTest.java,,Compilation failed +javax/net/ssl/TLSv11/TLSRehandshakeWithDataExTest.java,,Compilation failed +javax/net/ssl/TLSv11/TLSUnsupportedCiphersTest.java,,Compilation failed +javax/net/ssl/TLSv12/TLSEnginesClosureTest.java,,Compilation failed javax/sound/sampled/AudioInputStream/FrameLengthAfterConversion.java,, +jdk/jfr/cmd/TestHelp.java,,java.lang.RuntimeException: 'Available commands are:' missing from stdout/stderr +jdk/jfr/cmd/TestPrint.java,,Missing file' missing from stdout/stderr +jdk/jfr/cmd/TestPrintDefault.java,,java.lang.RuntimeException: 'JVMInformation' missing from stdout/stderr +jdk/jfr/cmd/TestPrintJSON.java,,javax.script.ScriptException: :1:17 Expected an operand but found eof var jsonObject = ^ in at line number 1 at column number 17 +jdk/jfr/cmd/TestPrintXML.java,,org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 1; Premature end of file. +jdk/jfr/cmd/TestReconstruct.java,,java.lang.RuntimeException: 'Too few arguments' missing from stdout/stderr +jdk/jfr/cmd/TestSplit.java,,java.lang.RuntimeException: 'Missing file' missing from stdout/stderr +jdk/jfr/cmd/TestSummary.java,,java.lang.RuntimeException: 'Missing file' missing from stdout/stderr +jdk/jfr/event/compiler/TestCompilerStats.java,,java.lang.RuntimeException: Field nmetodsSize not in event +jdk/jfr/event/metadata/TestDefaultConfigurations.java,,Setting 'threshold' in event 'jdk.SecurityPropertyModification' was not configured in the configuration 'default' +jdk/jfr/event/runtime/TestActiveSettingEvent.java,,java.lang.Exception: Could not find setting with name jdk.X509Validation#threshold +jdk/jfr/event/runtime/TestModuleEvents.java,,java.lang.RuntimeException: assertEquals: expected jdk.proxy1 to equal java.base jdk/jfr/event/runtime/TestNetworkUtilizationEvent.java,, jdk/jfr/event/runtime/TestThreadParkEvent.java,, jdk/jfr/event/sampling/TestNative.java,, @@ -86,6 +152,7 @@ jdk/jfr/jcmd/TestJcmdStartStopDefault.java,, jdk/jfr/jcmd/TestJcmdStartWithOptions.java,, jdk/jfr/jcmd/TestJcmdStartWithSettings.java,, jdk/jfr/jcmd/TestJcmdStopInvalidFile.java,, +jdk/jfr/jvm/TestGetAllEventClasses.java,,Compilation failed jdk/jfr/jvm/TestJfrJavaBase.java,, jdk/jfr/startupargs/TestStartRecording.java,, jdk/modules/incubator/ImageModules.java,, @@ -123,4 +190,6 @@ tools/jlink/JLinkTest.java,, tools/jlink/plugins/IncludeLocalesPluginTest.java,, tools/jmod/hashes/HashesTest.java,, tools/launcher/BigJar.java,b/111611473, +tools/launcher/HelpFlagsTest.java,,java.lang.AssertionError: HelpFlagsTest failed: Tool jfr not covered by this test. Add specification to jdkTools array! +tools/launcher/VersionCheck.java,,java.lang.AssertionError: VersionCheck failed: testToolVersion: [jfr]; tools/launcher/modules/patch/systemmodules/PatchSystemModules.java,, diff --git a/test/runtimes/proctor/java.go b/test/runtimes/proctor/java.go index 737fbe23e..d456fa681 100644 --- a/test/runtimes/proctor/java.go +++ b/test/runtimes/proctor/java.go @@ -64,8 +64,11 @@ func (javaRunner) ListTests() ([]string, error) { func (javaRunner) TestCmds(tests []string) []*exec.Cmd { args := append( []string{ - "-noreport", - "-dir:" + javaTestDir, + "-agentvm", // Execute each action using a pool of reusable JVMs. + "-dir:" + javaTestDir, // Base directory for test files and directories. + "-noreport", // Do not generate a final report. 
+ "-timeoutFactor:20", // Extend the default timeout (2 min) of all tests by this factor. + "-verbose:nopass", // Verbose output but supress it for tests that passed. }, tests..., ) diff --git a/test/runtimes/runner/main.go b/test/runtimes/runner/main.go index e230912c9..948e7cf9c 100644 --- a/test/runtimes/runner/main.go +++ b/test/runtimes/runner/main.go @@ -40,7 +40,7 @@ var ( ) // Wait time for each test to run. -const timeout = 45 * time.Minute +const timeout = 90 * time.Minute func main() { flag.Parse() -- cgit v1.2.3 From fd5f4ffed1e425ec29d29477dc4e2a61420c1cf6 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 31 Jul 2020 16:10:24 -0700 Subject: test/socket_netlink_route: check that there is a route on local or main tables A new network namespace has only the local route table. PiperOrigin-RevId: 324303629 --- test/syscalls/linux/socket_netlink_route.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index e6647a1c3..b3fcf8e7c 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -577,7 +577,10 @@ TEST(NetlinkRouteTest, GetRouteDump) { std::cout << std::endl; - if (msg->rtm_table == RT_TABLE_MAIN) { + // If the test is running in a new network namespace, it will have only + // the local route table. + if (msg->rtm_table == RT_TABLE_MAIN || + (!IsRunningOnGvisor() && msg->rtm_table == RT_TABLE_LOCAL)) { routeFound = true; dstFound = rtDstFound && dstFound; } -- cgit v1.2.3 From e76c3c1064e2cb6ef79ea010705230a646369c49 Mon Sep 17 00:00:00 2001 From: Craig Chi Date: Fri, 31 Jul 2020 15:56:18 -0700 Subject: Fix MountFuseFilesystem tests failing Before kernel version 4.16-rc6, fuse mount is protected by capable(CAP_SYS_ADMIN). After this version, it uses ns_capable(CAP_SYS_ADMIN) to protect. Before the 4.16 kernel, it was not allowed to mount fuse file systems without the global CAP_SYS_ADMIN. Fixes #3360 --- test/syscalls/linux/mount.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'test') diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc index 97e8d0f7e..46b6f38db 100644 --- a/test/syscalls/linux/mount.cc +++ b/test/syscalls/linux/mount.cc @@ -326,6 +326,14 @@ TEST(MountTest, MountFuseFilesystemNoDevice) { SKIP_IF(IsRunningOnGvisor() && !IsFUSEEnabled()); auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // Before kernel version 4.16-rc6, FUSE mount is protected by + // capable(CAP_SYS_ADMIN). After this version, it uses + // ns_capable(CAP_SYS_ADMIN) to protect. Before the 4.16 kernel, it was not + // allowed to mount fuse file systems without the global CAP_SYS_ADMIN. + int res = mount("", dir.path().c_str(), "fuse", 0, ""); + SKIP_IF(!IsRunningOnGvisor() && res == -1 && errno == EPERM); + EXPECT_THAT(mount("", dir.path().c_str(), "fuse", 0, ""), SyscallFailsWithErrno(EINVAL)); } @@ -339,6 +347,12 @@ TEST(MountTest, MountFuseFilesystem) { std::string mopts = "fd=" + std::to_string(fd.get()); auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + + // See comments in MountFuseFilesystemNoDevice for the reason why we skip + // EPERM when running on Linux. 
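  // (Concretely: kernels before 4.16-rc6 gate fuse mounts on the global
  // capable(CAP_SYS_ADMIN) rather than ns_capable(CAP_SYS_ADMIN), so an EPERM
  // from this probe mount simply means the host environment cannot mount fuse
  // filesystems at all and the test cannot proceed.)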
+ int res = mount("", dir.path().c_str(), "fuse", 0, ""); + SKIP_IF(!IsRunningOnGvisor() && res == -1 && errno == EPERM); + auto const mount = ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "fuse", 0, mopts, 0)); } -- cgit v1.2.3 From 1a93a78d10a7e925fdb0a10ffd27d9b09cefd468 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Mon, 3 Aug 2020 11:58:22 -0700 Subject: Add support for a reverse HTTPD test. This change adds a new reverse HTTP test where the HTTPD server runs in a native container but the client runs inside gVisor. It allows us to test download performance under varying levels of concurrency. Also tweaks the concurrent request numbers to test for high levels of concurrency. PiperOrigin-RevId: 324651203 --- test/benchmarks/network/httpd_test.go | 56 +++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/test/benchmarks/network/httpd_test.go b/test/benchmarks/network/httpd_test.go index 07833f9cd..21cb0b804 100644 --- a/test/benchmarks/network/httpd_test.go +++ b/test/benchmarks/network/httpd_test.go @@ -51,7 +51,7 @@ func BenchmarkHttpdConcurrency(b *testing.B) { defer serverMachine.CleanUp() // The test iterates over client concurrency, so set other parameters. - concurrency := []int{1, 5, 10, 25} + concurrency := []int{1, 25, 50, 100, 1000} for _, c := range concurrency { b.Run(fmt.Sprintf("%d", c), func(b *testing.B) { @@ -60,14 +60,26 @@ func BenchmarkHttpdConcurrency(b *testing.B) { Concurrency: c, Doc: docs["10Kb"], } - runHttpd(b, clientMachine, serverMachine, hey) + runHttpd(b, clientMachine, serverMachine, hey, false /* reverse */) }) } } // BenchmarkHttpdDocSize iterates over different sized payloads, testing how -// well the runtime handles different payload sizes. +// well the runtime handles sending different payload sizes. func BenchmarkHttpdDocSize(b *testing.B) { + benchmarkHttpDocSize(b, false /* reverse */) +} + +// BenchmarkReverseHttpdDocSize iterates over different sized payloads, testing +// how well the runtime handles receiving different payload sizes. +func BenchmarkReverseHttpdDocSize(b *testing.B) { + benchmarkHttpDocSize(b, true /* reverse */) +} + +func benchmarkHttpdDocSize(b *testing.B, reverse bool) { + b.Helper() + clientMachine, err := h.GetMachine() if err != nil { b.Fatalf("failed to get machine: %v", err) @@ -81,24 +93,33 @@ func BenchmarkHttpdDocSize(b *testing.B) { defer serverMachine.CleanUp() for name, filename := range docs { - b.Run(name, func(b *testing.B) { - hey := &tools.Hey{ - Requests: 10000, - Concurrency: 1, - Doc: filename, - } - runHttpd(b, clientMachine, serverMachine, hey) - }) + concurrency := []int{1, 25, 50, 100, 1000} + for _, c := range concurrency { + b.Run(fmt.Sprintf("%s_%d", name, c), func(b *testing.B) { + hey := &tools.Hey{ + Requests: 10000, + Concurrency: c, + Doc: filename, + } + runHttpd(b, clientMachine, serverMachine, hey, reverse) + }) + } } } // runHttpd runs a single test run. -func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, hey *tools.Hey) { +func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, hey *tools.Hey, reverse bool) { b.Helper() // Grab a container from the server. ctx := context.Background() - server := serverMachine.GetContainer(ctx, b) + var server *dockerutil.Container + if reverse { + server = serverMachine.GetNativeContainer(ctx, b) + } else { + server = serverMachine.GetContainer(ctx, b) + } + defer server.CleanUp(ctx) // Copy the docs to /tmp and serve from there. 
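	// In reverse mode the roles are swapped: httpd runs in a native container
	// while the hey client runs inside the sandboxed runtime, so the benchmark
	// measures the runtime's download (receive) path rather than its serving
	// path.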
@@ -118,7 +139,7 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, hey *t "APACHE_PID_FILE=/tmp/apache.pid", }, }, "sh", "-c", cmd); err != nil { - b.Fatalf("failed to start server: %v") + b.Fatalf("failed to start server: %v", err) } ip, err := serverMachine.IPAddress() @@ -134,8 +155,13 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, hey *t // Check the server is serving. harness.WaitUntilServing(ctx, clientMachine, ip, servingPort) + var client *dockerutil.Container // Grab a client. - client := clientMachine.GetNativeContainer(ctx, b) + if reverse { + client = clientMachine.GetContainer(ctx, b) + } else { + client = clientMachine.GetNativeContainer(ctx, b) + } defer client.CleanUp(ctx) b.ResetTimer() -- cgit v1.2.3 From 1fbbc795ef8918006214032d96fa88b8a21e5e0a Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 3 Aug 2020 12:30:23 -0700 Subject: Add inotify events for fallocate and tests for fallocate/sendfile. Updates #1479, #2923. PiperOrigin-RevId: 324658826 --- pkg/sentry/syscalls/linux/vfs2/setstat.go | 9 ++--- test/syscalls/linux/inotify.cc | 55 +++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go index 37fa56c19..25cdb7a55 100644 --- a/pkg/sentry/syscalls/linux/vfs2/setstat.go +++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go @@ -254,11 +254,12 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, syserror.EFBIG } - return 0, nil, file.Allocate(t, mode, uint64(offset), uint64(length)) + if err := file.Allocate(t, mode, uint64(offset), uint64(length)); err != nil { + return 0, nil, err + } - // File length modified, generate notification. - // TODO(gvisor.dev/issue/1479): Reenable when Inotify is ported. - // file.Dirent.InotifyEvent(linux.IN_MODIFY, 0) + file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + return 0, nil, nil } // Utime implements Linux syscall utime(2). diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 220874aeb..6cf398097 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -1681,6 +1682,60 @@ TEST(Inotify, EpollNoDeadlock) { } } +TEST(Inotify, Fallocate) { + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS)); + + // Do an arbitrary modification with fallocate. 
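  // Extending the file to 123 bytes changes its length, so the watch should
  // observe exactly one IN_MODIFY event, mirroring the
  // InotifyWithParent(IN_MODIFY, ...) call added to Fallocate in setstat.go
  // above.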
+ ASSERT_THAT(fallocate(fd.get(), 0, 0, 123), SyscallSucceeds()); + std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_MODIFY, wd)})); +} + +TEST(Inotify, Sendfile) { + SKIP_IF(IsRunningWithVFS1()); + + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateFileWith(root.path(), "x", 0644)); + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor in = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + const FileDescriptor out = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_WRONLY)); + + // Create separate inotify instances for the in and out fds. If both watches + // were on the same instance, we would have discrepancies between Linux and + // gVisor (order of events, duplicate events), which is not that important + // since inotify is asynchronous anyway. + const FileDescriptor in_inotify = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const FileDescriptor out_inotify = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int in_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(in_inotify.get(), in_file.path(), IN_ALL_EVENTS)); + const int out_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(out_inotify.get(), out_file.path(), IN_ALL_EVENTS)); + + ASSERT_THAT(sendfile(out.get(), in.get(), /*offset=*/nullptr, 1), + SyscallSucceeds()); + + // Expect a single access event and a single modify event. + std::vector in_events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(in_inotify.get())); + std::vector out_events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(out_inotify.get())); + EXPECT_THAT(in_events, Are({Event(IN_ACCESS, in_wd)})); + EXPECT_THAT(out_events, Are({Event(IN_MODIFY, out_wd)})); +} + // On Linux, inotify behavior is not very consistent with splice(2). We try our // best to emulate Linux for very basic calls to splice. TEST(Inotify, SpliceOnWatchTarget) { -- cgit v1.2.3 From b5c9ff81922ba785d83eaccc464b0b15a8120798 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 3 Aug 2020 20:30:21 -0700 Subject: Exclude flaky nodejs runtime test. PiperOrigin-RevId: 324738840 --- test/runtimes/exclude_nodejs12.4.0.csv | 1 + 1 file changed, 1 insertion(+) (limited to 'test') diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv index 4eb0a4807..3b1d1bc9d 100644 --- a/test/runtimes/exclude_nodejs12.4.0.csv +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -16,6 +16,7 @@ parallel/test-dgram-bind-fd.js,b/132447356, parallel/test-dgram-create-socket-handle-fd.js,b/132447238, parallel/test-dgram-createSocket-type.js,b/68847739, parallel/test-dgram-socket-buffer-size.js,b/68847921, +parallel/test-dns-channel-timeout.js,b/161893056, parallel/test-fs-access.js,, parallel/test-fs-write-stream-double-close.js,, parallel/test-fs-write-stream-throw-type-error.js,b/110226209, -- cgit v1.2.3 From 1bdadbc4f84bf5aacd63d1f486d4c4610dea4110 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Tue, 4 Aug 2020 09:04:53 -0700 Subject: Fix broken httpd_test. 
PiperOrigin-RevId: 324822613 --- test/benchmarks/network/httpd_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/benchmarks/network/httpd_test.go b/test/benchmarks/network/httpd_test.go index 21cb0b804..336e04c91 100644 --- a/test/benchmarks/network/httpd_test.go +++ b/test/benchmarks/network/httpd_test.go @@ -68,13 +68,13 @@ func BenchmarkHttpdConcurrency(b *testing.B) { // BenchmarkHttpdDocSize iterates over different sized payloads, testing how // well the runtime handles sending different payload sizes. func BenchmarkHttpdDocSize(b *testing.B) { - benchmarkHttpDocSize(b, false /* reverse */) + benchmarkHttpdDocSize(b, false /* reverse */) } // BenchmarkReverseHttpdDocSize iterates over different sized payloads, testing // how well the runtime handles receiving different payload sizes. func BenchmarkReverseHttpdDocSize(b *testing.B) { - benchmarkHttpDocSize(b, true /* reverse */) + benchmarkHttpdDocSize(b, true /* reverse */) } func benchmarkHttpdDocSize(b *testing.B, reverse bool) { -- cgit v1.2.3 From 7dfcf727a5730b15020f136282a1ede15144a268 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Tue, 4 Aug 2020 09:49:33 -0700 Subject: [runtime tests] Fix nodejs runtime tests. - Exclude flaky tests. - Bump timeout. - Un-exclude passing tests to increase testing surface. - Create/Update bugs for tests that pass on runc but fail on runsc. PiperOrigin-RevId: 324830840 --- test/runtimes/exclude_nodejs12.4.0.csv | 69 ++++++++++++++++------------------ test/runtimes/proctor/nodejs.go | 2 +- 2 files changed, 34 insertions(+), 37 deletions(-) (limited to 'test') diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv index 3b1d1bc9d..9aaf1e647 100644 --- a/test/runtimes/exclude_nodejs12.4.0.csv +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -1,52 +1,49 @@ test name,bug id,comment benchmark/test-benchmark-fs.js,, -benchmark/test-benchmark-module.js,, benchmark/test-benchmark-napi.js,, doctool/test-make-doc.js,b/68848110,Expected to fail. -fixtures/test-error-first-line-offset.js,, -fixtures/test-fs-readfile-error.js,, -fixtures/test-fs-stat-sync-overflow.js,, -internet/test-dgram-broadcast-multi-process.js,, -internet/test-dgram-multicast-multi-process.js,, -internet/test-dgram-multicast-set-interface-lo.js,, +internet/test-dgram-multicast-set-interface-lo.js,b/162798882, internet/test-doctool-versions.js,, internet/test-uv-threadpool-schedule.js,, parallel/test-cluster-dgram-reuse.js,b/64024294, parallel/test-dgram-bind-fd.js,b/132447356, -parallel/test-dgram-create-socket-handle-fd.js,b/132447238, -parallel/test-dgram-createSocket-type.js,b/68847739, parallel/test-dgram-socket-buffer-size.js,b/68847921, parallel/test-dns-channel-timeout.js,b/161893056, parallel/test-fs-access.js,, -parallel/test-fs-write-stream-double-close.js,, +parallel/test-fs-watchfile.js,,Flaky - File already exists error parallel/test-fs-write-stream-throw-type-error.js,b/110226209, -parallel/test-fs-write-stream.js,, -parallel/test-http2-respond-file-error-pipe-offset.js,, -parallel/test-os.js,, +parallel/test-http-writable-true-after-close.js,,Flaky - Mismatched function calls. 
Expected exactly 1 actual 2 +parallel/test-os.js,b/63997097, parallel/test-process-uid-gid.js,, -pseudo-tty/test-assert-colors.js,, -pseudo-tty/test-assert-no-color.js,, -pseudo-tty/test-assert-position-indicator.js,, -pseudo-tty/test-async-wrap-getasyncid-tty.js,, -pseudo-tty/test-fatal-error.js,, -pseudo-tty/test-handle-wrap-isrefed-tty.js,, -pseudo-tty/test-readable-tty-keepalive.js,, -pseudo-tty/test-set-raw-mode-reset-process-exit.js,, -pseudo-tty/test-set-raw-mode-reset-signal.js,, -pseudo-tty/test-set-raw-mode-reset.js,, -pseudo-tty/test-stderr-stdout-handle-sigwinch.js,, -pseudo-tty/test-stdout-read.js,, -pseudo-tty/test-tty-color-support.js,, -pseudo-tty/test-tty-isatty.js,, -pseudo-tty/test-tty-stdin-call-end.js,, -pseudo-tty/test-tty-stdin-end.js,, -pseudo-tty/test-stdin-write.js,, -pseudo-tty/test-tty-stdout-end.js,, -pseudo-tty/test-tty-stdout-resize.js,, -pseudo-tty/test-tty-stream-constructors.js,, -pseudo-tty/test-tty-window-size.js,, -pseudo-tty/test-tty-wrap.js,, +parallel/test-tls-cli-min-version-1.0.js,,Flaky - EADDRINUSE +parallel/test-tls-cli-min-version-1.3.js,,Flaky - EADDRINUSE +parallel/test-tls-cli-max-version-1.2,,Flaky - EADDRINUSE +parallel/test-tls-min-max-version.js,,Flaky - EADDRINUSE +pseudo-tty/test-assert-colors.js,b/162801321, +pseudo-tty/test-assert-no-color.js,b/162801321, +pseudo-tty/test-assert-position-indicator.js,b/162801321, +pseudo-tty/test-async-wrap-getasyncid-tty.js,b/162801321, +pseudo-tty/test-fatal-error.js,b/162801321, +pseudo-tty/test-handle-wrap-isrefed-tty.js,b/162801321, +pseudo-tty/test-readable-tty-keepalive.js,b/162801321, +pseudo-tty/test-set-raw-mode-reset-process-exit.js,b/162801321, +pseudo-tty/test-set-raw-mode-reset-signal.js,b/162801321, +pseudo-tty/test-set-raw-mode-reset.js,b/162801321, +pseudo-tty/test-stderr-stdout-handle-sigwinch.js,b/162801321, +pseudo-tty/test-stdout-read.js,b/162801321, +pseudo-tty/test-tty-color-support.js,b/162801321, +pseudo-tty/test-tty-isatty.js,b/162801321, +pseudo-tty/test-tty-stdin-call-end.js,b/162801321, +pseudo-tty/test-tty-stdin-end.js,b/162801321, +pseudo-tty/test-stdin-write.js,b/162801321, +pseudo-tty/test-tty-stdout-end.js,b/162801321, +pseudo-tty/test-tty-stdout-resize.js,b/162801321, +pseudo-tty/test-tty-stream-constructors.js,b/162801321, +pseudo-tty/test-tty-window-size.js,b/162801321, +pseudo-tty/test-tty-wrap.js,b/162801321, pummel/test-net-pingpong.js,, -pummel/test-vm-memleak.js,, +pummel/test-vm-memleak.js,b/162799436, +sequential/test-child-process-pass-fd.js,b/63926391,Flaky +sequential/test-https-connect-localport.js,,Flaky - EADDRINUSE sequential/test-net-bytes-per-incoming-chunk-overhead.js,,flaky - timeout tick-processor/test-tick-processor-builtin.js,, diff --git a/test/runtimes/proctor/nodejs.go b/test/runtimes/proctor/nodejs.go index 23d6edc72..dead5af4f 100644 --- a/test/runtimes/proctor/nodejs.go +++ b/test/runtimes/proctor/nodejs.go @@ -41,6 +41,6 @@ func (nodejsRunner) ListTests() ([]string, error) { // TestCmds implements TestRunner.TestCmds. func (nodejsRunner) TestCmds(tests []string) []*exec.Cmd { - args := append([]string{filepath.Join("tools", "test.py")}, tests...) + args := append([]string{filepath.Join("tools", "test.py"), "--timeout=180"}, tests...) return []*exec.Cmd{exec.Command("/usr/bin/python", args...)} } -- cgit v1.2.3 From 21d0334e7f4a98ec49e7f46cacf2d51258eaec33 Mon Sep 17 00:00:00 2001 From: Craig Chi Date: Tue, 4 Aug 2020 12:27:55 -0700 Subject: Add FUSE integration test This commit adds an integration test framework for FUSE support. 
Please refer to the test example and test/fuse/README.md for further details. Fixes #3098 --- scripts/fuse_tests.sh | 20 +++++ test/fuse/BUILD | 1 + test/fuse/README.md | 102 ++++++++++++++++++++++ test/fuse/linux/BUILD | 21 +++++ test/fuse/linux/fuse_base.cc | 204 +++++++++++++++++++++++++++++++++++++++++++ test/fuse/linux/fuse_base.h | 97 ++++++++++++++++++++ test/runner/defs.bzl | 2 +- 7 files changed, 446 insertions(+), 1 deletion(-) create mode 100755 scripts/fuse_tests.sh create mode 100644 test/fuse/BUILD create mode 100644 test/fuse/README.md create mode 100644 test/fuse/linux/BUILD create mode 100644 test/fuse/linux/fuse_base.cc create mode 100644 test/fuse/linux/fuse_base.h (limited to 'test') diff --git a/scripts/fuse_tests.sh b/scripts/fuse_tests.sh new file mode 100755 index 000000000..bbaaa99fc --- /dev/null +++ b/scripts/fuse_tests.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Copyright 2020 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +source $(dirname $0)/common.sh + +# Run all vfs2_fuse system call tests. +test --test_tag_filters=fuse //test/fuse/... diff --git a/test/fuse/BUILD b/test/fuse/BUILD new file mode 100644 index 000000000..34b950644 --- /dev/null +++ b/test/fuse/BUILD @@ -0,0 +1 @@ +package(licenses = ["notice"]) diff --git a/test/fuse/README.md b/test/fuse/README.md new file mode 100644 index 000000000..a899f2826 --- /dev/null +++ b/test/fuse/README.md @@ -0,0 +1,102 @@ +# gVisor FUSE Test Suite + +This is an integration test suite for fuse(4) filesystem. It runs under both +gVisor and Linux, and ensures compatibility between the two. This test suite is +based on system calls test. + +This document describes the framework of fuse integration test and the +guidelines that should be followed when adding new fuse tests. + +## Integration Test Framework + +Please refer to the figure below. `>` is entering the function, `<` is leaving +the function, and `=` indicates sequentially entering and leaving. 
+ +``` + | Client (Test Main Process) | Server (FUSE Daemon) + | | + | >TEST_F() | + | >SetUp() | + | =MountFuse() | + | >SetUpFuseServer() | + | [create communication pipes] | + | =fork() | =fork() + | >WaitCompleted() | + | [wait for MarkDone()] | + | | =ConsumeFuseInit() + | | =MarkDone() + | SetExpected() | + | [construct expected reaction] | + | | >FuseLoop() + | | >ReceiveExpected() + | | [wait data from pipe] + | [write data to pipe] | + | [wait for MarkDone()] | + | | [save data to memory] + | | =MarkDone() + | read() + | | [wait for fs operation] + | >[Do fs operation] | + | [wait for fs response] | + | | TearDown() | + | =UnmountFuse() | + | +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "absl/strings/str_format.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +void FuseTest::SetUp() { + MountFuse(); + SetUpFuseServer(); +} + +void FuseTest::TearDown() { UnmountFuse(); } + +// Since CompareRequest is running in background thread, gTest assertions and +// expectations won't directly reflect the test result. However, the FUSE +// background server still connects to the same standard I/O as testing main +// thread. So EXPECT_XX can still be used to show different results. To +// ensure failed testing result is observable, return false and the result +// will be sent to test main thread via pipe. +bool FuseTest::CompareRequest(void* expected_mem, size_t expected_len, + void* real_mem, size_t real_len) { + if (expected_len != real_len) return false; + return memcmp(expected_mem, real_mem, expected_len) == 0; +} + +// SetExpected is called by the testing main thread to set expected request- +// response pair of a single FUSE operation. +void FuseTest::SetExpected(struct iovec* iov_in, int iov_in_cnt, + struct iovec* iov_out, int iov_out_cnt) { + EXPECT_THAT(RetryEINTR(writev)(set_expected_[1], iov_in, iov_in_cnt), + SyscallSucceedsWithValue(::testing::Gt(0))); + WaitCompleted(); + + EXPECT_THAT(RetryEINTR(writev)(set_expected_[1], iov_out, iov_out_cnt), + SyscallSucceedsWithValue(::testing::Gt(0))); + WaitCompleted(); +} + +// WaitCompleted waits for the FUSE server to finish its job and check if it +// completes without errors. +void FuseTest::WaitCompleted() { + char success; + EXPECT_THAT(RetryEINTR(read)(done_[0], &success, sizeof(success)), + SyscallSucceedsWithValue(1)); +} + +void FuseTest::MountFuse() { + EXPECT_THAT(dev_fd_ = open("/dev/fuse", O_RDWR), SyscallSucceeds()); + + std::string mount_opts = absl::StrFormat("fd=%d,%s", dev_fd_, kMountOpts); + EXPECT_THAT(mount("fuse", kMountPoint, "fuse", MS_NODEV | MS_NOSUID, + mount_opts.c_str()), + SyscallSucceedsWithValue(0)); +} + +void FuseTest::UnmountFuse() { + EXPECT_THAT(umount(kMountPoint), SyscallSucceeds()); + // TODO(gvisor.dev/issue/3330): ensure the process is terminated successfully. +} + +// ConsumeFuseInit consumes the first FUSE request and returns the +// corresponding PosixError. +PosixError FuseTest::ConsumeFuseInit() { + RETURN_ERROR_IF_SYSCALL_FAIL( + RetryEINTR(read)(dev_fd_, buf_.data(), buf_.size())); + + struct iovec iov_out[2]; + struct fuse_out_header out_header = { + .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_init_out), + .error = 0, + .unique = 2, + }; + // Returns an empty init out payload since this is just a test. 
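  // (Note: a follow-up commit later in this series replaces this zero-valued
  // payload with one whose major version is 7, because newer kernels reject a
  // FUSE_INIT reply with any other major version and subsequent operations
  // then fail with ECONNREFUSED.)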
+ struct fuse_init_out out_payload; + iov_out[0].iov_len = sizeof(out_header); + iov_out[0].iov_base = &out_header; + iov_out[1].iov_len = sizeof(out_payload); + iov_out[1].iov_base = &out_payload; + + RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(writev)(dev_fd_, iov_out, 2)); + return NoError(); +} + +// ReceiveExpected reads 1 pair of expected fuse request-response `iovec`s +// from pipe and save them into member variables of this testing instance. +void FuseTest::ReceiveExpected() { + // Set expected fuse_in request. + EXPECT_THAT(len_in_ = RetryEINTR(read)(set_expected_[0], mem_in_.data(), + mem_in_.size()), + SyscallSucceedsWithValue(::testing::Gt(0))); + MarkDone(len_in_ > 0); + + // Set expected fuse_out response. + EXPECT_THAT(len_out_ = RetryEINTR(read)(set_expected_[0], mem_out_.data(), + mem_out_.size()), + SyscallSucceedsWithValue(::testing::Gt(0))); + MarkDone(len_out_ > 0); +} + +// MarkDone writes 1 byte of success indicator through pipe. +void FuseTest::MarkDone(bool success) { + char data = success ? 1 : 0; + EXPECT_THAT(RetryEINTR(write)(done_[1], &data, sizeof(data)), + SyscallSucceedsWithValue(1)); +} + +// FuseLoop is the implementation of the fake FUSE server. Read from /dev/fuse, +// compare the request by CompareRequest (use derived function if specified), +// and write the expected response to /dev/fuse. +void FuseTest::FuseLoop() { + bool success = true; + ssize_t len = 0; + while (true) { + ReceiveExpected(); + + EXPECT_THAT(len = RetryEINTR(read)(dev_fd_, buf_.data(), buf_.size()), + SyscallSucceedsWithValue(len_in_)); + if (len != len_in_) success = false; + + if (!CompareRequest(buf_.data(), len_in_, mem_in_.data(), len_in_)) { + std::cerr << "the FUSE request is not expected" << std::endl; + success = false; + } + + EXPECT_THAT(len = RetryEINTR(write)(dev_fd_, mem_out_.data(), len_out_), + SyscallSucceedsWithValue(len_out_)); + if (len != len_out_) success = false; + MarkDone(success); + } +} + +// SetUpFuseServer creates 2 pipes. First is for testing client to send the +// expected request-response pair, and the other acts as a checkpoint for the +// FUSE server to notify the client that it can proceed. +void FuseTest::SetUpFuseServer() { + ASSERT_THAT(pipe(set_expected_), SyscallSucceedsWithValue(0)); + ASSERT_THAT(pipe(done_), SyscallSucceedsWithValue(0)); + + switch (fork()) { + case -1: + GTEST_FAIL(); + return; + case 0: + break; + default: + ASSERT_THAT(close(set_expected_[0]), SyscallSucceedsWithValue(0)); + ASSERT_THAT(close(done_[1]), SyscallSucceedsWithValue(0)); + WaitCompleted(); + return; + } + + ASSERT_THAT(close(set_expected_[1]), SyscallSucceedsWithValue(0)); + ASSERT_THAT(close(done_[0]), SyscallSucceedsWithValue(0)); + + MarkDone(ConsumeFuseInit().ok()); + + FuseLoop(); + _exit(0); +} + +// GetPayloadSize is a helper function to get the number of bytes of a +// specific FUSE operation struct. +size_t FuseTest::GetPayloadSize(uint32_t opcode, bool in) { + switch (opcode) { + case FUSE_INIT: + return in ? sizeof(struct fuse_init_in) : sizeof(struct fuse_init_out); + default: + break; + } + return 0; +} + +} // namespace testing +} // namespace gvisor diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h new file mode 100644 index 000000000..b008778de --- /dev/null +++ b/test/fuse/linux/fuse_base.h @@ -0,0 +1,97 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_FUSE_FUSE_BASE_H_ +#define GVISOR_TEST_FUSE_FUSE_BASE_H_ + +#include +#include + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "test/util/posix_error.h" + +namespace gvisor { +namespace testing { + +constexpr char kMountPoint[] = "/mnt"; +constexpr char kMountOpts[] = "rootmode=755,user_id=0,group_id=0"; + +class FuseTest : public ::testing::Test { + public: + FuseTest() { + buf_.resize(FUSE_MIN_READ_BUFFER); + mem_in_.resize(FUSE_MIN_READ_BUFFER); + mem_out_.resize(FUSE_MIN_READ_BUFFER); + } + void SetUp() override; + void TearDown() override; + + // CompareRequest is used by the FUSE server and should be implemented to + // compare different FUSE operations. It compares the actual FUSE input + // request with the expected one set by `SetExpected()`. + virtual bool CompareRequest(void* expected_mem, size_t expected_len, + void* real_mem, size_t real_len); + + // SetExpected is called by the testing main thread. Writes a request- + // response pair into FUSE server's member variables via pipe. + void SetExpected(struct iovec* iov_in, int iov_in_cnt, struct iovec* iov_out, + int iov_out_cnt); + + // WaitCompleted waits for FUSE server to complete its processing. It + // complains if the FUSE server responds failure during tests. + void WaitCompleted(); + + private: + void MountFuse(); + void UnmountFuse(); + + // ConsumeFuseInit is only used during FUSE server setup. + PosixError ConsumeFuseInit(); + + // ReceiveExpected is the FUSE server side's corresponding code of + // `SetExpected()`. Save the request-response pair into its memory. + void ReceiveExpected(); + + // MarkDone is used by the FUSE server to tell testing main if it's OK to + // proceed next command. + void MarkDone(bool success); + + // FuseLoop is where the FUSE server stay until it is terminated. + void FuseLoop(); + + // SetUpFuseServer creates 2 pipes for communication and forks FUSE server. + void SetUpFuseServer(); + + // GetPayloadSize is a helper function to get the number of bytes of a + // specific FUSE operation struct. + size_t GetPayloadSize(uint32_t opcode, bool in); + + int dev_fd_; + int set_expected_[2]; + int done_[2]; + + std::vector buf_; + std::vector mem_in_; + std::vector mem_out_; + ssize_t len_in_; + ssize_t len_out_; +}; + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_FUSE_FUSE_BASE_H_ diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index c92392b35..ba4732ca6 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -201,7 +201,7 @@ def syscall_test( platform = default_platform, use_tmpfs = use_tmpfs, add_uds_tree = add_uds_tree, - tags = platforms[default_platform] + vfs2_tags, + tags = platforms[default_platform] + vfs2_tags + ["fuse"], vfs2 = True, fuse = True, ) -- cgit v1.2.3 From 9094adfc2792945b344e393de977b3e8627ace72 Mon Sep 17 00:00:00 2001 From: Craig Chi Date: Tue, 4 Aug 2020 14:31:15 -0700 Subject: Fix FUSE integration test failed with ECONNREFUSED The newer version of FUSE_INIT checks the response from the FUSE server if its major number is equal to 7. 
If it's not, then FUSE_INIT fails and further filesystem operations will get ECONNREFUSED. To mitigate this issue, we can send back a response with major version equals to 7 when consuming the first FUSE_INIT request. Fixes #3500 --- test/fuse/linux/fuse_base.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc index 6c8432fd0..32221e7b6 100644 --- a/test/fuse/linux/fuse_base.cc +++ b/test/fuse/linux/fuse_base.cc @@ -25,9 +25,9 @@ #include +#include "absl/strings/str_format.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/strings/str_format.h" #include "test/util/posix_error.h" #include "test/util/test_util.h" @@ -100,8 +100,11 @@ PosixError FuseTest::ConsumeFuseInit() { .error = 0, .unique = 2, }; - // Returns an empty init out payload since this is just a test. - struct fuse_init_out out_payload; + // Returns a fake fuse_init_out with 7.0 version to avoid ECONNREFUSED + // error in the initialization of FUSE connection. + struct fuse_init_out out_payload = { + .major = 7, + }; iov_out[0].iov_len = sizeof(out_header); iov_out[0].iov_base = &out_header; iov_out[1].iov_len = sizeof(out_payload); -- cgit v1.2.3 From be7079578e0907fd3f35f91e9716246c179e17e9 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Tue, 4 Aug 2020 16:48:40 -0700 Subject: Port sysbench benchmark. PiperOrigin-RevId: 324918229 --- images/benchmarks/sysbench/Dockerfile | 7 + test/benchmarks/base/BUILD | 26 ++++ test/benchmarks/base/base.go | 31 +++++ test/benchmarks/base/sysbench_test.go | 89 ++++++++++++ test/benchmarks/database/BUILD | 4 +- test/benchmarks/tools/BUILD | 2 + test/benchmarks/tools/redis.go | 1 - test/benchmarks/tools/sysbench.go | 245 +++++++++++++++++++++++++++++++++ test/benchmarks/tools/sysbench_test.go | 169 +++++++++++++++++++++++ 9 files changed, 570 insertions(+), 4 deletions(-) create mode 100644 images/benchmarks/sysbench/Dockerfile create mode 100644 test/benchmarks/base/BUILD create mode 100644 test/benchmarks/base/base.go create mode 100644 test/benchmarks/base/sysbench_test.go create mode 100644 test/benchmarks/tools/sysbench.go create mode 100644 test/benchmarks/tools/sysbench_test.go (limited to 'test') diff --git a/images/benchmarks/sysbench/Dockerfile b/images/benchmarks/sysbench/Dockerfile new file mode 100644 index 000000000..55e865f43 --- /dev/null +++ b/images/benchmarks/sysbench/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:18.04 + +RUN set -x \ + && apt-get update \ + && apt-get install -y \ + sysbench \ + && rm -rf /var/lib/apt/lists/* diff --git a/test/benchmarks/base/BUILD b/test/benchmarks/base/BUILD new file mode 100644 index 000000000..3cb07797d --- /dev/null +++ b/test/benchmarks/base/BUILD @@ -0,0 +1,26 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "base", + testonly = 1, + srcs = ["base.go"], + deps = ["//test/benchmarks/harness"], +) + +go_test( + name = "base_test", + size = "small", + srcs = ["sysbench_test.go"], + library = ":base", + tags = [ + # Requires docker and runsc to be configured before test runs. + "manual", + "local", + ], + deps = [ + "//pkg/test/dockerutil", + "//test/benchmarks/tools", + ], +) diff --git a/test/benchmarks/base/base.go b/test/benchmarks/base/base.go new file mode 100644 index 000000000..7eb44d0ab --- /dev/null +++ b/test/benchmarks/base/base.go @@ -0,0 +1,31 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package base holds base performance benchmarks. +package base + +import ( + "os" + "testing" + + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +var h harness.Harness + +// TestMain is the main method for package network. +func TestMain(m *testing.M) { + h.Init() + os.Exit(m.Run()) +} diff --git a/test/benchmarks/base/sysbench_test.go b/test/benchmarks/base/sysbench_test.go new file mode 100644 index 000000000..7df73e38b --- /dev/null +++ b/test/benchmarks/base/sysbench_test.go @@ -0,0 +1,89 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package base + +import ( + "context" + "testing" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/tools" +) + +type testCase struct { + name string + test tools.Sysbench +} + +// BenchmarSysbench runs sysbench on the runtime. +func BenchmarkSysbench(b *testing.B) { + + testCases := []testCase{ + testCase{ + name: "CPU", + test: &tools.SysbenchCPU{ + Base: tools.SysbenchBase{ + Threads: 1, + Time: 5, + }, + MaxPrime: 50000, + }, + }, + testCase{ + name: "Memory", + test: &tools.SysbenchMemory{ + Base: tools.SysbenchBase{ + Threads: 1, + }, + BlockSize: "1M", + TotalSize: "500G", + }, + }, + testCase{ + name: "Mutex", + test: &tools.SysbenchMutex{ + Base: tools.SysbenchBase{ + Threads: 8, + }, + Loops: 1, + Locks: 10000000, + Num: 4, + }, + }, + } + + machine, err := h.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer machine.CleanUp() + + for _, tc := range testCases { + b.Run(tc.name, func(b *testing.B) { + + ctx := context.Background() + sysbench := machine.GetContainer(ctx, b) + defer sysbench.CleanUp(ctx) + + out, err := sysbench.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/sysbench", + }, tc.test.MakeCmd()...) + if err != nil { + b.Fatalf("failed to run sysbench: %v: logs:%s", err, out) + } + tc.test.Report(b, out) + }) + } +} diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD index 572db665f..6139f6e8a 100644 --- a/test/benchmarks/database/BUILD +++ b/test/benchmarks/database/BUILD @@ -12,9 +12,7 @@ go_library( go_test( name = "database_test", size = "enormous", - srcs = [ - "redis_test.go", - ], + srcs = ["redis_test.go"], library = ":database", tags = [ # Requires docker and runsc to be configured before test runs. 
diff --git a/test/benchmarks/tools/BUILD b/test/benchmarks/tools/BUILD index 4358551bc..a6bd949e6 100644 --- a/test/benchmarks/tools/BUILD +++ b/test/benchmarks/tools/BUILD @@ -10,6 +10,7 @@ go_library( "hey.go", "iperf.go", "redis.go", + "sysbench.go", "tools.go", ], visibility = ["//:sandbox"], @@ -24,6 +25,7 @@ go_test( "hey_test.go", "iperf_test.go", "redis_test.go", + "sysbench_test.go", ], library = ":tools", ) diff --git a/test/benchmarks/tools/redis.go b/test/benchmarks/tools/redis.go index db32460ec..c899ae0d4 100644 --- a/test/benchmarks/tools/redis.go +++ b/test/benchmarks/tools/redis.go @@ -56,7 +56,6 @@ func (r *Redis) Report(b *testing.B, output string) { func (r *Redis) parseOperation(data string) (float64, error) { re := regexp.MustCompile(fmt.Sprintf(`"%s( .*)?","(\d*\.\d*)"`, r.Operation)) match := re.FindStringSubmatch(data) - // If no match, simply don't add it to the result map. if len(match) < 3 { return 0.0, fmt.Errorf("could not find %s in %s", r.Operation, data) } diff --git a/test/benchmarks/tools/sysbench.go b/test/benchmarks/tools/sysbench.go new file mode 100644 index 000000000..6b2f75ca2 --- /dev/null +++ b/test/benchmarks/tools/sysbench.go @@ -0,0 +1,245 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "fmt" + "regexp" + "strconv" + "strings" + "testing" +) + +var warmup = "sysbench --threads=8 --memory-total-size=5G memory run > /dev/null &&" + +// Sysbench represents a 'sysbench' command. +type Sysbench interface { + MakeCmd() []string // Makes a sysbench command. + flags() []string + Report(*testing.B, string) // Reports results contained in string. +} + +// SysbenchBase is the top level struct for sysbench and holds top-level arguments +// for sysbench. See: 'sysbench --help' +type SysbenchBase struct { + Threads int // number of Threads for the test. + Time int // time limit for test in seconds. +} + +// baseFlags returns top level flags. +func (s *SysbenchBase) baseFlags() []string { + var ret []string + if s.Threads > 0 { + ret = append(ret, fmt.Sprintf("--threads=%d", s.Threads)) + } + if s.Time > 0 { + ret = append(ret, fmt.Sprintf("--time=%d", s.Time)) + } + return ret +} + +// SysbenchCPU is for 'sysbench [flags] cpu run' and holds CPU specific arguments. +type SysbenchCPU struct { + Base SysbenchBase + MaxPrime int // upper limit for primes generator [10000]. +} + +// MakeCmd makes commands for SysbenchCPU. +func (s *SysbenchCPU) MakeCmd() []string { + cmd := []string{warmup, "sysbench"} + cmd = append(cmd, s.flags()...) + cmd = append(cmd, "cpu run") + return []string{"sh", "-c", strings.Join(cmd, " ")} +} + +// flags makes flags for SysbenchCPU cmds. +func (s *SysbenchCPU) flags() []string { + cmd := s.Base.baseFlags() + if s.MaxPrime > 0 { + return append(cmd, fmt.Sprintf("--cpu-max-prime=%d", s.MaxPrime)) + } + return cmd +} + +// Report reports the relevant metrics for SysbenchCPU. 
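// For reference, with the values used by the CPU case in
// test/benchmarks/base/sysbench_test.go above, MakeCmd produces:
//
//	sh -c "sysbench --threads=8 --memory-total-size=5G memory run > /dev/null && sysbench --threads=1 --time=5 --cpu-max-prime=50000 cpu run"
//
// Report then extracts the "events per second" value from that run's output.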
+func (s *SysbenchCPU) Report(b *testing.B, output string) { + b.Helper() + result, err := s.parseEvents(output) + if err != nil { + b.Fatalf("parsing CPU events from %s failed: %v", output, err) + } + b.ReportMetric(result, "cpu_events_per_second") +} + +var cpuEventsPerSecondRE = regexp.MustCompile(`events per second:\s*(\d*.?\d*)\n`) + +// parseEvents parses cpu events per second. +func (s *SysbenchCPU) parseEvents(data string) (float64, error) { + match := cpuEventsPerSecondRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0.0, fmt.Errorf("could not find events per second: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +// SysbenchMemory is for 'sysbench [FLAGS] memory run' and holds Memory specific arguments. +type SysbenchMemory struct { + Base SysbenchBase + BlockSize string // size of test memory block [1K]. + TotalSize string // size of data to transfer [100G]. + Scope string // memory access scope {global, local} [global]. + HugeTLB bool // allocate memory from HugeTLB [off]. + OperationType string // type of memory ops {read, write, none} [write]. + AccessMode string // access mode {seq, rnd} [seq]. +} + +// MakeCmd makes commands for SysbenchMemory. +func (s *SysbenchMemory) MakeCmd() []string { + cmd := []string{warmup, "sysbench"} + cmd = append(cmd, s.flags()...) + cmd = append(cmd, "memory run") + return []string{"sh", "-c", strings.Join(cmd, " ")} +} + +// flags makes flags for SysbenchMemory cmds. +func (s *SysbenchMemory) flags() []string { + cmd := s.Base.baseFlags() + if s.BlockSize != "" { + cmd = append(cmd, fmt.Sprintf("--memory-block-size=%s", s.BlockSize)) + } + if s.TotalSize != "" { + cmd = append(cmd, fmt.Sprintf("--memory-total-size=%s", s.TotalSize)) + } + if s.Scope != "" { + cmd = append(cmd, fmt.Sprintf("--memory-scope=%s", s.Scope)) + } + if s.HugeTLB { + cmd = append(cmd, "--memory-hugetlb=on") + } + if s.OperationType != "" { + cmd = append(cmd, fmt.Sprintf("--memory-oper=%s", s.OperationType)) + } + if s.AccessMode != "" { + cmd = append(cmd, fmt.Sprintf("--memory-access-mode=%s", s.AccessMode)) + } + return cmd +} + +// Report reports the relevant metrics for SysbenchMemory. +func (s *SysbenchMemory) Report(b *testing.B, output string) { + b.Helper() + result, err := s.parseOperations(output) + if err != nil { + b.Fatalf("parsing result %s failed with err: %v", output, err) + } + b.ReportMetric(result, "operations_per_second") +} + +var memoryOperationsRE = regexp.MustCompile(`Total\soperations:\s+\d*\s*\((\d*\.\d*)\sper\ssecond\)`) + +// parseOperations parses memory operations per second form sysbench memory ouput. +func (s *SysbenchMemory) parseOperations(data string) (float64, error) { + match := memoryOperationsRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0.0, fmt.Errorf("couldn't find memory operations per second: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +// SysbenchMutex is for 'sysbench [FLAGS] mutex run' and holds Mutex specific arguments. +type SysbenchMutex struct { + Base SysbenchBase + Num int // total size of mutex array [4096]. + Locks int // number of mutex locks per thread [50K]. + Loops int // number of loops to do outside mutex lock [10K]. +} + +// MakeCmd makes commands for SysbenchMutex. +func (s *SysbenchMutex) MakeCmd() []string { + cmd := []string{warmup, "sysbench"} + cmd = append(cmd, s.flags()...) + cmd = append(cmd, "mutex run") + return []string{"sh", "-c", strings.Join(cmd, " ")} +} + +// flags makes flags for SysbenchMutex commands. 
+func (s *SysbenchMutex) flags() []string { + var cmd []string + cmd = append(cmd, s.Base.baseFlags()...) + if s.Num > 0 { + cmd = append(cmd, fmt.Sprintf("--mutex-num=%d", s.Num)) + } + if s.Locks > 0 { + cmd = append(cmd, fmt.Sprintf("--mutex-locks=%d", s.Locks)) + } + if s.Loops > 0 { + cmd = append(cmd, fmt.Sprintf("--mutex-loops=%d", s.Loops)) + } + return cmd +} + +// Report parses and reports relevant sysbench mutex metrics. +func (s *SysbenchMutex) Report(b *testing.B, output string) { + b.Helper() + + result, err := s.parseExecutionTime(output) + if err != nil { + b.Fatalf("parsing result %s failed with err: %v", output, err) + } + b.ReportMetric(result, "average_execution_time_secs") + + result, err = s.parseDeviation(output) + if err != nil { + b.Fatalf("parsing result %s failed with err: %v", output, err) + } + b.ReportMetric(result, "stdev_execution_time_secs") + + result, err = s.parseLatency(output) + if err != nil { + b.Fatalf("parsing result %s failed with err: %v", output, err) + } + b.ReportMetric(result/1000, "average_latency_secs") +} + +var executionTimeRE = regexp.MustCompile(`execution time \(avg/stddev\):\s*(\d*.?\d*)/(\d*.?\d*)`) + +// parseExecutionTime parses threads fairness average execution time from sysbench output. +func (s *SysbenchMutex) parseExecutionTime(data string) (float64, error) { + match := executionTimeRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0.0, fmt.Errorf("could not find execution time average: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} + +// parseDeviation parses threads fairness stddev time from sysbench output. +func (s *SysbenchMutex) parseDeviation(data string) (float64, error) { + match := executionTimeRE.FindStringSubmatch(data) + if len(match) < 3 { + return 0.0, fmt.Errorf("could not find execution time deviation: %s", data) + } + return strconv.ParseFloat(match[2], 64) +} + +var averageLatencyRE = regexp.MustCompile(`avg:[^\n^\d]*(\d*\.?\d*)`) + +// parseLatency parses latency from sysbench output. +func (s *SysbenchMutex) parseLatency(data string) (float64, error) { + match := averageLatencyRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0.0, fmt.Errorf("could not find average latency: %s", data) + } + return strconv.ParseFloat(match[1], 64) +} diff --git a/test/benchmarks/tools/sysbench_test.go b/test/benchmarks/tools/sysbench_test.go new file mode 100644 index 000000000..850d1939e --- /dev/null +++ b/test/benchmarks/tools/sysbench_test.go @@ -0,0 +1,169 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "testing" +) + +// TestSysbenchCpu tests parses on sample 'sysbench cpu' output. +func TestSysbenchCpu(t *testing.T) { + sampleData := ` +sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3) + +Running the test with following options: +Number of threads: 8 +Initializing random number generator from current time + + +Prime numbers limit: 10000 + +Initializing worker threads... + +Threads started! 
+ +CPU speed: + events per second: 9093.38 + +General statistics: + total time: 10.0007s + total number of events: 90949 + +Latency (ms): + min: 0.64 + avg: 0.88 + max: 24.65 + 95th percentile: 1.55 + sum: 79936.91 + +Threads fairness: + events (avg/stddev): 11368.6250/831.38 + execution time (avg/stddev): 9.9921/0.01 +` + sysbench := SysbenchCPU{} + want := 9093.38 + if got, err := sysbench.parseEvents(sampleData); err != nil { + t.Fatalf("parse cpu events failed: %v", err) + } else if want != got { + t.Fatalf("got: %f want: %f", got, want) + } +} + +// TestSysbenchMemory tests parsers on sample 'sysbench memory' output. +func TestSysbenchMemory(t *testing.T) { + sampleData := ` +sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3) + +Running the test with following options: +Number of threads: 8 +Initializing random number generator from current time + + +Running memory speed test with the following options: + block size: 1KiB + total size: 102400MiB + operation: write + scope: global + +Initializing worker threads... + +Threads started! + +Total operations: 47999046 (9597428.64 per second) + +46874.07 MiB transferred (9372.49 MiB/sec) + + +General statistics: + total time: 5.0001s + total number of events: 47999046 + +Latency (ms): + min: 0.00 + avg: 0.00 + max: 0.21 + 95th percentile: 0.00 + sum: 33165.91 + +Threads fairness: + events (avg/stddev): 5999880.7500/111242.52 + execution time (avg/stddev): 4.1457/0.09 +` + sysbench := SysbenchMemory{} + want := 9597428.64 + if got, err := sysbench.parseOperations(sampleData); err != nil { + t.Fatalf("parse memory ops failed: %v", err) + } else if want != got { + t.Fatalf("got: %f want: %f", got, want) + } +} + +// TestSysbenchMutex tests parsers on sample 'sysbench mutex' output. +func TestSysbenchMutex(t *testing.T) { + sampleData := ` +sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3) + +The 'mutex' test requires a command argument. See 'sysbench mutex help' +root@ec078132e294:/# sysbench mutex --threads=8 run +sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3) + +Running the test with following options: +Number of threads: 8 +Initializing random number generator from current time + + +Initializing worker threads... + +Threads started! + + +General statistics: + total time: 0.2320s + total number of events: 8 + +Latency (ms): + min: 152.35 + avg: 192.48 + max: 231.41 + 95th percentile: 231.53 + sum: 1539.83 + +Threads fairness: + events (avg/stddev): 1.0000/0.00 + execution time (avg/stddev): 0.1925/0.04 +` + + sysbench := SysbenchMutex{} + want := .1925 + if got, err := sysbench.parseExecutionTime(sampleData); err != nil { + t.Fatalf("parse mutex time failed: %v", err) + } else if want != got { + t.Fatalf("got: %f want: %f", got, want) + } + + want = 0.04 + if got, err := sysbench.parseDeviation(sampleData); err != nil { + t.Fatalf("parse mutex deviation failed: %v", err) + } else if want != got { + t.Fatalf("got: %f want: %f", got, want) + } + + want = 192.48 + if got, err := sysbench.parseLatency(sampleData); err != nil { + t.Fatalf("parse mutex time failed: %v", err) + } else if want != got { + t.Fatalf("got: %f want: %f", got, want) + } +} -- cgit v1.2.3 From 338f96b36c778748ff27f5ae73cc73b222c5a90e Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Tue, 4 Aug 2020 17:18:48 -0700 Subject: [runtime tests] Exclude flaky tests. 
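Before the flaky-test exclusions that follow, a closing note on the sysbench tooling above: SysbenchCPU, SysbenchMemory and SysbenchMutex all follow the same recipe of building the command line from struct fields, running it through sh -c, and then pulling a single number out of sysbench's text output with an anchored regular expression for b.ReportMetric. The snippet below is a minimal, self-contained sketch of that extraction step only; the sample string and the main wrapper are invented for illustration and are not part of the change.

    package main

    import (
        "fmt"
        "regexp"
        "strconv"
    )

    // Same anchored-regex extraction as SysbenchCPU.parseEvents above; the
    // sample string below is trimmed and invented purely for illustration.
    var eventsPerSecondRE = regexp.MustCompile(`events per second:\s*(\d*\.?\d*)`)

    func main() {
        sample := "CPU speed:\n    events per second:  9093.38\n"
        match := eventsPerSecondRE.FindStringSubmatch(sample)
        if len(match) < 2 {
            fmt.Println("could not find events per second")
            return
        }
        eps, err := strconv.ParseFloat(match[1], 64)
        if err != nil {
            fmt.Println("parse error:", err)
            return
        }
        fmt.Printf("events per second: %.2f\n", eps) // Prints 9093.38.
    }

The same shape covers the memory operations, execution time avg/stddev and latency regexes exercised by the unit tests above.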
PiperOrigin-RevId: 324923599 --- test/runtimes/exclude_nodejs12.4.0.csv | 5 ++++- test/runtimes/exclude_php7.3.6.csv | 2 ++ test/runtimes/exclude_python3.7.3.csv | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv index 9aaf1e647..96f9002f3 100644 --- a/test/runtimes/exclude_nodejs12.4.0.csv +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -16,8 +16,11 @@ parallel/test-http-writable-true-after-close.js,,Flaky - Mismatched parallel/test-os.js,b/63997097, parallel/test-process-uid-gid.js,, parallel/test-tls-cli-min-version-1.0.js,,Flaky - EADDRINUSE +parallel/test-tls-cli-min-version-1.1.js,,Flaky - EADDRINUSE +parallel/test-tls-cli-min-version-1.2.js,,Flaky - EADDRINUSE parallel/test-tls-cli-min-version-1.3.js,,Flaky - EADDRINUSE -parallel/test-tls-cli-max-version-1.2,,Flaky - EADDRINUSE +parallel/test-tls-cli-max-version-1.2.js,,Flaky - EADDRINUSE +parallel/test-tls-cli-max-version-1.3.js,,Flaky - EADDRINUSE parallel/test-tls-min-max-version.js,,Flaky - EADDRINUSE pseudo-tty/test-assert-colors.js,b/162801321, pseudo-tty/test-assert-no-color.js,b/162801321, diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv index 0bef786c0..efcabe752 100644 --- a/test/runtimes/exclude_php7.3.6.csv +++ b/test/runtimes/exclude_php7.3.6.csv @@ -8,6 +8,7 @@ ext/mbstring/tests/bug77165.phpt,, ext/mbstring/tests/bug77454.phpt,, ext/mbstring/tests/mb_convert_encoding_leak.phpt,, ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, +ext/session/tests/session_module_name_variation4.phpt,,Flaky ext/session/tests/session_set_save_handler_class_018.phpt,, ext/session/tests/session_set_save_handler_iface_003.phpt,, ext/session/tests/session_set_save_handler_variation4.phpt,, @@ -25,6 +26,7 @@ ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, ext/standard/tests/network/bug20134.phpt,, +ext/standard/tests/streams/proc_open_bug69900.phpt,,Flaky ext/standard/tests/streams/stream_socket_sendto.phpt,, ext/standard/tests/strings/007.phpt,, sapi/cli/tests/upload_2G.phpt,, diff --git a/test/runtimes/exclude_python3.7.3.csv b/test/runtimes/exclude_python3.7.3.csv index 2b9947212..3580b25c3 100644 --- a/test/runtimes/exclude_python3.7.3.csv +++ b/test/runtimes/exclude_python3.7.3.csv @@ -20,6 +20,7 @@ test_readline,b/76157709,out of pty devices test_resource,b/76174079, test_selectors,b/76116849,OSError not raised with epoll test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets +test_signal,,Flaky - signal: alarm clock test_socket,b/75983380, test_ssl,b/76031995,SO_REUSEADDR test_subprocess,, -- cgit v1.2.3 From c5f5806fe69e8d4be99341318f20ce0acfd7be2a Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 4 Aug 2020 20:49:00 -0700 Subject: Enable "make packetimpact-tests" to work. This required minor fixes to the bazel wrapper. The "dut_platform" is also changed to "native" to line-up with the system call tests and remove the hard-coded "linux" and "netstack" strings. 
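In Go terms, the dut_platform-to-native change amounts to replacing a three-way string comparison with a single boolean flag. The sketch below is a simplified stand-alone illustration of that pattern, not the actual runner code; the real wiring appears in runner/packetimpact_test.go and testbench/testbench.go in the diff that follows.

    package main

    import (
        "flag"
        "fmt"
    )

    // Simplified stand-alone sketch of the change described above; the real
    // wiring lives in runner/packetimpact_test.go and testbench/testbench.go
    // in the diff below.
    var native = flag.Bool("native", false, "whether the test should be run natively")

    func main() {
        flag.Parse()
        if *native {
            // What used to be the --dut_platform=linux branch.
            fmt.Println("DUT is native Linux")
        } else {
            // Netstack is the default; a distinct runtime is selected with
            // --test_arg=--runtime=... when invoking bazel.
            fmt.Println("DUT is netstack")
        }
    }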
--- Makefile | 4 ++-- test/packetimpact/runner/defs.bzl | 22 +++++++++++----------- test/packetimpact/runner/packetimpact_test.go | 18 ++++++------------ test/packetimpact/testbench/testbench.go | 6 +++--- test/packetimpact/tests/tcp_reordering_test.go | 6 +++--- tools/bazel.mk | 13 ++++++++----- 6 files changed, 33 insertions(+), 36 deletions(-) (limited to 'test') diff --git a/Makefile b/Makefile index 5a9367de8..dfd1b4abc 100644 --- a/Makefile +++ b/Makefile @@ -185,7 +185,6 @@ swgso-tests: load-basic-images @$(call submake,install-test-runtime RUNTIME="swgso" ARGS="--software-gso=true --gso=false") @$(call submake,test-runtime RUNTIME="swgso" TARGETS="$(INTEGRATION_TARGETS)") .PHONY: swgso-tests - hostnet-tests: load-basic-images @$(call submake,install-test-runtime RUNTIME="hostnet" ARGS="--network=host") @$(call submake,test-runtime RUNTIME="hostnet" OPTIONS="--test_arg=-checkpoint=false" TARGETS="$(INTEGRATION_TARGETS)") @@ -211,8 +210,9 @@ packetdrill-tests: load-packetdrill .PHONY: packetdrill-tests packetimpact-tests: load-packetimpact + @sudo modprobe iptable_filter ip6table_filter @$(call submake,install-test-runtime RUNTIME="packetimpact") - @$(call submake,test-runtime RUNTIME="packetimpact" TARGETS="$(shell $(MAKE) query TARGETS='attr(tags, packetimpact, tests(//...))')") + @$(call submake,test-runtime OPTIONS="--jobs=HOST_CPUS*3 --local_test_jobs=HOST_CPUS*3" RUNTIME="packetimpact" TARGETS="$(shell $(MAKE) query TARGETS='attr(tags, packetimpact, tests(//...))')") .PHONY: packetimpact-tests root-tests: load-basic-images diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl index 79b3c9162..93a36c6c2 100644 --- a/test/packetimpact/runner/defs.bzl +++ b/test/packetimpact/runner/defs.bzl @@ -61,12 +61,12 @@ PACKETIMPACT_TAGS = [ "packetimpact", ] -def packetimpact_linux_test( +def packetimpact_native_test( name, testbench_binary, expect_failure = False, **kwargs): - """Add a packetimpact test on linux. + """Add a native packetimpact test. Args: name: name of the test @@ -76,9 +76,9 @@ def packetimpact_linux_test( """ expect_failure_flag = ["--expect_failure"] if expect_failure else [] _packetimpact_test( - name = name + "_linux_test", + name = name + "_native_test", testbench_binary = testbench_binary, - flags = ["--dut_platform", "linux"] + expect_failure_flag, + flags = ["--native"] + expect_failure_flag, tags = PACKETIMPACT_TAGS, **kwargs ) @@ -102,21 +102,21 @@ def packetimpact_netstack_test( _packetimpact_test( name = name + "_netstack_test", testbench_binary = testbench_binary, - # This is the default runtime unless - # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value. - flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag, + # Note that a distinct runtime must be provided in the form + # --test_arg=--runtime=other when invoking bazel. + flags = expect_failure_flag, tags = PACKETIMPACT_TAGS, **kwargs ) -def packetimpact_go_test(name, size = "small", pure = True, expect_linux_failure = False, expect_netstack_failure = False, **kwargs): +def packetimpact_go_test(name, size = "small", pure = True, expect_native_failure = False, expect_netstack_failure = False, **kwargs): """Add packetimpact tests written in go. 
Args: name: name of the test size: size of the test pure: make a static go binary - expect_linux_failure: the test must fail for Linux + expect_native_failure: the test must fail natively expect_netstack_failure: the test must fail for Netstack **kwargs: all the other args, forwarded to go_test """ @@ -131,9 +131,9 @@ def packetimpact_go_test(name, size = "small", pure = True, expect_linux_failure ], **kwargs ) - packetimpact_linux_test( + packetimpact_native_test( name = name, - expect_failure = expect_linux_failure, + expect_failure = expect_native_failure, testbench_binary = testbench_binary, ) packetimpact_netstack_test( diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 74e1e6def..e8c183977 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -50,7 +50,7 @@ func (l *stringList) Set(value string) error { } var ( - dutPlatform = flag.String("dut_platform", "", "either \"linux\" or \"netstack\"") + native = flag.Bool("native", false, "whether the test should be run natively") testbenchBinary = flag.String("testbench_binary", "", "path to the testbench binary") tshark = flag.Bool("tshark", false, "use more verbose tshark in logs instead of tcpdump") extraTestArgs = stringList{} @@ -84,17 +84,9 @@ func (l logger) Logf(format string, args ...interface{}) { func TestOne(t *testing.T) { flag.Var(&extraTestArgs, "extra_test_arg", "extra arguments to pass to the testbench") flag.Parse() - if *dutPlatform != "linux" && *dutPlatform != "netstack" { - t.Fatal("--dut_platform should be either linux or netstack") - } if *testbenchBinary == "" { t.Fatal("--testbench_binary is missing") } - if *dutPlatform == "netstack" { - if _, err := dockerutil.RuntimePath(); err != nil { - t.Fatal("--runtime is missing or invalid with --dut_platform=netstack:", err) - } - } dockerutil.EnsureSupportedDockerVersion() ctx := context.Background() @@ -140,9 +132,11 @@ func TestOne(t *testing.T) { const testOutputDir = "/tmp/testoutput" // Create the Docker container for the DUT. - dut := dockerutil.MakeContainer(ctx, logger("dut")) - if *dutPlatform == "linux" { + var dut *dockerutil.Container + if *native { dut = dockerutil.MakeNativeContainer(ctx, logger("dut")) + } else { + dut = dockerutil.MakeContainer(ctx, logger("dut")) } runOpts := dockerutil.RunOpts{ @@ -307,7 +301,7 @@ func TestOne(t *testing.T) { "--remote_mac", remoteMAC.String(), "--remote_interface_id", fmt.Sprintf("%d", dutDeviceInfo.ID), "--device", testNetDev, - "--dut_type", *dutPlatform, + fmt.Sprintf("--native=%t", *native), ) testbenchLogs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, testArgs...) if (err != nil) != *expectFailure { diff --git a/test/packetimpact/testbench/testbench.go b/test/packetimpact/testbench/testbench.go index 242464e3a..e3629e1f3 100644 --- a/test/packetimpact/testbench/testbench.go +++ b/test/packetimpact/testbench/testbench.go @@ -27,8 +27,8 @@ import ( ) var ( - // DUTType is the type of device under test. - DUTType = "" + // Native indicates that the test is being run natively. + Native = false // Device is the local device on the test network. 
Device = "" @@ -81,7 +81,7 @@ func RegisterFlags(fs *flag.FlagSet) { fs.StringVar(&RemoteIPv6, "remote_ipv6", RemoteIPv6, "remote IPv6 address for test packets") fs.StringVar(&RemoteMAC, "remote_mac", RemoteMAC, "remote mac address for test packets") fs.StringVar(&Device, "device", Device, "local device for test packets") - fs.StringVar(&DUTType, "dut_type", DUTType, "type of device under test") + fs.BoolVar(&Native, "native", Native, "whether the test is running natively") fs.Uint64Var(&RemoteInterfaceID, "remote_interface_id", RemoteInterfaceID, "remote interface ID for test packets") } diff --git a/test/packetimpact/tests/tcp_reordering_test.go b/test/packetimpact/tests/tcp_reordering_test.go index 8742819ca..b4aeaab57 100644 --- a/test/packetimpact/tests/tcp_reordering_test.go +++ b/test/packetimpact/tests/tcp_reordering_test.go @@ -54,13 +54,13 @@ func TestReorderingWindow(t *testing.T) { acceptFd, _ := dut.Accept(t, listenFd) defer dut.Close(t, acceptFd) - if tb.DUTType == "linux" { + if tb.Native { // Linux has changed its handling of reordering, force the old behavior. dut.SetSockOpt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_CONGESTION, []byte("reno")) } pls := dut.GetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_MAXSEG) - if tb.DUTType == "netstack" { + if !tb.Native { // netstack does not impliment TCP_MAXSEG correctly. Fake it // here. Netstack uses the max SACK size which is 32. The MSS // option is 8 bytes, making the total 36 bytes. @@ -141,7 +141,7 @@ func TestReorderingWindow(t *testing.T) { } } - if tb.DUTType == "netstack" { + if !tb.Native { // The window should now be halved, so we should receive any // more, even if we send them. dut.Send(t, acceptFd, payload, 0) diff --git a/tools/bazel.mk b/tools/bazel.mk index 82fe11a03..3e27af7d1 100644 --- a/tools/bazel.mk +++ b/tools/bazel.mk @@ -33,8 +33,8 @@ GCLOUD_CONFIG := $(shell readlink -m ~/.config/gcloud/) DOCKER_SOCKET := /var/run/docker.sock # Bazel flags. -OPTIONS += --test_output=errors --keep_going --verbose_failures=true BAZEL := bazel $(STARTUP_OPTIONS) +OPTIONS += --color=no --curses=no # Basic options. UID := $(shell id -u ${USER}) @@ -143,12 +143,13 @@ bazel-server: ## Ensures that the server exists. Used as an internal target. @docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) true || $(MAKE) bazel-server-start .PHONY: bazel-server -build_cmd = docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) sh -o pipefail -c '$(BAZEL) build $(OPTIONS) $(TARGETS)' +build_cmd = docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) sh -o pipefail -c '$(BAZEL) build $(OPTIONS) "$(TARGETS)"' build_paths = $(build_cmd) 2>&1 \ | tee /proc/self/fd/2 \ | grep -E "^ bazel-bin/" \ - | awk "{print $$1;}" \ + | tr -d '\r' \ + | awk '{$$1=$$1};1' \ | xargs -n 1 -I {} sh -c "$(1)" build: bazel-server @@ -169,10 +170,12 @@ sudo: bazel-server @$(call build_paths,sudo -E {} $(ARGS)) .PHONY: sudo +test: OPTIONS += --test_output=errors --keep_going --verbose_failures=true test: bazel-server @docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) $(BAZEL) test $(OPTIONS) $(TARGETS) .PHONY: test -query: bazel-server - @docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) $(BAZEL) query $(OPTIONS) '$(TARGETS)' +query: + @$(MAKE) bazel-server >&2 # If we need to start, ensure stdout is not polluted. 
+ @docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) sh -o pipefail -c '$(BAZEL) query $(OPTIONS) "$(TARGETS)" 2>/dev/null' .PHONY: query -- cgit v1.2.3 From d0127b23f26d546db0d525201f0ad3f43d5b8d24 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Wed, 5 Aug 2020 10:21:17 -0700 Subject: [runtime tests] Update bugs and un-exclude passing tests. PiperOrigin-RevId: 325045486 --- test/runtimes/exclude_nodejs12.4.0.csv | 1 + test/runtimes/exclude_php7.3.6.csv | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv index 96f9002f3..525117f5c 100644 --- a/test/runtimes/exclude_nodejs12.4.0.csv +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -11,6 +11,7 @@ parallel/test-dgram-socket-buffer-size.js,b/68847921, parallel/test-dns-channel-timeout.js,b/161893056, parallel/test-fs-access.js,, parallel/test-fs-watchfile.js,,Flaky - File already exists error +parallel/test-fs-write-stream.js,,Flaky parallel/test-fs-write-stream-throw-type-error.js,b/110226209, parallel/test-http-writable-true-after-close.js,,Flaky - Mismatched function calls. Expected exactly 1 actual 2 parallel/test-os.js,b/63997097, diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv index efcabe752..e828f91dd 100644 --- a/test/runtimes/exclude_php7.3.6.csv +++ b/test/runtimes/exclude_php7.3.6.csv @@ -12,20 +12,18 @@ ext/session/tests/session_module_name_variation4.phpt,,Flaky ext/session/tests/session_set_save_handler_class_018.phpt,, ext/session/tests/session_set_save_handler_iface_003.phpt,, ext/session/tests/session_set_save_handler_variation4.phpt,, -ext/session/tests/session_set_save_handler_variation5.phpt,, -ext/standard/tests/file/filetype_variation.phpt,, -ext/standard/tests/file/fopen_variation19.phpt,, +ext/standard/tests/file/fopen_variation19.phpt,b/162894964, +ext/standard/tests/file/lstat_stat_variation14.phpt,,Flaky ext/standard/tests/file/php_fd_wrapper_01.phpt,, ext/standard/tests/file/php_fd_wrapper_02.phpt,, ext/standard/tests/file/php_fd_wrapper_03.phpt,, ext/standard/tests/file/php_fd_wrapper_04.phpt,, -ext/standard/tests/file/realpath_bug77484.phpt,, +ext/standard/tests/file/realpath_bug77484.phpt,b/162894969, ext/standard/tests/file/rename_variation.phpt,b/68717309, -ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,, -ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,, +ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,b/162895341, +ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,b/162896223, ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,, ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,, -ext/standard/tests/network/bug20134.phpt,, ext/standard/tests/streams/proc_open_bug69900.phpt,,Flaky ext/standard/tests/streams/stream_socket_sendto.phpt,, ext/standard/tests/strings/007.phpt,, @@ -36,4 +34,4 @@ tests/output/stream_isatty_in-out-err.phpt,, tests/output/stream_isatty_in-out.phpt,b/68720299, tests/output/stream_isatty_out-err.phpt,b/68720311, tests/output/stream_isatty_out.phpt,b/68720325, -Zend/tests/concat_003.phpt,, +Zend/tests/concat_003.phpt,b/162896021, -- cgit v1.2.3 From ce463c027b0d3cbcf3b9d3e2a5ad598c0249212f Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Wed, 5 Aug 2020 17:46:13 -0700 Subject: [runtime tests] Update python tests. - un-exclude passing tests to increase testing surface. 
- create/update bugs for tests that fail on runsc but pass on runc. PiperOrigin-RevId: 325137657 --- test/runtimes/exclude_python3.7.3.csv | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'test') diff --git a/test/runtimes/exclude_python3.7.3.csv b/test/runtimes/exclude_python3.7.3.csv index 3580b25c3..a06d8361c 100644 --- a/test/runtimes/exclude_python3.7.3.csv +++ b/test/runtimes/exclude_python3.7.3.csv @@ -1,28 +1,20 @@ test name,bug id,comment -test_asynchat,b/76031995,SO_REUSEADDR test_asyncio,,Fails on Docker. -test_asyncore,b/76031995,SO_REUSEADDR -test_epoll,, -test_fcntl,,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. -test_ftplib,,Fails in Docker -test_httplib,b/76031995,SO_REUSEADDR -test_imaplib,, -test_logging,, +test_asyncore,b/162973328, +test_epoll,b/162983393, +test_fcntl,b/162978767,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. +test_imaplib,b/162979661, +test_logging,b/162980079, test_multiprocessing_fork,,Flaky. Sometimes times out. test_multiprocessing_forkserver,,Flaky. Sometimes times out. test_multiprocessing_main_handling,,Flaky. Sometimes times out. test_multiprocessing_spawn,,Flaky. Sometimes times out. -test_nntplib,b/76031995,tests should not set SO_REUSEADDR -test_poplib,,Fails on Docker test_posix,b/76174079,posix.sched_get_priority_min not implemented + posix.sched_rr_get_interval not permitted -test_pty,b/76157709,out of pty devices -test_readline,b/76157709,out of pty devices +test_pty,b/162979921, +test_readline,b/162980389,TestReadline hangs forever test_resource,b/76174079, test_selectors,b/76116849,OSError not raised with epoll -test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets +test_smtplib,b/162980434,unclosed sockets test_signal,,Flaky - signal: alarm clock test_socket,b/75983380, -test_ssl,b/76031995,SO_REUSEADDR -test_subprocess,, -test_support,b/76031995,SO_REUSEADDR -test_telnetlib,b/76031995,SO_REUSEADDR +test_subprocess,b/162980831, -- cgit v1.2.3 From 35312a95c4c8626365b4ece5ffb0bcab44b4bede Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Wed, 5 Aug 2020 20:45:02 -0700 Subject: Add loss recovery option for TCP. /proc/sys/net/ipv4/tcp_recovery is used to enable RACK loss recovery in TCP. 
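As a rough usage sketch only, and assuming sufficient privileges inside the sandbox: once the knob exists it can be read and written like any other proc sysctl. The bit values below mirror the constants introduced by this change (1 enables RACK loss detection, which is the default; 2 keeps the reordering window static; 4 drops the classic three-DUPACK threshold); everything else in the snippet is illustrative.

    package main

    import (
        "fmt"
        "io/ioutil"
        "strconv"
        "strings"
    )

    const tcpRecoveryPath = "/proc/sys/net/ipv4/tcp_recovery"

    func main() {
        // Read the current setting; after this change the default is 1
        // (RACK loss detection).
        raw, err := ioutil.ReadFile(tcpRecoveryPath)
        if err != nil {
            fmt.Println("read:", err)
            return
        }
        cur, err := strconv.Atoi(strings.TrimSpace(string(raw)))
        if err != nil {
            fmt.Println("parse:", err)
            return
        }
        fmt.Printf("tcp_recovery=%d (RACK loss detection: %t)\n", cur, cur&1 != 0)

        // Combine RACK loss detection with a static reordering window (1|2).
        if err := ioutil.WriteFile(tcpRecoveryPath, []byte("3"), 0644); err != nil {
            fmt.Println("write:", err)
        }
    }

Values outside these bits are stored verbatim rather than rejected, which is what the proc_net.cc test below exercises by writing 100.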
PiperOrigin-RevId: 325157807 --- pkg/sentry/fs/proc/sys_net.go | 95 +++++++++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/proc/tasks_sys.go | 49 ++++++++++++++++++- pkg/sentry/inet/inet.go | 17 +++++++ pkg/sentry/inet/test_stack.go | 12 +++++ pkg/sentry/socket/hostinet/stack.go | 11 +++++ pkg/sentry/socket/netstack/stack.go | 14 ++++++ pkg/tcpip/transport/tcp/protocol.go | 33 +++++++++++++ test/syscalls/linux/proc_net.cc | 38 +++++++++++++++ 8 files changed, 268 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index 702fdd392..8615b60f0 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -272,6 +272,96 @@ func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSeque return n, f.tcpSack.stack.SetTCPSACKEnabled(*f.tcpSack.enabled) } +// +stateify savable +type tcpRecovery struct { + fsutil.SimpleFileInode + + stack inet.Stack `state:"wait"` + recovery inet.TCPLossRecovery +} + +func newTCPRecoveryInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + ts := &tcpRecovery{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), + stack: s, + } + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(ctx, ts, msrc, sattr) +} + +// Truncate implements fs.InodeOperations.Truncate. +func (*tcpRecovery) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +// GetFile implements fs.InodeOperations.GetFile. +func (r *tcpRecovery) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + flags.Pwrite = true + return fs.NewFile(ctx, dirent, flags, &tcpRecoveryFile{ + tcpRecovery: r, + stack: r.stack, + }), nil +} + +// +stateify savable +type tcpRecoveryFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + tcpRecovery *tcpRecovery + + stack inet.Stack `state:"wait"` +} + +// Read implements fs.FileOperations.Read. +func (f *tcpRecoveryFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + + recovery, err := f.stack.TCPRecovery() + if err != nil { + return 0, err + } + f.tcpRecovery.recovery = recovery + s := fmt.Sprintf("%d\n", f.tcpRecovery.recovery) + n, err := dst.CopyOut(ctx, []byte(s)) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. 
+func (f *tcpRecoveryFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + f.tcpRecovery.recovery = inet.TCPLossRecovery(v) + if err := f.tcpRecovery.stack.SetTCPRecovery(f.tcpRecovery.recovery); err != nil { + return 0, err + } + return n, nil +} + func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { // The following files are simple stubs until they are implemented in // netstack, most of these files are configuration related. We use the @@ -351,6 +441,11 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine contents["tcp_wmem"] = newTCPMemInode(ctx, msrc, s, tcpWMem) } + // Add tcp_recovery. + if _, err := s.TCPRecovery(); err == nil { + contents["tcp_recovery"] = newTCPRecoveryInode(ctx, msrc, s) + } + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 6dac2afa4..b71778128 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -55,7 +55,8 @@ func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *ke if stack := k.RootNetworkNamespace().Stack(); stack != nil { contents = map[string]*kernfs.Dentry{ "ipv4": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{ - "tcp_sack": fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}), + "tcp_recovery": fs.newDentry(root, fs.NextIno(), 0644, &tcpRecoveryData{stack: stack}), + "tcp_sack": fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}), // The following files are simple stubs until they are implemented in // netstack, most of these files are configuration related. We use the @@ -207,3 +208,49 @@ func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset *d.enabled = v != 0 return n, d.stack.SetTCPSACKEnabled(*d.enabled) } + +// tcpRecoveryData implements vfs.WritableDynamicBytesSource for +// /proc/sys/net/ipv4/tcp_recovery. +// +// +stateify savable +type tcpRecoveryData struct { + kernfs.DynamicBytesFile + + stack inet.Stack `state:"wait"` +} + +var _ vfs.WritableDynamicBytesSource = (*tcpRecoveryData)(nil) + +// Generate implements vfs.DynamicBytesSource. +func (d *tcpRecoveryData) Generate(ctx context.Context, buf *bytes.Buffer) error { + recovery, err := d.stack.TCPRecovery() + if err != nil { + return err + } + + buf.WriteString(fmt.Sprintf("%d\n", recovery)) + return nil +} + +func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + // No need to handle partial writes thus far. + return 0, syserror.EINVAL + } + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit the amount of memory allocated. 
+ src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + if err := d.stack.SetTCPRecovery(inet.TCPLossRecovery(v)); err != nil { + return 0, err + } + return n, nil +} diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go index 2916a0644..c0b4831d1 100644 --- a/pkg/sentry/inet/inet.go +++ b/pkg/sentry/inet/inet.go @@ -56,6 +56,12 @@ type Stack interface { // settings. SetTCPSACKEnabled(enabled bool) error + // TCPRecovery returns the TCP loss detection algorithm. + TCPRecovery() (TCPLossRecovery, error) + + // SetTCPRecovery attempts to change TCP loss detection algorithm. + SetTCPRecovery(recovery TCPLossRecovery) error + // Statistics reports stack statistics. Statistics(stat interface{}, arg string) error @@ -189,3 +195,14 @@ type StatSNMPUDP [8]uint64 // StatSNMPUDPLite describes UdpLite line of /proc/net/snmp. type StatSNMPUDPLite [8]uint64 + +// TCPLossRecovery indicates TCP loss detection and recovery methods to use. +type TCPLossRecovery int32 + +// Loss recovery constants from include/net/tcp.h which are used to set +// /proc/sys/net/ipv4/tcp_recovery. +const ( + TCP_RACK_LOSS_DETECTION TCPLossRecovery = 1 << iota + TCP_RACK_STATIC_REO_WND + TCP_RACK_NO_DUPTHRESH +) diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go index d8961fc94..9771f01fc 100644 --- a/pkg/sentry/inet/test_stack.go +++ b/pkg/sentry/inet/test_stack.go @@ -25,6 +25,7 @@ type TestStack struct { TCPRecvBufSize TCPBufferSize TCPSendBufSize TCPBufferSize TCPSACKFlag bool + Recovery TCPLossRecovery } // NewTestStack returns a TestStack with no network interfaces. The value of @@ -91,6 +92,17 @@ func (s *TestStack) SetTCPSACKEnabled(enabled bool) error { return nil } +// TCPRecovery implements Stack.TCPRecovery. +func (s *TestStack) TCPRecovery() (TCPLossRecovery, error) { + return s.Recovery, nil +} + +// SetTCPRecovery implements Stack.SetTCPRecovery. +func (s *TestStack) SetTCPRecovery(recovery TCPLossRecovery) error { + s.Recovery = recovery + return nil +} + // Statistics implements inet.Stack.Statistics. func (s *TestStack) Statistics(stat interface{}, arg string) error { return nil diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index a48082631..fda3dcb35 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -53,6 +53,7 @@ type Stack struct { interfaceAddrs map[int32][]inet.InterfaceAddr routes []inet.Route supportsIPv6 bool + tcpRecovery inet.TCPLossRecovery tcpRecvBufSize inet.TCPBufferSize tcpSendBufSize inet.TCPBufferSize tcpSACKEnabled bool @@ -350,6 +351,16 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error { return syserror.EACCES } +// TCPRecovery implements inet.Stack.TCPRecovery. +func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { + return s.tcpRecovery, nil +} + +// SetTCPRecovery implements inet.Stack.SetTCPRecovery. +func (s *Stack) SetTCPRecovery(recovery inet.TCPLossRecovery) error { + return syserror.EACCES +} + // getLine reads one line from proc file, with specified prefix. // The last argument, withHeader, specifies if it contains line header. 
func getLine(f *os.File, prefix string, withHeader bool) string { diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 67737ae87..f0fe18684 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -207,6 +207,20 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error { return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enabled))).ToError() } +// TCPRecovery implements inet.Stack.TCPRecovery. +func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { + var recovery tcp.Recovery + if err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &recovery); err != nil { + return 0, syserr.TranslateNetstackError(err).ToError() + } + return inet.TCPLossRecovery(recovery), nil +} + +// SetTCPRecovery implements inet.Stack.SetTCPRecovery. +func (s *Stack) SetTCPRecovery(recovery inet.TCPLossRecovery) error { + return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.Recovery(recovery))).ToError() +} + // Statistics implements inet.Stack.Statistics. func (s *Stack) Statistics(stat interface{}, arg string) error { switch stats := stat.(type) { diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index b34e47bbd..d9abb8d94 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -80,6 +80,25 @@ const ( // enable/disable SACK support in TCP. See: https://tools.ietf.org/html/rfc2018. type SACKEnabled bool +// Recovery is used by stack.(*Stack).TransportProtocolOption to +// set loss detection algorithm in TCP. +type Recovery int32 + +const ( + // RACKLossDetection indicates RACK is used for loss detection and + // recovery. + RACKLossDetection Recovery = 1 << iota + + // RACKStaticReoWnd indicates the reordering window should not be + // adjusted when DSACK is received. + RACKStaticReoWnd + + // RACKNoDupTh indicates RACK should not consider the classic three + // duplicate acknowledgements rule to mark the segments as lost. This + // is used when reordering is not detected. + RACKNoDupTh +) + // DelayEnabled is used by stack.(Stack*).TransportProtocolOption to // enable/disable Nagle's algorithm in TCP. 
type DelayEnabled bool @@ -161,6 +180,7 @@ func (s *synRcvdCounter) Threshold() uint64 { type protocol struct { mu sync.RWMutex sackEnabled bool + recovery Recovery delayEnabled bool sendBufferSize SendBufferSizeOption recvBufferSize ReceiveBufferSizeOption @@ -280,6 +300,12 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { p.mu.Unlock() return nil + case Recovery: + p.mu.Lock() + p.recovery = Recovery(v) + p.mu.Unlock() + return nil + case DelayEnabled: p.mu.Lock() p.delayEnabled = bool(v) @@ -394,6 +420,12 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { p.mu.RUnlock() return nil + case *Recovery: + p.mu.RLock() + *v = Recovery(p.recovery) + p.mu.RUnlock() + return nil + case *DelayEnabled: p.mu.RLock() *v = DelayEnabled(p.delayEnabled) @@ -535,6 +567,7 @@ func NewProtocol() stack.TransportProtocol { minRTO: MinRTO, maxRTO: MaxRTO, maxRetries: MaxRetries, + recovery: RACKLossDetection, } p.dispatcher.init(runtime.GOMAXPROCS(0)) return &p diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index 3377b65cf..4fab097f4 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -477,6 +477,44 @@ TEST(ProcNetSnmp, CheckSnmp) { EXPECT_EQ(value_count, 1); } +TEST(ProcSysNetIpv4Recovery, Exists) { + EXPECT_THAT(open("/proc/sys/net/ipv4/tcp_recovery", O_RDONLY), + SyscallSucceeds()); +} + +TEST(ProcSysNetIpv4Recovery, CanReadAndWrite) { + // TODO(b/162988252): Enable save/restore for this test after the bug is + // fixed. + DisableSave ds; + + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_DAC_OVERRIDE)))); + + auto const fd = ASSERT_NO_ERRNO_AND_VALUE( + Open("/proc/sys/net/ipv4/tcp_recovery", O_RDWR)); + + char buf[10] = {'\0'}; + char to_write = '2'; + + // Check initial value is set to 1. + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(to_write) + 1)); + EXPECT_EQ(strcmp(buf, "1\n"), 0); + + // Set tcp_recovery to one of the allowed constants. + EXPECT_THAT(PwriteFd(fd.get(), &to_write, sizeof(to_write), 0), + SyscallSucceedsWithValue(sizeof(to_write))); + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(to_write) + 1)); + EXPECT_EQ(strcmp(buf, "2\n"), 0); + + // Set tcp_recovery to any random value. 
+ char kMessage[] = "100"; + EXPECT_THAT(PwriteFd(fd.get(), kMessage, strlen(kMessage), 0), + SyscallSucceedsWithValue(strlen(kMessage))); + EXPECT_THAT(PreadFd(fd.get(), buf, sizeof(kMessage), 0), + SyscallSucceedsWithValue(sizeof(kMessage))); + EXPECT_EQ(strcmp(buf, "100\n"), 0); +} } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From fc4dd3ef455975a033714052b12ebebc85e937d5 Mon Sep 17 00:00:00 2001 From: Ghanan Gowripalan Date: Thu, 6 Aug 2020 01:29:32 -0700 Subject: Join IPv4 all-systems group on NIC enable Test: - stack_test.TestJoinLeaveMulticastOnNICEnableDisable - integration_test.TestIncomingMulticastAndBroadcast PiperOrigin-RevId: 325185259 --- pkg/tcpip/header/ipv4.go | 5 + pkg/tcpip/stack/ndp_test.go | 8 +- pkg/tcpip/stack/nic.go | 12 ++ pkg/tcpip/stack/stack_test.go | 191 +++++++++++++-------- .../tests/integration/multicast_broadcast_test.go | 22 ++- test/packetimpact/tests/BUILD | 2 - 6 files changed, 161 insertions(+), 79 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index 62ac932bb..d0d1efd0d 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -101,6 +101,11 @@ const ( // IPv4Version is the version of the ipv4 protocol. IPv4Version = 4 + // IPv4AllSystems is the all systems IPv4 multicast address as per + // IANA's IPv4 Multicast Address Space Registry. See + // https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml. + IPv4AllSystems tcpip.Address = "\xe0\x00\x00\x01" + // IPv4Broadcast is the broadcast address of the IPv4 procotol. IPv4Broadcast tcpip.Address = "\xff\xff\xff\xff" diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 644ba7c33..5d286ccbc 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -1689,13 +1689,7 @@ func containsV6Addr(list []tcpip.ProtocolAddress, item tcpip.AddressWithPrefix) AddressWithPrefix: item, } - for _, i := range list { - if i == protocolAddress { - return true - } - } - - return false + return containsAddr(list, protocolAddress) } // TestNoAutoGenAddr tests that SLAAC is not performed when configured not to. diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index ae4d241de..eaaf756cd 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -217,6 +217,11 @@ func (n *NIC) disableLocked() *tcpip.Error { } if _, ok := n.stack.networkProtocols[header.IPv4ProtocolNumber]; ok { + // The NIC may have already left the multicast group. + if err := n.leaveGroupLocked(header.IPv4AllSystems, false /* force */); err != nil && err != tcpip.ErrBadLocalAddress { + return err + } + // The address may have already been removed. if err := n.removePermanentAddressLocked(ipv4BroadcastAddr.AddressWithPrefix.Address); err != nil && err != tcpip.ErrBadLocalAddress { return err @@ -255,6 +260,13 @@ func (n *NIC) enable() *tcpip.Error { if _, err := n.addAddressLocked(ipv4BroadcastAddr, NeverPrimaryEndpoint, permanent, static, false /* deprecated */); err != nil { return err } + + // As per RFC 1122 section 3.3.7, all hosts should join the all-hosts + // multicast group. Note, the IANA calls the all-hosts multicast group the + // all-systems multicast group. 
+ if err := n.joinGroupLocked(header.IPv4ProtocolNumber, header.IPv4AllSystems); err != nil { + return err + } } // Join the IPv6 All-Nodes Multicast group if the stack is configured to diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 63ddbc44e..0b6deda02 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -277,6 +277,17 @@ func (l *linkEPWithMockedAttach) isAttached() bool { return l.attached } +// Checks to see if list contains an address. +func containsAddr(list []tcpip.ProtocolAddress, item tcpip.ProtocolAddress) bool { + for _, i := range list { + if i == item { + return true + } + } + + return false +} + func TestNetworkReceive(t *testing.T) { // Create a stack with the fake network protocol, one nic, and two // addresses attached to it: 1 & 2. @@ -3089,6 +3100,13 @@ func TestIPv6SourceAddressSelectionScopeAndSameAddress(t *testing.T) { func TestAddRemoveIPv4BroadcastAddressOnNICEnableDisable(t *testing.T) { const nicID = 1 + broadcastAddr := tcpip.ProtocolAddress{ + Protocol: header.IPv4ProtocolNumber, + AddressWithPrefix: tcpip.AddressWithPrefix{ + Address: header.IPv4Broadcast, + PrefixLen: 32, + }, + } e := loopback.New() s := stack.New(stack.Options{ @@ -3099,49 +3117,41 @@ func TestAddRemoveIPv4BroadcastAddressOnNICEnableDisable(t *testing.T) { t.Fatalf("CreateNIC(%d, _, %+v) = %s", nicID, nicOpts, err) } - allStackAddrs := s.AllAddresses() - allNICAddrs, ok := allStackAddrs[nicID] - if !ok { - t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs) - } - if l := len(allNICAddrs); l != 0 { - t.Fatalf("got len(allNICAddrs) = %d, want = 0", l) + { + allStackAddrs := s.AllAddresses() + if allNICAddrs, ok := allStackAddrs[nicID]; !ok { + t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs) + } else if containsAddr(allNICAddrs, broadcastAddr) { + t.Fatalf("got allNICAddrs = %+v, don't want = %+v", allNICAddrs, broadcastAddr) + } } // Enabling the NIC should add the IPv4 broadcast address. if err := s.EnableNIC(nicID); err != nil { t.Fatalf("s.EnableNIC(%d): %s", nicID, err) } - allStackAddrs = s.AllAddresses() - allNICAddrs, ok = allStackAddrs[nicID] - if !ok { - t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs) - } - if l := len(allNICAddrs); l != 1 { - t.Fatalf("got len(allNICAddrs) = %d, want = 1", l) - } - want := tcpip.ProtocolAddress{ - Protocol: header.IPv4ProtocolNumber, - AddressWithPrefix: tcpip.AddressWithPrefix{ - Address: header.IPv4Broadcast, - PrefixLen: 32, - }, - } - if allNICAddrs[0] != want { - t.Fatalf("got allNICAddrs[0] = %+v, want = %+v", allNICAddrs[0], want) + + { + allStackAddrs := s.AllAddresses() + if allNICAddrs, ok := allStackAddrs[nicID]; !ok { + t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs) + } else if !containsAddr(allNICAddrs, broadcastAddr) { + t.Fatalf("got allNICAddrs = %+v, want = %+v", allNICAddrs, broadcastAddr) + } } // Disabling the NIC should remove the IPv4 broadcast address. 
if err := s.DisableNIC(nicID); err != nil { t.Fatalf("s.DisableNIC(%d): %s", nicID, err) } - allStackAddrs = s.AllAddresses() - allNICAddrs, ok = allStackAddrs[nicID] - if !ok { - t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs) - } - if l := len(allNICAddrs); l != 0 { - t.Fatalf("got len(allNICAddrs) = %d, want = 0", l) + + { + allStackAddrs := s.AllAddresses() + if allNICAddrs, ok := allStackAddrs[nicID]; !ok { + t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs) + } else if containsAddr(allNICAddrs, broadcastAddr) { + t.Fatalf("got allNICAddrs = %+v, don't want = %+v", allNICAddrs, broadcastAddr) + } } } @@ -3189,50 +3199,93 @@ func TestLeaveIPv6SolicitedNodeAddrBeforeAddrRemoval(t *testing.T) { } } -func TestJoinLeaveAllNodesMulticastOnNICEnableDisable(t *testing.T) { +func TestJoinLeaveMulticastOnNICEnableDisable(t *testing.T) { const nicID = 1 - e := loopback.New() - s := stack.New(stack.Options{ - NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()}, - }) - nicOpts := stack.NICOptions{Disabled: true} - if err := s.CreateNICWithOptions(nicID, e, nicOpts); err != nil { - t.Fatalf("CreateNIC(%d, _, %+v) = %s", nicID, nicOpts, err) + tests := []struct { + name string + proto tcpip.NetworkProtocolNumber + addr tcpip.Address + }{ + { + name: "IPv6 All-Nodes", + proto: header.IPv6ProtocolNumber, + addr: header.IPv6AllNodesMulticastAddress, + }, + { + name: "IPv4 All-Systems", + proto: header.IPv4ProtocolNumber, + addr: header.IPv4AllSystems, + }, } - // Should not be in the IPv6 all-nodes multicast group yet because the NIC has - // not been enabled yet. - isInGroup, err := s.IsInGroup(nicID, header.IPv6AllNodesMulticastAddress) - if err != nil { - t.Fatalf("IsInGroup(%d, %s): %s", nicID, header.IPv6AllNodesMulticastAddress, err) - } - if isInGroup { - t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, header.IPv6AllNodesMulticastAddress) - } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + e := loopback.New() + s := stack.New(stack.Options{ + NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()}, + }) + nicOpts := stack.NICOptions{Disabled: true} + if err := s.CreateNICWithOptions(nicID, e, nicOpts); err != nil { + t.Fatalf("CreateNIC(%d, _, %+v) = %s", nicID, nicOpts, err) + } - // The all-nodes multicast group should be joined when the NIC is enabled. - if err := s.EnableNIC(nicID); err != nil { - t.Fatalf("s.EnableNIC(%d): %s", nicID, err) - } - isInGroup, err = s.IsInGroup(nicID, header.IPv6AllNodesMulticastAddress) - if err != nil { - t.Fatalf("IsInGroup(%d, %s): %s", nicID, header.IPv6AllNodesMulticastAddress, err) - } - if !isInGroup { - t.Fatalf("got IsInGroup(%d, %s) = false, want = true", nicID, header.IPv6AllNodesMulticastAddress) - } + // Should not be in the multicast group yet because the NIC has not been + // enabled yet. + if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil { + t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err) + } else if isInGroup { + t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, test.addr) + } - // The all-nodes multicast group should be left when the NIC is disabled. 
- if err := s.DisableNIC(nicID); err != nil { - t.Fatalf("s.DisableNIC(%d): %s", nicID, err) - } - isInGroup, err = s.IsInGroup(nicID, header.IPv6AllNodesMulticastAddress) - if err != nil { - t.Fatalf("IsInGroup(%d, %s): %s", nicID, header.IPv6AllNodesMulticastAddress, err) - } - if isInGroup { - t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, header.IPv6AllNodesMulticastAddress) + // The all-nodes multicast group should be joined when the NIC is enabled. + if err := s.EnableNIC(nicID); err != nil { + t.Fatalf("s.EnableNIC(%d): %s", nicID, err) + } + + if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil { + t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err) + } else if !isInGroup { + t.Fatalf("got IsInGroup(%d, %s) = false, want = true", nicID, test.addr) + } + + // The multicast group should be left when the NIC is disabled. + if err := s.DisableNIC(nicID); err != nil { + t.Fatalf("s.DisableNIC(%d): %s", nicID, err) + } + + if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil { + t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err) + } else if isInGroup { + t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, test.addr) + } + + // The all-nodes multicast group should be joined when the NIC is enabled. + if err := s.EnableNIC(nicID); err != nil { + t.Fatalf("s.EnableNIC(%d): %s", nicID, err) + } + + if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil { + t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err) + } else if !isInGroup { + t.Fatalf("got IsInGroup(%d, %s) = false, want = true", nicID, test.addr) + } + + // Leaving the group before disabling the NIC should not cause an error. + if err := s.LeaveGroup(test.proto, nicID, test.addr); err != nil { + t.Fatalf("s.LeaveGroup(%d, %d, %s): %s", test.proto, nicID, test.addr, err) + } + + if err := s.DisableNIC(nicID); err != nil { + t.Fatalf("s.DisableNIC(%d): %s", nicID, err) + } + + if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil { + t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err) + } else if isInGroup { + t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, test.addr) + } + }) } } diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go index 4a860a805..d9b2d147a 100644 --- a/pkg/tcpip/tests/integration/multicast_broadcast_test.go +++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go @@ -31,7 +31,9 @@ import ( const defaultMTU = 1280 -func TestIncomingSubnetBroadcast(t *testing.T) { +// TestIncomingMulticastAndBroadcast tests receiving a packet destined to some +// multicast or broadcast address. +func TestIncomingMulticastAndBroadcast(t *testing.T) { const ( nicID = 1 remotePort = 5555 @@ -179,6 +181,24 @@ func TestIncomingSubnetBroadcast(t *testing.T) { expectRx: true, }, + { + name: "IPv4 all-systems multicast binding to all-systems multicast", + bindAddr: header.IPv4AllSystems, + dstAddr: header.IPv4AllSystems, + expectRx: true, + }, + { + name: "IPv4 all-systems multicast binding to wildcard", + dstAddr: header.IPv4AllSystems, + expectRx: true, + }, + { + name: "IPv4 all-systems multicast binding to unicast", + bindAddr: ipv4Addr.Address, + dstAddr: header.IPv4AllSystems, + expectRx: false, + }, + // IPv6 has no notion of a broadcast. 
{ name: "IPv6 unicast binding to wildcard", diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 0c2a05380..74658fea0 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -40,8 +40,6 @@ packetimpact_go_test( packetimpact_go_test( name = "udp_recv_mcast_bcast", srcs = ["udp_recv_mcast_bcast_test.go"], - # TODO(b/152813495): Fix netstack then remove the line below. - expect_netstack_failure = True, deps = [ "//pkg/tcpip", "//pkg/tcpip/header", -- cgit v1.2.3 From 72b528c840342111c6e9dd5b13cc5c893707dad6 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Thu, 6 Aug 2020 11:03:29 -0700 Subject: [runtime tests] Exclude failing python and java tests test_httplib was un-excluded in cl/325137657. Exclude it again. test_httplib fails consistently when running in a kokoro job. Could not repro with Docker. There are some java tests that pass locally on Docker but fail when running within a kokoro job. Also make shard count a multiple of 8 to minimize idle cores and minimize test time. PiperOrigin-RevId: 325267071 --- test/runtimes/BUILD | 10 +++++----- test/runtimes/exclude_java11.csv | 13 +++++++++++++ test/runtimes/exclude_python3.7.3.csv | 1 + 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 3be123d94..3d0f10855 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -6,7 +6,7 @@ runtime_test( name = "go1.12", exclude_file = "exclude_go1.12.csv", lang = "go", - shard_count = 5, + shard_count = 8, ) runtime_test( @@ -14,26 +14,26 @@ runtime_test( batch = 100, exclude_file = "exclude_java11.csv", lang = "java", - shard_count = 20, + shard_count = 16, ) runtime_test( name = "nodejs12.4.0", exclude_file = "exclude_nodejs12.4.0.csv", lang = "nodejs", - shard_count = 10, + shard_count = 8, ) runtime_test( name = "php7.3.6", exclude_file = "exclude_php7.3.6.csv", lang = "php", - shard_count = 5, + shard_count = 8, ) runtime_test( name = "python3.7.3", exclude_file = "exclude_python3.7.3.csv", lang = "python", - shard_count = 5, + shard_count = 8, ) diff --git a/test/runtimes/exclude_java11.csv b/test/runtimes/exclude_java11.csv index 4d62f7d3a..997a29cad 100644 --- a/test/runtimes/exclude_java11.csv +++ b/test/runtimes/exclude_java11.csv @@ -18,8 +18,16 @@ java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker java/lang/module/ModuleDescriptorTest.java,, java/lang/String/nativeEncoding/StringPlatformChars.java,, java/net/CookieHandler/B6791927.java,,java.lang.RuntimeException: Expiration date shouldn't be 0 +java/net/ipv6tests/TcpTest.java,,java.net.ConnectException: Connection timed out (Connection timed out) +java/net/ipv6tests/UdpTest.java,,Times out +java/net/Inet6Address/B6558853.java,,Times out +java/net/InetAddress/CheckJNI.java,,java.net.ConnectException: Connection timed out (Connection timed out) java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103, +java/net/MulticastSocket/B6425815.java,,java.net.SocketException: Protocol not available (Error getting socket option) +java/net/MulticastSocket/B6427403.java,,java.net.SocketException: Protocol not available java/net/MulticastSocket/MulticastTTL.java,, +java/net/MulticastSocket/NetworkInterfaceEmptyGetInetAddressesTest.java,,java.net.SocketException: Protocol not available (Error getting socket option) +java/net/MulticastSocket/NoLoopbackPackets.java,,java.net.SocketException: Protocol not available java/net/MulticastSocket/Promiscuous.java,, 
java/net/MulticastSocket/SetLoopbackMode.java,, java/net/MulticastSocket/SetTTLAndGetTTL.java,, @@ -27,6 +35,7 @@ java/net/MulticastSocket/Test.java,, java/net/MulticastSocket/TestDefaults.java,, java/net/MulticastSocket/TimeToLive.java,, java/net/NetworkInterface/NetworkInterfaceStreamTest.java,, +java/net/Socket/LinkLocal.java,,java.net.SocketTimeoutException: Receive timed out java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported java/net/Socket/UrgentDataTest.java,b/111515323, java/net/SocketOption/OptionsTest.java,,Fails in Docker @@ -167,6 +176,10 @@ sun/management/jmxremote/bootstrap/RmiSslBootstrapTest.sh,, sun/management/jmxremote/startstop/JMXStartStopTest.java,, sun/management/jmxremote/startstop/JMXStatusPerfCountersTest.java,, sun/management/jmxremote/startstop/JMXStatusTest.java,, +sun/management/jdp/JdpDefaultsTest.java,, +sun/management/jdp/JdpJmxRemoteDynamicPortTest.java,, +sun/management/jdp/JdpOffTest.java,, +sun/management/jdp/JdpSpecificAddressTest.java,, sun/text/resources/LocaleDataTest.java,, sun/tools/jcmd/TestJcmdSanity.java,, sun/tools/jhsdb/AlternateHashingTest.java,, diff --git a/test/runtimes/exclude_python3.7.3.csv b/test/runtimes/exclude_python3.7.3.csv index a06d8361c..8760f8951 100644 --- a/test/runtimes/exclude_python3.7.3.csv +++ b/test/runtimes/exclude_python3.7.3.csv @@ -3,6 +3,7 @@ test_asyncio,,Fails on Docker. test_asyncore,b/162973328, test_epoll,b/162983393, test_fcntl,b/162978767,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode. +test_httplib,b/163000009,OSError: [Errno 98] Address already in use test_imaplib,b/162979661, test_logging,b/162980079, test_multiprocessing_fork,,Flaky. Sometimes times out. -- cgit v1.2.3 From 90021e775a6e8059ea4f4262a16c4f962d3b9732 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 6 Aug 2020 12:08:12 -0700 Subject: Add bzl_library rules for .bzl files without one. 
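Before the bzl_library changes that follow, one note on the exclusion lists edited above: they all share the test name,bug id,comment layout, and either of the last two fields may be empty. Purely as a hypothetical illustration, since the runner that consumes these files is not part of this patch, loading one could look like the sketch below.

    package main

    import (
        "encoding/csv"
        "fmt"
        "os"
    )

    // loadExcludes reads an exclude CSV into a map of test name to reason.
    func loadExcludes(path string) (map[string]string, error) {
        f, err := os.Open(path)
        if err != nil {
            return nil, err
        }
        defer f.Close()

        r := csv.NewReader(f)
        r.FieldsPerRecord = -1 // Comments are free-form and may be empty.
        records, err := r.ReadAll()
        if err != nil {
            return nil, err
        }

        excludes := make(map[string]string)
        for i, rec := range records {
            if i == 0 {
                continue // Skip the "test name,bug id,comment" header row.
            }
            reason := ""
            if len(rec) > 2 {
                reason = rec[2]
            }
            excludes[rec[0]] = reason
        }
        return excludes, nil
    }

    func main() {
        excludes, err := loadExcludes("test/runtimes/exclude_python3.7.3.csv")
        if err != nil {
            fmt.Println(err)
            return
        }
        fmt.Printf("%d tests excluded\n", len(excludes))
    }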
PiperOrigin-RevId: 325280924 --- benchmarks/BUILD | 8 ++++++++ test/packetdrill/BUILD | 9 ++++++++- test/packetimpact/runner/BUILD | 8 +++++++- test/runner/BUILD | 8 +++++++- test/runtimes/BUILD | 7 +++++++ test/runtimes/defs.bzl | 4 ++++ tools/BUILD | 8 ++++++++ tools/bazeldefs/BUILD | 20 +++++++++++++++++++- tools/bazeldefs/defs.bzl | 2 ++ tools/defs.bzl | 3 ++- tools/go_generics/BUILD | 8 +++++++- tools/go_generics/tests/BUILD | 7 +++++++ tools/go_marshal/BUILD | 8 +++++++- tools/go_stateify/BUILD | 8 +++++++- tools/nogo/BUILD | 8 +++++++- tools/vm/BUILD | 8 +++++++- website/BUILD | 8 +++++++- 17 files changed, 121 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 389351210..39ca5919c 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -1,3 +1,5 @@ +load("//tools:defs.bzl", "bzl_library") + package(licenses = ["notice"]) config_setting( @@ -27,3 +29,9 @@ py_binary( ], deps = ["//benchmarks/runner"], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/test/packetdrill/BUILD b/test/packetdrill/BUILD index dfcd55f60..49642f282 100644 --- a/test/packetdrill/BUILD +++ b/test/packetdrill/BUILD @@ -1,4 +1,5 @@ -load("defs.bzl", "packetdrill_test") +load("//tools:defs.bzl", "bzl_library") +load("//test/packetdrill:defs.bzl", "packetdrill_test") package(licenses = ["notice"]) @@ -36,3 +37,9 @@ packetdrill_test( name = "tcp_defer_accept_timeout_test", scripts = ["tcp_defer_accept_timeout.pkt"], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/test/packetimpact/runner/BUILD b/test/packetimpact/runner/BUILD index bad4f0183..ff2be9b30 100644 --- a/test/packetimpact/runner/BUILD +++ b/test/packetimpact/runner/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_test") +load("//tools:defs.bzl", "bzl_library", "go_test") package( default_visibility = ["//test/packetimpact:__subpackages__"], @@ -19,3 +19,9 @@ go_test( "@com_github_docker_docker//api/types/mount:go_default_library", ], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/test/runner/BUILD b/test/runner/BUILD index 63c7ec83a..582d2946d 100644 --- a/test/runner/BUILD +++ b/test/runner/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_binary") +load("//tools:defs.bzl", "bzl_library", "go_binary") package(licenses = ["notice"]) @@ -21,3 +21,9 @@ go_binary( "@org_golang_x_sys//unix:go_default_library", ], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 3d0f10855..066338ee3 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -1,3 +1,4 @@ +load("//tools:defs.bzl", "bzl_library") load("//test/runtimes:defs.bzl", "runtime_test") package(licenses = ["notice"]) @@ -37,3 +38,9 @@ runtime_test( lang = "python", shard_count = 8, ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl index db22029a8..702522d86 100644 --- a/test/runtimes/defs.bzl +++ b/test/runtimes/defs.bzl @@ -55,9 +55,13 @@ _runtime_test = rule( ), "_runner": attr.label( default = "//test/runtimes/runner:runner", + executable = True, + cfg = "target", ), "_proctor": attr.label( default = "//test/runtimes/proctor:proctor", + executable = True, + cfg = "target", ), }, test = True, diff 
--git a/tools/BUILD b/tools/BUILD index 34b950644..da83877b1 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -1 +1,9 @@ +load("//tools:defs.bzl", "bzl_library") + package(licenses = ["notice"]) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/tools/bazeldefs/BUILD b/tools/bazeldefs/BUILD index 3f809065d..8d4356119 100644 --- a/tools/bazeldefs/BUILD +++ b/tools/bazeldefs/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "rbe_platform", "rbe_toolchain") +load("//tools:defs.bzl", "bzl_library", "rbe_platform", "rbe_toolchain") package(licenses = ["notice"]) @@ -86,3 +86,21 @@ rbe_toolchain( toolchain = "@bazel_toolchains_bazel3//configs/ubuntu16_04_clang/11.0.0/bazel_3.1.0/cc:cc-compiler-k8", toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", ) + +bzl_library( + name = "platforms_bzl", + srcs = ["platforms.bzl"], + visibility = ["//visibility:private"], +) + +bzl_library( + name = "tags_bzl", + srcs = ["tags.bzl"], + visibility = ["//visibility:private"], +) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/tools/bazeldefs/defs.bzl b/tools/bazeldefs/defs.bzl index 3db8e13d0..d0096dcec 100644 --- a/tools/bazeldefs/defs.bzl +++ b/tools/bazeldefs/defs.bzl @@ -2,6 +2,7 @@ load("@bazel_gazelle//:def.bzl", _gazelle = "gazelle") load("@bazel_skylib//rules:build_test.bzl", _build_test = "build_test") +load("@bazel_skylib//:bzl_library.bzl", _bzl_library = "bzl_library") load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", _cc_flags_supplier = "cc_flags_supplier") load("@io_bazel_rules_go//go:def.bzl", "GoLibrary", _go_binary = "go_binary", _go_context = "go_context", _go_embed_data = "go_embed_data", _go_library = "go_library", _go_path = "go_path", _go_test = "go_test") load("@io_bazel_rules_go//proto:def.bzl", _go_grpc_library = "go_grpc_library", _go_proto_library = "go_proto_library") @@ -11,6 +12,7 @@ load("@pydeps//:requirements.bzl", _py_requirement = "requirement") load("@com_github_grpc_grpc//bazel:cc_grpc_library.bzl", _cc_grpc_library = "cc_grpc_library") build_test = _build_test +bzl_library = _bzl_library cc_library = _cc_library cc_flags_supplier = _cc_flags_supplier cc_proto_library = _cc_proto_library diff --git a/tools/defs.bzl b/tools/defs.bzl index e35e29634..b64d735e6 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -7,13 +7,14 @@ change for Google-internal and bazel-compatible rules. 
load("//tools/go_stateify:defs.bzl", "go_stateify") load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") -load("//tools/bazeldefs:defs.bzl", _build_test = "build_test", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _default_installer = "default_installer", _default_net_util = "default_net_util", _gazelle = "gazelle", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_path = "go_path", _go_proto_library = "go_proto_library", _go_test = "go_test", _grpcpp = "grpcpp", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _rbe_platform = "rbe_platform", _rbe_toolchain = "rbe_toolchain", _select_arch = "select_arch", _select_system = "select_system", _short_path = "short_path", _vdso_linker_option = "vdso_linker_option") +load("//tools/bazeldefs:defs.bzl", _build_test = "build_test", _bzl_library = "bzl_library", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _default_installer = "default_installer", _default_net_util = "default_net_util", _gazelle = "gazelle", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_path = "go_path", _go_proto_library = "go_proto_library", _go_test = "go_test", _grpcpp = "grpcpp", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _rbe_platform = "rbe_platform", _rbe_toolchain = "rbe_toolchain", _select_arch = "select_arch", _select_system = "select_system", _short_path = "short_path", _vdso_linker_option = "vdso_linker_option") load("//tools/bazeldefs:platforms.bzl", _default_platform = "default_platform", _platforms = "platforms") load("//tools/bazeldefs:tags.bzl", "go_suffixes") load("//tools/nogo:defs.bzl", "nogo_test") # Delegate directly. 
build_test = _build_test +bzl_library = _bzl_library cc_binary = _cc_binary cc_flags_supplier = _cc_flags_supplier cc_grpc_library = _cc_grpc_library diff --git a/tools/go_generics/BUILD b/tools/go_generics/BUILD index 558826bf1..807c08ead 100644 --- a/tools/go_generics/BUILD +++ b/tools/go_generics/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_binary") +load("//tools:defs.bzl", "bzl_library", "go_binary") package(licenses = ["notice"]) @@ -12,3 +12,9 @@ go_binary( visibility = ["//:sandbox"], deps = ["//tools/go_generics/globals"], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/tools/go_generics/tests/BUILD b/tools/go_generics/tests/BUILD index e69de29bb..7547a6b53 100644 --- a/tools/go_generics/tests/BUILD +++ b/tools/go_generics/tests/BUILD @@ -0,0 +1,7 @@ +load("//tools:defs.bzl", "bzl_library") + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/tools/go_marshal/BUILD b/tools/go_marshal/BUILD index be49cf9c8..f79defea7 100644 --- a/tools/go_marshal/BUILD +++ b/tools/go_marshal/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_binary") +load("//tools:defs.bzl", "bzl_library", "go_binary") licenses(["notice"]) @@ -17,3 +17,9 @@ config_setting( name = "marshal_config_verbose", values = {"define": "gomarshal=verbose"}, ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/tools/go_stateify/BUILD b/tools/go_stateify/BUILD index 503cdf2e5..913558b4e 100644 --- a/tools/go_stateify/BUILD +++ b/tools/go_stateify/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_binary") +load("//tools:defs.bzl", "bzl_library", "go_binary") package(licenses = ["notice"]) @@ -8,3 +8,9 @@ go_binary( visibility = ["//:sandbox"], deps = ["//tools/tags"], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/tools/nogo/BUILD b/tools/nogo/BUILD index c21b09511..e1bfb9a2c 100644 --- a/tools/nogo/BUILD +++ b/tools/nogo/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "go_library") +load("//tools:defs.bzl", "bzl_library", "go_library") package(licenses = ["notice"]) @@ -47,3 +47,9 @@ go_library( "@org_golang_x_tools//go/gcexportdata:go_tool_library", ], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/tools/vm/BUILD b/tools/vm/BUILD index f7160c627..d95ca6c63 100644 --- a/tools/vm/BUILD +++ b/tools/vm/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "cc_binary", "gtest") +load("//tools:defs.bzl", "bzl_library", "cc_binary", "gtest") load("//tools/vm:defs.bzl", "vm_image", "vm_test") package( @@ -55,3 +55,9 @@ vm_test( shard_count = 2, targets = [":test"], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) diff --git a/website/BUILD b/website/BUILD index d3bcaa09e..7b61d13c8 100644 --- a/website/BUILD +++ b/website/BUILD @@ -1,4 +1,4 @@ -load("//tools:defs.bzl", "pkg_tar") +load("//tools:defs.bzl", "bzl_library", "pkg_tar") load("//website:defs.bzl", "doc", "docs") package(licenses = ["notice"]) @@ -180,3 +180,9 @@ genrule( "rm -rf $$T", tools = ["//website/cmd/syscalldocs"], ) + +bzl_library( + name = "defs_bzl", + srcs = ["defs.bzl"], + visibility = ["//visibility:private"], +) -- cgit v1.2.3 From f20e63e31b56784c596897e86f03441f9d05f567 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 6 Aug 2020 18:13:55 -0700 
Subject: Add LinkAt support to gofer Updates #1198 PiperOrigin-RevId: 325350818 --- images/basic/linktest/Dockerfile | 7 +++ images/basic/linktest/link_test.c | 93 +++++++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/gofer/filesystem.go | 26 +++++++++- pkg/sentry/fsimpl/gofer/gofer.go | 2 - scripts/docker_tests.sh | 2 +- test/e2e/integration_test.go | 18 +++++++ 6 files changed, 143 insertions(+), 5 deletions(-) create mode 100644 images/basic/linktest/Dockerfile create mode 100644 images/basic/linktest/link_test.c (limited to 'test') diff --git a/images/basic/linktest/Dockerfile b/images/basic/linktest/Dockerfile new file mode 100644 index 000000000..baebc9b76 --- /dev/null +++ b/images/basic/linktest/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:bionic + +WORKDIR /root +COPY . . + +RUN apt-get update && apt-get install -y gcc +RUN gcc -O2 -o link_test link_test.c diff --git a/images/basic/linktest/link_test.c b/images/basic/linktest/link_test.c new file mode 100644 index 000000000..45ab00abe --- /dev/null +++ b/images/basic/linktest/link_test.c @@ -0,0 +1,93 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +// Basic test for linkat(2). Syscall tests requires CAP_DAC_READ_SEARCH and it +// cannot use tricks like userns as root. For this reason, run a basic link test +// to ensure some coverage. +int main(int argc, char** argv) { + const char kOldPath[] = "old.txt"; + int fd = open(kOldPath, O_RDWR | O_CREAT | O_TRUNC, 0600); + if (fd < 0) { + errx(1, "open(%s) failed", kOldPath); + } + const char kData[] = "some random content"; + if (write(fd, kData, sizeof(kData)) < 0) { + err(1, "write failed"); + } + close(fd); + + struct stat old_stat; + if (stat(kOldPath, &old_stat)) { + errx(1, "stat(%s) failed", kOldPath); + } + + const char kNewPath[] = "new.txt"; + if (link(kOldPath, kNewPath)) { + errx(1, "link(%s, %s) failed", kOldPath, kNewPath); + } + + struct stat new_stat; + if (stat(kNewPath, &new_stat)) { + errx(1, "stat(%s) failed", kNewPath); + } + + // Check that files are the same. + if (old_stat.st_dev != new_stat.st_dev) { + errx(1, "files st_dev is different, want: %lu, got: %lu", old_stat.st_dev, + new_stat.st_dev); + } + if (old_stat.st_ino != new_stat.st_ino) { + errx(1, "files st_ino is different, want: %lu, got: %lu", old_stat.st_ino, + new_stat.st_ino); + } + + // Check that link count is correct. + if (new_stat.st_nlink != old_stat.st_nlink + 1) { + errx(1, "wrong nlink, want: %lu, got: %lu", old_stat.st_nlink + 1, + new_stat.st_nlink); + } + + // Check taht contents are the same. + fd = open(kNewPath, O_RDONLY); + if (fd < 0) { + errx(1, "open(%s) failed", kNewPath); + } + char buf[sizeof(kData)] = {}; + if (read(fd, buf, sizeof(buf)) < 0) { + err(1, "read failed"); + } + close(fd); + + if (strcmp(buf, kData) != 0) { + errx(1, "file content mismatch: %s", buf); + } + + // Cleanup. 
+ if (unlink(kNewPath)) { + errx(1, "unlink(%s) failed", kNewPath); + } + if (unlink(kOldPath)) { + errx(1, "unlink(%s) failed", kOldPath); + } + + // Success! + return 0; +} diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index e6af37d0d..582b744bb 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -15,6 +15,7 @@ package gofer import ( + "math" "sync" "sync/atomic" @@ -724,8 +725,29 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. if rp.Mount() != vd.Mount() { return syserror.EXDEV } - // 9P2000.L supports hard links, but we don't. - return syserror.EPERM + d := vd.Dentry().Impl().(*dentry) + if d.isDir() { + return syserror.EPERM + } + gid := auth.KGID(atomic.LoadUint32(&d.gid)) + uid := auth.KUID(atomic.LoadUint32(&d.uid)) + mode := linux.FileMode(atomic.LoadUint32(&d.mode)) + if err := vfs.MayLink(rp.Credentials(), mode, uid, gid); err != nil { + return err + } + if d.nlink == 0 { + return syserror.ENOENT + } + if d.nlink == math.MaxUint32 { + return syserror.EMLINK + } + if err := parent.file.link(ctx, d.file, childName); err != nil { + return err + } + + // Success! + atomic.AddUint32(&d.nlink, 1) + return nil }, nil) } diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 2e5575d8d..6ae796c6d 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -558,8 +558,6 @@ type dentry struct { // filesystem.renameMu. name string - // We don't support hard links, so each dentry maps 1:1 to an inode. - // file is the unopened p9.File that backs this dentry. file is immutable. // // If file.isNil(), this dentry represents a synthetic file, i.e. a file diff --git a/scripts/docker_tests.sh b/scripts/docker_tests.sh index 07e9f3109..be0b0a3ec 100755 --- a/scripts/docker_tests.sh +++ b/scripts/docker_tests.sh @@ -23,5 +23,5 @@ test_runsc //test/image:image_test //test/e2e:integration_test install_runsc_for_test docker --vfs2 IMAGE_FILTER="Hello|Httpd|Ruby|Stdio" -INTEGRATION_FILTER="LifeCycle|Pause|Connect|JobControl|Overlay|Exec|DirCreation/root" +INTEGRATION_FILTER="LifeCycle|Pause|Connect|JobControl|Overlay|Exec|DirCreation/root|Link" test_runsc //test/e2e:integration_test //test/image:image_test --test_filter="${IMAGE_FILTER}|${INTEGRATION_FILTER}" diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 6fe6d304f..71ec4791e 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -467,6 +467,24 @@ func TestHostOverlayfsRewindDir(t *testing.T) { } } +// Basic test for linkat(2). Syscall tests requires CAP_DAC_READ_SEARCH and it +// cannot use tricks like userns as root. For this reason, run a basic link test +// to ensure some coverage. +func TestLink(t *testing.T) { + ctx := context.Background() + d := dockerutil.MakeContainer(ctx, t) + defer d.CleanUp(ctx) + + if got, err := d.Run(ctx, dockerutil.RunOpts{ + Image: "basic/linktest", + WorkDir: "/root", + }, "./link_test"); err != nil { + t.Fatalf("docker run failed: %v", err) + } else if got != "" { + t.Errorf("test failed:\n%s", got) + } +} + func TestMain(m *testing.M) { dockerutil.EnsureSupportedDockerVersion() flag.Parse() -- cgit v1.2.3 From 8c1573ebab260a2a40c41265918df3533bd93b3d Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Fri, 7 Aug 2020 11:24:09 -0700 Subject: Tolerate EINTR from fallocate() in inotify test. 
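RetryEINTR simply re-issues the syscall for as long as it keeps failing with EINTR, which fallocate() may do if the test process takes a signal mid-call. As a point of reference, here is a minimal Go sketch of the same retry pattern; the retryEINTR and fallocateRetry helpers are illustrative only and are not part of the test library.

    package retry

    import "golang.org/x/sys/unix"

    // retryEINTR re-issues fn for as long as it fails with EINTR, mirroring
    // the RetryEINTR wrapper used by the C++ syscall tests.
    func retryEINTR(fn func() error) error {
    	for {
    		if err := fn(); err != unix.EINTR {
    			return err
    		}
    	}
    }

    // fallocateRetry is an interruption-tolerant fallocate, analogous to the
    // change made to the inotify test below.
    func fallocateRetry(fd int, offset, length int64) error {
    	return retryEINTR(func() error {
    		return unix.Fallocate(fd, 0, offset, length)
    	})
    }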
PiperOrigin-RevId: 325472312 --- test/syscalls/linux/inotify.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 6cf398097..5cb325a9e 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -1693,7 +1693,7 @@ TEST(Inotify, Fallocate) { InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS)); // Do an arbitrary modification with fallocate. - ASSERT_THAT(fallocate(fd.get(), 0, 0, 123), SyscallSucceeds()); + ASSERT_THAT(RetryEINTR(fallocate)(fd.get(), 0, 0, 123), SyscallSucceeds()); std::vector events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); EXPECT_THAT(events, Are({Event(IN_MODIFY, wd)})); -- cgit v1.2.3 From 293f11ca95fed1b62538bb5f816e145a3b225fc7 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Fri, 7 Aug 2020 12:05:18 -0700 Subject: [runtime tests] Exclude flaky/failing tests PiperOrigin-RevId: 325481011 --- test/runtimes/exclude_nodejs12.4.0.csv | 2 ++ test/runtimes/exclude_php7.3.6.csv | 1 + 2 files changed, 3 insertions(+) (limited to 'test') diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv index 525117f5c..1d8e65fd0 100644 --- a/test/runtimes/exclude_nodejs12.4.0.csv +++ b/test/runtimes/exclude_nodejs12.4.0.csv @@ -15,6 +15,7 @@ parallel/test-fs-write-stream.js,,Flaky parallel/test-fs-write-stream-throw-type-error.js,b/110226209, parallel/test-http-writable-true-after-close.js,,Flaky - Mismatched function calls. Expected exactly 1 actual 2 parallel/test-os.js,b/63997097, +parallel/test-net-server-listen-options.js,,Flaky - EADDRINUSE parallel/test-process-uid-gid.js,, parallel/test-tls-cli-min-version-1.0.js,,Flaky - EADDRINUSE parallel/test-tls-cli-min-version-1.1.js,,Flaky - EADDRINUSE @@ -45,6 +46,7 @@ pseudo-tty/test-tty-stdout-resize.js,b/162801321, pseudo-tty/test-tty-stream-constructors.js,b/162801321, pseudo-tty/test-tty-window-size.js,b/162801321, pseudo-tty/test-tty-wrap.js,b/162801321, +pummel/test-heapdump-http2.js,,Flaky pummel/test-net-pingpong.js,, pummel/test-vm-memleak.js,b/162799436, sequential/test-child-process-pass-fd.js,b/63926391,Flaky diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv index e828f91dd..2ce979dc8 100644 --- a/test/runtimes/exclude_php7.3.6.csv +++ b/test/runtimes/exclude_php7.3.6.csv @@ -11,6 +11,7 @@ ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,, ext/session/tests/session_module_name_variation4.phpt,,Flaky ext/session/tests/session_set_save_handler_class_018.phpt,, ext/session/tests/session_set_save_handler_iface_003.phpt,, +ext/session/tests/session_set_save_handler_sid_001.phpt,, ext/session/tests/session_set_save_handler_variation4.phpt,, ext/standard/tests/file/fopen_variation19.phpt,b/162894964, ext/standard/tests/file/lstat_stat_variation14.phpt,,Flaky -- cgit v1.2.3 From 93cb66825bf098f8a19b3d7f34b33272ceed8cb3 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Fri, 7 Aug 2020 12:54:14 -0700 Subject: Support separate read/write handles in fsimpl/gofer.dentry. 
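At a high level, the single dentry.handle is split into a read-only fid, a write-only fid, and an optional host FD, and each operation picks whichever of those is open and appropriate. The following is a heavily trimmed, self-contained Go sketch of that shape; the real types are p9file and handle in the patch below, and the stand-in file type and preferredMetadataFile helper here are illustrative only.

    package gofersketch

    import "sync"

    // file stands in for a 9P fid; isNil reports whether it has been opened.
    type file struct{ open bool }

    func (f file) isNil() bool { return !f.open }

    // dentry (trimmed): one fid per direction plus an optional host FD that is
    // preferred for I/O and memory mappings when it is available.
    type dentry struct {
    	handleMu  sync.RWMutex
    	readFile  file  // opened readable, if any
    	writeFile file  // opened writable, if any
    	hostFD    int32 // -1 when no host FD was donated
    }

    // preferredMetadataFile mirrors updateFromGetattr: prefer an open writable
    // fid, then an open readable fid, falling back to the unopened fid.
    func (d *dentry) preferredMetadataFile(unopened file) file {
    	d.handleMu.RLock()
    	defer d.handleMu.RUnlock()
    	switch {
    	case !d.writeFile.isNil():
    		return d.writeFile
    	case !d.readFile.isNil():
    		return d.readFile
    	default:
    		return unopened
    	}
    }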
PiperOrigin-RevId: 325490674 --- pkg/sentry/fsimpl/gofer/BUILD | 1 + pkg/sentry/fsimpl/gofer/directory.go | 12 +- pkg/sentry/fsimpl/gofer/filesystem.go | 18 +- pkg/sentry/fsimpl/gofer/gofer.go | 308 +++++++++++++++++++++----------- pkg/sentry/fsimpl/gofer/gofer_test.go | 4 +- pkg/sentry/fsimpl/gofer/handle.go | 19 +- pkg/sentry/fsimpl/gofer/regular_file.go | 118 ++++++------ pkg/sentry/fsimpl/gofer/special_file.go | 11 +- pkg/sentry/vfs/file_description.go | 5 + test/syscalls/BUILD | 1 + 10 files changed, 312 insertions(+), 185 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index 4a800dcf9..16787116f 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -85,5 +85,6 @@ go_test( deps = [ "//pkg/p9", "//pkg/sentry/contexttest", + "//pkg/sentry/pgalloc", ], ) diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index 1679066ba..2a8011eb4 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -90,10 +90,8 @@ func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) { uid: uint32(opts.kuid), gid: uint32(opts.kgid), blockSize: usermem.PageSize, // arbitrary - handle: handle{ - fd: -1, - }, - nlink: uint32(2), + hostFD: -1, + nlink: uint32(2), } switch opts.mode.FileType() { case linux.S_IFDIR: @@ -205,14 +203,14 @@ func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) { off := uint64(0) const count = 64 * 1024 // for consistency with the vfs1 client d.handleMu.RLock() - if !d.handleReadable { + if d.readFile.isNil() { // This should not be possible because a readable handle should // have been opened when the calling directoryFD was opened. d.handleMu.RUnlock() panic("gofer.dentry.getDirents called without a readable handle") } for { - p9ds, err := d.handle.file.readdir(ctx, off, count) + p9ds, err := d.readFile.readdir(ctx, off, count) if err != nil { d.handleMu.RUnlock() return nil, err @@ -304,5 +302,5 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in // Sync implements vfs.FileDescriptionImpl.Sync. func (fd *directoryFD) Sync(ctx context.Context) error { - return fd.dentry().handle.sync(ctx) + return fd.dentry().syncRemoteFile(ctx) } diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 582b744bb..eaef2594d 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -55,7 +55,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { // Sync regular files. for _, d := range ds { - err := d.syncSharedHandle(ctx) + err := d.syncCachedFile(ctx) d.DecRef(ctx) if err != nil && retErr == nil { retErr = err @@ -1107,12 +1107,18 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD if useRegularFileFD { child.handleMu.Lock() - child.handle.file = openFile - if fdobj != nil { - child.handle.fd = int32(fdobj.Release()) + if vfs.MayReadFileWithOpenFlags(opts.Flags) { + child.readFile = openFile + if fdobj != nil { + child.hostFD = int32(fdobj.Release()) + } + } else if fdobj != nil { + // Can't use fdobj if it's not readable. 
+ fdobj.Close() + } + if vfs.MayWriteFileWithOpenFlags(opts.Flags) { + child.writeFile = openFile } - child.handleReadable = vfs.MayReadFileWithOpenFlags(opts.Flags) - child.handleWritable = vfs.MayWriteFileWithOpenFlags(opts.Flags) child.handleMu.Unlock() } // Insert the dentry into the tree. diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 59323086d..f1d3bf911 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -506,9 +506,9 @@ func (fs *filesystem) Release(ctx context.Context) { for d := range fs.syncableDentries { d.handleMu.Lock() d.dataMu.Lock() - if d.handleWritable { + if h := d.writeHandleLocked(); h.isOpen() { // Write dirty cached data to the remote file. - if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt); err != nil { + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, fs.mfp.MemoryFile(), h.writeFromBlocksAt); err != nil { log.Warningf("gofer.filesystem.Release: failed to flush dentry: %v", err) } // TODO(jamieliu): Do we need to flushf/fsync d? @@ -518,9 +518,9 @@ func (fs *filesystem) Release(ctx context.Context) { d.dirty.RemoveAll() d.dataMu.Unlock() // Close the host fd if one exists. - if d.handle.fd >= 0 { - syscall.Close(int(d.handle.fd)) - d.handle.fd = -1 + if d.hostFD >= 0 { + syscall.Close(int(d.hostFD)) + d.hostFD = -1 } d.handleMu.Unlock() } @@ -622,7 +622,12 @@ type dentry struct { mtime int64 ctime int64 btime int64 - // File size, protected by both metadataMu and dataMu (i.e. both must be + // File size, which differs from other metadata in two ways: + // + // - We make a best-effort attempt to keep it up to date even if + // !dentry.cachedMetadataAuthoritative() for the sake of O_APPEND writes. + // + // - size is protected by both metadataMu and dataMu (i.e. both must be // locked to mutate it; locking either is sufficient to access it). size uint64 // If this dentry does not represent a synthetic file, deleted is 0, and @@ -643,30 +648,28 @@ type dentry struct { // the file into memmap.MappingSpaces. mappings is protected by mapsMu. mappings memmap.MappingSet - // If this dentry represents a regular file or directory: - // - // - handle is the I/O handle used by all regularFileFDs/directoryFDs - // representing this dentry. - // - // - handleReadable is true if handle is readable. - // - // - handleWritable is true if handle is writable. - // - // Invariants: + // - If this dentry represents a regular file or directory, readFile is the + // p9.File used for reads by all regularFileFDs/directoryFDs representing + // this dentry. // - // - If handleReadable == handleWritable == false, then handle.file == nil - // (i.e. there is no open handle). Conversely, if handleReadable || - // handleWritable == true, then handle.file != nil (i.e. there is an open - // handle). + // - If this dentry represents a regular file, writeFile is the p9.File + // used for writes by all regularFileFDs representing this dentry. // - // - handleReadable and handleWritable cannot transition from true to false - // (i.e. handles may not be downgraded). + // - If this dentry represents a regular file, hostFD is the host FD used + // for memory mappings and I/O (when applicable) in preference to readFile + // and writeFile. hostFD is always readable; if !writeFile.isNil(), it must + // also be writable. If hostFD is -1, no such host FD is available. // // These fields are protected by handleMu. 
- handleMu sync.RWMutex - handle handle - handleReadable bool - handleWritable bool + // + // readFile and writeFile may or may not represent the same p9.File. Once + // either p9.File transitions from closed (isNil() == true) to open + // (isNil() == false), it may be mutated with handleMu locked, but cannot + // be closed until the dentry is destroyed. + handleMu sync.RWMutex + readFile p9file + writeFile p9file + hostFD int32 dataMu sync.RWMutex @@ -680,7 +683,7 @@ type dentry struct { // tracks dirty segments in cache. dirty is protected by dataMu. dirty fsutil.DirtySet - // pf implements platform.File for mappings of handle.fd. + // pf implements platform.File for mappings of hostFD. pf dentryPlatformFile // If this dentry represents a symbolic link, InteropModeShared is not in @@ -742,9 +745,7 @@ func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, ma uid: uint32(fs.opts.dfltuid), gid: uint32(fs.opts.dfltgid), blockSize: usermem.PageSize, - handle: handle{ - fd: -1, - }, + hostFD: -1, } d.pf.dentry = d if mask.UID { @@ -835,9 +836,13 @@ func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) { // Preconditions: !d.isSynthetic() func (d *dentry) updateFromGetattr(ctx context.Context) error { - // Use d.handle.file, which represents a 9P fid that has been opened, in - // preference to d.file, which represents a 9P fid that has not. This may - // be significantly more efficient in some implementations. + // Use d.readFile or d.writeFile, which represent 9P fids that have been + // opened, in preference to d.file, which represents a 9P fid that has not. + // This may be significantly more efficient in some implementations. Prefer + // d.writeFile over d.readFile since some filesystem implementations may + // update a writable handle's metadata after writes to that handle, without + // making metadata updates immediately visible to read-only handles + // representing the same file. var ( file p9file handleMuRLocked bool @@ -847,8 +852,11 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error { d.metadataMu.Lock() defer d.metadataMu.Unlock() d.handleMu.RLock() - if !d.handle.file.isNil() { - file = d.handle.file + if !d.writeFile.isNil() { + file = d.writeFile + handleMuRLocked = true + } else if !d.readFile.isNil() { + file = d.readFile handleMuRLocked = true } else { file = d.file @@ -973,8 +981,9 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs return err } if stat.Mask&linux.STATX_SIZE != 0 { - // Privatized copy-on-write mappings of truncated pages need to - // be invalidated even if InteropModeShared is in effect. + // d.size should be kept up to date, and privatized + // copy-on-write mappings of truncated pages need to be + // invalidated, even if InteropModeShared is in effect. d.updateFileSizeLocked(stat.Size) } } @@ -1245,22 +1254,31 @@ func (d *dentry) destroyLocked(ctx context.Context) { panic("dentry.destroyLocked() called with references on the dentry") } + mf := d.fs.mfp.MemoryFile() d.handleMu.Lock() - if !d.handle.file.isNil() { - mf := d.fs.mfp.MemoryFile() - d.dataMu.Lock() + d.dataMu.Lock() + if h := d.writeHandleLocked(); h.isOpen() { // Write dirty pages back to the remote filesystem. 
- if d.handleWritable { - if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { - log.Warningf("gofer.dentry.destroyLocked: failed to write dirty data back: %v", err) - } + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, h.writeFromBlocksAt); err != nil { + log.Warningf("gofer.dentry.destroyLocked: failed to write dirty data back: %v", err) } - // Discard cached data. - d.cache.DropAll(mf) - d.dirty.RemoveAll() - d.dataMu.Unlock() - // Clunk open fids and close open host FDs. - d.handle.close(ctx) + } + // Discard cached data. + d.cache.DropAll(mf) + d.dirty.RemoveAll() + d.dataMu.Unlock() + // Clunk open fids and close open host FDs. + if !d.readFile.isNil() { + d.readFile.close(ctx) + } + if !d.writeFile.isNil() && d.readFile != d.writeFile { + d.writeFile.close(ctx) + } + d.readFile = p9file{} + d.writeFile = p9file{} + if d.hostFD >= 0 { + syscall.Close(int(d.hostFD)) + d.hostFD = -1 } d.handleMu.Unlock() @@ -1393,80 +1411,120 @@ func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool // O_TRUNC). if !trunc { d.handleMu.RLock() - if (!read || d.handleReadable) && (!write || d.handleWritable) { - // The current handle is sufficient. + if (!read || !d.readFile.isNil()) && (!write || !d.writeFile.isNil()) { + // Current handles are sufficient. d.handleMu.RUnlock() return nil } d.handleMu.RUnlock() } - haveOldFD := false + fdToClose := int32(-1) + invalidateTranslations := false d.handleMu.Lock() - if (read && !d.handleReadable) || (write && !d.handleWritable) || trunc { - // Get a new handle. - wantReadable := d.handleReadable || read - wantWritable := d.handleWritable || write - h, err := openHandle(ctx, d.file, wantReadable, wantWritable, trunc) + if (read && d.readFile.isNil()) || (write && d.writeFile.isNil()) || trunc { + // Get a new handle. If this file has been opened for both reading and + // writing, try to get a single handle that is usable for both: + // + // - Writable memory mappings of a host FD require that the host FD is + // opened for both reading and writing. + // + // - NOTE(b/141991141): Some filesystems may not ensure coherence + // between multiple handles for the same file. + openReadable := !d.readFile.isNil() || read + openWritable := !d.writeFile.isNil() || write + h, err := openHandle(ctx, d.file, openReadable, openWritable, trunc) + if err == syserror.EACCES && (openReadable != read || openWritable != write) { + // It may not be possible to use a single handle for both + // reading and writing, since permissions on the file may have + // changed to e.g. disallow reading after previously being + // opened for reading. In this case, we have no choice but to + // use separate handles for reading and writing. + ctx.Debugf("gofer.dentry.ensureSharedHandle: bifurcating read/write handles for dentry %p", d) + openReadable = read + openWritable = write + h, err = openHandle(ctx, d.file, openReadable, openWritable, trunc) + } if err != nil { d.handleMu.Unlock() return err } - if !d.handle.file.isNil() { - // Check that old and new handles are compatible: If the old handle - // includes a host file descriptor but the new one does not, or - // vice versa, old and new memory mappings may be incoherent. 
- haveOldFD = d.handle.fd >= 0 - haveNewFD := h.fd >= 0 - if haveOldFD != haveNewFD { - d.handleMu.Unlock() - ctx.Warningf("gofer.dentry.ensureSharedHandle: can't change host FD availability from %v to %v across dentry handle upgrade", haveOldFD, haveNewFD) - h.close(ctx) - return syserror.EIO - } - if haveOldFD { - // We may have raced with callers of d.pf.FD() that are now - // using the old file descriptor, preventing us from safely - // closing it. We could handle this by invalidating existing - // memmap.Translations, but this is expensive. Instead, use - // dup3 to make the old file descriptor refer to the new file - // description, then close the new file descriptor (which is no - // longer needed). Racing callers may use the old or new file - // description, but this doesn't matter since they refer to the - // same file (unless d.fs.opts.overlayfsStaleRead is true, - // which we handle separately). - if err := syscall.Dup3(int(h.fd), int(d.handle.fd), syscall.O_CLOEXEC); err != nil { + + if d.hostFD < 0 && openReadable && h.fd >= 0 { + // We have no existing FD; use the new FD for at least reading. + d.hostFD = h.fd + } else if d.hostFD >= 0 && d.writeFile.isNil() && openWritable { + // We have an existing read-only FD, but the file has just been + // opened for writing, so we need to start supporting writable memory + // mappings. This may race with callers of d.pf.FD() using the existing + // FD, so in most cases we need to delay closing the old FD until after + // invalidating memmap.Translations that might have observed it. + if !openReadable || h.fd < 0 { + // We don't have a read/write FD, so we have no FD that can be + // used to create writable memory mappings. Switch to using the + // internal page cache. + invalidateTranslations = true + fdToClose = d.hostFD + d.hostFD = -1 + } else if d.fs.opts.overlayfsStaleRead { + // We do have a read/write FD, but it may not be coherent with + // the existing read-only FD, so we must switch to mappings of + // the new FD in both the application and sentry. + if err := d.pf.hostFileMapper.RegenerateMappings(int(h.fd)); err != nil { d.handleMu.Unlock() - ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to dup fd %d to fd %d: %v", h.fd, d.handle.fd, err) + ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to replace sentry mappings of old FD with mappings of new FD: %v", err) h.close(ctx) return err } - syscall.Close(int(h.fd)) - h.fd = d.handle.fd - if d.fs.opts.overlayfsStaleRead { - // Replace sentry mappings of the old FD with mappings of - // the new FD, since the two are not necessarily coherent. - if err := d.pf.hostFileMapper.RegenerateMappings(int(h.fd)); err != nil { - d.handleMu.Unlock() - ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to replace sentry mappings of old FD with mappings of new FD: %v", err) - h.close(ctx) - return err - } + invalidateTranslations = true + fdToClose = d.hostFD + d.hostFD = h.fd + } else { + // We do have a read/write FD. To avoid invalidating existing + // memmap.Translations (which is expensive), use dup3 to make + // the old file descriptor refer to the new file description, + // then close the new file descriptor (which is no longer + // needed). Racing callers of d.pf.FD() may use the old or new + // file description, but this doesn't matter since they refer + // to the same file, and any racing mappings must be read-only. 
+ if err := syscall.Dup3(int(h.fd), int(d.hostFD), syscall.O_CLOEXEC); err != nil { + oldHostFD := d.hostFD + d.handleMu.Unlock() + ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to dup fd %d to fd %d: %v", h.fd, oldHostFD, err) + h.close(ctx) + return err } - // Clunk the old fid before making the new handle visible (by - // unlocking d.handleMu). - d.handle.file.close(ctx) + fdToClose = h.fd } + } else { + // h.fd is not useful. + fdToClose = h.fd + } + + // Switch to new fids. + var oldReadFile p9file + if openReadable { + oldReadFile = d.readFile + d.readFile = h.file + } + var oldWriteFile p9file + if openWritable { + oldWriteFile = d.writeFile + d.writeFile = h.file + } + // NOTE(b/141991141): Clunk old fids before making new fids visible (by + // unlocking d.handleMu). + if !oldReadFile.isNil() { + oldReadFile.close(ctx) + } + if !oldWriteFile.isNil() && oldReadFile != oldWriteFile { + oldWriteFile.close(ctx) } - // Switch to the new handle. - d.handle = h - d.handleReadable = wantReadable - d.handleWritable = wantWritable } d.handleMu.Unlock() - if d.fs.opts.overlayfsStaleRead && haveOldFD { - // Invalidate application mappings that may be using the old FD; they + if invalidateTranslations { + // Invalidate application mappings that may be using an old FD; they // will be replaced with mappings using the new FD after future calls // to d.Translate(). This requires holding d.mapsMu, which precedes // d.handleMu in the lock order. @@ -1474,7 +1532,51 @@ func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool d.mappings.InvalidateAll(memmap.InvalidateOpts{}) d.mapsMu.Unlock() } + if fdToClose >= 0 { + syscall.Close(int(fdToClose)) + } + + return nil +} +// Preconditions: d.handleMu must be locked. +func (d *dentry) readHandleLocked() handle { + return handle{ + file: d.readFile, + fd: d.hostFD, + } +} + +// Preconditions: d.handleMu must be locked. +func (d *dentry) writeHandleLocked() handle { + return handle{ + file: d.writeFile, + fd: d.hostFD, + } +} + +func (d *dentry) syncRemoteFile(ctx context.Context) error { + d.handleMu.RLock() + defer d.handleMu.RUnlock() + return d.syncRemoteFileLocked(ctx) +} + +// Preconditions: d.handleMu must be locked. +func (d *dentry) syncRemoteFileLocked(ctx context.Context) error { + // If we have a host FD, fsyncing it is likely to be faster than an fsync + // RPC. + if d.hostFD >= 0 { + ctx.UninterruptibleSleepStart(false) + err := syscall.Fsync(int(d.hostFD)) + ctx.UninterruptibleSleepFinish(false) + return err + } + if !d.writeFile.isNil() { + return d.writeFile.fsync(ctx) + } + if !d.readFile.isNil() { + return d.readFile.fsync(ctx) + } return nil } diff --git a/pkg/sentry/fsimpl/gofer/gofer_test.go b/pkg/sentry/fsimpl/gofer/gofer_test.go index 56d80bcf8..36cca3625 100644 --- a/pkg/sentry/fsimpl/gofer/gofer_test.go +++ b/pkg/sentry/fsimpl/gofer/gofer_test.go @@ -20,10 +20,13 @@ import ( "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/contexttest" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" ) func TestDestroyIdempotent(t *testing.T) { + ctx := contexttest.Context(t) fs := filesystem{ + mfp: pgalloc.MemoryFileProviderFromContext(ctx), syncableDentries: make(map[*dentry]struct{}), opts: filesystemOptions{ // Test relies on no dentry being held in the cache. 
@@ -31,7 +34,6 @@ func TestDestroyIdempotent(t *testing.T) { }, } - ctx := contexttest.Context(t) attr := &p9.Attr{ Mode: p9.ModeRegular, } diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go index 8792ca4f2..104157512 100644 --- a/pkg/sentry/fsimpl/gofer/handle.go +++ b/pkg/sentry/fsimpl/gofer/handle.go @@ -63,6 +63,10 @@ func openHandle(ctx context.Context, file p9file, read, write, trunc bool) (hand }, nil } +func (h *handle) isOpen() bool { + return !h.file.isNil() +} + func (h *handle) close(ctx context.Context) { h.file.close(ctx) h.file = p9file{} @@ -124,18 +128,3 @@ func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, o } return cp, cperr } - -func (h *handle) sync(ctx context.Context) error { - // Handle most common case first. - if h.fd >= 0 { - ctx.UninterruptibleSleepStart(false) - err := syscall.Fsync(int(h.fd)) - ctx.UninterruptibleSleepFinish(false) - return err - } - if h.file.isNil() { - // File hasn't been touched, there is nothing to sync. - return nil - } - return h.file.fsync(ctx) -} diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index db6bed4f6..7e1cbf065 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -64,34 +64,34 @@ func (fd *regularFileFD) OnClose(ctx context.Context) error { } d.handleMu.RLock() defer d.handleMu.RUnlock() - return d.handle.file.flush(ctx) + if d.writeFile.isNil() { + return nil + } + return d.writeFile.flush(ctx) } // Allocate implements vfs.FileDescriptionImpl.Allocate. func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { - d := fd.dentry() d.metadataMu.Lock() defer d.metadataMu.Unlock() - size := offset + length - // Allocating a smaller size is a noop. - if size <= d.size { + size := offset + length + if d.cachedMetadataAuthoritative() && size <= d.size { return nil } - d.handleMu.Lock() - defer d.handleMu.Unlock() - - err := d.handle.file.allocate(ctx, p9.ToAllocateMode(mode), offset, length) + d.handleMu.RLock() + err := d.writeFile.allocate(ctx, p9.ToAllocateMode(mode), offset, length) + d.handleMu.RUnlock() if err != nil { return err } d.dataMu.Lock() atomic.StoreUint64(&d.size, size) d.dataMu.Unlock() - if !d.cachedMetadataAuthoritative() { + if d.cachedMetadataAuthoritative() { d.touchCMtimeLocked() } return nil @@ -113,7 +113,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs // Check for reading at EOF before calling into MM (but not under // InteropModeShared, which makes d.size unreliable). d := fd.dentry() - if d.fs.opts.interop != InteropModeShared && uint64(offset) >= atomic.LoadUint64(&d.size) { + if d.cachedMetadataAuthoritative() && uint64(offset) >= atomic.LoadUint64(&d.size) { return 0, io.EOF } @@ -217,16 +217,23 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off n, err := src.CopyInTo(ctx, rw) if err != nil { - return n, offset, err + return n, offset + n, err } if n > 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 { - // Write dirty cached pages touched by the write back to the remote file. + // Note that if any of the following fail, then we can't guarantee that + // any data was actually written with the semantics of O_DSYNC or + // O_SYNC, so we return zero bytes written. Compare Linux's + // mm/filemap.c:generic_file_write_iter() => + // include/linux/fs.h:generic_write_sync(). 
+ // + // Write dirty cached pages touched by the write back to the remote + // file. if err := d.writeback(ctx, offset, src.NumBytes()); err != nil { - return n, offset, err + return 0, offset, err } // Request the remote filesystem to sync the remote file. - if err := d.handle.sync(ctx); err != nil { - return n, offset, err + if err := d.syncRemoteFile(ctx); err != nil { + return 0, offset, err } } return n, offset + n, nil @@ -317,10 +324,11 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) // coherence with memory-mapped I/O), or if InteropModeShared is in effect // (which prevents us from caching file contents and makes dentry.size // unreliable), or if the file was opened O_DIRECT, read directly from - // dentry.handle without locking dentry.dataMu. + // dentry.readHandleLocked() without locking dentry.dataMu. rw.d.handleMu.RLock() - if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { - n, err := rw.d.handle.readToBlocksAt(rw.ctx, dsts, rw.off) + h := rw.d.readHandleLocked() + if (rw.d.hostFD >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { + n, err := h.readToBlocksAt(rw.ctx, dsts, rw.off) rw.d.handleMu.RUnlock() rw.off += n return n, err @@ -388,7 +396,7 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) End: gapEnd, } optMR := gap.Range() - err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, rw.d.handle.readToBlocksAt) + err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, h.readToBlocksAt) mf.MarkEvictable(rw.d, pgalloc.EvictableRange{optMR.Start, optMR.End}) seg, gap = rw.d.cache.Find(rw.off) if !seg.Ok() { @@ -403,7 +411,7 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) } else { // Read directly from the file. gapDsts := dsts.TakeFirst64(gapMR.Length()) - n, err := rw.d.handle.readToBlocksAt(rw.ctx, gapDsts, gapMR.Start) + n, err := h.readToBlocksAt(rw.ctx, gapDsts, gapMR.Start) done += n rw.off += n dsts = dsts.DropFirst64(n) @@ -435,11 +443,12 @@ func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, erro // If we have a mmappable host FD (which must be used here to ensure // coherence with memory-mapped I/O), or if InteropModeShared is in effect // (which prevents us from caching file contents), or if the file was - // opened with O_DIRECT, write directly to dentry.handle without locking - // dentry.dataMu. + // opened with O_DIRECT, write directly to dentry.writeHandleLocked() + // without locking dentry.dataMu. rw.d.handleMu.RLock() - if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { - n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, srcs, rw.off) + h := rw.d.writeHandleLocked() + if (rw.d.hostFD >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { + n, err := h.writeFromBlocksAt(rw.ctx, srcs, rw.off) rw.off += n rw.d.dataMu.Lock() if rw.off > rw.d.size { @@ -501,7 +510,7 @@ func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, erro // for detecting or avoiding this. 
gapMR := gap.Range().Intersect(mr) gapSrcs := srcs.TakeFirst64(gapMR.Length()) - n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, gapSrcs, gapMR.Start) + n, err := h.writeFromBlocksAt(rw.ctx, gapSrcs, gapMR.Start) done += n rw.off += n srcs = srcs.DropFirst64(n) @@ -527,7 +536,7 @@ exitLoop: if err := fsutil.SyncDirty(rw.ctx, memmap.MappableRange{ Start: start, End: rw.off, - }, &rw.d.cache, &rw.d.dirty, rw.d.size, mf, rw.d.handle.writeFromBlocksAt); err != nil { + }, &rw.d.cache, &rw.d.dirty, rw.d.size, mf, h.writeFromBlocksAt); err != nil { // We have no idea how many bytes were actually flushed. rw.off = start done = 0 @@ -545,6 +554,7 @@ func (d *dentry) writeback(ctx context.Context, offset, size int64) error { } d.handleMu.RLock() defer d.handleMu.RUnlock() + h := d.writeHandleLocked() d.dataMu.Lock() defer d.dataMu.Unlock() // Compute the range of valid bytes (overflow-checked). @@ -558,7 +568,7 @@ func (d *dentry) writeback(ctx context.Context, offset, size int64) error { return fsutil.SyncDirty(ctx, memmap.MappableRange{ Start: uint64(offset), End: uint64(end), - }, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt) + }, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), h.writeFromBlocksAt) } // Seek implements vfs.FileDescriptionImpl.Seek. @@ -615,24 +625,23 @@ func regularFileSeekLocked(ctx context.Context, d *dentry, fdOffset, offset int6 // Sync implements vfs.FileDescriptionImpl.Sync. func (fd *regularFileFD) Sync(ctx context.Context) error { - return fd.dentry().syncSharedHandle(ctx) + return fd.dentry().syncCachedFile(ctx) } -func (d *dentry) syncSharedHandle(ctx context.Context) error { +func (d *dentry) syncCachedFile(ctx context.Context) error { d.handleMu.RLock() defer d.handleMu.RUnlock() - if d.handleWritable { + if h := d.writeHandleLocked(); h.isOpen() { d.dataMu.Lock() // Write dirty cached data to the remote file. - err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt) + err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), h.writeFromBlocksAt) d.dataMu.Unlock() if err != nil { return err } } - // Sync the remote file. - return d.handle.sync(ctx) + return d.syncRemoteFileLocked(ctx) } // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. @@ -656,7 +665,7 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt return syserror.ENODEV } d.handleMu.RLock() - haveFD := d.handle.fd >= 0 + haveFD := d.hostFD >= 0 d.handleMu.RUnlock() if !haveFD { return syserror.ENODEV @@ -677,7 +686,7 @@ func (d *dentry) mayCachePages() bool { return true } d.handleMu.RLock() - haveFD := d.handle.fd >= 0 + haveFD := d.hostFD >= 0 d.handleMu.RUnlock() return haveFD } @@ -735,7 +744,7 @@ func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, // Translate implements memmap.Mappable.Translate. 
func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { d.handleMu.RLock() - if d.handle.fd >= 0 && !d.fs.opts.forcePageCache { + if d.hostFD >= 0 && !d.fs.opts.forcePageCache { d.handleMu.RUnlock() mr := optional if d.fs.opts.limitHostFDTranslation { @@ -771,7 +780,8 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab } mf := d.fs.mfp.MemoryFile() - cerr := d.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, d.handle.readToBlocksAt) + h := d.readHandleLocked() + cerr := d.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, h.readToBlocksAt) var ts []memmap.Translation var translatedEnd uint64 @@ -840,9 +850,12 @@ func (d *dentry) InvalidateUnsavable(ctx context.Context) error { // Write the cache's contents back to the remote file so that if we have a // host fd after restore, the remote file's contents are coherent. mf := d.fs.mfp.MemoryFile() + d.handleMu.RLock() + defer d.handleMu.RUnlock() + h := d.writeHandleLocked() d.dataMu.Lock() defer d.dataMu.Unlock() - if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, h.writeFromBlocksAt); err != nil { return err } @@ -857,20 +870,23 @@ func (d *dentry) InvalidateUnsavable(ctx context.Context) error { // Evict implements pgalloc.EvictableMemoryUser.Evict. func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { + mr := memmap.MappableRange{er.Start, er.End} + mf := d.fs.mfp.MemoryFile() d.mapsMu.Lock() defer d.mapsMu.Unlock() + d.handleMu.RLock() + defer d.handleMu.RUnlock() + h := d.writeHandleLocked() d.dataMu.Lock() defer d.dataMu.Unlock() - mr := memmap.MappableRange{er.Start, er.End} - mf := d.fs.mfp.MemoryFile() // Only allow pages that are no longer memory-mapped to be evicted. for mgap := d.mappings.LowerBoundGap(mr.Start); mgap.Ok() && mgap.Start() < mr.End; mgap = mgap.NextGap() { mgapMR := mgap.Range().Intersect(mr) if mgapMR.Length() == 0 { continue } - if err := fsutil.SyncDirty(ctx, mgapMR, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { + if err := fsutil.SyncDirty(ctx, mgapMR, &d.cache, &d.dirty, d.size, mf, h.writeFromBlocksAt); err != nil { log.Warningf("Failed to writeback cached data %v: %v", mgapMR, err) } d.cache.Drop(mgapMR, mf) @@ -882,8 +898,8 @@ func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { // cannot implement both vfs.DentryImpl.IncRef and memmap.File.IncRef. // // dentryPlatformFile is only used when a host FD representing the remote file -// is available (i.e. dentry.handle.fd >= 0), and that FD is used for -// application memory mappings (i.e. !filesystem.opts.forcePageCache). +// is available (i.e. dentry.hostFD >= 0), and that FD is used for application +// memory mappings (i.e. !filesystem.opts.forcePageCache). type dentryPlatformFile struct { *dentry @@ -891,8 +907,8 @@ type dentryPlatformFile struct { // by dentry.dataMu. fdRefs fsutil.FrameRefSet - // If this dentry represents a regular file, and handle.fd >= 0, - // hostFileMapper caches mappings of handle.fd. + // If this dentry represents a regular file, and dentry.hostFD >= 0, + // hostFileMapper caches mappings of dentry.hostFD. hostFileMapper fsutil.HostFileMapper // hostFileMapperInitOnce is used to lazily initialize hostFileMapper. 
@@ -916,15 +932,13 @@ func (d *dentryPlatformFile) DecRef(fr memmap.FileRange) { // MapInternal implements memmap.File.MapInternal. func (d *dentryPlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { d.handleMu.RLock() - bs, err := d.hostFileMapper.MapInternal(fr, int(d.handle.fd), at.Write) - d.handleMu.RUnlock() - return bs, err + defer d.handleMu.RUnlock() + return d.hostFileMapper.MapInternal(fr, int(d.hostFD), at.Write) } // FD implements memmap.File.FD. func (d *dentryPlatformFile) FD() int { d.handleMu.RLock() - fd := d.handle.fd - d.handleMu.RUnlock() - return int(fd) + defer d.handleMu.RUnlock() + return int(d.hostFD) } diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index fc269ef2b..a6368fdd0 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -17,6 +17,7 @@ package gofer import ( "sync" "sync/atomic" + "syscall" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -279,5 +280,13 @@ func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) ( // Sync implements vfs.FileDescriptionImpl.Sync. func (fd *specialFileFD) Sync(ctx context.Context) error { - return fd.dentry().syncSharedHandle(ctx) + // If we have a host FD, fsyncing it is likely to be faster than an fsync + // RPC. + if fd.handle.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + err := syscall.Fsync(int(fd.handle.fd)) + ctx.UninterruptibleSleepFinish(false) + return err + } + return fd.handle.file.fsync(ctx) } diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 576ab3920..d3c1197e3 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -356,6 +356,8 @@ type FileDescriptionImpl interface { // Allocate grows the file to offset + length bytes. // Only mode == 0 is supported currently. + // + // Preconditions: The FileDescription was opened for writing. Allocate(ctx context.Context, mode, offset, length uint64) error // waiter.Waitable methods may be used to poll for I/O events. @@ -565,6 +567,9 @@ func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { // Allocate grows file represented by FileDescription to offset + length bytes. 
func (fd *FileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error { + if !fd.IsWritable() { + return syserror.EBADF + } return fd.impl.Allocate(ctx, mode, offset, length) } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index c19b30b4a..a31612b41 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -1023,6 +1023,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:truncate_test", + vfs2 = "True", ) syscall_test( -- cgit v1.2.3 From 10c13bccaf5ec963b78f5e61629b4528ed9e9c6b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 7 Aug 2020 13:08:51 -0700 Subject: tcp: change the limit of TCP_LINGER2 It was changed in the Linux kernel: commit f0628c524fd188c3f9418e12478dfdfadacba815 Date: Fri Apr 24 16:06:16 2020 +0800 net: Replace the limit of TCP_LINGER2 with TCP_FIN_TIMEOUT_MAX PiperOrigin-RevId: 325493859 --- pkg/tcpip/transport/tcp/endpoint.go | 11 ++--------- pkg/tcpip/transport/tcp/protocol.go | 4 ++++ pkg/tcpip/transport/tcp/tcp_test.go | 2 +- test/syscalls/linux/socket_ip_tcp_generic.cc | 17 ++++++++++++++--- 4 files changed, 21 insertions(+), 13 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 39ea38fe6..b8b52b03d 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1777,15 +1777,8 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { // Same as effectively disabling TCPLinger timeout. v = 0 } - var stkTCPLingerTimeout tcpip.TCPLingerTimeoutOption - if err := e.stack.TransportProtocolOption(header.TCPProtocolNumber, &stkTCPLingerTimeout); err != nil { - // We were unable to retrieve a stack config, just use - // the DefaultTCPLingerTimeout. - if v > tcpip.TCPLingerTimeoutOption(DefaultTCPLingerTimeout) { - stkTCPLingerTimeout = tcpip.TCPLingerTimeoutOption(DefaultTCPLingerTimeout) - } - } - // Cap it to the stack wide TCPLinger timeout. + // Cap it to MaxTCPLingerTimeout. + stkTCPLingerTimeout := tcpip.TCPLingerTimeoutOption(MaxTCPLingerTimeout) if v > stkTCPLingerTimeout { v = stkTCPLingerTimeout } diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index d9abb8d94..2e5093b36 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -62,6 +62,10 @@ const ( // FIN_WAIT_2 state before being marked closed. DefaultTCPLingerTimeout = 60 * time.Second + // MaxTCPLingerTimeout is the maximum amount of time that sockets + // linger in FIN_WAIT_2 state before being marked closed. + MaxTCPLingerTimeout = 120 * time.Second + // DefaultTCPTimeWaitTimeout is the amount of time that sockets linger // in TIME_WAIT state before being marked closed. DefaultTCPTimeWaitTimeout = 60 * time.Second diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index fb25b86b9..1b58eb91b 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -6112,7 +6112,7 @@ func TestTCPLingerTimeout(t *testing.T) { {"InRangeLingerTimeout", 10 * time.Second, 10 * time.Second}, // Values > stack's TCPLingerTimeout are capped to the stack's // value. 
Defaults to tcp.DefaultTCPLingerTimeout(60 seconds) - {"AboveMaxLingerTimeout", 65 * time.Second, 60 * time.Second}, + {"AboveMaxLingerTimeout", 125 * time.Second, 120 * time.Second}, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc index c2ecb639f..53c076787 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic.cc @@ -34,6 +34,9 @@ namespace gvisor { namespace testing { +using ::testing::AnyOf; +using ::testing::Eq; + TEST_P(TCPSocketPairTest, TcpInfoSucceeds) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -800,6 +803,9 @@ TEST_P(TCPSocketPairTest, SetCongestionControlFailsForUnsupported) { // Linux and Netstack both default to a 60s TCP_LINGER2 timeout. constexpr int kDefaultTCPLingerTimeout = 60; +// On Linux, the maximum linger2 timeout was changed from 60sec to 120sec. +constexpr int kMaxTCPLingerTimeout = 120; +constexpr int kOldMaxTCPLingerTimeout = 60; TEST_P(TCPSocketPairTest, TCPLingerTimeoutDefault) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -827,12 +833,12 @@ TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutZeroOrLess) { SyscallSucceedsWithValue(0)); } -TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutAboveDefault) { +TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutAboveMax) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); // Values above the net.ipv4.tcp_fin_timeout are capped to tcp_fin_timeout // on linux (defaults to 60 seconds on linux). - constexpr int kAboveDefault = kDefaultTCPLingerTimeout + 1; + constexpr int kAboveDefault = kMaxTCPLingerTimeout + 1; EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &kAboveDefault, sizeof(kAboveDefault)), SyscallSucceedsWithValue(0)); @@ -843,7 +849,12 @@ TEST_P(TCPSocketPairTest, SetTCPLingerTimeoutAboveDefault) { getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_LINGER2, &get, &get_len), SyscallSucceedsWithValue(0)); EXPECT_EQ(get_len, sizeof(get)); - EXPECT_EQ(get, kDefaultTCPLingerTimeout); + if (IsRunningOnGvisor()) { + EXPECT_EQ(get, kMaxTCPLingerTimeout); + } else { + EXPECT_THAT(get, + AnyOf(Eq(kMaxTCPLingerTimeout), Eq(kOldMaxTCPLingerTimeout))); + } } TEST_P(TCPSocketPairTest, SetTCPLingerTimeout) { -- cgit v1.2.3 From a7bd0a701289f8d808f93eaded266f6a1bab03ea Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Fri, 7 Aug 2020 13:28:11 -0700 Subject: Port Startup and Density Benchmarks. 
PiperOrigin-RevId: 325497346 --- images/benchmarks/alpine/Dockerfile | 1 + images/benchmarks/util/Dockerfile | 3 + test/benchmarks/base/BUILD | 13 +- test/benchmarks/base/base.go | 4 +- test/benchmarks/base/size_test.go | 220 ++++++++++++++++++++++++++++++++++ test/benchmarks/base/startup_test.go | 156 ++++++++++++++++++++++++ test/benchmarks/base/sysbench_test.go | 2 +- test/benchmarks/harness/util.go | 10 +- test/benchmarks/network/nginx_test.go | 2 +- test/benchmarks/tools/BUILD | 2 + test/benchmarks/tools/meminfo.go | 60 ++++++++++ test/benchmarks/tools/meminfo_test.go | 84 +++++++++++++ 12 files changed, 546 insertions(+), 11 deletions(-) create mode 100644 images/benchmarks/alpine/Dockerfile create mode 100644 images/benchmarks/util/Dockerfile create mode 100644 test/benchmarks/base/size_test.go create mode 100644 test/benchmarks/base/startup_test.go create mode 100644 test/benchmarks/tools/meminfo.go create mode 100644 test/benchmarks/tools/meminfo_test.go (limited to 'test') diff --git a/images/benchmarks/alpine/Dockerfile b/images/benchmarks/alpine/Dockerfile new file mode 100644 index 000000000..b09b037ca --- /dev/null +++ b/images/benchmarks/alpine/Dockerfile @@ -0,0 +1 @@ +FROM alpine:latest diff --git a/images/benchmarks/util/Dockerfile b/images/benchmarks/util/Dockerfile new file mode 100644 index 000000000..f2799b3e6 --- /dev/null +++ b/images/benchmarks/util/Dockerfile @@ -0,0 +1,3 @@ +FROM ubuntu:bionic + +RUN apt-get update && apt-get install -y wget diff --git a/test/benchmarks/base/BUILD b/test/benchmarks/base/BUILD index 3cb07797d..5e099d0f9 100644 --- a/test/benchmarks/base/BUILD +++ b/test/benchmarks/base/BUILD @@ -5,14 +5,20 @@ package(licenses = ["notice"]) go_library( name = "base", testonly = 1, - srcs = ["base.go"], + srcs = [ + "base.go", + ], deps = ["//test/benchmarks/harness"], ) go_test( name = "base_test", - size = "small", - srcs = ["sysbench_test.go"], + size = "large", + srcs = [ + "size_test.go", + "startup_test.go", + "sysbench_test.go", + ], library = ":base", tags = [ # Requires docker and runsc to be configured before test runs. @@ -21,6 +27,7 @@ go_test( ], deps = [ "//pkg/test/dockerutil", + "//test/benchmarks/harness", "//test/benchmarks/tools", ], ) diff --git a/test/benchmarks/base/base.go b/test/benchmarks/base/base.go index 7eb44d0ab..7bac52ff1 100644 --- a/test/benchmarks/base/base.go +++ b/test/benchmarks/base/base.go @@ -22,10 +22,10 @@ import ( "gvisor.dev/gvisor/test/benchmarks/harness" ) -var h harness.Harness +var testHarness harness.Harness // TestMain is the main method for package network. func TestMain(m *testing.M) { - h.Init() + testHarness.Init() os.Exit(m.Run()) } diff --git a/test/benchmarks/base/size_test.go b/test/benchmarks/base/size_test.go new file mode 100644 index 000000000..3c1364faf --- /dev/null +++ b/test/benchmarks/base/size_test.go @@ -0,0 +1,220 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
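base.go above only renames the package-level harness from h to testHarness; the initialization pattern stays the same: TestMain sets the harness up once per test binary and every benchmark in the package reuses it. A self-contained sketch of that pattern, with a stub type standing in for the real //test/benchmarks/harness package, before size_test.go continues below:

package base_test

import (
	"os"
	"testing"
)

// stubHarness stands in for harness.Harness; only Init matters for the sketch.
type stubHarness struct{ ready bool }

func (h *stubHarness) Init() { h.ready = true }

// testHarness is shared by every benchmark in the package.
var testHarness stubHarness

// TestMain runs once per test binary, so the harness is initialized exactly
// once before any Benchmark* function executes.
func TestMain(m *testing.M) {
	testHarness.Init()
	os.Exit(m.Run())
}

func BenchmarkHarnessReady(b *testing.B) {
	if !testHarness.ready {
		b.Fatal("harness was not initialized")
	}
	for i := 0; i < b.N; i++ {
		// A real benchmark would call testHarness.GetMachine() here and
		// drive containers on the returned machine.
	}
}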
+ +package base + +import ( + "context" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" + "gvisor.dev/gvisor/test/benchmarks/tools" +) + +// BenchmarkSizeEmpty creates N empty containers and reads memory usage from +// /proc/meminfo. +func BenchmarkSizeEmpty(b *testing.B) { + machine, err := testHarness.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer machine.CleanUp() + meminfo := tools.Meminfo{} + ctx := context.Background() + containers := make([]*dockerutil.Container, 0, b.N) + + // DropCaches before the test. + harness.DropCaches(machine) + + // Check available memory on 'machine'. + cmd, args := meminfo.MakeCmd() + before, err := machine.RunCommand(cmd, args...) + if err != nil { + b.Fatalf("failed to get meminfo: %v", err) + } + + // Make N containers. + for i := 0; i < b.N; i++ { + container := machine.GetContainer(ctx, b) + containers = append(containers, container) + if err := container.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/alpine", + }, "sh", "-c", "echo Hello && sleep 1000"); err != nil { + cleanUpContainers(ctx, containers) + b.Fatalf("failed to run container: %v", err) + } + if _, err := container.WaitForOutputSubmatch(ctx, "Hello", 5*time.Second); err != nil { + cleanUpContainers(ctx, containers) + b.Fatalf("failed to read container output: %v", err) + } + } + + // Drop caches again before second measurement. + harness.DropCaches(machine) + + // Check available memory after containers are up. + after, err := machine.RunCommand(cmd, args...) + cleanUpContainers(ctx, containers) + if err != nil { + b.Fatalf("failed to get meminfo: %v", err) + } + meminfo.Report(b, before, after) +} + +// BenchmarkSizeNginx starts N containers running Nginx, checks that they're +// serving, and checks memory used based on /proc/meminfo. +func BenchmarkSizeNginx(b *testing.B) { + machine, err := testHarness.GetMachine() + if err != nil { + b.Fatalf("failed to get machine with: %v", err) + } + defer machine.CleanUp() + + // DropCaches for the first measurement. + harness.DropCaches(machine) + + // Measure MemAvailable before creating containers. + meminfo := tools.Meminfo{} + cmd, args := meminfo.MakeCmd() + before, err := machine.RunCommand(cmd, args...) + if err != nil { + b.Fatalf("failed to run meminfo command: %v", err) + } + + // Make N Nginx containers. + ctx := context.Background() + runOpts := dockerutil.RunOpts{ + Image: "benchmarks/nginx", + } + const port = 80 + servers := startServers(ctx, b, + serverArgs{ + machine: machine, + port: port, + runOpts: runOpts, + }) + defer cleanUpContainers(ctx, servers) + + // DropCaches after servers are created. + harness.DropCaches(machine) + // Take after measurement. + after, err := machine.RunCommand(cmd, args...) + if err != nil { + b.Fatalf("failed to run meminfo command: %v", err) + } + meminfo.Report(b, before, after) +} + +// BenchmarkSizeNode starts N containers running a Node app, checks that +// they're serving, and checks memory used based on /proc/meminfo. +func BenchmarkSizeNode(b *testing.B) { + machine, err := testHarness.GetMachine() + if err != nil { + b.Fatalf("failed to get machine with: %v", err) + } + defer machine.CleanUp() + + // Make a redis instance for Node to connect. + ctx := context.Background() + redis, redisIP := redisInstance(ctx, b, machine) + defer redis.CleanUp(ctx) + + // DropCaches after redis is created. + harness.DropCaches(machine) + + // Take before measurement. 
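+	// (Same measurement protocol as the size benchmarks above: drop caches,
+	// read MemAvailable, start the b.N containers, drop caches and read
+	// MemAvailable again, then report 1024*(before-after)/b.N bytes per
+	// container.)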
+ meminfo := tools.Meminfo{} + cmd, args := meminfo.MakeCmd() + before, err := machine.RunCommand(cmd, args...) + if err != nil { + b.Fatalf("failed to run meminfo commend: %v", err) + } + + // Create N Node servers. + runOpts := dockerutil.RunOpts{ + Image: "benchmarks/node", + WorkDir: "/usr/src/app", + Links: []string{redis.MakeLink("redis")}, + } + nodeCmd := []string{"node", "index.js", redisIP.String()} + const port = 8080 + servers := startServers(ctx, b, + serverArgs{ + machine: machine, + port: port, + runOpts: runOpts, + cmd: nodeCmd, + }) + defer cleanUpContainers(ctx, servers) + + // DropCaches after servers are created. + harness.DropCaches(machine) + // Take after measurement. + cmd, args = meminfo.MakeCmd() + after, err := machine.RunCommand(cmd, args...) + if err != nil { + b.Fatalf("failed to run meminfo command: %v", err) + } + meminfo.Report(b, before, after) +} + +// serverArgs wraps args for startServers and runServerWorkload. +type serverArgs struct { + machine harness.Machine + port int + runOpts dockerutil.RunOpts + cmd []string +} + +// startServers starts b.N containers defined by 'runOpts' and 'cmd' and uses +// 'machine' to check that each is up. +func startServers(ctx context.Context, b *testing.B, args serverArgs) []*dockerutil.Container { + b.Helper() + servers := make([]*dockerutil.Container, 0, b.N) + + // Create N servers and wait until each of them is serving. + for i := 0; i < b.N; i++ { + server := args.machine.GetContainer(ctx, b) + servers = append(servers, server) + if err := server.Spawn(ctx, args.runOpts, args.cmd...); err != nil { + cleanUpContainers(ctx, servers) + b.Fatalf("failed to spawn node instance: %v", err) + } + + // Get the container IP. + servingIP, err := server.FindIP(ctx, false) + if err != nil { + cleanUpContainers(ctx, servers) + b.Fatalf("failed to get ip from server: %v", err) + } + + // Wait until the server is up. + if err := harness.WaitUntilServing(ctx, args.machine, servingIP, args.port); err != nil { + cleanUpContainers(ctx, servers) + b.Fatalf("failed to wait for serving") + } + } + return servers +} + +// cleanUpContainers cleans up a slice of containers. +func cleanUpContainers(ctx context.Context, containers []*dockerutil.Container) { + for _, c := range containers { + if c != nil { + c.CleanUp(ctx) + } + } +} diff --git a/test/benchmarks/base/startup_test.go b/test/benchmarks/base/startup_test.go new file mode 100644 index 000000000..4628a0a41 --- /dev/null +++ b/test/benchmarks/base/startup_test.go @@ -0,0 +1,156 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package base + +import ( + "context" + "fmt" + "net" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" +) + +// BenchmarkStartEmpty times startup time for an empty container. 
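+// (Each of the b.N iterations runs a trivial command ("true") in a fresh
+// alpine container, so the metric is the end-to-end time to create, start,
+// and finish a container rather than time to first request.)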
+func BenchmarkStartupEmpty(b *testing.B) { + machine, err := testHarness.GetMachine() + if err != nil { + b.Fatalf("failed to get machine: %v", err) + } + defer machine.CleanUp() + + ctx := context.Background() + for i := 0; i < b.N; i++ { + container := machine.GetContainer(ctx, b) + defer container.CleanUp(ctx) + if _, err := container.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/alpine", + }, "true"); err != nil { + b.Fatalf("failed to run container: %v", err) + } + } +} + +// BenchmarkStartupNginx times startup for a Nginx instance. +// Time is measured from start until the first request is served. +func BenchmarkStartupNginx(b *testing.B) { + // The machine to hold Nginx and the Node Server. + machine, err := testHarness.GetMachine() + if err != nil { + b.Fatalf("failed to get machine with: %v", err) + } + defer machine.CleanUp() + + ctx := context.Background() + runOpts := dockerutil.RunOpts{ + Image: "benchmarks/nginx", + } + runServerWorkload(ctx, b, + serverArgs{ + machine: machine, + runOpts: runOpts, + port: 80, + }) +} + +// BenchmarkStartupNode times startup for a Node application instance. +// Time is measured from start until the first request is served. +// Note that the Node app connects to a Redis instance before serving. +func BenchmarkStartupNode(b *testing.B) { + machine, err := testHarness.GetMachine() + if err != nil { + b.Fatalf("failed to get machine with: %v", err) + } + defer machine.CleanUp() + + ctx := context.Background() + redis, redisIP := redisInstance(ctx, b, machine) + defer redis.CleanUp(ctx) + runOpts := dockerutil.RunOpts{ + Image: "benchmarks/node", + WorkDir: "/usr/src/app", + Links: []string{redis.MakeLink("redis")}, + } + + cmd := []string{"node", "index.js", redisIP.String()} + runServerWorkload(ctx, b, + serverArgs{ + machine: machine, + port: 8080, + runOpts: runOpts, + cmd: cmd, + }) +} + +// redisInstance returns a Redis container and its reachable IP. +func redisInstance(ctx context.Context, b *testing.B, machine harness.Machine) (*dockerutil.Container, net.IP) { + b.Helper() + // Spawn a redis instance for the app to use. + redis := machine.GetNativeContainer(ctx, b) + if err := redis.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/redis", + }); err != nil { + redis.CleanUp(ctx) + b.Fatalf("failed to spwan redis instance: %v", err) + } + + if out, err := redis.WaitForOutput(ctx, "Ready to accept connections", 3*time.Second); err != nil { + redis.CleanUp(ctx) + b.Fatalf("failed to start redis server: %v %s", err, out) + } + redisIP, err := redis.FindIP(ctx, false) + if err != nil { + redis.CleanUp(ctx) + b.Fatalf("failed to get IP from redis instance: %v", err) + } + return redis, redisIP +} + +// runServerWorkload runs a server workload defined by 'runOpts' and 'cmd'. +// 'clientMachine' is used to connect to the server on 'serverMachine'. +func runServerWorkload(ctx context.Context, b *testing.B, args serverArgs) { + b.Helper() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := func() error { + server := args.machine.GetContainer(ctx, b) + defer func() { + b.StopTimer() + // Cleanup servers as we run so that we can go indefinitely. + server.CleanUp(ctx) + b.StartTimer() + }() + if err := server.Spawn(ctx, args.runOpts, args.cmd...); err != nil { + return fmt.Errorf("failed to spawn node instance: %v", err) + } + + servingIP, err := server.FindIP(ctx, false) + if err != nil { + return fmt.Errorf("failed to get ip from server: %v", err) + } + + // Wait until the Client sees the server as up. 
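+			// (WaitUntilServing polls the server's container IP from a helper
+			// container until an HTTP request succeeds, so each timed
+			// iteration covers spawn, IP discovery, and time to the first
+			// successful response.)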
+ if err := harness.WaitUntilServing(ctx, args.machine, servingIP, args.port); err != nil { + return fmt.Errorf("failed to wait for serving: %v", err) + } + return nil + }(); err != nil { + b.Fatal(err) + } + } +} diff --git a/test/benchmarks/base/sysbench_test.go b/test/benchmarks/base/sysbench_test.go index 7df73e38b..6fb813640 100644 --- a/test/benchmarks/base/sysbench_test.go +++ b/test/benchmarks/base/sysbench_test.go @@ -64,7 +64,7 @@ func BenchmarkSysbench(b *testing.B) { }, } - machine, err := h.GetMachine() + machine, err := testHarness.GetMachine() if err != nil { b.Fatalf("failed to get machine: %v", err) } diff --git a/test/benchmarks/harness/util.go b/test/benchmarks/harness/util.go index bc551c582..86b863f78 100644 --- a/test/benchmarks/harness/util.go +++ b/test/benchmarks/harness/util.go @@ -23,23 +23,25 @@ import ( "gvisor.dev/gvisor/pkg/test/testutil" ) +//TODO(gvisor.dev/issue/3535): move to own package or move methods to harness struct. + // WaitUntilServing grabs a container from `machine` and waits for a server at // IP:port. func WaitUntilServing(ctx context.Context, machine Machine, server net.IP, port int) error { - var logger testutil.DefaultLogger = "netcat" + var logger testutil.DefaultLogger = "util" netcat := machine.GetNativeContainer(ctx, logger) defer netcat.CleanUp(ctx) - cmd := fmt.Sprintf("while ! nc -zv %s %d; do true; done", server, port) + cmd := fmt.Sprintf("while ! wget -q --spider http://%s:%d; do true; done", server, port) _, err := netcat.Run(ctx, dockerutil.RunOpts{ - Image: "packetdrill", + Image: "benchmarks/util", }, "sh", "-c", cmd) return err } // DropCaches drops caches on the provided machine. Requires root. func DropCaches(machine Machine) error { - if out, err := machine.RunCommand("/bin/sh", "-c", "sync | sysctl vm.drop_caches=3"); err != nil { + if out, err := machine.RunCommand("/bin/sh", "-c", "sync && sysctl vm.drop_caches=3"); err != nil { return fmt.Errorf("failed to drop caches: %v logs: %s", err, out) } return nil diff --git a/test/benchmarks/network/nginx_test.go b/test/benchmarks/network/nginx_test.go index 5965652a5..2bf1a3624 100644 --- a/test/benchmarks/network/nginx_test.go +++ b/test/benchmarks/network/nginx_test.go @@ -25,7 +25,7 @@ import ( // BenchmarkNginxConcurrency iterates the concurrency argument and tests // how well the runtime under test handles requests in parallel. -// TODO(zkoopmans): Update with different doc sizes like Httpd. +// TODO(gvisor.dev/issue/3536): Update with different doc sizes like Httpd. func BenchmarkNginxConcurrency(b *testing.B) { // Grab a machine for the client and server. clientMachine, err := h.GetMachine() diff --git a/test/benchmarks/tools/BUILD b/test/benchmarks/tools/BUILD index a6bd949e6..e5734d85c 100644 --- a/test/benchmarks/tools/BUILD +++ b/test/benchmarks/tools/BUILD @@ -9,6 +9,7 @@ go_library( "fio.go", "hey.go", "iperf.go", + "meminfo.go", "redis.go", "sysbench.go", "tools.go", @@ -24,6 +25,7 @@ go_test( "fio_test.go", "hey_test.go", "iperf_test.go", + "meminfo_test.go", "redis_test.go", "sysbench_test.go", ], diff --git a/test/benchmarks/tools/meminfo.go b/test/benchmarks/tools/meminfo.go new file mode 100644 index 000000000..2414a96a7 --- /dev/null +++ b/test/benchmarks/tools/meminfo.go @@ -0,0 +1,60 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "fmt" + "regexp" + "strconv" + "testing" +) + +// Meminfo wraps measurements of MemAvailable using /proc/meminfo. +type Meminfo struct { +} + +// MakeCmd returns a command for checking meminfo. +func (*Meminfo) MakeCmd() (string, []string) { + return "cat", []string{"/proc/meminfo"} +} + +// Report takes two reads of meminfo, parses them, and reports the difference +// divided by b.N. +func (*Meminfo) Report(b *testing.B, before, after string) { + b.Helper() + + beforeVal, err := parseMemAvailable(before) + if err != nil { + b.Fatalf("could not parse before value %s: %v", before, err) + } + + afterVal, err := parseMemAvailable(after) + if err != nil { + b.Fatalf("could not parse before value %s: %v", before, err) + } + val := 1024 * ((beforeVal - afterVal) / float64(b.N)) + b.ReportMetric(val, "average_container_size_bytes") +} + +var memInfoRE = regexp.MustCompile(`MemAvailable:\s*(\d+)\skB\n`) + +// parseMemAvailable grabs the MemAvailable number from /proc/meminfo. +func parseMemAvailable(data string) (float64, error) { + match := memInfoRE.FindStringSubmatch(data) + if len(match) < 2 { + return 0, fmt.Errorf("couldn't find MemAvailable in %s", data) + } + return strconv.ParseFloat(match[1], 64) +} diff --git a/test/benchmarks/tools/meminfo_test.go b/test/benchmarks/tools/meminfo_test.go new file mode 100644 index 000000000..ba803540f --- /dev/null +++ b/test/benchmarks/tools/meminfo_test.go @@ -0,0 +1,84 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tools + +import ( + "testing" +) + +// TestMeminfo checks the Meminfo parser on sample output. 
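Before the parser test below, a quick worked example of what Report computes: MemAvailable is reported in kB, so if a run with b.N = 100 containers sees MemAvailable drop from 9,319,948 kB to 9,115,148 kB, the metric is 1024 * (9319948 - 9115148) / 100 = 2,097,152 bytes, i.e. about 2 MiB per container. The same arithmetic as a runnable sketch (the readings are made up for illustration):

package main

import "fmt"

func main() {
	// Hypothetical MemAvailable readings in kB, and the container count b.N.
	beforeKB, afterKB := 9319948.0, 9115148.0
	n := 100.0

	// Mirrors Meminfo.Report: per-container kB difference, scaled to bytes.
	avgBytes := 1024 * ((beforeKB - afterKB) / n)
	fmt.Printf("average_container_size_bytes: %.0f\n", avgBytes) // 2097152
}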
+func TestMeminfo(t *testing.T) { + sampleData := ` +MemTotal: 16337408 kB +MemFree: 3742696 kB +MemAvailable: 9319948 kB +Buffers: 1433884 kB +Cached: 4607036 kB +SwapCached: 45284 kB +Active: 8288376 kB +Inactive: 2685928 kB +Active(anon): 4724912 kB +Inactive(anon): 1047940 kB +Active(file): 3563464 kB +Inactive(file): 1637988 kB +Unevictable: 326940 kB +Mlocked: 48 kB +SwapTotal: 33292284 kB +SwapFree: 32865736 kB +Dirty: 708 kB +Writeback: 0 kB +AnonPages: 4304204 kB +Mapped: 975424 kB +Shmem: 910292 kB +KReclaimable: 744532 kB +Slab: 1058448 kB +SReclaimable: 744532 kB +SUnreclaim: 313916 kB +KernelStack: 25188 kB +PageTables: 65300 kB +NFS_Unstable: 0 kB +Bounce: 0 kB +WritebackTmp: 0 kB +CommitLimit: 41460988 kB +Committed_AS: 22859492 kB +VmallocTotal: 34359738367 kB +VmallocUsed: 63088 kB +VmallocChunk: 0 kB +Percpu: 9248 kB +HardwareCorrupted: 0 kB +AnonHugePages: 786432 kB +ShmemHugePages: 0 kB +ShmemPmdMapped: 0 kB +FileHugePages: 0 kB +FilePmdMapped: 0 kB +HugePages_Total: 0 +HugePages_Free: 0 +HugePages_Rsvd: 0 +HugePages_Surp: 0 +Hugepagesize: 2048 kB +Hugetlb: 0 kB +DirectMap4k: 5408532 kB +DirectMap2M: 11241472 kB +DirectMap1G: 1048576 kB +` + want := 9319948.0 + got, err := parseMemAvailable(sampleData) + if err != nil { + t.Fatalf("parseMemAvailable failed: %v", err) + } + if got != want { + t.Fatalf("parseMemAvailable got %f, want %f", got, want) + } +} -- cgit v1.2.3 From 7b9bfc0ce094b0fd0cb3beec665a7b64c4ec552e Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Fri, 7 Aug 2020 13:47:03 -0700 Subject: Port Ruby benchmark. PiperOrigin-RevId: 325500772 --- images/Makefile | 4 +- images/benchmarks/ruby/Dockerfile | 27 +++++++ images/benchmarks/ruby/Gemfile | 5 ++ images/benchmarks/ruby/Gemfile.lock | 26 +++++++ images/benchmarks/ruby/config.ru | 2 + images/benchmarks/ruby/index.erb | 8 +++ images/benchmarks/ruby/main.rb | 27 +++++++ pkg/test/dockerutil/container.go | 83 +++++++++------------- test/benchmarks/network/BUILD | 1 + test/benchmarks/network/node_test.go | 32 ++++----- test/benchmarks/network/ruby_test.go | 134 +++++++++++++++++++++++++++++++++++ test/benchmarks/tools/hey.go | 2 +- 12 files changed, 279 insertions(+), 72 deletions(-) create mode 100755 images/benchmarks/ruby/Dockerfile create mode 100755 images/benchmarks/ruby/Gemfile create mode 100644 images/benchmarks/ruby/Gemfile.lock create mode 100755 images/benchmarks/ruby/config.ru create mode 100755 images/benchmarks/ruby/index.erb create mode 100755 images/benchmarks/ruby/main.rb create mode 100644 test/benchmarks/network/ruby_test.go (limited to 'test') diff --git a/images/Makefile b/images/Makefile index 9de359a28..278dec02f 100644 --- a/images/Makefile +++ b/images/Makefile @@ -59,9 +59,9 @@ local_image = $(LOCAL_IMAGE_PREFIX)/$(subst _,/,$(1)) # we need to explicitly repull the base layer in order to ensure that the # architecture is correct. Note that we use the term "rebuild" here to avoid # conflicting with the bazel "build" terminology, which is used elsewhere. 
+rebuild-%: FROM=$(shell grep FROM $(call path,$*)/Dockerfile } cut -d' ' -f2) rebuild-%: register-cross - FROM=$(shell grep FROM $(call path,$*)/Dockerfile | cut -d' ' -f2-) && \ - docker pull $(DOCKER_PLATFORM_ARGS) $$FROM + $(foreach IMAGE,$(FROM),docker $(DOCKER_PLATFORM_ARGS) $(IMAGE); &&) true T=$$(mktemp -d) && cp -a $(call path,$*)/* $$T && \ docker build $(DOCKER_PLATFORM_ARGS) -t $(call remote_image,$*) $$T && \ rm -rf $$T diff --git a/images/benchmarks/ruby/Dockerfile b/images/benchmarks/ruby/Dockerfile new file mode 100755 index 000000000..13c4f6eed --- /dev/null +++ b/images/benchmarks/ruby/Dockerfile @@ -0,0 +1,27 @@ +# example based on https://github.com/errm/fib +FROM alpine:3.9 as build + +COPY Gemfile Gemfile.lock ./ + +RUN apk add --no-cache ruby ruby-dev ruby-bundler ruby-json build-base bash \ + && bundle install --frozen -j4 -r3 --no-cache --without development \ + && apk del --no-cache ruby-bundler \ + && rm -rf /usr/lib/ruby/gems/*/cache + +FROM alpine:3.9 as prod + +COPY --from=build /usr/lib/ruby/gems /usr/lib/ruby/gems +RUN apk add --no-cache ruby ruby-json ruby-etc redis apache2-utils \ + && ruby -e "Gem::Specification.map.each do |spec| \ + Gem::Installer.for_spec( \ + spec, \ + wrappers: true, \ + force: true, \ + install_dir: spec.base_dir, \ + build_args: spec.build_args, \ + ).generate_bin \ + end" + +COPY . /app/. + +STOPSIGNAL SIGINT diff --git a/images/benchmarks/ruby/Gemfile b/images/benchmarks/ruby/Gemfile new file mode 100755 index 000000000..ac521b32c --- /dev/null +++ b/images/benchmarks/ruby/Gemfile @@ -0,0 +1,5 @@ +source "https://rubygems.org" + +gem "sinatra" +gem "puma" +gem "redis" \ No newline at end of file diff --git a/images/benchmarks/ruby/Gemfile.lock b/images/benchmarks/ruby/Gemfile.lock new file mode 100644 index 000000000..041778e02 --- /dev/null +++ b/images/benchmarks/ruby/Gemfile.lock @@ -0,0 +1,26 @@ +GEM + remote: https://rubygems.org/ + specs: + mustermann (1.0.3) + puma (3.4.0) + rack (2.0.6) + rack-protection (2.0.5) + rack + redis (4.1.0) + sinatra (2.0.5) + mustermann (~> 1.0) + rack (~> 2.0) + rack-protection (= 2.0.5) + tilt (~> 2.0) + tilt (2.0.9) + +PLATFORMS + ruby + +DEPENDENCIES + puma + redis + sinatra + +BUNDLED WITH + 1.17.1 \ No newline at end of file diff --git a/images/benchmarks/ruby/config.ru b/images/benchmarks/ruby/config.ru new file mode 100755 index 000000000..b2d135cc0 --- /dev/null +++ b/images/benchmarks/ruby/config.ru @@ -0,0 +1,2 @@ +require './main' +run Sinatra::Application \ No newline at end of file diff --git a/images/benchmarks/ruby/index.erb b/images/benchmarks/ruby/index.erb new file mode 100755 index 000000000..7f7300e80 --- /dev/null +++ b/images/benchmarks/ruby/index.erb @@ -0,0 +1,8 @@ + + + + <% text.each do |t| %> +

+    <p>
+    <%= t %>
+    </p>
+ <% end %> + + diff --git a/images/benchmarks/ruby/main.rb b/images/benchmarks/ruby/main.rb new file mode 100755 index 000000000..b998f004e --- /dev/null +++ b/images/benchmarks/ruby/main.rb @@ -0,0 +1,27 @@ +require "sinatra" +require "securerandom" +require "redis" + +redis_host = ENV["HOST"] +$redis = Redis.new(host: redis_host) + +def generateText + for i in 0..99 + $redis.set(i, randomBody(1024)) + end +end + +def randomBody(length) + return SecureRandom.alphanumeric(length) +end + +generateText +template = ERB.new(File.read('./index.erb')) + +get "/" do + texts = Array.new + for i in 0..4 + texts.push($redis.get(rand(0..99))) + end + template.result_with_hash(text: texts) +end diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go index 5a2157951..052b6b99d 100644 --- a/pkg/test/dockerutil/container.go +++ b/pkg/test/dockerutil/container.go @@ -58,12 +58,6 @@ type Container struct { // a handle to restart the profile. Generally, tests/benchmarks using // profiles need to run as root. profiles []Profile - - // Stores streams attached to the container. Used by WaitForOutputSubmatch. - streams types.HijackedResponse - - // stores previously read data from the attached streams. - streamBuf bytes.Buffer } // RunOpts are options for running a container. @@ -175,11 +169,25 @@ func (c *Container) SpawnProcess(ctx context.Context, r RunOpts, args ...string) return Process{}, err } + // Open a connection to the container for parsing logs and for TTY. + stream, err := c.client.ContainerAttach(ctx, c.id, + types.ContainerAttachOptions{ + Stream: true, + Stdin: true, + Stdout: true, + Stderr: true, + }) + if err != nil { + return Process{}, fmt.Errorf("connect failed container id %s: %v", c.id, err) + } + + c.cleanups = append(c.cleanups, func() { stream.Close() }) + if err := c.Start(ctx); err != nil { return Process{}, err } - return Process{container: c, conn: c.streams}, nil + return Process{container: c, conn: stream}, nil } // Run is analogous to 'docker run'. @@ -273,23 +281,6 @@ func (c *Container) hostConfig(r RunOpts) *container.HostConfig { // Start is analogous to 'docker start'. func (c *Container) Start(ctx context.Context) error { - - // Open a connection to the container for parsing logs and for TTY. - streams, err := c.client.ContainerAttach(ctx, c.id, - types.ContainerAttachOptions{ - Stream: true, - Stdin: true, - Stdout: true, - Stderr: true, - }) - if err != nil { - return fmt.Errorf("failed to connect to container: %v", err) - } - - c.streams = streams - c.cleanups = append(c.cleanups, func() { - c.streams.Close() - }) if err := c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{}); err != nil { return fmt.Errorf("ContainerStart failed: %v", err) } @@ -485,34 +476,19 @@ func (c *Container) WaitForOutput(ctx context.Context, pattern string, timeout t // WaitForOutputSubmatch searches container logs for the given // pattern or times out. It returns any regexp submatches as well. 
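The rewritten WaitForOutputSubmatch below drops the attached-stream bookkeeping in favor of a simpler loop: fetch the container logs, try the regexp, and sleep briefly until the context times out. A standalone sketch of that polling shape (waitForPattern and fetch are illustrative stand-ins, not the dockerutil API):

package main

import (
	"context"
	"errors"
	"fmt"
	"regexp"
	"time"
)

// waitForPattern polls fetch until the pattern matches or ctx expires.
func waitForPattern(ctx context.Context, fetch func(context.Context) (string, error), pattern string) ([]string, error) {
	re := regexp.MustCompile(pattern)
	for {
		out, err := fetch(ctx)
		if err != nil {
			return nil, fmt.Errorf("fetch failed: %w", err)
		}
		if m := re.FindStringSubmatch(out); m != nil {
			return m, nil
		}
		select {
		case <-ctx.Done():
			return nil, errors.New("timed out waiting for pattern")
		case <-time.After(50 * time.Millisecond):
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	start := time.Now()
	// Fake log source: the interesting line only shows up after ~200ms.
	fetch := func(context.Context) (string, error) {
		if time.Since(start) > 200*time.Millisecond {
			return "server listening on port 8080", nil
		}
		return "starting...", nil
	}

	m, err := waitForPattern(ctx, fetch, `listening on port (\d+)`)
	fmt.Println(m, err) // [listening on port 8080 8080] <nil>
}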
func (c *Container) WaitForOutputSubmatch(ctx context.Context, pattern string, timeout time.Duration) ([]string, error) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() re := regexp.MustCompile(pattern) - if matches := re.FindStringSubmatch(c.streamBuf.String()); matches != nil { - return matches, nil - } - - for exp := time.Now().Add(timeout); time.Now().Before(exp); { - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - - c.streams.Conn.SetDeadline(time.Now().Add(50 * time.Millisecond)) - _, err := stdcopy.StdCopy(&c.streamBuf, &c.streamBuf, c.streams.Reader) - + for { + logs, err := c.Logs(ctx) if err != nil { - // check that it wasn't a timeout - if nerr, ok := err.(net.Error); !ok || !nerr.Timeout() { - return nil, err - } + return nil, fmt.Errorf("failed to get logs: %v logs: %s", err, logs) } - - if matches := re.FindStringSubmatch(c.streamBuf.String()); matches != nil { + if matches := re.FindStringSubmatch(logs); matches != nil { return matches, nil } + time.Sleep(50 * time.Millisecond) } - - return nil, fmt.Errorf("timeout waiting for output %q: out: %s", re.String(), c.streamBuf.String()) } // Kill kills the container. @@ -537,8 +513,18 @@ func (c *Container) CleanUp(ctx context.Context) { for _, profile := range c.profiles { profile.OnCleanUp(c) } + // Forget profiles. c.profiles = nil + + // Execute all cleanups. We execute cleanups here to close any + // open connections to the container before closing. Open connections + // can cause Kill and Remove to hang. + for _, c := range c.cleanups { + c() + } + c.cleanups = nil + // Kill the container. if err := c.Kill(ctx); err != nil && !strings.Contains(err.Error(), "is not running") { // Just log; can't do anything here. @@ -550,9 +536,4 @@ func (c *Container) CleanUp(ctx context.Context) { } // Forget all mounts. c.mounts = nil - // Execute all cleanups. - for _, c := range c.cleanups { - c() - } - c.cleanups = nil } diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD index d15cd55ee..df5ff7265 100644 --- a/test/benchmarks/network/BUILD +++ b/test/benchmarks/network/BUILD @@ -17,6 +17,7 @@ go_test( "iperf_test.go", "nginx_test.go", "node_test.go", + "ruby_test.go", ], library = ":network", tags = [ diff --git a/test/benchmarks/network/node_test.go b/test/benchmarks/network/node_test.go index 5b568cfe5..52eb794c4 100644 --- a/test/benchmarks/network/node_test.go +++ b/test/benchmarks/network/node_test.go @@ -24,18 +24,16 @@ import ( "gvisor.dev/gvisor/test/benchmarks/tools" ) -// BenchmarkNode runs 10K requests using 'hey' against a Node server run on +// BenchmarkNode runs requests using 'hey' against a Node server run on // 'runtime'. The server responds to requests by grabbing some data in a // redis instance and returns the data in its reponse. The test loops through // increasing amounts of concurency for requests. func BenchmarkNode(b *testing.B) { - requests := 10000 concurrency := []int{1, 5, 10, 25} - for _, c := range concurrency { b.Run(fmt.Sprintf("Concurrency%d", c), func(b *testing.B) { hey := &tools.Hey{ - Requests: requests, + Requests: b.N * c, // Requests b.N requests per thread. Concurrency: c, } runNode(b, hey) @@ -113,19 +111,17 @@ func runNode(b *testing.B, hey *tools.Hey) { nodeApp.RestartProfiles() b.ResetTimer() - for i := 0; i < b.N; i++ { - // the client should run on Native. - client := clientMachine.GetNativeContainer(ctx, b) - out, err := client.Run(ctx, dockerutil.RunOpts{ - Image: "benchmarks/hey", - }, heyCmd...) 
- if err != nil { - b.Fatalf("hey container failed: %v logs: %s", err, out) - } - - // Stop the timer to parse the data and report stats. - b.StopTimer() - hey.Report(b, out) - b.StartTimer() + // the client should run on Native. + client := clientMachine.GetNativeContainer(ctx, b) + out, err := client.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/hey", + }, heyCmd...) + if err != nil { + b.Fatalf("hey container failed: %v logs: %s", err, out) } + + // Stop the timer to parse the data and report stats. + b.StopTimer() + hey.Report(b, out) + b.StartTimer() } diff --git a/test/benchmarks/network/ruby_test.go b/test/benchmarks/network/ruby_test.go new file mode 100644 index 000000000..5e0b2b724 --- /dev/null +++ b/test/benchmarks/network/ruby_test.go @@ -0,0 +1,134 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package network + +import ( + "context" + "fmt" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/test/dockerutil" + "gvisor.dev/gvisor/test/benchmarks/harness" + "gvisor.dev/gvisor/test/benchmarks/tools" +) + +// BenchmarkRuby runs requests using 'hey' against a ruby application server. +// On start, ruby app generates some random data and pushes it to a redis +// instance. On a request, the app grabs for random entries from the redis +// server, publishes it to a document, and returns the doc to the request. +func BenchmarkRuby(b *testing.B) { + concurrency := []int{1, 5, 10, 25} + for _, c := range concurrency { + b.Run(fmt.Sprintf("Concurrency%d", c), func(b *testing.B) { + hey := &tools.Hey{ + Requests: b.N * c, // b.N requests per thread. + Concurrency: c, + } + runRuby(b, hey) + }) + } +} + +// runRuby runs the test for a given # of requests and concurrency. +func runRuby(b *testing.B, hey *tools.Hey) { + b.Helper() + // The machine to hold Redis and the Ruby Server. + serverMachine, err := h.GetMachine() + if err != nil { + b.Fatal("failed to get machine with: %v", err) + } + defer serverMachine.CleanUp() + + // The machine to run 'hey'. + clientMachine, err := h.GetMachine() + if err != nil { + b.Fatal("failed to get machine with: %v", err) + } + defer clientMachine.CleanUp() + ctx := context.Background() + + // Spawn a redis instance for the app to use. + redis := serverMachine.GetNativeContainer(ctx, b) + if err := redis.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/redis", + }); err != nil { + b.Fatalf("failed to spwan redis instance: %v", err) + } + defer redis.CleanUp(ctx) + + if out, err := redis.WaitForOutput(ctx, "Ready to accept connections", 3*time.Second); err != nil { + b.Fatalf("failed to start redis server: %v %s", err, out) + } + redisIP, err := redis.FindIP(ctx, false) + if err != nil { + b.Fatalf("failed to get IP from redis instance: %v", err) + } + + // Ruby runs on port 9292. + const port = 9292 + + // Start-up the Ruby server. 
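+	// (The Sinatra app reads the HOST variable set below to reach redis; see
+	// images/benchmarks/ruby/main.rb earlier in this patch.)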
+ rubyApp := serverMachine.GetContainer(ctx, b) + if err := rubyApp.Spawn(ctx, dockerutil.RunOpts{ + Image: "benchmarks/ruby", + WorkDir: "/app", + Links: []string{redis.MakeLink("redis")}, + Ports: []int{port}, + Env: []string{ + fmt.Sprintf("PORT=%d", port), + "WEB_CONCURRENCY=20", + "WEB_MAX_THREADS=20", + "RACK_ENV=production", + fmt.Sprintf("HOST=%s", redisIP), + }, + User: "nobody", + }, "sh", "-c", "/usr/bin/puma"); err != nil { + b.Fatalf("failed to spawn node instance: %v", err) + } + defer rubyApp.CleanUp(ctx) + + servingIP, err := serverMachine.IPAddress() + if err != nil { + b.Fatalf("failed to get ip from server: %v", err) + } + + servingPort, err := rubyApp.FindPort(ctx, port) + if err != nil { + b.Fatalf("failed to port from node instance: %v", err) + } + + // Wait until the Client sees the server as up. + if err := harness.WaitUntilServing(ctx, clientMachine, servingIP, servingPort); err != nil { + b.Fatalf("failed to wait until serving: %v", err) + } + heyCmd := hey.MakeCmd(servingIP, servingPort) + rubyApp.RestartProfiles() + b.ResetTimer() + + // the client should run on Native. + client := clientMachine.GetNativeContainer(ctx, b) + defer client.CleanUp(ctx) + out, err := client.Run(ctx, dockerutil.RunOpts{ + Image: "benchmarks/hey", + }, heyCmd...) + if err != nil { + b.Fatalf("hey container failed: %v logs: %s", err, out) + } + + // Stop the timer to parse the data and report stats. + b.StopTimer() + hey.Report(b, out) + b.StartTimer() +} diff --git a/test/benchmarks/tools/hey.go b/test/benchmarks/tools/hey.go index 699497c64..b1e20e356 100644 --- a/test/benchmarks/tools/hey.go +++ b/test/benchmarks/tools/hey.go @@ -25,7 +25,7 @@ import ( // Hey is for the client application 'hey'. type Hey struct { - Requests int + Requests int // Note: requests cannot be less than concurrency. Concurrency int Doc string } -- cgit v1.2.3 From 80c80a14101aca90ee21aa6f6c934673c50e6cee Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Fri, 7 Aug 2020 16:17:25 -0700 Subject: Remove old benchmark tools. Remove the old benchmark-tools directory, including imports in the WORKSPACE file and associated bazel rules. The new Golang benchmark-tools can be found at //test/benchmarks and it is functionally equivalent, excepting syscall_test which can be found in //test/perf/linux. 
PiperOrigin-RevId: 325529075 --- Makefile | 2 +- WORKSPACE | 20 - benchmarks/BUILD | 37 -- benchmarks/README.md | 186 -------- benchmarks/defs.bzl | 14 - benchmarks/examples/localhost.yaml | 2 - benchmarks/harness/BUILD | 201 --------- benchmarks/harness/__init__.py | 62 --- benchmarks/harness/benchmark_driver.py | 85 ---- benchmarks/harness/container.py | 181 -------- benchmarks/harness/machine.py | 265 ----------- benchmarks/harness/machine_mocks/BUILD | 9 - benchmarks/harness/machine_mocks/__init__.py | 81 ---- benchmarks/harness/machine_producers/BUILD | 84 ---- benchmarks/harness/machine_producers/__init__.py | 13 - .../machine_producers/gcloud_mock_recorder.py | 97 ---- .../harness/machine_producers/gcloud_producer.py | 250 ----------- .../machine_producers/gcloud_producer_test.py | 48 -- .../harness/machine_producers/machine_producer.py | 51 --- .../harness/machine_producers/mock_producer.py | 52 --- .../machine_producers/testdata/get_five.json | 211 --------- .../machine_producers/testdata/get_one.json | 145 ------ .../harness/machine_producers/yaml_producer.py | 106 ----- benchmarks/harness/ssh_connection.py | 126 ------ benchmarks/harness/tunnel_dispatcher.py | 122 ------ benchmarks/requirements.txt | 32 -- benchmarks/run.py | 19 - benchmarks/runner/BUILD | 56 --- benchmarks/runner/__init__.py | 308 ------------- benchmarks/runner/commands.py | 135 ------ benchmarks/runner/runner_test.py | 59 --- benchmarks/suites/BUILD | 130 ------ benchmarks/suites/__init__.py | 119 ----- benchmarks/suites/absl.py | 37 -- benchmarks/suites/density.py | 121 ----- benchmarks/suites/fio.py | 165 ------- benchmarks/suites/helpers.py | 57 --- benchmarks/suites/http.py | 138 ------ benchmarks/suites/media.py | 42 -- benchmarks/suites/ml.py | 33 -- benchmarks/suites/network.py | 101 ----- benchmarks/suites/redis.py | 46 -- benchmarks/suites/startup.py | 110 ----- benchmarks/suites/sysbench.py | 119 ----- benchmarks/suites/syscall.py | 37 -- benchmarks/tcp/BUILD | 41 -- benchmarks/tcp/README.md | 87 ---- benchmarks/tcp/nsjoin.c | 47 -- benchmarks/tcp/tcp_benchmark.sh | 392 ----------------- benchmarks/tcp/tcp_proxy.go | 451 ------------------- benchmarks/workloads/BUILD | 35 -- benchmarks/workloads/__init__.py | 14 - benchmarks/workloads/ab/BUILD | 28 -- benchmarks/workloads/ab/Dockerfile | 15 - benchmarks/workloads/ab/__init__.py | 88 ---- benchmarks/workloads/ab/ab_test.py | 42 -- benchmarks/workloads/absl/BUILD | 28 -- benchmarks/workloads/absl/Dockerfile | 25 -- benchmarks/workloads/absl/__init__.py | 63 --- benchmarks/workloads/absl/absl_test.py | 31 -- benchmarks/workloads/curl/BUILD | 13 - benchmarks/workloads/curl/Dockerfile | 14 - benchmarks/workloads/ffmpeg/BUILD | 18 - benchmarks/workloads/ffmpeg/Dockerfile | 10 - benchmarks/workloads/ffmpeg/__init__.py | 20 - benchmarks/workloads/fio/BUILD | 28 -- benchmarks/workloads/fio/Dockerfile | 23 - benchmarks/workloads/fio/__init__.py | 369 ---------------- benchmarks/workloads/fio/fio_test.py | 44 -- benchmarks/workloads/httpd/BUILD | 14 - benchmarks/workloads/httpd/Dockerfile | 27 -- benchmarks/workloads/httpd/apache2-tmpdir.conf | 5 - benchmarks/workloads/iperf/BUILD | 28 -- benchmarks/workloads/iperf/Dockerfile | 14 - benchmarks/workloads/iperf/__init__.py | 40 -- benchmarks/workloads/iperf/iperf_test.py | 28 -- benchmarks/workloads/netcat/BUILD | 13 - benchmarks/workloads/netcat/Dockerfile | 14 - benchmarks/workloads/nginx/BUILD | 13 - benchmarks/workloads/nginx/Dockerfile | 1 - benchmarks/workloads/node/BUILD | 15 - 
benchmarks/workloads/node/Dockerfile | 2 - benchmarks/workloads/node/index.js | 28 -- benchmarks/workloads/node/package.json | 19 - benchmarks/workloads/node_template/BUILD | 17 - benchmarks/workloads/node_template/Dockerfile | 5 - benchmarks/workloads/node_template/index.hbs | 8 - benchmarks/workloads/node_template/index.js | 43 -- .../workloads/node_template/package-lock.json | 486 --------------------- benchmarks/workloads/node_template/package.json | 19 - benchmarks/workloads/redis/BUILD | 13 - benchmarks/workloads/redis/Dockerfile | 1 - benchmarks/workloads/redisbenchmark/BUILD | 28 -- benchmarks/workloads/redisbenchmark/Dockerfile | 4 - benchmarks/workloads/redisbenchmark/__init__.py | 85 ---- .../redisbenchmark/redisbenchmark_test.py | 51 --- benchmarks/workloads/ruby/BUILD | 28 -- benchmarks/workloads/ruby/Dockerfile | 28 -- benchmarks/workloads/ruby/Gemfile | 12 - benchmarks/workloads/ruby/Gemfile.lock | 73 ---- benchmarks/workloads/ruby/config.ru | 2 - benchmarks/workloads/ruby/index.rb | 14 - benchmarks/workloads/ruby_template/BUILD | 18 - benchmarks/workloads/ruby_template/Dockerfile | 38 -- benchmarks/workloads/ruby_template/Gemfile | 5 - benchmarks/workloads/ruby_template/Gemfile.lock | 26 -- benchmarks/workloads/ruby_template/config.ru | 2 - benchmarks/workloads/ruby_template/index.erb | 8 - benchmarks/workloads/ruby_template/main.rb | 27 -- benchmarks/workloads/sleep/BUILD | 13 - benchmarks/workloads/sleep/Dockerfile | 3 - benchmarks/workloads/sysbench/BUILD | 28 -- benchmarks/workloads/sysbench/Dockerfile | 16 - benchmarks/workloads/sysbench/__init__.py | 167 ------- benchmarks/workloads/sysbench/sysbench_test.py | 34 -- benchmarks/workloads/syscall/BUILD | 29 -- benchmarks/workloads/syscall/Dockerfile | 6 - benchmarks/workloads/syscall/__init__.py | 29 -- benchmarks/workloads/syscall/syscall.c | 55 --- benchmarks/workloads/syscall/syscall_test.py | 27 -- benchmarks/workloads/tensorflow/BUILD | 18 - benchmarks/workloads/tensorflow/Dockerfile | 14 - benchmarks/workloads/tensorflow/__init__.py | 20 - benchmarks/workloads/true/BUILD | 14 - benchmarks/workloads/true/Dockerfile | 3 - scripts/simple_tests.sh | 2 +- test/benchmarks/tcp/BUILD | 41 ++ test/benchmarks/tcp/README.md | 87 ++++ test/benchmarks/tcp/nsjoin.c | 47 ++ test/benchmarks/tcp/tcp_benchmark.sh | 392 +++++++++++++++++ test/benchmarks/tcp/tcp_proxy.go | 451 +++++++++++++++++++ tools/bazeldefs/defs.bzl | 6 - tools/defs.bzl | 5 +- website/blog/2020-04-02-networking-security.md | 2 +- website/performance/README.md | 7 +- 135 files changed, 1026 insertions(+), 8032 deletions(-) delete mode 100644 benchmarks/BUILD delete mode 100644 benchmarks/README.md delete mode 100644 benchmarks/defs.bzl delete mode 100644 benchmarks/examples/localhost.yaml delete mode 100644 benchmarks/harness/BUILD delete mode 100644 benchmarks/harness/__init__.py delete mode 100644 benchmarks/harness/benchmark_driver.py delete mode 100644 benchmarks/harness/container.py delete mode 100644 benchmarks/harness/machine.py delete mode 100644 benchmarks/harness/machine_mocks/BUILD delete mode 100644 benchmarks/harness/machine_mocks/__init__.py delete mode 100644 benchmarks/harness/machine_producers/BUILD delete mode 100644 benchmarks/harness/machine_producers/__init__.py delete mode 100644 benchmarks/harness/machine_producers/gcloud_mock_recorder.py delete mode 100644 benchmarks/harness/machine_producers/gcloud_producer.py delete mode 100644 benchmarks/harness/machine_producers/gcloud_producer_test.py delete mode 100644 
benchmarks/harness/machine_producers/machine_producer.py delete mode 100644 benchmarks/harness/machine_producers/mock_producer.py delete mode 100644 benchmarks/harness/machine_producers/testdata/get_five.json delete mode 100644 benchmarks/harness/machine_producers/testdata/get_one.json delete mode 100644 benchmarks/harness/machine_producers/yaml_producer.py delete mode 100644 benchmarks/harness/ssh_connection.py delete mode 100644 benchmarks/harness/tunnel_dispatcher.py delete mode 100644 benchmarks/requirements.txt delete mode 100644 benchmarks/run.py delete mode 100644 benchmarks/runner/BUILD delete mode 100644 benchmarks/runner/__init__.py delete mode 100644 benchmarks/runner/commands.py delete mode 100644 benchmarks/runner/runner_test.py delete mode 100644 benchmarks/suites/BUILD delete mode 100644 benchmarks/suites/__init__.py delete mode 100644 benchmarks/suites/absl.py delete mode 100644 benchmarks/suites/density.py delete mode 100644 benchmarks/suites/fio.py delete mode 100644 benchmarks/suites/helpers.py delete mode 100644 benchmarks/suites/http.py delete mode 100644 benchmarks/suites/media.py delete mode 100644 benchmarks/suites/ml.py delete mode 100644 benchmarks/suites/network.py delete mode 100644 benchmarks/suites/redis.py delete mode 100644 benchmarks/suites/startup.py delete mode 100644 benchmarks/suites/sysbench.py delete mode 100644 benchmarks/suites/syscall.py delete mode 100644 benchmarks/tcp/BUILD delete mode 100644 benchmarks/tcp/README.md delete mode 100644 benchmarks/tcp/nsjoin.c delete mode 100755 benchmarks/tcp/tcp_benchmark.sh delete mode 100644 benchmarks/tcp/tcp_proxy.go delete mode 100644 benchmarks/workloads/BUILD delete mode 100644 benchmarks/workloads/__init__.py delete mode 100644 benchmarks/workloads/ab/BUILD delete mode 100644 benchmarks/workloads/ab/Dockerfile delete mode 100644 benchmarks/workloads/ab/__init__.py delete mode 100644 benchmarks/workloads/ab/ab_test.py delete mode 100644 benchmarks/workloads/absl/BUILD delete mode 100644 benchmarks/workloads/absl/Dockerfile delete mode 100644 benchmarks/workloads/absl/__init__.py delete mode 100644 benchmarks/workloads/absl/absl_test.py delete mode 100644 benchmarks/workloads/curl/BUILD delete mode 100644 benchmarks/workloads/curl/Dockerfile delete mode 100644 benchmarks/workloads/ffmpeg/BUILD delete mode 100644 benchmarks/workloads/ffmpeg/Dockerfile delete mode 100644 benchmarks/workloads/ffmpeg/__init__.py delete mode 100644 benchmarks/workloads/fio/BUILD delete mode 100644 benchmarks/workloads/fio/Dockerfile delete mode 100644 benchmarks/workloads/fio/__init__.py delete mode 100644 benchmarks/workloads/fio/fio_test.py delete mode 100644 benchmarks/workloads/httpd/BUILD delete mode 100644 benchmarks/workloads/httpd/Dockerfile delete mode 100644 benchmarks/workloads/httpd/apache2-tmpdir.conf delete mode 100644 benchmarks/workloads/iperf/BUILD delete mode 100644 benchmarks/workloads/iperf/Dockerfile delete mode 100644 benchmarks/workloads/iperf/__init__.py delete mode 100644 benchmarks/workloads/iperf/iperf_test.py delete mode 100644 benchmarks/workloads/netcat/BUILD delete mode 100644 benchmarks/workloads/netcat/Dockerfile delete mode 100644 benchmarks/workloads/nginx/BUILD delete mode 100644 benchmarks/workloads/nginx/Dockerfile delete mode 100644 benchmarks/workloads/node/BUILD delete mode 100644 benchmarks/workloads/node/Dockerfile delete mode 100644 benchmarks/workloads/node/index.js delete mode 100644 benchmarks/workloads/node/package.json delete mode 100644 benchmarks/workloads/node_template/BUILD 
delete mode 100644 benchmarks/workloads/node_template/Dockerfile delete mode 100644 benchmarks/workloads/node_template/index.hbs delete mode 100644 benchmarks/workloads/node_template/index.js delete mode 100644 benchmarks/workloads/node_template/package-lock.json delete mode 100644 benchmarks/workloads/node_template/package.json delete mode 100644 benchmarks/workloads/redis/BUILD delete mode 100644 benchmarks/workloads/redis/Dockerfile delete mode 100644 benchmarks/workloads/redisbenchmark/BUILD delete mode 100644 benchmarks/workloads/redisbenchmark/Dockerfile delete mode 100644 benchmarks/workloads/redisbenchmark/__init__.py delete mode 100644 benchmarks/workloads/redisbenchmark/redisbenchmark_test.py delete mode 100644 benchmarks/workloads/ruby/BUILD delete mode 100644 benchmarks/workloads/ruby/Dockerfile delete mode 100644 benchmarks/workloads/ruby/Gemfile delete mode 100644 benchmarks/workloads/ruby/Gemfile.lock delete mode 100755 benchmarks/workloads/ruby/config.ru delete mode 100755 benchmarks/workloads/ruby/index.rb delete mode 100644 benchmarks/workloads/ruby_template/BUILD delete mode 100755 benchmarks/workloads/ruby_template/Dockerfile delete mode 100755 benchmarks/workloads/ruby_template/Gemfile delete mode 100644 benchmarks/workloads/ruby_template/Gemfile.lock delete mode 100755 benchmarks/workloads/ruby_template/config.ru delete mode 100755 benchmarks/workloads/ruby_template/index.erb delete mode 100755 benchmarks/workloads/ruby_template/main.rb delete mode 100644 benchmarks/workloads/sleep/BUILD delete mode 100644 benchmarks/workloads/sleep/Dockerfile delete mode 100644 benchmarks/workloads/sysbench/BUILD delete mode 100644 benchmarks/workloads/sysbench/Dockerfile delete mode 100644 benchmarks/workloads/sysbench/__init__.py delete mode 100644 benchmarks/workloads/sysbench/sysbench_test.py delete mode 100644 benchmarks/workloads/syscall/BUILD delete mode 100644 benchmarks/workloads/syscall/Dockerfile delete mode 100644 benchmarks/workloads/syscall/__init__.py delete mode 100644 benchmarks/workloads/syscall/syscall.c delete mode 100644 benchmarks/workloads/syscall/syscall_test.py delete mode 100644 benchmarks/workloads/tensorflow/BUILD delete mode 100644 benchmarks/workloads/tensorflow/Dockerfile delete mode 100644 benchmarks/workloads/tensorflow/__init__.py delete mode 100644 benchmarks/workloads/true/BUILD delete mode 100644 benchmarks/workloads/true/Dockerfile create mode 100644 test/benchmarks/tcp/BUILD create mode 100644 test/benchmarks/tcp/README.md create mode 100644 test/benchmarks/tcp/nsjoin.c create mode 100755 test/benchmarks/tcp/tcp_benchmark.sh create mode 100644 test/benchmarks/tcp/tcp_proxy.go (limited to 'test') diff --git a/Makefile b/Makefile index dfd1b4abc..365a4b485 100644 --- a/Makefile +++ b/Makefile @@ -122,7 +122,7 @@ smoke-tests: ## Runs a simple smoke test after build runsc. .PHONY: smoke-tests unit-tests: ## Local package unit tests in pkg/..., runsc/, tools/.., etc. - @$(call submake,test TARGETS="pkg/... runsc/... tools/... benchmarks/... benchmarks/runner:runner_test") + @$(call submake,test TARGETS="pkg/... runsc/... tools/...") tests: ## Runs all unit tests and syscall tests. tests: unit-tests diff --git a/WORKSPACE b/WORKSPACE index 058d1b306..6dc060bd5 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -113,26 +113,6 @@ rules_proto_dependencies() rules_proto_toolchains() -# Load python dependencies. 
-git_repository( - name = "rules_python", - commit = "abc4869e02fe9b3866942e89f07b7341f830e805", - remote = "https://github.com/bazelbuild/rules_python.git", - shallow_since = "1583341286 -0500", -) - -load("@rules_python//python:pip.bzl", "pip_import") - -pip_import( - name = "pydeps", - python_interpreter = "python3", - requirements = "//benchmarks:requirements.txt", -) - -load("@pydeps//:requirements.bzl", "pip_install") - -pip_install() - # Load bazel_toolchain to support Remote Build Execution. # See releases at https://releases.bazel.build/bazel-toolchains.html http_archive( diff --git a/benchmarks/BUILD b/benchmarks/BUILD deleted file mode 100644 index 39ca5919c..000000000 --- a/benchmarks/BUILD +++ /dev/null @@ -1,37 +0,0 @@ -load("//tools:defs.bzl", "bzl_library") - -package(licenses = ["notice"]) - -config_setting( - name = "gcloud_rule", - values = { - "define": "gcloud=off", - }, -) - -py_binary( - name = "benchmarks", - testonly = 1, - srcs = ["run.py"], - data = select({ - ":gcloud_rule": [], - "//conditions:default": [ - "//tools/vm:ubuntu1604", - "//tools/vm:zone", - ], - }), - main = "run.py", - python_version = "PY3", - srcs_version = "PY3", - tags = [ - "local", - "manual", - ], - deps = ["//benchmarks/runner"], -) - -bzl_library( - name = "defs_bzl", - srcs = ["defs.bzl"], - visibility = ["//visibility:private"], -) diff --git a/benchmarks/README.md b/benchmarks/README.md deleted file mode 100644 index 814bcb220..000000000 --- a/benchmarks/README.md +++ /dev/null @@ -1,186 +0,0 @@ -# Benchmark tools - -These scripts are tools for collecting performance data for Docker-based tests. - -## Setup - -The scripts assume the following: - -* There are two sets of machines: one where the scripts will be run - (controller) and one or more machines on which docker containers will be run - (environment). -* The controller machine must have bazel installed along with this source - code. You should be able to run a command like `bazel run //benchmarks -- - --list` -* Environment machines must have docker and the required runtimes installed. - More specifically, you should be able to run a command like: `docker run - --runtime=$RUNTIME your/image`. -* The controller has ssh private key which can be used to login to environment - machines and run docker commands without using `sudo`. This is not required - if running locally via the `run-local` command. -* The docker daemon on each of your environment machines is listening on - `unix:///var/run/docker.sock` (docker's default). - -For configuring the environment manually, consult the -[dockerd documentation][dockerd]. - -## Running benchmarks - -### Locally - -The tool is built to, by default, use Google Cloud Platform to run benchmarks, -but it does support GCP workflows. To run locally, run the following from the -benchmarks directory: - -```bash -bazel run --define gcloud=off //benchmarks -- run-local startup - -... -method,metric,result -startup.empty,startup_time_ms,652.5772 -startup.node,startup_time_ms,1654.4042000000002 -startup.ruby,startup_time_ms,1429.835 -``` - -The above command ran the startup benchmark locally, which consists of three -benchmarks (empty, node, and ruby). Benchmark tools ran it on the default -runtime, runc. 
Running on another installed runtime, like say runsc, is as -simple as: - -```bash -bazel run --define gcloud=off //benchmarks -- run-local startup --runtime=runsc -``` - -There is help: - -```bash -bazel run --define gcloud=off //benchmarks -- --help -bazel run --define gcloud=off //benchmarks -- run-local --help -``` - -To list available benchmarks, use the `list` commmand: - -```bash -bazel --define gcloud=off run //benchmarks -- list - -... -Benchmark: sysbench.cpu -Metrics: events_per_second - Run sysbench CPU test. Additional arguments can be provided for sysbench. - - :param max_prime: The maximum prime number to search. -``` - -You can choose benchmarks by name or regex like: - -```bash -bazel run --define gcloud=off //benchmarks -- run-local startup.node -... -metric,result -startup_time_ms,1671.7178000000001 - -``` - -or - -```bash -bazel run --define gcloud=off //benchmarks -- run-local s -... -method,metric,result -startup.empty,startup_time_ms,1792.8292 -startup.node,startup_time_ms,3113.5274 -startup.ruby,startup_time_ms,3025.2424 -sysbench.cpu,cpu_events_per_second,12661.47 -sysbench.memory,memory_ops_per_second,7228268.44 -sysbench.mutex,mutex_time,17.4835 -sysbench.mutex,mutex_latency,3496.7 -sysbench.mutex,mutex_deviation,0.04 -syscall.syscall,syscall_time_ns,2065.0 -``` - -You can run parameterized benchmarks, for example to run with different -runtimes: - -```bash -bazel run --define gcloud=off //benchmarks -- run-local --runtime=runc --runtime=runsc sysbench.cpu -``` - -Or with different parameters: - -```bash -bazel run --define gcloud=off //benchmarks -- run-local --max_prime=10 --max_prime=100 sysbench.cpu -``` - -### On Google Compute Engine (GCE) - -Benchmarks may be run on GCE in an automated way. The default project configured -for `gcloud` will be used. - -An additional parameter `installers` may be provided to ensure that the latest -runtime is installed from the workspace. See the files in `tools/installers` for -supported install targets. - -```bash -bazel run //benchmarks -- run-gcp --installers=head --runtime=runsc sysbench.cpu -``` - -When running on GCE, the scripts generate a per run SSH key, which is added to -your project. The key is set to expire in GCE after 60 minutes and is stored in -a temporary directory on the local machine running the scripts. - -## Writing benchmarks - -To write new benchmarks, you should familiarize yourself with the structure of -the repository. There are three key components. - -## Harness - -The harness makes use of the [docker py SDK][docker-py]. It is advisable that -you familiarize yourself with that API when making changes, specifically: - -* clients -* containers -* images - -In general, benchmarks need only interact with the `Machine` objects provided to -the benchmark function, which are the machines defined in the environment. These -objects allow the benchmark to define the relationships between different -containers, and parse the output. - -## Workloads - -The harness requires workloads to run. These are all available in the -`workloads` directory. - -In general, a workload consists of a Dockerfile to build it (while these are not -hermetic, in general they should be as fixed and isolated as possible), some -parsers for output if required, parser tests and sample data. Provided the test -is named after the workload package and contains a function named `sample`, this -variable will be used to automatically mock workload output when the `--mock` -flag is provided to the main tool. 
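
For illustration, here is a minimal sketch of the kind of workload parser module the paragraph above describes, i.e. a module named after its workload package that exposes a `sample` function for `--mock` runs plus a metric parser. The module name, metric name, and regex are invented for this example and are not the actual parsers removed by this change:

```python
# Hypothetical parser module for an "example" workload. In the removed tree,
# modules like this lived under benchmarks/workloads/<workload>/.
import re

# Canned output returned when the runner is invoked with --mock.
SAMPLE_DATA = """
requests per second: 1234.56
"""


def sample(**env) -> str:
  """Returns sample workload output used to mock a real container run."""
  return SAMPLE_DATA


def requests_per_second(data: str, **env) -> float:
  """Parses workload output and returns the metric value.

  Args:
    data: Raw stdout captured from the workload container (or sample()).
    **env: Unused environment keyword arguments passed through by the harness.

  Returns:
    The parsed requests-per-second value, or 0.0 if it cannot be found.
  """
  match = re.search(r"requests per second: (\d+\.\d+)", data)
  return float(match.group(1)) if match else 0.0
```

A parser test in the same package would then typically call `requests_per_second(sample())` and assert the expected value against the checked-in sample data.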
- -## Writing benchmarks - -Benchmarks define the tests themselves. All benchmarks have the following -function signature: - -```python -def my_func(output) -> float: - return float(output) - -@benchmark(metrics = my_func, machines = 1) -def my_benchmark(machine: machine.Machine, arg: str): - return "3.4432" -``` - -Each benchmark takes a variable amount of position arguments as -`harness.Machine` objects and some set of keyword arguments. It is recommended -that you accept arbitrary keyword arguments and pass them through when -constructing the container under test. - -To write a new benchmark, open a module in the `suites` directory and use the -above signature. You should add a descriptive doc string to describe what your -benchmark is and any test centric arguments. - -[dockerd]: https://docs.docker.com/engine/reference/commandline/dockerd/ -[docker-py]: https://docker-py.readthedocs.io/en/stable/ diff --git a/benchmarks/defs.bzl b/benchmarks/defs.bzl deleted file mode 100644 index 56d28223e..000000000 --- a/benchmarks/defs.bzl +++ /dev/null @@ -1,14 +0,0 @@ -"""Provides attributes common to many workload tests.""" - -load("//tools:defs.bzl", "py_requirement") - -test_deps = [ - py_requirement("attrs", direct = False), - py_requirement("atomicwrites", direct = False), - py_requirement("more-itertools", direct = False), - py_requirement("pathlib2", direct = False), - py_requirement("pluggy", direct = False), - py_requirement("py", direct = False), - py_requirement("pytest"), - py_requirement("six", direct = False), -] diff --git a/benchmarks/examples/localhost.yaml b/benchmarks/examples/localhost.yaml deleted file mode 100644 index f70fe0fb7..000000000 --- a/benchmarks/examples/localhost.yaml +++ /dev/null @@ -1,2 +0,0 @@ -client: localhost -server: localhost diff --git a/benchmarks/harness/BUILD b/benchmarks/harness/BUILD deleted file mode 100644 index 2090d957a..000000000 --- a/benchmarks/harness/BUILD +++ /dev/null @@ -1,201 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "installers", - srcs = [ - "//tools/installers:head", - "//tools/installers:master", - ], - mode = "0755", -) - -filegroup( - name = "files", - srcs = [ - ":installers", - ], -) - -py_library( - name = "harness", - srcs = ["__init__.py"], - data = [ - ":files", - ], -) - -py_library( - name = "benchmark_driver", - srcs = ["benchmark_driver.py"], - deps = [ - "//benchmarks/harness/machine_mocks", - "//benchmarks/harness/machine_producers:machine_producer", - "//benchmarks/suites", - ], -) - -py_library( - name = "container", - srcs = ["container.py"], - deps = [ - "//benchmarks/workloads", - py_requirement( - "asn1crypto", - direct = False, - ), - py_requirement( - "chardet", - direct = False, - ), - py_requirement( - "certifi", - direct = False, - ), - py_requirement("docker"), - py_requirement( - "docker-pycreds", - direct = False, - ), - py_requirement( - "idna", - direct = False, - ), - py_requirement( - "ptyprocess", - direct = False, - ), - py_requirement( - "requests", - direct = False, - ), - py_requirement( - "urllib3", - direct = False, - ), - py_requirement( - "websocket-client", - direct = False, - ), - ], -) - -py_library( - name = "machine", - srcs = ["machine.py"], - deps = [ - "//benchmarks/harness", - "//benchmarks/harness:container", - "//benchmarks/harness:ssh_connection", - "//benchmarks/harness:tunnel_dispatcher", - 
"//benchmarks/harness/machine_mocks", - py_requirement( - "asn1crypto", - direct = False, - ), - py_requirement( - "chardet", - direct = False, - ), - py_requirement( - "certifi", - direct = False, - ), - py_requirement("docker"), - py_requirement( - "docker-pycreds", - direct = False, - ), - py_requirement( - "idna", - direct = False, - ), - py_requirement( - "ptyprocess", - direct = False, - ), - py_requirement( - "requests", - direct = False, - ), - py_requirement( - "six", - direct = False, - ), - py_requirement( - "urllib3", - direct = False, - ), - py_requirement( - "websocket-client", - direct = False, - ), - ], -) - -py_library( - name = "ssh_connection", - srcs = ["ssh_connection.py"], - deps = [ - "//benchmarks/harness", - py_requirement( - "bcrypt", - direct = False, - ), - py_requirement("cffi"), - py_requirement("paramiko"), - py_requirement( - "cryptography", - direct = False, - ), - ], -) - -py_library( - name = "tunnel_dispatcher", - srcs = ["tunnel_dispatcher.py"], - deps = [ - py_requirement( - "asn1crypto", - direct = False, - ), - py_requirement( - "chardet", - direct = False, - ), - py_requirement( - "certifi", - direct = False, - ), - py_requirement("docker"), - py_requirement( - "docker-pycreds", - direct = False, - ), - py_requirement( - "idna", - direct = False, - ), - py_requirement("pexpect"), - py_requirement( - "ptyprocess", - direct = False, - ), - py_requirement( - "requests", - direct = False, - ), - py_requirement( - "urllib3", - direct = False, - ), - py_requirement( - "websocket-client", - direct = False, - ), - ], -) diff --git a/benchmarks/harness/__init__.py b/benchmarks/harness/__init__.py deleted file mode 100644 index 15aa2a69a..000000000 --- a/benchmarks/harness/__init__.py +++ /dev/null @@ -1,62 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Core benchmark utilities.""" - -import getpass -import os -import subprocess -import tempfile - -# LOCAL_WORKLOADS_PATH defines the path to use for local workloads. This is a -# format string that accepts a single string parameter. -LOCAL_WORKLOADS_PATH = os.path.dirname(__file__) + "/../workloads/{}/tar.tar" - -# REMOTE_WORKLOADS_PATH defines the path to use for storing the workloads on the -# remote host. This is a format string that accepts a single string parameter. -REMOTE_WORKLOADS_PATH = "workloads/{}" - -# INSTALLER_ROOT is the set of files that needs to be copied. -INSTALLER_ARCHIVE = os.readlink(os.path.join( - os.path.dirname(__file__), "installers.tar")) - -# SSH_KEY_DIR holds SSH_PRIVATE_KEY for this run. bm-tools paramiko requires -# keys generated with the '-t rsa -m PEM' options from ssh-keygen. This is -# abstracted away from the user. -SSH_KEY_DIR = tempfile.TemporaryDirectory() -SSH_PRIVATE_KEY = "key" - -# DEFAULT_USER is the default user running this script. -DEFAULT_USER = getpass.getuser() - -# DEFAULT_USER_HOME is the home directory of the user running the script. 
-DEFAULT_USER_HOME = os.environ["HOME"] if "HOME" in os.environ else "" - -# Default directory to remotely installer "installer" targets. -REMOTE_INSTALLERS_PATH = "installers" - - -def make_key(): - """Wraps a valid ssh key in a temporary directory.""" - path = os.path.join(SSH_KEY_DIR.name, SSH_PRIVATE_KEY) - if not os.path.exists(path): - cmd = "ssh-keygen -t rsa -m PEM -b 4096 -f {key} -q -N".format( - key=path).split(" ") - cmd.append("") - subprocess.run(cmd, check=True) - return path - - -def delete_key(): - """Deletes temporary directory containing private key.""" - SSH_KEY_DIR.cleanup() diff --git a/benchmarks/harness/benchmark_driver.py b/benchmarks/harness/benchmark_driver.py deleted file mode 100644 index 9abc21b54..000000000 --- a/benchmarks/harness/benchmark_driver.py +++ /dev/null @@ -1,85 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Main driver for benchmarks.""" - -import copy -import statistics -import threading -import types - -from benchmarks import suites -from benchmarks.harness.machine_producers import machine_producer - - -# pylint: disable=too-many-instance-attributes -class BenchmarkDriver: - """Allocates machines and invokes a benchmark method.""" - - def __init__(self, - producer: machine_producer.MachineProducer, - method: types.FunctionType, - runs: int = 1, - **kwargs): - - self._producer = producer - self._method = method - self._kwargs = copy.deepcopy(kwargs) - self._threads = [] - self.lock = threading.RLock() - self._runs = runs - self._metric_results = {} - - def start(self): - """Starts a benchmark thread.""" - for _ in range(self._runs): - thread = threading.Thread(target=self._run_method) - thread.start() - self._threads.append(thread) - - def join(self): - """Joins the thread.""" - # pylint: disable=expression-not-assigned - [t.join() for t in self._threads] - - def _run_method(self): - """Runs all benchmarks.""" - machines = self._producer.get_machines( - suites.benchmark_machines(self._method)) - try: - result = self._method(*machines, **self._kwargs) - for name, res in result: - with self.lock: - if name in self._metric_results: - self._metric_results[name].append(res) - else: - self._metric_results[name] = [res] - finally: - # Always release. 
- self._producer.release_machines(machines) - - def median(self): - """Returns the median result, after join is finished.""" - for key, value in self._metric_results.items(): - yield key, [statistics.median(value)] - - def all(self): - """Returns all results.""" - for key, value in self._metric_results.items(): - yield key, value - - def meanstd(self): - """Returns all results.""" - for key, value in self._metric_results.items(): - mean = statistics.mean(value) - yield key, [mean, statistics.stdev(value, xbar=mean)] diff --git a/benchmarks/harness/container.py b/benchmarks/harness/container.py deleted file mode 100644 index 585436e20..000000000 --- a/benchmarks/harness/container.py +++ /dev/null @@ -1,181 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Container definitions.""" - -import contextlib -import logging -import pydoc -import types -from typing import Tuple - -import docker -import docker.errors - -from benchmarks import workloads - - -class Container: - """Abstract container. - - Must be a context manager. - - Usage: - - with Container(client, image, ...): - ... - """ - - def run(self, **env) -> str: - """Run the container synchronously.""" - raise NotImplementedError - - def detach(self, **env): - """Run the container asynchronously.""" - raise NotImplementedError - - def address(self) -> Tuple[str, int]: - """Return the bound address for the container.""" - raise NotImplementedError - - def get_names(self) -> types.GeneratorType: - """Return names of all containers.""" - raise NotImplementedError - - -# pylint: disable=too-many-instance-attributes -class DockerContainer(Container): - """Class that handles creating a docker container.""" - - # pylint: disable=too-many-arguments - def __init__(self, - client: docker.DockerClient, - host: str, - image: str, - count: int = 1, - runtime: str = "runc", - port: int = 0, - **kwargs): - """Trys to setup "count" containers. - - Args: - client: A docker client from dockerpy. - host: The host address the image is running on. - image: The name of the image to run. - count: The number of containers to setup. - runtime: The container runtime to use. - port: The port to reserve. - **kwargs: Additional container options. - """ - assert count >= 1 - assert port == 0 or count == 1 - self._client = client - self._host = host - self._containers = [] - self._count = count - self._image = image - self._runtime = runtime - self._port = port - self._kwargs = kwargs - if port != 0: - self._ports = {"%d/tcp" % port: None} - else: - self._ports = {} - - @contextlib.contextmanager - def detach(self, **env): - env = ["%s=%s" % (key, value) for (key, value) in env.items()] - # Start all containers. - for _ in range(self._count): - try: - # Start the container in a detached mode. 
- container = self._client.containers.run( - self._image, - detach=True, - remove=True, - runtime=self._runtime, - ports=self._ports, - environment=env, - **self._kwargs) - logging.info("Started detached container %s -> %s", self._image, - container.attrs["Id"]) - self._containers.append(container) - except Exception as exc: - self._clean_containers() - raise exc - try: - # Wait for all containers to be up. - for container in self._containers: - while not container.attrs["State"]["Running"]: - container = self._client.containers.get(container.attrs["Id"]) - yield self - finally: - self._clean_containers() - - def address(self) -> Tuple[str, int]: - assert self._count == 1 - assert self._port != 0 - container = self._client.containers.get(self._containers[0].attrs["Id"]) - port = container.attrs["NetworkSettings"]["Ports"][ - "%d/tcp" % self._port][0]["HostPort"] - return (self._host, port) - - def get_names(self) -> types.GeneratorType: - for container in self._containers: - yield container.name - - def run(self, **env) -> str: - env = ["%s=%s" % (key, value) for (key, value) in env.items()] - return self._client.containers.run( - self._image, - runtime=self._runtime, - ports=self._ports, - remove=True, - environment=env, - **self._kwargs).decode("utf-8") - - def _clean_containers(self): - """Kills all containers.""" - for container in self._containers: - try: - container.kill() - except docker.errors.NotFound: - pass - - -class MockContainer(Container): - """Mock of Container.""" - - def __init__(self, workload: str): - self._workload = workload - - def __enter__(self): - return self - - def run(self, **env): - # Lookup sample data if any exists for the workload module. We use a - # well-defined test locate and a well-defined sample function. - mod = pydoc.locate(workloads.__name__ + "." + self._workload) - if hasattr(mod, "sample"): - return mod.sample(**env) - return "" # No output. - - def address(self) -> Tuple[str, int]: - return ("example.com", 80) - - def get_names(self) -> types.GeneratorType: - yield "mock" - - @contextlib.contextmanager - def detach(self, **env): - yield self diff --git a/benchmarks/harness/machine.py b/benchmarks/harness/machine.py deleted file mode 100644 index 5bdc4aa85..000000000 --- a/benchmarks/harness/machine.py +++ /dev/null @@ -1,265 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Machine abstraction passed to benchmarks to run docker containers. - -Abstraction for interacting with test machines. Machines are produced -by Machine producers and represent a local or remote machine. Benchmark -methods in /benchmarks/suite are passed the required number of machines in order -to run the benchmark. Machines contain methods to run commands via bash, -possibly over ssh. Machines also hold a connection to the docker UNIX socket -to run contianers. 
- - Typical usage example: - - machine = Machine() - machine.run(cmd) - machine.pull(path) - container = machine.container() -""" - -import logging -import os -import re -import subprocess -import time -from typing import List, Tuple - -import docker - -from benchmarks import harness -from benchmarks.harness import container -from benchmarks.harness import machine_mocks -from benchmarks.harness import ssh_connection -from benchmarks.harness import tunnel_dispatcher - -log = logging.getLogger(__name__) - - -class Machine(object): - """The machine object is the primary object for benchmarks. - - Machine objects are passed to each metric function call and benchmarks use - machines to access real connections to those machines. - - Attributes: - _name: Name as a string - """ - _name = "" - - def run(self, cmd: str) -> Tuple[str, str]: - """Convenience method for running a bash command on a machine object. - - Some machines may point to the local machine, and thus, do not have ssh - connections. Run runs a command either local or over ssh and returns the - output stdout and stderr as strings. - - Args: - cmd: The command to run as a string. - - Returns: - The command output. - """ - raise NotImplementedError - - def read(self, path: str) -> str: - """Reads the contents of some file. - - This will be mocked. - - Args: - path: The path to the file to be read. - - Returns: - The file contents. - """ - raise NotImplementedError - - def pull(self, workload: str) -> str: - """Send the given workload to the machine, build and tag it. - - All images must be defined by the workloads directory. - - Args: - workload: The workload name. - - Returns: - The workload tag. - """ - raise NotImplementedError - - def container(self, image: str, **kwargs) -> container.Container: - """Returns a container object. - - Args: - image: The pulled image tag. - **kwargs: Additional container options. - - Returns: - :return: a container.Container object. - """ - raise NotImplementedError - - def sleep(self, amount: float): - """Sleeps the given amount of time.""" - time.sleep(amount) - - def __str__(self): - return self._name - - -class MockMachine(Machine): - """A mocked machine.""" - _name = "mock" - - def run(self, cmd: str) -> Tuple[str, str]: - return "", "" - - def read(self, path: str) -> str: - return machine_mocks.Readfile(path) - - def pull(self, workload: str) -> str: - return workload # Workload is the tag. - - def container(self, image: str, **kwargs) -> container.Container: - return container.MockContainer(image) - - def sleep(self, amount: float): - pass - - -def get_address(machine: Machine) -> str: - """Return a machine's default address.""" - default_route, _ = machine.run("ip route get 8.8.8.8") - return re.search(" src ([0-9.]+) ", default_route).group(1) - - -class LocalMachine(Machine): - """The local machine. - - Attributes: - _name: Name as a string - _docker_client: a pythonic connection to to the local dockerd unix socket. - See: https://github.com/docker/docker-py - """ - - def __init__(self, name): - self._name = name - self._docker_client = docker.from_env() - - def run(self, cmd: str) -> Tuple[str, str]: - process = subprocess.Popen( - cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - return stdout.decode("utf-8"), stderr.decode("utf-8") - - def read(self, path: str) -> bytes: - # Read the exact path locally. - return open(path, "r").read() - - def pull(self, workload: str) -> str: - # Run the docker build command locally. 
- logging.info("Building %s@%s locally...", workload, self._name) - with open(harness.LOCAL_WORKLOADS_PATH.format(workload), - "rb") as dockerfile: - self._docker_client.images.build( - fileobj=dockerfile, tag=workload, custom_context=True) - return workload # Workload is the tag. - - def container(self, image: str, **kwargs) -> container.Container: - # Return a local docker container directly. - return container.DockerContainer(self._docker_client, get_address(self), - image, **kwargs) - - def sleep(self, amount: float): - time.sleep(amount) - - -class RemoteMachine(Machine): - """Remote machine accessible via an SSH connection. - - Attributes: - _name: Name as a string - _ssh_connection: a paramiko backed ssh connection which can be used to run - commands on this machine - _tunnel: a python wrapper around a port forwarded ssh connection between a - local unix socket and the remote machine's dockerd unix socket. - _docker_client: a pythonic wrapper backed by the _tunnel. Allows sending - docker commands: see https://github.com/docker/docker-py - """ - - def __init__(self, name, **kwargs): - self._name = name - self._ssh_connection = ssh_connection.SSHConnection(name, **kwargs) - self._tunnel = tunnel_dispatcher.Tunnel(name, **kwargs) - self._tunnel.connect() - self._docker_client = self._tunnel.get_docker_client() - self._has_installers = False - - def run(self, cmd: str) -> Tuple[str, str]: - return self._ssh_connection.run(cmd) - - def read(self, path: str) -> str: - # Just cat remotely. - stdout, stderr = self._ssh_connection.run("cat '{}'".format(path)) - return stdout + stderr - - def install(self, - installer: str, - results: List[bool] = None, - index: int = -1): - """Method unique to RemoteMachine to handle installation of installers. - - Handles installers, which install things that may change between runs (e.g. - runsc). Usually called from gcloud_producer, which expects this method to - to store results. - - Args: - installer: the installer target to run. - results: Passed by the caller of where to store success. - index: Index for this method to store the result in the passed results - list. - """ - # This generates a tarball of the full installer root (which will generate - # be the full bazel root directory) and sends it over. - if not self._has_installers: - archive = self._ssh_connection.send_installers() - self.run("tar -xvf {archive} -C {dir}".format( - archive=archive, dir=harness.REMOTE_INSTALLERS_PATH)) - self._has_installers = True - - # Execute the remote installer. - self.run("sudo {dir}/{file}".format( - dir=harness.REMOTE_INSTALLERS_PATH, file=installer)) - - if results: - results[index] = True - - def pull(self, workload: str) -> str: - # Push to the remote machine and build. - logging.info("Building %s@%s remotely...", workload, self._name) - remote_path = self._ssh_connection.send_workload(workload) - remote_dir = os.path.dirname(remote_path) - # Workloads are all tarballs. - self.run("tar -xvf {remote_path} -C {remote_dir}".format( - remote_path=remote_path, remote_dir=remote_dir)) - self.run("docker build --tag={} {}".format(workload, remote_dir)) - return workload # Workload is the tag. - - def container(self, image: str, **kwargs) -> container.Container: - # Return a remote docker container. 
- return container.DockerContainer(self._docker_client, get_address(self), - image, **kwargs) - - def sleep(self, amount: float): - time.sleep(amount) diff --git a/benchmarks/harness/machine_mocks/BUILD b/benchmarks/harness/machine_mocks/BUILD deleted file mode 100644 index c8ec4bc79..000000000 --- a/benchmarks/harness/machine_mocks/BUILD +++ /dev/null @@ -1,9 +0,0 @@ -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "machine_mocks", - srcs = ["__init__.py"], -) diff --git a/benchmarks/harness/machine_mocks/__init__.py b/benchmarks/harness/machine_mocks/__init__.py deleted file mode 100644 index 00f0085d7..000000000 --- a/benchmarks/harness/machine_mocks/__init__.py +++ /dev/null @@ -1,81 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Machine mock files.""" - -MEMINFO = """\ -MemTotal: 7652344 kB -MemFree: 7174724 kB -MemAvailable: 7152008 kB -Buffers: 7544 kB -Cached: 178856 kB -SwapCached: 0 kB -Active: 270928 kB -Inactive: 68436 kB -Active(anon): 153124 kB -Inactive(anon): 880 kB -Active(file): 117804 kB -Inactive(file): 67556 kB -Unevictable: 0 kB -Mlocked: 0 kB -SwapTotal: 0 kB -SwapFree: 0 kB -Dirty: 900 kB -Writeback: 0 kB -AnonPages: 153000 kB -Mapped: 129120 kB -Shmem: 1044 kB -Slab: 60864 kB -SReclaimable: 22792 kB -SUnreclaim: 38072 kB -KernelStack: 2672 kB -PageTables: 5756 kB -NFS_Unstable: 0 kB -Bounce: 0 kB -WritebackTmp: 0 kB -CommitLimit: 3826172 kB -Committed_AS: 663836 kB -VmallocTotal: 34359738367 kB -VmallocUsed: 0 kB -VmallocChunk: 0 kB -HardwareCorrupted: 0 kB -AnonHugePages: 0 kB -ShmemHugePages: 0 kB -ShmemPmdMapped: 0 kB -CmaTotal: 0 kB -CmaFree: 0 kB -HugePages_Total: 0 -HugePages_Free: 0 -HugePages_Rsvd: 0 -HugePages_Surp: 0 -Hugepagesize: 2048 kB -DirectMap4k: 94196 kB -DirectMap2M: 4624384 kB -DirectMap1G: 3145728 kB -""" - -CONTENTS = { - "/proc/meminfo": MEMINFO, -} - - -def Readfile(path: str) -> str: - """Reads a mock file. - - Args: - path: The target path. - - Returns: - Mocked file contents or None. 
- """ - return CONTENTS.get(path, None) diff --git a/benchmarks/harness/machine_producers/BUILD b/benchmarks/harness/machine_producers/BUILD deleted file mode 100644 index 81f19bd08..000000000 --- a/benchmarks/harness/machine_producers/BUILD +++ /dev/null @@ -1,84 +0,0 @@ -load("//tools:defs.bzl", "py_library", "py_requirement") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "harness", - srcs = ["__init__.py"], -) - -py_library( - name = "machine_producer", - srcs = ["machine_producer.py"], -) - -py_library( - name = "mock_producer", - srcs = ["mock_producer.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/harness/machine_producers:gcloud_producer", - "//benchmarks/harness/machine_producers:machine_producer", - ], -) - -py_library( - name = "yaml_producer", - srcs = ["yaml_producer.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/harness/machine_producers:machine_producer", - py_requirement( - "PyYAML", - direct = False, - ), - ], -) - -py_library( - name = "gcloud_mock_recorder", - srcs = ["gcloud_mock_recorder.py"], -) - -py_library( - name = "gcloud_producer", - srcs = ["gcloud_producer.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/harness/machine_producers:gcloud_mock_recorder", - "//benchmarks/harness/machine_producers:machine_producer", - ], -) - -filegroup( - name = "test_data", - srcs = [ - "testdata/get_five.json", - "testdata/get_one.json", - ], -) - -py_library( - name = "gcloud_producer_test_lib", - srcs = ["gcloud_producer_test.py"], - deps = [ - "//benchmarks/harness/machine_producers:machine_producer", - "//benchmarks/harness/machine_producers:mock_producer", - ], -) - -py_test( - name = "gcloud_producer_test", - srcs = [":gcloud_producer_test_lib"], - data = [ - ":test_data", - ], - python_version = "PY3", - tags = [ - "local", - "manual", - ], -) diff --git a/benchmarks/harness/machine_producers/__init__.py b/benchmarks/harness/machine_producers/__init__.py deleted file mode 100644 index 634ef4843..000000000 --- a/benchmarks/harness/machine_producers/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/benchmarks/harness/machine_producers/gcloud_mock_recorder.py b/benchmarks/harness/machine_producers/gcloud_mock_recorder.py deleted file mode 100644 index fd9837a37..000000000 --- a/benchmarks/harness/machine_producers/gcloud_mock_recorder.py +++ /dev/null @@ -1,97 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""A recorder and replay for testing the GCloudProducer. - -MockPrinter and MockReader handle printing and reading mock data for the -purposes of testing. MockPrinter is passed to GCloudProducer objects. The user -can then run scenarios and record them for playback in tests later. - -MockReader is passed to MockGcloudProducer objects and handles reading the -previously recorded mock data. - -It is left to the user to check if data printed is properly redacted for their -own use. The intended usecase for this class is data coming from gcloud -commands, which will contain public IPs and other instance data. - -The data format is json and printed/read from the ./test_data directory. The -data is the output of subprocess.CompletedProcess objects in json format. - - Typical usage example: - - recorder = MockPrinter() - producer = GCloudProducer(args, recorder) - machines = producer.get_machines(1) - with open("my_file.json") as fd: - recorder.write_out(fd) - - reader = MockReader(filename) - producer = MockGcloudProducer(args, mock) - machines = producer.get_machines(1) - assert len(machines) == 1 -""" - -import io -import json -import subprocess - - -class MockPrinter(object): - """Handles printing Mock data for MockGcloudProducer. - - Attributes: - _records: list of json object records for printing - """ - - def __init__(self): - self._records = [] - - def record(self, entry: subprocess.CompletedProcess): - """Records data and strips out ip addresses.""" - - record = { - "args": entry.args, - "stdout": entry.stdout.decode("utf-8"), - "returncode": str(entry.returncode) - } - self._records.append(record) - - def write_out(self, fd: io.FileIO): - """Prints out the data into the given filepath.""" - fd.write(json.dumps(self._records, indent=4)) - - -class MockReader(object): - """Handles reading Mock data for MockGcloudProducer. - - Attributes: - _records: List[json] records read from the passed in file. - """ - - def __init__(self, filepath: str): - with open(filepath, "rb") as file: - self._records = json.loads(file.read()) - self._i = 0 - - def __iter__(self): - return self - - def __next__(self, args) -> subprocess.CompletedProcess: - """Returns the next record as a CompletedProcess.""" - if self._i < len(self._records): - record = self._records[self._i] - stdout = record["stdout"].encode("ascii") - returncode = int(record["returncode"]) - return subprocess.CompletedProcess( - args=args, returncode=returncode, stdout=stdout, stderr=b"") - raise StopIteration() diff --git a/benchmarks/harness/machine_producers/gcloud_producer.py b/benchmarks/harness/machine_producers/gcloud_producer.py deleted file mode 100644 index 44d72f575..000000000 --- a/benchmarks/harness/machine_producers/gcloud_producer.py +++ /dev/null @@ -1,250 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""A machine producer which produces machine objects using `gcloud`. 
- -Machine producers produce valid harness.Machine objects which are backed by -real machines. This producer produces those machines on the given user's GCP -account using the `gcloud` tool. - -GCloudProducer creates instances on the given GCP account named like: -`machine-XXXXXXX-XXXX-XXXX-XXXXXXXXXXXX` in a randomized fashion such that name -collisions with user instances shouldn't happen. - - Typical usage example: - - producer = GCloudProducer(args) - machines = producer.get_machines(NUM_MACHINES) - # run stuff on machines with machines[i].run(CMD) - producer.release_machines(NUM_MACHINES) -""" -import datetime -import json -import subprocess -import threading -from typing import List, Dict, Any -import uuid - -from benchmarks.harness import machine -from benchmarks.harness.machine_producers import gcloud_mock_recorder -from benchmarks.harness.machine_producers import machine_producer - - -class GCloudProducer(machine_producer.MachineProducer): - """Implementation of MachineProducer backed by GCP. - - Produces Machine objects backed by GCP instances. - - Attributes: - image: image name as a string. - zone: string to a valid GCP zone. - machine_type: type of GCP to create (e.g. n1-standard-4). - installers: list of installers post-boot. - ssh_key_file: path to a valid ssh private key. See README on vaild ssh keys. - ssh_user: string of user name for ssh_key - ssh_password: string of password for ssh key - internal: if true, use internal IPs of instances. Used if bm-tools is - running on a GCP vm when a firewall is set for external IPs. - mock: a mock printer which will print mock data if required. Mock data is - recorded output from subprocess calls (returncode, stdout, args). - condition: mutex for this class around machine creation and deleteion. - """ - - def __init__(self, - image: str, - zone: str, - machine_type: str, - installers: List[str], - ssh_key_file: str, - ssh_user: str, - ssh_password: str, - internal: bool, - mock: gcloud_mock_recorder.MockPrinter = None): - self.image = image - self.zone = zone - self.machine_type = machine_type - self.installers = installers - self.ssh_key_file = ssh_key_file - self.ssh_user = ssh_user - self.ssh_password = ssh_password - self.internal = internal - self.mock = mock - self.condition = threading.Condition() - - def get_machines(self, num_machines: int) -> List[machine.Machine]: - """Returns requested number of machines backed by GCP instances.""" - if num_machines <= 0: - raise ValueError( - "Cannot ask for {num} machines!".format(num=num_machines)) - with self.condition: - names = self._get_unique_names(num_machines) - instances = self._build_instances(names) - self._add_ssh_key_to_instances(names) - machines = self._machines_from_instances(instances) - - # Install all bits in lock-step. - # - # This will perform paralell installations for however many machines we - # have, but it's easy to track errors because if installing (a, b, c), we - # won't install "c" until "b" is installed on all machines. - for installer in self.installers: - threads = [None] * len(machines) - results = [False] * len(machines) - for i in range(len(machines)): - threads[i] = threading.Thread( - target=machines[i].install, args=(installer, results, i)) - threads[i].start() - for thread in threads: - thread.join() - for result in results: - if not result: - raise NotImplementedError( - "Installers failed on at least one machine!") - - # Add this user to each machine's docker group. 
- for m in machines: - m.run("sudo setfacl -m user:$USER:rw /var/run/docker.sock") - - return machines - - def release_machines(self, machine_list: List[machine.Machine]): - """Releases the requested number of machines, deleting the instances.""" - if not machine_list: - return - cmd = "gcloud compute instances delete --quiet".split(" ") - names = [str(m) for m in machine_list] - cmd.extend(names) - cmd.append("--zone={zone}".format(zone=self.zone)) - self._run_command(cmd, detach=True) - - def _machines_from_instances( - self, instances: List[Dict[str, Any]]) -> List[machine.Machine]: - """Creates Machine Objects from json data describing created instances.""" - machines = [] - for instance in instances: - name = instance["name"] - external = instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"] - internal = instance["networkInterfaces"][0]["networkIP"] - kwargs = { - "hostname": internal if self.internal else external, - "key_path": self.ssh_key_file, - "username": self.ssh_user, - "key_password": self.ssh_password - } - machines.append(machine.RemoteMachine(name=name, **kwargs)) - return machines - - def _get_unique_names(self, num_names) -> List[str]: - """Returns num_names unique names based on data from the GCP project.""" - return ["machine-" + str(uuid.uuid4()) for _ in range(0, num_names)] - - def _build_instances(self, names: List[str]) -> List[Dict[str, Any]]: - """Creates instances using gcloud command. - - Runs the command `gcloud compute instances create` and returns json data - on created instances on success. Creates len(names) instances, one for each - name. - - Args: - names: list of names of instances to create. - - Returns: - List of json data describing created machines. - """ - if not names: - raise ValueError( - "_build_instances cannot create instances without names.") - cmd = "gcloud compute instances create".split(" ") - cmd.extend(names) - cmd.append("--image=" + self.image) - cmd.append("--zone=" + self.zone) - cmd.append("--machine-type=" + self.machine_type) - res = self._run_command(cmd) - data = res.stdout - data = str(data, "utf-8") if isinstance(data, (bytes, bytearray)) else data - return json.loads(data) - - def _add_ssh_key_to_instances(self, names: List[str]) -> None: - """Adds ssh key to instances by calling gcloud ssh command. - - Runs the command `gcloud compute ssh instance_name` on list of images by - name. Tries to ssh into given instance. - - Args: - names: list of machine names to which to add the ssh-key - self.ssh_key_file. - - Raises: - subprocess.CalledProcessError: when underlying subprocess call returns an - error other than 255 (Connection closed by remote host). - TimeoutError: when 3 unsuccessful tries to ssh into the host return 255. 
- """ - for name in names: - cmd = "gcloud compute ssh {user}@{name}".format( - user=self.ssh_user, name=name).split(" ") - if self.internal: - cmd.append("--internal-ip") - cmd.append("--ssh-key-file={key}".format(key=self.ssh_key_file)) - cmd.append("--zone={zone}".format(zone=self.zone)) - cmd.append("--command=uname") - timeout = datetime.timedelta(seconds=5 * 60) - start = datetime.datetime.now() - while datetime.datetime.now() <= timeout + start: - try: - self._run_command(cmd) - break - except subprocess.CalledProcessError: - if datetime.datetime.now() > timeout + start: - raise TimeoutError( - "Could not SSH into instance after 5 min: {name}".format( - name=name)) - - def _run_command(self, - cmd: List[str], - detach: bool = False) -> [None, subprocess.CompletedProcess]: - """Runs command as a subprocess. - - Runs command as subprocess and returns the result. - If this has a mock recorder, use the record method to record the subprocess - call. - - Args: - cmd: command to be run as a list of strings. - detach: if True, run the child process and don't wait for it to return. - - Returns: - Completed process object to be parsed by caller or None if detach=True. - - Raises: - CalledProcessError: if subprocess.run returns an error. - """ - cmd = cmd + ["--format=json"] - if detach: - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if self.mock: - out, _ = p.communicate() - self.mock.record( - subprocess.CompletedProcess( - returncode=p.returncode, stdout=out, args=p.args)) - return - - res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if self.mock: - self.mock.record(res) - if res.returncode != 0: - raise subprocess.CalledProcessError( - cmd=" ".join(res.args), - output=res.stdout, - stderr=res.stderr, - returncode=res.returncode) - return res diff --git a/benchmarks/harness/machine_producers/gcloud_producer_test.py b/benchmarks/harness/machine_producers/gcloud_producer_test.py deleted file mode 100644 index c8adb2bdc..000000000 --- a/benchmarks/harness/machine_producers/gcloud_producer_test.py +++ /dev/null @@ -1,48 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Tests GCloudProducer using mock data. - -GCloudProducer produces machines using 'get_machines' and 'release_machines' -methods. The tests check recorded data (jsonified subprocess.CompletedProcess -objects) of the producer producing one and five machines. 
-""" -import os -import types - -from benchmarks.harness.machine_producers import machine_producer -from benchmarks.harness.machine_producers import mock_producer - -TEST_DIR = os.path.dirname(__file__) - - -def run_get_release(producer: machine_producer.MachineProducer, - num_machines: int, - validator: types.FunctionType = None): - machines = producer.get_machines(num_machines) - assert len(machines) == num_machines - if validator: - validator(machines=machines, cmd="uname -a", workload=None) - producer.release_machines(machines) - - -def test_run_one(): - mock = mock_producer.MockReader(TEST_DIR + "get_one.json") - producer = mock_producer.MockGCloudProducer(mock) - run_get_release(producer, 1) - - -def test_run_five(): - mock = mock_producer.MockReader(TEST_DIR + "get_five.json") - producer = mock_producer.MockGCloudProducer(mock) - run_get_release(producer, 5) diff --git a/benchmarks/harness/machine_producers/machine_producer.py b/benchmarks/harness/machine_producers/machine_producer.py deleted file mode 100644 index f5591c026..000000000 --- a/benchmarks/harness/machine_producers/machine_producer.py +++ /dev/null @@ -1,51 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Abstract types.""" - -import threading -from typing import List - -from benchmarks.harness import machine - - -class MachineProducer: - """Abstract Machine producer.""" - - def get_machines(self, num_machines: int) -> List[machine.Machine]: - """Returns the requested number of machines.""" - raise NotImplementedError - - def release_machines(self, machine_list: List[machine.Machine]): - """Releases the given set of machines.""" - raise NotImplementedError - - -class LocalMachineProducer(MachineProducer): - """Produces Local Machines.""" - - def __init__(self, limit: int): - self.limit_sem = threading.Semaphore(value=limit) - - def get_machines(self, num_machines: int) -> List[machine.Machine]: - """Returns the request number of MockMachines.""" - - self.limit_sem.acquire() - return [machine.LocalMachine("local") for _ in range(num_machines)] - - def release_machines(self, machine_list: List[machine.MockMachine]): - """No-op.""" - if not machine_list: - raise ValueError("Cannot release an empty list!") - self.limit_sem.release() - machine_list.clear() diff --git a/benchmarks/harness/machine_producers/mock_producer.py b/benchmarks/harness/machine_producers/mock_producer.py deleted file mode 100644 index 37e9cb4b7..000000000 --- a/benchmarks/harness/machine_producers/mock_producer.py +++ /dev/null @@ -1,52 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Producers of mocks.""" - -from typing import List, Any - -from benchmarks.harness import machine -from benchmarks.harness.machine_producers import gcloud_mock_recorder -from benchmarks.harness.machine_producers import gcloud_producer -from benchmarks.harness.machine_producers import machine_producer - - -class MockMachineProducer(machine_producer.MachineProducer): - """Produces MockMachine objects.""" - - def get_machines(self, num_machines: int) -> List[machine.MockMachine]: - """Returns the request number of MockMachines.""" - return [machine.MockMachine() for i in range(num_machines)] - - def release_machines(self, machine_list: List[machine.MockMachine]): - """No-op.""" - return - - -class MockGCloudProducer(gcloud_producer.GCloudProducer): - """Mocks GCloudProducer for testing purposes.""" - - def __init__(self, mock: gcloud_mock_recorder.MockReader, **kwargs): - gcloud_producer.GCloudProducer.__init__( - self, project="mock", ssh_private_key_path="mock", **kwargs) - self.mock = mock - - def _validate_ssh_file(self): - pass - - def _run_command(self, cmd): - return self.mock.pop(cmd) - - def _machines_from_instances( - self, instances: List[Any]) -> List[machine.MockMachine]: - return [machine.MockMachine() for _ in instances] diff --git a/benchmarks/harness/machine_producers/testdata/get_five.json b/benchmarks/harness/machine_producers/testdata/get_five.json deleted file mode 100644 index 32bad1b06..000000000 --- a/benchmarks/harness/machine_producers/testdata/get_five.json +++ /dev/null @@ -1,211 +0,0 @@ -[ - { - "args": [ - "gcloud", - "compute", - "instances", - "list", - "--project", - "project", - "--format=json" - ], - "stdout": "[{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":{\"natIP\":\"0.0.0.0\"}]}]}]", - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "instances", - "create", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "machine-5f28f145-cc2d-427d-9cbf-428d164cdb92", - "machine-da5859b5-bae6-435d-8005-0202d6f6e065", - "machine-880a8a2f-918c-4f9e-a43c-ed3c8e02ea05", - "machine-1149147d-71e2-43ea-8fe1-49256e5c441c", - "--preemptible", - "--image=ubuntu-1910-eoan-v20191204", - "--zone=us-west1-b", - "--image-project=ubuntu-os-cloud", - "--format=json" - ], - "stdout": "[{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]}]", - "returncode": "0" - }, - { - "args": [ - "gcloud", 
- "compute", - "instances", - "start", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "machine-5f28f145-cc2d-427d-9cbf-428d164cdb92", - "machine-da5859b5-bae6-435d-8005-0202d6f6e065", - "machine-880a8a2f-918c-4f9e-a43c-ed3c8e02ea05", - "machine-1149147d-71e2-43ea-8fe1-49256e5c441c", - "--zone=us-west1-b", - "--project=project", - "--format=json" - ], - "stdout": "[{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]},{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]}]", - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "Linux\n[]\n", - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-5f28f145-cc2d-427d-9cbf-428d164cdb92", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "Linux\n[]\n", - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-da5859b5-bae6-435d-8005-0202d6f6e065", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "Linux\n[]\n", - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-880a8a2f-918c-4f9e-a43c-ed3c8e02ea05", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "Linux\n[]\n", - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-1149147d-71e2-43ea-8fe1-49256e5c441c", - 
"--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "Linux\n[]\n", - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "instances", - "delete", - "--quiet", - "machine-42c9bf6e-8d45-4c37-b1c0-7e4fdcf530fc", - "machine-5f28f145-cc2d-427d-9cbf-428d164cdb92", - "machine-da5859b5-bae6-435d-8005-0202d6f6e065", - "machine-880a8a2f-918c-4f9e-a43c-ed3c8e02ea05", - "machine-1149147d-71e2-43ea-8fe1-49256e5c441c", - "--zone=us-west1-b", - "--format=json" - ], - "stdout": "[]\n", - "returncode": "0" - } -] diff --git a/benchmarks/harness/machine_producers/testdata/get_one.json b/benchmarks/harness/machine_producers/testdata/get_one.json deleted file mode 100644 index c359c19c8..000000000 --- a/benchmarks/harness/machine_producers/testdata/get_one.json +++ /dev/null @@ -1,145 +0,0 @@ -[ - { - "args": [ - "gcloud", - "compute", - "instances", - "list", - "--project", - "linux-testing-user", - "--format=json" - ], - "stdout": "[{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]}]", - - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "instances", - "create", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--preemptible", - "--image=ubuntu-1910-eoan-v20191204", - "--zone=us-west1-b", - "--image-project=ubuntu-os-cloud", - "--format=json" - ], - "stdout": "[{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]}]", - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "instances", - "start", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--zone=us-west1-b", - "--project=linux-testing-user", - "--format=json" - ], - "stdout": "[{\"name\":\"name\", \"networkInterfaces\":[{\"accessConfigs\":[{\"natIP\":\"0.0.0.0\"}]}]}]", - - "returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "", - "returncode": "255" - }, - { - "args": [ - "gcloud", - "compute", - "ssh", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--ssh-key-file=/usr/local/google/home/user/.ssh/benchmark-tools", - "--zone=us-west1-b", - "--command=uname", - "--format=json" - ], - "stdout": "Linux\n[]\n", - 
"returncode": "0" - }, - { - "args": [ - "gcloud", - "compute", - "instances", - "delete", - "--quiet", - "machine-129dfcf9-b05b-4c16-a4cd-21353b570ddc", - "--zone=us-west1-b", - "--format=json" - ], - "stdout": "[]\n", - "returncode": "0" - } -] diff --git a/benchmarks/harness/machine_producers/yaml_producer.py b/benchmarks/harness/machine_producers/yaml_producer.py deleted file mode 100644 index 5d334e480..000000000 --- a/benchmarks/harness/machine_producers/yaml_producer.py +++ /dev/null @@ -1,106 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Producers based on yaml files.""" - -import os -import threading -from typing import Dict -from typing import List - -import yaml - -from benchmarks.harness import machine -from benchmarks.harness.machine_producers import machine_producer - - -class YamlMachineProducer(machine_producer.MachineProducer): - """Loads machines from a yaml file.""" - - def __init__(self, path: str): - self.machines = build_machines(path) - self.max_machines = len(self.machines) - self.machine_condition = threading.Condition() - - def get_machines(self, num_machines: int) -> List[machine.Machine]: - if num_machines > self.max_machines: - raise ValueError( - "Insufficient Ammount of Machines. {ask} asked for and have {max_num} max." - .format(ask=num_machines, max_num=self.max_machines)) - - with self.machine_condition: - while not self._enough_machines(num_machines): - self.machine_condition.wait(timeout=1) - return [self.machines.pop(0) for _ in range(num_machines)] - - def release_machines(self, machine_list: List[machine.Machine]): - with self.machine_condition: - while machine_list: - next_machine = machine_list.pop() - self.machines.append(next_machine) - self.machine_condition.notify() - - def _enough_machines(self, ask: int): - return ask <= len(self.machines) - - -def build_machines(path: str, num_machines: str = -1) -> List[machine.Machine]: - """Builds machine objects defined by the yaml file "path". - - Args: - path: The path to a yaml file which defines machines. - num_machines: Optional limit on how many machine objects to build. - - Returns: - Machine objects in a list. - - If num_machines is set, len(machines) <= num_machines. - """ - data = parse_yaml(path) - machines = [] - for key, value in data.items(): - if len(machines) == num_machines: - return machines - if isinstance(value, dict): - machines.append(machine.RemoteMachine(key, **value)) - else: - machines.append(machine.LocalMachine(key)) - return machines - - -def parse_yaml(path: str) -> Dict[str, Dict[str, str]]: - """Parse the yaml file pointed by path. - - Args: - path: The path to yaml file. - - Returns: - The contents of the yaml file as a dictionary. - """ - data = get_file_contents(path) - return yaml.load(data, Loader=yaml.Loader) - - -def get_file_contents(path: str) -> str: - """Dumps the file contents to a string and returns them. - - Args: - path: The path to dump. - - Returns: - The file contents as a string. 
- """ - if not os.path.isabs(path): - path = os.path.abspath(path) - with open(path) as input_file: - return input_file.read() diff --git a/benchmarks/harness/ssh_connection.py b/benchmarks/harness/ssh_connection.py deleted file mode 100644 index b8c8e42d4..000000000 --- a/benchmarks/harness/ssh_connection.py +++ /dev/null @@ -1,126 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""SSHConnection handles the details of SSH connections.""" - -import logging -import os -import warnings - -import paramiko - -from benchmarks import harness - -# Get rid of paramiko Cryptography Warnings. -warnings.filterwarnings(action="ignore", module=".*paramiko.*") - -log = logging.getLogger(__name__) - - -def send_one_file(client: paramiko.SSHClient, path: str, - remote_dir: str) -> str: - """Sends a single file via an SSH client. - - Args: - client: The existing SSH client. - path: The local path. - remote_dir: The remote directory. - - Returns: - :return: The remote path as a string. - """ - filename = path.split("/").pop() - if remote_dir != ".": - client.exec_command("mkdir -p " + remote_dir) - with client.open_sftp() as ftp_client: - ftp_client.put(path, os.path.join(remote_dir, filename)) - return os.path.join(remote_dir, filename) - - -class SSHConnection: - """SSH connection to a remote machine.""" - - def __init__(self, name: str, hostname: str, key_path: str, username: str, - **kwargs): - """Sets up a paramiko ssh connection to the given hostname.""" - self._name = name # Unused. - self._hostname = hostname - self._username = username - self._key_path = key_path # RSA Key path - self._kwargs = kwargs - # SSHConnection wraps paramiko. paramiko supports RSA, ECDSA, and Ed25519 - # keys, and we've chosen to only suport and require RSA keys. paramiko - # supports RSA keys that begin with '----BEGIN RSAKEY----'. - # https://stackoverflow.com/questions/53600581/ssh-key-generated-by-ssh-keygen-is-not-recognized-by-paramiko - self.rsa_key = self._rsa() - self.run("true") # Validate. - - def _client(self) -> paramiko.SSHClient: - """Returns a connected SSH client.""" - client = paramiko.SSHClient() - client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - client.connect( - hostname=self._hostname, - port=22, - username=self._username, - pkey=self.rsa_key, - allow_agent=False, - look_for_keys=False) - return client - - def _rsa(self): - if "key_password" in self._kwargs: - password = self._kwargs["key_password"] - else: - password = None - rsa = paramiko.RSAKey.from_private_key_file(self._key_path, password) - return rsa - - def run(self, cmd: str) -> (str, str): - """Runs a command via ssh. - - Args: - cmd: The shell command to run. - - Returns: - The contents of stdout and stderr. 
- """ - with self._client() as client: - log.info("running command: %s", cmd) - _, stdout, stderr = client.exec_command(command=cmd) - log.info("returned status: %d", stdout.channel.recv_exit_status()) - stdout = stdout.read().decode("utf-8") - stderr = stderr.read().decode("utf-8") - log.info("stdout: %s", stdout) - log.info("stderr: %s", stderr) - return stdout, stderr - - def send_workload(self, name: str) -> str: - """Sends a workload tarball to the remote machine. - - Args: - name: The workload name. - - Returns: - The remote path. - """ - with self._client() as client: - return send_one_file(client, harness.LOCAL_WORKLOADS_PATH.format(name), - harness.REMOTE_WORKLOADS_PATH.format(name)) - - def send_installers(self) -> str: - with self._client() as client: - return send_one_file( - client, - path=harness.INSTALLER_ARCHIVE, - remote_dir=harness.REMOTE_INSTALLERS_PATH) diff --git a/benchmarks/harness/tunnel_dispatcher.py b/benchmarks/harness/tunnel_dispatcher.py deleted file mode 100644 index c56fd022a..000000000 --- a/benchmarks/harness/tunnel_dispatcher.py +++ /dev/null @@ -1,122 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Tunnel handles setting up connections to remote machines. - -Tunnel dispatcher is a wrapper around the connection from a local UNIX socket -and a remote UNIX socket via SSH with port forwarding. This is done to -initialize the pythonic dockerpy client to run containers on the remote host by -connecting to /var/run/docker.sock (where Docker is listening). Tunnel -dispatcher sets up the local UNIX socket and calls the `ssh` command as a -subprocess, and holds a reference to that subprocess. It manages clean-up on -exit as best it can by killing the ssh subprocess and deleting the local UNIX -socket,stored in /tmp for easy cleanup in most systems if this fails. - - Typical usage example: - - t = Tunnel(name, **kwargs) - t.connect() - client = t.get_docker_client() # - client.containers.run("ubuntu", "echo hello world") - -""" - -import os -import tempfile -import time - -import docker -import pexpect - -SSH_TUNNEL_COMMAND = """ssh - -o GlobalKnownHostsFile=/dev/null - -o UserKnownHostsFile=/dev/null - -o StrictHostKeyChecking=no - -o IdentitiesOnly=yes - -nNT -L {filename}:/var/run/docker.sock - -i {key_path} - {username}@{hostname}""" - - -class Tunnel(object): - """The tunnel object represents the tunnel via ssh. - - This connects a local unix domain socket with a remote socket. - - Attributes: - _filename: a temporary name of the UNIX socket prefixed by the name - argument. - _hostname: the IP or resolvable hostname of the remote host. - _username: the username of the ssh_key used to run ssh. - _key_path: path to a valid key. - _key_password: optional password to the ssh key in _key_path - _process: holds reference to the ssh subprocess created. - - Returns: - The new minimum port. - - Raises: - ConnectionError: If no available port is found. 
- """ - - def __init__(self, - name: str, - hostname: str, - username: str, - key_path: str, - key_password: str = "", - **kwargs): - self._filename = tempfile.NamedTemporaryFile(prefix=name).name - self._hostname = hostname - self._username = username - self._key_path = key_path - self._key_password = key_password - self._kwargs = kwargs - self._process = None - - def connect(self): - """Connects the SSH tunnel and stores the subprocess reference in _process.""" - cmd = SSH_TUNNEL_COMMAND.format( - filename=self._filename, - key_path=self._key_path, - username=self._username, - hostname=self._hostname) - self._process = pexpect.spawn(cmd, timeout=10) - - # If given a password, assume we'll be asked for it. - if self._key_password: - self._process.expect(["Enter passphrase for key .*: "]) - self._process.sendline(self._key_password) - - while True: - # Wait for the tunnel to appear. - if self._process.exitstatus is not None: - raise ConnectionError("Error in setting up ssh tunnel") - if os.path.exists(self._filename): - return - time.sleep(0.1) - - def path(self): - """Return the socket file.""" - return self._filename - - def get_docker_client(self): - """Returns a docker client for this Tunnel.""" - return docker.DockerClient(base_url="unix:/" + self._filename) - - def __del__(self): - """Closes the ssh connection process and deletes the socket file.""" - if self._process: - self._process.close() - if os.path.exists(self._filename): - os.remove(self._filename) diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt deleted file mode 100644 index 577eb1a2e..000000000 --- a/benchmarks/requirements.txt +++ /dev/null @@ -1,32 +0,0 @@ -asn1crypto==1.2.0 -atomicwrites==1.3.0 -attrs==19.3.0 -bcrypt==3.1.7 -certifi==2019.9.11 -cffi==1.13.2 -chardet==3.0.4 -Click==7.0 -cryptography==2.8 -docker==3.7.0 -docker-pycreds==0.4.0 -idna==2.8 -importlib-metadata==0.23 -more-itertools==7.2.0 -packaging==19.2 -paramiko==2.6.0 -pathlib2==2.3.5 -pexpect==4.7.0 -pluggy==0.9.0 -ptyprocess==0.6.0 -py==1.8.0 -pycparser==2.19 -PyNaCl==1.3.0 -pyparsing==2.4.5 -pytest==4.3.0 -PyYAML==5.1.2 -requests==2.22.0 -six==1.13.0 -urllib3==1.25.7 -wcwidth==0.1.7 -websocket-client==0.56.0 -zipp==0.6.0 diff --git a/benchmarks/run.py b/benchmarks/run.py deleted file mode 100644 index a22eb8641..000000000 --- a/benchmarks/run.py +++ /dev/null @@ -1,19 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Benchmark runner.""" - -from benchmarks import runner - -if __name__ == "__main__": - runner.runner() diff --git a/benchmarks/runner/BUILD b/benchmarks/runner/BUILD deleted file mode 100644 index 471debfdf..000000000 --- a/benchmarks/runner/BUILD +++ /dev/null @@ -1,56 +0,0 @@ -load("//tools:defs.bzl", "py_library", "py_requirement", "py_test") -load("//benchmarks:defs.bzl", "test_deps") - -package(licenses = ["notice"]) - -py_library( - name = "runner", - srcs = ["__init__.py"], - data = [ - "//benchmarks/workloads:files", - ], - visibility = ["//benchmarks:__pkg__"], - deps = [ - ":commands", - "//benchmarks/harness:benchmark_driver", - "//benchmarks/harness/machine_producers:machine_producer", - "//benchmarks/harness/machine_producers:mock_producer", - "//benchmarks/harness/machine_producers:yaml_producer", - "//benchmarks/suites", - "//benchmarks/suites:absl", - "//benchmarks/suites:density", - "//benchmarks/suites:fio", - "//benchmarks/suites:helpers", - "//benchmarks/suites:http", - "//benchmarks/suites:media", - "//benchmarks/suites:ml", - "//benchmarks/suites:network", - "//benchmarks/suites:redis", - "//benchmarks/suites:startup", - "//benchmarks/suites:sysbench", - "//benchmarks/suites:syscall", - py_requirement("click"), - ], -) - -py_library( - name = "commands", - srcs = ["commands.py"], - deps = [ - py_requirement("click"), - ], -) - -py_test( - name = "runner_test", - srcs = ["runner_test.py"], - python_version = "PY3", - tags = [ - "local", - "manual", - ], - deps = test_deps + [ - ":runner", - py_requirement("click"), - ], -) diff --git a/benchmarks/runner/__init__.py b/benchmarks/runner/__init__.py deleted file mode 100644 index fc59cf505..000000000 --- a/benchmarks/runner/__init__.py +++ /dev/null @@ -1,308 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""High-level benchmark utility.""" - -import copy -import csv -import logging -import pkgutil -import pydoc -import re -import subprocess -import sys -import types -from typing import List -from typing import Tuple - -import click - -from benchmarks import harness -from benchmarks import suites -from benchmarks.harness import benchmark_driver -from benchmarks.harness.machine_producers import gcloud_producer -from benchmarks.harness.machine_producers import machine_producer -from benchmarks.harness.machine_producers import mock_producer -from benchmarks.harness.machine_producers import yaml_producer -from benchmarks.runner import commands - - -@click.group() -@click.option( - "--verbose/--no-verbose", default=False, help="Enable verbose logging.") -@click.option("--debug/--no-debug", default=False, help="Enable debug logging.") -def runner(verbose: bool = False, debug: bool = False): - """Run distributed benchmarks. - - See the run and list commands for details. - - Args: - verbose: Enable verbose logging. - debug: Enable debug logging (supercedes verbose). 
- """ - if debug: - logging.basicConfig(level=logging.DEBUG) - elif verbose: - logging.basicConfig(level=logging.INFO) - - -def find_benchmarks( - regex: str) -> List[Tuple[str, types.ModuleType, types.FunctionType]]: - """Finds all available benchmarks. - - Args: - regex: A regular expression to match. - - Returns: - A (short_name, module, function) tuple for each match. - """ - pkgs = pkgutil.walk_packages(suites.__path__, suites.__name__ + ".") - found = [] - for _, name, _ in pkgs: - mod = pydoc.locate(name) - funcs = [ - getattr(mod, x) - for x in dir(mod) - if suites.is_benchmark(getattr(mod, x)) - ] - for func in funcs: - # Use the short_name with the benchmarks. prefix stripped. - prefix_len = len(suites.__name__ + ".") - short_name = mod.__name__[prefix_len:] + "." + func.__name__ - # Add to the list if a pattern is provided. - if re.compile(regex).match(short_name): - found.append((short_name, mod, func)) - return found - - -@runner.command("list") -@click.argument("method", nargs=-1) -def list_all(method): - """Lists available benchmarks.""" - if not method: - method = ".*" - else: - method = "(" + ",".join(method) + ")" - for (short_name, _, func) in find_benchmarks(method): - print("Benchmark %s:" % short_name) - metrics = suites.benchmark_metrics(func) - if func.__doc__: - print(" " + func.__doc__.lstrip().rstrip()) - if metrics: - print("\n Metrics:") - for metric in metrics: - print("\t{name}: {doc}".format(name=metric[0], doc=metric[1])) - print("\n") - - -@runner.command("run-local", commands.LocalCommand) -@click.pass_context -def run_local(ctx, limit: float, **kwargs): - """Runs benchmarks locally.""" - run(ctx, machine_producer.LocalMachineProducer(limit=limit), **kwargs) - - -@runner.command("run-mock", commands.RunCommand) -@click.pass_context -def run_mock(ctx, **kwargs): - """Runs benchmarks on Mock machines. Used for testing.""" - run(ctx, mock_producer.MockMachineProducer(), **kwargs) - - -@runner.command("run-gcp", commands.GCPCommand) -@click.pass_context -def run_gcp(ctx, image_file: str, zone_file: str, internal: bool, - machine_type: str, installers: List[str], **kwargs): - """Runs all benchmarks on GCP instances.""" - - # Resolve all files. - image = subprocess.check_output([image_file]).rstrip() - zone = subprocess.check_output([zone_file]).rstrip() - key_file = harness.make_key() - - producer = gcloud_producer.GCloudProducer( - image, - zone, - machine_type, - installers, - ssh_key_file=key_file, - ssh_user=harness.DEFAULT_USER, - ssh_password="", - internal=internal) - - try: - run(ctx, producer, **kwargs) - finally: - harness.delete_key() - - -def run(ctx, producer: machine_producer.MachineProducer, method: str, runs: int, - runtime: List[str], metric: List[str], stat: str, **kwargs): - """Runs arbitrary benchmarks. - - All unknown command line flags are passed through to the underlying benchmark - method. Flags may be specified multiple times, in which case it is considered - a "dimension" for the test, and a comma-separated table will be emitted - instead of a single result. - - See the output of list to see available metrics for any given benchmark - method. The method parameter is a regular expression that will match against - available benchmarks. If multiple benchmarks match, then that is considered a - distinct "dimension" for the test. - - All benchmarks are run in parallel where possible, but have exclusive - ownership over the individual machines. - - Every benchmark method will be run the times indicated by --runs. 
- - Args: - ctx: Click context. - producer: A Machine Producer from which to get Machines. - method: A regular expression for methods to be run. - runs: Number of runs. - runtime: A list of runtimes to test. - metric: A list of metrics to extract. - stat: The class of statistics to extract. - **kwargs: Dimensions to test. - """ - # First, calculate additional arguments. - # - # This essentially calculates any arguments that appear multiple times, and - # moves those to the "dimensions" dictionary, which maps to lists. These - # dimensions are then iterated over to generate the relevant csv output. - dimensions = {} - - if stat not in ["median", "all", "meanstd"]: - raise ValueError("Illegal value for --result, see help.") - - def squish(key: str, value: str): - """Collapse an argument into kwargs or dimensions.""" - if key in dimensions: - # Extend an existing dimension. - dimensions[key].append(value) - elif key in kwargs: - # Create a new dimension. - dimensions[key] = [kwargs[key], value] - del kwargs[key] - else: - # A single value. - kwargs[key] = value - - for item in ctx.args: - if "=" in method: - # This must be the method. The method is simply set to the first - # non-matching argument, which we're also parsing here. - item, method = method, item - if "=" not in item: - logging.error("illegal argument: %s", item) - sys.exit(1) - (key, value) = item.lstrip("-").split("=", 1) - squish(key, value) - - # Convert runtime and metric to dimensions. - # - # They exist only in the arguments above for documentation purposes. - # Essentially here we are treating them like anything else. Note however, - # that an empty set here will result in a dimension. This is important for - # metrics, where an empty set actually means all metrics. - def fold(key: str, value, allow_flatten=False): - """Collapse a list value into kwargs or dimensions.""" - if len(value) == 1 and allow_flatten: - kwargs[key] = value[0] - else: - dimensions[key] = value - - fold("runtime", runtime, allow_flatten=True) - fold("metric", metric) - - # Lookup the methods. - # - # We match the method parameter to a regular expression. This allows you to - # do things like `run --mock .*` for a broad test. Note that we track the - # short_names in the dimensions here, and look up again in the recursion. - methods = { - short_name: func for (short_name, _, func) in find_benchmarks(method) - } - if not methods: - # Must match at least one method. - logging.error("no matching benchmarks for %s: try list.", method) - sys.exit(1) - fold("method", list(methods.keys()), allow_flatten=True) - - # Spin up the drivers. - # - # We ensure that metric is the last entry, because we have special behavior. - # They actually run the test once and the benchmark is a generator that - # produces all viable metrics. - dimension_keys = list(dimensions.keys()) - if "metric" in dimension_keys: - dimension_keys.remove("metric") - dimension_keys.append("metric") - drivers = [] - - def _start(keywords, finished, left): - """Runs a test across dimensions recursively.""" - # Resolve the method fully, it starts as a string. - if "method" in keywords and isinstance(keywords["method"], str): - keywords["method"] = methods[keywords["method"]] - # Is this a non-recursive case? - if not left: - driver = benchmark_driver.BenchmarkDriver(producer, runs=runs, **keywords) - driver.start() - drivers.append((finished, driver)) - else: - # Recurse on the next dimension. 
- current, left = left[0], left[1:] - keywords = copy.deepcopy(keywords) - if current == "metric": - # We use a generator, popped below. Note that metric is - # guaranteed to be the last element here, and we will provide - # the value for 'done' below when generating the csv. - keywords[current] = dimensions[current] - _start(keywords, finished, left) - else: - # Generate manually. - for value in dimensions[current]: - keywords[current] = value - _start(keywords, finished + [value], left) - - # Start all the drivers, recursively. - _start(kwargs, [], dimension_keys) - - # Finish all tests, write results. - output = csv.writer(sys.stdout) - output.writerow(dimension_keys + ["result"]) - for (done, driver) in drivers: - driver.join() - for (metric_name, result) in getattr(driver, stat)(): - output.writerow([ # Collapse the method name. - hasattr(x, "__name__") and x.__name__ or x for x in done - ] + [metric_name] + result) - - -@runner.command() -@click.argument("env") -@click.option( - "--cmd", default="uname -a", help="command to run on all found machines") -@click.option( - "--workload", default="true", help="workload to run all found machines") -def validate(env, cmd, workload): - """Validates an environment described by yaml file.""" - producer = yaml_producer.YamlMachineProducer(env) - for machine in producer.machines: - print("Machine %s:" % machine) - stdout, _ = machine.run(cmd) - print(" Output of '%s': %s" % (cmd, stdout.lstrip().rstrip())) - image = machine.pull(workload) - stdout = machine.container(image).run() - print(" Container %s: %s" % (workload, stdout.lstrip().rstrip())) diff --git a/benchmarks/runner/commands.py b/benchmarks/runner/commands.py deleted file mode 100644 index 9a391eb01..000000000 --- a/benchmarks/runner/commands.py +++ /dev/null @@ -1,135 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Module with the guts of `click` commands. - -Overrides of the click.core.Command. This is done so flags are inherited between -similar commands (the run command). The classes below are meant to be used in -click templates like so. - -@runner.command("run-mock", RunCommand) -def run_mock(**kwargs): - # mock implementation - -""" -import os - -import click - - -class RunCommand(click.core.Command): - """Base Run Command with flags. - - Attributes: - method: regex of which suite to choose (e.g. sysbench would run - sysbench.cpu, sysbench.memory, and sysbench.mutex) See list command for - details. - metric: metric(s) to extract. See list command for details. - runtime: the runtime(s) on which to run. - runs: the number of runs to do of each method. - stat: how to compile results in the case of multiple run (e.g. median). 
- """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - method = click.core.Argument(("method",)) - - metric = click.core.Option(("--metric",), - help="The metric to extract.", - multiple=True) - - runtime = click.core.Option(("--runtime",), - default=["runc"], - help="The runtime to use.", - multiple=True) - runs = click.core.Option(("--runs",), - default=1, - help="The number of times to run each benchmark.") - stat = click.core.Option( - ("--stat",), - default="median", - help="How to aggregate the data from all runs." - "\nmedian - returns the median of all runs (default)" - "\nall - returns all results comma separated" - "\nmeanstd - returns result as mean,std") - self.params.extend([method, runtime, runs, stat, metric]) - self.ignore_unknown_options = True - self.allow_extra_args = True - - -class LocalCommand(RunCommand): - """LocalCommand inherits all flags from RunCommand. - - Attributes: - limit: limits the number of machines on which to run benchmarks. This limits - for local how many benchmarks may run at a time. e.g. "startup" requires - one machine -- passing two machines would limit two startup jobs at a - time. Default is infinity. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.params.append( - click.core.Option( - ("--limit",), - default=1, - help="Limit of number of benchmarks that can run at a given time.")) - - -class GCPCommand(RunCommand): - """GCPCommand inherits all flags from RunCommand and adds flags for run_gcp method. - - Attributes: - image_file: name of the image to build machines from - zone_file: a GCP zone (e.g. us-west1-b) - installers: named installers for post-create - machine_type: type of machine to create (e.g. n1-standard-4) - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - image_file = click.core.Option( - ("--image_file",), - help="The binary that emits the GCP image.", - default=os.path.join( - os.path.dirname(__file__), "../../tools/vm/ubuntu1604"), - ) - zone_file = click.core.Option( - ("--zone_file",), - help="The binary that emits the GCP zone.", - default=os.path.join(os.path.dirname(__file__), "../../tools/vm/zone"), - ) - internal = click.core.Option( - ("--internal/--no-internal",), - help="""Use instance internal IPs. Used if bm-tools runner is running on - GCP instance with firewall rules blocking external IPs.""", - default=False, - ) - installers = click.core.Option( - ("--installers",), - help="The set of installers to use.", - multiple=True, - ) - machine_type = click.core.Option( - ("--machine_type",), - help="Type to make all machines.", - default="n1-standard-4", - ) - self.params.extend([ - image_file, - zone_file, - internal, - machine_type, - installers, - ]) diff --git a/benchmarks/runner/runner_test.py b/benchmarks/runner/runner_test.py deleted file mode 100644 index 7818d631a..000000000 --- a/benchmarks/runner/runner_test.py +++ /dev/null @@ -1,59 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""Top-level tests.""" - -import os -import subprocess -import sys - -from click import testing -import pytest - -from benchmarks import runner - - -def _get_locale(): - output = subprocess.check_output(["locale", "-a"]) - locales = output.split() - if b"en_US.utf8" in locales: - return "en_US.UTF-8" - else: - return "C.UTF-8" - - -def _set_locale(): - locale = _get_locale() - if os.getenv("LANG") != locale: - os.environ["LANG"] = locale - os.environ["LC_ALL"] = locale - os.execv("/proc/self/exe", ["python"] + sys.argv) - - -def test_list(): - cli_runner = testing.CliRunner() - result = cli_runner.invoke(runner.runner, ["list"]) - print(result.output) - assert result.exit_code == 0 - - -def test_run(): - cli_runner = testing.CliRunner() - result = cli_runner.invoke(runner.runner, ["run-mock", "."]) - print(result.output) - assert result.exit_code == 0 - - -if __name__ == "__main__": - _set_locale() - sys.exit(pytest.main([__file__])) diff --git a/benchmarks/suites/BUILD b/benchmarks/suites/BUILD deleted file mode 100644 index 04fc23261..000000000 --- a/benchmarks/suites/BUILD +++ /dev/null @@ -1,130 +0,0 @@ -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "suites", - srcs = ["__init__.py"], -) - -py_library( - name = "absl", - srcs = ["absl.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/workloads/absl", - ], -) - -py_library( - name = "density", - srcs = ["density.py"], - deps = [ - "//benchmarks/harness:container", - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/suites:helpers", - ], -) - -py_library( - name = "fio", - srcs = ["fio.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/suites:helpers", - "//benchmarks/workloads/fio", - ], -) - -py_library( - name = "helpers", - srcs = ["helpers.py"], - deps = ["//benchmarks/harness:machine"], -) - -py_library( - name = "http", - srcs = ["http.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/workloads/ab", - ], -) - -py_library( - name = "media", - srcs = ["media.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/suites:helpers", - "//benchmarks/workloads/ffmpeg", - ], -) - -py_library( - name = "ml", - srcs = ["ml.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/suites:startup", - "//benchmarks/workloads/tensorflow", - ], -) - -py_library( - name = "network", - srcs = ["network.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/suites:helpers", - "//benchmarks/workloads/iperf", - ], -) - -py_library( - name = "redis", - srcs = ["redis.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/workloads/redisbenchmark", - ], -) - -py_library( - name = "startup", - srcs = ["startup.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/suites:helpers", - ], -) - -py_library( - name = "sysbench", - srcs = ["sysbench.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/workloads/sysbench", - ], -) - -py_library( - name = "syscall", - srcs = ["syscall.py"], - deps = [ - "//benchmarks/harness:machine", - "//benchmarks/suites", - "//benchmarks/workloads/syscall", - ], -) diff --git 
a/benchmarks/suites/__init__.py b/benchmarks/suites/__init__.py deleted file mode 100644 index 360736cc3..000000000 --- a/benchmarks/suites/__init__.py +++ /dev/null @@ -1,119 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Core benchmark annotations.""" - -import functools -import inspect -import types -from typing import List -from typing import Tuple - -BENCHMARK_METRICS = '__benchmark_metrics__' -BENCHMARK_MACHINES = '__benchmark_machines__' - - -def is_benchmark(func: types.FunctionType) -> bool: - """Returns true if the given function is a benchmark.""" - return isinstance(func, types.FunctionType) and \ - hasattr(func, BENCHMARK_METRICS) and \ - hasattr(func, BENCHMARK_MACHINES) - - -def benchmark_metrics(func: types.FunctionType) -> List[Tuple[str, str]]: - """Returns the list of available metrics.""" - return [(metric.__name__, metric.__doc__) - for metric in getattr(func, BENCHMARK_METRICS)] - - -def benchmark_machines(func: types.FunctionType) -> int: - """Returns the number of machines required.""" - return getattr(func, BENCHMARK_MACHINES) - - -# pylint: disable=unused-argument -def default(value, **kwargs): - """Returns the passed value.""" - return value - - -def benchmark(metrics: List[types.FunctionType] = None, - machines: int = 1) -> types.FunctionType: - """Define a benchmark function with metrics. - - Args: - metrics: A list of metric functions. - machines: The number of machines required. - - Returns: - A function that accepts the given number of machines, and iteratively - returns a set of (metric_name, metric_value) pairs when called repeatedly. - """ - if not metrics: - # The default passes through. - metrics = [default] - - def decorator(func: types.FunctionType) -> types.FunctionType: - """Decorator function.""" - # Every benchmark should accept at least two parameters: - # runtime: The runtime to use for the benchmark (str, required). - # metrics: The metrics to use, if not the default (str, optional). - @functools.wraps(func) - def wrapper(*args, runtime: str, metric: list = None, **kwargs): - """Wrapper function.""" - # First -- ensure that we marshall all types appropriately. In - # general, we will call this with only strings. These strings will - # need to be converted to their underlying types/classes. - sig = inspect.signature(func) - for param in sig.parameters.values(): - if param.annotation != inspect.Parameter.empty and \ - param.name in kwargs and not isinstance(kwargs[param.name], param.annotation): - try: - # Marshall to the appropriate type. - kwargs[param.name] = param.annotation(kwargs[param.name]) - except Exception as exc: - raise ValueError( - 'illegal type for %s(%s=%s): %s' % - (func.__name__, param.name, kwargs[param.name], exc)) - elif param.default != inspect.Parameter.empty and \ - param.name not in kwargs: - # Ensure that we have the value set, because it will - # be passed to the metric function for evaluation. 
- kwargs[param.name] = param.default - - # Next, figure out how to apply a metric. We do this prior to - # running the underlying function to prevent having to wait a few - # minutes for a result just to see some error. - if not metric: - # Return all metrics in the iterator. - result = func(*args, runtime=runtime, **kwargs) - for metric_func in metrics: - yield (metric_func.__name__, metric_func(result, **kwargs)) - else: - result = None - for single_metric in metric: - for metric_func in metrics: - # Is this a function that matches the name? - # Apply this function to the result. - if metric_func.__name__ == single_metric: - if not result: - # Lazy evaluation: only if metric matches. - result = func(*args, runtime=runtime, **kwargs) - yield single_metric, metric_func(result, **kwargs) - - # Set metadata on the benchmark (used above). - setattr(wrapper, BENCHMARK_METRICS, metrics) - setattr(wrapper, BENCHMARK_MACHINES, machines) - return wrapper - - return decorator diff --git a/benchmarks/suites/absl.py b/benchmarks/suites/absl.py deleted file mode 100644 index 5d9b57a09..000000000 --- a/benchmarks/suites/absl.py +++ /dev/null @@ -1,37 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""absl build benchmark.""" - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.workloads import absl - - -@suites.benchmark(metrics=[absl.elapsed_time], machines=1) -def build(target: machine.Machine, **kwargs) -> str: - """Runs the absl workload and report the absl build time. - - Runs the 'bazel build //absl/...' in a clean bazel directory and - monitors time elapsed. - - Args: - target: A machine object. - **kwargs: Additional container options. - - Returns: - Container output. - """ - image = target.pull("absl") - return target.container(image, **kwargs).run() diff --git a/benchmarks/suites/density.py b/benchmarks/suites/density.py deleted file mode 100644 index 89d29fb26..000000000 --- a/benchmarks/suites/density.py +++ /dev/null @@ -1,121 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
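A minimal sketch of the @suites.benchmark contract documented above (the "sleep" workload and the metric function are illustrative, not part of the removed code): a metric function receives the raw result plus the benchmark's keyword arguments, and the decorated function becomes a generator of (metric_name, value) pairs.

from benchmarks import suites
from benchmarks.harness import machine


def output_lines(result, **kwargs):
  """Counts lines in the raw container output."""
  return len(result.splitlines())


@suites.benchmark(metrics=[output_lines], machines=1)
def hello(target: machine.Machine, **kwargs) -> str:
  """Runs a trivial container and returns its output."""
  image = target.pull("sleep")
  return target.container(image, **kwargs).run()

# When driven per machine, e.g.:
#   for name, value in hello(m, runtime="runc"):
#     print(name, value)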
-"""Density tests.""" - -import re -import types - -from benchmarks import suites -from benchmarks.harness import container -from benchmarks.harness import machine -from benchmarks.suites import helpers - - -# pylint: disable=unused-argument -def memory_usage(value, **kwargs): - """Returns the passed value.""" - return value - - -def density(target: machine.Machine, - workload: str, - count: int = 50, - wait: float = 0, - load_func: types.FunctionType = None, - **kwargs): - """Calculate the average memory usage per container. - - Args: - target: A machine object. - workload: The workload to run. - count: The number of containers to start. - wait: The time to wait after starting. - load_func: Callback that is called after count images have been started on - the given machine. - **kwargs: Additional container options. - - Returns: - The average usage in Kb per container. - """ - count = int(count) - - # Drop all caches. - helpers.drop_caches(target) - before = target.read("/proc/meminfo") - - # Load the workload. - image = target.pull(workload) - - with target.container( - image=image, count=count, **kwargs).detach() as containers: - # Call the optional load function callback if given. - if load_func: - load_func(target, containers) - # Wait 'wait' time before taking a measurement. - target.sleep(wait) - - # Drop caches again. - helpers.drop_caches(target) - after = target.read("/proc/meminfo") - - # Calculate the memory used. - available_re = re.compile(r"MemAvailable:\s*(\d+)\skB\n") - before_available = available_re.findall(before) - after_available = available_re.findall(after) - return 1024 * float(int(before_available[0]) - - int(after_available[0])) / float(count) - - -def load_redis(target: machine.Machine, containers: container.Container): - """Use redis-benchmark "LPUSH" to load each container with 1G of data. - - Args: - target: A machine object. - containers: A set of containers. - """ - target.pull("redisbenchmark") - for name in containers.get_names(): - flags = "-d 10000 -t LPUSH" - target.container( - "redisbenchmark", links={ - name: name - }).run( - host=name, flags=flags) - - -@suites.benchmark(metrics=[memory_usage], machines=1) -def empty(target: machine.Machine, **kwargs) -> float: - """Run trivial containers in a density test.""" - return density(target, workload="sleep", wait=1.0, **kwargs) - - -@suites.benchmark(metrics=[memory_usage], machines=1) -def node(target: machine.Machine, **kwargs) -> float: - """Run node containers in a density test.""" - return density(target, workload="node", wait=3.0, **kwargs) - - -@suites.benchmark(metrics=[memory_usage], machines=1) -def ruby(target: machine.Machine, **kwargs) -> float: - """Run ruby containers in a density test.""" - return density(target, workload="ruby", wait=3.0, **kwargs) - - -@suites.benchmark(metrics=[memory_usage], machines=1) -def redis(target: machine.Machine, **kwargs) -> float: - """Run redis containers in a density test.""" - if "count" not in kwargs: - kwargs["count"] = 5 - return density( - target, workload="redis", wait=3.0, load_func=load_redis, **kwargs) diff --git a/benchmarks/suites/fio.py b/benchmarks/suites/fio.py deleted file mode 100644 index 2171790c5..000000000 --- a/benchmarks/suites/fio.py +++ /dev/null @@ -1,165 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""File I/O tests.""" - -import os - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.suites import helpers -from benchmarks.workloads import fio - - -# pylint: disable=too-many-arguments -# pylint: disable=too-many-locals -def run_fio(target: machine.Machine, - test: str, - ioengine: str = "sync", - size: int = 1024 * 1024 * 1024, - iodepth: int = 4, - blocksize: int = 1024 * 1024, - time: int = -1, - mount_dir: str = "", - filename: str = "file.dat", - tmpfs: bool = False, - ramp_time: int = 0, - **kwargs) -> str: - """FIO benchmarks. - - For more on fio see: - https://media.readthedocs.org/pdf/fio/latest/fio.pdf - - Args: - target: A machine object. - test: The test to run (read, write, randread, randwrite, etc.) - ioengine: The engine for I/O. - size: The size of the generated file in bytes (if an integer) or 5g, 16k, - etc. - iodepth: The I/O for certain engines. - blocksize: The blocksize for reads and writes in bytes (if an integer) or - 4k, etc. - time: If test is time based, how long to run in seconds. - mount_dir: The absolute path on the host to mount a bind mount. - filename: The name of the file to creat inside container. For a path of - /dir/dir/file, the script setup a volume like 'docker run -v - mount_dir:/dir/dir fio' and fio will create (and delete) the file - /dir/dir/file. If tmpfs is set, this /dir/dir will be a tmpfs. - tmpfs: If true, mount on tmpfs. - ramp_time: The time to run before recording statistics - **kwargs: Additional container options. - - Returns: - The output of fio as a string. - """ - # Pull the image before dropping caches. - image = target.pull("fio") - - if not mount_dir: - stdout, _ = target.run("pwd") - mount_dir = stdout.rstrip() - - # Setup the volumes. - volumes = {mount_dir: {"bind": "/disk", "mode": "rw"}} if not tmpfs else None - tmpfs = {"/disk": ""} if tmpfs else None - - # Construct a file in the volume. - filepath = os.path.join("/disk", filename) - - # If we are running a read test, us fio to write a file and then flush file - # data from memory. - if "read" in test: - target.container( - image, volumes=volumes, tmpfs=tmpfs, **kwargs).run( - test="write", - ioengine="sync", - size=size, - iodepth=iodepth, - blocksize=blocksize, - path=filepath) - helpers.drop_caches(target) - - # Run the test. - time_str = "--time_base --runtime={time}".format( - time=time) if int(time) > 0 else "" - res = target.container( - image, volumes=volumes, tmpfs=tmpfs, **kwargs).run( - test=test, - ioengine=ioengine, - size=size, - iodepth=iodepth, - blocksize=blocksize, - time=time_str, - path=filepath, - ramp_time=ramp_time) - - target.run( - "rm {path}".format(path=os.path.join(mount_dir.rstrip(), filename))) - - return res - - -@suites.benchmark(metrics=[fio.read_bandwidth, fio.read_io_ops], machines=1) -def read(*args, **kwargs): - """Read test. - - Args: - *args: None. - **kwargs: Additional container options. - - Returns: - The output of fio. 
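A hedged usage sketch for the run_fio() helper above, assuming `target` is a machine object obtained from one of the machine producers; the ioengine and sizes here are illustrative choices, not defaults from the removed module.

from benchmarks.suites import fio as fio_suite

out = fio_suite.run_fio(
    target,
    test="randread",          # a read test triggers a priming write pass and a cache drop
    ioengine="libaio",
    size=1024 * 1024 * 1024,  # 1 GiB data file
    blocksize=4096,           # 4 KiB accesses
    time=30,                  # time-based run of 30 seconds
    tmpfs=True)               # place the file on a tmpfs mount
print(out)                    # raw fio output (read_bandwidth / read_io_ops parse this)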
- """ - return run_fio(*args, test="read", **kwargs) - - -@suites.benchmark(metrics=[fio.read_bandwidth, fio.read_io_ops], machines=1) -def randread(*args, **kwargs): - """Random read test. - - Args: - *args: None. - **kwargs: Additional container options. - - Returns: - The output of fio. - """ - return run_fio(*args, test="randread", **kwargs) - - -@suites.benchmark(metrics=[fio.write_bandwidth, fio.write_io_ops], machines=1) -def write(*args, **kwargs): - """Write test. - - Args: - *args: None. - **kwargs: Additional container options. - - Returns: - The output of fio. - """ - return run_fio(*args, test="write", **kwargs) - - -@suites.benchmark(metrics=[fio.write_bandwidth, fio.write_io_ops], machines=1) -def randwrite(*args, **kwargs): - """Random write test. - - Args: - *args: None. - **kwargs: Additional container options. - - Returns: - The output of fio. - """ - return run_fio(*args, test="randwrite", **kwargs) diff --git a/benchmarks/suites/helpers.py b/benchmarks/suites/helpers.py deleted file mode 100644 index b3c7360ab..000000000 --- a/benchmarks/suites/helpers.py +++ /dev/null @@ -1,57 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Benchmark helpers.""" - -import datetime -from benchmarks.harness import machine - - -class Timer: - """Helper to time runtime of some call. - - Usage: - - with Timer as t: - # do something. - t.get_time_in_seconds() - """ - - def __init__(self): - self._start = datetime.datetime.now() - - def __enter__(self): - self.start() - return self - - def start(self): - """Starts the timer.""" - self._start = datetime.datetime.now() - - def elapsed(self) -> float: - """Returns the elapsed time in seconds.""" - return (datetime.datetime.now() - self._start).total_seconds() - - def __exit__(self, exception_type, exception_value, exception_traceback): - pass - - -def drop_caches(target: machine.Machine): - """Drops caches on the machine. - - Args: - target: A machine object. - """ - target.run("sudo sync") - target.run("sudo sysctl vm.drop_caches=3") - target.run("sudo sysctl vm.drop_caches=3") diff --git a/benchmarks/suites/http.py b/benchmarks/suites/http.py deleted file mode 100644 index 6efea938c..000000000 --- a/benchmarks/suites/http.py +++ /dev/null @@ -1,138 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""HTTP benchmarks.""" - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.workloads import ab - - -# pylint: disable=too-many-arguments -def http(server: machine.Machine, - client: machine.Machine, - workload: str, - requests: int = 5000, - connections: int = 10, - port: int = 80, - path: str = "notfound", - **kwargs) -> str: - """Run apachebench (ab) against an http server. - - Args: - server: A machine object. - client: A machine object. - workload: The http-serving workload. - requests: Number of requests to send the server. Default is 5000. - connections: Number of concurent connections to use. Default is 10. - port: The port to access in benchmarking. - path: File to download, generally workload-specific. - **kwargs: Additional container options. - - Returns: - The full apachebench output. - """ - # Pull the client & server. - apachebench = client.pull("ab") - netcat = client.pull("netcat") - image = server.pull(workload) - - with server.container(image, port=port, **kwargs).detach() as container: - (host, port) = container.address() - # Wait for the server to come up. - client.container(netcat).run(host=host, port=port) - # Run the benchmark, no arguments. - return client.container(apachebench).run( - host=host, - port=port, - requests=requests, - connections=connections, - path=path) - - -# pylint: disable=too-many-arguments -# pylint: disable=too-many-locals -def http_app(server: machine.Machine, - client: machine.Machine, - workload: str, - requests: int = 5000, - connections: int = 10, - port: int = 80, - path: str = "notfound", - **kwargs) -> str: - """Run apachebench (ab) against an http application. - - Args: - server: A machine object. - client: A machine object. - workload: The http-serving workload. - requests: Number of requests to send the server. Default is 5000. - connections: Number of concurent connections to use. Default is 10. - port: The port to use for benchmarking. - path: File to download, generally workload-specific. - **kwargs: Additional container options. - - Returns: - The full apachebench output. - """ - # Pull the client & server. - apachebench = client.pull("ab") - netcat = client.pull("netcat") - server_netcat = server.pull("netcat") - redis = server.pull("redis") - image = server.pull(workload) - redis_port = 6379 - redis_name = "{workload}_redis_server".format(workload=workload) - - with server.container(redis, name=redis_name).detach(): - server.container(server_netcat, links={redis_name: redis_name})\ - .run(host=redis_name, port=redis_port) - with server.container(image, port=port, links={redis_name: redis_name}, **kwargs)\ - .detach(host=redis_name) as container: - (host, port) = container.address() - # Wait for the server to come up. - client.container(netcat).run(host=host, port=port) - # Run the benchmark, no arguments. 
- return client.container(apachebench).run( - host=host, - port=port, - requests=requests, - connections=connections, - path=path) - - -@suites.benchmark(metrics=[ab.transfer_rate, ab.latency], machines=2) -def httpd(*args, **kwargs) -> str: - """Apache2 benchmark.""" - return http(*args, workload="httpd", port=80, **kwargs) - - -@suites.benchmark( - metrics=[ab.transfer_rate, ab.latency, ab.requests_per_second], machines=2) -def nginx(*args, **kwargs) -> str: - """Nginx benchmark.""" - return http(*args, workload="nginx", port=80, **kwargs) - - -@suites.benchmark( - metrics=[ab.transfer_rate, ab.latency, ab.requests_per_second], machines=2) -def node(*args, **kwargs) -> str: - """Node benchmark.""" - return http_app(*args, workload="node_template", path="", port=8080, **kwargs) - - -@suites.benchmark( - metrics=[ab.transfer_rate, ab.latency, ab.requests_per_second], machines=2) -def ruby(*args, **kwargs) -> str: - """Ruby benchmark.""" - return http_app(*args, workload="ruby_template", path="", port=9292, **kwargs) diff --git a/benchmarks/suites/media.py b/benchmarks/suites/media.py deleted file mode 100644 index 9cbffdaa1..000000000 --- a/benchmarks/suites/media.py +++ /dev/null @@ -1,42 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Media processing benchmarks.""" - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.suites import helpers -from benchmarks.workloads import ffmpeg - - -@suites.benchmark(metrics=[ffmpeg.run_time], machines=1) -def transcode(target: machine.Machine, **kwargs) -> float: - """Runs a video transcoding workload and times it. - - Args: - target: A machine object. - **kwargs: Additional container options. - - Returns: - Total workload runtime. - """ - # Load before timing. - image = target.pull("ffmpeg") - - # Drop caches. - helpers.drop_caches(target) - - # Time startup + transcoding. - with helpers.Timer() as timer: - target.container(image, **kwargs).run() - return timer.elapsed() diff --git a/benchmarks/suites/ml.py b/benchmarks/suites/ml.py deleted file mode 100644 index a394d1f69..000000000 --- a/benchmarks/suites/ml.py +++ /dev/null @@ -1,33 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
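A hedged sketch of calling the two-machine HTTP benchmark described above directly, assuming `server` and `client` are machine objects handed out by a producer; the request and connection counts are illustrative.

from benchmarks.suites import http

for metric_name, value in http.nginx(
    server, client, runtime="runc", requests=10000, connections=50):
  print(metric_name, value)  # transfer_rate, latency, requests_per_second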
-"""Machine Learning tests.""" - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.suites import startup -from benchmarks.workloads import tensorflow - - -@suites.benchmark(metrics=[tensorflow.run_time], machines=1) -def train(target: machine.Machine, **kwargs): - """Run the tensorflow benchmark and return the runtime in seconds of workload. - - Args: - target: A machine object. - **kwargs: Additional container options. - - Returns: - The total runtime. - """ - return startup.startup(target, workload="tensorflow", count=1, **kwargs) diff --git a/benchmarks/suites/network.py b/benchmarks/suites/network.py deleted file mode 100644 index f973cf3f1..000000000 --- a/benchmarks/suites/network.py +++ /dev/null @@ -1,101 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Network microbenchmarks.""" - -from typing import Dict - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.suites import helpers -from benchmarks.workloads import iperf - - -def run_iperf(client: machine.Machine, - server: machine.Machine, - client_kwargs: Dict[str, str] = None, - server_kwargs: Dict[str, str] = None) -> str: - """Measure iperf performance. - - Args: - client: A machine object. - server: A machine object. - client_kwargs: Additional client container options. - server_kwargs: Additional server container options. - - Returns: - The output of iperf. - """ - if not client_kwargs: - client_kwargs = dict() - if not server_kwargs: - server_kwargs = dict() - - # Pull images. - netcat = client.pull("netcat") - iperf_client_image = client.pull("iperf") - iperf_server_image = server.pull("iperf") - - # Set this due to a bug in the kernel that resets connections. - client.run("sudo /sbin/sysctl -w net.netfilter.nf_conntrack_tcp_be_liberal=1") - server.run("sudo /sbin/sysctl -w net.netfilter.nf_conntrack_tcp_be_liberal=1") - - with server.container( - iperf_server_image, port=5001, **server_kwargs).detach() as iperf_server: - (host, port) = iperf_server.address() - # Wait until the service is available. - client.container(netcat).run(host=host, port=port) - # Run a warm-up run. - client.container( - iperf_client_image, stderr=True, **client_kwargs).run( - host=host, port=port) - # Run the client with relevant arguments. - res = client.container(iperf_client_image, stderr=True, **client_kwargs)\ - .run(host=host, port=port) - helpers.drop_caches(client) - return res - - -@suites.benchmark(metrics=[iperf.bandwidth], machines=2) -def upload(client: machine.Machine, server: machine.Machine, **kwargs) -> str: - """Measure upload performance. - - Args: - client: A machine object. - server: A machine object. - **kwargs: Client container options. - - Returns: - The output of iperf. 
- """ - if kwargs["runtime"] == "runc": - kwargs["network_mode"] = "host" - return run_iperf(client, server, client_kwargs=kwargs) - - -@suites.benchmark(metrics=[iperf.bandwidth], machines=2) -def download(client: machine.Machine, server: machine.Machine, **kwargs) -> str: - """Measure download performance. - - Args: - client: A machine object. - server: A machine object. - **kwargs: Server container options. - - Returns: - The output of iperf. - """ - - client_kwargs = {"network_mode": "host"} - return run_iperf( - client, server, client_kwargs=client_kwargs, server_kwargs=kwargs) diff --git a/benchmarks/suites/redis.py b/benchmarks/suites/redis.py deleted file mode 100644 index b84dd073d..000000000 --- a/benchmarks/suites/redis.py +++ /dev/null @@ -1,46 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Redis benchmarks.""" - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.workloads import redisbenchmark - - -@suites.benchmark(metrics=list(redisbenchmark.METRICS.values()), machines=2) -def redis(server: machine.Machine, - client: machine.Machine, - flags: str = "", - **kwargs) -> str: - """Run redis-benchmark on client pointing at server machine. - - Args: - server: A machine object. - client: A machine object. - flags: Flags to pass redis-benchmark. - **kwargs: Additional container options. - - Returns: - Output from redis-benchmark. - """ - redis_server = server.pull("redis") - redis_client = client.pull("redisbenchmark") - netcat = client.pull("netcat") - with server.container( - redis_server, port=6379, **kwargs).detach() as container: - (host, port) = container.address() - # Wait for the container to be up. - client.container(netcat).run(host=host, port=port) - # Run all redis benchmarks. - return client.container(redis_client).run(host=host, port=port, flags=flags) diff --git a/benchmarks/suites/startup.py b/benchmarks/suites/startup.py deleted file mode 100644 index a1b6c5753..000000000 --- a/benchmarks/suites/startup.py +++ /dev/null @@ -1,110 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Start-up benchmarks.""" - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.suites import helpers - - -# pylint: disable=unused-argument -def startup_time_ms(value, **kwargs): - """Returns average startup time per container in milliseconds. - - Args: - value: The floating point time in seconds. - **kwargs: Ignored. 
- - Returns: - The time given in milliseconds. - """ - return value * 1000 - - -def startup(target: machine.Machine, - workload: str, - count: int = 5, - port: int = 0, - **kwargs): - """Time the startup of some workload. - - Args: - target: A machine object. - workload: The workload to run. - count: Number of containers to start. - port: The port to check for liveness, if provided. - **kwargs: Additional container options. - - Returns: - The mean start-up time in seconds. - """ - # Load before timing. - image = target.pull(workload) - netcat = target.pull("netcat") - count = int(count) - port = int(port) - - with helpers.Timer() as timer: - for _ in range(count): - if not port: - # Run the container synchronously. - target.container(image, **kwargs).run() - else: - # Run a detached container until httpd available. - with target.container(image, port=port, **kwargs).detach() as server: - (server_host, server_port) = server.address() - target.container(netcat).run(host=server_host, port=server_port) - return timer.elapsed() / float(count) - - -@suites.benchmark(metrics=[startup_time_ms], machines=1) -def empty(target: machine.Machine, **kwargs) -> float: - """Time the startup of a trivial container. - - Args: - target: A machine object. - **kwargs: Additional startup options. - - Returns: - The time to run the container. - """ - return startup(target, workload="true", **kwargs) - - -@suites.benchmark(metrics=[startup_time_ms], machines=1) -def node(target: machine.Machine, **kwargs) -> float: - """Time the startup of the node container. - - Args: - target: A machine object. - **kwargs: Additional statup options. - - Returns: - The time to run the container. - """ - return startup(target, workload="node", port=8080, **kwargs) - - -@suites.benchmark(metrics=[startup_time_ms], machines=1) -def ruby(target: machine.Machine, **kwargs) -> float: - """Time the startup of the ruby container. - - Args: - target: A machine object. - **kwargs: Additional startup options. - - Returns: - The time to run the container. - """ - return startup(target, workload="ruby", port=3000, **kwargs) diff --git a/benchmarks/suites/sysbench.py b/benchmarks/suites/sysbench.py deleted file mode 100644 index 2a6e2126c..000000000 --- a/benchmarks/suites/sysbench.py +++ /dev/null @@ -1,119 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Sysbench-based benchmarks.""" - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.workloads import sysbench - - -def run_sysbench(target: machine.Machine, - test: str = "cpu", - threads: int = 8, - time: int = 5, - options: str = "", - **kwargs) -> str: - """Run sysbench container with arguments. - - Args: - target: A machine object. - test: Relevant sysbench test to run (e.g. cpu, memory). - threads: The number of threads to use for tests. - time: The time to run tests. - options: Additional sysbench options. - **kwargs: Additional container options. - - Returns: - The output of the command as a string. 
- """ - image = target.pull("sysbench") - return target.container(image, **kwargs).run( - test=test, threads=threads, time=time, options=options) - - -@suites.benchmark(metrics=[sysbench.cpu_events_per_second], machines=1) -def cpu(target: machine.Machine, max_prime: int = 5000, **kwargs) -> str: - """Run sysbench CPU test. - - Additional arguments can be provided for sysbench. - - Args: - target: A machine object. - max_prime: The maximum prime number to search. - **kwargs: - - threads: The number of threads to use for tests. - - time: The time to run tests. - - options: Additional sysbench options. See sysbench tool: - https://github.com/akopytov/sysbench - - Returns: - Sysbench output. - """ - options = kwargs.pop("options", "") - options += " --cpu-max-prime={}".format(max_prime) - return run_sysbench(target, test="cpu", options=options, **kwargs) - - -@suites.benchmark(metrics=[sysbench.memory_ops_per_second], machines=1) -def memory(target: machine.Machine, **kwargs) -> str: - """Run sysbench memory test. - - Additional arguments can be provided per sysbench. - - Args: - target: A machine object. - **kwargs: - - threads: The number of threads to use for tests. - - time: The time to run tests. - - options: Additional sysbench options. See sysbench tool: - https://github.com/akopytov/sysbench - - Returns: - Sysbench output. - """ - return run_sysbench(target, test="memory", **kwargs) - - -@suites.benchmark( - metrics=[ - sysbench.mutex_time, sysbench.mutex_latency, sysbench.mutex_deviation - ], - machines=1) -def mutex(target: machine.Machine, - locks: int = 4, - count: int = 10000000, - threads: int = 8, - **kwargs) -> str: - """Run sysbench mutex test. - - Additional arguments can be provided per sysbench. - - Args: - target: A machine object. - locks: The number of locks to use. - count: The number of mutexes. - threads: The number of threads to use for tests. - **kwargs: - - time: The time to run tests. - - options: Additional sysbench options. See sysbench tool: - https://github.com/akopytov/sysbench - - Returns: - Sysbench output. - """ - options = kwargs.pop("options", "") - options += " --mutex-loops=1 --mutex-locks={} --mutex-num={}".format( - count, locks) - return run_sysbench( - target, test="mutex", options=options, threads=threads, **kwargs) diff --git a/benchmarks/suites/syscall.py b/benchmarks/suites/syscall.py deleted file mode 100644 index fa7665b00..000000000 --- a/benchmarks/suites/syscall.py +++ /dev/null @@ -1,37 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Syscall microbenchmark.""" - -from benchmarks import suites -from benchmarks.harness import machine -from benchmarks.workloads.syscall import syscall_time_ns - - -@suites.benchmark(metrics=[syscall_time_ns], machines=1) -def syscall(target: machine.Machine, count: int = 1000000, **kwargs) -> str: - """Runs the syscall workload and report the syscall time. 
- - Runs the syscall 'SYS_gettimeofday(0,0)' 'count' times and monitors time - elapsed based on the runtime's MONOTONIC clock. - - Args: - target: A machine object. - count: The number of syscalls to execute. - **kwargs: Additional container options. - - Returns: - Container output. - """ - image = target.pull("syscall") - return target.container(image, **kwargs).run(count=count) diff --git a/benchmarks/tcp/BUILD b/benchmarks/tcp/BUILD deleted file mode 100644 index 6dde7d9e6..000000000 --- a/benchmarks/tcp/BUILD +++ /dev/null @@ -1,41 +0,0 @@ -load("//tools:defs.bzl", "cc_binary", "go_binary") - -package(licenses = ["notice"]) - -go_binary( - name = "tcp_proxy", - srcs = ["tcp_proxy.go"], - visibility = ["//:sandbox"], - deps = [ - "//pkg/tcpip", - "//pkg/tcpip/adapters/gonet", - "//pkg/tcpip/link/fdbased", - "//pkg/tcpip/link/qdisc/fifo", - "//pkg/tcpip/network/arp", - "//pkg/tcpip/network/ipv4", - "//pkg/tcpip/stack", - "//pkg/tcpip/transport/tcp", - "//pkg/tcpip/transport/udp", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -# nsjoin is a trivial replacement for nsenter. This is used because nsenter is -# not available on all systems where this benchmark is run (and we aim to -# minimize external dependencies.) - -cc_binary( - name = "nsjoin", - srcs = ["nsjoin.c"], - visibility = ["//:sandbox"], -) - -sh_binary( - name = "tcp_benchmark", - srcs = ["tcp_benchmark.sh"], - data = [ - ":nsjoin", - ":tcp_proxy", - ], - visibility = ["//:sandbox"], -) diff --git a/benchmarks/tcp/README.md b/benchmarks/tcp/README.md deleted file mode 100644 index 38e6e69f0..000000000 --- a/benchmarks/tcp/README.md +++ /dev/null @@ -1,87 +0,0 @@ -# TCP Benchmarks - -This directory contains a standardized TCP benchmark. This helps to evaluate the -performance of netstack and native networking stacks under various conditions. - -## `tcp_benchmark` - -This benchmark allows TCP throughput testing under various conditions. The setup -consists of an iperf client, a client proxy, a server proxy and an iperf server. -The client proxy and server proxy abstract the network mechanism used to -communicate between the iperf client and server. - -The setup looks like the following: - -``` - +--------------+ (native) +--------------+ - | iperf client |[lo @ 10.0.0.1]------>| client proxy | - +--------------+ +--------------+ - [client.0 @ 10.0.0.2] - (netstack) | | (native) - +------+-----+ - | - [br0] - | - Network emulation applied ---> [wan.0:wan.1] - | - [br1] - | - +------+-----+ - (netstack) | | (native) - [server.0 @ 10.0.0.3] - +--------------+ +--------------+ - | iperf server |<------[lo @ 10.0.0.4]| server proxy | - +--------------+ (native) +--------------+ -``` - -Different configurations can be run using different arguments. For example: - -* Native test under normal internet conditions: `tcp_benchmark` -* Native test under ideal conditions: `tcp_benchmark --ideal` -* Netstack client under ideal conditions: `tcp_benchmark --client --ideal` -* Netstack client with 5% packet loss: `tcp_benchmark --client --ideal --loss - 5` - -Use `tcp_benchmark --help` for full arguments. - -This tool may be used to easily generate data for graphing. 
For example, to -generate a CSV for various latencies, you might do: - -``` -rm -f /tmp/netstack_latency.csv /tmp/native_latency.csv -latencies=$(seq 0 5 50; - seq 60 10 100; - seq 125 25 250; - seq 300 50 500) -for latency in $latencies; do - read throughput client_cpu server_cpu <<< \ - $(./tcp_benchmark --duration 30 --client --ideal --latency $latency) - echo $latency,$throughput,$client_cpu >> /tmp/netstack_latency.csv -done -for latency in $latencies; do - read throughput client_cpu server_cpu <<< \ - $(./tcp_benchmark --duration 30 --ideal --latency $latency) - echo $latency,$throughput,$client_cpu >> /tmp/native_latency.csv -done -``` - -Similarly, to generate a CSV for various levels of packet loss, the following -would be appropriate: - -``` -rm -f /tmp/netstack_loss.csv /tmp/native_loss.csv -losses=$(seq 0 0.1 1.0; - seq 1.2 0.2 2.0; - seq 2.5 0.5 5.0; - seq 6.0 1.0 10.0) -for loss in $losses; do - read throughput client_cpu server_cpu <<< \ - $(./tcp_benchmark --duration 30 --client --ideal --latency 10 --loss $loss) - echo $loss,$throughput,$client_cpu >> /tmp/netstack_loss.csv -done -for loss in $losses; do - read throughput client_cpu server_cpu <<< \ - $(./tcp_benchmark --duration 30 --ideal --latency 10 --loss $loss) - echo $loss,$throughput,$client_cpu >> /tmp/native_loss.csv -done -``` diff --git a/benchmarks/tcp/nsjoin.c b/benchmarks/tcp/nsjoin.c deleted file mode 100644 index 524b4d549..000000000 --- a/benchmarks/tcp/nsjoin.c +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -int main(int argc, char** argv) { - if (argc <= 2) { - fprintf(stderr, "error: must provide a namespace file.\n"); - fprintf(stderr, "usage: %s [arguments...]\n", argv[0]); - return 1; - } - - int fd = open(argv[1], O_RDONLY); - if (fd < 0) { - fprintf(stderr, "error opening %s: %s\n", argv[1], strerror(errno)); - return 1; - } - if (setns(fd, 0) < 0) { - fprintf(stderr, "error joining %s: %s\n", argv[1], strerror(errno)); - return 1; - } - - execvp(argv[2], &argv[2]); - return 1; -} diff --git a/benchmarks/tcp/tcp_benchmark.sh b/benchmarks/tcp/tcp_benchmark.sh deleted file mode 100755 index ef04b4ace..000000000 --- a/benchmarks/tcp/tcp_benchmark.sh +++ /dev/null @@ -1,392 +0,0 @@ -#!/bin/bash - -# Copyright 2018 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
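The CSV files produced by the loops above all share the same shape (latency or loss, then throughput, then client CPU load per row), so comparing the two stacks is a one-screen script. The helper below only restates that format; the file names are the ones from the latency example, and the script itself is not part of the removed benchmark.

```
# Print netstack and native throughput side by side for each latency,
# reading the CSVs generated by the README loops above.
import csv


def load(path):
    # Each row is: latency_ms,throughput_mbits,client_cpu_load
    with open(path, newline="") as f:
        return {float(row[0]): float(row[1]) for row in csv.reader(f) if row}


netstack = load("/tmp/netstack_latency.csv")
native = load("/tmp/native_latency.csv")

print("latency_ms netstack_mbits native_mbits")
for latency in sorted(set(netstack) | set(native)):
    print(latency, netstack.get(latency, "-"), native.get(latency, "-"))
```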
- -# TCP benchmark; see README.md for documentation. - -# Fixed parameters. -iperf_port=45201 # Not likely to be privileged. -proxy_port=44000 # Ditto. -client_addr=10.0.0.1 -client_proxy_addr=10.0.0.2 -server_proxy_addr=10.0.0.3 -server_addr=10.0.0.4 -mask=8 - -# Defaults; this provides a reasonable approximation of a decent internet link. -# Parameters can be varied independently from this set to see response to -# various changes in the kind of link available. -client=false -server=false -verbose=false -gso=0 -swgso=false -mtu=1280 # 1280 is a reasonable lowest-common-denominator. -latency=10 # 10ms approximates a fast, dedicated connection. -latency_variation=1 # +/- 1ms is a relatively low amount of jitter. -loss=0.1 # 0.1% loss is non-zero, but not extremely high. -duplicate=0.1 # 0.1% means duplicates are 1/10x as frequent as losses. -duration=30 # 30s is enough time to consistent results (experimentally). -helper_dir=$(dirname $0) -netstack_opts= -disable_linux_gso= -num_client_threads=1 - -# Check for netem support. -lsmod_output=$(lsmod | grep sch_netem) -if [ "$?" != "0" ]; then - echo "warning: sch_netem may not be installed." >&2 -fi - -while [ $# -gt 0 ]; do - case "$1" in - --client) - client=true - ;; - --client_tcp_probe_file) - shift - netstack_opts="${netstack_opts} -client_tcp_probe_file=$1" - ;; - --server) - server=true - ;; - --verbose) - verbose=true - ;; - --gso) - shift - gso=$1 - ;; - --swgso) - swgso=true - ;; - --server_tcp_probe_file) - shift - netstack_opts="${netstack_opts} -server_tcp_probe_file=$1" - ;; - --ideal) - mtu=1500 # Standard ethernet. - latency=0 # No latency. - latency_variation=0 # No jitter. - loss=0 # No loss. - duplicate=0 # No duplicates. - ;; - --mtu) - shift - [ "$#" -le 0 ] && echo "no mtu provided" && exit 1 - mtu=$1 - ;; - --sack) - netstack_opts="${netstack_opts} -sack" - ;; - --cubic) - netstack_opts="${netstack_opts} -cubic" - ;; - --moderate-recv-buf) - netstack_opts="${netstack_opts} -moderate_recv_buf" - ;; - --duration) - shift - [ "$#" -le 0 ] && echo "no duration provided" && exit 1 - duration=$1 - ;; - --latency) - shift - [ "$#" -le 0 ] && echo "no latency provided" && exit 1 - latency=$1 - ;; - --latency-variation) - shift - [ "$#" -le 0 ] && echo "no latency variation provided" && exit 1 - latency_variation=$1 - ;; - --loss) - shift - [ "$#" -le 0 ] && echo "no loss probability provided" && exit 1 - loss=$1 - ;; - --duplicate) - shift - [ "$#" -le 0 ] && echo "no duplicate provided" && exit 1 - duplicate=$1 - ;; - --cpuprofile) - shift - netstack_opts="${netstack_opts} -cpuprofile=$1" - ;; - --memprofile) - shift - netstack_opts="${netstack_opts} -memprofile=$1" - ;; - --disable-linux-gso) - disable_linux_gso=1 - ;; - --num-client-threads) - shift - num_client_threads=$1 - ;; - --helpers) - shift - [ "$#" -le 0 ] && echo "no helper dir provided" && exit 1 - helper_dir=$1 - ;; - *) - echo "usage: $0 [options]" - echo "options:" - echo " --help show this message" - echo " --verbose verbose output" - echo " --client use netstack as the client" - echo " --ideal reset all network emulation" - echo " --server use netstack as the server" - echo " --mtu set the mtu (bytes)" - echo " --sack enable SACK support" - echo " --moderate-recv-buf enable TCP receive buffer auto-tuning" - echo " --cubic enable CUBIC congestion control for Netstack" - echo " --duration set the test duration (s)" - echo " --latency set the latency (ms)" - echo " --latency-variation set the latency variation" - echo " --loss set the loss probability (%)" - 
echo " --duplicate set the duplicate probability (%)" - echo " --helpers set the helper directory" - echo " --num-client-threads number of parallel client threads to run" - echo " --disable-linux-gso disable segmentation offload in the Linux network stack" - echo "" - echo "The output will of the script will be:" - echo " " - exit 1 - esac - shift -done - -if [ ${verbose} == "true" ]; then - set -x -fi - -# Latency needs to be halved, since it's applied on both ways. -half_latency=$(echo ${latency}/2 | bc -l | awk '{printf "%1.2f", $0}') -half_loss=$(echo ${loss}/2 | bc -l | awk '{printf "%1.6f", $0}') -half_duplicate=$(echo ${duplicate}/2 | bc -l | awk '{printf "%1.6f", $0}') -helper_dir=${helper_dir#$(pwd)/} # Use relative paths. -proxy_binary=${helper_dir}/tcp_proxy -nsjoin_binary=${helper_dir}/nsjoin - -if [ ! -e ${proxy_binary} ]; then - echo "Could not locate ${proxy_binary}, please make sure you've built the binary" - exit 1 -fi - -if [ ! -e ${nsjoin_binary} ]; then - echo "Could not locate ${nsjoin_binary}, please make sure you've built the binary" - exit 1 -fi - -if [ $(echo ${latency_variation} | awk '{printf "%1.2f", $0}') != "0.00" ]; then - # As long as there's some jitter, then we use the paretonormal distribution. - # This will preserve the minimum RTT, but add a realistic amount of jitter to - # the connection and cause re-ordering, etc. The regular pareto distribution - # appears to an unreasonable level of delay (we want only small spikes.) - distribution="distribution paretonormal" -else - distribution="" -fi - -# Client proxy that will listen on the client's iperf target forward traffic -# using the host networking stack. -client_args="${proxy_binary} -port ${proxy_port} -forward ${server_proxy_addr}:${proxy_port}" -if ${client}; then - # Client proxy that will listen on the client's iperf target - # and forward traffic using netstack. - client_args="${proxy_binary} ${netstack_opts} -port ${proxy_port} -client \\ - -mtu ${mtu} -iface client.0 -addr ${client_proxy_addr} -mask ${mask} \\ - -forward ${server_proxy_addr}:${proxy_port} -gso=${gso} -swgso=${swgso}" -fi - -# Server proxy that will listen on the proxy port and forward to the server's -# iperf server using the host networking stack. -server_args="${proxy_binary} -port ${proxy_port} -forward ${server_addr}:${iperf_port}" -if ${server}; then - # Server proxy that will listen on the proxy port and forward to the servers' - # iperf server using netstack. - server_args="${proxy_binary} ${netstack_opts} -port ${proxy_port} -server \\ - -mtu ${mtu} -iface server.0 -addr ${server_proxy_addr} -mask ${mask} \\ - -forward ${server_addr}:${iperf_port} -gso=${gso} -swgso=${swgso}" -fi - -# Specify loss and duplicate parameters only if they are non-zero -loss_opt="" -if [ "$(echo $half_loss | bc -q)" != "0" ]; then - loss_opt="loss random ${half_loss}%" -fi -duplicate_opt="" -if [ "$(echo $half_duplicate | bc -q)" != "0" ]; then - duplicate_opt="duplicate ${half_duplicate}%" -fi - -exec unshare -U -m -n -r -f -p --mount-proc /bin/bash << EOF -set -e -m - -if [ ${verbose} == "true" ]; then - set -x -fi - -mount -t tmpfs netstack-bench /tmp - -# We may have reset the path in the unshare if the shell loaded some public -# profiles. Ensure that tools are discoverable via the parent's PATH. -export PATH=${PATH} - -# Add client, server interfaces. -ip link add client.0 type veth peer name client.1 -ip link add server.0 type veth peer name server.1 - -# Add network emulation devices. 
-ip link add wan.0 type veth peer name wan.1 -ip link set wan.0 up -ip link set wan.1 up - -# Enroll on the bridge. -ip link add name br0 type bridge -ip link add name br1 type bridge -ip link set client.1 master br0 -ip link set server.1 master br1 -ip link set wan.0 master br0 -ip link set wan.1 master br1 -ip link set br0 up -ip link set br1 up - -# Set the MTU appropriately. -ip link set client.0 mtu ${mtu} -ip link set server.0 mtu ${mtu} -ip link set wan.0 mtu ${mtu} -ip link set wan.1 mtu ${mtu} - -# Add appropriate latency, loss and duplication. -# -# This is added in at the point of bridge connection. -for device in wan.0 wan.1; do - # NOTE: We don't support a loss correlation as testing has shown that it - # actually doesn't work. The man page actually has a small comment about this - # "It is also possible to add a correlation, but this option is now deprecated - # due to the noticed bad behavior." For more information see netem(8). - tc qdisc add dev \$device root netem \\ - delay ${half_latency}ms ${latency_variation}ms ${distribution} \\ - ${loss_opt} ${duplicate_opt} -done - -# Start a client proxy. -touch /tmp/client.netns -unshare -n mount --bind /proc/self/ns/net /tmp/client.netns - -# Move the endpoint into the namespace. -while ip link | grep client.0 > /dev/null; do - ip link set dev client.0 netns /tmp/client.netns -done - -if ! ${client}; then - # Only add the address to NIC if netstack is not in use. Otherwise the host - # will also process the inbound SYN and send a RST back. - ${nsjoin_binary} /tmp/client.netns ip addr add ${client_proxy_addr}/${mask} dev client.0 -fi - -# Start a server proxy. -touch /tmp/server.netns -unshare -n mount --bind /proc/self/ns/net /tmp/server.netns -# Move the endpoint into the namespace. -while ip link | grep server.0 > /dev/null; do - ip link set dev server.0 netns /tmp/server.netns -done -if ! ${server}; then - # Only add the address to NIC if netstack is not in use. Otherwise the host - # will also process the inbound SYN and send a RST back. - ${nsjoin_binary} /tmp/server.netns ip addr add ${server_proxy_addr}/${mask} dev server.0 -fi - -# Add client and server addresses, and bring everything up. -${nsjoin_binary} /tmp/client.netns ip addr add ${client_addr}/${mask} dev client.0 -${nsjoin_binary} /tmp/server.netns ip addr add ${server_addr}/${mask} dev server.0 -if [ "${disable_linux_gso}" == "1" ]; then - ${nsjoin_binary} /tmp/client.netns ethtool -K client.0 tso off - ${nsjoin_binary} /tmp/client.netns ethtool -K client.0 gro off - ${nsjoin_binary} /tmp/client.netns ethtool -K client.0 gso off - ${nsjoin_binary} /tmp/server.netns ethtool -K server.0 tso off - ${nsjoin_binary} /tmp/server.netns ethtool -K server.0 gso off - ${nsjoin_binary} /tmp/server.netns ethtool -K server.0 gro off -fi -${nsjoin_binary} /tmp/client.netns ip link set client.0 up -${nsjoin_binary} /tmp/client.netns ip link set lo up -${nsjoin_binary} /tmp/server.netns ip link set server.0 up -${nsjoin_binary} /tmp/server.netns ip link set lo up -ip link set dev client.1 up -ip link set dev server.1 up - -${nsjoin_binary} /tmp/client.netns ${client_args} & -client_pid=\$! -${nsjoin_binary} /tmp/server.netns ${server_args} & -server_pid=\$! - -# Start the iperf server. -${nsjoin_binary} /tmp/server.netns iperf -p ${iperf_port} -s >&2 & -iperf_pid=\$! - -# Show traffic information. -if ! ${client} && ! 
${server}; then - ${nsjoin_binary} /tmp/client.netns ping -c 100 -i 0.001 -W 1 ${server_addr} >&2 || true -fi - -results_file=\$(mktemp) -function cleanup { - rm -f \$results_file - kill -TERM \$client_pid - kill -TERM \$server_pid - wait \$client_pid - wait \$server_pid - kill -9 \$iperf_pid 2>/dev/null -} - -# Allow failure from this point. -set +e -trap cleanup EXIT - -# Run the benchmark, recording the results file. -while ${nsjoin_binary} /tmp/client.netns iperf \\ - -p ${proxy_port} -c ${client_addr} -t ${duration} -f m -P ${num_client_threads} 2>&1 \\ - | tee \$results_file \\ - | grep "connect failed" >/dev/null; do - sleep 0.1 # Wait for all services. -done - -# Unlink all relevant devices from the bridge. This is because when the bridge -# is deleted, the kernel may hang. It appears that this problem is fixed in -# upstream commit 1ce5cce895309862d2c35d922816adebe094fe4a. -ip link set client.1 nomaster -ip link set server.1 nomaster -ip link set wan.0 nomaster -ip link set wan.1 nomaster - -# Emit raw results. -cat \$results_file >&2 - -# Emit a useful result (final throughput). -mbits=\$(grep Mbits/sec \$results_file \\ - | sed -n -e 's/^.*[[:space:]]\\([[:digit:]]\\+\\(\\.[[:digit:]]\\+\\)\\?\\)[[:space:]]*Mbits\\/sec.*/\\1/p') -client_cpu_ticks=\$(cat /proc/\$client_pid/stat \\ - | awk '{print (\$14+\$15);}') -server_cpu_ticks=\$(cat /proc/\$server_pid/stat \\ - | awk '{print (\$14+\$15);}') -ticks_per_sec=\$(getconf CLK_TCK) -client_cpu_load=\$(bc -l <<< \$client_cpu_ticks/\$ticks_per_sec/${duration}) -server_cpu_load=\$(bc -l <<< \$server_cpu_ticks/\$ticks_per_sec/${duration}) -echo \$mbits \$client_cpu_load \$server_cpu_load -EOF diff --git a/benchmarks/tcp/tcp_proxy.go b/benchmarks/tcp/tcp_proxy.go deleted file mode 100644 index 4b7ca7a14..000000000 --- a/benchmarks/tcp/tcp_proxy.go +++ /dev/null @@ -1,451 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Binary tcp_proxy is a simple TCP proxy. -package main - -import ( - "encoding/gob" - "flag" - "fmt" - "io" - "log" - "math/rand" - "net" - "os" - "os/signal" - "regexp" - "runtime" - "runtime/pprof" - "strconv" - "syscall" - "time" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet" - "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" - "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo" - "gvisor.dev/gvisor/pkg/tcpip/network/arp" - "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" - "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" - "gvisor.dev/gvisor/pkg/tcpip/transport/udp" -) - -var ( - port = flag.Int("port", 0, "bind port (all addresses)") - forward = flag.String("forward", "", "forwarding target") - client = flag.Bool("client", false, "use netstack for listen") - server = flag.Bool("server", false, "use netstack for dial") - - // Netstack-specific options. 
- mtu = flag.Int("mtu", 1280, "mtu for network stack") - addr = flag.String("addr", "", "address for tap-based netstack") - mask = flag.Int("mask", 8, "mask size for address") - iface = flag.String("iface", "", "network interface name to bind for netstack") - sack = flag.Bool("sack", false, "enable SACK support for netstack") - moderateRecvBuf = flag.Bool("moderate_recv_buf", false, "enable TCP Receive Buffer Auto-tuning") - cubic = flag.Bool("cubic", false, "enable use of CUBIC congestion control for netstack") - gso = flag.Int("gso", 0, "GSO maximum size") - swgso = flag.Bool("swgso", false, "software-level GSO") - clientTCPProbeFile = flag.String("client_tcp_probe_file", "", "if specified, installs a tcp probe to dump endpoint state to the specified file.") - serverTCPProbeFile = flag.String("server_tcp_probe_file", "", "if specified, installs a tcp probe to dump endpoint state to the specified file.") - cpuprofile = flag.String("cpuprofile", "", "write cpu profile to the specified file.") - memprofile = flag.String("memprofile", "", "write memory profile to the specified file.") -) - -type impl interface { - dial(address string) (net.Conn, error) - listen(port int) (net.Listener, error) - printStats() -} - -type netImpl struct{} - -func (netImpl) dial(address string) (net.Conn, error) { - return net.Dial("tcp", address) -} - -func (netImpl) listen(port int) (net.Listener, error) { - return net.Listen("tcp", fmt.Sprintf(":%d", port)) -} - -func (netImpl) printStats() { -} - -const ( - nicID = 1 // Fixed. - bufSize = 4 << 20 // 4MB. -) - -type netstackImpl struct { - s *stack.Stack - addr tcpip.Address - mode string -} - -func setupNetwork(ifaceName string, numChannels int) (fds []int, err error) { - // Get all interfaces in the namespace. - ifaces, err := net.Interfaces() - if err != nil { - return nil, fmt.Errorf("querying interfaces: %v", err) - } - - for _, iface := range ifaces { - if iface.Name != ifaceName { - continue - } - // Create the socket. - const protocol = 0x0300 // htons(ETH_P_ALL) - fds := make([]int, numChannels) - for i := range fds { - fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol) - if err != nil { - return nil, fmt.Errorf("unable to create raw socket: %v", err) - } - - // Bind to the appropriate device. - ll := syscall.SockaddrLinklayer{ - Protocol: protocol, - Ifindex: iface.Index, - Pkttype: syscall.PACKET_HOST, - } - if err := syscall.Bind(fd, &ll); err != nil { - return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err) - } - - // RAW Sockets by default have a very small SO_RCVBUF of 256KB, - // up it to at least 4MB to reduce packet drops. - if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF, bufSize); err != nil { - return nil, fmt.Errorf("setsockopt(..., SO_RCVBUF, %v,..) = %v", bufSize, err) - } - - if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF, bufSize); err != nil { - return nil, fmt.Errorf("setsockopt(..., SO_SNDBUF, %v,..) = %v", bufSize, err) - } - - if !*swgso && *gso != 0 { - if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil { - return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err) - } - } - fds[i] = fd - } - return fds, nil - } - return nil, fmt.Errorf("failed to find interface: %v", ifaceName) -} - -func newNetstackImpl(mode string) (impl, error) { - fds, err := setupNetwork(*iface, runtime.GOMAXPROCS(-1)) - if err != nil { - return nil, err - } - - // Parse details. 
- parsedAddr := tcpip.Address(net.ParseIP(*addr).To4()) - parsedDest := tcpip.Address("") // Filled in below. - parsedMask := tcpip.AddressMask("") // Filled in below. - switch *mask { - case 8: - parsedDest = tcpip.Address([]byte{parsedAddr[0], 0, 0, 0}) - parsedMask = tcpip.AddressMask([]byte{0xff, 0, 0, 0}) - case 16: - parsedDest = tcpip.Address([]byte{parsedAddr[0], parsedAddr[1], 0, 0}) - parsedMask = tcpip.AddressMask([]byte{0xff, 0xff, 0, 0}) - case 24: - parsedDest = tcpip.Address([]byte{parsedAddr[0], parsedAddr[1], parsedAddr[2], 0}) - parsedMask = tcpip.AddressMask([]byte{0xff, 0xff, 0xff, 0}) - default: - // This is just laziness; we don't expect a different mask. - return nil, fmt.Errorf("mask %d not supported", mask) - } - - // Create a new network stack. - netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), arp.NewProtocol()} - transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol()} - s := stack.New(stack.Options{ - NetworkProtocols: netProtos, - TransportProtocols: transProtos, - }) - - // Generate a new mac for the eth device. - mac := make(net.HardwareAddr, 6) - rand.Read(mac) // Fill with random data. - mac[0] &^= 0x1 // Clear multicast bit. - mac[0] |= 0x2 // Set local assignment bit (IEEE802). - ep, err := fdbased.New(&fdbased.Options{ - FDs: fds, - MTU: uint32(*mtu), - EthernetHeader: true, - Address: tcpip.LinkAddress(mac), - // Enable checksum generation as we need to generate valid - // checksums for the veth device to deliver our packets to the - // peer. But we do want to disable checksum verification as veth - // devices do perform GRO and the linux host kernel may not - // regenerate valid checksums after GRO. - TXChecksumOffload: false, - RXChecksumOffload: true, - PacketDispatchMode: fdbased.RecvMMsg, - GSOMaxSize: uint32(*gso), - SoftwareGSOEnabled: *swgso, - }) - if err != nil { - return nil, fmt.Errorf("failed to create FD endpoint: %v", err) - } - if err := s.CreateNIC(nicID, fifo.New(ep, runtime.GOMAXPROCS(0), 1000)); err != nil { - return nil, fmt.Errorf("error creating NIC %q: %v", *iface, err) - } - if err := s.AddAddress(nicID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil { - return nil, fmt.Errorf("error adding ARP address to %q: %v", *iface, err) - } - if err := s.AddAddress(nicID, ipv4.ProtocolNumber, parsedAddr); err != nil { - return nil, fmt.Errorf("error adding IP address to %q: %v", *iface, err) - } - - subnet, err := tcpip.NewSubnet(parsedDest, parsedMask) - if err != nil { - return nil, fmt.Errorf("tcpip.Subnet(%s, %s): %s", parsedDest, parsedMask, err) - } - // Add default route; we only support - s.SetRouteTable([]tcpip.Route{ - { - Destination: subnet, - NIC: nicID, - }, - }) - - // Set protocol options. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(*sack)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %s", err) - } - - // Enable Receive Buffer Auto-Tuning. - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(*moderateRecvBuf)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err) - } - - // Set Congestion Control to cubic if requested. 
- if *cubic { - if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.CongestionControlOption("cubic")); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %s", err) - } - } - - return netstackImpl{ - s: s, - addr: parsedAddr, - mode: mode, - }, nil -} - -func (n netstackImpl) dial(address string) (net.Conn, error) { - host, port, err := net.SplitHostPort(address) - if err != nil { - return nil, err - } - if host == "" { - // A host must be provided for the dial. - return nil, fmt.Errorf("no host provided") - } - portNumber, err := strconv.Atoi(port) - if err != nil { - return nil, err - } - addr := tcpip.FullAddress{ - NIC: nicID, - Addr: tcpip.Address(net.ParseIP(host).To4()), - Port: uint16(portNumber), - } - conn, err := gonet.DialTCP(n.s, addr, ipv4.ProtocolNumber) - if err != nil { - return nil, err - } - return conn, nil -} - -func (n netstackImpl) listen(port int) (net.Listener, error) { - addr := tcpip.FullAddress{ - NIC: nicID, - Port: uint16(port), - } - listener, err := gonet.ListenTCP(n.s, addr, ipv4.ProtocolNumber) - if err != nil { - return nil, err - } - return listener, nil -} - -var zeroFieldsRegexp = regexp.MustCompile(`\s*[a-zA-Z0-9]*:0`) - -func (n netstackImpl) printStats() { - // Don't show zero fields. - stats := zeroFieldsRegexp.ReplaceAllString(fmt.Sprintf("%+v", n.s.Stats()), "") - log.Printf("netstack %s Stats: %+v\n", n.mode, stats) -} - -// installProbe installs a TCP Probe function that will dump endpoint -// state to the specified file. It also returns a close func() that -// can be used to close the probeFile. -func (n netstackImpl) installProbe(probeFileName string) (close func()) { - // Install Probe to dump out end point state. - probeFile, err := os.Create(probeFileName) - if err != nil { - log.Fatalf("failed to create tcp_probe file %s: %v", probeFileName, err) - } - probeEncoder := gob.NewEncoder(probeFile) - // Install a TCP Probe. - n.s.AddTCPProbe(func(state stack.TCPEndpointState) { - probeEncoder.Encode(state) - }) - return func() { probeFile.Close() } -} - -func main() { - flag.Parse() - if *port == 0 { - log.Fatalf("no port provided") - } - if *forward == "" { - log.Fatalf("no forward provided") - } - // Seed the random number generator to ensure that we are given MAC addresses that don't - // for the case of the client and server stack. - rand.Seed(time.Now().UTC().UnixNano()) - - if *cpuprofile != "" { - f, err := os.Create(*cpuprofile) - if err != nil { - log.Fatal("could not create CPU profile: ", err) - } - defer func() { - if err := f.Close(); err != nil { - log.Print("error closing CPU profile: ", err) - } - }() - if err := pprof.StartCPUProfile(f); err != nil { - log.Fatal("could not start CPU profile: ", err) - } - defer pprof.StopCPUProfile() - } - - var ( - in impl - out impl - err error - ) - if *server { - in, err = newNetstackImpl("server") - if *serverTCPProbeFile != "" { - defer in.(netstackImpl).installProbe(*serverTCPProbeFile)() - } - - } else { - in = netImpl{} - } - if err != nil { - log.Fatalf("netstack error: %v", err) - } - if *client { - out, err = newNetstackImpl("client") - if *clientTCPProbeFile != "" { - defer out.(netstackImpl).installProbe(*clientTCPProbeFile)() - } - } else { - out = netImpl{} - } - if err != nil { - log.Fatalf("netstack error: %v", err) - } - - // Dial forward before binding. 
- var next net.Conn - for { - next, err = out.dial(*forward) - if err == nil { - break - } - time.Sleep(50 * time.Millisecond) - log.Printf("connect failed retrying: %v", err) - } - - // Bind once to the server socket. - listener, err := in.listen(*port) - if err != nil { - // Should not happen, everything must be bound by this time - // this proxy is started. - log.Fatalf("unable to listen: %v", err) - } - log.Printf("client=%v, server=%v, ready.", *client, *server) - - sigs := make(chan os.Signal, 1) - signal.Notify(sigs, syscall.SIGTERM) - go func() { - <-sigs - if *cpuprofile != "" { - pprof.StopCPUProfile() - } - if *memprofile != "" { - f, err := os.Create(*memprofile) - if err != nil { - log.Fatal("could not create memory profile: ", err) - } - defer func() { - if err := f.Close(); err != nil { - log.Print("error closing memory profile: ", err) - } - }() - runtime.GC() // get up-to-date statistics - if err := pprof.WriteHeapProfile(f); err != nil { - log.Fatalf("Unable to write heap profile: %v", err) - } - } - os.Exit(0) - }() - - for { - // Forward all connections. - inConn, err := listener.Accept() - if err != nil { - // This should not happen; we are listening - // successfully. Exhausted all available FDs? - log.Fatalf("accept error: %v", err) - } - log.Printf("incoming connection established.") - - // Copy both ways. - go io.Copy(inConn, next) - go io.Copy(next, inConn) - - // Print stats every second. - go func() { - t := time.NewTicker(time.Second) - defer t.Stop() - for { - <-t.C - in.printStats() - out.printStats() - } - }() - - for { - // Dial again. - next, err = out.dial(*forward) - if err == nil { - break - } - } - } -} diff --git a/benchmarks/workloads/BUILD b/benchmarks/workloads/BUILD deleted file mode 100644 index ccb86af5b..000000000 --- a/benchmarks/workloads/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "workloads", - srcs = ["__init__.py"], -) - -filegroup( - name = "files", - srcs = [ - "//benchmarks/workloads/ab:tar", - "//benchmarks/workloads/absl:tar", - "//benchmarks/workloads/curl:tar", - "//benchmarks/workloads/ffmpeg:tar", - "//benchmarks/workloads/fio:tar", - "//benchmarks/workloads/httpd:tar", - "//benchmarks/workloads/iperf:tar", - "//benchmarks/workloads/netcat:tar", - "//benchmarks/workloads/nginx:tar", - "//benchmarks/workloads/node:tar", - "//benchmarks/workloads/node_template:tar", - "//benchmarks/workloads/redis:tar", - "//benchmarks/workloads/redisbenchmark:tar", - "//benchmarks/workloads/ruby:tar", - "//benchmarks/workloads/ruby_template:tar", - "//benchmarks/workloads/sleep:tar", - "//benchmarks/workloads/sysbench:tar", - "//benchmarks/workloads/syscall:tar", - "//benchmarks/workloads/tensorflow:tar", - "//benchmarks/workloads/true:tar", - ], -) diff --git a/benchmarks/workloads/__init__.py b/benchmarks/workloads/__init__.py deleted file mode 100644 index e12651e76..000000000 --- a/benchmarks/workloads/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""Workloads, parsers and test data.""" diff --git a/benchmarks/workloads/ab/BUILD b/benchmarks/workloads/ab/BUILD deleted file mode 100644 index 945ac7026..000000000 --- a/benchmarks/workloads/ab/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar", "py_library", "py_test") -load("//benchmarks:defs.bzl", "test_deps") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "ab", - srcs = ["__init__.py"], -) - -py_test( - name = "ab_test", - srcs = ["ab_test.py"], - python_version = "PY3", - deps = test_deps + [ - ":ab", - ], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/ab/Dockerfile b/benchmarks/workloads/ab/Dockerfile deleted file mode 100644 index 0d0b6e2eb..000000000 --- a/benchmarks/workloads/ab/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - apache2-utils \ - && rm -rf /var/lib/apt/lists/* - -# Parameterized workload. -ENV requests 5000 -ENV connections 10 -ENV host localhost -ENV port 8080 -ENV path notfound -CMD ["sh", "-c", "ab -n ${requests} -c ${connections} http://${host}:${port}/${path}"] diff --git a/benchmarks/workloads/ab/__init__.py b/benchmarks/workloads/ab/__init__.py deleted file mode 100644 index eedf8e083..000000000 --- a/benchmarks/workloads/ab/__init__.py +++ /dev/null @@ -1,88 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
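The ab image above is driven entirely by environment variables that the harness passes as container options; its CMD is just string substitution into an `ab` invocation. The sketch below mirrors that contract so the parameterization is visible at a glance; it is an illustration, not code from this tree, and the defaults are the Dockerfile's ENV values.

```
# Mirror of the ab container's CMD: ab -n ${requests} -c ${connections}
# http://${host}:${port}/${path}. Purely illustrative.
def ab_command(requests=5000, connections=10, host="localhost", port=8080,
               path="notfound"):
    return "ab -n {} -c {} http://{}:{}/{}".format(
        requests, connections, host, port, path)


# Example matching the sample run recorded in the parser below.
print(ab_command(host="10.10.10.10", port=80, path="latin10k.txt"))
```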
-"""Apachebench tool.""" - -import re - -SAMPLE_DATA = """This is ApacheBench, Version 2.3 <$Revision: 1826891 $> -Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ -Licensed to The Apache Software Foundation, http://www.apache.org/ - -Benchmarking 10.10.10.10 (be patient).....done - - -Server Software: Apache/2.4.38 -Server Hostname: 10.10.10.10 -Server Port: 80 - -Document Path: /latin10k.txt -Document Length: 210 bytes - -Concurrency Level: 1 -Time taken for tests: 0.180 seconds -Complete requests: 100 -Failed requests: 0 -Non-2xx responses: 100 -Total transferred: 38800 bytes -HTML transferred: 21000 bytes -Requests per second: 556.44 [#/sec] (mean) -Time per request: 1.797 [ms] (mean) -Time per request: 1.797 [ms] (mean, across all concurrent requests) -Transfer rate: 210.84 [Kbytes/sec] received - -Connection Times (ms) - min mean[+/-sd] median max -Connect: 0 0 0.2 0 2 -Processing: 1 2 1.0 1 8 -Waiting: 1 1 1.0 1 7 -Total: 1 2 1.2 1 10 - -Percentage of the requests served within a certain time (ms) - 50% 1 - 66% 2 - 75% 2 - 80% 2 - 90% 2 - 95% 3 - 98% 7 - 99% 10 - 100% 10 (longest request)""" - - -# pylint: disable=unused-argument -def sample(**kwargs) -> str: - return SAMPLE_DATA - - -# pylint: disable=unused-argument -def transfer_rate(data: str, **kwargs) -> float: - """Mean transfer rate in Kbytes/sec.""" - regex = r"Transfer rate:\s+(\d+\.?\d+?)\s+\[Kbytes/sec\]\s+received" - return float(re.compile(regex).search(data).group(1)) - - -# pylint: disable=unused-argument -def latency(data: str, **kwargs) -> float: - """Mean latency in milliseconds.""" - regex = r"Total:\s+\d+\s+(\d+)\s+(\d+\.?\d+?)\s+\d+\s+\d+\s" - res = re.compile(regex).search(data) - return float(res.group(1)) - - -# pylint: disable=unused-argument -def requests_per_second(data: str, **kwargs) -> float: - """Requests per second.""" - regex = r"Requests per second:\s+(\d+\.?\d+?)\s+" - res = re.compile(regex).search(data) - return float(res.group(1)) diff --git a/benchmarks/workloads/ab/ab_test.py b/benchmarks/workloads/ab/ab_test.py deleted file mode 100644 index 4afac2996..000000000 --- a/benchmarks/workloads/ab/ab_test.py +++ /dev/null @@ -1,42 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Parser test.""" - -import sys - -import pytest - -from benchmarks.workloads import ab - - -def test_transfer_rate_parser(): - """Test transfer rate parser.""" - res = ab.transfer_rate(ab.sample()) - assert res == 210.84 - - -def test_latency_parser(): - """Test latency parser.""" - res = ab.latency(ab.sample()) - assert res == 2 - - -def test_requests_per_second(): - """Test requests per second parser.""" - res = ab.requests_per_second(ab.sample()) - assert res == 556.44 - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__])) diff --git a/benchmarks/workloads/absl/BUILD b/benchmarks/workloads/absl/BUILD deleted file mode 100644 index bb1a308bf..000000000 --- a/benchmarks/workloads/absl/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar", "py_library", "py_test") -load("//benchmarks:defs.bzl", "test_deps") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "absl", - srcs = ["__init__.py"], -) - -py_test( - name = "absl_test", - srcs = ["absl_test.py"], - python_version = "PY3", - deps = test_deps + [ - ":absl", - ], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/absl/Dockerfile b/benchmarks/workloads/absl/Dockerfile deleted file mode 100644 index f29cfa156..000000000 --- a/benchmarks/workloads/absl/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - wget \ - git \ - pkg-config \ - zip \ - g++ \ - zlib1g-dev \ - unzip \ - python3 \ - && rm -rf /var/lib/apt/lists/* -RUN wget https://github.com/bazelbuild/bazel/releases/download/0.27.0/bazel-0.27.0-installer-linux-x86_64.sh -RUN chmod +x bazel-0.27.0-installer-linux-x86_64.sh -RUN ./bazel-0.27.0-installer-linux-x86_64.sh - -RUN mkdir abseil-cpp && cd abseil-cpp \ - && git init && git remote add origin https://github.com/abseil/abseil-cpp.git \ - && git fetch --depth 1 origin 43ef2148c0936ebf7cb4be6b19927a9d9d145b8f && git checkout FETCH_HEAD -WORKDIR abseil-cpp -RUN bazel clean -ENV path "absl/base/..." -CMD bazel build ${path} 2>&1 diff --git a/benchmarks/workloads/absl/__init__.py b/benchmarks/workloads/absl/__init__.py deleted file mode 100644 index b40e3f915..000000000 --- a/benchmarks/workloads/absl/__init__.py +++ /dev/null @@ -1,63 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""ABSL build benchmark.""" - -import re - -SAMPLE_BAZEL_OUTPUT = """Extracting Bazel installation... -Starting local Bazel server and connecting to it... -Loading: -Loading: 0 packages loaded -Loading: 0 packages loaded - currently loading: absl/algorithm ... 
(11 packages) -Analyzing: 241 targets (16 packages loaded, 0 targets configured) -Analyzing: 241 targets (21 packages loaded, 617 targets configured) -Analyzing: 241 targets (27 packages loaded, 687 targets configured) -Analyzing: 241 targets (32 packages loaded, 1105 targets configured) -Analyzing: 241 targets (32 packages loaded, 1294 targets configured) -Analyzing: 241 targets (35 packages loaded, 1575 targets configured) -Analyzing: 241 targets (35 packages loaded, 1575 targets configured) -Analyzing: 241 targets (36 packages loaded, 1603 targets configured) -Analyzing: 241 targets (36 packages loaded, 1603 targets configured) -INFO: Analyzed 241 targets (37 packages loaded, 1864 targets configured). -INFO: Found 241 targets... -[0 / 5] [Prepa] BazelWorkspaceStatusAction stable-status.txt -[16 / 50] [Analy] Compiling absl/base/dynamic_annotations.cc ... (20 actions, 10 running) -[60 / 77] Compiling external/com_google_googletest/googletest/src/gtest.cc; 5s processwrapper-sandbox ... (12 actions, 11 running) -[158 / 174] Compiling absl/container/internal/raw_hash_set_test.cc; 2s processwrapper-sandbox ... (12 actions, 11 running) -[278 / 302] Compiling absl/container/internal/raw_hash_set_test.cc; 6s processwrapper-sandbox ... (12 actions, 11 running) -[384 / 406] Compiling absl/container/internal/raw_hash_set_test.cc; 10s processwrapper-sandbox ... (12 actions, 11 running) -[581 / 604] Compiling absl/container/flat_hash_set_test.cc; 11s processwrapper-sandbox ... (12 actions, 11 running) -[722 / 745] Compiling absl/container/node_hash_set_test.cc; 9s processwrapper-sandbox ... (12 actions, 11 running) -[846 / 867] Compiling absl/hash/hash_test.cc; 11s processwrapper-sandbox ... (12 actions, 11 running) -INFO: From Compiling absl/debugging/symbolize_test.cc: -/tmp/cclCVipU.s: Assembler messages: -/tmp/cclCVipU.s:1662: Warning: ignoring changed section attributes for .text -[999 / 1,022] Compiling absl/hash/hash_test.cc; 19s processwrapper-sandbox ... (12 actions, 11 running) -[1,082 / 1,084] Compiling absl/container/flat_hash_map_test.cc; 7s processwrapper-sandbox -INFO: Elapsed time: 81.861s, Critical Path: 23.81s -INFO: 515 processes: 515 processwrapper-sandbox. -INFO: Build completed successfully, 1084 total actions -INFO: Build completed successfully, 1084 total actions""" - - -def sample(): - return SAMPLE_BAZEL_OUTPUT - - -# pylint: disable=unused-argument -def elapsed_time(data: str, **kwargs) -> float: - """Returns the elapsed time for running an absl build.""" - return float(re.compile(r"Elapsed time: (\d*.?\d*)s").search(data).group(1)) diff --git a/benchmarks/workloads/absl/absl_test.py b/benchmarks/workloads/absl/absl_test.py deleted file mode 100644 index 41f216999..000000000 --- a/benchmarks/workloads/absl/absl_test.py +++ /dev/null @@ -1,31 +0,0 @@ -# python3 -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
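The absl workload's single metric is the wall-clock time Bazel prints at the end of the build, extracted by the `elapsed_time` regex above. For reference, this is that regex applied to the one line of the sample log that matters; the value matches the assertion in the test that follows.

```
# elapsed_time boils down to one regex over the Bazel log.
import re

log_line = "INFO: Elapsed time: 81.861s, Critical Path: 23.81s"
elapsed = float(re.search(r"Elapsed time: (\d*.?\d*)s", log_line).group(1))
print(elapsed)  # 81.861
```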
-"""ABSL build test.""" - -import sys - -import pytest - -from benchmarks.workloads import absl - - -def test_elapsed_time(): - """Test elapsed_time.""" - res = absl.elapsed_time(absl.sample()) - assert res == 81.861 - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__])) diff --git a/benchmarks/workloads/curl/BUILD b/benchmarks/workloads/curl/BUILD deleted file mode 100644 index a70873065..000000000 --- a/benchmarks/workloads/curl/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/curl/Dockerfile b/benchmarks/workloads/curl/Dockerfile deleted file mode 100644 index 336cb088a..000000000 --- a/benchmarks/workloads/curl/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - curl \ - && rm -rf /var/lib/apt/lists/* - -# Accept a host and port parameter. -ENV host localhost -ENV port 8080 - -# Spin until we make a successful request. -CMD ["sh", "-c", "while ! curl -v -i http://$host:$port; do true; done"] diff --git a/benchmarks/workloads/ffmpeg/BUILD b/benchmarks/workloads/ffmpeg/BUILD deleted file mode 100644 index 7c41ba631..000000000 --- a/benchmarks/workloads/ffmpeg/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "ffmpeg", - srcs = ["__init__.py"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/ffmpeg/Dockerfile b/benchmarks/workloads/ffmpeg/Dockerfile deleted file mode 100644 index f2f530d7c..000000000 --- a/benchmarks/workloads/ffmpeg/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - ffmpeg \ - && rm -rf /var/lib/apt/lists/* -WORKDIR /media -ADD https://samples.ffmpeg.org/MPEG-4/video.mp4 video.mp4 -CMD ["ffmpeg", "-i", "video.mp4", "-c:v", "libx264", "-preset", "veryslow", "output.mp4"] diff --git a/benchmarks/workloads/ffmpeg/__init__.py b/benchmarks/workloads/ffmpeg/__init__.py deleted file mode 100644 index 7578a443b..000000000 --- a/benchmarks/workloads/ffmpeg/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Simple ffmpeg workload.""" - - -# pylint: disable=unused-argument -def run_time(value, **kwargs): - """Returns the startup and runtime of the ffmpeg workload in seconds.""" - return value diff --git a/benchmarks/workloads/fio/BUILD b/benchmarks/workloads/fio/BUILD deleted file mode 100644 index 24d909c53..000000000 --- a/benchmarks/workloads/fio/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar", "py_library", "py_test") -load("//benchmarks:defs.bzl", "test_deps") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "fio", - srcs = ["__init__.py"], -) - -py_test( - name = "fio_test", - srcs = ["fio_test.py"], - python_version = "PY3", - deps = test_deps + [ - ":fio", - ], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/fio/Dockerfile b/benchmarks/workloads/fio/Dockerfile deleted file mode 100644 index b3cf864eb..000000000 --- a/benchmarks/workloads/fio/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - fio \ - && rm -rf /var/lib/apt/lists/* - -# Parameterized test. -ENV test write -ENV ioengine sync -ENV size 5000000 -ENV iodepth 4 -ENV blocksize "1m" -ENV time "" -ENV path "/disk/file.dat" -ENV ramp_time 0 - -CMD ["sh", "-c", "fio --output-format=json --name=test --ramp_time=${ramp_time} --ioengine=${ioengine} --size=${size} \ ---filename=${path} --iodepth=${iodepth} --bs=${blocksize} --rw=${test} ${time}"] - - - diff --git a/benchmarks/workloads/fio/__init__.py b/benchmarks/workloads/fio/__init__.py deleted file mode 100644 index 52711e956..000000000 --- a/benchmarks/workloads/fio/__init__.py +++ /dev/null @@ -1,369 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""FIO benchmark tool.""" - -import json - -SAMPLE_DATA = """ -{ - "fio version" : "fio-3.1", - "timestamp" : 1554837456, - "timestamp_ms" : 1554837456621, - "time" : "Tue Apr 9 19:17:36 2019", - "jobs" : [ - { - "jobname" : "test", - "groupid" : 0, - "error" : 0, - "eta" : 2147483647, - "elapsed" : 1, - "job options" : { - "name" : "test", - "ioengine" : "sync", - "size" : "1073741824", - "filename" : "/disk/file.dat", - "iodepth" : "4", - "bs" : "4096", - "rw" : "write" - }, - "read" : { - "io_bytes" : 0, - "io_kbytes" : 0, - "bw" : 0, - "iops" : 0.000000, - "runtime" : 0, - "total_ios" : 0, - "short_ios" : 0, - "drop_ios" : 0, - "slat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000 - }, - "clat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000, - "percentile" : { - "1.000000" : 0, - "5.000000" : 0, - "10.000000" : 0, - "20.000000" : 0, - "30.000000" : 0, - "40.000000" : 0, - "50.000000" : 0, - "60.000000" : 0, - "70.000000" : 0, - "80.000000" : 0, - "90.000000" : 0, - "95.000000" : 0, - "99.000000" : 0, - "99.500000" : 0, - "99.900000" : 0, - "99.950000" : 0, - "99.990000" : 0, - "0.00" : 0, - "0.00" : 0, - "0.00" : 0 - } - }, - "lat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000 - }, - "bw_min" : 0, - "bw_max" : 0, - "bw_agg" : 0.000000, - "bw_mean" : 0.000000, - "bw_dev" : 0.000000, - "bw_samples" : 0, - "iops_min" : 0, - "iops_max" : 0, - "iops_mean" : 0.000000, - "iops_stddev" : 0.000000, - "iops_samples" : 0 - }, - "write" : { - "io_bytes" : 1073741824, - "io_kbytes" : 1048576, - "bw" : 1753471, - "iops" : 438367.892977, - "runtime" : 598, - "total_ios" : 262144, - "short_ios" : 0, - "drop_ios" : 0, - "slat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000 - }, - "clat_ns" : { - "min" : 1693, - "max" : 754733, - "mean" : 2076.404373, - "stddev" : 1724.195529, - "percentile" : { - "1.000000" : 1736, - "5.000000" : 1752, - "10.000000" : 1768, - "20.000000" : 1784, - "30.000000" : 1800, - "40.000000" : 1800, - "50.000000" : 1816, - "60.000000" : 1816, - "70.000000" : 1848, - "80.000000" : 1928, - "90.000000" : 2512, - "95.000000" : 2992, - "99.000000" : 6176, - "99.500000" : 6304, - "99.900000" : 11328, - "99.950000" : 15168, - "99.990000" : 17792, - "0.00" : 0, - "0.00" : 0, - "0.00" : 0 - } - }, - "lat_ns" : { - "min" : 1731, - "max" : 754770, - "mean" : 2117.878979, - "stddev" : 1730.290512 - }, - "bw_min" : 1731120, - "bw_max" : 1731120, - "bw_agg" : 98.725328, - "bw_mean" : 1731120.000000, - "bw_dev" : 0.000000, - "bw_samples" : 1, - "iops_min" : 432780, - "iops_max" : 432780, - "iops_mean" : 432780.000000, - "iops_stddev" : 0.000000, - "iops_samples" : 1 - }, - "trim" : { - "io_bytes" : 0, - "io_kbytes" : 0, - "bw" : 0, - "iops" : 0.000000, - "runtime" : 0, - "total_ios" : 0, - "short_ios" : 0, - "drop_ios" : 0, - "slat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000 - }, - "clat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000, - "percentile" : { - "1.000000" : 0, - "5.000000" : 0, - "10.000000" : 0, - "20.000000" : 0, - "30.000000" : 0, - "40.000000" : 0, - "50.000000" : 0, - "60.000000" : 0, - "70.000000" : 0, - "80.000000" : 0, - "90.000000" : 0, - "95.000000" : 0, - "99.000000" : 0, - "99.500000" : 0, - "99.900000" : 0, - "99.950000" : 0, - "99.990000" : 0, - "0.00" : 0, - "0.00" : 0, - "0.00" : 0 - } - }, - "lat_ns" : { - "min" : 0, - "max" : 0, - "mean" : 0.000000, - "stddev" : 0.000000 - }, - "bw_min" : 
0, - "bw_max" : 0, - "bw_agg" : 0.000000, - "bw_mean" : 0.000000, - "bw_dev" : 0.000000, - "bw_samples" : 0, - "iops_min" : 0, - "iops_max" : 0, - "iops_mean" : 0.000000, - "iops_stddev" : 0.000000, - "iops_samples" : 0 - }, - "usr_cpu" : 17.922948, - "sys_cpu" : 81.574539, - "ctx" : 3, - "majf" : 0, - "minf" : 10, - "iodepth_level" : { - "1" : 100.000000, - "2" : 0.000000, - "4" : 0.000000, - "8" : 0.000000, - "16" : 0.000000, - "32" : 0.000000, - ">=64" : 0.000000 - }, - "latency_ns" : { - "2" : 0.000000, - "4" : 0.000000, - "10" : 0.000000, - "20" : 0.000000, - "50" : 0.000000, - "100" : 0.000000, - "250" : 0.000000, - "500" : 0.000000, - "750" : 0.000000, - "1000" : 0.000000 - }, - "latency_us" : { - "2" : 82.737350, - "4" : 12.605286, - "10" : 4.543686, - "20" : 0.107956, - "50" : 0.010000, - "100" : 0.000000, - "250" : 0.000000, - "500" : 0.000000, - "750" : 0.000000, - "1000" : 0.010000 - }, - "latency_ms" : { - "2" : 0.000000, - "4" : 0.000000, - "10" : 0.000000, - "20" : 0.000000, - "50" : 0.000000, - "100" : 0.000000, - "250" : 0.000000, - "500" : 0.000000, - "750" : 0.000000, - "1000" : 0.000000, - "2000" : 0.000000, - ">=2000" : 0.000000 - }, - "latency_depth" : 4, - "latency_target" : 0, - "latency_percentile" : 100.000000, - "latency_window" : 0 - } - ], - "disk_util" : [ - { - "name" : "dm-1", - "read_ios" : 0, - "write_ios" : 3, - "read_merges" : 0, - "write_merges" : 0, - "read_ticks" : 0, - "write_ticks" : 0, - "in_queue" : 0, - "util" : 0.000000, - "aggr_read_ios" : 0, - "aggr_write_ios" : 3, - "aggr_read_merges" : 0, - "aggr_write_merge" : 0, - "aggr_read_ticks" : 0, - "aggr_write_ticks" : 0, - "aggr_in_queue" : 0, - "aggr_util" : 0.000000 - }, - { - "name" : "dm-0", - "read_ios" : 0, - "write_ios" : 3, - "read_merges" : 0, - "write_merges" : 0, - "read_ticks" : 0, - "write_ticks" : 0, - "in_queue" : 0, - "util" : 0.000000, - "aggr_read_ios" : 0, - "aggr_write_ios" : 3, - "aggr_read_merges" : 0, - "aggr_write_merge" : 0, - "aggr_read_ticks" : 0, - "aggr_write_ticks" : 2, - "aggr_in_queue" : 0, - "aggr_util" : 0.000000 - }, - { - "name" : "nvme0n1", - "read_ios" : 0, - "write_ios" : 3, - "read_merges" : 0, - "write_merges" : 0, - "read_ticks" : 0, - "write_ticks" : 2, - "in_queue" : 0, - "util" : 0.000000 - } - ] -} -""" - - -# pylint: disable=unused-argument -def sample(**kwargs) -> str: - return SAMPLE_DATA - - -# pylint: disable=unused-argument -def read_bandwidth(data: str, **kwargs) -> int: - """File I/O bandwidth.""" - return json.loads(data)["jobs"][0]["read"]["bw"] * 1024 - - -# pylint: disable=unused-argument -def write_bandwidth(data: str, **kwargs) -> int: - """File I/O bandwidth.""" - return json.loads(data)["jobs"][0]["write"]["bw"] * 1024 - - -# pylint: disable=unused-argument -def read_io_ops(data: str, **kwargs) -> float: - """File I/O operations per second.""" - return float(json.loads(data)["jobs"][0]["read"]["iops"]) - - -# pylint: disable=unused-argument -def write_io_ops(data: str, **kwargs) -> float: - """File I/O operations per second.""" - return float(json.loads(data)["jobs"][0]["write"]["iops"]) - - -# Change function names so we just print "bandwidth" and "io_ops". 
-read_bandwidth.__name__ = "bandwidth" -write_bandwidth.__name__ = "bandwidth" -read_io_ops.__name__ = "io_ops" -write_io_ops.__name__ = "io_ops" diff --git a/benchmarks/workloads/fio/fio_test.py b/benchmarks/workloads/fio/fio_test.py deleted file mode 100644 index 04a6eeb7e..000000000 --- a/benchmarks/workloads/fio/fio_test.py +++ /dev/null @@ -1,44 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Parser tests.""" - -import sys - -import pytest - -from benchmarks.workloads import fio - - -def test_read_io_ops(): - """Test read ops parser.""" - assert fio.read_io_ops(fio.sample()) == 0.0 - - -def test_write_io_ops(): - """Test write ops parser.""" - assert fio.write_io_ops(fio.sample()) == 438367.892977 - - -def test_read_bandwidth(): - """Test read bandwidth parser.""" - assert fio.read_bandwidth(fio.sample()) == 0.0 - - -def test_write_bandwith(): - """Test write bandwidth parser.""" - assert fio.write_bandwidth(fio.sample()) == 1753471 * 1024 - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__])) diff --git a/benchmarks/workloads/httpd/BUILD b/benchmarks/workloads/httpd/BUILD deleted file mode 100644 index 83450d190..000000000 --- a/benchmarks/workloads/httpd/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - "apache2-tmpdir.conf", - ], -) diff --git a/benchmarks/workloads/httpd/Dockerfile b/benchmarks/workloads/httpd/Dockerfile deleted file mode 100644 index 52a550678..000000000 --- a/benchmarks/workloads/httpd/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - apache2 \ - && rm -rf /var/lib/apt/lists/* - -# Generate a bunch of relevant files. -RUN mkdir -p /local && \ - for size in 1 10 100 1000 1024 10240; do \ - dd if=/dev/zero of=/local/latin${size}k.txt count=${size} bs=1024; \ - done - -# Rewrite DocumentRoot to point to /tmp/html instead of the default path. -RUN sed -i 's/DocumentRoot.*\/var\/www\/html$/DocumentRoot \/tmp\/html/' /etc/apache2/sites-enabled/000-default.conf -COPY ./apache2-tmpdir.conf /etc/apache2/sites-enabled/apache2-tmpdir.conf - -# Standard settings. -ENV APACHE_RUN_DIR /tmp -ENV APACHE_RUN_USER nobody -ENV APACHE_RUN_GROUP nogroup -ENV APACHE_LOG_DIR /tmp -ENV APACHE_PID_FILE /tmp/apache.pid - -# Copy on start-up; serve everything from /tmp (including the configuration). 
-CMD ["sh", "-c", "mkdir -p /tmp/html && cp -a /local/* /tmp/html && apache2 -X"] diff --git a/benchmarks/workloads/httpd/apache2-tmpdir.conf b/benchmarks/workloads/httpd/apache2-tmpdir.conf deleted file mode 100644 index e33f8d9bb..000000000 --- a/benchmarks/workloads/httpd/apache2-tmpdir.conf +++ /dev/null @@ -1,5 +0,0 @@ - - Options Indexes FollowSymLinks - AllowOverride None - Require all granted - \ No newline at end of file diff --git a/benchmarks/workloads/iperf/BUILD b/benchmarks/workloads/iperf/BUILD deleted file mode 100644 index 91b953718..000000000 --- a/benchmarks/workloads/iperf/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar", "py_library", "py_test") -load("//benchmarks:defs.bzl", "test_deps") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "iperf", - srcs = ["__init__.py"], -) - -py_test( - name = "iperf_test", - srcs = ["iperf_test.py"], - python_version = "PY3", - deps = test_deps + [ - ":iperf", - ], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/iperf/Dockerfile b/benchmarks/workloads/iperf/Dockerfile deleted file mode 100644 index 9704c506c..000000000 --- a/benchmarks/workloads/iperf/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - iperf \ - && rm -rf /var/lib/apt/lists/* - -# Accept a host parameter. -ENV host "" -ENV port 5001 - -# Start as client if the host is provided. -CMD ["sh", "-c", "test -z \"${host}\" && iperf -s || iperf -f K --realtime -c ${host} -p ${port}"] diff --git a/benchmarks/workloads/iperf/__init__.py b/benchmarks/workloads/iperf/__init__.py deleted file mode 100644 index 3817a7ade..000000000 --- a/benchmarks/workloads/iperf/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""iperf.""" - -import re - -SAMPLE_DATA = """ ------------------------------------------------------------- -Client connecting to 10.138.15.215, TCP port 32779 -TCP window size: 45.0 KByte (default) ------------------------------------------------------------- -[ 3] local 10.138.15.216 port 32866 connected with 10.138.15.215 port 32779 -[ ID] Interval Transfer Bandwidth -[ 3] 0.0-10.0 sec 459520 KBytes 45900 KBytes/sec - -""" - - -# pylint: disable=unused-argument -def sample(**kwargs) -> str: - return SAMPLE_DATA - - -# pylint: disable=unused-argument -def bandwidth(data: str, **kwargs) -> float: - """Calculate the bandwidth.""" - regex = r"\[\s*\d+\][^\n]+\s+(\d+\.?\d*)\s+KBytes/sec" - res = re.compile(regex).search(data) - return float(res.group(1)) * 1000 diff --git a/benchmarks/workloads/iperf/iperf_test.py b/benchmarks/workloads/iperf/iperf_test.py deleted file mode 100644 index 6959b7e8a..000000000 --- a/benchmarks/workloads/iperf/iperf_test.py +++ /dev/null @@ -1,28 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Tests for iperf.""" - -import sys - -import pytest - -from benchmarks.workloads import iperf - - -def test_bandwidth(): - assert iperf.bandwidth(iperf.sample()) == 45900 * 1000 - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__])) diff --git a/benchmarks/workloads/netcat/BUILD b/benchmarks/workloads/netcat/BUILD deleted file mode 100644 index a70873065..000000000 --- a/benchmarks/workloads/netcat/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/netcat/Dockerfile b/benchmarks/workloads/netcat/Dockerfile deleted file mode 100644 index d8548d89a..000000000 --- a/benchmarks/workloads/netcat/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - netcat \ - && rm -rf /var/lib/apt/lists/* - -# Accept a host and port parameter. -ENV host localhost -ENV port 8080 - -# Spin until we make a successful request. -CMD ["sh", "-c", "while ! 
nc -zv $host $port; do true; done"] diff --git a/benchmarks/workloads/nginx/BUILD b/benchmarks/workloads/nginx/BUILD deleted file mode 100644 index a70873065..000000000 --- a/benchmarks/workloads/nginx/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/nginx/Dockerfile b/benchmarks/workloads/nginx/Dockerfile deleted file mode 100644 index b64eb52ae..000000000 --- a/benchmarks/workloads/nginx/Dockerfile +++ /dev/null @@ -1 +0,0 @@ -FROM nginx:1.15.10 diff --git a/benchmarks/workloads/node/BUILD b/benchmarks/workloads/node/BUILD deleted file mode 100644 index bfcf78cf9..000000000 --- a/benchmarks/workloads/node/BUILD +++ /dev/null @@ -1,15 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - "index.js", - "package.json", - ], -) diff --git a/benchmarks/workloads/node/Dockerfile b/benchmarks/workloads/node/Dockerfile deleted file mode 100644 index 139a38bf5..000000000 --- a/benchmarks/workloads/node/Dockerfile +++ /dev/null @@ -1,2 +0,0 @@ -FROM node:onbuild -CMD ["node", "index.js"] diff --git a/benchmarks/workloads/node/index.js b/benchmarks/workloads/node/index.js deleted file mode 100644 index 584158462..000000000 --- a/benchmarks/workloads/node/index.js +++ /dev/null @@ -1,28 +0,0 @@ -'use strict'; - -var start = new Date().getTime(); - -// Load dependencies to simulate an average nodejs app. -var req_0 = require('async'); -var req_1 = require('bluebird'); -var req_2 = require('firebase'); -var req_3 = require('firebase-admin'); -var req_4 = require('@google-cloud/container'); -var req_5 = require('@google-cloud/logging'); -var req_6 = require('@google-cloud/monitoring'); -var req_7 = require('@google-cloud/spanner'); -var req_8 = require('lodash'); -var req_9 = require('mailgun-js'); -var req_10 = require('request'); -var express = require('express'); -var app = express(); - -var loaded = new Date().getTime() - start; -app.get('/', function(req, res) { - res.send('Hello World!
Loaded in ' + loaded + 'ms'); -}); - -console.log('Loaded in ' + loaded + ' ms'); -app.listen(8080, function() { - console.log('Listening on port 8080...'); -}); diff --git a/benchmarks/workloads/node/package.json b/benchmarks/workloads/node/package.json deleted file mode 100644 index c00b9b3cb..000000000 --- a/benchmarks/workloads/node/package.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "name": "node", - "version": "1.0.0", - "main": "index.js", - "dependencies": { - "@google-cloud/container": "^0.3.0", - "@google-cloud/logging": "^4.2.0", - "@google-cloud/monitoring": "^0.6.0", - "@google-cloud/spanner": "^2.2.1", - "async": "^2.6.1", - "bluebird": "^3.5.3", - "express": "^4.16.4", - "firebase": "^5.7.2", - "firebase-admin": "^6.4.0", - "lodash": "^4.17.11", - "mailgun-js": "^0.22.0", - "request": "^2.88.0" - } -} diff --git a/benchmarks/workloads/node_template/BUILD b/benchmarks/workloads/node_template/BUILD deleted file mode 100644 index e142f082a..000000000 --- a/benchmarks/workloads/node_template/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - "index.hbs", - "index.js", - "package.json", - "package-lock.json", - ], -) diff --git a/benchmarks/workloads/node_template/Dockerfile b/benchmarks/workloads/node_template/Dockerfile deleted file mode 100644 index 7eb065d54..000000000 --- a/benchmarks/workloads/node_template/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM node:onbuild - -ENV host "127.0.0.1" - -CMD ["sh", "-c", "node index.js ${host}"] diff --git a/benchmarks/workloads/node_template/index.hbs b/benchmarks/workloads/node_template/index.hbs deleted file mode 100644 index 03feceb75..000000000 --- a/benchmarks/workloads/node_template/index.hbs +++ /dev/null @@ -1,8 +0,0 @@ - - - - {{#each text}} -

{{this}}

- {{/each}} - - diff --git a/benchmarks/workloads/node_template/index.js b/benchmarks/workloads/node_template/index.js deleted file mode 100644 index 04a27f356..000000000 --- a/benchmarks/workloads/node_template/index.js +++ /dev/null @@ -1,43 +0,0 @@ -const app = require('express')(); -const path = require('path'); -const redis = require('redis'); -const srs = require('secure-random-string'); - -// The hostname is the first argument. -const host_name = process.argv[2]; - -var client = redis.createClient({host: host_name, detect_buffers: true}); - -app.set('views', __dirname); -app.set('view engine', 'hbs'); - -app.get('/', (req, res) => { - var tmp = []; - /* Pull four random keys from the redis server. */ - for (i = 0; i < 4; i++) { - client.get(Math.floor(Math.random() * (100)), function(err, reply) { - tmp.push(reply.toString()); - }); - } - - res.render('index', {text: tmp}); -}); - -/** - * Securely generate a random string. - * @param {number} len - * @return {string} - */ -function randomBody(len) { - return srs({alphanumeric: true, length: len}); -} - -/** Mutates one hundred keys randomly. */ -function generateText() { - for (i = 0; i < 100; i++) { - client.set(i, randomBody(1024)); - } -} - -generateText(); -app.listen(8080); diff --git a/benchmarks/workloads/node_template/package-lock.json b/benchmarks/workloads/node_template/package-lock.json deleted file mode 100644 index 580e68aa5..000000000 --- a/benchmarks/workloads/node_template/package-lock.json +++ /dev/null @@ -1,486 +0,0 @@ -{ - "name": "nodedum", - "version": "1.0.0", - "lockfileVersion": 1, - "requires": true, - "dependencies": { - "accepts": { - "version": "1.3.5", - "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz", - "integrity": "sha1-63d99gEXI6OxTopywIBcjoZ0a9I=", - "requires": { - "mime-types": "~2.1.18", - "negotiator": "0.6.1" - } - }, - "array-flatten": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", - "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=" - }, - "async": { - "version": "2.6.2", - "resolved": "https://registry.npmjs.org/async/-/async-2.6.2.tgz", - "integrity": "sha512-H1qVYh1MYhEEFLsP97cVKqCGo7KfCyTt6uEWqsTBr9SO84oK9Uwbyd/yCW+6rKJLHksBNUVWZDAjfS+Ccx0Bbg==", - "requires": { - "lodash": "^4.17.11" - } - }, - "body-parser": { - "version": "1.18.3", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.18.3.tgz", - "integrity": "sha1-WykhmP/dVTs6DyDe0FkrlWlVyLQ=", - "requires": { - "bytes": "3.0.0", - "content-type": "~1.0.4", - "debug": "2.6.9", - "depd": "~1.1.2", - "http-errors": "~1.6.3", - "iconv-lite": "0.4.23", - "on-finished": "~2.3.0", - "qs": "6.5.2", - "raw-body": "2.3.3", - "type-is": "~1.6.16" - } - }, - "bytes": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz", - "integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=" - }, - "commander": { - "version": "2.20.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.0.tgz", - "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ==", - "optional": true - }, - "content-disposition": { - "version": "0.5.2", - "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.2.tgz", - "integrity": "sha1-DPaLud318r55YcOoUXjLhdunjLQ=" - }, - "content-type": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", - "integrity": 
"sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==" - }, - "cookie": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.3.1.tgz", - "integrity": "sha1-5+Ch+e9DtMi6klxcWpboBtFoc7s=" - }, - "cookie-signature": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", - "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=" - }, - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "requires": { - "ms": "2.0.0" - } - }, - "depd": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", - "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=" - }, - "destroy": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", - "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" - }, - "double-ended-queue": { - "version": "2.1.0-0", - "resolved": "https://registry.npmjs.org/double-ended-queue/-/double-ended-queue-2.1.0-0.tgz", - "integrity": "sha1-ED01J/0xUo9AGIEwyEHv3XgmTlw=" - }, - "ee-first": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", - "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=" - }, - "encodeurl": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", - "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=" - }, - "escape-html": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", - "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=" - }, - "etag": { - "version": "1.8.1", - "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", - "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=" - }, - "express": { - "version": "4.16.4", - "resolved": "https://registry.npmjs.org/express/-/express-4.16.4.tgz", - "integrity": "sha512-j12Uuyb4FMrd/qQAm6uCHAkPtO8FDTRJZBDd5D2KOL2eLaz1yUNdUB/NOIyq0iU4q4cFarsUCrnFDPBcnksuOg==", - "requires": { - "accepts": "~1.3.5", - "array-flatten": "1.1.1", - "body-parser": "1.18.3", - "content-disposition": "0.5.2", - "content-type": "~1.0.4", - "cookie": "0.3.1", - "cookie-signature": "1.0.6", - "debug": "2.6.9", - "depd": "~1.1.2", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "finalhandler": "1.1.1", - "fresh": "0.5.2", - "merge-descriptors": "1.0.1", - "methods": "~1.1.2", - "on-finished": "~2.3.0", - "parseurl": "~1.3.2", - "path-to-regexp": "0.1.7", - "proxy-addr": "~2.0.4", - "qs": "6.5.2", - "range-parser": "~1.2.0", - "safe-buffer": "5.1.2", - "send": "0.16.2", - "serve-static": "1.13.2", - "setprototypeof": "1.1.0", - "statuses": "~1.4.0", - "type-is": "~1.6.16", - "utils-merge": "1.0.1", - "vary": "~1.1.2" - } - }, - "finalhandler": { - "version": "1.1.1", - "resolved": "http://registry.npmjs.org/finalhandler/-/finalhandler-1.1.1.tgz", - "integrity": "sha512-Y1GUDo39ez4aHAw7MysnUD5JzYX+WaIj8I57kO3aEPT1fFRL4sr7mjei97FgnwhAyyzRYmQZaTHb2+9uZ1dPtg==", - "requires": { - "debug": "2.6.9", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "on-finished": "~2.3.0", - "parseurl": "~1.3.2", - "statuses": "~1.4.0", - "unpipe": "~1.0.0" - } - }, - "foreachasync": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/foreachasync/-/foreachasync-3.0.0.tgz", - "integrity": "sha1-VQKYfchxS+M5IJfzLgBxyd7gfPY=" - }, - "forwarded": { - "version": 
"0.1.2", - "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", - "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=" - }, - "fresh": { - "version": "0.5.2", - "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", - "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=" - }, - "handlebars": { - "version": "4.0.14", - "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.0.14.tgz", - "integrity": "sha512-E7tDoyAA8ilZIV3xDJgl18sX3M8xB9/fMw8+mfW4msLW8jlX97bAnWgT3pmaNXuvzIEgSBMnAHfuXsB2hdzfow==", - "requires": { - "async": "^2.5.0", - "optimist": "^0.6.1", - "source-map": "^0.6.1", - "uglify-js": "^3.1.4" - } - }, - "hbs": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/hbs/-/hbs-4.0.4.tgz", - "integrity": "sha512-esVlyV/V59mKkwFai5YmPRSNIWZzhqL5YMN0++ueMxyK1cCfPa5f6JiHtapPKAIVAhQR6rpGxow0troav9WMEg==", - "requires": { - "handlebars": "4.0.14", - "walk": "2.3.9" - } - }, - "http-errors": { - "version": "1.6.3", - "resolved": "http://registry.npmjs.org/http-errors/-/http-errors-1.6.3.tgz", - "integrity": "sha1-i1VoC7S+KDoLW/TqLjhYC+HZMg0=", - "requires": { - "depd": "~1.1.2", - "inherits": "2.0.3", - "setprototypeof": "1.1.0", - "statuses": ">= 1.4.0 < 2" - } - }, - "iconv-lite": { - "version": "0.4.23", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.23.tgz", - "integrity": "sha512-neyTUVFtahjf0mB3dZT77u+8O0QB89jFdnBkd5P1JgYPbPaia3gXXOVL2fq8VyU2gMMD7SaN7QukTB/pmXYvDA==", - "requires": { - "safer-buffer": ">= 2.1.2 < 3" - } - }, - "inherits": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", - "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" - }, - "ipaddr.js": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.8.0.tgz", - "integrity": "sha1-6qM9bd16zo9/b+DJygRA5wZzix4=" - }, - "lodash": { - "version": "4.17.15", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", - "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==" - }, - "media-typer": { - "version": "0.3.0", - "resolved": "http://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", - "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=" - }, - "merge-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", - "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=" - }, - "methods": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", - "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=" - }, - "mime": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.4.1.tgz", - "integrity": "sha512-KI1+qOZu5DcW6wayYHSzR/tXKCDC5Om4s1z2QJjDULzLcmf3DvzS7oluY4HCTrc+9FiKmWUgeNLg7W3uIQvxtQ==" - }, - "mime-db": { - "version": "1.37.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.37.0.tgz", - "integrity": "sha512-R3C4db6bgQhlIhPU48fUtdVmKnflq+hRdad7IyKhtFj06VPNVdk2RhiYL3UjQIlso8L+YxAtFkobT0VK+S/ybg==" - }, - "mime-types": { - "version": "2.1.21", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.21.tgz", - "integrity": "sha512-3iL6DbwpyLzjR3xHSFNFeb9Nz/M8WDkX33t1GFQnFOllWk8pOrh/LSrB5OXlnlW5P9LH73X6loW/eogc+F5lJg==", - "requires": { - "mime-db": "~1.37.0" - } - }, - "minimist": { - "version": "0.0.10", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz", - "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=" - }, - "ms": { - "version": 
"2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" - }, - "negotiator": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.1.tgz", - "integrity": "sha1-KzJxhOiZIQEXeyhWP7XnECrNDKk=" - }, - "on-finished": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", - "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", - "requires": { - "ee-first": "1.1.1" - } - }, - "optimist": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz", - "integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=", - "requires": { - "minimist": "~0.0.1", - "wordwrap": "~0.0.2" - } - }, - "parseurl": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.2.tgz", - "integrity": "sha1-/CidTtiZMRlGDBViUyYs3I3mW/M=" - }, - "path-to-regexp": { - "version": "0.1.7", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", - "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" - }, - "proxy-addr": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.4.tgz", - "integrity": "sha512-5erio2h9jp5CHGwcybmxmVqHmnCBZeewlfJ0pex+UW7Qny7OOZXTtH56TGNyBizkgiOwhJtMKrVzDTeKcySZwA==", - "requires": { - "forwarded": "~0.1.2", - "ipaddr.js": "1.8.0" - } - }, - "qs": { - "version": "6.5.2", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz", - "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==" - }, - "range-parser": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz", - "integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4=" - }, - "raw-body": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.3.tgz", - "integrity": "sha512-9esiElv1BrZoI3rCDuOuKCBRbuApGGaDPQfjSflGxdy4oyzqghxu6klEkkVIvBje+FF0BX9coEv8KqW6X/7njw==", - "requires": { - "bytes": "3.0.0", - "http-errors": "1.6.3", - "iconv-lite": "0.4.23", - "unpipe": "1.0.0" - } - }, - "redis": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/redis/-/redis-2.8.0.tgz", - "integrity": "sha512-M1OkonEQwtRmZv4tEWF2VgpG0JWJ8Fv1PhlgT5+B+uNq2cA3Rt1Yt/ryoR+vQNOQcIEgdCdfH0jr3bDpihAw1A==", - "requires": { - "double-ended-queue": "^2.1.0-0", - "redis-commands": "^1.2.0", - "redis-parser": "^2.6.0" - }, - "dependencies": { - "redis-commands": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.4.0.tgz", - "integrity": "sha512-cu8EF+MtkwI4DLIT0x9P8qNTLFhQD4jLfxLR0cCNkeGzs87FN6879JOJwNQR/1zD7aSYNbU0hgsV9zGY71Itvw==" - }, - "redis-parser": { - "version": "2.6.0", - "resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-2.6.0.tgz", - "integrity": "sha1-Uu0J2srBCPGmMcB+m2mUHnoZUEs=" - } - } - }, - "redis-commands": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.5.0.tgz", - "integrity": "sha512-6KxamqpZ468MeQC3bkWmCB1fp56XL64D4Kf0zJSwDZbVLLm7KFkoIcHrgRvQ+sk8dnhySs7+yBg94yIkAK7aJg==" - }, - "redis-parser": { - "version": "2.6.0", - "resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-2.6.0.tgz", - "integrity": "sha1-Uu0J2srBCPGmMcB+m2mUHnoZUEs=" - }, - "safe-buffer": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": 
"sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" - }, - "safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" - }, - "secure-random-string": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/secure-random-string/-/secure-random-string-1.1.0.tgz", - "integrity": "sha512-V/h8jqoz58zklNGybVhP++cWrxEPXlLM/6BeJ4e0a8zlb4BsbYRzFs16snrxByPa5LUxCVTD3M6EYIVIHR1fAg==" - }, - "send": { - "version": "0.16.2", - "resolved": "https://registry.npmjs.org/send/-/send-0.16.2.tgz", - "integrity": "sha512-E64YFPUssFHEFBvpbbjr44NCLtI1AohxQ8ZSiJjQLskAdKuriYEP6VyGEsRDH8ScozGpkaX1BGvhanqCwkcEZw==", - "requires": { - "debug": "2.6.9", - "depd": "~1.1.2", - "destroy": "~1.0.4", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "fresh": "0.5.2", - "http-errors": "~1.6.2", - "mime": "1.4.1", - "ms": "2.0.0", - "on-finished": "~2.3.0", - "range-parser": "~1.2.0", - "statuses": "~1.4.0" - } - }, - "serve-static": { - "version": "1.13.2", - "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.13.2.tgz", - "integrity": "sha512-p/tdJrO4U387R9oMjb1oj7qSMaMfmOyd4j9hOFoxZe2baQszgHcSWjuya/CiT5kgZZKRudHNOA0pYXOl8rQ5nw==", - "requires": { - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "parseurl": "~1.3.2", - "send": "0.16.2" - } - }, - "setprototypeof": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.0.tgz", - "integrity": "sha512-BvE/TwpZX4FXExxOxZyRGQQv651MSwmWKZGqvmPcRIjDqWub67kTKuIMx43cZZrS/cBBzwBcNDWoFxt2XEFIpQ==" - }, - "source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==" - }, - "statuses": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.4.0.tgz", - "integrity": "sha512-zhSCtt8v2NDrRlPQpCNtw/heZLtfUDqxBM1udqikb/Hbk52LK4nQSwr10u77iopCW5LsyHpuXS0GnEc48mLeew==" - }, - "type-is": { - "version": "1.6.16", - "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.16.tgz", - "integrity": "sha512-HRkVv/5qY2G6I8iab9cI7v1bOIdhm94dVjQCPFElW9W+3GeDOSHmy2EBYe4VTApuzolPcmgFTN3ftVJRKR2J9Q==", - "requires": { - "media-typer": "0.3.0", - "mime-types": "~2.1.18" - } - }, - "uglify-js": { - "version": "3.5.9", - "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.5.9.tgz", - "integrity": "sha512-WpT0RqsDtAWPNJK955DEnb6xjymR8Fn0OlK4TT4pS0ASYsVPqr5ELhgwOwLCP5J5vHeJ4xmMmz3DEgdqC10JeQ==", - "optional": true, - "requires": { - "commander": "~2.20.0", - "source-map": "~0.6.1" - } - }, - "unpipe": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", - "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" - }, - "utils-merge": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", - "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=" - }, - "vary": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", - "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=" - }, - "walk": { - "version": "2.3.9", - "resolved": "https://registry.npmjs.org/walk/-/walk-2.3.9.tgz", - "integrity": "sha1-MbTbZnjyrgHDnqn7hyWpAx5Vins=", - "requires": { - "foreachasync": "^3.0.0" - } - }, - 
"wordwrap": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-0.0.3.tgz", - "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" - } - } -} diff --git a/benchmarks/workloads/node_template/package.json b/benchmarks/workloads/node_template/package.json deleted file mode 100644 index 7dcadd523..000000000 --- a/benchmarks/workloads/node_template/package.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "name": "nodedum", - "version": "1.0.0", - "description": "", - "main": "index.js", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "author": "", - "license": "ISC", - "dependencies": { - "express": "^4.16.4", - "hbs": "^4.0.4", - "redis": "^2.8.0", - "redis-commands": "^1.2.0", - "redis-parser": "^2.6.0", - "secure-random-string": "^1.1.0" - } -} diff --git a/benchmarks/workloads/redis/BUILD b/benchmarks/workloads/redis/BUILD deleted file mode 100644 index a70873065..000000000 --- a/benchmarks/workloads/redis/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/redis/Dockerfile b/benchmarks/workloads/redis/Dockerfile deleted file mode 100644 index 0f17249af..000000000 --- a/benchmarks/workloads/redis/Dockerfile +++ /dev/null @@ -1 +0,0 @@ -FROM redis:5.0.4 diff --git a/benchmarks/workloads/redisbenchmark/BUILD b/benchmarks/workloads/redisbenchmark/BUILD deleted file mode 100644 index 147cfedd2..000000000 --- a/benchmarks/workloads/redisbenchmark/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar", "py_library", "py_test") -load("//benchmarks:defs.bzl", "test_deps") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "redisbenchmark", - srcs = ["__init__.py"], -) - -py_test( - name = "redisbenchmark_test", - srcs = ["redisbenchmark_test.py"], - python_version = "PY3", - deps = test_deps + [ - ":redisbenchmark", - ], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/redisbenchmark/Dockerfile b/benchmarks/workloads/redisbenchmark/Dockerfile deleted file mode 100644 index f94f6442e..000000000 --- a/benchmarks/workloads/redisbenchmark/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -FROM redis:5.0.4 -ENV host localhost -ENV port 6379 -CMD ["sh", "-c", "redis-benchmark --csv -h ${host} -p ${port} ${flags}"] diff --git a/benchmarks/workloads/redisbenchmark/__init__.py b/benchmarks/workloads/redisbenchmark/__init__.py deleted file mode 100644 index 229cef5fa..000000000 --- a/benchmarks/workloads/redisbenchmark/__init__.py +++ /dev/null @@ -1,85 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Redis-benchmark tool.""" - -import re - -OPERATIONS = [ - "PING_INLINE", - "PING_BULK", - "SET", - "GET", - "INCR", - "LPUSH", - "RPUSH", - "LPOP", - "RPOP", - "SADD", - "HSET", - "SPOP", - "LRANGE_100", - "LRANGE_300", - "LRANGE_500", - "LRANGE_600", - "MSET", -] - -METRICS = dict() - -SAMPLE_DATA = """ -"PING_INLINE","48661.80" -"PING_BULK","50301.81" -"SET","48923.68" -"GET","49382.71" -"INCR","49975.02" -"LPUSH","49875.31" -"RPUSH","50276.52" -"LPOP","50327.12" -"RPOP","50556.12" -"SADD","49504.95" -"HSET","49504.95" -"SPOP","50025.02" -"LPUSH (needed to benchmark LRANGE)","48875.86" -"LRANGE_100 (first 100 elements)","33955.86" -"LRANGE_300 (first 300 elements)","16550.81" -"LRANGE_500 (first 450 elements)","13653.74" -"LRANGE_600 (first 600 elements)","11219.57" -"MSET (10 keys)","44682.75" -""" - - -# pylint: disable=unused-argument -def sample(**kwargs) -> str: - return SAMPLE_DATA - - -# Bind a metric for each operation noted above. -for op in OPERATIONS: - - def bind(metric): - """Bind op to a new scope.""" - - # pylint: disable=unused-argument - def parse(data: str, **kwargs) -> float: - """Operation throughput in requests/sec.""" - regex = r"\"" + metric + r"( .*)?\",\"(\d*.\d*)" - res = re.compile(regex).search(data) - if res: - return float(res.group(2)) - return 0.0 - - parse.__name__ = metric - return parse - - METRICS[op] = bind(op) diff --git a/benchmarks/workloads/redisbenchmark/redisbenchmark_test.py b/benchmarks/workloads/redisbenchmark/redisbenchmark_test.py deleted file mode 100644 index 419ced059..000000000 --- a/benchmarks/workloads/redisbenchmark/redisbenchmark_test.py +++ /dev/null @@ -1,51 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Parser test.""" - -import sys - -import pytest - -from benchmarks.workloads import redisbenchmark - -RESULTS = { - "PING_INLINE": 48661.80, - "PING_BULK": 50301.81, - "SET": 48923.68, - "GET": 49382.71, - "INCR": 49975.02, - "LPUSH": 49875.31, - "RPUSH": 50276.52, - "LPOP": 50327.12, - "RPOP": 50556.12, - "SADD": 49504.95, - "HSET": 49504.95, - "SPOP": 50025.02, - "LRANGE_100": 33955.86, - "LRANGE_300": 16550.81, - "LRANGE_500": 13653.74, - "LRANGE_600": 11219.57, - "MSET": 44682.75 -} - - -def test_metrics(): - """Test all metrics.""" - for (metric, func) in redisbenchmark.METRICS.items(): - res = func(redisbenchmark.sample()) - assert float(res) == RESULTS[metric] - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__])) diff --git a/benchmarks/workloads/ruby/BUILD b/benchmarks/workloads/ruby/BUILD deleted file mode 100644 index a3be4fe92..000000000 --- a/benchmarks/workloads/ruby/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -filegroup( - name = "files", - srcs = [ - "Dockerfile", - "Gemfile", - "Gemfile.lock", - "config.ru", - "index.rb", - ], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - "Gemfile", - "Gemfile.lock", - "config.ru", - "index.rb", - ], -) diff --git a/benchmarks/workloads/ruby/Dockerfile b/benchmarks/workloads/ruby/Dockerfile deleted file mode 100644 index a9a7a7086..000000000 --- a/benchmarks/workloads/ruby/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# example based on https://github.com/errm/fib - -FROM ruby:2.5 - -RUN apt-get update -qq && apt-get install -y build-essential libpq-dev nodejs libsodium-dev - -# Set an environment variable where the Rails app is installed to inside of Docker image -ENV RAILS_ROOT /var/www/app_name -RUN mkdir -p $RAILS_ROOT - -# Set working directory -WORKDIR $RAILS_ROOT - -# Setting env up -ENV RAILS_ENV='production' -ENV RACK_ENV='production' - -# Adding gems -COPY Gemfile Gemfile -COPY Gemfile.lock Gemfile.lock -RUN bundle install --jobs 20 --retry 5 --without development test - -# Adding project files -COPY . . 
- -EXPOSE $PORT -STOPSIGNAL SIGINT -CMD ["bundle", "exec", "puma", "config.ru"] diff --git a/benchmarks/workloads/ruby/Gemfile b/benchmarks/workloads/ruby/Gemfile deleted file mode 100644 index 8f1bdad6e..000000000 --- a/benchmarks/workloads/ruby/Gemfile +++ /dev/null @@ -1,12 +0,0 @@ -source "https://rubygems.org" -# load a bunch of dependencies to take up memory -gem "sinatra" -gem "puma" -gem "redis" -gem 'rake' -gem 'squid', '~> 1.4' -gem 'cassandra-driver' -gem 'ruby-fann' -gem 'rbnacl' -gem 'bcrypt' -gem "activemerchant" \ No newline at end of file diff --git a/benchmarks/workloads/ruby/Gemfile.lock b/benchmarks/workloads/ruby/Gemfile.lock deleted file mode 100644 index 82bfc0c79..000000000 --- a/benchmarks/workloads/ruby/Gemfile.lock +++ /dev/null @@ -1,73 +0,0 @@ -GEM - remote: https://rubygems.org/ - specs: - activemerchant (1.105.0) - activesupport (>= 4.2) - builder (>= 2.1.2, < 4.0.0) - i18n (>= 0.6.9) - nokogiri (~> 1.4) - activesupport (6.0.3.2) - concurrent-ruby (~> 1.0, >= 1.0.2) - i18n (>= 0.7, < 2) - minitest (~> 5.1) - tzinfo (~> 1.1) - zeitwerk (~> 2.2, >= 2.2.2) - bcrypt (3.1.13) - builder (3.2.4) - cassandra-driver (3.2.3) - ione (~> 1.2) - concurrent-ruby (1.1.6) - ffi (1.12.2) - i18n (1.8.5) - concurrent-ruby (~> 1.0) - ione (1.2.4) - mini_portile2 (2.4.0) - minitest (5.14.1) - mustermann (1.0.3) - nokogiri (1.10.8) - mini_portile2 (~> 2.4.0) - pdf-core (0.7.0) - prawn (2.2.2) - pdf-core (~> 0.7.0) - ttfunk (~> 1.5) - puma (3.12.4) - rack (2.2.2) - rack-protection (2.0.5) - rack - rake (12.3.3) - rbnacl (7.1.1) - ffi - redis (4.1.1) - ruby-fann (1.2.6) - sinatra (2.0.5) - mustermann (~> 1.0) - rack (~> 2.0) - rack-protection (= 2.0.5) - tilt (~> 2.0) - squid (1.4.1) - activesupport (>= 4.0) - prawn (~> 2.2) - thread_safe (0.3.6) - tilt (2.0.9) - ttfunk (1.5.1) - tzinfo (1.2.7) - thread_safe (~> 0.1) - zeitwerk (2.4.0) - -PLATFORMS - ruby - -DEPENDENCIES - activemerchant - bcrypt - cassandra-driver - puma - rake - rbnacl - redis - ruby-fann - sinatra - squid (~> 1.4) - -BUNDLED WITH - 1.17.1 diff --git a/benchmarks/workloads/ruby/config.ru b/benchmarks/workloads/ruby/config.ru deleted file mode 100755 index fbd5acc82..000000000 --- a/benchmarks/workloads/ruby/config.ru +++ /dev/null @@ -1,2 +0,0 @@ -require './index' -run Sinatra::Application \ No newline at end of file diff --git a/benchmarks/workloads/ruby/index.rb b/benchmarks/workloads/ruby/index.rb deleted file mode 100755 index 5fa85af93..000000000 --- a/benchmarks/workloads/ruby/index.rb +++ /dev/null @@ -1,14 +0,0 @@ -require "sinatra" -require "puma" -require "redis" -require "rake" -require "squid" -require "cassandra" -require "ruby-fann" -require "rbnacl" -require "bcrypt" -require "activemerchant" - -get "/" do - "Hello World!" 
-end \ No newline at end of file diff --git a/benchmarks/workloads/ruby_template/BUILD b/benchmarks/workloads/ruby_template/BUILD deleted file mode 100644 index 72ed9403d..000000000 --- a/benchmarks/workloads/ruby_template/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - "Gemfile", - "Gemfile.lock", - "config.ru", - "index.erb", - "main.rb", - ], -) diff --git a/benchmarks/workloads/ruby_template/Dockerfile b/benchmarks/workloads/ruby_template/Dockerfile deleted file mode 100755 index a06d68bf4..000000000 --- a/benchmarks/workloads/ruby_template/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -# example based on https://github.com/errm/fib - -FROM alpine:3.9 as build - -COPY Gemfile Gemfile.lock ./ - -RUN apk add --no-cache ruby ruby-dev ruby-bundler ruby-json build-base bash \ - && bundle install --frozen -j4 -r3 --no-cache --without development \ - && apk del --no-cache ruby-bundler \ - && rm -rf /usr/lib/ruby/gems/*/cache - -FROM alpine:3.9 as prod - -COPY --from=build /usr/lib/ruby/gems /usr/lib/ruby/gems -RUN apk add --no-cache ruby ruby-json ruby-etc redis apache2-utils \ - && ruby -e "Gem::Specification.map.each do |spec| \ - Gem::Installer.for_spec( \ - spec, \ - wrappers: true, \ - force: true, \ - install_dir: spec.base_dir, \ - build_args: spec.build_args, \ - ).generate_bin \ - end" - -WORKDIR /app -COPY . /app/. - -ENV PORT=9292 \ - WEB_CONCURRENCY=20 \ - WEB_MAX_THREADS=20 \ - RACK_ENV=production - -ENV host localhost -EXPOSE $PORT -USER nobody -STOPSIGNAL SIGINT -CMD ["sh", "-c", "/usr/bin/puma", "${host}"] diff --git a/benchmarks/workloads/ruby_template/Gemfile b/benchmarks/workloads/ruby_template/Gemfile deleted file mode 100755 index ac521b32c..000000000 --- a/benchmarks/workloads/ruby_template/Gemfile +++ /dev/null @@ -1,5 +0,0 @@ -source "https://rubygems.org" - -gem "sinatra" -gem "puma" -gem "redis" \ No newline at end of file diff --git a/benchmarks/workloads/ruby_template/Gemfile.lock b/benchmarks/workloads/ruby_template/Gemfile.lock deleted file mode 100644 index eeb3c7bbe..000000000 --- a/benchmarks/workloads/ruby_template/Gemfile.lock +++ /dev/null @@ -1,26 +0,0 @@ -GEM - remote: https://rubygems.org/ - specs: - mustermann (1.0.3) - puma (3.12.6) - rack (2.0.6) - rack-protection (2.0.5) - rack - redis (4.1.0) - sinatra (2.0.5) - mustermann (~> 1.0) - rack (~> 2.0) - rack-protection (= 2.0.5) - tilt (~> 2.0) - tilt (2.0.9) - -PLATFORMS - ruby - -DEPENDENCIES - puma - redis - sinatra - -BUNDLED WITH - 1.17.1 \ No newline at end of file diff --git a/benchmarks/workloads/ruby_template/config.ru b/benchmarks/workloads/ruby_template/config.ru deleted file mode 100755 index b2d135cc0..000000000 --- a/benchmarks/workloads/ruby_template/config.ru +++ /dev/null @@ -1,2 +0,0 @@ -require './main' -run Sinatra::Application \ No newline at end of file diff --git a/benchmarks/workloads/ruby_template/index.erb b/benchmarks/workloads/ruby_template/index.erb deleted file mode 100755 index 7f7300e80..000000000 --- a/benchmarks/workloads/ruby_template/index.erb +++ /dev/null @@ -1,8 +0,0 @@ - - - - <% text.each do |t| %> -

<%= t %>

- <% end %> - - diff --git a/benchmarks/workloads/ruby_template/main.rb b/benchmarks/workloads/ruby_template/main.rb deleted file mode 100755 index 35c239377..000000000 --- a/benchmarks/workloads/ruby_template/main.rb +++ /dev/null @@ -1,27 +0,0 @@ -require "sinatra" -require "securerandom" -require "redis" - -redis_host = ENV["host"] -$redis = Redis.new(host: redis_host) - -def generateText - for i in 0..99 - $redis.set(i, randomBody(1024)) - end -end - -def randomBody(length) - return SecureRandom.alphanumeric(length) -end - -generateText -template = ERB.new(File.read('./index.erb')) - -get "/" do - texts = Array.new - for i in 0..4 - texts.push($redis.get(rand(0..99))) - end - template.result_with_hash(text: texts) -end \ No newline at end of file diff --git a/benchmarks/workloads/sleep/BUILD b/benchmarks/workloads/sleep/BUILD deleted file mode 100644 index a70873065..000000000 --- a/benchmarks/workloads/sleep/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/sleep/Dockerfile b/benchmarks/workloads/sleep/Dockerfile deleted file mode 100644 index 24c72e07a..000000000 --- a/benchmarks/workloads/sleep/Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -FROM alpine:latest - -CMD ["sleep", "315360000"] diff --git a/benchmarks/workloads/sysbench/BUILD b/benchmarks/workloads/sysbench/BUILD deleted file mode 100644 index ab2556064..000000000 --- a/benchmarks/workloads/sysbench/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar", "py_library", "py_test") -load("//benchmarks:defs.bzl", "test_deps") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "sysbench", - srcs = ["__init__.py"], -) - -py_test( - name = "sysbench_test", - srcs = ["sysbench_test.py"], - python_version = "PY3", - deps = test_deps + [ - ":sysbench", - ], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/sysbench/Dockerfile b/benchmarks/workloads/sysbench/Dockerfile deleted file mode 100644 index 8225e0e14..000000000 --- a/benchmarks/workloads/sysbench/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM ubuntu:18.04 - -RUN set -x \ - && apt-get update \ - && apt-get install -y \ - sysbench \ - && rm -rf /var/lib/apt/lists/* - -# Parameterize the tests. -ENV test cpu -ENV threads 1 -ENV options "" - -# run sysbench once as a warm-up and take the second result -CMD ["sh", "-c", "sysbench --threads=8 --memory-total-size=5G memory run > /dev/null && \ -sysbench --threads=${threads} ${options} ${test} run"] diff --git a/benchmarks/workloads/sysbench/__init__.py b/benchmarks/workloads/sysbench/__init__.py deleted file mode 100644 index de357b4db..000000000 --- a/benchmarks/workloads/sysbench/__init__.py +++ /dev/null @@ -1,167 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""Sysbench.""" - -import re - -STD_REGEX = r"events per second:\s*(\d*.?\d*)\n" -MEM_REGEX = r"Total\soperations:\s+\d*\s*\((\d*\.\d*)\sper\ssecond\)" -ALT_REGEX = r"execution time \(avg/stddev\):\s*(\d*.?\d*)/(\d*.?\d*)" -AVG_REGEX = r"avg:[^\n^\d]*(\d*\.?\d*)" - -SAMPLE_CPU_DATA = """ -sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3) - -Running the test with following options: -Number of threads: 8 -Initializing random number generator from current time - - -Prime numbers limit: 10000 - -Initializing worker threads... - -Threads started! - -CPU speed: - events per second: 9093.38 - -General statistics: - total time: 10.0007s - total number of events: 90949 - -Latency (ms): - min: 0.64 - avg: 0.88 - max: 24.65 - 95th percentile: 1.55 - sum: 79936.91 - -Threads fairness: - events (avg/stddev): 11368.6250/831.38 - execution time (avg/stddev): 9.9921/0.01 -""" - -SAMPLE_MEMORY_DATA = """ -sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3) - -Running the test with following options: -Number of threads: 8 -Initializing random number generator from current time - - -Running memory speed test with the following options: - block size: 1KiB - total size: 102400MiB - operation: write - scope: global - -Initializing worker threads... - -Threads started! - -Total operations: 47999046 (9597428.64 per second) - -46874.07 MiB transferred (9372.49 MiB/sec) - - -General statistics: - total time: 5.0001s - total number of events: 47999046 - -Latency (ms): - min: 0.00 - avg: 0.00 - max: 0.21 - 95th percentile: 0.00 - sum: 33165.91 - -Threads fairness: - events (avg/stddev): 5999880.7500/111242.52 - execution time (avg/stddev): 4.1457/0.09 -""" - -SAMPLE_MUTEX_DATA = """ -sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3) - -Running the test with following options: -Number of threads: 8 -Initializing random number generator from current time - - -Initializing worker threads... - -Threads started! 
- - -General statistics: - total time: 3.7869s - total number of events: 8 - -Latency (ms): - min: 3688.56 - avg: 3754.03 - max: 3780.94 - 95th percentile: 3773.42 - sum: 30032.28 - -Threads fairness: - events (avg/stddev): 1.0000/0.00 - execution time (avg/stddev): 3.7540/0.03 -""" - - -# pylint: disable=unused-argument -def sample(test, **kwargs): - switch = { - "cpu": SAMPLE_CPU_DATA, - "memory": SAMPLE_MEMORY_DATA, - "mutex": SAMPLE_MUTEX_DATA, - "randwr": SAMPLE_CPU_DATA - } - return switch[test] - - -# pylint: disable=unused-argument -def cpu_events_per_second(data: str, **kwargs) -> float: - """Returns events per second.""" - return float(re.compile(STD_REGEX).search(data).group(1)) - - -# pylint: disable=unused-argument -def memory_ops_per_second(data: str, **kwargs) -> float: - """Returns memory operations per second.""" - return float(re.compile(MEM_REGEX).search(data).group(1)) - - -# pylint: disable=unused-argument -def mutex_time(data: str, count: int, locks: int, threads: int, - **kwargs) -> float: - """Returns normalized mutex time (lower is better).""" - value = float(re.compile(ALT_REGEX).search(data).group(1)) - contention = float(threads) / float(locks) - scale = contention * float(count) / 100000000.0 - return value / scale - - -# pylint: disable=unused-argument -def mutex_deviation(data: str, **kwargs) -> float: - """Returns deviation for threads.""" - return float(re.compile(ALT_REGEX).search(data).group(2)) - - -# pylint: disable=unused-argument -def mutex_latency(data: str, **kwargs) -> float: - """Returns average mutex latency.""" - return float(re.compile(AVG_REGEX).search(data).group(1)) diff --git a/benchmarks/workloads/sysbench/sysbench_test.py b/benchmarks/workloads/sysbench/sysbench_test.py deleted file mode 100644 index 3fb541fd2..000000000 --- a/benchmarks/workloads/sysbench/sysbench_test.py +++ /dev/null @@ -1,34 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Parser test.""" - -import sys - -import pytest - -from benchmarks.workloads import sysbench - - -def test_sysbench_parser(): - """Test the basic parser.""" - assert sysbench.cpu_events_per_second(sysbench.sample("cpu")) == 9093.38 - assert sysbench.memory_ops_per_second(sysbench.sample("memory")) == 9597428.64 - assert sysbench.mutex_time(sysbench.sample("mutex"), 1, 1, - 100000000.0) == 3.754 - assert sysbench.mutex_deviation(sysbench.sample("mutex")) == 0.03 - assert sysbench.mutex_latency(sysbench.sample("mutex")) == 3754.03 - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__])) diff --git a/benchmarks/workloads/syscall/BUILD b/benchmarks/workloads/syscall/BUILD deleted file mode 100644 index f8c43bca1..000000000 --- a/benchmarks/workloads/syscall/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar", "py_library", "py_test") -load("//benchmarks:defs.bzl", "test_deps") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "syscall", - srcs = ["__init__.py"], -) - -py_test( - name = "syscall_test", - srcs = ["syscall_test.py"], - python_version = "PY3", - deps = test_deps + [ - ":syscall", - ], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - "syscall.c", - ], -) diff --git a/benchmarks/workloads/syscall/Dockerfile b/benchmarks/workloads/syscall/Dockerfile deleted file mode 100644 index a2088d953..000000000 --- a/benchmarks/workloads/syscall/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM gcc:latest -COPY . /usr/src/syscall -WORKDIR /usr/src/syscall -RUN gcc -O2 -o syscall syscall.c -ENV count 1000000 -CMD ["sh", "-c", "./syscall ${count}"] diff --git a/benchmarks/workloads/syscall/__init__.py b/benchmarks/workloads/syscall/__init__.py deleted file mode 100644 index dc9028faa..000000000 --- a/benchmarks/workloads/syscall/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Simple syscall test.""" - -import re - -SAMPLE_DATA = "Called getpid syscall 1000000 times: 1117 ms, 500 ns each." - - -# pylint: disable=unused-argument -def sample(**kwargs) -> str: - return SAMPLE_DATA - - -# pylint: disable=unused-argument -def syscall_time_ns(data: str, **kwargs) -> int: - """Returns average system call time.""" - return float(re.compile(r"(\d+)\sns each.").search(data).group(1)) diff --git a/benchmarks/workloads/syscall/syscall.c b/benchmarks/workloads/syscall/syscall.c deleted file mode 100644 index ded030397..000000000 --- a/benchmarks/workloads/syscall/syscall.c +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include - -// Short program that calls getpid() a number of times and outputs time -// diference from the MONOTONIC clock. -int main(int argc, char** argv) { - struct timespec start, stop; - long result; - char buf[80]; - - if (argc < 2) { - printf("Usage:./syscall NUM_TIMES_TO_CALL"); - return 1; - } - - if (clock_gettime(CLOCK_MONOTONIC, &start)) return 1; - - long loops = atoi(argv[1]); - for (long i = 0; i < loops; i++) { - syscall(SYS_gettimeofday, 0, 0); - } - - if (clock_gettime(CLOCK_MONOTONIC, &stop)) return 1; - - if ((stop.tv_nsec - start.tv_nsec) < 0) { - result = (stop.tv_sec - start.tv_sec - 1) * 1000; - result += (stop.tv_nsec - start.tv_nsec + 1000000000) / (1000 * 1000); - } else { - result = (stop.tv_sec - start.tv_sec) * 1000; - result += (stop.tv_nsec - start.tv_nsec) / (1000 * 1000); - } - - printf("Called getpid syscall %d times: %lu ms, %lu ns each.\n", loops, - result, result * 1000000 / loops); - - return 0; -} diff --git a/benchmarks/workloads/syscall/syscall_test.py b/benchmarks/workloads/syscall/syscall_test.py deleted file mode 100644 index 72f027de1..000000000 --- a/benchmarks/workloads/syscall/syscall_test.py +++ /dev/null @@ -1,27 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys - -import pytest - -from benchmarks.workloads import syscall - - -def test_syscall_time_ns(): - assert syscall.syscall_time_ns(syscall.sample()) == 500 - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__])) diff --git a/benchmarks/workloads/tensorflow/BUILD b/benchmarks/workloads/tensorflow/BUILD deleted file mode 100644 index a7b7742f4..000000000 --- a/benchmarks/workloads/tensorflow/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -py_library( - name = "tensorflow", - srcs = ["__init__.py"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], -) diff --git a/benchmarks/workloads/tensorflow/Dockerfile b/benchmarks/workloads/tensorflow/Dockerfile deleted file mode 100644 index eefe6b3eb..000000000 --- a/benchmarks/workloads/tensorflow/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM tensorflow/tensorflow:1.13.2 - -RUN apt-get update \ - && apt-get install -y git -RUN git clone --depth 1 https://github.com/aymericdamien/TensorFlow-Examples.git -RUN python -m pip install --no-cache-dir -U pip setuptools -RUN python -m pip install --no-cache-dir matplotlib - -WORKDIR /TensorFlow-Examples/examples - -ENV PYTHONPATH="$PYTHONPATH:/TensorFlow-Examples/examples" - -ENV workload "3_NeuralNetworks/convolutional_network.py" -CMD python ${workload} diff --git a/benchmarks/workloads/tensorflow/__init__.py b/benchmarks/workloads/tensorflow/__init__.py deleted file mode 100644 index b5ec213f8..000000000 --- a/benchmarks/workloads/tensorflow/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""A Tensorflow example.""" - - -# pylint: disable=unused-argument -def run_time(value, **kwargs): - """Returns the startup and runtime of the Tensorflow workload in seconds.""" - return value diff --git a/benchmarks/workloads/true/BUILD b/benchmarks/workloads/true/BUILD deleted file mode 100644 index eba23d325..000000000 --- a/benchmarks/workloads/true/BUILD +++ /dev/null @@ -1,14 +0,0 @@ -load("//tools:defs.bzl", "pkg_tar") - -package( - default_visibility = ["//benchmarks:__subpackages__"], - licenses = ["notice"], -) - -pkg_tar( - name = "tar", - srcs = [ - "Dockerfile", - ], - extension = "tar", -) diff --git a/benchmarks/workloads/true/Dockerfile b/benchmarks/workloads/true/Dockerfile deleted file mode 100644 index 2e97c921e..000000000 --- a/benchmarks/workloads/true/Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -FROM alpine:latest - -CMD ["true"] diff --git a/scripts/simple_tests.sh b/scripts/simple_tests.sh index 3a15050c2..585216aae 100755 --- a/scripts/simple_tests.sh +++ b/scripts/simple_tests.sh @@ -17,4 +17,4 @@ source $(dirname $0)/common.sh # Run all simple tests (locally). -test //pkg/... //runsc/... //tools/... //benchmarks/... //benchmarks/runner:runner_test +test //pkg/... //runsc/... //tools/... 
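(The Python workload parsers deleted above were thin regex wrappers over raw benchmark output; the syscall workload, for example, pulled the per-call latency out of a line like `Called getpid syscall 1000000 times: 1117 ms, 500 ns each.`. As a rough, hypothetical sketch only, not code added by this change, an equivalent extraction in Go, the language of the new test/benchmarks targets below, could look like the following; the package, function, and variable names are invented for illustration.)

```
// A sketch of the removed benchmarks/workloads/syscall/__init__.py parser,
// rewritten in Go. Illustrative only; not part of this change.
package main

import (
	"fmt"
	"regexp"
	"strconv"
)

// sample mirrors SAMPLE_DATA from the removed Python module.
const sample = "Called getpid syscall 1000000 times: 1117 ms, 500 ns each."

// nsEach is the same pattern the Python parser used: the integer just
// before "ns each.".
var nsEach = regexp.MustCompile(`(\d+)\sns each.`)

// syscallTimeNS returns the average per-syscall time in nanoseconds.
func syscallTimeNS(data string) (int, error) {
	m := nsEach.FindStringSubmatch(data)
	if m == nil {
		return 0, fmt.Errorf("no per-call time in %q", data)
	}
	return strconv.Atoi(m[1])
}

func main() {
	ns, err := syscallTimeNS(sample)
	if err != nil {
		panic(err)
	}
	fmt.Println(ns) // prints 500
}
```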
diff --git a/test/benchmarks/tcp/BUILD b/test/benchmarks/tcp/BUILD new file mode 100644 index 000000000..6dde7d9e6 --- /dev/null +++ b/test/benchmarks/tcp/BUILD @@ -0,0 +1,41 @@ +load("//tools:defs.bzl", "cc_binary", "go_binary") + +package(licenses = ["notice"]) + +go_binary( + name = "tcp_proxy", + srcs = ["tcp_proxy.go"], + visibility = ["//:sandbox"], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/adapters/gonet", + "//pkg/tcpip/link/fdbased", + "//pkg/tcpip/link/qdisc/fifo", + "//pkg/tcpip/network/arp", + "//pkg/tcpip/network/ipv4", + "//pkg/tcpip/stack", + "//pkg/tcpip/transport/tcp", + "//pkg/tcpip/transport/udp", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +# nsjoin is a trivial replacement for nsenter. This is used because nsenter is +# not available on all systems where this benchmark is run (and we aim to +# minimize external dependencies.) + +cc_binary( + name = "nsjoin", + srcs = ["nsjoin.c"], + visibility = ["//:sandbox"], +) + +sh_binary( + name = "tcp_benchmark", + srcs = ["tcp_benchmark.sh"], + data = [ + ":nsjoin", + ":tcp_proxy", + ], + visibility = ["//:sandbox"], +) diff --git a/test/benchmarks/tcp/README.md b/test/benchmarks/tcp/README.md new file mode 100644 index 000000000..38e6e69f0 --- /dev/null +++ b/test/benchmarks/tcp/README.md @@ -0,0 +1,87 @@ +# TCP Benchmarks + +This directory contains a standardized TCP benchmark. This helps to evaluate the +performance of netstack and native networking stacks under various conditions. + +## `tcp_benchmark` + +This benchmark allows TCP throughput testing under various conditions. The setup +consists of an iperf client, a client proxy, a server proxy and an iperf server. +The client proxy and server proxy abstract the network mechanism used to +communicate between the iperf client and server. + +The setup looks like the following: + +``` + +--------------+ (native) +--------------+ + | iperf client |[lo @ 10.0.0.1]------>| client proxy | + +--------------+ +--------------+ + [client.0 @ 10.0.0.2] + (netstack) | | (native) + +------+-----+ + | + [br0] + | + Network emulation applied ---> [wan.0:wan.1] + | + [br1] + | + +------+-----+ + (netstack) | | (native) + [server.0 @ 10.0.0.3] + +--------------+ +--------------+ + | iperf server |<------[lo @ 10.0.0.4]| server proxy | + +--------------+ (native) +--------------+ +``` + +Different configurations can be run using different arguments. For example: + +* Native test under normal internet conditions: `tcp_benchmark` +* Native test under ideal conditions: `tcp_benchmark --ideal` +* Netstack client under ideal conditions: `tcp_benchmark --client --ideal` +* Netstack client with 5% packet loss: `tcp_benchmark --client --ideal --loss + 5` + +Use `tcp_benchmark --help` for full arguments. + +This tool may be used to easily generate data for graphing. 
For example, to +generate a CSV for various latencies, you might do: + +``` +rm -f /tmp/netstack_latency.csv /tmp/native_latency.csv +latencies=$(seq 0 5 50; + seq 60 10 100; + seq 125 25 250; + seq 300 50 500) +for latency in $latencies; do + read throughput client_cpu server_cpu <<< \ + $(./tcp_benchmark --duration 30 --client --ideal --latency $latency) + echo $latency,$throughput,$client_cpu >> /tmp/netstack_latency.csv +done +for latency in $latencies; do + read throughput client_cpu server_cpu <<< \ + $(./tcp_benchmark --duration 30 --ideal --latency $latency) + echo $latency,$throughput,$client_cpu >> /tmp/native_latency.csv +done +``` + +Similarly, to generate a CSV for various levels of packet loss, the following +would be appropriate: + +``` +rm -f /tmp/netstack_loss.csv /tmp/native_loss.csv +losses=$(seq 0 0.1 1.0; + seq 1.2 0.2 2.0; + seq 2.5 0.5 5.0; + seq 6.0 1.0 10.0) +for loss in $losses; do + read throughput client_cpu server_cpu <<< \ + $(./tcp_benchmark --duration 30 --client --ideal --latency 10 --loss $loss) + echo $loss,$throughput,$client_cpu >> /tmp/netstack_loss.csv +done +for loss in $losses; do + read throughput client_cpu server_cpu <<< \ + $(./tcp_benchmark --duration 30 --ideal --latency 10 --loss $loss) + echo $loss,$throughput,$client_cpu >> /tmp/native_loss.csv +done +``` diff --git a/test/benchmarks/tcp/nsjoin.c b/test/benchmarks/tcp/nsjoin.c new file mode 100644 index 000000000..524b4d549 --- /dev/null +++ b/test/benchmarks/tcp/nsjoin.c @@ -0,0 +1,47 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + if (argc <= 2) { + fprintf(stderr, "error: must provide a namespace file.\n"); + fprintf(stderr, "usage: %s [arguments...]\n", argv[0]); + return 1; + } + + int fd = open(argv[1], O_RDONLY); + if (fd < 0) { + fprintf(stderr, "error opening %s: %s\n", argv[1], strerror(errno)); + return 1; + } + if (setns(fd, 0) < 0) { + fprintf(stderr, "error joining %s: %s\n", argv[1], strerror(errno)); + return 1; + } + + execvp(argv[2], &argv[2]); + return 1; +} diff --git a/test/benchmarks/tcp/tcp_benchmark.sh b/test/benchmarks/tcp/tcp_benchmark.sh new file mode 100755 index 000000000..ef04b4ace --- /dev/null +++ b/test/benchmarks/tcp/tcp_benchmark.sh @@ -0,0 +1,392 @@ +#!/bin/bash + +# Copyright 2018 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# TCP benchmark; see README.md for documentation. + +# Fixed parameters. +iperf_port=45201 # Not likely to be privileged. +proxy_port=44000 # Ditto. +client_addr=10.0.0.1 +client_proxy_addr=10.0.0.2 +server_proxy_addr=10.0.0.3 +server_addr=10.0.0.4 +mask=8 + +# Defaults; this provides a reasonable approximation of a decent internet link. +# Parameters can be varied independently from this set to see response to +# various changes in the kind of link available. +client=false +server=false +verbose=false +gso=0 +swgso=false +mtu=1280 # 1280 is a reasonable lowest-common-denominator. +latency=10 # 10ms approximates a fast, dedicated connection. +latency_variation=1 # +/- 1ms is a relatively low amount of jitter. +loss=0.1 # 0.1% loss is non-zero, but not extremely high. +duplicate=0.1 # 0.1% means duplicates are 1/10x as frequent as losses. +duration=30 # 30s is enough time to consistent results (experimentally). +helper_dir=$(dirname $0) +netstack_opts= +disable_linux_gso= +num_client_threads=1 + +# Check for netem support. +lsmod_output=$(lsmod | grep sch_netem) +if [ "$?" != "0" ]; then + echo "warning: sch_netem may not be installed." >&2 +fi + +while [ $# -gt 0 ]; do + case "$1" in + --client) + client=true + ;; + --client_tcp_probe_file) + shift + netstack_opts="${netstack_opts} -client_tcp_probe_file=$1" + ;; + --server) + server=true + ;; + --verbose) + verbose=true + ;; + --gso) + shift + gso=$1 + ;; + --swgso) + swgso=true + ;; + --server_tcp_probe_file) + shift + netstack_opts="${netstack_opts} -server_tcp_probe_file=$1" + ;; + --ideal) + mtu=1500 # Standard ethernet. + latency=0 # No latency. + latency_variation=0 # No jitter. + loss=0 # No loss. + duplicate=0 # No duplicates. 
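+    # With --ideal the netem qdisc configured below adds no delay, jitter,
+    # loss or duplication.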
+ ;; + --mtu) + shift + [ "$#" -le 0 ] && echo "no mtu provided" && exit 1 + mtu=$1 + ;; + --sack) + netstack_opts="${netstack_opts} -sack" + ;; + --cubic) + netstack_opts="${netstack_opts} -cubic" + ;; + --moderate-recv-buf) + netstack_opts="${netstack_opts} -moderate_recv_buf" + ;; + --duration) + shift + [ "$#" -le 0 ] && echo "no duration provided" && exit 1 + duration=$1 + ;; + --latency) + shift + [ "$#" -le 0 ] && echo "no latency provided" && exit 1 + latency=$1 + ;; + --latency-variation) + shift + [ "$#" -le 0 ] && echo "no latency variation provided" && exit 1 + latency_variation=$1 + ;; + --loss) + shift + [ "$#" -le 0 ] && echo "no loss probability provided" && exit 1 + loss=$1 + ;; + --duplicate) + shift + [ "$#" -le 0 ] && echo "no duplicate provided" && exit 1 + duplicate=$1 + ;; + --cpuprofile) + shift + netstack_opts="${netstack_opts} -cpuprofile=$1" + ;; + --memprofile) + shift + netstack_opts="${netstack_opts} -memprofile=$1" + ;; + --disable-linux-gso) + disable_linux_gso=1 + ;; + --num-client-threads) + shift + num_client_threads=$1 + ;; + --helpers) + shift + [ "$#" -le 0 ] && echo "no helper dir provided" && exit 1 + helper_dir=$1 + ;; + *) + echo "usage: $0 [options]" + echo "options:" + echo " --help show this message" + echo " --verbose verbose output" + echo " --client use netstack as the client" + echo " --ideal reset all network emulation" + echo " --server use netstack as the server" + echo " --mtu set the mtu (bytes)" + echo " --sack enable SACK support" + echo " --moderate-recv-buf enable TCP receive buffer auto-tuning" + echo " --cubic enable CUBIC congestion control for Netstack" + echo " --duration set the test duration (s)" + echo " --latency set the latency (ms)" + echo " --latency-variation set the latency variation" + echo " --loss set the loss probability (%)" + echo " --duplicate set the duplicate probability (%)" + echo " --helpers set the helper directory" + echo " --num-client-threads number of parallel client threads to run" + echo " --disable-linux-gso disable segmentation offload in the Linux network stack" + echo "" + echo "The output will of the script will be:" + echo " " + exit 1 + esac + shift +done + +if [ ${verbose} == "true" ]; then + set -x +fi + +# Latency needs to be halved, since it's applied on both ways. +half_latency=$(echo ${latency}/2 | bc -l | awk '{printf "%1.2f", $0}') +half_loss=$(echo ${loss}/2 | bc -l | awk '{printf "%1.6f", $0}') +half_duplicate=$(echo ${duplicate}/2 | bc -l | awk '{printf "%1.6f", $0}') +helper_dir=${helper_dir#$(pwd)/} # Use relative paths. +proxy_binary=${helper_dir}/tcp_proxy +nsjoin_binary=${helper_dir}/nsjoin + +if [ ! -e ${proxy_binary} ]; then + echo "Could not locate ${proxy_binary}, please make sure you've built the binary" + exit 1 +fi + +if [ ! -e ${nsjoin_binary} ]; then + echo "Could not locate ${nsjoin_binary}, please make sure you've built the binary" + exit 1 +fi + +if [ $(echo ${latency_variation} | awk '{printf "%1.2f", $0}') != "0.00" ]; then + # As long as there's some jitter, then we use the paretonormal distribution. + # This will preserve the minimum RTT, but add a realistic amount of jitter to + # the connection and cause re-ordering, etc. The regular pareto distribution + # appears to an unreasonable level of delay (we want only small spikes.) + distribution="distribution paretonormal" +else + distribution="" +fi + +# Client proxy that will listen on the client's iperf target forward traffic +# using the host networking stack. 
+client_args="${proxy_binary} -port ${proxy_port} -forward ${server_proxy_addr}:${proxy_port}" +if ${client}; then + # Client proxy that will listen on the client's iperf target + # and forward traffic using netstack. + client_args="${proxy_binary} ${netstack_opts} -port ${proxy_port} -client \\ + -mtu ${mtu} -iface client.0 -addr ${client_proxy_addr} -mask ${mask} \\ + -forward ${server_proxy_addr}:${proxy_port} -gso=${gso} -swgso=${swgso}" +fi + +# Server proxy that will listen on the proxy port and forward to the server's +# iperf server using the host networking stack. +server_args="${proxy_binary} -port ${proxy_port} -forward ${server_addr}:${iperf_port}" +if ${server}; then + # Server proxy that will listen on the proxy port and forward to the servers' + # iperf server using netstack. + server_args="${proxy_binary} ${netstack_opts} -port ${proxy_port} -server \\ + -mtu ${mtu} -iface server.0 -addr ${server_proxy_addr} -mask ${mask} \\ + -forward ${server_addr}:${iperf_port} -gso=${gso} -swgso=${swgso}" +fi + +# Specify loss and duplicate parameters only if they are non-zero +loss_opt="" +if [ "$(echo $half_loss | bc -q)" != "0" ]; then + loss_opt="loss random ${half_loss}%" +fi +duplicate_opt="" +if [ "$(echo $half_duplicate | bc -q)" != "0" ]; then + duplicate_opt="duplicate ${half_duplicate}%" +fi + +exec unshare -U -m -n -r -f -p --mount-proc /bin/bash << EOF +set -e -m + +if [ ${verbose} == "true" ]; then + set -x +fi + +mount -t tmpfs netstack-bench /tmp + +# We may have reset the path in the unshare if the shell loaded some public +# profiles. Ensure that tools are discoverable via the parent's PATH. +export PATH=${PATH} + +# Add client, server interfaces. +ip link add client.0 type veth peer name client.1 +ip link add server.0 type veth peer name server.1 + +# Add network emulation devices. +ip link add wan.0 type veth peer name wan.1 +ip link set wan.0 up +ip link set wan.1 up + +# Enroll on the bridge. +ip link add name br0 type bridge +ip link add name br1 type bridge +ip link set client.1 master br0 +ip link set server.1 master br1 +ip link set wan.0 master br0 +ip link set wan.1 master br1 +ip link set br0 up +ip link set br1 up + +# Set the MTU appropriately. +ip link set client.0 mtu ${mtu} +ip link set server.0 mtu ${mtu} +ip link set wan.0 mtu ${mtu} +ip link set wan.1 mtu ${mtu} + +# Add appropriate latency, loss and duplication. +# +# This is added in at the point of bridge connection. +for device in wan.0 wan.1; do + # NOTE: We don't support a loss correlation as testing has shown that it + # actually doesn't work. The man page actually has a small comment about this + # "It is also possible to add a correlation, but this option is now deprecated + # due to the noticed bad behavior." For more information see netem(8). + tc qdisc add dev \$device root netem \\ + delay ${half_latency}ms ${latency_variation}ms ${distribution} \\ + ${loss_opt} ${duplicate_opt} +done + +# Start a client proxy. +touch /tmp/client.netns +unshare -n mount --bind /proc/self/ns/net /tmp/client.netns + +# Move the endpoint into the namespace. +while ip link | grep client.0 > /dev/null; do + ip link set dev client.0 netns /tmp/client.netns +done + +if ! ${client}; then + # Only add the address to NIC if netstack is not in use. Otherwise the host + # will also process the inbound SYN and send a RST back. + ${nsjoin_binary} /tmp/client.netns ip addr add ${client_proxy_addr}/${mask} dev client.0 +fi + +# Start a server proxy. 
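+# Mirrors the client proxy setup above: pin a fresh network namespace at
+# /tmp/server.netns and move server.0 into it.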
+touch /tmp/server.netns +unshare -n mount --bind /proc/self/ns/net /tmp/server.netns +# Move the endpoint into the namespace. +while ip link | grep server.0 > /dev/null; do + ip link set dev server.0 netns /tmp/server.netns +done +if ! ${server}; then + # Only add the address to NIC if netstack is not in use. Otherwise the host + # will also process the inbound SYN and send a RST back. + ${nsjoin_binary} /tmp/server.netns ip addr add ${server_proxy_addr}/${mask} dev server.0 +fi + +# Add client and server addresses, and bring everything up. +${nsjoin_binary} /tmp/client.netns ip addr add ${client_addr}/${mask} dev client.0 +${nsjoin_binary} /tmp/server.netns ip addr add ${server_addr}/${mask} dev server.0 +if [ "${disable_linux_gso}" == "1" ]; then + ${nsjoin_binary} /tmp/client.netns ethtool -K client.0 tso off + ${nsjoin_binary} /tmp/client.netns ethtool -K client.0 gro off + ${nsjoin_binary} /tmp/client.netns ethtool -K client.0 gso off + ${nsjoin_binary} /tmp/server.netns ethtool -K server.0 tso off + ${nsjoin_binary} /tmp/server.netns ethtool -K server.0 gso off + ${nsjoin_binary} /tmp/server.netns ethtool -K server.0 gro off +fi +${nsjoin_binary} /tmp/client.netns ip link set client.0 up +${nsjoin_binary} /tmp/client.netns ip link set lo up +${nsjoin_binary} /tmp/server.netns ip link set server.0 up +${nsjoin_binary} /tmp/server.netns ip link set lo up +ip link set dev client.1 up +ip link set dev server.1 up + +${nsjoin_binary} /tmp/client.netns ${client_args} & +client_pid=\$! +${nsjoin_binary} /tmp/server.netns ${server_args} & +server_pid=\$! + +# Start the iperf server. +${nsjoin_binary} /tmp/server.netns iperf -p ${iperf_port} -s >&2 & +iperf_pid=\$! + +# Show traffic information. +if ! ${client} && ! ${server}; then + ${nsjoin_binary} /tmp/client.netns ping -c 100 -i 0.001 -W 1 ${server_addr} >&2 || true +fi + +results_file=\$(mktemp) +function cleanup { + rm -f \$results_file + kill -TERM \$client_pid + kill -TERM \$server_pid + wait \$client_pid + wait \$server_pid + kill -9 \$iperf_pid 2>/dev/null +} + +# Allow failure from this point. +set +e +trap cleanup EXIT + +# Run the benchmark, recording the results file. +while ${nsjoin_binary} /tmp/client.netns iperf \\ + -p ${proxy_port} -c ${client_addr} -t ${duration} -f m -P ${num_client_threads} 2>&1 \\ + | tee \$results_file \\ + | grep "connect failed" >/dev/null; do + sleep 0.1 # Wait for all services. +done + +# Unlink all relevant devices from the bridge. This is because when the bridge +# is deleted, the kernel may hang. It appears that this problem is fixed in +# upstream commit 1ce5cce895309862d2c35d922816adebe094fe4a. +ip link set client.1 nomaster +ip link set server.1 nomaster +ip link set wan.0 nomaster +ip link set wan.1 nomaster + +# Emit raw results. +cat \$results_file >&2 + +# Emit a useful result (final throughput). 
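+# The script's final stdout line is: <throughput (Mbits/sec)> <client cpu load> <server cpu load>.
+# CPU load comes from /proc/<pid>/stat, whose 14th and 15th fields are utime and
+# stime in clock ticks; load = (utime + stime) / CLK_TCK / duration. For example,
+# 4500 ticks at CLK_TCK=100 over a 30s run is about 1.5 CPUs.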
+mbits=\$(grep Mbits/sec \$results_file \\ + | sed -n -e 's/^.*[[:space:]]\\([[:digit:]]\\+\\(\\.[[:digit:]]\\+\\)\\?\\)[[:space:]]*Mbits\\/sec.*/\\1/p') +client_cpu_ticks=\$(cat /proc/\$client_pid/stat \\ + | awk '{print (\$14+\$15);}') +server_cpu_ticks=\$(cat /proc/\$server_pid/stat \\ + | awk '{print (\$14+\$15);}') +ticks_per_sec=\$(getconf CLK_TCK) +client_cpu_load=\$(bc -l <<< \$client_cpu_ticks/\$ticks_per_sec/${duration}) +server_cpu_load=\$(bc -l <<< \$server_cpu_ticks/\$ticks_per_sec/${duration}) +echo \$mbits \$client_cpu_load \$server_cpu_load +EOF diff --git a/test/benchmarks/tcp/tcp_proxy.go b/test/benchmarks/tcp/tcp_proxy.go new file mode 100644 index 000000000..4b7ca7a14 --- /dev/null +++ b/test/benchmarks/tcp/tcp_proxy.go @@ -0,0 +1,451 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Binary tcp_proxy is a simple TCP proxy. +package main + +import ( + "encoding/gob" + "flag" + "fmt" + "io" + "log" + "math/rand" + "net" + "os" + "os/signal" + "regexp" + "runtime" + "runtime/pprof" + "strconv" + "syscall" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet" + "gvisor.dev/gvisor/pkg/tcpip/link/fdbased" + "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo" + "gvisor.dev/gvisor/pkg/tcpip/network/arp" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" + "gvisor.dev/gvisor/pkg/tcpip/transport/udp" +) + +var ( + port = flag.Int("port", 0, "bind port (all addresses)") + forward = flag.String("forward", "", "forwarding target") + client = flag.Bool("client", false, "use netstack for listen") + server = flag.Bool("server", false, "use netstack for dial") + + // Netstack-specific options. 
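+	// These defaults match tcp_benchmark.sh: MTU 1280, a /8 mask, and
+	// GSO/software GSO disabled unless explicitly requested.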
+ mtu = flag.Int("mtu", 1280, "mtu for network stack") + addr = flag.String("addr", "", "address for tap-based netstack") + mask = flag.Int("mask", 8, "mask size for address") + iface = flag.String("iface", "", "network interface name to bind for netstack") + sack = flag.Bool("sack", false, "enable SACK support for netstack") + moderateRecvBuf = flag.Bool("moderate_recv_buf", false, "enable TCP Receive Buffer Auto-tuning") + cubic = flag.Bool("cubic", false, "enable use of CUBIC congestion control for netstack") + gso = flag.Int("gso", 0, "GSO maximum size") + swgso = flag.Bool("swgso", false, "software-level GSO") + clientTCPProbeFile = flag.String("client_tcp_probe_file", "", "if specified, installs a tcp probe to dump endpoint state to the specified file.") + serverTCPProbeFile = flag.String("server_tcp_probe_file", "", "if specified, installs a tcp probe to dump endpoint state to the specified file.") + cpuprofile = flag.String("cpuprofile", "", "write cpu profile to the specified file.") + memprofile = flag.String("memprofile", "", "write memory profile to the specified file.") +) + +type impl interface { + dial(address string) (net.Conn, error) + listen(port int) (net.Listener, error) + printStats() +} + +type netImpl struct{} + +func (netImpl) dial(address string) (net.Conn, error) { + return net.Dial("tcp", address) +} + +func (netImpl) listen(port int) (net.Listener, error) { + return net.Listen("tcp", fmt.Sprintf(":%d", port)) +} + +func (netImpl) printStats() { +} + +const ( + nicID = 1 // Fixed. + bufSize = 4 << 20 // 4MB. +) + +type netstackImpl struct { + s *stack.Stack + addr tcpip.Address + mode string +} + +func setupNetwork(ifaceName string, numChannels int) (fds []int, err error) { + // Get all interfaces in the namespace. + ifaces, err := net.Interfaces() + if err != nil { + return nil, fmt.Errorf("querying interfaces: %v", err) + } + + for _, iface := range ifaces { + if iface.Name != ifaceName { + continue + } + // Create the socket. + const protocol = 0x0300 // htons(ETH_P_ALL) + fds := make([]int, numChannels) + for i := range fds { + fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol) + if err != nil { + return nil, fmt.Errorf("unable to create raw socket: %v", err) + } + + // Bind to the appropriate device. + ll := syscall.SockaddrLinklayer{ + Protocol: protocol, + Ifindex: iface.Index, + Pkttype: syscall.PACKET_HOST, + } + if err := syscall.Bind(fd, &ll); err != nil { + return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err) + } + + // RAW Sockets by default have a very small SO_RCVBUF of 256KB, + // up it to at least 4MB to reduce packet drops. + if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF, bufSize); err != nil { + return nil, fmt.Errorf("setsockopt(..., SO_RCVBUF, %v,..) = %v", bufSize, err) + } + + if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF, bufSize); err != nil { + return nil, fmt.Errorf("setsockopt(..., SO_SNDBUF, %v,..) = %v", bufSize, err) + } + + if !*swgso && *gso != 0 { + if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil { + return nil, fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err) + } + } + fds[i] = fd + } + return fds, nil + } + return nil, fmt.Errorf("failed to find interface: %v", ifaceName) +} + +func newNetstackImpl(mode string) (impl, error) { + fds, err := setupNetwork(*iface, runtime.GOMAXPROCS(-1)) + if err != nil { + return nil, err + } + + // Parse details. 
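+	// For example, the client proxy in tcp_benchmark.sh passes -addr 10.0.0.2
+	// -mask 8, which parses to address 10.0.0.2 on subnet 10.0.0.0/255.0.0.0.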
+ parsedAddr := tcpip.Address(net.ParseIP(*addr).To4()) + parsedDest := tcpip.Address("") // Filled in below. + parsedMask := tcpip.AddressMask("") // Filled in below. + switch *mask { + case 8: + parsedDest = tcpip.Address([]byte{parsedAddr[0], 0, 0, 0}) + parsedMask = tcpip.AddressMask([]byte{0xff, 0, 0, 0}) + case 16: + parsedDest = tcpip.Address([]byte{parsedAddr[0], parsedAddr[1], 0, 0}) + parsedMask = tcpip.AddressMask([]byte{0xff, 0xff, 0, 0}) + case 24: + parsedDest = tcpip.Address([]byte{parsedAddr[0], parsedAddr[1], parsedAddr[2], 0}) + parsedMask = tcpip.AddressMask([]byte{0xff, 0xff, 0xff, 0}) + default: + // This is just laziness; we don't expect a different mask. + return nil, fmt.Errorf("mask %d not supported", mask) + } + + // Create a new network stack. + netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), arp.NewProtocol()} + transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol()} + s := stack.New(stack.Options{ + NetworkProtocols: netProtos, + TransportProtocols: transProtos, + }) + + // Generate a new mac for the eth device. + mac := make(net.HardwareAddr, 6) + rand.Read(mac) // Fill with random data. + mac[0] &^= 0x1 // Clear multicast bit. + mac[0] |= 0x2 // Set local assignment bit (IEEE802). + ep, err := fdbased.New(&fdbased.Options{ + FDs: fds, + MTU: uint32(*mtu), + EthernetHeader: true, + Address: tcpip.LinkAddress(mac), + // Enable checksum generation as we need to generate valid + // checksums for the veth device to deliver our packets to the + // peer. But we do want to disable checksum verification as veth + // devices do perform GRO and the linux host kernel may not + // regenerate valid checksums after GRO. + TXChecksumOffload: false, + RXChecksumOffload: true, + PacketDispatchMode: fdbased.RecvMMsg, + GSOMaxSize: uint32(*gso), + SoftwareGSOEnabled: *swgso, + }) + if err != nil { + return nil, fmt.Errorf("failed to create FD endpoint: %v", err) + } + if err := s.CreateNIC(nicID, fifo.New(ep, runtime.GOMAXPROCS(0), 1000)); err != nil { + return nil, fmt.Errorf("error creating NIC %q: %v", *iface, err) + } + if err := s.AddAddress(nicID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil { + return nil, fmt.Errorf("error adding ARP address to %q: %v", *iface, err) + } + if err := s.AddAddress(nicID, ipv4.ProtocolNumber, parsedAddr); err != nil { + return nil, fmt.Errorf("error adding IP address to %q: %v", *iface, err) + } + + subnet, err := tcpip.NewSubnet(parsedDest, parsedMask) + if err != nil { + return nil, fmt.Errorf("tcpip.Subnet(%s, %s): %s", parsedDest, parsedMask, err) + } + // Add default route; we only support + s.SetRouteTable([]tcpip.Route{ + { + Destination: subnet, + NIC: nicID, + }, + }) + + // Set protocol options. + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(*sack)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %s", err) + } + + // Enable Receive Buffer Auto-Tuning. + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(*moderateRecvBuf)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err) + } + + // Set Congestion Control to cubic if requested. 
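+	// (tcp_benchmark.sh forwards its --cubic flag here through netstack_opts.)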
+ if *cubic { + if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.CongestionControlOption("cubic")); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %s", err) + } + } + + return netstackImpl{ + s: s, + addr: parsedAddr, + mode: mode, + }, nil +} + +func (n netstackImpl) dial(address string) (net.Conn, error) { + host, port, err := net.SplitHostPort(address) + if err != nil { + return nil, err + } + if host == "" { + // A host must be provided for the dial. + return nil, fmt.Errorf("no host provided") + } + portNumber, err := strconv.Atoi(port) + if err != nil { + return nil, err + } + addr := tcpip.FullAddress{ + NIC: nicID, + Addr: tcpip.Address(net.ParseIP(host).To4()), + Port: uint16(portNumber), + } + conn, err := gonet.DialTCP(n.s, addr, ipv4.ProtocolNumber) + if err != nil { + return nil, err + } + return conn, nil +} + +func (n netstackImpl) listen(port int) (net.Listener, error) { + addr := tcpip.FullAddress{ + NIC: nicID, + Port: uint16(port), + } + listener, err := gonet.ListenTCP(n.s, addr, ipv4.ProtocolNumber) + if err != nil { + return nil, err + } + return listener, nil +} + +var zeroFieldsRegexp = regexp.MustCompile(`\s*[a-zA-Z0-9]*:0`) + +func (n netstackImpl) printStats() { + // Don't show zero fields. + stats := zeroFieldsRegexp.ReplaceAllString(fmt.Sprintf("%+v", n.s.Stats()), "") + log.Printf("netstack %s Stats: %+v\n", n.mode, stats) +} + +// installProbe installs a TCP Probe function that will dump endpoint +// state to the specified file. It also returns a close func() that +// can be used to close the probeFile. +func (n netstackImpl) installProbe(probeFileName string) (close func()) { + // Install Probe to dump out end point state. + probeFile, err := os.Create(probeFileName) + if err != nil { + log.Fatalf("failed to create tcp_probe file %s: %v", probeFileName, err) + } + probeEncoder := gob.NewEncoder(probeFile) + // Install a TCP Probe. + n.s.AddTCPProbe(func(state stack.TCPEndpointState) { + probeEncoder.Encode(state) + }) + return func() { probeFile.Close() } +} + +func main() { + flag.Parse() + if *port == 0 { + log.Fatalf("no port provided") + } + if *forward == "" { + log.Fatalf("no forward provided") + } + // Seed the random number generator to ensure that we are given MAC addresses that don't + // for the case of the client and server stack. + rand.Seed(time.Now().UTC().UnixNano()) + + if *cpuprofile != "" { + f, err := os.Create(*cpuprofile) + if err != nil { + log.Fatal("could not create CPU profile: ", err) + } + defer func() { + if err := f.Close(); err != nil { + log.Print("error closing CPU profile: ", err) + } + }() + if err := pprof.StartCPUProfile(f); err != nil { + log.Fatal("could not start CPU profile: ", err) + } + defer pprof.StopCPUProfile() + } + + var ( + in impl + out impl + err error + ) + if *server { + in, err = newNetstackImpl("server") + if *serverTCPProbeFile != "" { + defer in.(netstackImpl).installProbe(*serverTCPProbeFile)() + } + + } else { + in = netImpl{} + } + if err != nil { + log.Fatalf("netstack error: %v", err) + } + if *client { + out, err = newNetstackImpl("client") + if *clientTCPProbeFile != "" { + defer out.(netstackImpl).installProbe(*clientTCPProbeFile)() + } + } else { + out = netImpl{} + } + if err != nil { + log.Fatalf("netstack error: %v", err) + } + + // Dial forward before binding. 
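+	// The peer proxy may not be listening yet (tcp_benchmark.sh starts both
+	// proxies and the iperf server concurrently), so retry until the dial
+	// succeeds.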
+ var next net.Conn + for { + next, err = out.dial(*forward) + if err == nil { + break + } + time.Sleep(50 * time.Millisecond) + log.Printf("connect failed retrying: %v", err) + } + + // Bind once to the server socket. + listener, err := in.listen(*port) + if err != nil { + // Should not happen, everything must be bound by this time + // this proxy is started. + log.Fatalf("unable to listen: %v", err) + } + log.Printf("client=%v, server=%v, ready.", *client, *server) + + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGTERM) + go func() { + <-sigs + if *cpuprofile != "" { + pprof.StopCPUProfile() + } + if *memprofile != "" { + f, err := os.Create(*memprofile) + if err != nil { + log.Fatal("could not create memory profile: ", err) + } + defer func() { + if err := f.Close(); err != nil { + log.Print("error closing memory profile: ", err) + } + }() + runtime.GC() // get up-to-date statistics + if err := pprof.WriteHeapProfile(f); err != nil { + log.Fatalf("Unable to write heap profile: %v", err) + } + } + os.Exit(0) + }() + + for { + // Forward all connections. + inConn, err := listener.Accept() + if err != nil { + // This should not happen; we are listening + // successfully. Exhausted all available FDs? + log.Fatalf("accept error: %v", err) + } + log.Printf("incoming connection established.") + + // Copy both ways. + go io.Copy(inConn, next) + go io.Copy(next, inConn) + + // Print stats every second. + go func() { + t := time.NewTicker(time.Second) + defer t.Stop() + for { + <-t.C + in.printStats() + out.printStats() + } + }() + + for { + // Dial again. + next, err = out.dial(*forward) + if err == nil { + break + } + } + } +} diff --git a/tools/bazeldefs/defs.bzl b/tools/bazeldefs/defs.bzl index d0096dcec..db7f379b8 100644 --- a/tools/bazeldefs/defs.bzl +++ b/tools/bazeldefs/defs.bzl @@ -8,7 +8,6 @@ load("@io_bazel_rules_go//go:def.bzl", "GoLibrary", _go_binary = "go_binary", _g load("@io_bazel_rules_go//proto:def.bzl", _go_grpc_library = "go_grpc_library", _go_proto_library = "go_proto_library") load("@rules_cc//cc:defs.bzl", _cc_binary = "cc_binary", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test") load("@rules_pkg//:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar") -load("@pydeps//:requirements.bzl", _py_requirement = "requirement") load("@com_github_grpc_grpc//bazel:cc_grpc_library.bzl", _cc_grpc_library = "cc_grpc_library") build_test = _build_test @@ -27,9 +26,7 @@ gbenchmark = "@com_google_benchmark//:benchmark" loopback = "//tools/bazeldefs:loopback" pkg_deb = _pkg_deb pkg_tar = _pkg_tar -py_library = native.py_library py_binary = native.py_binary -py_test = native.py_test rbe_platform = native.platform rbe_toolchain = native.toolchain vdso_linker_option = "-fuse-ld=gold " @@ -165,9 +162,6 @@ def go_context(ctx): tags = go_ctx.tags, ) -def py_requirement(name, direct = True): - return _py_requirement(name) - def select_arch(amd64 = "amd64", arm64 = "arm64", default = None, **kwargs): values = { "@bazel_tools//src/conditions:linux_x86_64": amd64, diff --git a/tools/defs.bzl b/tools/defs.bzl index b64d735e6..e71a26cf4 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -7,7 +7,7 @@ change for Google-internal and bazel-compatible rules. 
load("//tools/go_stateify:defs.bzl", "go_stateify") load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") -load("//tools/bazeldefs:defs.bzl", _build_test = "build_test", _bzl_library = "bzl_library", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _default_installer = "default_installer", _default_net_util = "default_net_util", _gazelle = "gazelle", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_path = "go_path", _go_proto_library = "go_proto_library", _go_test = "go_test", _grpcpp = "grpcpp", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _rbe_platform = "rbe_platform", _rbe_toolchain = "rbe_toolchain", _select_arch = "select_arch", _select_system = "select_system", _short_path = "short_path", _vdso_linker_option = "vdso_linker_option") +load("//tools/bazeldefs:defs.bzl", _build_test = "build_test", _bzl_library = "bzl_library", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_grpc_library = "cc_grpc_library", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _default_installer = "default_installer", _default_net_util = "default_net_util", _gazelle = "gazelle", _gbenchmark = "gbenchmark", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_grpc_and_proto_libraries = "go_grpc_and_proto_libraries", _go_library = "go_library", _go_path = "go_path", _go_proto_library = "go_proto_library", _go_test = "go_test", _grpcpp = "grpcpp", _gtest = "gtest", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _rbe_platform = "rbe_platform", _rbe_toolchain = "rbe_toolchain", _select_arch = "select_arch", _select_system = "select_system", _short_path = "short_path", _vdso_linker_option = "vdso_linker_option") load("//tools/bazeldefs:platforms.bzl", _default_platform = "default_platform", _platforms = "platforms") load("//tools/bazeldefs:tags.bzl", "go_suffixes") load("//tools/nogo:defs.bzl", "nogo_test") @@ -34,9 +34,6 @@ loopback = _loopback pkg_deb = _pkg_deb pkg_tar = _pkg_tar py_binary = _py_binary -py_library = _py_library -py_requirement = _py_requirement -py_test = _py_test select_arch = _select_arch select_system = _select_system short_path = _short_path diff --git a/website/blog/2020-04-02-networking-security.md b/website/blog/2020-04-02-networking-security.md index 5a5e38fd7..f3ce02d11 100644 --- a/website/blog/2020-04-02-networking-security.md +++ b/website/blog/2020-04-02-networking-security.md @@ -108,7 +108,7 @@ re-architecting the TCP implementation to use fewer goroutines. Performance today is good enough for most applications and we are making steady improvements. For example, since May of 2019, we have improved the Netstack runsc -[iperf3 download benchmark](https://github.com/google/gvisor/blob/master/benchmarks/suites/network.py) +[iperf3 download benchmark](https://github.com/google/gvisor/tree/master/test/benchmarks/network) score by roughly 15% and upload score by around 10,000X. 
Current numbers are about 17 Gbps download and about 8 Gbps upload versus about 42 Gbps and 43 Gbps for native (Linux) respectively. diff --git a/website/performance/README.md b/website/performance/README.md index 0dbfd2f02..1758fc608 100644 --- a/website/performance/README.md +++ b/website/performance/README.md @@ -1,9 +1,10 @@ # Performance data -This directory holds the CSVs generated by the -[benchmark-tools][benchmark-tools] repository. +This directory holds the CSVs generated by the now removed benchmark-tools +repository. The new functionally equivalent +[benchmark-tools is available.][benchmark-tools] In the future, these will be automatically posted to a cloud storage bucket and loaded dynamically. At that point, this directory will be removed. -[benchmark-tools]: https://github.com/google/gvisor/tree/master/benchmarks +[benchmark-tools]: https://github.com/google/gvisor/tree/master/test/benchmarks -- cgit v1.2.3 From a88cf5a2e18869acb636f696700ecce2781be818 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Mon, 10 Aug 2020 14:51:07 -0700 Subject: Add benchmarks to continuous build. PiperOrigin-RevId: 325892974 --- BUILD | 6 ++++++ test/benchmarks/base/BUILD | 1 + test/benchmarks/database/BUILD | 1 + test/benchmarks/fs/BUILD | 1 + test/benchmarks/media/BUILD | 1 + test/benchmarks/ml/BUILD | 1 + test/benchmarks/network/BUILD | 1 + 7 files changed, 12 insertions(+) (limited to 'test') diff --git a/BUILD b/BUILD index f3d0eec4e..2639f8169 100644 --- a/BUILD +++ b/BUILD @@ -57,6 +57,12 @@ build_test( "//test/e2e:integration_test", "//test/image:image_test", "//test/root:root_test", + "//test/benchmarks/base:base_test", + "//test/benchmarks/database:database_test", + "//test/benchmarks/fs:fs_test", + "//test/benchmarks/media:media_test", + "//test/benchmarks/ml:ml_test", + "//test/benchmarks/network:network_test", ], ) diff --git a/test/benchmarks/base/BUILD b/test/benchmarks/base/BUILD index 5e099d0f9..32c139204 100644 --- a/test/benchmarks/base/BUILD +++ b/test/benchmarks/base/BUILD @@ -25,6 +25,7 @@ go_test( "manual", "local", ], + visibility = ["//:sandbox"], deps = [ "//pkg/test/dockerutil", "//test/benchmarks/harness", diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD index 6139f6e8a..93b380e8a 100644 --- a/test/benchmarks/database/BUILD +++ b/test/benchmarks/database/BUILD @@ -19,6 +19,7 @@ go_test( "manual", "local", ], + visibility = ["//:sandbox"], deps = [ "//pkg/test/dockerutil", "//test/benchmarks/harness", diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD index 20654d88f..45f11372b 100644 --- a/test/benchmarks/fs/BUILD +++ b/test/benchmarks/fs/BUILD @@ -22,6 +22,7 @@ go_test( "local", "manual", ], + visibility = ["//:sandbox"], deps = [ "//pkg/test/dockerutil", "//test/benchmarks/harness", diff --git a/test/benchmarks/media/BUILD b/test/benchmarks/media/BUILD index 6c41fc4f6..bb242d385 100644 --- a/test/benchmarks/media/BUILD +++ b/test/benchmarks/media/BUILD @@ -14,6 +14,7 @@ go_test( size = "large", srcs = ["ffmpeg_test.go"], library = ":media", + visibility = ["//:sandbox"], deps = [ "//pkg/test/dockerutil", "//test/benchmarks/harness", diff --git a/test/benchmarks/ml/BUILD b/test/benchmarks/ml/BUILD index 2430b60a7..970f52706 100644 --- a/test/benchmarks/ml/BUILD +++ b/test/benchmarks/ml/BUILD @@ -14,6 +14,7 @@ go_test( size = "large", srcs = ["tensorflow_test.go"], library = ":ml", + visibility = ["//:sandbox"], deps = [ "//pkg/test/dockerutil", "//test/benchmarks/harness", diff --git a/test/benchmarks/network/BUILD 
b/test/benchmarks/network/BUILD index df5ff7265..bd3f6245c 100644 --- a/test/benchmarks/network/BUILD +++ b/test/benchmarks/network/BUILD @@ -25,6 +25,7 @@ go_test( "manual", "local", ], + visibility = ["//:sandbox"], deps = [ "//pkg/test/dockerutil", "//pkg/test/testutil", -- cgit v1.2.3 From a1af46c20ade5566ca8c2583f10c17765a5dc3ad Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Mon, 10 Aug 2020 15:48:24 -0700 Subject: Enable VFS2 by default for all syscall tests. Fixes #2923 PiperOrigin-RevId: 325904734 --- test/runner/defs.bzl | 2 +- test/syscalls/BUILD | 192 --------------------------------------------------- 2 files changed, 1 insertion(+), 193 deletions(-) (limited to 'test') diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index ba4732ca6..fa96b4ea2 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -132,7 +132,7 @@ def syscall_test( add_overlay = False, add_uds_tree = False, add_hostinet = False, - vfs2 = False, + vfs2 = True, fuse = False, tags = None): """syscall_test is a macro that will create targets for all platforms. diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index a31612b41..571785ad2 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -4,75 +4,62 @@ package(licenses = ["notice"]) syscall_test( test = "//test/syscalls/linux:32bit_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:accept_bind_stream_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:accept_bind_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:access_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:affinity_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:aio_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:alarm_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:arch_prctl_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:bad_test", - vfs2 = "True", ) syscall_test( size = "large", add_overlay = True, test = "//test/syscalls/linux:bind_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:brk_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_capability_test", - vfs2 = "True", ) syscall_test( @@ -82,19 +69,16 @@ syscall_test( # involve much concurrency, TSAN's usefulness here is limited anyway. tags = ["nogotsan"], test = "//test/syscalls/linux:socket_stress_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:chdir_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:chmod_test", - vfs2 = "True", ) syscall_test( @@ -102,116 +86,96 @@ syscall_test( add_overlay = True, test = "//test/syscalls/linux:chown_test", use_tmpfs = True, # chwon tests require gofer to be running as root. 
- vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:chroot_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:clock_getres_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:clock_gettime_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:clock_nanosleep_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:concurrency_test", - vfs2 = "True", ) syscall_test( add_uds_tree = True, test = "//test/syscalls/linux:connect_external_test", use_tmpfs = True, - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:creat_test", - vfs2 = "True", ) syscall_test( fuse = "True", test = "//test/syscalls/linux:dev_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:dup_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:epoll_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:eventfd_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:exceptions_test", - vfs2 = "True", ) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:exec_test", - vfs2 = "True", ) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:exec_binary_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:exit_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:fadvise64_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:fallocate_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:fault_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:fchdir_test", - vfs2 = "True", ) syscall_test( @@ -223,204 +187,168 @@ syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:flock_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:fork_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:fpsig_fork_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:fpsig_nested_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:fsync_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:futex_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:getcpu_host_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:getcpu_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:getdents_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:getrandom_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:getrusage_test", - vfs2 = "True", ) syscall_test( size = "medium", add_overlay = False, # TODO(gvisor.dev/issue/317): enable when fixed. 
test = "//test/syscalls/linux:inotify_test", - vfs2 = "True", ) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:ioctl_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:iptables_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 5, test = "//test/syscalls/linux:itimer_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:kill_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:link_test", use_tmpfs = True, # gofer needs CAP_DAC_READ_SEARCH to use AT_EMPTY_PATH with linkat(2) - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:lseek_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:madvise_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:memory_accounting_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:mempolicy_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:mincore_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:mkdir_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:mknod_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:mmap_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:mount_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:mremap_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:msync_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:munmap_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:network_namespace_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:open_create_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:open_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:packet_socket_raw_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:packet_socket_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:partial_bad_buffer_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:pause_test", - vfs2 = "True", ) syscall_test( @@ -428,7 +356,6 @@ syscall_test( # Takes too long under gotsan to run. 
tags = ["nogotsan"], test = "//test/syscalls/linux:ping_socket_test", - vfs2 = "True", ) syscall_test( @@ -436,229 +363,188 @@ syscall_test( add_overlay = True, shard_count = 5, test = "//test/syscalls/linux:pipe_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:poll_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:ppoll_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:prctl_setuid_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:prctl_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:pread64_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:preadv_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:preadv2_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:priority_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:proc_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:proc_net_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:proc_pid_oomscore_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:proc_pid_smaps_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:proc_pid_uid_gid_map_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:pselect_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:ptrace_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:pty_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:pty_root_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:pwritev2_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:pwrite64_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:raw_socket_hdrincl_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:raw_socket_icmp_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:raw_socket_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:read_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:readahead_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:readv_socket_test", - vfs2 = "True", ) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:readv_test", - vfs2 = "True", ) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:rename_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:rlimits_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:rseq_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:rtsignal_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:signalfd_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:sched_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:sched_yield_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:seccomp_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:select_test", - vfs2 = "True", ) syscall_test( shard_count = 20, test = "//test/syscalls/linux:semaphore_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:sendfile_socket_test", - vfs2 = "True", ) 
syscall_test( add_overlay = True, test = "//test/syscalls/linux:sendfile_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:splice_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:sigaction_test", - vfs2 = "True", ) # TODO(b/119826902): Enable once the test passes in runsc. @@ -666,62 +552,52 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:sigiret_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:sigprocmask_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:sigstop_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:sigtimedwait_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:shm_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_abstract_non_blocking_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_abstract_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_domain_non_blocking_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_domain_test", - vfs2 = "True", ) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:socket_filesystem_non_blocking_test", - vfs2 = "True", ) syscall_test( @@ -729,14 +605,12 @@ syscall_test( add_overlay = True, shard_count = 50, test = "//test/syscalls/linux:socket_filesystem_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_inet_loopback_test", - vfs2 = "True", ) syscall_test( @@ -745,122 +619,101 @@ syscall_test( # Takes too long for TSAN. Creates a lot of TCP sockets. 
tags = ["nogotsan"], test = "//test/syscalls/linux:socket_inet_loopback_nogotsan_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_generic_loopback_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_ip_tcp_loopback_non_blocking_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_loopback_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_udp_generic_loopback_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_ip_udp_loopback_non_blocking_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_ip_udp_loopback_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_ip_unbound_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_netdevice_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_netlink_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_netlink_route_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_netlink_uevent_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_blocking_local_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_blocking_ip_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_non_stream_blocking_local_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:socket_non_stream_blocking_udp_test", - vfs2 = "True", ) syscall_test( size = "large", test = "//test/syscalls/linux:socket_stream_blocking_local_test", - vfs2 = "True", ) syscall_test( size = "large", test = "//test/syscalls/linux:socket_stream_blocking_tcp_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_stream_local_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_stream_nonblock_local_test", - vfs2 = "True", ) syscall_test( @@ -868,13 +721,11 @@ syscall_test( size = "enormous", shard_count = 5, test = "//test/syscalls/linux:socket_unix_dgram_local_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_dgram_non_blocking_test", - vfs2 = "True", ) syscall_test( @@ -882,7 +733,6 @@ syscall_test( add_overlay = True, shard_count = 50, test = "//test/syscalls/linux:socket_unix_pair_test", - vfs2 = "True", ) syscall_test( @@ -890,156 +740,129 @@ syscall_test( size = "enormous", shard_count = 5, test = "//test/syscalls/linux:socket_unix_seqpacket_local_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_stream_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_unbound_abstract_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_unbound_dgram_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_unbound_filesystem_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 10, test = "//test/syscalls/linux:socket_unix_unbound_seqpacket_test", - vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = 
"//test/syscalls/linux:socket_unix_unbound_stream_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:statfs_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:stat_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:stat_times_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:sticky_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:symlink_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:sync_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:sync_file_range_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:sysinfo_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:syslog_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:sysret_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 10, test = "//test/syscalls/linux:tcp_socket_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:tgkill_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:timerfd_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:timers_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:time_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:tkill_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:truncate_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:tuntap_test", - vfs2 = "True", ) syscall_test( add_hostinet = True, test = "//test/syscalls/linux:tuntap_hostinet_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:udp_bind_test", - vfs2 = "True", ) syscall_test( @@ -1047,80 +870,65 @@ syscall_test( add_hostinet = True, shard_count = 10, test = "//test/syscalls/linux:udp_socket_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:uidgid_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:uname_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:unlink_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:unshare_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:utimes_test", - vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:vdso_clock_gettime_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:vdso_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:vsyscall_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:vfork_test", - vfs2 = "True", ) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:wait_test", - vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:write_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:proc_net_unix_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:proc_net_tcp_test", - vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:proc_net_udp_test", - vfs2 = "True", ) -- cgit v1.2.3 From 0a8ae4b32f0dbc0b2a84c3f07c8c98e855a8f5fa Mon Sep 17 00:00:00 2001 From: Ghanan Gowripalan Date: Mon, 10 Aug 2020 16:20:39 -0700 Subject: Populate IPPacketInfo with destination address IPPacketInfo.DestinationAddr should hold the destination of the IP packet, not the source. This change fixes that bug. 
PiperOrigin-RevId: 325910766 --- pkg/tcpip/checker/BUILD | 1 + pkg/tcpip/checker/checker.go | 26 +++++-- pkg/tcpip/tcpip.go | 2 +- pkg/tcpip/transport/udp/endpoint.go | 9 ++- pkg/tcpip/transport/udp/udp_test.go | 99 ++++++++++++++++++++++++ test/syscalls/linux/socket_ipv4_udp_unbound.cc | 100 +++++++++++++++++++++++++ 6 files changed, 226 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/checker/BUILD b/pkg/tcpip/checker/BUILD index ed434807f..c984470e6 100644 --- a/pkg/tcpip/checker/BUILD +++ b/pkg/tcpip/checker/BUILD @@ -12,5 +12,6 @@ go_library( "//pkg/tcpip/buffer", "//pkg/tcpip/header", "//pkg/tcpip/seqnum", + "@com_github_google_go_cmp//cmp:go_default_library", ], ) diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index ee264b726..1e5f5abf2 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -21,6 +21,7 @@ import ( "reflect" "testing" + "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" @@ -169,10 +170,9 @@ func ReceiveTClass(want uint32) ControlMessagesChecker { return func(t *testing.T, cm tcpip.ControlMessages) { t.Helper() if !cm.HasTClass { - t.Fatalf("got cm.HasTClass = %t, want cm.TClass = %d", cm.HasTClass, want) - } - if got := cm.TClass; got != want { - t.Fatalf("got cm.TClass = %d, want %d", got, want) + t.Errorf("got cm.HasTClass = %t, want = true", cm.HasTClass) + } else if got := cm.TClass; got != want { + t.Errorf("got cm.TClass = %d, want %d", got, want) } } } @@ -182,10 +182,22 @@ func ReceiveTOS(want uint8) ControlMessagesChecker { return func(t *testing.T, cm tcpip.ControlMessages) { t.Helper() if !cm.HasTOS { - t.Fatalf("got cm.HasTOS = %t, want cm.TOS = %d", cm.HasTOS, want) + t.Errorf("got cm.HasTOS = %t, want = true", cm.HasTOS) + } else if got := cm.TOS; got != want { + t.Errorf("got cm.TOS = %d, want %d", got, want) } - if got := cm.TOS; got != want { - t.Fatalf("got cm.TOS = %d, want %d", got, want) + } +} + +// ReceiveIPPacketInfo creates a checker that checks the PacketInfo field in +// ControlMessages. +func ReceiveIPPacketInfo(want tcpip.IPPacketInfo) ControlMessagesChecker { + return func(t *testing.T, cm tcpip.ControlMessages) { + t.Helper() + if !cm.HasIPPacketInfo { + t.Errorf("got cm.HasIPPacketInfo = %t, want = true", cm.HasIPPacketInfo) + } else if diff := cmp.Diff(want, cm.PacketInfo); diff != "" { + t.Errorf("IPPacketInfo mismatch (-want +got):\n%s", diff) } } } diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 45f59b60f..091bc5281 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -968,7 +968,7 @@ type IPPacketInfo struct { // LocalAddr is the local address. LocalAddr Address - // DestinationAddr is the destination address. + // DestinationAddr is the destination address found in the IP header. 
DestinationAddr Address } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 444b5b01c..4a2b6c03a 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -1444,13 +1444,16 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk switch r.NetProto { case header.IPv4ProtocolNumber: packet.tos, _ = header.IPv4(pkt.NetworkHeader).TOS() - packet.packetInfo.LocalAddr = r.LocalAddress - packet.packetInfo.DestinationAddr = r.RemoteAddress - packet.packetInfo.NIC = r.NICID() case header.IPv6ProtocolNumber: packet.tos, _ = header.IPv6(pkt.NetworkHeader).TOS() } + // TODO(gvisor.dev/issue/3556): r.LocalAddress may be a multicast or broadcast + // address. packetInfo.LocalAddr should hold a unicast address that can be + // used to respond to the incoming packet. + packet.packetInfo.LocalAddr = r.LocalAddress + packet.packetInfo.DestinationAddr = r.LocalAddress + packet.packetInfo.NIC = r.NICID() packet.timestamp = e.stack.Clock().NowNanoseconds() e.rcvMu.Unlock() diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 66e8911c8..1a32622ca 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -1309,6 +1309,105 @@ func TestReadIncrementsPacketsReceived(t *testing.T) { } } +func TestReadIPPacketInfo(t *testing.T) { + tests := []struct { + name string + proto tcpip.NetworkProtocolNumber + flow testFlow + expectedLocalAddr tcpip.Address + expectedDestAddr tcpip.Address + }{ + { + name: "IPv4 unicast", + proto: header.IPv4ProtocolNumber, + flow: unicastV4, + expectedLocalAddr: stackAddr, + expectedDestAddr: stackAddr, + }, + { + name: "IPv4 multicast", + proto: header.IPv4ProtocolNumber, + flow: multicastV4, + // This should actually be a unicast address assigned to the interface. + // + // TODO(gvisor.dev/issue/3556): This check is validating incorrect + // behaviour. We still include the test so that once the bug is + // resolved, this test will start to fail and the individual tasked + // with fixing this bug knows to also fix this test :). + expectedLocalAddr: multicastAddr, + expectedDestAddr: multicastAddr, + }, + { + name: "IPv4 broadcast", + proto: header.IPv4ProtocolNumber, + flow: broadcast, + // This should actually be a unicast address assigned to the interface. + // + // TODO(gvisor.dev/issue/3556): This check is validating incorrect + // behaviour. We still include the test so that once the bug is + // resolved, this test will start to fail and the individual tasked + // with fixing this bug knows to also fix this test :). + expectedLocalAddr: broadcastAddr, + expectedDestAddr: broadcastAddr, + }, + { + name: "IPv6 unicast", + proto: header.IPv6ProtocolNumber, + flow: unicastV6, + expectedLocalAddr: stackV6Addr, + expectedDestAddr: stackV6Addr, + }, + { + name: "IPv6 multicast", + proto: header.IPv6ProtocolNumber, + flow: multicastV6, + // This should actually be a unicast address assigned to the interface. + // + // TODO(gvisor.dev/issue/3556): This check is validating incorrect + // behaviour. We still include the test so that once the bug is + // resolved, this test will start to fail and the individual tasked + // with fixing this bug knows to also fix this test :). 
+ expectedLocalAddr: multicastV6Addr, + expectedDestAddr: multicastV6Addr, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + c := newDualTestContext(t, defaultMTU) + defer c.cleanup() + + c.createEndpoint(test.proto) + + bindAddr := tcpip.FullAddress{Port: stackPort} + if err := c.ep.Bind(bindAddr); err != nil { + t.Fatalf("Bind(%+v): %s", bindAddr, err) + } + + if test.flow.isMulticast() { + ifoptSet := tcpip.AddMembershipOption{NIC: 1, MulticastAddr: test.flow.getMcastAddr()} + if err := c.ep.SetSockOpt(ifoptSet); err != nil { + c.t.Fatalf("SetSockOpt(%+v): %s:", ifoptSet, err) + } + } + + if err := c.ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, true); err != nil { + t.Fatalf("c.ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, true): %s", err) + } + + testRead(c, test.flow, checker.ReceiveIPPacketInfo(tcpip.IPPacketInfo{ + NIC: 1, + LocalAddr: test.expectedLocalAddr, + DestinationAddr: test.expectedDestAddr, + })) + + if got := c.s.Stats().UDP.PacketsReceived.Value(); got != 1 { + t.Fatalf("Read did not increment PacketsReceived: got = %d, want = 1", got) + } + }) + } +} + func TestWriteIncrementsPacketsSent(t *testing.T) { c := newDualTestContext(t, defaultMTU) defer c.cleanup() diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index de0f5f01b..bc005e2bb 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -2452,5 +2452,105 @@ TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBuf) { ASSERT_EQ(quarter_sz, val); } + +TEST_P(IPv4UDPUnboundSocketTest, IpMulticastIPPacketInfo) { + auto sender_socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + auto receiver_socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + // Bind the first FD to the loopback. This is an alternative to + // IP_MULTICAST_IF for setting the default send interface. + auto sender_addr = V4Loopback(); + ASSERT_THAT( + bind(sender_socket->get(), reinterpret_cast(&sender_addr.addr), + sender_addr.addr_len), + SyscallSucceeds()); + + // Bind the second FD to the v4 any address to ensure that we can receive the + // multicast packet. + auto receiver_addr = V4Any(); + ASSERT_THAT(bind(receiver_socket->get(), + reinterpret_cast(&receiver_addr.addr), + receiver_addr.addr_len), + SyscallSucceeds()); + socklen_t receiver_addr_len = receiver_addr.addr_len; + ASSERT_THAT(getsockname(receiver_socket->get(), + reinterpret_cast(&receiver_addr.addr), + &receiver_addr_len), + SyscallSucceeds()); + EXPECT_EQ(receiver_addr_len, receiver_addr.addr_len); + + // Register to receive multicast packets. + ip_mreqn group = {}; + group.imr_multiaddr.s_addr = inet_addr(kMulticastAddress); + group.imr_ifindex = ASSERT_NO_ERRNO_AND_VALUE(InterfaceIndex("lo")); + ASSERT_THAT(setsockopt(receiver_socket->get(), IPPROTO_IP, IP_ADD_MEMBERSHIP, + &group, sizeof(group)), + SyscallSucceeds()); + + // Register to receive IP packet info. + const int one = 1; + ASSERT_THAT(setsockopt(receiver_socket->get(), IPPROTO_IP, IP_PKTINFO, &one, + sizeof(one)), + SyscallSucceeds()); + + // Send a multicast packet. 
+ auto send_addr = V4Multicast(); + reinterpret_cast(&send_addr.addr)->sin_port = + reinterpret_cast(&receiver_addr.addr)->sin_port; + char send_buf[200]; + RandomizeBuffer(send_buf, sizeof(send_buf)); + ASSERT_THAT( + RetryEINTR(sendto)(sender_socket->get(), send_buf, sizeof(send_buf), 0, + reinterpret_cast(&send_addr.addr), + send_addr.addr_len), + SyscallSucceedsWithValue(sizeof(send_buf))); + + // Check that we received the multicast packet. + msghdr recv_msg = {}; + iovec recv_iov = {}; + char recv_buf[sizeof(send_buf)]; + char recv_cmsg_buf[CMSG_SPACE(sizeof(in_pktinfo))] = {}; + size_t cmsg_data_len = sizeof(in_pktinfo); + recv_iov.iov_base = recv_buf; + recv_iov.iov_len = sizeof(recv_buf); + recv_msg.msg_iov = &recv_iov; + recv_msg.msg_iovlen = 1; + recv_msg.msg_controllen = CMSG_LEN(cmsg_data_len); + recv_msg.msg_control = recv_cmsg_buf; + ASSERT_THAT(RetryEINTR(recvmsg)(receiver_socket->get(), &recv_msg, 0), + SyscallSucceedsWithValue(sizeof(send_buf))); + EXPECT_EQ(0, memcmp(send_buf, recv_buf, sizeof(send_buf))); + + // Check the IP_PKTINFO control message. + cmsghdr* cmsg = CMSG_FIRSTHDR(&recv_msg); + ASSERT_NE(cmsg, nullptr); + EXPECT_EQ(cmsg->cmsg_len, CMSG_LEN(cmsg_data_len)); + EXPECT_EQ(cmsg->cmsg_level, IPPROTO_IP); + EXPECT_EQ(cmsg->cmsg_type, IP_PKTINFO); + + // Get loopback index. + ifreq ifr = {}; + absl::SNPrintF(ifr.ifr_name, IFNAMSIZ, "lo"); + ASSERT_THAT(ioctl(receiver_socket->get(), SIOCGIFINDEX, &ifr), + SyscallSucceeds()); + ASSERT_NE(ifr.ifr_ifindex, 0); + + in_pktinfo received_pktinfo = {}; + memcpy(&received_pktinfo, CMSG_DATA(cmsg), sizeof(in_pktinfo)); + EXPECT_EQ(received_pktinfo.ipi_ifindex, ifr.ifr_ifindex); + if (IsRunningOnGvisor()) { + // This should actually be a unicast address assigned to the interface. + // + // TODO(gvisor.dev/issue/3556): This check is validating incorrect + // behaviour. We still include the test so that once the bug is + // resolved, this test will start to fail and the individual tasked + // with fixing this bug knows to also fix this test :). + EXPECT_EQ(received_pktinfo.ipi_spec_dst.s_addr, group.imr_multiaddr.s_addr); + } else { + EXPECT_EQ(received_pktinfo.ipi_spec_dst.s_addr, htonl(INADDR_LOOPBACK)); + } + EXPECT_EQ(received_pktinfo.ipi_addr.s_addr, group.imr_multiaddr.s_addr); +} + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 805a96d7ba78762a3bb96bb1cc9e32ccc569437a Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 24 Jul 2020 15:54:16 -0700 Subject: Speed up iptables tests //test/iptables:iptables_test runs 30 seconds faster on my machine. * Using contexts instead of many smaller timeouts makes the tests less likely to flake and removes unnecessary complexity. * We also use context to properly shut down concurrent goroutines and the test container. * Container logs are always logged. 
--- pkg/test/testutil/testutil.go | 5 + test/iptables/filter_input.go | 253 ++++++++++++++++++++++------------------- test/iptables/filter_output.go | 252 ++++++++++++++++++++++++---------------- test/iptables/iptables.go | 61 +++++++++- test/iptables/iptables_test.go | 68 +++++++++-- test/iptables/iptables_util.go | 85 +++++++------- test/iptables/nat.go | 225 +++++++++++++++++++----------------- test/iptables/runner/main.go | 5 +- 8 files changed, 571 insertions(+), 383 deletions(-) (limited to 'test') diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go index 64c292698..1580527b5 100644 --- a/pkg/test/testutil/testutil.go +++ b/pkg/test/testutil/testutil.go @@ -316,6 +316,11 @@ func Copy(src, dst string) error { func Poll(cb func() error, timeout time.Duration) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() + return PollContext(ctx, cb) +} + +// PollContext is like Poll, but takes a context instead of a timeout. +func PollContext(ctx context.Context, cb func() error) error { b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) return backoff.Retry(cb, b) } diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go index 5737ee317..b45d448b8 100644 --- a/test/iptables/filter_input.go +++ b/test/iptables/filter_input.go @@ -15,6 +15,7 @@ package iptables import ( + "context" "errors" "fmt" "net" @@ -53,7 +54,7 @@ func init() { } // FilterInputDropUDP tests that we can drop UDP traffic. -type FilterInputDropUDP struct{} +type FilterInputDropUDP struct{ containerCase } // Name implements TestCase.Name. func (FilterInputDropUDP) Name() string { @@ -61,15 +62,17 @@ func (FilterInputDropUDP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropUDP) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropUDP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { return err } // Listen for UDP packets on dropPort. - if err := listenUDP(dropPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, dropPort); err == nil { return fmt.Errorf("packets on port %d should have been dropped, but got a packet", dropPort) - } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + } else if !errors.Is(err, context.DeadlineExceeded) { return fmt.Errorf("error reading: %v", err) } @@ -79,12 +82,12 @@ func (FilterInputDropUDP) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropUDP) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, dropPort, sendloopDuration) +func (FilterInputDropUDP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, dropPort) } // FilterInputDropOnlyUDP tests that "-p udp -j DROP" only affects UDP traffic. -type FilterInputDropOnlyUDP struct{} +type FilterInputDropOnlyUDP struct{ baseCase } // Name implements TestCase.Name. func (FilterInputDropOnlyUDP) Name() string { @@ -92,13 +95,13 @@ func (FilterInputDropOnlyUDP) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterInputDropOnlyUDP) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropOnlyUDP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil { return err } // Listen for a TCP connection, which should be allowed. - if err := listenTCP(acceptPort, sendloopDuration); err != nil { + if err := listenTCP(ctx, acceptPort); err != nil { return fmt.Errorf("failed to establish a connection %v", err) } @@ -106,14 +109,14 @@ func (FilterInputDropOnlyUDP) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropOnlyUDP) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropOnlyUDP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Try to establish a TCP connection with the container, which should // succeed. - return connectTCP(ip, acceptPort, sendloopDuration) + return connectTCP(ctx, ip, acceptPort) } // FilterInputDropUDPPort tests that we can drop UDP traffic by port. -type FilterInputDropUDPPort struct{} +type FilterInputDropUDPPort struct{ containerCase } // Name implements TestCase.Name. func (FilterInputDropUDPPort) Name() string { @@ -121,15 +124,17 @@ func (FilterInputDropUDPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropUDPPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } // Listen for UDP packets on dropPort. - if err := listenUDP(dropPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, dropPort); err == nil { return fmt.Errorf("packets on port %d should have been dropped, but got a packet", dropPort) - } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + } else if !errors.Is(err, context.DeadlineExceeded) { return fmt.Errorf("error reading: %v", err) } @@ -139,13 +144,13 @@ func (FilterInputDropUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropUDPPort) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, dropPort, sendloopDuration) +func (FilterInputDropUDPPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, dropPort) } // FilterInputDropDifferentUDPPort tests that dropping traffic for a single UDP port // doesn't drop packets on other ports. -type FilterInputDropDifferentUDPPort struct{} +type FilterInputDropDifferentUDPPort struct{ containerCase } // Name implements TestCase.Name. func (FilterInputDropDifferentUDPPort) Name() string { @@ -153,13 +158,13 @@ func (FilterInputDropDifferentUDPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropDifferentUDPPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } // Listen for UDP packets on another port. 
- if err := listenUDP(acceptPort, sendloopDuration); err != nil { + if err := listenUDP(ctx, acceptPort); err != nil { return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", acceptPort, err) } @@ -167,12 +172,12 @@ func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP, ipv6 bool) err } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputDropDifferentUDPPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputDropTCPDestPort tests that connections are not accepted on specified source ports. -type FilterInputDropTCPDestPort struct{} +type FilterInputDropTCPDestPort struct{ baseCase } // Name implements TestCase.Name. func (FilterInputDropTCPDestPort) Name() string { @@ -180,33 +185,36 @@ func (FilterInputDropTCPDestPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropTCPDestPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropTCPDestPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "INPUT", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } // Listen for TCP packets on drop port. - if err := listenTCP(dropPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, dropPort); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", dropPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropTCPDestPort) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropTCPDestPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Ensure we cannot connect to the container. - for start := time.Now(); time.Since(start) < sendloopDuration; { - if err := connectTCP(ip, dropPort, sendloopDuration-time.Since(start)); err == nil { - return fmt.Errorf("expected not to connect, but was able to connect on port %d", dropPort) - } + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, dropPort); err == nil { + return fmt.Errorf("expected not to connect, but was able to connect on port %d", dropPort) } - return nil } // FilterInputDropTCPSrcPort tests that connections are not accepted on specified source ports. -type FilterInputDropTCPSrcPort struct{} +type FilterInputDropTCPSrcPort struct{ baseCase } // Name implements TestCase.Name. func (FilterInputDropTCPSrcPort) Name() string { @@ -214,34 +222,37 @@ func (FilterInputDropTCPSrcPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropTCPSrcPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropTCPSrcPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Drop anything from an ephemeral port. if err := filterTable(ipv6, "-A", "INPUT", "-p", "tcp", "-m", "tcp", "--sport", "1024:65535", "-j", "DROP"); err != nil { return err } // Listen for TCP packets on accept port. 
- if err := listenTCP(acceptPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, acceptPort); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but was", dropPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropTCPSrcPort) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropTCPSrcPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Ensure we cannot connect to the container. - for start := time.Now(); time.Since(start) < sendloopDuration; { - if err := connectTCP(ip, acceptPort, sendloopDuration-time.Since(start)); err == nil { - return fmt.Errorf("expected not to connect, but was able to connect on port %d", acceptPort) - } + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, dropPort); err == nil { + return fmt.Errorf("expected not to connect, but was able to connect on port %d", acceptPort) } - return nil } // FilterInputDropAll tests that we can drop all traffic to the INPUT chain. -type FilterInputDropAll struct{} +type FilterInputDropAll struct{ containerCase } // Name implements TestCase.Name. func (FilterInputDropAll) Name() string { @@ -249,15 +260,17 @@ func (FilterInputDropAll) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDropAll) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDropAll) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "INPUT", "-j", "DROP"); err != nil { return err } // Listen for all packets on dropPort. - if err := listenUDP(dropPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, dropPort); err == nil { return fmt.Errorf("packets should have been dropped, but got a packet") - } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + } else if !errors.Is(err, context.DeadlineExceeded) { return fmt.Errorf("error reading: %v", err) } @@ -267,15 +280,15 @@ func (FilterInputDropAll) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputDropAll) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, dropPort, sendloopDuration) +func (FilterInputDropAll) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, dropPort) } // FilterInputMultiUDPRules verifies that multiple UDP rules are applied // correctly. This has the added benefit of testing whether we're serializing // rules correctly -- if we do it incorrectly, the iptables tool will // misunderstand and save the wrong tables. -type FilterInputMultiUDPRules struct{} +type FilterInputMultiUDPRules struct{ baseCase } // Name implements TestCase.Name. func (FilterInputMultiUDPRules) Name() string { @@ -283,7 +296,7 @@ func (FilterInputMultiUDPRules) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterInputMultiUDPRules) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputMultiUDPRules) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"}, {"-A", "INPUT", "-p", "udp", "-m", "udp", "--destination-port", fmt.Sprintf("%d", acceptPort), "-j", "ACCEPT"}, @@ -293,14 +306,14 @@ func (FilterInputMultiUDPRules) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputMultiUDPRules) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterInputMultiUDPRules) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // FilterInputRequireProtocolUDP checks that "-m udp" requires "-p udp" to be // specified. -type FilterInputRequireProtocolUDP struct{} +type FilterInputRequireProtocolUDP struct{ baseCase } // Name implements TestCase.Name. func (FilterInputRequireProtocolUDP) Name() string { @@ -308,20 +321,20 @@ func (FilterInputRequireProtocolUDP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputRequireProtocolUDP) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputRequireProtocolUDP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "INPUT", "-m", "udp", "--destination-port", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err == nil { return errors.New("expected iptables to fail with out \"-p udp\", but succeeded") } return nil } -func (FilterInputRequireProtocolUDP) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterInputRequireProtocolUDP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // FilterInputCreateUserChain tests chain creation. -type FilterInputCreateUserChain struct{} +type FilterInputCreateUserChain struct{ baseCase } // Name implements TestCase.Name. func (FilterInputCreateUserChain) Name() string { @@ -329,7 +342,7 @@ func (FilterInputCreateUserChain) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputCreateUserChain) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputCreateUserChain) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ // Create a chain. {"-N", chainName}, @@ -340,13 +353,13 @@ func (FilterInputCreateUserChain) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputCreateUserChain) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterInputCreateUserChain) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // FilterInputDefaultPolicyAccept tests the default ACCEPT policy. -type FilterInputDefaultPolicyAccept struct{} +type FilterInputDefaultPolicyAccept struct{ containerCase } // Name implements TestCase.Name. func (FilterInputDefaultPolicyAccept) Name() string { @@ -354,21 +367,21 @@ func (FilterInputDefaultPolicyAccept) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDefaultPolicyAccept) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDefaultPolicyAccept) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Set the default policy to accept, then receive a packet. 
if err := filterTable(ipv6, "-P", "INPUT", "ACCEPT"); err != nil { return err } - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterInputDefaultPolicyAccept) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputDefaultPolicyAccept) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputDefaultPolicyDrop tests the default DROP policy. -type FilterInputDefaultPolicyDrop struct{} +type FilterInputDefaultPolicyDrop struct{ containerCase } // Name implements TestCase.Name. func (FilterInputDefaultPolicyDrop) Name() string { @@ -376,15 +389,17 @@ func (FilterInputDefaultPolicyDrop) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDefaultPolicyDrop) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDefaultPolicyDrop) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-P", "INPUT", "DROP"); err != nil { return err } // Listen for UDP packets on dropPort. - if err := listenUDP(dropPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, dropPort); err == nil { return fmt.Errorf("packets on port %d should have been dropped, but got a packet", dropPort) - } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + } else if !errors.Is(err, context.DeadlineExceeded) { return fmt.Errorf("error reading: %v", err) } @@ -394,13 +409,13 @@ func (FilterInputDefaultPolicyDrop) ContainerAction(ip net.IP, ipv6 bool) error } // LocalAction implements TestCase.LocalAction. -func (FilterInputDefaultPolicyDrop) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputDefaultPolicyDrop) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputReturnUnderflow tests that -j RETURN in a built-in chain causes // the underflow rule (i.e. default policy) to be executed. -type FilterInputReturnUnderflow struct{} +type FilterInputReturnUnderflow struct{ containerCase } // Name implements TestCase.Name. func (FilterInputReturnUnderflow) Name() string { @@ -408,7 +423,7 @@ func (FilterInputReturnUnderflow) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputReturnUnderflow) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputReturnUnderflow) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Add a RETURN rule followed by an unconditional accept, and set the // default policy to DROP. rules := [][]string{ @@ -422,16 +437,16 @@ func (FilterInputReturnUnderflow) ContainerAction(ip net.IP, ipv6 bool) error { // We should receive packets, as the RETURN rule will trigger the default // ACCEPT policy. - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterInputReturnUnderflow) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputReturnUnderflow) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputSerializeJump verifies that we can serialize jumps. 
-type FilterInputSerializeJump struct{} +type FilterInputSerializeJump struct{ baseCase } // Name implements TestCase.Name. func (FilterInputSerializeJump) Name() string { @@ -439,7 +454,7 @@ func (FilterInputSerializeJump) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputSerializeJump) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputSerializeJump) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Write a JUMP rule, the serialize it with `-L`. rules := [][]string{ {"-N", chainName}, @@ -450,13 +465,13 @@ func (FilterInputSerializeJump) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputSerializeJump) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterInputSerializeJump) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // FilterInputJumpBasic jumps to a chain and executes a rule there. -type FilterInputJumpBasic struct{} +type FilterInputJumpBasic struct{ containerCase } // Name implements TestCase.Name. func (FilterInputJumpBasic) Name() string { @@ -464,7 +479,7 @@ func (FilterInputJumpBasic) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputJumpBasic) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputJumpBasic) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-P", "INPUT", "DROP"}, {"-N", chainName}, @@ -476,16 +491,16 @@ func (FilterInputJumpBasic) ContainerAction(ip net.IP, ipv6 bool) error { } // Listen for UDP packets on acceptPort. - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpBasic) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputJumpBasic) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputJumpReturn jumps, returns, and executes a rule. -type FilterInputJumpReturn struct{} +type FilterInputJumpReturn struct{ containerCase } // Name implements TestCase.Name. func (FilterInputJumpReturn) Name() string { @@ -493,7 +508,7 @@ func (FilterInputJumpReturn) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputJumpReturn) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputJumpReturn) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-N", chainName}, {"-P", "INPUT", "ACCEPT"}, @@ -506,16 +521,16 @@ func (FilterInputJumpReturn) ContainerAction(ip net.IP, ipv6 bool) error { } // Listen for UDP packets on acceptPort. - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpReturn) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputJumpReturn) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputJumpReturnDrop jumps to a chain, returns, and DROPs packets. -type FilterInputJumpReturnDrop struct{} +type FilterInputJumpReturnDrop struct{ containerCase } // Name implements TestCase.Name. func (FilterInputJumpReturnDrop) Name() string { @@ -523,7 +538,7 @@ func (FilterInputJumpReturnDrop) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputJumpReturnDrop) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-N", chainName}, {"-A", "INPUT", "-j", chainName}, @@ -535,9 +550,11 @@ func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP, ipv6 bool) error { } // Listen for UDP packets on dropPort. - if err := listenUDP(dropPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, dropPort); err == nil { return fmt.Errorf("packets on port %d should have been dropped, but got a packet", dropPort) - } else if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() { + } else if !errors.Is(err, context.DeadlineExceeded) { return fmt.Errorf("error reading: %v", err) } @@ -547,12 +564,12 @@ func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpReturnDrop) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, dropPort, sendloopDuration) +func (FilterInputJumpReturnDrop) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, dropPort) } // FilterInputJumpBuiltin verifies that jumping to a top-levl chain is illegal. -type FilterInputJumpBuiltin struct{} +type FilterInputJumpBuiltin struct{ baseCase } // Name implements TestCase.Name. func (FilterInputJumpBuiltin) Name() string { @@ -560,7 +577,7 @@ func (FilterInputJumpBuiltin) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputJumpBuiltin) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputJumpBuiltin) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "INPUT", "-j", "OUTPUT"); err == nil { return fmt.Errorf("iptables should be unable to jump to a built-in chain") } @@ -568,13 +585,13 @@ func (FilterInputJumpBuiltin) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterInputJumpBuiltin) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterInputJumpBuiltin) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // FilterInputJumpTwice jumps twice, then returns twice and executes a rule. -type FilterInputJumpTwice struct{} +type FilterInputJumpTwice struct{ containerCase } // Name implements TestCase.Name. func (FilterInputJumpTwice) Name() string { @@ -582,7 +599,7 @@ func (FilterInputJumpTwice) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputJumpTwice) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputJumpTwice) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { const chainName2 = chainName + "2" rules := [][]string{ {"-P", "INPUT", "DROP"}, @@ -598,17 +615,17 @@ func (FilterInputJumpTwice) ContainerAction(ip net.IP, ipv6 bool) error { // UDP packets should jump and return twice, eventually hitting the // ACCEPT rule. - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. 
-func (FilterInputJumpTwice) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputJumpTwice) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputDestination verifies that we can filter packets via `-d // `. -type FilterInputDestination struct{} +type FilterInputDestination struct{ containerCase } // Name implements TestCase.Name. func (FilterInputDestination) Name() string { @@ -616,7 +633,7 @@ func (FilterInputDestination) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputDestination) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputDestination) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { addrs, err := localAddrs(ipv6) if err != nil { return err @@ -632,17 +649,17 @@ func (FilterInputDestination) ContainerAction(ip net.IP, ipv6 bool) error { return err } - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterInputDestination) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputDestination) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputInvertDestination verifies that we can filter packets via `! -d // `. -type FilterInputInvertDestination struct{} +type FilterInputInvertDestination struct{ containerCase } // Name implements TestCase.Name. func (FilterInputInvertDestination) Name() string { @@ -650,7 +667,7 @@ func (FilterInputInvertDestination) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputInvertDestination) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputInvertDestination) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Make INPUT's default action DROP, then ACCEPT all packets not bound // for 127.0.0.1. rules := [][]string{ @@ -661,17 +678,17 @@ func (FilterInputInvertDestination) ContainerAction(ip net.IP, ipv6 bool) error return err } - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterInputInvertDestination) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputInvertDestination) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputSource verifies that we can filter packets via `-s // `. -type FilterInputSource struct{} +type FilterInputSource struct{ containerCase } // Name implements TestCase.Name. func (FilterInputSource) Name() string { @@ -679,7 +696,7 @@ func (FilterInputSource) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputSource) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputSource) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Make INPUT's default action DROP, then ACCEPT all packets from this // machine. rules := [][]string{ @@ -690,17 +707,17 @@ func (FilterInputSource) ContainerAction(ip net.IP, ipv6 bool) error { return err } - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. 
-func (FilterInputSource) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputSource) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // FilterInputInvertSource verifies that we can filter packets via `! -s // `. -type FilterInputInvertSource struct{} +type FilterInputInvertSource struct{ containerCase } // Name implements TestCase.Name. func (FilterInputInvertSource) Name() string { @@ -708,7 +725,7 @@ func (FilterInputInvertSource) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterInputInvertSource) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterInputInvertSource) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Make INPUT's default action DROP, then ACCEPT all packets not bound // for 127.0.0.1. rules := [][]string{ @@ -719,10 +736,10 @@ func (FilterInputInvertSource) ContainerAction(ip net.IP, ipv6 bool) error { return err } - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterInputInvertSource) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (FilterInputInvertSource) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } diff --git a/test/iptables/filter_output.go b/test/iptables/filter_output.go index c1d83b471..32bf2a992 100644 --- a/test/iptables/filter_output.go +++ b/test/iptables/filter_output.go @@ -15,6 +15,8 @@ package iptables import ( + "context" + "errors" "fmt" "net" ) @@ -44,7 +46,7 @@ func init() { // FilterOutputDropTCPDestPort tests that connections are not accepted on // specified source ports. -type FilterOutputDropTCPDestPort struct{} +type FilterOutputDropTCPDestPort struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputDropTCPDestPort) Name() string { @@ -52,22 +54,28 @@ func (FilterOutputDropTCPDestPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropTCPDestPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputDropTCPDestPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", "1024:65535", "-j", "DROP"); err != nil { return err } // Listen for TCP packets on accept port. - if err := listenTCP(acceptPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, acceptPort); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputDropTCPDestPort) LocalAction(ip net.IP, ipv6 bool) error { - if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { +func (FilterOutputDropTCPDestPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, acceptPort); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", dropPort) } @@ -76,7 +84,7 @@ func (FilterOutputDropTCPDestPort) LocalAction(ip net.IP, ipv6 bool) error { // FilterOutputDropTCPSrcPort tests that connections are not accepted on // specified source ports. -type FilterOutputDropTCPSrcPort struct{} +type FilterOutputDropTCPSrcPort struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputDropTCPSrcPort) Name() string { @@ -84,22 +92,28 @@ func (FilterOutputDropTCPSrcPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropTCPSrcPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputDropTCPSrcPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--sport", fmt.Sprintf("%d", dropPort), "-j", "DROP"); err != nil { return err } // Listen for TCP packets on drop port. - if err := listenTCP(dropPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, dropPort); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", dropPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP, ipv6 bool) error { - if err := connectTCP(ip, dropPort, sendloopDuration); err == nil { +func (FilterOutputDropTCPSrcPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, dropPort); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", dropPort) } @@ -107,7 +121,7 @@ func (FilterOutputDropTCPSrcPort) LocalAction(ip net.IP, ipv6 bool) error { } // FilterOutputAcceptTCPOwner tests that TCP connections from uid owner are accepted. -type FilterOutputAcceptTCPOwner struct{} +type FilterOutputAcceptTCPOwner struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputAcceptTCPOwner) Name() string { @@ -115,22 +129,22 @@ func (FilterOutputAcceptTCPOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputAcceptTCPOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputAcceptTCPOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--uid-owner", "root", "-j", "ACCEPT"); err != nil { return err } // Listen for TCP packets on accept port. - return listenTCP(acceptPort, sendloopDuration) + return listenTCP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. 
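On the dialing side of these drop tests the check is deliberately looser: any error from connectTCP counts as a pass. With the DROP rule installed the dial typically times out, but the check does not insist on one particular failure mode. A small sketch of that local-side shape, assuming the context-aware connectTCP from this patch (expectNoConnect is an illustrative name):

    // expectNoConnect passes if no TCP connection to ip:port can be established
    // within NegativeTimeout.
    func expectNoConnect(ctx context.Context, ip net.IP, port int) error {
        timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout)
        defer cancel()
        if err := connectTCP(timedCtx, ip, port); err == nil {
            return fmt.Errorf("connection to port %d succeeded, expected it to fail", port)
        }
        return nil
    }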
-func (FilterOutputAcceptTCPOwner) LocalAction(ip net.IP, ipv6 bool) error { - return connectTCP(ip, acceptPort, sendloopDuration) +func (FilterOutputAcceptTCPOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return connectTCP(ctx, ip, acceptPort) } // FilterOutputDropTCPOwner tests that TCP connections from uid owner are dropped. -type FilterOutputDropTCPOwner struct{} +type FilterOutputDropTCPOwner struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputDropTCPOwner) Name() string { @@ -138,22 +152,28 @@ func (FilterOutputDropTCPOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropTCPOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputDropTCPOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--uid-owner", "root", "-j", "DROP"); err != nil { return err } // Listen for TCP packets on accept port. - if err := listenTCP(acceptPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, acceptPort); err == nil { return fmt.Errorf("connection on port %d should be dropped, but got accepted", acceptPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDropTCPOwner) LocalAction(ip net.IP, ipv6 bool) error { - if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { +func (FilterOutputDropTCPOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, acceptPort); err == nil { return fmt.Errorf("connection destined to port %d should be dropped, but got accepted", acceptPort) } @@ -161,7 +181,7 @@ func (FilterOutputDropTCPOwner) LocalAction(ip net.IP, ipv6 bool) error { } // FilterOutputAcceptUDPOwner tests that UDP packets from uid owner are accepted. -type FilterOutputAcceptUDPOwner struct{} +type FilterOutputAcceptUDPOwner struct{ localCase } // Name implements TestCase.Name. func (FilterOutputAcceptUDPOwner) Name() string { @@ -169,23 +189,23 @@ func (FilterOutputAcceptUDPOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputAcceptUDPOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputAcceptUDPOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-m", "owner", "--uid-owner", "root", "-j", "ACCEPT"); err != nil { return err } // Send UDP packets on acceptPort. - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return sendUDPLoop(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputAcceptUDPOwner) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterOutputAcceptUDPOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Listen for UDP packets on acceptPort. - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // FilterOutputDropUDPOwner tests that UDP packets from uid owner are dropped. -type FilterOutputDropUDPOwner struct{} +type FilterOutputDropUDPOwner struct{ localCase } // Name implements TestCase.Name. 
func (FilterOutputDropUDPOwner) Name() string { @@ -193,20 +213,24 @@ func (FilterOutputDropUDPOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropUDPOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputDropUDPOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-m", "owner", "--uid-owner", "root", "-j", "DROP"); err != nil { return err } // Send UDP packets on dropPort. - return sendUDPLoop(ip, dropPort, sendloopDuration) + return sendUDPLoop(ctx, ip, dropPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDropUDPOwner) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterOutputDropUDPOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Listen for UDP packets on dropPort. - if err := listenUDP(dropPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, dropPort); err == nil { return fmt.Errorf("packets should not be received") + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil @@ -214,7 +238,7 @@ func (FilterOutputDropUDPOwner) LocalAction(ip net.IP, ipv6 bool) error { // FilterOutputOwnerFail tests that without uid/gid option, owner rule // will fail. -type FilterOutputOwnerFail struct{} +type FilterOutputOwnerFail struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputOwnerFail) Name() string { @@ -222,7 +246,7 @@ func (FilterOutputOwnerFail) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputOwnerFail) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputOwnerFail) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-m", "owner", "-j", "ACCEPT"); err == nil { return fmt.Errorf("Invalid argument") } @@ -231,13 +255,13 @@ func (FilterOutputOwnerFail) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (FilterOutputOwnerFail) LocalAction(ip net.IP, ipv6 bool) error { +func (FilterOutputOwnerFail) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // no-op. return nil } // FilterOutputAcceptGIDOwner tests that TCP connections from gid owner are accepted. -type FilterOutputAcceptGIDOwner struct{} +type FilterOutputAcceptGIDOwner struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputAcceptGIDOwner) Name() string { @@ -245,22 +269,22 @@ func (FilterOutputAcceptGIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputAcceptGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputAcceptGIDOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--gid-owner", "root", "-j", "ACCEPT"); err != nil { return err } // Listen for TCP packets on accept port. - return listenTCP(acceptPort, sendloopDuration) + return listenTCP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputAcceptGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { - return connectTCP(ip, acceptPort, sendloopDuration) +func (FilterOutputAcceptGIDOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return connectTCP(ctx, ip, acceptPort) } // FilterOutputDropGIDOwner tests that TCP connections from gid owner are dropped. -type FilterOutputDropGIDOwner struct{} +type FilterOutputDropGIDOwner struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputDropGIDOwner) Name() string { @@ -268,22 +292,28 @@ func (FilterOutputDropGIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDropGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputDropGIDOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "owner", "--gid-owner", "root", "-j", "DROP"); err != nil { return err } // Listen for TCP packets on accept port. - if err := listenTCP(acceptPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, acceptPort); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", acceptPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDropGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { - if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { +func (FilterOutputDropGIDOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, acceptPort); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) } @@ -291,7 +321,7 @@ func (FilterOutputDropGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { } // FilterOutputInvertGIDOwner tests that TCP connections from gid owner are dropped. -type FilterOutputInvertGIDOwner struct{} +type FilterOutputInvertGIDOwner struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputInvertGIDOwner) Name() string { @@ -299,7 +329,7 @@ func (FilterOutputInvertGIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInvertGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInvertGIDOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--gid-owner", "root", "-j", "ACCEPT"}, {"-A", "OUTPUT", "-p", "tcp", "-j", "DROP"}, @@ -309,16 +339,22 @@ func (FilterOutputInvertGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { } // Listen for TCP packets on accept port. - if err := listenTCP(acceptPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, acceptPort); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", acceptPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputInvertGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { - if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { +func (FilterOutputInvertGIDOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, acceptPort); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) } @@ -326,7 +362,7 @@ func (FilterOutputInvertGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { } // FilterOutputInvertUIDOwner tests that TCP connections from gid owner are dropped. -type FilterOutputInvertUIDOwner struct{} +type FilterOutputInvertUIDOwner struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputInvertUIDOwner) Name() string { @@ -334,7 +370,7 @@ func (FilterOutputInvertUIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInvertUIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInvertUIDOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--uid-owner", "root", "-j", "DROP"}, {"-A", "OUTPUT", "-p", "tcp", "-j", "ACCEPT"}, @@ -344,17 +380,17 @@ func (FilterOutputInvertUIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { } // Listen for TCP packets on accept port. - return listenTCP(acceptPort, sendloopDuration) + return listenTCP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInvertUIDOwner) LocalAction(ip net.IP, ipv6 bool) error { - return connectTCP(ip, acceptPort, sendloopDuration) +func (FilterOutputInvertUIDOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return connectTCP(ctx, ip, acceptPort) } // FilterOutputInvertUIDAndGIDOwner tests that TCP connections from uid and gid // owner are dropped. -type FilterOutputInvertUIDAndGIDOwner struct{} +type FilterOutputInvertUIDAndGIDOwner struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputInvertUIDAndGIDOwner) Name() string { @@ -362,7 +398,7 @@ func (FilterOutputInvertUIDAndGIDOwner) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInvertUIDAndGIDOwner) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInvertUIDAndGIDOwner) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "-p", "tcp", "-m", "owner", "!", "--uid-owner", "root", "!", "--gid-owner", "root", "-j", "ACCEPT"}, {"-A", "OUTPUT", "-p", "tcp", "-j", "DROP"}, @@ -372,16 +408,22 @@ func (FilterOutputInvertUIDAndGIDOwner) ContainerAction(ip net.IP, ipv6 bool) er } // Listen for TCP packets on accept port. - if err := listenTCP(acceptPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, acceptPort); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", acceptPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputInvertUIDAndGIDOwner) LocalAction(ip net.IP, ipv6 bool) error { - if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { +func (FilterOutputInvertUIDAndGIDOwner) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, acceptPort); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) } @@ -390,7 +432,7 @@ func (FilterOutputInvertUIDAndGIDOwner) LocalAction(ip net.IP, ipv6 bool) error // FilterOutputDestination tests that we can selectively allow packets to // certain destinations. -type FilterOutputDestination struct{} +type FilterOutputDestination struct{ localCase } // Name implements TestCase.Name. func (FilterOutputDestination) Name() string { @@ -398,7 +440,7 @@ func (FilterOutputDestination) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputDestination) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputDestination) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "-d", ip.String(), "-j", "ACCEPT"}, {"-P", "OUTPUT", "DROP"}, @@ -407,17 +449,17 @@ func (FilterOutputDestination) ContainerAction(ip net.IP, ipv6 bool) error { return err } - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return sendUDPLoop(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputDestination) LocalAction(ip net.IP, ipv6 bool) error { - return listenUDP(acceptPort, sendloopDuration) +func (FilterOutputDestination) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return listenUDP(ctx, acceptPort) } // FilterOutputInvertDestination tests that we can selectively allow packets // not headed for a particular destination. -type FilterOutputInvertDestination struct{} +type FilterOutputInvertDestination struct{ localCase } // Name implements TestCase.Name. func (FilterOutputInvertDestination) Name() string { @@ -425,7 +467,7 @@ func (FilterOutputInvertDestination) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInvertDestination) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInvertDestination) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { rules := [][]string{ {"-A", "OUTPUT", "!", "-d", localIP(ipv6), "-j", "ACCEPT"}, {"-P", "OUTPUT", "DROP"}, @@ -434,17 +476,17 @@ func (FilterOutputInvertDestination) ContainerAction(ip net.IP, ipv6 bool) error return err } - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return sendUDPLoop(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInvertDestination) LocalAction(ip net.IP, ipv6 bool) error { - return listenUDP(acceptPort, sendloopDuration) +func (FilterOutputInvertDestination) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return listenUDP(ctx, acceptPort) } // FilterOutputInterfaceAccept tests that packets are sent via interface // matching the iptables rule. -type FilterOutputInterfaceAccept struct{} +type FilterOutputInterfaceAccept struct{ localCase } // Name implements TestCase.Name. func (FilterOutputInterfaceAccept) Name() string { @@ -452,7 +494,7 @@ func (FilterOutputInterfaceAccept) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
-func (FilterOutputInterfaceAccept) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInterfaceAccept) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { ifname, ok := getInterfaceName() if !ok { return fmt.Errorf("no interface is present, except loopback") @@ -461,17 +503,17 @@ func (FilterOutputInterfaceAccept) ContainerAction(ip net.IP, ipv6 bool) error { return err } - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return sendUDPLoop(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterfaceAccept) LocalAction(ip net.IP, ipv6 bool) error { - return listenUDP(acceptPort, sendloopDuration) +func (FilterOutputInterfaceAccept) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return listenUDP(ctx, acceptPort) } // FilterOutputInterfaceDrop tests that packets are not sent via interface // matching the iptables rule. -type FilterOutputInterfaceDrop struct{} +type FilterOutputInterfaceDrop struct{ localCase } // Name implements TestCase.Name. func (FilterOutputInterfaceDrop) Name() string { @@ -479,7 +521,7 @@ func (FilterOutputInterfaceDrop) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterfaceDrop) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInterfaceDrop) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { ifname, ok := getInterfaceName() if !ok { return fmt.Errorf("no interface is present, except loopback") @@ -488,13 +530,17 @@ func (FilterOutputInterfaceDrop) ContainerAction(ip net.IP, ipv6 bool) error { return err } - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return sendUDPLoop(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterfaceDrop) LocalAction(ip net.IP, ipv6 bool) error { - if err := listenUDP(acceptPort, sendloopDuration); err == nil { +func (FilterOutputInterfaceDrop) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, acceptPort); err == nil { return fmt.Errorf("packets should not be received on port %v, but are received", acceptPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil @@ -502,7 +548,7 @@ func (FilterOutputInterfaceDrop) LocalAction(ip net.IP, ipv6 bool) error { // FilterOutputInterface tests that packets are sent via interface which is // not matching the interface name in the iptables rule. -type FilterOutputInterface struct{} +type FilterOutputInterface struct{ localCase } // Name implements TestCase.Name. func (FilterOutputInterface) Name() string { @@ -510,22 +556,22 @@ func (FilterOutputInterface) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterface) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInterface) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-o", "lo", "-j", "DROP"); err != nil { return err } - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return sendUDPLoop(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputInterface) LocalAction(ip net.IP, ipv6 bool) error { - return listenUDP(acceptPort, sendloopDuration) +func (FilterOutputInterface) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return listenUDP(ctx, acceptPort) } // FilterOutputInterfaceBeginsWith tests that packets are not sent via an // interface which begins with the given interface name. -type FilterOutputInterfaceBeginsWith struct{} +type FilterOutputInterfaceBeginsWith struct{ localCase } // Name implements TestCase.Name. func (FilterOutputInterfaceBeginsWith) Name() string { @@ -533,18 +579,22 @@ func (FilterOutputInterfaceBeginsWith) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterfaceBeginsWith) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInterfaceBeginsWith) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "udp", "-o", "e+", "-j", "DROP"); err != nil { return err } - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return sendUDPLoop(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterfaceBeginsWith) LocalAction(ip net.IP, ipv6 bool) error { - if err := listenUDP(acceptPort, sendloopDuration); err == nil { +func (FilterOutputInterfaceBeginsWith) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, acceptPort); err == nil { return fmt.Errorf("packets should not be received on port %v, but are received", acceptPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil @@ -552,7 +602,7 @@ func (FilterOutputInterfaceBeginsWith) LocalAction(ip net.IP, ipv6 bool) error { // FilterOutputInterfaceInvertDrop tests that we selectively do not send // packets via interface not matching the interface name. -type FilterOutputInterfaceInvertDrop struct{} +type FilterOutputInterfaceInvertDrop struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputInterfaceInvertDrop) Name() string { @@ -560,22 +610,28 @@ func (FilterOutputInterfaceInvertDrop) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterfaceInvertDrop) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInterfaceInvertDrop) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "!", "-o", "lo", "-j", "DROP"); err != nil { return err } // Listen for TCP packets on accept port. - if err := listenTCP(acceptPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenTCP(timedCtx, acceptPort); err == nil { return fmt.Errorf("connection on port %d should not be accepted, but got accepted", acceptPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. 
-func (FilterOutputInterfaceInvertDrop) LocalAction(ip net.IP, ipv6 bool) error { - if err := connectTCP(ip, acceptPort, sendloopDuration); err == nil { +func (FilterOutputInterfaceInvertDrop) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := connectTCP(timedCtx, ip, acceptPort); err == nil { return fmt.Errorf("connection destined to port %d should not be accepted, but got accepted", acceptPort) } @@ -584,7 +640,7 @@ func (FilterOutputInterfaceInvertDrop) LocalAction(ip net.IP, ipv6 bool) error { // FilterOutputInterfaceInvertAccept tests that we can selectively send packets // not matching the specific outgoing interface. -type FilterOutputInterfaceInvertAccept struct{} +type FilterOutputInterfaceInvertAccept struct{ baseCase } // Name implements TestCase.Name. func (FilterOutputInterfaceInvertAccept) Name() string { @@ -592,16 +648,16 @@ func (FilterOutputInterfaceInvertAccept) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (FilterOutputInterfaceInvertAccept) ContainerAction(ip net.IP, ipv6 bool) error { +func (FilterOutputInterfaceInvertAccept) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := filterTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "!", "-o", "lo", "-j", "ACCEPT"); err != nil { return err } // Listen for TCP packets on accept port. - return listenTCP(acceptPort, sendloopDuration) + return listenTCP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (FilterOutputInterfaceInvertAccept) LocalAction(ip net.IP, ipv6 bool) error { - return connectTCP(ip, acceptPort, sendloopDuration) +func (FilterOutputInterfaceInvertAccept) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return connectTCP(ctx, ip, acceptPort) } diff --git a/test/iptables/iptables.go b/test/iptables/iptables.go index dfbd80cd1..c2a03f54c 100644 --- a/test/iptables/iptables.go +++ b/test/iptables/iptables.go @@ -16,6 +16,7 @@ package iptables import ( + "context" "fmt" "net" "time" @@ -29,7 +30,11 @@ const IPExchangePort = 2349 const TerminalStatement = "Finished!" // TestTimeout is the timeout used for all tests. -const TestTimeout = 10 * time.Minute +const TestTimeout = 10 * time.Second + +// NegativeTimeout is the time tests should wait to establish the negative +// case, i.e. that connections are not made. +const NegativeTimeout = 2 * time.Second // A TestCase contains one action to run in the container and one to run // locally. The actions run concurrently and each must succeed for the test @@ -40,10 +45,60 @@ type TestCase interface { // ContainerAction runs inside the container. It receives the IP of the // local process. - ContainerAction(ip net.IP, ipv6 bool) error + ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error // LocalAction runs locally. It receives the IP of the container. - LocalAction(ip net.IP, ipv6 bool) error + LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error + + // ContainerSufficient indicates whether ContainerAction's return value + // alone indicates whether the test succeeded. + ContainerSufficient() bool + + // LocalSufficient indicates whether LocalAction's return value alone + // indicates whether the test succeeded. + LocalSufficient() bool +} + +// baseCase provides defaults for ContainerSufficient and LocalSufficient when +// both actions are required to finish. +type baseCase struct{} + +// ContainerSufficient implements TestCase.ContainerSufficient. 
+func (baseCase) ContainerSufficient() bool { + return false +} + +// LocalSufficient implements TestCase.LocalSufficient. +func (baseCase) LocalSufficient() bool { + return false +} + +// localCase provides defaults for ContainerSufficient and LocalSufficient when +// only the local action is required to finish. +type localCase struct{} + +// ContainerSufficient implements TestCase.ContainerSufficient. +func (localCase) ContainerSufficient() bool { + return false +} + +// LocalSufficient implements TestCase.LocalSufficient. +func (localCase) LocalSufficient() bool { + return true +} + +// containerCase provides defaults for ContainerSufficient and LocalSufficient +// when only the container action is required to finish. +type containerCase struct{} + +// ContainerSufficient implements TestCase.ContainerSufficient. +func (containerCase) ContainerSufficient() bool { + return true +} + +// LocalSufficient implements TestCase.LocalSufficient. +func (containerCase) LocalSufficient() bool { + return false } // Tests maps test names to TestCase. diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index fda5f694f..e2beb30d5 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -16,9 +16,11 @@ package iptables import ( "context" + "errors" "fmt" "net" "reflect" + "sync" "testing" "gvisor.dev/gvisor/pkg/test/dockerutil" @@ -51,9 +53,24 @@ func iptablesTest(t *testing.T, test TestCase, ipv6 bool) { t.Fatalf("no test found with name %q. Has it been registered?", test.Name()) } - ctx := context.Background() + // Wait for the local and container goroutines to finish. + var wg sync.WaitGroup + defer wg.Wait() + + ctx, cancel := context.WithTimeout(context.Background(), TestTimeout) + defer cancel() + d := dockerutil.MakeContainer(ctx, t) - defer d.CleanUp(ctx) + defer func() { + if logs, err := d.Logs(context.Background()); err != nil { + t.Logf("Failed to retrieve container logs.") + } else { + t.Logf("=== Container logs: ===\n%s", logs) + } + // Use a new context, as cleanup should run even when we + // timeout. + d.CleanUp(context.Background()) + }() // TODO(gvisor.dev/issue/170): Skipping IPv6 gVisor tests. if ipv6 && dockerutil.Runtime() != "runc" { @@ -86,15 +103,44 @@ func iptablesTest(t *testing.T, test TestCase, ipv6 bool) { } // Run our side of the test. - if err := test.LocalAction(ip, ipv6); err != nil { - t.Fatalf("LocalAction failed: %v", err) - } - - // Wait for the final statement. This structure has the side effect - // that all container logs will appear within the individual test - // context. - if _, err := d.WaitForOutput(ctx, TerminalStatement, TestTimeout); err != nil { - t.Fatalf("test failed: %v", err) + errCh := make(chan error, 2) + wg.Add(1) + go func() { + defer wg.Done() + if err := test.LocalAction(ctx, ip, ipv6); err != nil && !errors.Is(err, context.Canceled) { + errCh <- fmt.Errorf("LocalAction failed: %v", err) + } else { + errCh <- nil + } + if test.LocalSufficient() { + errCh <- nil + } + }() + + // Run the container side. + wg.Add(1) + go func() { + defer wg.Done() + // Wait for the final statement. This structure has the side + // effect that all container logs will appear within the + // individual test context. 
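The three embeddable helpers above encode which side's result decides a test: baseCase requires both actions to report, localCase lets the local side decide (useful when the container only sends UDP and cannot observe the outcome), and containerCase lets the container decide (useful when the local side is the blind sender). A hypothetical case showing how the embedding is chosen; the name and rule below are illustrative and not part of this change:

    // Hypothetical example. The container installs a DROP rule and asserts that
    // nothing arrives; the local side just sends, so only the container's result
    // matters and containerCase is embedded.
    type FilterInputDropAllUDP struct{ containerCase }

    func (FilterInputDropAllUDP) Name() string { return "FilterInputDropAllUDP" }

    func (FilterInputDropAllUDP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error {
        if err := filterTable(ipv6, "-A", "INPUT", "-p", "udp", "-j", "DROP"); err != nil {
            return err
        }
        timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout)
        defer cancel()
        if err := listenUDP(timedCtx, dropPort); err == nil {
            return fmt.Errorf("packets on port %d should have been dropped", dropPort)
        } else if !errors.Is(err, context.DeadlineExceeded) {
            return fmt.Errorf("error reading: %v", err)
        }
        return nil
    }

    func (FilterInputDropAllUDP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error {
        // Send until the harness cancels the context; the sender cannot tell
        // whether its packets were dropped, which is why containerCase is used.
        return sendUDPLoop(ctx, ip, dropPort)
    }

A real case would also be added to the init() registration list like the existing cases in each file.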
+ if _, err := d.WaitForOutput(ctx, TerminalStatement, TestTimeout); err != nil && !errors.Is(err, context.Canceled) { + errCh <- fmt.Errorf("ContainerAction failed: %v", err) + } else { + errCh <- nil + } + if test.ContainerSufficient() { + errCh <- nil + } + }() + + for i := 0; i < 2; i++ { + select { + case err := <-errCh: + if err != nil { + t.Fatal(err) + } + } } } diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go index 5125fe47b..a6ec5cca3 100644 --- a/test/iptables/iptables_util.go +++ b/test/iptables/iptables_util.go @@ -15,6 +15,7 @@ package iptables import ( + "context" "encoding/binary" "errors" "fmt" @@ -70,7 +71,7 @@ func tableRules(ipv6 bool, table string, argsList [][]string) error { // listenUDP listens on a UDP port and returns the value of net.Conn.Read() for // the first read on that port. -func listenUDP(port int, timeout time.Duration) error { +func listenUDP(ctx context.Context, port int) error { localAddr := net.UDPAddr{ Port: port, } @@ -79,68 +80,53 @@ func listenUDP(port int, timeout time.Duration) error { return err } defer conn.Close() - conn.SetDeadline(time.Now().Add(timeout)) - _, err = conn.Read([]byte{0}) - return err -} -// sendUDPLoop sends 1 byte UDP packets repeatedly to the IP and port specified -// over a duration. -func sendUDPLoop(ip net.IP, port int, duration time.Duration) error { - conn, err := connectUDP(ip, port) - if err != nil { - return err - } - defer conn.Close() - loopUDP(conn, duration) - return nil -} + ch := make(chan error) + go func() { + _, err = conn.Read([]byte{0}) + ch <- err + }() -// spawnUDPLoop works like sendUDPLoop, but returns immediately and sends -// packets in another goroutine. -func spawnUDPLoop(ip net.IP, port int, duration time.Duration) error { - conn, err := connectUDP(ip, port) - if err != nil { + select { + case err := <-ch: return err + case <-ctx.Done(): + return ctx.Err() } - go func() { - defer conn.Close() - loopUDP(conn, duration) - }() - return nil } -func connectUDP(ip net.IP, port int) (net.Conn, error) { +// sendUDPLoop sends 1 byte UDP packets repeatedly to the IP and port specified +// over a duration. +func sendUDPLoop(ctx context.Context, ip net.IP, port int) error { remote := net.UDPAddr{ IP: ip, Port: port, } conn, err := net.DialUDP("udp", nil, &remote) if err != nil { - return nil, err + return err } - return conn, nil -} + defer conn.Close() -func loopUDP(conn net.Conn, duration time.Duration) { - to := time.After(duration) - for timedOut := false; !timedOut; { + for { // This may return an error (connection refused) if the remote // hasn't started listening yet or they're dropping our // packets. So we ignore Write errors and depend on the remote // to report a failure if it doesn't get a packet it needs. conn.Write([]byte{0}) select { - case <-to: - timedOut = true - default: - time.Sleep(200 * time.Millisecond) + case <-ctx.Done(): + // Being cancelled or timing out isn't an error, as we + // cannot tell with UDP whether we succeeded. + return nil + // Continue looping. + case <-time.After(200 * time.Millisecond): } } } // listenTCP listens for connections on a TCP port. -func listenTCP(port int, timeout time.Duration) error { +func listenTCP(ctx context.Context, port int) error { localAddr := net.TCPAddr{ Port: port, } @@ -153,17 +139,23 @@ func listenTCP(port int, timeout time.Duration) error { defer lConn.Close() // Accept connections on port. 
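With the actions now context-aware, the harness runs both sides concurrently and collects their results over a channel; a side whose embedded helper reports Sufficient pushes an extra nil so the two receives can complete before the slower side finishes, and the deferred cancel then unblocks whatever is still running. A condensed sketch of that success condition, ignoring the Docker setup, log capture and cleanup shown above (runCase is an illustrative name, not part of the harness):

    func runCase(ctx context.Context, tc TestCase, localIP, containerIP net.IP, ipv6 bool) error {
        // Buffered so neither goroutine blocks once the loop below has returned.
        errCh := make(chan error, 4)
        go func() {
            errCh <- tc.LocalAction(ctx, containerIP, ipv6)
            if tc.LocalSufficient() {
                errCh <- nil
            }
        }()
        go func() {
            errCh <- tc.ContainerAction(ctx, localIP, ipv6)
            if tc.ContainerSufficient() {
                errCh <- nil
            }
        }()
        // Two non-error results, from either side, mean the test passed.
        for i := 0; i < 2; i++ {
            if err := <-errCh; err != nil {
                return err
            }
        }
        return nil
    }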
- lConn.SetDeadline(time.Now().Add(timeout)) - conn, err := lConn.AcceptTCP() - if err != nil { + ch := make(chan error) + go func() { + conn, err := lConn.AcceptTCP() + ch <- err + conn.Close() + }() + + select { + case err := <-ch: return err + case <-ctx.Done(): + return fmt.Errorf("timed out waiting for a connection at %#v: %w", localAddr, ctx.Err()) } - conn.Close() - return nil } // connectTCP connects to the given IP and port from an ephemeral local address. -func connectTCP(ip net.IP, port int, timeout time.Duration) error { +func connectTCP(ctx context.Context, ip net.IP, port int) error { contAddr := net.TCPAddr{ IP: ip, Port: port, @@ -171,13 +163,14 @@ func connectTCP(ip net.IP, port int, timeout time.Duration) error { // The container may not be listening when we first connect, so retry // upon error. callback := func() error { - conn, err := net.DialTimeout("tcp", contAddr.String(), timeout) + var d net.Dialer + conn, err := d.DialContext(ctx, "tcp", contAddr.String()) if conn != nil { conn.Close() } return err } - if err := testutil.Poll(callback, timeout); err != nil { + if err := testutil.PollContext(ctx, callback); err != nil { return fmt.Errorf("timed out waiting to connect IP on port %v, most recent error: %v", port, err) } diff --git a/test/iptables/nat.go b/test/iptables/nat.go index b7fea2527..dd9a18339 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -15,11 +15,11 @@ package iptables import ( + "context" "errors" "fmt" "net" "syscall" - "time" ) const redirectPort = 42 @@ -46,7 +46,7 @@ func init() { } // NATPreRedirectUDPPort tests that packets are redirected to different port. -type NATPreRedirectUDPPort struct{} +type NATPreRedirectUDPPort struct{ containerCase } // Name implements TestCase.Name. func (NATPreRedirectUDPPort) Name() string { @@ -54,12 +54,12 @@ func (NATPreRedirectUDPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreRedirectUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATPreRedirectUDPPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } - if err := listenUDP(redirectPort, sendloopDuration); err != nil { + if err := listenUDP(ctx, redirectPort); err != nil { return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", redirectPort, err) } @@ -67,12 +67,12 @@ func (NATPreRedirectUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectUDPPort) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (NATPreRedirectUDPPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // NATPreRedirectTCPPort tests that connections are redirected on specified ports. -type NATPreRedirectTCPPort struct{} +type NATPreRedirectTCPPort struct{ baseCase } // Name implements TestCase.Name. func (NATPreRedirectTCPPort) Name() string { @@ -80,23 +80,23 @@ func (NATPreRedirectTCPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. 
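listenUDP and listenTCP above both bound a blocking socket call by running it in a goroutine and selecting on the context, while connectTCP gets the same behaviour from net.Dialer.DialContext plus the context-aware poll helper. The pattern generalises to any blocking call; a small sketch with an illustrative name (withContext), which buffers the channel so the worker goroutine can always complete its send and exit even after the context has won the select:

    // withContext runs blocking() and returns its error, or ctx.Err() if the
    // context expires first.
    func withContext(ctx context.Context, blocking func() error) error {
        ch := make(chan error, 1)
        go func() { ch <- blocking() }()
        select {
        case err := <-ch:
            return err
        case <-ctx.Done():
            return ctx.Err()
        }
    }

For example, the body of listenUDP could be expressed as withContext(ctx, func() error { _, err := conn.Read([]byte{0}); return err }).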
-func (NATPreRedirectTCPPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATPreRedirectTCPPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "PREROUTING", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { return err } // Listen for TCP packets on redirect port. - return listenTCP(acceptPort, sendloopDuration) + return listenTCP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectTCPPort) LocalAction(ip net.IP, ipv6 bool) error { - return connectTCP(ip, dropPort, sendloopDuration) +func (NATPreRedirectTCPPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return connectTCP(ctx, ip, dropPort) } // NATPreRedirectTCPOutgoing verifies that outgoing TCP connections aren't // affected by PREROUTING connection tracking. -type NATPreRedirectTCPOutgoing struct{} +type NATPreRedirectTCPOutgoing struct{ baseCase } // Name implements TestCase.Name. func (NATPreRedirectTCPOutgoing) Name() string { @@ -104,24 +104,24 @@ func (NATPreRedirectTCPOutgoing) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreRedirectTCPOutgoing) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATPreRedirectTCPOutgoing) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Redirect all incoming TCP traffic to a closed port. if err := natTable(ipv6, "-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { return err } // Establish a connection to the host process. - return connectTCP(ip, acceptPort, sendloopDuration) + return connectTCP(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectTCPOutgoing) LocalAction(ip net.IP, ipv6 bool) error { - return listenTCP(acceptPort, sendloopDuration) +func (NATPreRedirectTCPOutgoing) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return listenTCP(ctx, acceptPort) } // NATOutRedirectTCPIncoming verifies that incoming TCP connections aren't // affected by OUTPUT connection tracking. -type NATOutRedirectTCPIncoming struct{} +type NATOutRedirectTCPIncoming struct{ baseCase } // Name implements TestCase.Name. func (NATOutRedirectTCPIncoming) Name() string { @@ -129,23 +129,23 @@ func (NATOutRedirectTCPIncoming) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectTCPIncoming) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATOutRedirectTCPIncoming) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Redirect all outgoing TCP traffic to a closed port. if err := natTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { return err } // Establish a connection to the host process. - return listenTCP(acceptPort, sendloopDuration) + return listenTCP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectTCPIncoming) LocalAction(ip net.IP, ipv6 bool) error { - return connectTCP(ip, acceptPort, sendloopDuration) +func (NATOutRedirectTCPIncoming) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return connectTCP(ctx, ip, acceptPort) } // NATOutRedirectUDPPort tests that packets are redirected to different port. -type NATOutRedirectUDPPort struct{} +type NATOutRedirectUDPPort struct{ containerCase } // Name implements TestCase.Name. 
func (NATOutRedirectUDPPort) Name() string { @@ -153,19 +153,19 @@ func (NATOutRedirectUDPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectUDPPort) ContainerAction(ip net.IP, ipv6 bool) error { - return loopbackTest(ipv6, net.ParseIP(nowhereIP(ipv6)), "-A", "OUTPUT", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)) +func (NATOutRedirectUDPPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return loopbackTest(ctx, ipv6, net.ParseIP(nowhereIP(ipv6)), "-A", "OUTPUT", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)) } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectUDPPort) LocalAction(ip net.IP, ipv6 bool) error { +func (NATOutRedirectUDPPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // NATDropUDP tests that packets are not received in ports other than redirect // port. -type NATDropUDP struct{} +type NATDropUDP struct{ containerCase } // Name implements TestCase.Name. func (NATDropUDP) Name() string { @@ -173,25 +173,29 @@ func (NATDropUDP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATDropUDP) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATDropUDP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { return err } - if err := listenUDP(acceptPort, sendloopDuration); err == nil { + timedCtx, cancel := context.WithTimeout(ctx, NegativeTimeout) + defer cancel() + if err := listenUDP(timedCtx, acceptPort); err == nil { return fmt.Errorf("packets on port %d should have been redirected to port %d", acceptPort, redirectPort) + } else if !errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("error reading: %v", err) } return nil } // LocalAction implements TestCase.LocalAction. -func (NATDropUDP) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (NATDropUDP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // NATAcceptAll tests that all UDP packets are accepted. -type NATAcceptAll struct{} +type NATAcceptAll struct{ containerCase } // Name implements TestCase.Name. func (NATAcceptAll) Name() string { @@ -199,12 +203,12 @@ func (NATAcceptAll) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATAcceptAll) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATAcceptAll) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "-j", "ACCEPT"); err != nil { return err } - if err := listenUDP(acceptPort, sendloopDuration); err != nil { + if err := listenUDP(ctx, acceptPort); err != nil { return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", acceptPort, err) } @@ -212,13 +216,13 @@ func (NATAcceptAll) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (NATAcceptAll) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (NATAcceptAll) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // NATOutRedirectIP uses iptables to select packets based on destination IP and // redirects them. 
-type NATOutRedirectIP struct{} +type NATOutRedirectIP struct{ baseCase } // Name implements TestCase.Name. func (NATOutRedirectIP) Name() string { @@ -226,9 +230,9 @@ func (NATOutRedirectIP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATOutRedirectIP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Redirect OUTPUT packets to a listening localhost port. - return loopbackTest(ipv6, net.ParseIP(nowhereIP(ipv6)), + return loopbackTest(ctx, ipv6, net.ParseIP(nowhereIP(ipv6)), "-A", "OUTPUT", "-d", nowhereIP(ipv6), "-p", "udp", @@ -236,14 +240,14 @@ func (NATOutRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectIP) LocalAction(ip net.IP, ipv6 bool) error { +func (NATOutRedirectIP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // NATOutDontRedirectIP tests that iptables matching with "-d" does not match // packets it shouldn't. -type NATOutDontRedirectIP struct{} +type NATOutDontRedirectIP struct{ localCase } // Name implements TestCase.Name. func (NATOutDontRedirectIP) Name() string { @@ -251,20 +255,20 @@ func (NATOutDontRedirectIP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutDontRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATOutDontRedirectIP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "OUTPUT", "-d", localIP(ipv6), "-p", "udp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil { return err } - return sendUDPLoop(ip, acceptPort, sendloopDuration) + return sendUDPLoop(ctx, ip, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (NATOutDontRedirectIP) LocalAction(ip net.IP, ipv6 bool) error { - return listenUDP(acceptPort, sendloopDuration) +func (NATOutDontRedirectIP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return listenUDP(ctx, acceptPort) } // NATOutRedirectInvert tests that iptables can match with "! -d". -type NATOutRedirectInvert struct{} +type NATOutRedirectInvert struct{ baseCase } // Name implements TestCase.Name. func (NATOutRedirectInvert) Name() string { @@ -272,13 +276,13 @@ func (NATOutRedirectInvert) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectInvert) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATOutRedirectInvert) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Redirect OUTPUT packets to a listening localhost port. dest := "192.0.2.2" if ipv6 { dest = "2001:db8::2" } - return loopbackTest(ipv6, net.ParseIP(nowhereIP(ipv6)), + return loopbackTest(ctx, ipv6, net.ParseIP(nowhereIP(ipv6)), "-A", "OUTPUT", "!", "-d", dest, "-p", "udp", @@ -286,14 +290,14 @@ func (NATOutRedirectInvert) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectInvert) LocalAction(ip net.IP, ipv6 bool) error { +func (NATOutRedirectInvert) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // NATPreRedirectIP tests that we can use iptables to select packets based on // destination IP and redirect them. -type NATPreRedirectIP struct{} +type NATPreRedirectIP struct{ containerCase } // Name implements TestCase.Name. 
func (NATPreRedirectIP) Name() string { @@ -301,7 +305,7 @@ func (NATPreRedirectIP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATPreRedirectIP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { addrs, err := localAddrs(ipv6) if err != nil { return err @@ -314,17 +318,17 @@ func (NATPreRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { if err := natTableRules(ipv6, rules); err != nil { return err } - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectIP) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, dropPort, sendloopDuration) +func (NATPreRedirectIP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, dropPort) } // NATPreDontRedirectIP tests that iptables matching with "-d" does not match // packets it shouldn't. -type NATPreDontRedirectIP struct{} +type NATPreDontRedirectIP struct{ containerCase } // Name implements TestCase.Name. func (NATPreDontRedirectIP) Name() string { @@ -332,20 +336,20 @@ func (NATPreDontRedirectIP) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreDontRedirectIP) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATPreDontRedirectIP) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "-d", localIP(ipv6), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil { return err } - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (NATPreDontRedirectIP) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, acceptPort, sendloopDuration) +func (NATPreDontRedirectIP) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, acceptPort) } // NATPreRedirectInvert tests that iptables can match with "! -d". -type NATPreRedirectInvert struct{} +type NATPreRedirectInvert struct{ containerCase } // Name implements TestCase.Name. func (NATPreRedirectInvert) Name() string { @@ -353,21 +357,21 @@ func (NATPreRedirectInvert) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreRedirectInvert) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATPreRedirectInvert) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "PREROUTING", "-p", "udp", "!", "-d", localIP(ipv6), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { return err } - return listenUDP(acceptPort, sendloopDuration) + return listenUDP(ctx, acceptPort) } // LocalAction implements TestCase.LocalAction. -func (NATPreRedirectInvert) LocalAction(ip net.IP, ipv6 bool) error { - return spawnUDPLoop(ip, dropPort, sendloopDuration) +func (NATPreRedirectInvert) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return sendUDPLoop(ctx, ip, dropPort) } // NATRedirectRequiresProtocol tests that use of the --to-ports flag requires a // protocol to be specified with -p. -type NATRedirectRequiresProtocol struct{} +type NATRedirectRequiresProtocol struct{ baseCase } // Name implements TestCase.Name. 
func (NATRedirectRequiresProtocol) Name() string { @@ -375,7 +379,7 @@ func (NATRedirectRequiresProtocol) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATRedirectRequiresProtocol) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATRedirectRequiresProtocol) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "PREROUTING", "-d", localIP(ipv6), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err == nil { return errors.New("expected an error using REDIRECT --to-ports without a protocol") } @@ -383,13 +387,13 @@ func (NATRedirectRequiresProtocol) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (NATRedirectRequiresProtocol) LocalAction(ip net.IP, ipv6 bool) error { +func (NATRedirectRequiresProtocol) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // NATOutRedirectTCPPort tests that connections are redirected on specified ports. -type NATOutRedirectTCPPort struct{} +type NATOutRedirectTCPPort struct{ baseCase } // Name implements TestCase.Name. func (NATOutRedirectTCPPort) Name() string { @@ -397,12 +401,11 @@ func (NATOutRedirectTCPPort) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutRedirectTCPPort) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATOutRedirectTCPPort) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { if err := natTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", acceptPort)); err != nil { return err } - timeout := 20 * time.Second localAddr := net.TCPAddr{ IP: net.ParseIP(localIP(ipv6)), Port: acceptPort, @@ -416,9 +419,7 @@ func (NATOutRedirectTCPPort) ContainerAction(ip net.IP, ipv6 bool) error { defer lConn.Close() // Accept connections on port. - lConn.SetDeadline(time.Now().Add(timeout)) - err = connectTCP(ip, dropPort, timeout) - if err != nil { + if err := connectTCP(ctx, ip, dropPort); err != nil { return err } @@ -432,13 +433,13 @@ func (NATOutRedirectTCPPort) ContainerAction(ip net.IP, ipv6 bool) error { } // LocalAction implements TestCase.LocalAction. -func (NATOutRedirectTCPPort) LocalAction(ip net.IP, ipv6 bool) error { +func (NATOutRedirectTCPPort) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { return nil } // NATLoopbackSkipsPrerouting tests that packets sent via loopback aren't // affected by PREROUTING rules. -type NATLoopbackSkipsPrerouting struct{} +type NATLoopbackSkipsPrerouting struct{ baseCase } // Name implements TestCase.Name. func (NATLoopbackSkipsPrerouting) Name() string { @@ -446,7 +447,7 @@ func (NATLoopbackSkipsPrerouting) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATLoopbackSkipsPrerouting) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATLoopbackSkipsPrerouting) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Redirect anything sent to localhost to an unused port. dest := []byte{127, 0, 0, 1} if err := natTable(ipv6, "-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", dropPort)); err != nil { @@ -457,24 +458,24 @@ func (NATLoopbackSkipsPrerouting) ContainerAction(ip net.IP, ipv6 bool) error { // loopback traffic, the connection would fail. 
sendCh := make(chan error) go func() { - sendCh <- connectTCP(dest, acceptPort, sendloopDuration) + sendCh <- connectTCP(ctx, dest, acceptPort) }() - if err := listenTCP(acceptPort, sendloopDuration); err != nil { + if err := listenTCP(ctx, acceptPort); err != nil { return err } return <-sendCh } // LocalAction implements TestCase.LocalAction. -func (NATLoopbackSkipsPrerouting) LocalAction(ip net.IP, ipv6 bool) error { +func (NATLoopbackSkipsPrerouting) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } // NATPreOriginalDst tests that SO_ORIGINAL_DST returns the pre-NAT destination // of PREROUTING NATted packets. -type NATPreOriginalDst struct{} +type NATPreOriginalDst struct{ baseCase } // Name implements TestCase.Name. func (NATPreOriginalDst) Name() string { @@ -482,7 +483,7 @@ func (NATPreOriginalDst) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATPreOriginalDst) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATPreOriginalDst) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Redirect incoming TCP connections to acceptPort. if err := natTable(ipv6, "-A", "PREROUTING", "-p", "tcp", @@ -495,17 +496,17 @@ func (NATPreOriginalDst) ContainerAction(ip net.IP, ipv6 bool) error { if err != nil { return err } - return listenForRedirectedConn(ipv6, addrs) + return listenForRedirectedConn(ctx, ipv6, addrs) } // LocalAction implements TestCase.LocalAction. -func (NATPreOriginalDst) LocalAction(ip net.IP, ipv6 bool) error { - return connectTCP(ip, dropPort, sendloopDuration) +func (NATPreOriginalDst) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { + return connectTCP(ctx, ip, dropPort) } // NATOutOriginalDst tests that SO_ORIGINAL_DST returns the pre-NAT destination // of OUTBOUND NATted packets. -type NATOutOriginalDst struct{} +type NATOutOriginalDst struct{ baseCase } // Name implements TestCase.Name. func (NATOutOriginalDst) Name() string { @@ -513,7 +514,7 @@ func (NATOutOriginalDst) Name() string { } // ContainerAction implements TestCase.ContainerAction. -func (NATOutOriginalDst) ContainerAction(ip net.IP, ipv6 bool) error { +func (NATOutOriginalDst) ContainerAction(ctx context.Context, ip net.IP, ipv6 bool) error { // Redirect incoming TCP connections to acceptPort. if err := natTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)); err != nil { return err @@ -521,22 +522,22 @@ func (NATOutOriginalDst) ContainerAction(ip net.IP, ipv6 bool) error { connCh := make(chan error) go func() { - connCh <- connectTCP(ip, dropPort, sendloopDuration) + connCh <- connectTCP(ctx, ip, dropPort) }() - if err := listenForRedirectedConn(ipv6, []net.IP{ip}); err != nil { + if err := listenForRedirectedConn(ctx, ipv6, []net.IP{ip}); err != nil { return err } return <-connCh } // LocalAction implements TestCase.LocalAction. -func (NATOutOriginalDst) LocalAction(ip net.IP, ipv6 bool) error { +func (NATOutOriginalDst) LocalAction(ctx context.Context, ip net.IP, ipv6 bool) error { // No-op. return nil } -func listenForRedirectedConn(ipv6 bool, originalDsts []net.IP) error { +func listenForRedirectedConn(ctx context.Context, ipv6 bool, originalDsts []net.IP) error { // The net package doesn't give guarantee access to the connection's // underlying FD, and thus we cannot call getsockopt. We have to use // traditional syscalls for SO_ORIGINAL_DST. 
@@ -572,16 +573,32 @@ func listenForRedirectedConn(ipv6 bool, originalDsts []net.IP) error { return err } - connfd, _, err := syscall.Accept(sockfd) - if err != nil { + // Block on accept() in another goroutine. + connCh := make(chan int) + errCh := make(chan error) + go func() { + connFD, _, err := syscall.Accept(sockfd) + if err != nil { + errCh <- err + } + connCh <- connFD + }() + + // Wait for accept() to return or for the context to finish. + var connFD int + select { + case <-ctx.Done(): + return ctx.Err() + case err := <-errCh: return err + case connFD = <-connCh: } - defer syscall.Close(connfd) + defer syscall.Close(connFD) // Verify that, despite listening on acceptPort, SO_ORIGINAL_DST // indicates the packet was sent to originalDst:dropPort. if ipv6 { - got, err := originalDestination6(connfd) + got, err := originalDestination6(connFD) if err != nil { return err } @@ -598,7 +615,7 @@ func listenForRedirectedConn(ipv6 bool, originalDsts []net.IP) error { } return fmt.Errorf("SO_ORIGINAL_DST returned %+v, but wanted one of %+v (note: port numbers are in network byte order)", got, originalDsts) } else { - got, err := originalDestination4(connfd) + got, err := originalDestination4(connFD) if err != nil { return err } @@ -619,26 +636,22 @@ func listenForRedirectedConn(ipv6 bool, originalDsts []net.IP) error { // loopbackTests runs an iptables rule and ensures that packets sent to // dest:dropPort are received by localhost:acceptPort. -func loopbackTest(ipv6 bool, dest net.IP, args ...string) error { +func loopbackTest(ctx context.Context, ipv6 bool, dest net.IP, args ...string) error { if err := natTable(ipv6, args...); err != nil { return err } - sendCh := make(chan error) - listenCh := make(chan error) + sendCh := make(chan error, 1) + listenCh := make(chan error, 1) go func() { - sendCh <- sendUDPLoop(dest, dropPort, sendloopDuration) + sendCh <- sendUDPLoop(ctx, dest, dropPort) }() go func() { - listenCh <- listenUDP(acceptPort, sendloopDuration) + listenCh <- listenUDP(ctx, acceptPort) }() select { case err := <-listenCh: - if err != nil { - return err - } - case <-time.After(sendloopDuration): - return errors.New("timed out") + return err + case err := <-sendCh: + return err } - // sendCh will always take the full sendloop time. - return <-sendCh } diff --git a/test/iptables/runner/main.go b/test/iptables/runner/main.go index 69d3ef121..9ae2d1b4d 100644 --- a/test/iptables/runner/main.go +++ b/test/iptables/runner/main.go @@ -16,6 +16,7 @@ package main import ( + "context" "flag" "fmt" "log" @@ -46,7 +47,9 @@ func main() { } // Run the test. - if err := test.ContainerAction(ip, *ipv6); err != nil { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + if err := test.ContainerAction(ctx, ip, *ipv6); err != nil { log.Fatalf("Failed running test %q: %v", *name, err) } -- cgit v1.2.3 From 51e64d2fc590b0271d4e0cbbc75882cf81ada182 Mon Sep 17 00:00:00 2001 From: Craig Chi Date: Mon, 10 Aug 2020 18:15:32 -0700 Subject: Implement FUSE_GETATTR FUSE_GETATTR is called when a stat(2), fstat(2), or lstat(2) is issued from VFS2 layer to a FUSE filesystem. 
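For context, a caller-side sketch of that flow (illustrative only, not part of this change; the mount path below is hypothetical): a plain stat(2) issued by an application against a FUSE-backed path is what the sentry turns into a FUSE_GETATTR request, and the fields of the fuse_attr in the reply are what end up in the application's stat buffer.

package main

import (
    "fmt"
    "log"
    "os"
    "syscall"
)

func main() {
    // Hypothetical path under a FUSE mount; stat(2) on it is served by
    // the FUSE daemon via a FUSE_GETATTR request/reply.
    fi, err := os.Stat("/mnt/fuse/hello")
    if err != nil {
        log.Fatalf("stat: %v", err)
    }
    // On Linux, Sys() exposes the raw stat buffer, populated from the
    // fuse_attr returned in the FUSE_GETATTR reply.
    st := fi.Sys().(*syscall.Stat_t)
    fmt.Printf("ino=%d mode=%o size=%d\n", st.Ino, st.Mode, st.Size)
}

On the sentry side, the change below performs that translation in inode.Stat and maps the returned attributes onto kernfs via statFromFUSEAttr.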
Fixes #3175 --- pkg/abi/linux/fuse.go | 55 +++++++++++++ pkg/sentry/fsimpl/fuse/fusefs.go | 96 ++++++++++++++++++++++ test/fuse/BUILD | 8 ++ test/fuse/linux/BUILD | 21 +++-- test/fuse/linux/fuse_base.cc | 15 ++-- test/fuse/linux/fuse_base.h | 6 +- test/fuse/linux/stat_test.cc | 169 +++++++++++++++++++++++++++++++++++++++ test/runner/defs.bzl | 45 +++++------ 8 files changed, 375 insertions(+), 40 deletions(-) create mode 100644 test/fuse/linux/stat_test.cc (limited to 'test') diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go index 5c6ffe4a3..7e30483ee 100644 --- a/pkg/abi/linux/fuse.go +++ b/pkg/abi/linux/fuse.go @@ -246,3 +246,58 @@ type FUSEInitOut struct { _ [8]uint32 } + +// FUSEGetAttrIn is the request sent by the kernel to the daemon, +// to get the attribute of a inode. +// +// +marshal +type FUSEGetAttrIn struct { + // GetAttrFlags specifies whether getattr request is sent with a nodeid or + // with a file handle. + GetAttrFlags uint32 + + _ uint32 + + // Fh is the file handler when GetAttrFlags has FUSE_GETATTR_FH bit. If + // used, the operation is analogous to fstat(2). + Fh uint64 +} + +// FUSEAttr is the struct used in the reponse FUSEGetAttrOut. +// +// +marshal +type FUSEAttr struct { + Ino uint64 + Size uint64 + Blocks uint64 + Atime uint64 + Mtime uint64 + Ctime uint64 + AtimeNsec uint32 + MtimeNsec uint32 + CtimeNsec uint32 + Mode uint32 + Nlink uint32 + UID uint32 + GID uint32 + Rdev uint32 + BlkSize uint32 + _ uint32 +} + +// FUSEGetAttrOut is the reply sent by the daemon to the kernel +// for FUSEGetAttrIn. +// +// +marshal +type FUSEGetAttrOut struct { + // AttrValid and AttrValidNsec describe the attribute cache duration + AttrValid uint64 + + // AttrValidNsec is the nanosecond part of the attribute cache duration + AttrValidNsec uint32 + + _ uint32 + + // Attr contains the metadata returned from the FUSE server + Attr FUSEAttr +} diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index a1405f7c3..83c24ec25 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -226,3 +226,99 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr } return fd.VFSFileDescription(), nil } + +// statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The +// opts.Sync attribute is ignored since the synchronization is handled by the +// FUSE server. 
+func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx { + var stat linux.Statx + stat.Blksize = attr.BlkSize + stat.DevMajor, stat.DevMinor = linux.UNNAMED_MAJOR, devMinor + + rdevMajor, rdevMinor := linux.DecodeDeviceID(attr.Rdev) + stat.RdevMajor, stat.RdevMinor = uint32(rdevMajor), rdevMinor + + if mask&linux.STATX_MODE != 0 { + stat.Mode = uint16(attr.Mode) + } + if mask&linux.STATX_NLINK != 0 { + stat.Nlink = attr.Nlink + } + if mask&linux.STATX_UID != 0 { + stat.UID = attr.UID + } + if mask&linux.STATX_GID != 0 { + stat.GID = attr.GID + } + if mask&linux.STATX_ATIME != 0 { + stat.Atime = linux.StatxTimestamp{ + Sec: int64(attr.Atime), + Nsec: attr.AtimeNsec, + } + } + if mask&linux.STATX_MTIME != 0 { + stat.Mtime = linux.StatxTimestamp{ + Sec: int64(attr.Mtime), + Nsec: attr.MtimeNsec, + } + } + if mask&linux.STATX_CTIME != 0 { + stat.Ctime = linux.StatxTimestamp{ + Sec: int64(attr.Ctime), + Nsec: attr.CtimeNsec, + } + } + if mask&linux.STATX_INO != 0 { + stat.Ino = attr.Ino + } + if mask&linux.STATX_SIZE != 0 { + stat.Size = attr.Size + } + if mask&linux.STATX_BLOCKS != 0 { + stat.Blocks = attr.Blocks + } + return stat +} + +// Stat implements kernfs.Inode.Stat. +func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + fusefs := fs.Impl().(*filesystem) + conn := fusefs.conn + task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx) + if task == nil { + log.Warningf("couldn't get kernel task from context") + return linux.Statx{}, syserror.EINVAL + } + + var in linux.FUSEGetAttrIn + // We don't set any attribute in the request, because in VFS2 fstat(2) will + // finally be translated into vfs.FilesystemImpl.StatAt() (see + // pkg/sentry/syscalls/linux/vfs2/stat.go), resulting in the same flow + // as stat(2). Thus GetAttrFlags and Fh variable will never be used in VFS2. + req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.Ino(), linux.FUSE_GETATTR, &in) + if err != nil { + return linux.Statx{}, err + } + + res, err := conn.Call(task, req) + if err != nil { + return linux.Statx{}, err + } + if err := res.Error(); err != nil { + return linux.Statx{}, err + } + + var out linux.FUSEGetAttrOut + if err := res.UnmarshalPayload(&out); err != nil { + return linux.Statx{}, err + } + + // Set all metadata into kernfs.InodeAttrs. 
+ if err := i.SetStat(ctx, fs, creds, vfs.SetStatOptions{ + Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, fusefs.devMinor), + }); err != nil { + return linux.Statx{}, err + } + + return statFromFUSEAttr(out.Attr, opts.Mask, fusefs.devMinor), nil +} diff --git a/test/fuse/BUILD b/test/fuse/BUILD index 34b950644..56157c96b 100644 --- a/test/fuse/BUILD +++ b/test/fuse/BUILD @@ -1 +1,9 @@ +load("//test/runner:defs.bzl", "syscall_test") + package(licenses = ["notice"]) + +syscall_test( + fuse = "True", + test = "//test/fuse/linux:stat_test", + vfs2 = "True", +) diff --git a/test/fuse/linux/BUILD b/test/fuse/linux/BUILD index 49dc96c20..4871bb531 100644 --- a/test/fuse/linux/BUILD +++ b/test/fuse/linux/BUILD @@ -1,20 +1,31 @@ -load("//tools:defs.bzl", "cc_library", "gtest") +load("//tools:defs.bzl", "cc_binary", "cc_library", "gtest") package( default_visibility = ["//:sandbox"], licenses = ["notice"], ) +cc_binary( + name = "stat_test", + testonly = 1, + srcs = ["stat_test.cc"], + deps = [ + gtest, + ":fuse_base", + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_library( name = "fuse_base", testonly = 1, - srcs = [ - "fuse_base.cc", - "fuse_base.h", - ], + srcs = ["fuse_base.cc"], + hdrs = ["fuse_base.h"], deps = [ gtest, "//test/util:posix_error", + "//test/util:temp_path", "//test/util:test_util", "@com_google_absl//absl/strings:str_format", ], diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc index ce69276c9..4a2c64998 100644 --- a/test/fuse/linux/fuse_base.cc +++ b/test/fuse/linux/fuse_base.cc @@ -12,23 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "fuse_base.h" +#include "test/fuse/linux/fuse_base.h" #include #include -#include #include #include #include +#include #include #include #include -#include "gmock/gmock.h" -#include "gtest/gtest.h" #include "absl/strings/str_format.h" +#include "gtest/gtest.h" #include "test/util/posix_error.h" +#include "test/util/temp_path.h" #include "test/util/test_util.h" namespace gvisor { @@ -78,13 +78,14 @@ void FuseTest::MountFuse() { EXPECT_THAT(dev_fd_ = open("/dev/fuse", O_RDWR), SyscallSucceeds()); std::string mount_opts = absl::StrFormat("fd=%d,%s", dev_fd_, kMountOpts); - EXPECT_THAT(mount("fuse", kMountPoint, "fuse", MS_NODEV | MS_NOSUID, - mount_opts.c_str()), + mount_point_ = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + EXPECT_THAT(mount("fuse", mount_point_.path().c_str(), "fuse", + MS_NODEV | MS_NOSUID, mount_opts.c_str()), SyscallSucceedsWithValue(0)); } void FuseTest::UnmountFuse() { - EXPECT_THAT(umount(kMountPoint), SyscallSucceeds()); + EXPECT_THAT(umount(mount_point_.path().c_str()), SyscallSucceeds()); // TODO(gvisor.dev/issue/3330): ensure the process is terminated successfully. } diff --git a/test/fuse/linux/fuse_base.h b/test/fuse/linux/fuse_base.h index b008778de..3a2f255a9 100644 --- a/test/fuse/linux/fuse_base.h +++ b/test/fuse/linux/fuse_base.h @@ -20,14 +20,13 @@ #include -#include "gmock/gmock.h" #include "gtest/gtest.h" #include "test/util/posix_error.h" +#include "test/util/temp_path.h" namespace gvisor { namespace testing { -constexpr char kMountPoint[] = "/mnt"; constexpr char kMountOpts[] = "rootmode=755,user_id=0,group_id=0"; class FuseTest : public ::testing::Test { @@ -55,6 +54,9 @@ class FuseTest : public ::testing::Test { // complains if the FUSE server responds failure during tests. 
void WaitCompleted(); + protected: + TempPath mount_point_; + private: void MountFuse(); void UnmountFuse(); diff --git a/test/fuse/linux/stat_test.cc b/test/fuse/linux/stat_test.cc new file mode 100644 index 000000000..172e09867 --- /dev/null +++ b/test/fuse/linux/stat_test.cc @@ -0,0 +1,169 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gtest/gtest.h" +#include "test/fuse/linux/fuse_base.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +class StatTest : public FuseTest { + public: + bool CompareRequest(void* expected_mem, size_t expected_len, void* real_mem, + size_t real_len) override { + if (expected_len != real_len) return false; + struct fuse_in_header* real_header = + reinterpret_cast(real_mem); + + if (real_header->opcode != FUSE_GETATTR) { + std::cerr << "expect header opcode " << FUSE_GETATTR << " but got " + << real_header->opcode << std::endl; + return false; + } + return true; + } + + bool StatsAreEqual(struct stat expected, struct stat actual) { + // device number will be dynamically allocated by kernel, we cannot know + // in advance + actual.st_dev = expected.st_dev; + return memcmp(&expected, &actual, sizeof(struct stat)) == 0; + } +}; + +TEST_F(StatTest, StatNormal) { + struct iovec iov_in[2]; + struct iovec iov_out[2]; + + struct fuse_in_header in_header = { + .len = sizeof(struct fuse_in_header) + sizeof(struct fuse_getattr_in), + .opcode = FUSE_GETATTR, + .unique = 4, + .nodeid = 1, + .uid = 0, + .gid = 0, + .pid = 4, + .padding = 0, + }; + struct fuse_getattr_in in_payload = {0}; + iov_in[0].iov_len = sizeof(in_header); + iov_in[0].iov_base = &in_header; + iov_in[1].iov_len = sizeof(in_payload); + iov_in[1].iov_base = &in_payload; + + mode_t expected_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + struct timespec atime = {.tv_sec = 1595436289, .tv_nsec = 134150844}; + struct timespec mtime = {.tv_sec = 1595436290, .tv_nsec = 134150845}; + struct timespec ctime = {.tv_sec = 1595436291, .tv_nsec = 134150846}; + struct fuse_out_header out_header = { + .len = sizeof(struct fuse_out_header) + sizeof(struct fuse_attr_out), + .error = 0, + .unique = 4, + }; + struct fuse_attr attr = { + .ino = 1, + .size = 512, + .blocks = 4, + .atime = static_cast(atime.tv_sec), + .mtime = static_cast(mtime.tv_sec), + .ctime = static_cast(ctime.tv_sec), + .atimensec = static_cast(atime.tv_nsec), + .mtimensec = static_cast(mtime.tv_nsec), + .ctimensec = static_cast(ctime.tv_nsec), + .mode = expected_mode, + .nlink = 2, + .uid = 1234, + .gid = 4321, + .rdev = 12, + .blksize = 4096, + }; + struct fuse_attr_out out_payload = { + .attr = attr, + }; + iov_out[0].iov_len = sizeof(out_header); + iov_out[0].iov_base = &out_header; + iov_out[1].iov_len = sizeof(out_payload); + iov_out[1].iov_base = &out_payload; + + SetExpected(iov_in, 2, iov_out, 2); + + struct stat stat_buf; + 
EXPECT_THAT(stat(mount_point_.path().c_str(), &stat_buf), SyscallSucceeds()); + + struct stat expected_stat = { + .st_ino = attr.ino, + .st_nlink = attr.nlink, + .st_mode = expected_mode, + .st_uid = attr.uid, + .st_gid = attr.gid, + .st_rdev = attr.rdev, + .st_size = static_cast(attr.size), + .st_blksize = attr.blksize, + .st_blocks = static_cast(attr.blocks), + .st_atim = atime, + .st_mtim = mtime, + .st_ctim = ctime, + }; + EXPECT_TRUE(StatsAreEqual(stat_buf, expected_stat)); + WaitCompleted(); +} + +TEST_F(StatTest, StatNotFound) { + struct iovec iov_in[2]; + struct iovec iov_out[2]; + + struct fuse_in_header in_header = { + .len = sizeof(struct fuse_in_header) + sizeof(struct fuse_getattr_in), + .opcode = FUSE_GETATTR, + .unique = 4, + }; + struct fuse_getattr_in in_payload = {0}; + iov_in[0].iov_len = sizeof(in_header); + iov_in[0].iov_base = &in_header; + iov_in[1].iov_len = sizeof(in_payload); + iov_in[1].iov_base = &in_payload; + + struct fuse_out_header out_header = { + .len = sizeof(struct fuse_out_header), + .error = -ENOENT, + .unique = 4, + }; + iov_out[0].iov_len = sizeof(out_header); + iov_out[0].iov_base = &out_header; + + SetExpected(iov_in, 2, iov_out, 1); + + struct stat stat_buf; + EXPECT_THAT(stat(mount_point_.path().c_str(), &stat_buf), + SyscallFailsWithErrno(ENOENT)); + WaitCompleted(); +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index ba4732ca6..1ae13a4a5 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -150,31 +150,12 @@ def syscall_test( if not tags: tags = [] - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = "native", - use_tmpfs = False, - add_uds_tree = add_uds_tree, - tags = list(tags), - ) - - for (platform, platform_tags) in platforms.items(): - _syscall_test( - test = test, - shard_count = shard_count, - size = size, - platform = platform, - use_tmpfs = use_tmpfs, - add_uds_tree = add_uds_tree, - tags = platform_tags + tags, - ) - vfs2_tags = list(tags) if vfs2: # Add tag to easily run VFS2 tests with --test_tag_filters=vfs2 vfs2_tags.append("vfs2") + if fuse: + vfs2_tags.append("fuse") else: # Don't automatically run tests tests not yet passing. @@ -191,19 +172,31 @@ def syscall_test( add_uds_tree = add_uds_tree, tags = platforms[default_platform] + vfs2_tags, vfs2 = True, + fuse = fuse, + ) + if fuse: + # Only generate *_vfs2_fuse target if fuse parameter is enabled. + return + + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = "native", + use_tmpfs = False, + add_uds_tree = add_uds_tree, + tags = list(tags), ) - if vfs2 and fuse: + for (platform, platform_tags) in platforms.items(): _syscall_test( test = test, shard_count = shard_count, size = size, - platform = default_platform, + platform = platform, use_tmpfs = use_tmpfs, add_uds_tree = add_uds_tree, - tags = platforms[default_platform] + vfs2_tags + ["fuse"], - vfs2 = True, - fuse = True, + tags = platform_tags + tags, ) # TODO(gvisor.dev/issue/1487): Enable VFS2 overlay tests. -- cgit v1.2.3 From 89f3197fc368702cbe1842932d9cfbede250269f Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Tue, 11 Aug 2020 10:35:46 -0700 Subject: Mark integration tests as passing in VFS2 except CheckpointRestore. Mark all tests passing for VFS2 in: image_test integration_test There's no way to do negative look ahead/behind in golang test regex, so check if the tests uses VFS2 and skip CheckPointRestore if it does. 
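To make the regex constraint concrete, a tiny illustrative sketch (not part of this change): Go's regexp package is RE2-based, so a filter that excludes a single test via negative look-ahead cannot even be compiled.

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // RE2 (the engine behind Go's regexp and test filters) rejects
    // Perl-style negative look-ahead, so "every test except
    // CheckpointRestore" cannot be expressed as one pattern.
    _, err := regexp.Compile(`(?!TestCheckpointRestore).*`)
    fmt.Println(err) // non-nil: (?! is unsupported syntax
}

Hence the runtime-config check below (dockerutil.UsingVFS2) instead of a test filter.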
PiperOrigin-RevId: 326050915 --- Makefile | 6 +---- pkg/test/dockerutil/dockerutil.go | 54 ++++++++++++++++++++++++++++++--------- scripts/docker_tests.sh | 5 +--- test/e2e/integration_test.go | 7 +++++ 4 files changed, 51 insertions(+), 21 deletions(-) (limited to 'test') diff --git a/Makefile b/Makefile index d62567939..fdbc6fb41 100644 --- a/Makefile +++ b/Makefile @@ -166,15 +166,11 @@ do-tests: runsc simple-tests: unit-tests # Compatibility target. .PHONY: simple-tests -# Keep these in sync with //scripts/docker_tests.sh. -IMAGE_FILTER := HelloWorld\|Httpd\|Ruby\|Stdio -INTEGRATION_FILTER := Life\|Pause\|Connect\|JobControl\|Overlay\|Exec\|DirCreation\|Link - docker-tests: load-basic-images @$(call submake,install-test-runtime RUNTIME="vfs1") @$(call submake,test-runtime RUNTIME="vfs1" TARGETS="$(INTEGRATION_TARGETS)") @$(call submake,install-test-runtime RUNTIME="vfs2" ARGS="--vfs2") - @$(call submake,test-runtime RUNTIME="vfs2" OPTIONS="--test_filter=$(IMAGE_FILTER)\|$(INTEGRATION_FILTER)" TARGETS="$(INTEGRATION_TARGETS)") + @$(call submake,test-runtime RUNTIME="vfs2" TARGETS="$(INTEGRATION_TARGETS)") .PHONY: docker-tests overlay-tests: load-basic-images diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go index 5a9dd8bd8..952871f95 100644 --- a/pkg/test/dockerutil/dockerutil.go +++ b/pkg/test/dockerutil/dockerutil.go @@ -88,44 +88,74 @@ func EnsureSupportedDockerVersion() { // RuntimePath returns the binary path for the current runtime. func RuntimePath() (string, error) { + rs, err := runtimeMap() + if err != nil { + return "", err + } + + p, ok := rs["path"].(string) + if !ok { + // The runtime does not declare a path. + return "", fmt.Errorf("runtime does not declare a path: %v", rs) + } + return p, nil +} + +// UsingVFS2 returns true if the 'runtime' has the vfs2 flag set. +// TODO(gvisor.dev/issue/1624): Remove. +func UsingVFS2() (bool, error) { + rMap, err := runtimeMap() + if err != nil { + return false, err + } + + list, ok := rMap["runtimeArgs"].([]interface{}) + if !ok { + return false, fmt.Errorf("unexpected format: %v", rMap) + } + + for _, element := range list { + if element == "--vfs2" { + return true, nil + } + } + return false, nil +} + +func runtimeMap() (map[string]interface{}, error) { // Read the configuration data; the file must exist. configBytes, err := ioutil.ReadFile(*config) if err != nil { - return "", err + return nil, err } // Unmarshal the configuration. c := make(map[string]interface{}) if err := json.Unmarshal(configBytes, &c); err != nil { - return "", err + return nil, err } // Decode the expected configuration. r, ok := c["runtimes"] if !ok { - return "", fmt.Errorf("no runtimes declared: %v", c) + return nil, fmt.Errorf("no runtimes declared: %v", c) } rs, ok := r.(map[string]interface{}) if !ok { // The runtimes are not a map. - return "", fmt.Errorf("unexpected format: %v", c) + return nil, fmt.Errorf("unexpected format: %v", rs) } r, ok = rs[*runtime] if !ok { // The expected runtime is not declared. - return "", fmt.Errorf("runtime %q not found: %v", *runtime, c) + return nil, fmt.Errorf("runtime %q not found: %v", *runtime, rs) } rs, ok = r.(map[string]interface{}) if !ok { // The runtime is not a map. - return "", fmt.Errorf("unexpected format: %v", c) - } - p, ok := rs["path"].(string) - if !ok { - // The runtime does not declare a path. 
- return "", fmt.Errorf("unexpected format: %v", c) + return nil, fmt.Errorf("unexpected format: %v", r) } - return p, nil + return rs, nil } // Save exports a container image to the given Writer. diff --git a/scripts/docker_tests.sh b/scripts/docker_tests.sh index ce3ffa2eb..4f3867d05 100755 --- a/scripts/docker_tests.sh +++ b/scripts/docker_tests.sh @@ -22,7 +22,4 @@ install_runsc_for_test docker test_runsc //test/image:image_test //test/e2e:integration_test install_runsc_for_test docker --vfs2 -# Sync with //Makefile. -IMAGE_FILTER="Hello|Httpd|Ruby|Stdio" -INTEGRATION_FILTER="LifeCycle|Pause|Connect|JobControl|Overlay|Exec|DirCreation|Link" -test_runsc //test/e2e:integration_test //test/image:image_test --test_filter="${IMAGE_FILTER}|${INTEGRATION_FILTER}" +test_runsc //test/e2e:integration_test //test/image:image_test diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 71ec4791e..809244bab 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -167,6 +167,13 @@ func TestCheckpointRestore(t *testing.T) { t.Skip("Pause/resume is not supported.") } + // TODO(gvisor.dev/issue/3373): Remove after implementing. + if usingVFS2, err := dockerutil.UsingVFS2(); usingVFS2 { + t.Skip("CheckpointRestore not implemented in VFS2.") + } else if err != nil { + t.Fatalf("failed to read config for runtime %s: %v", dockerutil.Runtime(), err) + } + ctx := context.Background() d := dockerutil.MakeContainer(ctx, t) defer d.CleanUp(ctx) -- cgit v1.2.3 From 00b684ea7fd96beb32d1517dd06c5d736621ff70 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 12 Aug 2020 14:21:04 -0700 Subject: Limit the scope when deleting test log directory on success The code was deleting logs for all tests when a single test passed. Change it to delete only the logs relevant to the test at hand. Also fixed the benchmark lookup code, which was always generating a single empty benchmark entry if there were not benchmarks. PiperOrigin-RevId: 326311477 --- test/runner/gtest/gtest.go | 8 +++++--- test/runner/runner.go | 31 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'test') diff --git a/test/runner/gtest/gtest.go b/test/runner/gtest/gtest.go index 869169ad5..e4445e01b 100644 --- a/test/runner/gtest/gtest.go +++ b/test/runner/gtest/gtest.go @@ -146,10 +146,13 @@ func ParseTestCases(testBin string, benchmarks bool, extraArgs ...string) ([]Tes return nil, fmt.Errorf("could not enumerate gtest benchmarks: %v\nstderr\n%s", err, exitErr.Stderr) } - out = []byte(strings.Trim(string(out), "\n")) + benches := strings.Trim(string(out), "\n") + if len(benches) == 0 { + return t, nil + } // Parse benchmark output. - for _, line := range strings.Split(string(out), "\n") { + for _, line := range strings.Split(benches, "\n") { // Strip comments. 
line = strings.Split(line, "#")[0] @@ -163,6 +166,5 @@ func ParseTestCases(testBin string, benchmarks bool, extraArgs ...string) ([]Tes benchmark: true, }) } - return t, nil } diff --git a/test/runner/runner.go b/test/runner/runner.go index bc4b39cbb..5ac91310d 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -172,13 +172,14 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { args = append(args, "-fsgofer-host-uds") } - undeclaredOutputsDir, ok := syscall.Getenv("TEST_UNDECLARED_OUTPUTS_DIR") - if ok { - tdir := filepath.Join(undeclaredOutputsDir, strings.Replace(name, "/", "_", -1)) - if err := os.MkdirAll(tdir, 0755); err != nil { + testLogDir := "" + if undeclaredOutputsDir, ok := syscall.Getenv("TEST_UNDECLARED_OUTPUTS_DIR"); ok { + // Create log directory dedicated for this test. + testLogDir = filepath.Join(undeclaredOutputsDir, strings.Replace(name, "/", "_", -1)) + if err := os.MkdirAll(testLogDir, 0755); err != nil { return fmt.Errorf("could not create test dir: %v", err) } - debugLogDir, err := ioutil.TempDir(tdir, "runsc") + debugLogDir, err := ioutil.TempDir(testLogDir, "runsc") if err != nil { return fmt.Errorf("could not create temp dir: %v", err) } @@ -227,10 +228,10 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { dArgs := append([]string{}, args...) dArgs = append(dArgs, "-alsologtostderr=true", "debug", "--stacks", id) go func(dArgs []string) { - cmd := exec.Command(*runscPath, dArgs...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.Run() + debug := exec.Command(*runscPath, dArgs...) + debug.Stdout = os.Stdout + debug.Stderr = os.Stderr + debug.Run() done <- true }(dArgs) @@ -245,17 +246,17 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { dArgs = append(args, "debug", fmt.Sprintf("--signal=%d", syscall.SIGTERM), id) - cmd := exec.Command(*runscPath, dArgs...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.Run() + signal := exec.Command(*runscPath, dArgs...) + signal.Stdout = os.Stdout + signal.Stderr = os.Stderr + signal.Run() }() err = cmd.Run() - if err == nil { + if err == nil && len(testLogDir) > 0 { // If the test passed, then we erase the log directory. This speeds up // uploading logs in continuous integration & saves on disk space. - os.RemoveAll(undeclaredOutputsDir) + os.RemoveAll(testLogDir) } return err -- cgit v1.2.3 From 42b610d56750b4bb8e3d69b680e4fb538f8fb554 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Wed, 12 Aug 2020 17:17:17 -0700 Subject: [vfs2][gofer] Return appropriate errors when opening and creating files. Fixes php test ext/standard/tests/file/touch_variation5.phpt on vfs2. Updates #3516 Also spotted a bug with O_EXCL, where we did not return EEXIST when we tried to open the root of the filesystem with O_EXCL | O_CREAT. Added some more tests for open() corner cases. PiperOrigin-RevId: 326346863 --- pkg/sentry/fsimpl/gofer/filesystem.go | 11 +++++++++++ pkg/sentry/fsimpl/tmpfs/filesystem.go | 2 +- test/syscalls/linux/open.cc | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index eaef2594d..40fec890a 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -844,6 +844,13 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf } } if rp.Done() { + // Reject attempts to open mount root directory with O_CREAT. 
+ if mayCreate && rp.MustBeDir() { + return nil, syserror.EISDIR + } + if mustCreate { + return nil, syserror.EEXIST + } return start.openLocked(ctx, rp, &opts) } @@ -856,6 +863,10 @@ afterTrailingSymlink: if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, err } + // Reject attempts to open directories with O_CREAT. + if mayCreate && rp.MustBeDir() { + return nil, syserror.EISDIR + } // Determine whether or not we need to create a file. parent.dirMu.Lock() child, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds) diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 065812065..a4864df53 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -320,7 +320,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf fs.mu.Lock() defer fs.mu.Unlock() if rp.Done() { - // Reject attempts to open directories with O_CREAT. + // Reject attempts to open mount root directory with O_CREAT. if rp.MustBeDir() { return nil, syserror.EISDIR } diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index bf350946b..c7147c20b 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -95,6 +95,38 @@ TEST_F(OpenTest, OTruncAndReadOnlyFile) { Open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666)); } +TEST_F(OpenTest, OCreateDirectory) { + SKIP_IF(IsRunningWithVFS1()); + auto dirpath = GetAbsoluteTestTmpdir(); + + // Normal case: existing directory. + ASSERT_THAT(open(dirpath.c_str(), O_RDWR | O_CREAT, 0666), + SyscallFailsWithErrno(EISDIR)); + // Trailing separator on existing directory. + ASSERT_THAT(open(dirpath.append("/").c_str(), O_RDWR | O_CREAT, 0666), + SyscallFailsWithErrno(EISDIR)); + // Trailing separator on non-existing directory. + ASSERT_THAT(open(JoinPath(dirpath, "non-existent").append("/").c_str(), + O_RDWR | O_CREAT, 0666), + SyscallFailsWithErrno(EISDIR)); + // "." special case. + ASSERT_THAT(open(JoinPath(dirpath, ".").c_str(), O_RDWR | O_CREAT, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, MustCreateExisting) { + auto dirPath = GetAbsoluteTestTmpdir(); + + // Existing directory. + ASSERT_THAT(open(dirPath.c_str(), O_RDWR | O_CREAT | O_EXCL, 0666), + SyscallFailsWithErrno(EEXIST)); + + // Existing file. + auto newFile = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dirPath)); + ASSERT_THAT(open(newFile.path().c_str(), O_RDWR | O_CREAT | O_EXCL, 0666), + SyscallFailsWithErrno(EEXIST)); +} + TEST_F(OpenTest, ReadOnly) { char buf; const FileDescriptor ro_file = -- cgit v1.2.3 From b928d074b461c6f2578c989e48adadc951ed3154 Mon Sep 17 00:00:00 2001 From: Bhasker Hariharan Date: Thu, 13 Aug 2020 09:02:18 -0700 Subject: Ensure TCP TIME-WAIT is not terminated prematurely. Netstack's TIME-WAIT state for a TCP socket could be terminated prematurely if the socket entered TIME-WAIT using shutdown(..., SHUT_RDWR) and then was closed using close(). This fixes that bug and updates the tests to verify that Netstack correctly honors TIME-WAIT under such conditions. 
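As a rough illustration of the sequence being fixed (a standalone sketch over loopback, not the regression test added below; addresses and sleep-based timing are placeholders): the actively closing side does shutdown(SHUT_RDWR), waits for the peer to finish, then calls close(); the endpoint is expected to remain in TIME-WAIT after that close.

package main

import (
    "io"
    "io/ioutil"
    "log"
    "net"
    "syscall"
    "time"
)

func main() {
    ln, err := net.Listen("tcp", "127.0.0.1:0")
    if err != nil {
        log.Fatal(err)
    }
    defer ln.Close()

    go func() {
        c, _ := ln.Accept()
        // Passive side: drain until the peer's FIN, then close, so it
        // follows CLOSE_WAIT -> LAST_ACK -> CLOSED, not TIME-WAIT.
        io.Copy(ioutil.Discard, c)
        c.Close()
    }()

    c, err := net.Dial("tcp", ln.Addr().String())
    if err != nil {
        log.Fatal(err)
    }
    tc := c.(*net.TCPConn)

    // Active side: shutdown(SHUT_RDWR) sends our FIN and stops reads.
    rc, err := tc.SyscallConn()
    if err != nil {
        log.Fatal(err)
    }
    rc.Control(func(fd uintptr) {
        syscall.Shutdown(int(fd), syscall.SHUT_RDWR)
    })

    // Give the peer time to send its FIN so this end reaches TIME-WAIT.
    time.Sleep(time.Second)

    // close() after shutdown: the endpoint should stay in TIME-WAIT for
    // ~2*MSL; before this change it could be torn down early.
    tc.Close()
    time.Sleep(time.Second)
}

The regression tests added below drive the same ordering against the real stack, using poll() instead of sleeps to sequence the FIN exchange.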
Fixes #3106 PiperOrigin-RevId: 326456443 --- pkg/tcpip/header/ipv4.go | 9 +++ pkg/tcpip/header/ipv6.go | 3 + pkg/tcpip/tcpip.go | 20 +++++ pkg/tcpip/transport/tcp/connect.go | 2 +- pkg/tcpip/transport/tcp/endpoint.go | 73 ++++++++++++++--- pkg/tcpip/transport/tcp/endpoint_state.go | 10 +++ pkg/tcpip/transport/tcp/protocol.go | 33 ++++++-- pkg/tcpip/transport/tcp/tcp_test.go | 50 ++++++++++++ test/syscalls/linux/socket_generic_stress.cc | 49 +++++++++++- test/syscalls/linux/socket_inet_loopback.cc | 112 ++++++++++++++++++++++++++- test/syscalls/linux/tcp_socket.cc | 24 ++++++ 11 files changed, 362 insertions(+), 23 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index d0d1efd0d..680eafd16 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -315,3 +315,12 @@ func IsV4MulticastAddress(addr tcpip.Address) bool { } return (addr[0] & 0xf0) == 0xe0 } + +// IsV4LoopbackAddress determines if the provided address is an IPv4 loopback +// address (belongs to 127.0.0.1/8 subnet). +func IsV4LoopbackAddress(addr tcpip.Address) bool { + if len(addr) != IPv4AddressSize { + return false + } + return addr[0] == 0x7f +} diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go index 4f367fe4c..ea3823898 100644 --- a/pkg/tcpip/header/ipv6.go +++ b/pkg/tcpip/header/ipv6.go @@ -98,6 +98,9 @@ const ( // section 5. IPv6MinimumMTU = 1280 + // IPv6Loopback is the IPv6 Loopback address. + IPv6Loopback tcpip.Address = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01" + // IPv6Any is the non-routable IPv6 "any" meta address. It is also // known as the unspecified address. IPv6Any tcpip.Address = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 091bc5281..07c85ce59 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -958,6 +958,26 @@ type SocketDetachFilterOption int // and port of a redirected packet. type OriginalDestinationOption FullAddress +// TCPTimeWaitReuseOption is used stack.(*Stack).TransportProtocolOption to +// specify if the stack can reuse the port bound by an endpoint in TIME-WAIT for +// new connections when it is safe from protocol viewpoint. +type TCPTimeWaitReuseOption uint8 + +const ( + // TCPTimeWaitReuseDisabled indicates reuse of port bound by endponts in TIME-WAIT cannot + // be reused for new connections. + TCPTimeWaitReuseDisabled TCPTimeWaitReuseOption = iota + + // TCPTimeWaitReuseGlobal indicates reuse of port bound by endponts in TIME-WAIT can + // be reused for new connections irrespective of the src/dest addresses. + TCPTimeWaitReuseGlobal + + // TCPTimeWaitReuseLoopbackOnly indicates reuse of port bound by endpoint in TIME-WAIT can + // only be reused if the connection was a connection over loopback. i.e src/dest adddresses + // are loopback addresses. + TCPTimeWaitReuseLoopbackOnly +) + // IPPacketInfo is the message structure for IP_PKTINFO. 
// // +stateify savable diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 8dd759ba2..46702906b 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -1706,7 +1706,7 @@ func (e *endpoint) doTimeWait() (twReuse func()) { } case notification: n := e.fetchNotifications() - if n¬ifyClose != 0 || n¬ifyAbort != 0 { + if n¬ifyAbort != 0 { return nil } if n¬ifyDrain != 0 { diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index b8b52b03d..d08cfe0ff 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -449,10 +449,11 @@ type endpoint struct { // recentTS is the timestamp that should be sent in the TSEcr field of // the timestamp for future segments sent by the endpoint. This field is // updated if required when a new segment is received by this endpoint. - // - // recentTS must be read/written atomically. recentTS uint32 + // recentTSTime is the unix time when we updated recentTS last. + recentTSTime time.Time `state:".(unixTime)"` + // tsOffset is a randomized offset added to the value of the // TSVal field in the timestamp option. tsOffset uint32 @@ -795,15 +796,15 @@ func (e *endpoint) EndpointState() EndpointState { return EndpointState(atomic.LoadUint32((*uint32)(&e.state))) } -// setRecentTimestamp atomically sets the recentTS field to the -// provided value. +// setRecentTimestamp sets the recentTS field to the provided value. func (e *endpoint) setRecentTimestamp(recentTS uint32) { - atomic.StoreUint32(&e.recentTS, recentTS) + e.recentTS = recentTS + e.recentTSTime = time.Now() } -// recentTimestamp atomically reads and returns the value of the recentTS field. +// recentTimestamp returns the value of the recentTS field. func (e *endpoint) recentTimestamp() uint32 { - return atomic.LoadUint32(&e.recentTS) + return e.recentTS } // keepalive is a synchronization wrapper used to appease stateify. See the @@ -902,7 +903,7 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { case StateInitial, StateBound, StateConnecting, StateSynSent, StateSynRecv: // Ready for nothing. - case StateClose, StateError: + case StateClose, StateError, StateTimeWait: // Ready for anything. 
result = mask @@ -2148,12 +2149,66 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc h.Write(portBuf) portOffset := h.Sum32() + var twReuse tcpip.TCPTimeWaitReuseOption + if err := e.stack.TransportProtocolOption(ProtocolNumber, &twReuse); err != nil { + panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %#v) = %s", ProtocolNumber, &twReuse, err)) + } + + reuse := twReuse == tcpip.TCPTimeWaitReuseGlobal + if twReuse == tcpip.TCPTimeWaitReuseLoopbackOnly { + switch netProto { + case header.IPv4ProtocolNumber: + reuse = header.IsV4LoopbackAddress(e.ID.LocalAddress) && header.IsV4LoopbackAddress(e.ID.RemoteAddress) + case header.IPv6ProtocolNumber: + reuse = e.ID.LocalAddress == header.IPv6Loopback && e.ID.RemoteAddress == header.IPv6Loopback + } + } + if _, err := e.stack.PickEphemeralPortStable(portOffset, func(p uint16) (bool, *tcpip.Error) { if sameAddr && p == e.ID.RemotePort { return false, nil } if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr); err != nil { - return false, nil + if err != tcpip.ErrPortInUse || !reuse { + return false, nil + } + transEPID := e.ID + transEPID.LocalPort = p + // Check if an endpoint is registered with demuxer in TIME-WAIT and if + // we can reuse it. If we can't find a transport endpoint then we just + // skip using this port as it's possible that either an endpoint has + // bound the port but not registered with demuxer yet (no listen/connect + // done yet) or the reservation was freed between the check above and + // the FindTransportEndpoint below. But rather than retry the same port + // we just skip it and move on. + transEP := e.stack.FindTransportEndpoint(netProto, ProtocolNumber, transEPID, &r) + if transEP == nil { + // ReservePort failed but there is no registered endpoint with + // demuxer. Which indicates there is at least some endpoint that has + // bound the port. + return false, nil + } + + tcpEP := transEP.(*endpoint) + tcpEP.LockUser() + // If the endpoint is not in TIME-WAIT or if it is in TIME-WAIT but + // less than 1 second has elapsed since its recentTS was updated then + // we cannot reuse the port. + if tcpEP.EndpointState() != StateTimeWait || time.Since(tcpEP.recentTSTime) < 1*time.Second { + tcpEP.UnlockUser() + return false, nil + } + // Since the endpoint is in TIME-WAIT it should be safe to acquire its + // Lock while holding the lock for this endpoint as endpoints in + // TIME-WAIT do not acquire locks on other endpoints. + tcpEP.workerCleanup = false + tcpEP.cleanupLocked() + tcpEP.notifyProtocolGoroutine(notifyAbort) + tcpEP.UnlockUser() + // Now try and Reserve again if it fails then we skip. + if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr); err != nil { + return false, nil + } } id := e.ID diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index abf1ac5c9..723e47ddc 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -309,6 +309,16 @@ func (e *endpoint) loadLastError(s string) { e.lastError = tcpip.StringToError(s) } +// saveRecentTSTime is invoked by stateify. +func (e *endpoint) saveRecentTSTime() unixTime { + return unixTime{e.recentTSTime.Unix(), e.recentTSTime.UnixNano()} +} + +// loadRecentTSTime is invoked by stateify. 
+func (e *endpoint) loadRecentTSTime(unix unixTime) { + e.recentTSTime = time.Unix(unix.second, unix.nano) +} + // saveHardError is invoked by stateify. func (e *EndpointInfo) saveHardError() string { if e.HardError == nil { diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 2e5093b36..49a673b42 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -191,8 +191,9 @@ type protocol struct { congestionControl string availableCongestionControl []string moderateReceiveBuffer bool - tcpLingerTimeout time.Duration - tcpTimeWaitTimeout time.Duration + lingerTimeout time.Duration + timeWaitTimeout time.Duration + timeWaitReuse tcpip.TCPTimeWaitReuseOption minRTO time.Duration maxRTO time.Duration maxRetries uint32 @@ -358,7 +359,7 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { v = 0 } p.mu.Lock() - p.tcpLingerTimeout = time.Duration(v) + p.lingerTimeout = time.Duration(v) p.mu.Unlock() return nil @@ -367,7 +368,16 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { v = 0 } p.mu.Lock() - p.tcpTimeWaitTimeout = time.Duration(v) + p.timeWaitTimeout = time.Duration(v) + p.mu.Unlock() + return nil + + case tcpip.TCPTimeWaitReuseOption: + if v < tcpip.TCPTimeWaitReuseDisabled || v > tcpip.TCPTimeWaitReuseLoopbackOnly { + return tcpip.ErrInvalidOptionValue + } + p.mu.Lock() + p.timeWaitReuse = v p.mu.Unlock() return nil @@ -468,13 +478,19 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { case *tcpip.TCPLingerTimeoutOption: p.mu.RLock() - *v = tcpip.TCPLingerTimeoutOption(p.tcpLingerTimeout) + *v = tcpip.TCPLingerTimeoutOption(p.lingerTimeout) p.mu.RUnlock() return nil case *tcpip.TCPTimeWaitTimeoutOption: p.mu.RLock() - *v = tcpip.TCPTimeWaitTimeoutOption(p.tcpTimeWaitTimeout) + *v = tcpip.TCPTimeWaitTimeoutOption(p.timeWaitTimeout) + p.mu.RUnlock() + return nil + + case *tcpip.TCPTimeWaitReuseOption: + p.mu.RLock() + *v = tcpip.TCPTimeWaitReuseOption(p.timeWaitReuse) p.mu.RUnlock() return nil @@ -564,8 +580,9 @@ func NewProtocol() stack.TransportProtocol { }, congestionControl: ccReno, availableCongestionControl: []string{ccReno, ccCubic}, - tcpLingerTimeout: DefaultTCPLingerTimeout, - tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout, + lingerTimeout: DefaultTCPLingerTimeout, + timeWaitTimeout: DefaultTCPTimeWaitTimeout, + timeWaitReuse: tcpip.TCPTimeWaitReuseLoopbackOnly, synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold}, synRetries: DefaultSynRetries, minRTO: MinRTO, diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 1b58eb91b..0f7e958e4 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -7297,3 +7297,53 @@ func TestResetDuringClose(t *testing.T) { wg.Wait() } + +func TestStackTimeWaitReuse(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + s := c.Stack() + var twReuse tcpip.TCPTimeWaitReuseOption + if err := s.TransportProtocolOption(tcp.ProtocolNumber, &twReuse); err != nil { + t.Fatalf("s.TransportProtocolOption(%v, %v) = %v", tcp.ProtocolNumber, &twReuse, err) + } + if got, want := twReuse, tcpip.TCPTimeWaitReuseLoopbackOnly; got != want { + t.Fatalf("got tcpip.TCPTimeWaitReuseOption: %v, want: %v", got, want) + } +} + +func TestSetStackTimeWaitReuse(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + s := c.Stack() + testCases := []struct { + v int + err *tcpip.Error + }{ + {int(tcpip.TCPTimeWaitReuseDisabled), nil}, + 
{int(tcpip.TCPTimeWaitReuseGlobal), nil}, + {int(tcpip.TCPTimeWaitReuseLoopbackOnly), nil}, + {int(tcpip.TCPTimeWaitReuseLoopbackOnly) + 1, tcpip.ErrInvalidOptionValue}, + {int(tcpip.TCPTimeWaitReuseDisabled) - 1, tcpip.ErrInvalidOptionValue}, + } + + for _, tc := range testCases { + err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPTimeWaitReuseOption(tc.v)) + if got, want := err, tc.err; got != want { + t.Fatalf("s.TransportProtocolOption(%v, %v) = %v, want %v", tcp.ProtocolNumber, tc.v, err, tc.err) + } + if tc.err != nil { + continue + } + + var twReuse tcpip.TCPTimeWaitReuseOption + if err := s.TransportProtocolOption(tcp.ProtocolNumber, &twReuse); err != nil { + t.Fatalf("s.TransportProtocolOption(%v, %v) = %v, want nil", tcp.ProtocolNumber, &twReuse, err) + } + + if got, want := twReuse, tcpip.TCPTimeWaitReuseOption(tc.v); got != want { + t.Fatalf("got tcpip.TCPTimeWaitReuseOption: %v, want: %v", got, want) + } + } +} diff --git a/test/syscalls/linux/socket_generic_stress.cc b/test/syscalls/linux/socket_generic_stress.cc index 6a232238d..19239e9e9 100644 --- a/test/syscalls/linux/socket_generic_stress.cc +++ b/test/syscalls/linux/socket_generic_stress.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -37,6 +38,14 @@ TEST_P(ConnectStressTest, Reset65kTimes) { char sent_data[100] = {}; ASSERT_THAT(write(sockets->first_fd(), sent_data, sizeof(sent_data)), SyscallSucceedsWithValue(sizeof(sent_data))); + // Poll the other FD to make sure that the data is in the receive buffer + // before closing it to ensure a RST is triggered. + const int kTimeout = 10000; + struct pollfd pfd = { + .fd = sockets->second_fd(), + .events = POLL_IN, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); } } @@ -58,7 +67,45 @@ INSTANTIATE_TEST_SUITE_P( // a persistent listener (if applicable). using PersistentListenerConnectStressTest = SocketPairTest; -TEST_P(PersistentListenerConnectStressTest, 65kTimes) { +TEST_P(PersistentListenerConnectStressTest, 65kTimesShutdownCloseFirst) { + for (int i = 0; i < 1 << 16; ++i) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds()); + if (GetParam().type == SOCK_STREAM) { + // Poll the other FD to make sure that we see the FIN from the other + // side before closing the second_fd. This ensures that the first_fd + // enters TIME-WAIT and not second_fd. + const int kTimeout = 10000; + struct pollfd pfd = { + .fd = sockets->second_fd(), + .events = POLL_IN, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + } + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RDWR), SyscallSucceeds()); + } +} + +TEST_P(PersistentListenerConnectStressTest, 65kTimesShutdownCloseSecond) { + for (int i = 0; i < 1 << 16; ++i) { + auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + ASSERT_THAT(shutdown(sockets->second_fd(), SHUT_RDWR), SyscallSucceeds()); + if (GetParam().type == SOCK_STREAM) { + // Poll the other FD to make sure that we see the FIN from the other + // side before closing the first_fd. This ensures that the second_fd + // enters TIME-WAIT and not first_fd. 
+ const int kTimeout = 10000; + struct pollfd pfd = { + .fd = sockets->first_fd(), + .events = POLL_IN, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + } + ASSERT_THAT(shutdown(sockets->first_fd(), SHUT_RDWR), SyscallSucceeds()); + } +} + +TEST_P(PersistentListenerConnectStressTest, 65kTimesClose) { for (int i = 0; i < 1 << 16; ++i) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); } diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 18b9e4b70..c3b42682f 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -865,7 +865,7 @@ TEST_P(SocketInetLoopbackTest, TCPResetAfterClose) { // results in the stack.Seed() being different which can cause // sequence number of final connect to be one that is considered // old and can cause the test to be flaky. -TEST_P(SocketInetLoopbackTest, TCPTimeWaitTest_NoRandomSave) { +TEST_P(SocketInetLoopbackTest, TCPPassiveCloseNoTimeWaitTest_NoRandomSave) { auto const& param = GetParam(); TestAddress const& listener = param.listener; TestAddress const& connector = param.connector; @@ -920,14 +920,27 @@ TEST_P(SocketInetLoopbackTest, TCPTimeWaitTest_NoRandomSave) { &conn_addrlen), SyscallSucceeds()); - // close the accept FD to trigger TIME_WAIT on the accepted socket which + // shutdown the accept FD to trigger TIME_WAIT on the accepted socket which // should cause the conn_fd to follow CLOSE_WAIT->LAST_ACK->CLOSED instead of // TIME_WAIT. - accepted.reset(); - absl::SleepFor(absl::Seconds(1)); + ASSERT_THAT(shutdown(accepted.get(), SHUT_RDWR), SyscallSucceeds()); + { + const int kTimeout = 10000; + struct pollfd pfd = { + .fd = conn_fd.get(), + .events = POLLIN, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + ASSERT_EQ(pfd.revents, POLLIN); + } + conn_fd.reset(); + // This sleep is required to give conn_fd time to transition to TIME-WAIT. absl::SleepFor(absl::Seconds(1)); + // At this point conn_fd should be the one that moved to CLOSE_WAIT and + // eventually to CLOSED. + // Now bind and connect a new socket and verify that we can immediately // rebind the address bound by the conn_fd as it never entered TIME_WAIT. const FileDescriptor conn_fd2 = ASSERT_NO_ERRNO_AND_VALUE( @@ -942,6 +955,97 @@ TEST_P(SocketInetLoopbackTest, TCPTimeWaitTest_NoRandomSave) { SyscallSucceeds()); } +TEST_P(SocketInetLoopbackTest, TCPActiveCloseTimeWaitTest_NoRandomSave) { + auto const& param = GetParam(); + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + const FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast(&listen_addr), &addrlen), + SyscallSucceeds()); + + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + // Connect to the listening socket. 
+ FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + // We disable saves after this point as a S/R causes the netstack seed + // to be regenerated which changes what ports/ISN is picked for a given + // tuple (src ip,src port, dst ip, dst port). This can cause the final + // SYN to use a sequence number that looks like one from the current + // connection in TIME_WAIT and will not be accepted causing the test + // to timeout. + // + // TODO(gvisor.dev/issue/940): S/R portSeed/portHint + DisableSave ds; + + sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), + reinterpret_cast(&conn_addr), + connector.addr_len), + SyscallSucceeds()); + + // Accept the connection. + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + + // Get the address/port bound by the connecting socket. + sockaddr_storage conn_bound_addr; + socklen_t conn_addrlen = connector.addr_len; + ASSERT_THAT( + getsockname(conn_fd.get(), reinterpret_cast(&conn_bound_addr), + &conn_addrlen), + SyscallSucceeds()); + + // shutdown the conn FD to trigger TIME_WAIT on the connect socket. + ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds()); + { + const int kTimeout = 10000; + struct pollfd pfd = { + .fd = accepted.get(), + .events = POLLIN, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + ASSERT_EQ(pfd.revents, POLLIN); + } + ScopedThread t([&]() { + constexpr int kTimeout = 10000; + constexpr int16_t want_events = POLLHUP; + struct pollfd pfd = { + .fd = conn_fd.get(), + .events = want_events, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + }); + + accepted.reset(); + t.Join(); + conn_fd.reset(); + + // Now bind and connect a new socket and verify that we can't immediately + // rebind the address bound by the conn_fd as it is in TIME_WAIT. + conn_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM, IPPROTO_TCP)); + + ASSERT_THAT(bind(conn_fd.get(), reinterpret_cast(&conn_bound_addr), + conn_addrlen), + SyscallFailsWithErrno(EADDRINUSE)); +} + TEST_P(SocketInetLoopbackTest, AcceptedInheritsTCPUserTimeout) { auto const& param = GetParam(); TestAddress const& listener = param.listener; diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 0cea7d11f..a6325a761 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -720,6 +720,30 @@ TEST_P(TcpSocketTest, TcpSCMPriority) { ASSERT_EQ(cmsg, nullptr); } +TEST_P(TcpSocketTest, TimeWaitPollHUP) { + shutdown(s_, SHUT_RDWR); + ScopedThread t([&]() { + constexpr int kTimeout = 10000; + constexpr int16_t want_events = POLLHUP; + struct pollfd pfd = { + .fd = s_, + .events = want_events, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); + }); + shutdown(t_, SHUT_RDWR); + t.Join(); + // At this point s_ should be in TIME-WAIT and polling for POLLHUP should + // return with 1 FD. 
+ constexpr int kTimeout = 10000; + constexpr int16_t want_events = POLLHUP; + struct pollfd pfd = { + .fd = s_, + .events = want_events, + }; + ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, TcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); -- cgit v1.2.3 From 5036f135e4370c84dd4afe1c43c14275b170c364 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Thu, 13 Aug 2020 16:56:39 -0700 Subject: Disable vfs2 for socket_stress_test. PiperOrigin-RevId: 326553620 --- test/syscalls/BUILD | 1 + 1 file changed, 1 insertion(+) (limited to 'test') diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 571785ad2..0eadc6b08 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -69,6 +69,7 @@ syscall_test( # involve much concurrency, TSAN's usefulness here is limited anyway. tags = ["nogotsan"], test = "//test/syscalls/linux:socket_stress_test", + vfs2 = False, ) syscall_test( -- cgit v1.2.3 From d6520e1d0592f99161faedef3eba49439b140917 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Thu, 13 Aug 2020 19:31:17 -0700 Subject: [vfs2][gofer] Fix file creation flags sent to gofer. Fixes php runtime test ext/standard/tests/file/readfile_basic.phpt Fixes #3516 fsgofers only want the access mode in the OpenFlags passed to Create(). If more flags are supplied (like O_APPEND in this case), read/write from that fd will fail with EBADF. See runsc/fsgofer/fsgofer.go:WriteAt() VFS2 was providing more than just access modes. So filtering the flags using p9.OpenFlagsModeMask == linux.O_ACCMODE fixes the issue. Gofer in VFS1 also only extracts the access mode flags while making the create RPC. See pkg/sentry/fs/gofer/path.go:Create() Even in VFS2, when we open a handle, we extract out only the access mode flags + O_TRUNC. See third_party/gvisor/pkg/sentry/fsimpl/gofer/handle.go:openHandle() Added a test for this. PiperOrigin-RevId: 326574829 --- pkg/sentry/fsimpl/gofer/filesystem.go | 6 ++---- test/syscalls/linux/open.cc | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 40fec890a..610a7ed78 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -1082,10 +1082,8 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } creds := rp.Credentials() name := rp.Component() - // Filter file creation flags and O_LARGEFILE out; the create RPC already - // has the semantics of O_CREAT|O_EXCL, while some servers will choke on - // O_LARGEFILE. - createFlags := p9.OpenFlags(opts.Flags &^ (vfs.FileCreationFlags | linux.O_LARGEFILE)) + // We only want the access mode for creating the file. 
+ createFlags := p9.OpenFlags(opts.Flags) & p9.OpenFlagsModeMask fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) if err != nil { dirfile.close(ctx) diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index c7147c20b..8f0c9cb49 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -147,6 +147,26 @@ TEST_F(OpenTest, WriteOnly) { EXPECT_THAT(write(wo_file.get(), &buf, 1), SyscallSucceedsWithValue(1)); } +TEST_F(OpenTest, CreateWithAppend) { + std::string data = "text"; + std::string new_file = NewTempAbsPath(); + const FileDescriptor file = ASSERT_NO_ERRNO_AND_VALUE( + Open(new_file, O_WRONLY | O_APPEND | O_CREAT, 0666)); + EXPECT_THAT(write(file.get(), data.c_str(), data.size()), + SyscallSucceedsWithValue(data.size())); + EXPECT_THAT(lseek(file.get(), 0, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(write(file.get(), data.c_str(), data.size()), + SyscallSucceedsWithValue(data.size())); + + // Check that the size of the file is correct and that the offset has been + // incremented to that size. + struct stat s0; + EXPECT_THAT(fstat(file.get(), &s0), SyscallSucceeds()); + EXPECT_EQ(s0.st_size, 2 * data.size()); + EXPECT_THAT(lseek(file.get(), 0, SEEK_CUR), + SyscallSucceedsWithValue(2 * data.size())); +} + TEST_F(OpenTest, ReadWrite) { char buf; const FileDescriptor rw_file = -- cgit v1.2.3 From 190634e0fcf4cf25a449e1bd39533ca2ddad66e6 Mon Sep 17 00:00:00 2001 From: Julian Elischer Date: Fri, 14 Aug 2020 02:05:23 -0700 Subject: Give the ICMP Code its own type This is a preparatory commit for a larger commit working on ICMP generation in error cases. This is removal of technical debt and cleanup in the gvisor code as part of gvisor issue 2211. Updates #2211. PiperOrigin-RevId: 326615389 --- pkg/tcpip/checker/checker.go | 4 +- pkg/tcpip/header/icmpv4.go | 18 +++++---- pkg/tcpip/header/icmpv6.go | 43 ++++++++++++++++------ pkg/tcpip/network/ip_test.go | 4 +- pkg/tcpip/network/ipv6/ndp_test.go | 4 +- pkg/tcpip/transport/tcp/testing/context/context.go | 2 +- test/packetimpact/testbench/layers.go | 22 ++++++++--- test/packetimpact/testbench/layers_test.go | 2 +- .../tests/ipv6_fragment_reassembly_test.go | 6 +-- .../tests/ipv6_unknown_options_action_test.go | 2 +- .../tests/tcp_network_unreachable_test.go | 6 ++- .../tests/udp_icmp_error_propagation_test.go | 8 +++- 12 files changed, 81 insertions(+), 40 deletions(-) (limited to 'test') diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go index 1e5f5abf2..b769094dc 100644 --- a/pkg/tcpip/checker/checker.go +++ b/pkg/tcpip/checker/checker.go @@ -699,7 +699,7 @@ func ICMPv4Type(want header.ICMPv4Type) TransportChecker { } // ICMPv4Code creates a checker that checks the ICMPv4 Code field. -func ICMPv4Code(want byte) TransportChecker { +func ICMPv4Code(want header.ICMPv4Code) TransportChecker { return func(t *testing.T, h header.Transport) { t.Helper() @@ -757,7 +757,7 @@ func ICMPv6Type(want header.ICMPv6Type) TransportChecker { } // ICMPv6Code creates a checker that checks the ICMPv6 Code field. 
-func ICMPv6Code(want byte) TransportChecker { +func ICMPv6Code(want header.ICMPv6Code) TransportChecker { return func(t *testing.T, h header.Transport) { t.Helper() diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go index 1a631b31a..be03fb086 100644 --- a/pkg/tcpip/header/icmpv4.go +++ b/pkg/tcpip/header/icmpv4.go @@ -54,6 +54,9 @@ const ( // ICMPv4Type is the ICMP type field described in RFC 792. type ICMPv4Type byte +// ICMPv4Code is the ICMP code field described in RFC 792. +type ICMPv4Code byte + // Typical values of ICMPv4Type defined in RFC 792. const ( ICMPv4EchoReply ICMPv4Type = 0 @@ -69,12 +72,13 @@ const ( ICMPv4InfoReply ICMPv4Type = 16 ) -// Values for ICMP code as defined in RFC 792. +// ICMP codes for ICMPv4 Destination Unreachable messages as defined in RFC 792. const ( - ICMPv4TTLExceeded = 0 - ICMPv4HostUnreachable = 1 - ICMPv4PortUnreachable = 3 - ICMPv4FragmentationNeeded = 4 + ICMPv4TTLExceeded ICMPv4Code = 0 + ICMPv4HostUnreachable ICMPv4Code = 1 + ICMPv4ProtoUnreachable ICMPv4Code = 2 + ICMPv4PortUnreachable ICMPv4Code = 3 + ICMPv4FragmentationNeeded ICMPv4Code = 4 ) // Type is the ICMP type field. @@ -84,10 +88,10 @@ func (b ICMPv4) Type() ICMPv4Type { return ICMPv4Type(b[0]) } func (b ICMPv4) SetType(t ICMPv4Type) { b[0] = byte(t) } // Code is the ICMP code field. Its meaning depends on the value of Type. -func (b ICMPv4) Code() byte { return b[1] } +func (b ICMPv4) Code() ICMPv4Code { return ICMPv4Code(b[1]) } // SetCode sets the ICMP code field. -func (b ICMPv4) SetCode(c byte) { b[1] = c } +func (b ICMPv4) SetCode(c ICMPv4Code) { b[1] = byte(c) } // Checksum is the ICMP checksum field. func (b ICMPv4) Checksum() uint16 { diff --git a/pkg/tcpip/header/icmpv6.go b/pkg/tcpip/header/icmpv6.go index a13b4b809..20b01d8f4 100644 --- a/pkg/tcpip/header/icmpv6.go +++ b/pkg/tcpip/header/icmpv6.go @@ -92,7 +92,6 @@ const ( // ICMPv6Type is the ICMP type field described in RFC 4443 and friends. type ICMPv6Type byte -// Typical values of ICMPv6Type defined in RFC 4443. const ( ICMPv6DstUnreachable ICMPv6Type = 1 ICMPv6PacketTooBig ICMPv6Type = 2 @@ -110,18 +109,38 @@ const ( ICMPv6RedirectMsg ICMPv6Type = 137 ) -// Values for ICMP destination unreachable code as defined in RFC 4443 section -// 3.1. +// ICMPv6Code is the ICMP code field described in RFC 4443. +type ICMPv6Code byte + +// ICMP codes used with Destination Unreachable (Type 1). As per RFC 4443 +// section 3.1. +const ( + ICMPv6NetworkUnreachable ICMPv6Code = 0 + ICMPv6Prohibited ICMPv6Code = 1 + ICMPv6BeyondScope ICMPv6Code = 2 + ICMPv6AddressUnreachable ICMPv6Code = 3 + ICMPv6PortUnreachable ICMPv6Code = 4 + ICMPv6Policy ICMPv6Code = 5 + ICMPv6RejectRoute ICMPv6Code = 6 +) + +// ICMP codes used with Time Exceeded (Type 3). As per RFC 4443 section 3.3. const ( - ICMPv6NetworkUnreachable = 0 - ICMPv6Prohibited = 1 - ICMPv6BeyondScope = 2 - ICMPv6AddressUnreachable = 3 - ICMPv6PortUnreachable = 4 - ICMPv6Policy = 5 - ICMPv6RejectRoute = 6 + ICMPv6HopLimitExceeded ICMPv6Code = 0 + ICMPv6ReassemblyTimeout ICMPv6Code = 1 ) +// ICMP codes used with Parameter Problem (Type 4). As per RFC 4443 section 3.4. +const ( + ICMPv6ErroneousHeader ICMPv6Code = 0 + ICMPv6UnknownHeader ICMPv6Code = 1 + ICMPv6UnknownOption ICMPv6Code = 2 +) + +// ICMPv6UnusedCode is the code value used with ICMPv6 messages which don't use +// the code field. (Types not mentioned above.) +const ICMPv6UnusedCode ICMPv6Code = 0 + // Type is the ICMP type field. 
func (b ICMPv6) Type() ICMPv6Type { return ICMPv6Type(b[0]) } @@ -129,10 +148,10 @@ func (b ICMPv6) Type() ICMPv6Type { return ICMPv6Type(b[0]) } func (b ICMPv6) SetType(t ICMPv6Type) { b[0] = byte(t) } // Code is the ICMP code field. Its meaning depends on the value of Type. -func (b ICMPv6) Code() byte { return b[1] } +func (b ICMPv6) Code() ICMPv6Code { return ICMPv6Code(b[1]) } // SetCode sets the ICMP code field. -func (b ICMPv6) SetCode(c byte) { b[1] = c } +func (b ICMPv6) SetCode(c ICMPv6Code) { b[1] = byte(c) } // Checksum is the ICMP checksum field. func (b ICMPv6) Checksum() uint16 { diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go index e6768258a..491d936a1 100644 --- a/pkg/tcpip/network/ip_test.go +++ b/pkg/tcpip/network/ip_test.go @@ -321,7 +321,7 @@ func TestIPv4ReceiveControl(t *testing.T) { name string expectedCount int fragmentOffset uint16 - code uint8 + code header.ICMPv4Code expectedTyp stack.ControlType expectedExtra uint32 trunc int @@ -579,7 +579,7 @@ func TestIPv6ReceiveControl(t *testing.T) { expectedCount int fragmentOffset *uint16 typ header.ICMPv6Type - code uint8 + code header.ICMPv6Code expectedTyp stack.ControlType expectedExtra uint32 trunc int diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go index fe159b24f..2efa82e60 100644 --- a/pkg/tcpip/network/ipv6/ndp_test.go +++ b/pkg/tcpip/network/ipv6/ndp_test.go @@ -637,7 +637,7 @@ func TestNDPValidation(t *testing.T) { name string atomicFragment bool hopLimit uint8 - code uint8 + code header.ICMPv6Code valid bool }{ { @@ -730,7 +730,7 @@ func TestRouterAdvertValidation(t *testing.T) { name string src tcpip.Address hopLimit uint8 - code uint8 + code header.ICMPv6Code ndpPayload []byte expectedSuccess bool }{ diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go index 927bc71e0..b6031354e 100644 --- a/pkg/tcpip/transport/tcp/testing/context/context.go +++ b/pkg/tcpip/transport/tcp/testing/context/context.go @@ -292,7 +292,7 @@ func (c *Context) GetPacketNonBlocking() []byte { } // SendICMPPacket builds and sends an ICMPv4 packet via the link layer endpoint. -func (c *Context) SendICMPPacket(typ header.ICMPv4Type, code uint8, p1, p2 []byte, maxTotalSize int) { +func (c *Context) SendICMPPacket(typ header.ICMPv4Type, code header.ICMPv4Code, p1, p2 []byte, maxTotalSize int) { // Allocate a buffer data and headers. buf := buffer.NewView(header.IPv4MinimumSize + header.ICMPv4PayloadOffset + len(p2)) if len(buf) > maxTotalSize { diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go index 24aa46cce..a35562ca8 100644 --- a/test/packetimpact/testbench/layers.go +++ b/test/packetimpact/testbench/layers.go @@ -775,7 +775,7 @@ func (l *IPv6FragmentExtHdr) String() string { type ICMPv6 struct { LayerBase Type *header.ICMPv6Type - Code *byte + Code *header.ICMPv6Code Checksum *uint16 Payload []byte } @@ -823,6 +823,12 @@ func ICMPv6Type(v header.ICMPv6Type) *header.ICMPv6Type { return &v } +// ICMPv6Code is a helper routine that allocates a new ICMPv6Type value to store +// v and returns a pointer to it. +func ICMPv6Code(v header.ICMPv6Code) *header.ICMPv6Code { + return &v +} + // Byte is a helper routine that allocates a new byte value to store // v and returns a pointer to it. 
func Byte(v byte) *byte { @@ -834,7 +840,7 @@ func parseICMPv6(b []byte) (Layer, layerParser) { h := header.ICMPv6(b) icmpv6 := ICMPv6{ Type: ICMPv6Type(h.Type()), - Code: Byte(h.Code()), + Code: ICMPv6Code(h.Code()), Checksum: Uint16(h.Checksum()), Payload: h.NDPPayload(), } @@ -861,11 +867,17 @@ func ICMPv4Type(t header.ICMPv4Type) *header.ICMPv4Type { return &t } +// ICMPv4Code is a helper routine that allocates a new header.ICMPv4Code value +// to store t and returns a pointer to it. +func ICMPv4Code(t header.ICMPv4Code) *header.ICMPv4Code { + return &t +} + // ICMPv4 can construct and match an ICMPv4 encapsulation. type ICMPv4 struct { LayerBase Type *header.ICMPv4Type - Code *uint8 + Code *header.ICMPv4Code Checksum *uint16 } @@ -881,7 +893,7 @@ func (l *ICMPv4) ToBytes() ([]byte, error) { h.SetType(*l.Type) } if l.Code != nil { - h.SetCode(byte(*l.Code)) + h.SetCode(*l.Code) } if l.Checksum != nil { h.SetChecksum(*l.Checksum) @@ -901,7 +913,7 @@ func parseICMPv4(b []byte) (Layer, layerParser) { h := header.ICMPv4(b) icmpv4 := ICMPv4{ Type: ICMPv4Type(h.Type()), - Code: Uint8(h.Code()), + Code: ICMPv4Code(h.Code()), Checksum: Uint16(h.Checksum()), } return &icmpv4, parsePayload diff --git a/test/packetimpact/testbench/layers_test.go b/test/packetimpact/testbench/layers_test.go index a2a763034..eca0780b5 100644 --- a/test/packetimpact/testbench/layers_test.go +++ b/test/packetimpact/testbench/layers_test.go @@ -594,7 +594,7 @@ func TestIPv6ExtHdrOptions(t *testing.T) { }, &ICMPv6{ Type: ICMPv6Type(header.ICMPv6ParamProblem), - Code: Byte(0), + Code: ICMPv6Code(header.ICMPv6ErroneousHeader), Checksum: Uint16(0x5f98), Payload: []byte{0x00, 0x00, 0x00, 0x06}, }, diff --git a/test/packetimpact/tests/ipv6_fragment_reassembly_test.go b/test/packetimpact/tests/ipv6_fragment_reassembly_test.go index b5f94ad4b..a24c85566 100644 --- a/test/packetimpact/tests/ipv6_fragment_reassembly_test.go +++ b/test/packetimpact/tests/ipv6_fragment_reassembly_test.go @@ -67,7 +67,7 @@ func TestIPv6FragmentReassembly(t *testing.T) { rIP := tcpip.Address(net.ParseIP(testbench.RemoteIPv6).To16()) icmpv6 := testbench.ICMPv6{ Type: testbench.ICMPv6Type(header.ICMPv6EchoRequest), - Code: testbench.Byte(0), + Code: testbench.ICMPv6Code(header.ICMPv6UnusedCode), Payload: icmpv6EchoPayload, } icmpv6Bytes, err := icmpv6.ToBytes() @@ -89,7 +89,7 @@ func TestIPv6FragmentReassembly(t *testing.T) { }, &testbench.ICMPv6{ Type: testbench.ICMPv6Type(header.ICMPv6EchoRequest), - Code: testbench.Byte(0), + Code: testbench.ICMPv6Code(header.ICMPv6UnusedCode), Payload: icmpv6EchoPayload, Checksum: &cksum, }) @@ -116,7 +116,7 @@ func TestIPv6FragmentReassembly(t *testing.T) { }, &testbench.ICMPv6{ Type: testbench.ICMPv6Type(header.ICMPv6EchoReply), - Code: testbench.Byte(0), + Code: testbench.ICMPv6Code(header.ICMPv6UnusedCode), }, }, time.Second) if err != nil { diff --git a/test/packetimpact/tests/ipv6_unknown_options_action_test.go b/test/packetimpact/tests/ipv6_unknown_options_action_test.go index d7d63cbd2..e79d74476 100644 --- a/test/packetimpact/tests/ipv6_unknown_options_action_test.go +++ b/test/packetimpact/tests/ipv6_unknown_options_action_test.go @@ -172,7 +172,7 @@ func TestIPv6UnknownOptionAction(t *testing.T) { &testbench.IPv6{}, &testbench.ICMPv6{ Type: testbench.ICMPv6Type(header.ICMPv6ParamProblem), - Code: testbench.Byte(2), + Code: testbench.ICMPv6Code(header.ICMPv6UnknownOption), Payload: icmpv6Payload, }, }, time.Second) diff --git a/test/packetimpact/tests/tcp_network_unreachable_test.go 
b/test/packetimpact/tests/tcp_network_unreachable_test.go index 900352fa1..2f57dff19 100644 --- a/test/packetimpact/tests/tcp_network_unreachable_test.go +++ b/test/packetimpact/tests/tcp_network_unreachable_test.go @@ -72,7 +72,9 @@ func TestTCPSynSentUnreachable(t *testing.T) { if !ok { t.Fatalf("expected %s to be TCP", tcpLayers[tcpLayer]) } - var icmpv4 testbench.ICMPv4 = testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), Code: testbench.Uint8(header.ICMPv4HostUnreachable)} + var icmpv4 testbench.ICMPv4 = testbench.ICMPv4{ + Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), + Code: testbench.ICMPv4Code(header.ICMPv4HostUnreachable)} layers = append(layers, &icmpv4, ip, tcp) rawConn.SendFrameStateless(t, layers) @@ -126,7 +128,7 @@ func TestTCPSynSentUnreachable6(t *testing.T) { } var icmpv6 testbench.ICMPv6 = testbench.ICMPv6{ Type: testbench.ICMPv6Type(header.ICMPv6DstUnreachable), - Code: testbench.Uint8(header.ICMPv6NetworkUnreachable), + Code: testbench.ICMPv6Code(header.ICMPv6NetworkUnreachable), // Per RFC 4443 3.1, the payload contains 4 zeroed bytes. Payload: []byte{0, 0, 0, 0}, } diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go index b47ddb6c3..df35d16c8 100644 --- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go +++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go @@ -62,9 +62,13 @@ func (e icmpError) String() string { func (e icmpError) ToICMPv4() *testbench.ICMPv4 { switch e { case portUnreachable: - return &testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), Code: testbench.Uint8(header.ICMPv4PortUnreachable)} + return &testbench.ICMPv4{ + Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), + Code: testbench.ICMPv4Code(header.ICMPv4PortUnreachable)} case timeToLiveExceeded: - return &testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4TimeExceeded), Code: testbench.Uint8(header.ICMPv4TTLExceeded)} + return &testbench.ICMPv4{ + Type: testbench.ICMPv4Type(header.ICMPv4TimeExceeded), + Code: testbench.ICMPv4Code(header.ICMPv4TTLExceeded)} } return nil } -- cgit v1.2.3 From e6ea59203ba6e9c0431999463795a2c14036053f Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Fri, 14 Aug 2020 10:51:12 -0700 Subject: Skip UDPMulticast Tests when net interfaces aren't found. PiperOrigin-RevId: 326686761 --- .../socket_ipv4_udp_unbound_external_networking.cc | 37 +++++++++++++++------- .../socket_ipv4_udp_unbound_external_networking.h | 6 ++-- 2 files changed, 29 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc index d690d9564..b206137eb 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.cc @@ -42,7 +42,9 @@ TestAddress V4EmptyAddress() { } void IPv4UDPUnboundExternalNetworkingSocketTest::SetUp() { - got_if_infos_ = false; + // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its + // IPv4 address on eth0. + found_net_interfaces_ = false; // Get interface list. 
ASSERT_NO_ERRNO(if_helper_.Load()); @@ -71,7 +73,7 @@ void IPv4UDPUnboundExternalNetworkingSocketTest::SetUp() { } eth_if_addr_ = *reinterpret_cast(eth_if_addr); - got_if_infos_ = true; + found_net_interfaces_ = true; } // Verifies that a newly instantiated UDP socket does not have the @@ -110,6 +112,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, SetUDPBroadcast) { // the destination port number. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, UDPBroadcastReceivedOnExpectedPort) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto rcvr1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto rcvr2 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -185,9 +188,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // not a unicast address. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, UDPBroadcastReceivedOnExpectedAddresses) { - // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its - // IPv4 address on eth0. - SKIP_IF(!got_if_infos_); + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto rcvr1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -272,6 +273,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // (UDPBroadcastSendRecvOnSocketBoundToAny). TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, UDPBroadcastSendRecvOnSocketBoundToBroadcast) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Enable SO_BROADCAST. @@ -313,6 +315,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // (UDPBroadcastSendRecvOnSocketBoundToBroadcast). TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, UDPBroadcastSendRecvOnSocketBoundToAny) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Enable SO_BROADCAST. @@ -351,6 +354,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // Verifies that a UDP broadcast fails to send on a socket with SO_BROADCAST // disabled. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendBroadcast) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); // Broadcast a test message without having enabled SO_BROADCAST on the sending @@ -401,6 +405,8 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // multicast on gVisor. SKIP_IF(IsRunningOnGvisor()); + SKIP_IF(!found_net_interfaces_); + auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto bind_addr = V4Any(); @@ -435,6 +441,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // Check that multicast packets will be delivered to the sending socket without // setting an interface. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastSelf) { + SKIP_IF(!found_net_interfaces_); auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto bind_addr = V4Any(); @@ -478,6 +485,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastSelf) { // set interface and IP_MULTICAST_LOOP disabled. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastSelfLoopOff) { + SKIP_IF(!found_net_interfaces_); auto socket = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto bind_addr = V4Any(); @@ -528,6 +536,8 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastNoGroup) { // multicast on gVisor. 
SKIP_IF(IsRunningOnGvisor()); + SKIP_IF(!found_net_interfaces_); + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -566,6 +576,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastNoGroup) { // Check that multicast packets will be delivered to another socket without // setting an interface. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticast) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -613,6 +624,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticast) { // set interface and IP_MULTICAST_LOOP disabled on the sending socket. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastSenderNoLoop) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -664,6 +676,8 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // setting an interface and IP_MULTICAST_LOOP disabled on the receiving socket. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastReceiverNoLoop) { + SKIP_IF(!found_net_interfaces_); + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -716,6 +730,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // and both will receive data on it when bound to the ANY address. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastToTwoBoundToAny) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); std::unique_ptr receivers[2] = { ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), @@ -782,6 +797,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // and both will receive data on it when bound to the multicast address. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastToTwoBoundToMulticastAddress) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); std::unique_ptr receivers[2] = { ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), @@ -851,6 +867,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // multicast address, both will receive data. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, TestSendMulticastToTwoBoundToAnyAndMulticastAddress) { + SKIP_IF(!found_net_interfaces_); auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); std::unique_ptr receivers[2] = { ASSERT_NO_ERRNO_AND_VALUE(NewSocket()), @@ -924,6 +941,8 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // is not a multicast address. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, IpMulticastLoopbackFromAddr) { + SKIP_IF(!found_net_interfaces_); + auto sender = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -991,9 +1010,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // interface, a multicast packet sent out uses the latter as its source address. TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, IpMulticastLoopbackIfNicAndAddr) { - // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its - // IPv4 address on eth0. - SKIP_IF(!got_if_infos_); + SKIP_IF(!found_net_interfaces_); // Create receiver, bind to ANY and join the multicast group. auto receiver = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -1059,9 +1076,7 @@ TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, // another interface. 
TEST_P(IPv4UDPUnboundExternalNetworkingSocketTest, IpMulticastLoopbackBindToOneIfSetMcastIfToAnother) { - // FIXME(b/137899561): Linux instance for syscall tests sometimes misses its - // IPv4 address on eth0. - SKIP_IF(!got_if_infos_); + SKIP_IF(!found_net_interfaces_); // FIXME (b/137790511): When bound to one interface it is not possible to set // IP_MULTICAST_IF to a different interface. diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h index 10b90b1e0..0e9e70e8e 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h @@ -29,9 +29,9 @@ class IPv4UDPUnboundExternalNetworkingSocketTest : public SimpleSocketTest { IfAddrHelper if_helper_; - // got_if_infos_ is set to false if SetUp() could not obtain all interface - // infos that we need. - bool got_if_infos_; + // found_net_interfaces_ is set to false if SetUp() could not obtain + // all interface infos that we need. + bool found_net_interfaces_; // Interface infos. int lo_if_idx_; -- cgit v1.2.3 From af433e159dbf07b084c0f05e65ec646c056033a7 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Fri, 14 Aug 2020 14:47:19 -0700 Subject: Update README for packetimpact PiperOrigin-RevId: 326733912 --- test/packetimpact/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/packetimpact/README.md b/test/packetimpact/README.md index f46c67a0c..ffa96ba98 100644 --- a/test/packetimpact/README.md +++ b/test/packetimpact/README.md @@ -30,7 +30,7 @@ $ make load-packetimpact Run a test, e.g. `fin_wait2_timeout`, against Linux: ```bash -$ bazel test //test/packetimpact/tests:fin_wait2_timeout_linux_test +$ bazel test //test/packetimpact/tests:fin_wait2_timeout_native_test ``` Run the same test, but against gVisor: -- cgit v1.2.3 From e3e1b36896750e9c4d07ecb6eaf1a738b08cd876 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Mon, 17 Aug 2020 13:24:09 -0700 Subject: [vfs] Do O_DIRECTORY check after resolving symlinks. Fixes python runtime test test_glob. Updates #3515 We were checking is the to-be-opened dentry is a dir or not before resolving symlinks. We should check that after resolving symlinks. This was preventing us from opening a symlink which pointed to a directory with O_DIRECTORY. Also added this check in tmpfs and removed a duplicate check. PiperOrigin-RevId: 327085895 --- pkg/sentry/fsimpl/gofer/filesystem.go | 6 +++--- pkg/sentry/fsimpl/tmpfs/filesystem.go | 5 ++--- test/syscalls/linux/open.cc | 8 ++++++++ 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 610a7ed78..a3903db33 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -886,9 +886,6 @@ afterTrailingSymlink: if mustCreate { return nil, syserror.EEXIST } - if !child.isDir() && rp.MustBeDir() { - return nil, syserror.ENOTDIR - } // Open existing child or follow symlink. 
if child.isSymlink() && rp.ShouldFollowSymlink() { target, err := child.readlink(ctx, rp.Mount()) @@ -901,6 +898,9 @@ afterTrailingSymlink: start = parent goto afterTrailingSymlink } + if rp.MustBeDir() && !child.isDir() { + return nil, syserror.ENOTDIR + } return child.openLocked(ctx, rp, &opts) } diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index a4864df53..cb8b2d944 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -389,9 +389,8 @@ afterTrailingSymlink: start = &parentDir.dentry goto afterTrailingSymlink } - // Open existing file. - if mustCreate { - return nil, syserror.EEXIST + if rp.MustBeDir() && !child.inode.isDir() { + return nil, syserror.ENOTDIR } return child.open(ctx, rp, &opts, false) } diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index 8f0c9cb49..77f390f3c 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -27,6 +27,7 @@ #include "test/util/cleanup.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" +#include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -408,6 +409,13 @@ TEST_F(OpenTest, FileNotDirectory) { SyscallFailsWithErrno(ENOTDIR)); } +TEST_F(OpenTest, SymlinkDirectory) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::string link = NewTempAbsPath(); + ASSERT_THAT(symlink(dir.path().c_str(), link.c_str()), SyscallSucceeds()); + ASSERT_NO_ERRNO(Open(link, O_RDONLY | O_DIRECTORY)); +} + TEST_F(OpenTest, Null) { char c = '\0'; ASSERT_THAT(open(&c, O_RDONLY), SyscallFailsWithErrno(ENOENT)); -- cgit v1.2.3 From 988ab27058829fd2ccb550560a7651511d7b872a Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Mon, 17 Aug 2020 14:01:38 -0700 Subject: Fix AllSocketPairTest for open source. Setting timeouts for sockets on GCP images (debian) for usecs only respects multiples of 4K. Set the test with a multiple of 4K with a comment. PiperOrigin-RevId: 327093848 --- test/syscalls/linux/socket_generic.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'test') diff --git a/test/syscalls/linux/socket_generic.cc b/test/syscalls/linux/socket_generic.cc index f7d6139f1..a6182f0ac 100644 --- a/test/syscalls/linux/socket_generic.cc +++ b/test/syscalls/linux/socket_generic.cc @@ -462,6 +462,7 @@ TEST_P(AllSocketPairTest, SendTimeoutDefault) { TEST_P(AllSocketPairTest, SetGetSendTimeout) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); + // tv_usec should be a multiple of 4000 to work on most systems. timeval tv = {.tv_sec = 89, .tv_usec = 42000}; EXPECT_THAT( setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), @@ -472,8 +473,8 @@ TEST_P(AllSocketPairTest, SetGetSendTimeout) { EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &actual_tv, &len), SyscallSucceeds()); - EXPECT_EQ(actual_tv.tv_sec, 89); - EXPECT_EQ(actual_tv.tv_usec, 42000); + EXPECT_EQ(actual_tv.tv_sec, tv.tv_sec); + EXPECT_EQ(actual_tv.tv_usec, tv.tv_usec); } TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) { @@ -484,8 +485,9 @@ TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) { int64_t extra_data; } ABSL_ATTRIBUTE_PACKED; + // tv_usec should be a multiple of 4000 to work on most systems. 
timeval_with_extra tv_extra = { - .tv = {.tv_sec = 0, .tv_usec = 123000}, + .tv = {.tv_sec = 0, .tv_usec = 124000}, }; EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, @@ -497,8 +499,8 @@ TEST_P(AllSocketPairTest, SetGetSendTimeoutLargerArg) { EXPECT_THAT(getsockopt(sockets->first_fd(), SOL_SOCKET, SO_SNDTIMEO, &actual_tv, &len), SyscallSucceeds()); - EXPECT_EQ(actual_tv.tv.tv_sec, 0); - EXPECT_EQ(actual_tv.tv.tv_usec, 123000); + EXPECT_EQ(actual_tv.tv.tv_sec, tv_extra.tv.tv_sec); + EXPECT_EQ(actual_tv.tv.tv_usec, tv_extra.tv.tv_usec); } TEST_P(AllSocketPairTest, SendTimeoutAllowsWrite) { -- cgit v1.2.3
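
The test change in the last commit only asserts exact round-tripping for tv_usec values that are multiples of 4000. Below is a minimal standalone sketch (not part of the patch series) of the same SO_SNDTIMEO set/get round trip; the 4 ms granularity is an assumption about hosts whose kernels store socket timeouts in jiffies with HZ=250, which is likely why only multiples of 4000 usec are preserved exactly on the GCP images the commit message mentions.

#include <sys/socket.h>
#include <sys/time.h>
#include <unistd.h>

#include <cstdio>

int main() {
  // Any stream socket works; it is never connected, we only exercise the
  // SO_SNDTIMEO option round trip.
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  if (fd < 0) {
    perror("socket");
    return 1;
  }

  // tv_usec is a multiple of 4000 so it survives conversion to 4 ms jiffies
  // (an assumption about the host kernel's tick length).
  timeval tv = {.tv_sec = 0, .tv_usec = 124000};
  if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) != 0) {
    perror("setsockopt");
    return 1;
  }

  timeval got = {};
  socklen_t len = sizeof(got);
  if (getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &got, &len) != 0) {
    perror("getsockopt");
    return 1;
  }

  // On a jiffy-granular kernel this prints the value unchanged; a tv_usec
  // that is not a multiple of the tick length may come back rounded, which
  // is what made exact-equality assertions like SetGetSendTimeout flaky.
  std::printf("SO_SNDTIMEO = %lds %ldus\n", static_cast<long>(got.tv_sec),
              static_cast<long>(got.tv_usec));
  close(fd);
  return 0;
}

On a host where the reported timeout differs from what was set, the rounding is the kernel's, not the test's, which is why the patch pins the test values to 4000-usec multiples rather than loosening the assertions.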